radeon_exa_funcs.c revision 70cce690
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2005 Eric Anholt
3209ff23fSmrg * Copyright 2005 Benjamin Herrenschmidt
4209ff23fSmrg * Copyright 2006 Tungsten Graphics, Inc.
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
8209ff23fSmrg * copy of this software and associated documentation files (the "Software"),
9209ff23fSmrg * to deal in the Software without restriction, including without limitation
10209ff23fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11209ff23fSmrg * and/or sell copies of the Software, and to permit persons to whom the
12209ff23fSmrg * Software is furnished to do so, subject to the following conditions:
13209ff23fSmrg *
14209ff23fSmrg * The above copyright notice and this permission notice (including the next
15209ff23fSmrg * paragraph) shall be included in all copies or substantial portions of the
16209ff23fSmrg * Software.
17209ff23fSmrg *
18209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19209ff23fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20209ff23fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21209ff23fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22209ff23fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24209ff23fSmrg * SOFTWARE.
25209ff23fSmrg *
26209ff23fSmrg * Authors:
27209ff23fSmrg *    Eric Anholt <anholt@FreeBSD.org>
28209ff23fSmrg *    Zack Rusin <zrusin@trolltech.com>
29209ff23fSmrg *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
30209ff23fSmrg *    Michel Dänzer <michel@tungstengraphics.com>
31209ff23fSmrg *
32209ff23fSmrg */
33209ff23fSmrg
34209ff23fSmrg#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
35209ff23fSmrg#error Cannot define both MMIO and CP acceleration!
36209ff23fSmrg#endif
37209ff23fSmrg
38209ff23fSmrg#if !defined(UNIXCPP) || defined(ANSICPP)
39209ff23fSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
40209ff23fSmrg#else
41209ff23fSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
42209ff23fSmrg#endif
43209ff23fSmrg
44209ff23fSmrg#ifdef ACCEL_MMIO
45209ff23fSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
46209ff23fSmrg#else
47209ff23fSmrg#ifdef ACCEL_CP
48209ff23fSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
49209ff23fSmrg#else
50209ff23fSmrg#error No accel type defined!
51209ff23fSmrg#endif
52209ff23fSmrg#endif
53209ff23fSmrg
54209ff23fSmrg#include <errno.h>
55209ff23fSmrg#include <string.h>
56209ff23fSmrg
57209ff23fSmrg#include "radeon.h"
58209ff23fSmrg
59209ff23fSmrg#include "exa.h"
60209ff23fSmrg
61209ff23fSmrgstatic int
62209ff23fSmrgFUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen)
63209ff23fSmrg{
64209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
65209ff23fSmrg
66209ff23fSmrg    TRACE;
67209ff23fSmrg
68b7e1c893Smrg    return ++info->accel_state->exaSyncMarker;
69209ff23fSmrg}
70209ff23fSmrg
71209ff23fSmrgstatic void
72209ff23fSmrgFUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
73209ff23fSmrg{
74209ff23fSmrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
76209ff23fSmrg
77f1bc02b7Smrg    if (info->cs)
78f1bc02b7Smrg	    return;
79f1bc02b7Smrg
80209ff23fSmrg    TRACE;
81209ff23fSmrg
82b7e1c893Smrg    if (info->accel_state->exaMarkerSynced != marker) {
83209ff23fSmrg	FUNC_NAME(RADEONWaitForIdle)(pScrn);
84b7e1c893Smrg	info->accel_state->exaMarkerSynced = marker;
85209ff23fSmrg    }
86209ff23fSmrg
87b7e1c893Smrg    RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
88209ff23fSmrg}
89209ff23fSmrg
90f1bc02b7Smrgstatic void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op)
91f1bc02b7Smrg{
92f1bc02b7Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
93f1bc02b7Smrg    int has_src;
94f1bc02b7Smrg    ACCEL_PREAMBLE();
95f1bc02b7Smrg
96f1bc02b7Smrg    /* don't emit if no operation in progress */
97f1bc02b7Smrg    if (info->state_2d.op == 0 && op == 0)
98f1bc02b7Smrg	return;
99f1bc02b7Smrg
100f1bc02b7Smrg    has_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo);
101f1bc02b7Smrg
102f1bc02b7Smrg    if (has_src) {
103f1bc02b7Smrg      BEGIN_ACCEL_RELOC(10, 2);
104f1bc02b7Smrg    } else {
105f1bc02b7Smrg      BEGIN_ACCEL_RELOC(9, 1);
106f1bc02b7Smrg    }
107f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
108f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
109f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
110f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
111f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
112f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
113f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask);
114f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl);
115f1bc02b7Smrg
116f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
117f1bc02b7Smrg    if (info->cs)
118f1bc02b7Smrg	OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
119f1bc02b7Smrg
120f1bc02b7Smrg    if (has_src) {
121f1bc02b7Smrg	    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
122f1bc02b7Smrg	    if (info->cs)
123f1bc02b7Smrg		OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
124f1bc02b7Smrg
125f1bc02b7Smrg    }
126f1bc02b7Smrg    FINISH_ACCEL();
127f1bc02b7Smrg
128f1bc02b7Smrg    if (op)
129f1bc02b7Smrg	info->state_2d.op = op;
130f1bc02b7Smrg    if (info->cs)
131f1bc02b7Smrg	info->reemit_current2d = FUNC_NAME(Emit2DState);
132f1bc02b7Smrg}
133f1bc02b7Smrg
134f1bc02b7Smrgstatic void
135f1bc02b7SmrgFUNC_NAME(RADEONDone2D)(PixmapPtr pPix)
136f1bc02b7Smrg{
137f1bc02b7Smrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
138f1bc02b7Smrg    ACCEL_PREAMBLE();
139f1bc02b7Smrg
140f1bc02b7Smrg    TRACE;
141f1bc02b7Smrg
142f1bc02b7Smrg    info->state_2d.op = 0;
143f1bc02b7Smrg    BEGIN_ACCEL(2);
144f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
145f1bc02b7Smrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
146f1bc02b7Smrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
147f1bc02b7Smrg    FINISH_ACCEL();
148f1bc02b7Smrg}
149f1bc02b7Smrg
150209ff23fSmrgstatic Bool
151209ff23fSmrgFUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
152209ff23fSmrg{
153209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
154209ff23fSmrg    uint32_t datatype, dst_pitch_offset;
155209ff23fSmrg
156209ff23fSmrg    TRACE;
157209ff23fSmrg
158209ff23fSmrg    if (pPix->drawable.bitsPerPixel == 24)
159209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported\n"));
160209ff23fSmrg    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
161209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
162209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
163209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
164209ff23fSmrg
165209ff23fSmrg    RADEON_SWITCH_TO_2D();
166209ff23fSmrg
167f1bc02b7Smrg#ifdef XF86DRM_MODE
168f1bc02b7Smrg    if (info->cs) {
169f1bc02b7Smrg	struct radeon_exa_pixmap_priv *driver_priv;
170f1bc02b7Smrg	int ret;
171f1bc02b7Smrg
172f1bc02b7Smrg	radeon_cs_space_reset_bos(info->cs);
173f1bc02b7Smrg
174f1bc02b7Smrg	driver_priv = exaGetPixmapDriverPrivate(pPix);
175f1bc02b7Smrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
176f1bc02b7Smrg
177f1bc02b7Smrg	ret = radeon_cs_space_check(info->cs);
178f1bc02b7Smrg	if (ret)
179f1bc02b7Smrg	    RADEON_FALLBACK(("Not enough RAM to hw accel solid operation\n"));
180f1bc02b7Smrg
181f1bc02b7Smrg	driver_priv = exaGetPixmapDriverPrivate(pPix);
182f1bc02b7Smrg	if (driver_priv)
183f1bc02b7Smrg	    info->state_2d.dst_bo = driver_priv->bo;
184f1bc02b7Smrg    }
185f1bc02b7Smrg#endif
186f1bc02b7Smrg
187f1bc02b7Smrg    info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX |
188f1bc02b7Smrg					       RADEON_DEFAULT_SC_BOTTOM_MAX);
189f1bc02b7Smrg    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
190f1bc02b7Smrg    info->state_2d.dp_src_frgd_clr = 0xffffffff;
191f1bc02b7Smrg    info->state_2d.dp_src_bkgd_clr = 0x00000000;
192f1bc02b7Smrg    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
193f1bc02b7Smrg					  RADEON_GMC_BRUSH_SOLID_COLOR |
194f1bc02b7Smrg					  (datatype << 8) |
195f1bc02b7Smrg					  RADEON_GMC_SRC_DATATYPE_COLOR |
196f1bc02b7Smrg					  RADEON_ROP[alu].pattern |
197f1bc02b7Smrg					  RADEON_GMC_CLR_CMP_CNTL_DIS);
198f1bc02b7Smrg    info->state_2d.dp_brush_frgd_clr = fg;
199f1bc02b7Smrg    info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM);
200f1bc02b7Smrg    info->state_2d.dp_write_mask = pm;
201f1bc02b7Smrg    info->state_2d.dst_pitch_offset = dst_pitch_offset;
202f1bc02b7Smrg    info->state_2d.src_pitch_offset = 0;
203f1bc02b7Smrg    info->state_2d.src_bo = NULL;
204f1bc02b7Smrg
205f1bc02b7Smrg    info->accel_state->dst_pix = pPix;
206f1bc02b7Smrg
207f1bc02b7Smrg    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID);
208209ff23fSmrg
209209ff23fSmrg    return TRUE;
210209ff23fSmrg}
211209ff23fSmrg
212209ff23fSmrg
213209ff23fSmrgstatic void
214209ff23fSmrgFUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
215209ff23fSmrg{
216209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
217209ff23fSmrg    ACCEL_PREAMBLE();
218209ff23fSmrg
219209ff23fSmrg    TRACE;
220209ff23fSmrg
221f1bc02b7Smrg#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
222f1bc02b7Smrg    if (info->cs && CS_FULL(info->cs)) {
223f1bc02b7Smrg	FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix);
224f1bc02b7Smrg	radeon_cs_flush_indirect(pScrn);
225f1bc02b7Smrg    }
226f1bc02b7Smrg#endif
227f1bc02b7Smrg
228b7e1c893Smrg    if (info->accel_state->vsync)
229f1bc02b7Smrg	FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix,
230f1bc02b7Smrg				      radeon_pick_best_crtc(pScrn, x1, x2, y1, y2),
231f1bc02b7Smrg				      y1, y2);
232b7e1c893Smrg
233209ff23fSmrg    BEGIN_ACCEL(2);
234209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
235209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
236209ff23fSmrg    FINISH_ACCEL();
237209ff23fSmrg}
238209ff23fSmrg
239209ff23fSmrgvoid
240209ff23fSmrgFUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
241209ff23fSmrg			       uint32_t dst_pitch_offset, uint32_t datatype, int rop,
242209ff23fSmrg			       Pixel planemask)
243209ff23fSmrg{
244209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
245209ff23fSmrg
246f1bc02b7Smrg    /* setup 2D state */
247f1bc02b7Smrg    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
248f1bc02b7Smrg					  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
249f1bc02b7Smrg					  RADEON_GMC_BRUSH_NONE |
250f1bc02b7Smrg					  (datatype << 8) |
251f1bc02b7Smrg					  RADEON_GMC_SRC_DATATYPE_COLOR |
252f1bc02b7Smrg					  RADEON_ROP[rop].rop |
253f1bc02b7Smrg					  RADEON_DP_SRC_SOURCE_MEMORY |
254f1bc02b7Smrg					  RADEON_GMC_CLR_CMP_CNTL_DIS);
255f1bc02b7Smrg    info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
256f1bc02b7Smrg			       (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0));
257f1bc02b7Smrg    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
258f1bc02b7Smrg    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
259f1bc02b7Smrg    info->state_2d.dp_src_frgd_clr = 0xffffffff;
260f1bc02b7Smrg    info->state_2d.dp_src_bkgd_clr = 0x00000000;
261f1bc02b7Smrg    info->state_2d.dp_write_mask = planemask;
262f1bc02b7Smrg    info->state_2d.dst_pitch_offset = dst_pitch_offset;
263f1bc02b7Smrg    info->state_2d.src_pitch_offset = src_pitch_offset;
264f1bc02b7Smrg    info->state_2d.default_sc_bottom_right =  (RADEON_DEFAULT_SC_RIGHT_MAX
265f1bc02b7Smrg						| RADEON_DEFAULT_SC_BOTTOM_MAX);
266f1bc02b7Smrg
267f1bc02b7Smrg    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY);
268209ff23fSmrg}
269209ff23fSmrg
270209ff23fSmrgstatic Bool
271209ff23fSmrgFUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
272209ff23fSmrg			     int xdir, int ydir,
273209ff23fSmrg			     int rop,
274209ff23fSmrg			     Pixel planemask)
275209ff23fSmrg{
276209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
277209ff23fSmrg    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
278209ff23fSmrg    TRACE;
279209ff23fSmrg
280209ff23fSmrg    if (pDst->drawable.bitsPerPixel == 24)
281209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported"));
282209ff23fSmrg    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
283209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
284209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
285209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
286209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
287209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
288209ff23fSmrg
289f1bc02b7Smrg    RADEON_SWITCH_TO_2D();
290f1bc02b7Smrg
291f1bc02b7Smrg#ifdef XF86DRM_MODE
292f1bc02b7Smrg    if (info->cs) {
293f1bc02b7Smrg	struct radeon_exa_pixmap_priv *driver_priv;
294f1bc02b7Smrg	int ret;
295f1bc02b7Smrg
296f1bc02b7Smrg	radeon_cs_space_reset_bos(info->cs);
297f1bc02b7Smrg
298f1bc02b7Smrg	driver_priv = exaGetPixmapDriverPrivate(pSrc);
299f1bc02b7Smrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
300f1bc02b7Smrg	info->state_2d.src_bo = driver_priv->bo;
301f1bc02b7Smrg
302f1bc02b7Smrg	driver_priv = exaGetPixmapDriverPrivate(pDst);
303f1bc02b7Smrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
304f1bc02b7Smrg	info->state_2d.dst_bo = driver_priv->bo;
305f1bc02b7Smrg
306f1bc02b7Smrg	ret = radeon_cs_space_check(info->cs);
307f1bc02b7Smrg	if (ret)
308f1bc02b7Smrg	    RADEON_FALLBACK(("Not enough RAM to hw accel copy operation\n"));
309f1bc02b7Smrg    }
310f1bc02b7Smrg#endif
311f1bc02b7Smrg
312f1bc02b7Smrg    info->accel_state->xdir = xdir;
313f1bc02b7Smrg    info->accel_state->ydir = ydir;
314f1bc02b7Smrg    info->accel_state->dst_pix = pDst;
315f1bc02b7Smrg
316209ff23fSmrg    FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
317209ff23fSmrg				   datatype, rop, planemask);
318209ff23fSmrg
319209ff23fSmrg    return TRUE;
320209ff23fSmrg}
321209ff23fSmrg
322209ff23fSmrgvoid
323209ff23fSmrgFUNC_NAME(RADEONCopy)(PixmapPtr pDst,
324209ff23fSmrg		      int srcX, int srcY,
325209ff23fSmrg		      int dstX, int dstY,
326209ff23fSmrg		      int w, int h)
327209ff23fSmrg{
328209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
329209ff23fSmrg    ACCEL_PREAMBLE();
330209ff23fSmrg
331209ff23fSmrg    TRACE;
332209ff23fSmrg
333f1bc02b7Smrg#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
334f1bc02b7Smrg    if (info->cs && CS_FULL(info->cs)) {
335f1bc02b7Smrg	FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix);
336f1bc02b7Smrg	radeon_cs_flush_indirect(pScrn);
337f1bc02b7Smrg    }
338f1bc02b7Smrg#endif
339f1bc02b7Smrg
340b7e1c893Smrg    if (info->accel_state->xdir < 0) {
341209ff23fSmrg	srcX += w - 1;
342209ff23fSmrg	dstX += w - 1;
343209ff23fSmrg    }
344b7e1c893Smrg    if (info->accel_state->ydir < 0) {
345209ff23fSmrg	srcY += h - 1;
346209ff23fSmrg	dstY += h - 1;
347209ff23fSmrg    }
348209ff23fSmrg
349f1bc02b7Smrg    if (info->accel_state->vsync)
350f1bc02b7Smrg	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
351f1bc02b7Smrg				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
352f1bc02b7Smrg				      dstY, dstY + h);
353b7e1c893Smrg
354209ff23fSmrg    BEGIN_ACCEL(3);
355209ff23fSmrg
356209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
357209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
358209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
359209ff23fSmrg
360209ff23fSmrg    FINISH_ACCEL();
361209ff23fSmrg}
362209ff23fSmrg
363b7e1c893Smrg#ifdef ACCEL_CP
364b7e1c893Smrg
365209ff23fSmrgstatic Bool
366b7e1c893SmrgRADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h,
367b7e1c893Smrg		       char *src, int src_pitch)
368209ff23fSmrg{
369209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
370209ff23fSmrg    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
371209ff23fSmrg    unsigned int   hpass;
372209ff23fSmrg    uint32_t	   buf_pitch, dst_pitch_off;
373209ff23fSmrg
374209ff23fSmrg    TRACE;
375209ff23fSmrg
376209ff23fSmrg    if (bpp < 8)
377209ff23fSmrg	return FALSE;
378209ff23fSmrg
379209ff23fSmrg    if (info->directRenderingEnabled &&
380209ff23fSmrg	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) {
381209ff23fSmrg	uint8_t *buf;
382209ff23fSmrg	int cpp = bpp / 8;
383209ff23fSmrg	ACCEL_PREAMBLE();
384209ff23fSmrg
385209ff23fSmrg	RADEON_SWITCH_TO_2D();
386b7e1c893Smrg
387b7e1c893Smrg	if (info->accel_state->vsync)
388f1bc02b7Smrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
389f1bc02b7Smrg					  radeon_pick_best_crtc(pScrn, x, x + w, y, y + h),
390f1bc02b7Smrg					  y, y + h);
391b7e1c893Smrg
392209ff23fSmrg	while ((buf = RADEONHostDataBlit(pScrn,
393209ff23fSmrg					 cpp, w, dst_pitch_off, &buf_pitch,
394209ff23fSmrg					 x, &y, (unsigned int*)&h, &hpass)) != 0) {
395209ff23fSmrg	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src,
396209ff23fSmrg				       hpass, buf_pitch, src_pitch);
397209ff23fSmrg	    src += hpass * src_pitch;
398209ff23fSmrg	}
399209ff23fSmrg
400209ff23fSmrg	exaMarkSync(pDst->drawable.pScreen);
401209ff23fSmrg	return TRUE;
402209ff23fSmrg    }
403209ff23fSmrg
404b7e1c893Smrg    return FALSE;
405209ff23fSmrg}
406209ff23fSmrg
407209ff23fSmrg/* Emit blit with arbitrary source and destination offsets and pitches */
408209ff23fSmrgstatic void
409f1bc02b7SmrgRADEONBlitChunk(ScrnInfoPtr pScrn, struct radeon_bo *src_bo,
410f1bc02b7Smrg                struct radeon_bo *dst_bo, uint32_t datatype,
411f1bc02b7Smrg                uint32_t src_pitch_offset, uint32_t dst_pitch_offset,
412f1bc02b7Smrg                int srcX, int srcY, int dstX, int dstY, int w, int h,
413f1bc02b7Smrg                uint32_t src_domain, uint32_t dst_domain)
414209ff23fSmrg{
415209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
416209ff23fSmrg    ACCEL_PREAMBLE();
417209ff23fSmrg
418f1bc02b7Smrg    if (src_bo && dst_bo) {
419f1bc02b7Smrg        BEGIN_ACCEL_RELOC(6, 2);
420f1bc02b7Smrg    } else if (src_bo && dst_bo == NULL) {
421f1bc02b7Smrg        BEGIN_ACCEL_RELOC(6, 1);
422f1bc02b7Smrg    } else {
423f1bc02b7Smrg        BEGIN_ACCEL(6);
424f1bc02b7Smrg    }
425209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
426209ff23fSmrg		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
427209ff23fSmrg		  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
428209ff23fSmrg		  RADEON_GMC_BRUSH_NONE |
429209ff23fSmrg		  (datatype << 8) |
430209ff23fSmrg		  RADEON_GMC_SRC_DATATYPE_COLOR |
431209ff23fSmrg		  RADEON_ROP3_S |
432209ff23fSmrg		  RADEON_DP_SRC_SOURCE_MEMORY |
433209ff23fSmrg		  RADEON_GMC_CLR_CMP_CNTL_DIS |
434209ff23fSmrg		  RADEON_GMC_WR_MSK_DIS);
435209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
436f1bc02b7Smrg    if (src_bo) {
437f1bc02b7Smrg	OUT_RELOC(src_bo, src_domain, 0);
438f1bc02b7Smrg    }
439209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
440f1bc02b7Smrg    if (dst_bo) {
441f1bc02b7Smrg	OUT_RELOC(dst_bo, 0, dst_domain);
442f1bc02b7Smrg    }
443209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX);
444209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
445209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
446209ff23fSmrg    FINISH_ACCEL();
447209ff23fSmrg    BEGIN_ACCEL(2);
448209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
449209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
450209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
451209ff23fSmrg    FINISH_ACCEL();
452209ff23fSmrg}
453b7e1c893Smrg
454f1bc02b7Smrg#if defined(XF86DRM_MODE)
455f1bc02b7Smrgstatic Bool
456f1bc02b7SmrgRADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
457f1bc02b7Smrg		       char *src, int src_pitch)
458f1bc02b7Smrg{
459f1bc02b7Smrg    ScreenPtr pScreen = pDst->drawable.pScreen;
460f1bc02b7Smrg    RINFO_FROM_SCREEN(pScreen);
461f1bc02b7Smrg    struct radeon_exa_pixmap_priv *driver_priv;
46269d0ef43Smrg    struct radeon_bo *scratch = NULL;
46369d0ef43Smrg    struct radeon_bo *copy_dst;
464f1bc02b7Smrg    unsigned char *dst;
465f1bc02b7Smrg    unsigned size;
466f1bc02b7Smrg    uint32_t datatype = 0;
467f1bc02b7Smrg    uint32_t dst_domain;
468f1bc02b7Smrg    uint32_t dst_pitch_offset;
469f1bc02b7Smrg    unsigned bpp = pDst->drawable.bitsPerPixel;
470f1bc02b7Smrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
47169d0ef43Smrg    uint32_t copy_pitch;
472f1bc02b7Smrg    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
47369d0ef43Smrg    int ret;
47469d0ef43Smrg    Bool flush = TRUE;
475f1bc02b7Smrg    Bool r;
476f1bc02b7Smrg    int i;
47770cce690Smrg    uint32_t tiling_flags = 0, pitch = 0;
478f1bc02b7Smrg
479f1bc02b7Smrg    if (bpp < 8)
480f1bc02b7Smrg	return FALSE;
481f1bc02b7Smrg
482f1bc02b7Smrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
483f1bc02b7Smrg    if (!driver_priv || !driver_priv->bo)
484f1bc02b7Smrg	return FALSE;
485f1bc02b7Smrg
48670cce690Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
48770cce690Smrg    if (ret)
48870cce690Smrg	ErrorF("radeon_bo_get_tiling failed\n");
48970cce690Smrg
490f1bc02b7Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
491f1bc02b7Smrg    switch (bpp) {
492f1bc02b7Smrg    case 32:
493f1bc02b7Smrg	swap = RADEON_HOST_DATA_SWAP_32BIT;
494f1bc02b7Smrg	break;
495f1bc02b7Smrg    case 16:
496f1bc02b7Smrg	swap = RADEON_HOST_DATA_SWAP_16BIT;
497f1bc02b7Smrg	break;
498f1bc02b7Smrg    }
499f1bc02b7Smrg#endif
500f1bc02b7Smrg
50169d0ef43Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
50269d0ef43Smrg    copy_dst = driver_priv->bo;
50369d0ef43Smrg    copy_pitch = pDst->devKind;
50470cce690Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
50570cce690Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
50670cce690Smrg	    flush = FALSE;
50770cce690Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
50870cce690Smrg		goto copy;
50970cce690Smrg	}
510f1bc02b7Smrg    }
511f1bc02b7Smrg
512f1bc02b7Smrg    size = scratch_pitch * h;
513f1bc02b7Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
514f1bc02b7Smrg    if (scratch == NULL) {
51569d0ef43Smrg	goto copy;
516f1bc02b7Smrg    }
517f1bc02b7Smrg    radeon_cs_space_reset_bos(info->cs);
518f1bc02b7Smrg    radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
519f1bc02b7Smrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
52069d0ef43Smrg    ret = radeon_cs_space_check(info->cs);
52169d0ef43Smrg    if (ret) {
52269d0ef43Smrg	goto copy;
523f1bc02b7Smrg    }
52469d0ef43Smrg    copy_dst = scratch;
52569d0ef43Smrg    copy_pitch = scratch_pitch;
52669d0ef43Smrg    flush = FALSE;
527f1bc02b7Smrg
528f1bc02b7Smrgcopy:
52969d0ef43Smrg    if (flush)
53069d0ef43Smrg	radeon_cs_flush_indirect(pScrn);
53169d0ef43Smrg
53269d0ef43Smrg    ret = radeon_bo_map(copy_dst, 0);
53369d0ef43Smrg    if (ret) {
534f1bc02b7Smrg        r = FALSE;
535f1bc02b7Smrg        goto out;
536f1bc02b7Smrg    }
537f1bc02b7Smrg    r = TRUE;
538f1bc02b7Smrg    size = w * bpp / 8;
53969d0ef43Smrg    dst = copy_dst->ptr;
54069d0ef43Smrg    if (copy_dst == driver_priv->bo)
54169d0ef43Smrg	dst += y * copy_pitch + x * bpp / 8;
542f1bc02b7Smrg    for (i = 0; i < h; i++) {
54369d0ef43Smrg        RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap);
544f1bc02b7Smrg        src += src_pitch;
545f1bc02b7Smrg    }
54669d0ef43Smrg    radeon_bo_unmap(copy_dst);
547f1bc02b7Smrg
54869d0ef43Smrg    if (copy_dst == scratch) {
549f1bc02b7Smrg	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
550f1bc02b7Smrg	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
551f1bc02b7Smrg	ACCEL_PREAMBLE();
552f1bc02b7Smrg	RADEON_SWITCH_TO_2D();
553f1bc02b7Smrg	RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
554f1bc02b7Smrg			dst_pitch_offset, 0, 0, x, y, w, h,
555f1bc02b7Smrg			RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
556f1bc02b7Smrg    }
557f1bc02b7Smrg
558f1bc02b7Smrgout:
55969d0ef43Smrg    if (scratch)
560f1bc02b7Smrg	radeon_bo_unref(scratch);
561f1bc02b7Smrg    return r;
562f1bc02b7Smrg}
563f1bc02b7Smrg
564f1bc02b7Smrgstatic Bool
565f1bc02b7SmrgRADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
566f1bc02b7Smrg                           int h, char *dst, int dst_pitch)
567f1bc02b7Smrg{
568f1bc02b7Smrg    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
569f1bc02b7Smrg    struct radeon_exa_pixmap_priv *driver_priv;
57069d0ef43Smrg    struct radeon_bo *scratch = NULL;
57169d0ef43Smrg    struct radeon_bo *copy_src;
572f1bc02b7Smrg    unsigned size;
573f1bc02b7Smrg    uint32_t datatype = 0;
574f1bc02b7Smrg    uint32_t src_domain = 0;
575f1bc02b7Smrg    uint32_t src_pitch_offset;
576f1bc02b7Smrg    unsigned bpp = pSrc->drawable.bitsPerPixel;
577f1bc02b7Smrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
57869d0ef43Smrg    uint32_t copy_pitch;
579f1bc02b7Smrg    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
58069d0ef43Smrg    int ret;
58169d0ef43Smrg    Bool flush = FALSE;
582f1bc02b7Smrg    Bool r;
58370cce690Smrg    uint32_t tiling_flags = 0, pitch = 0;
584f1bc02b7Smrg
585f1bc02b7Smrg    if (bpp < 8)
586f1bc02b7Smrg	return FALSE;
587f1bc02b7Smrg
588f1bc02b7Smrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
589f1bc02b7Smrg    if (!driver_priv || !driver_priv->bo)
590f1bc02b7Smrg	return FALSE;
591f1bc02b7Smrg
59270cce690Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
59370cce690Smrg    if (ret)
59470cce690Smrg	ErrorF("radeon_bo_get_tiling failed\n");
59570cce690Smrg
596f1bc02b7Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
597f1bc02b7Smrg    switch (bpp) {
598f1bc02b7Smrg    case 32:
599f1bc02b7Smrg	swap = RADEON_HOST_DATA_SWAP_32BIT;
600f1bc02b7Smrg	break;
601f1bc02b7Smrg    case 16:
602f1bc02b7Smrg	swap = RADEON_HOST_DATA_SWAP_16BIT;
603f1bc02b7Smrg	break;
604f1bc02b7Smrg    }
605f1bc02b7Smrg#endif
606f1bc02b7Smrg
60769d0ef43Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
60869d0ef43Smrg    copy_src = driver_priv->bo;
60969d0ef43Smrg    copy_pitch = pSrc->devKind;
61070cce690Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
61170cce690Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
61270cce690Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
61370cce690Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
61470cce690Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
61570cce690Smrg		src_domain = 0;
61670cce690Smrg	    else /* A write may be scheduled */
61770cce690Smrg		flush = TRUE;
61870cce690Smrg	}
619f1bc02b7Smrg
62070cce690Smrg	if (!src_domain)
62170cce690Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
622f1bc02b7Smrg
62370cce690Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
62470cce690Smrg	    goto copy;
625f1bc02b7Smrg    }
626f1bc02b7Smrg    size = scratch_pitch * h;
627f1bc02b7Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
628f1bc02b7Smrg    if (scratch == NULL) {
62969d0ef43Smrg	goto copy;
630f1bc02b7Smrg    }
631f1bc02b7Smrg    radeon_cs_space_reset_bos(info->cs);
632f1bc02b7Smrg    radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
633f1bc02b7Smrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
63469d0ef43Smrg    ret = radeon_cs_space_check(info->cs);
63569d0ef43Smrg    if (ret) {
63669d0ef43Smrg	goto copy;
637f1bc02b7Smrg    }
638f1bc02b7Smrg    RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
639f1bc02b7Smrg    RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
640f1bc02b7Smrg    ACCEL_PREAMBLE();
641f1bc02b7Smrg    RADEON_SWITCH_TO_2D();
642f1bc02b7Smrg    RADEONBlitChunk(pScrn, driver_priv->bo, scratch, datatype, src_pitch_offset,
643f1bc02b7Smrg                    scratch_pitch << 16, x, y, 0, 0, w, h,
644f1bc02b7Smrg                    RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
645f1bc02b7Smrg                    RADEON_GEM_DOMAIN_GTT);
64669d0ef43Smrg    copy_src = scratch;
64769d0ef43Smrg    copy_pitch = scratch_pitch;
64869d0ef43Smrg    flush = TRUE;
649f1bc02b7Smrg
650f1bc02b7Smrgcopy:
65169d0ef43Smrg    if (flush)
65269d0ef43Smrg	FLUSH_RING();
65369d0ef43Smrg
65469d0ef43Smrg    ret = radeon_bo_map(copy_src, 0);
65569d0ef43Smrg    if (ret) {
65669d0ef43Smrg	ErrorF("failed to map pixmap: %d\n", ret);
657f1bc02b7Smrg        r = FALSE;
658f1bc02b7Smrg        goto out;
659f1bc02b7Smrg    }
660f1bc02b7Smrg    r = TRUE;
661f1bc02b7Smrg    w *= bpp / 8;
66269d0ef43Smrg    if (copy_src == driver_priv->bo)
66369d0ef43Smrg	size = y * copy_pitch + x * bpp / 8;
664f1bc02b7Smrg    else
665f1bc02b7Smrg	size = 0;
666f1bc02b7Smrg    while (h--) {
66769d0ef43Smrg        RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap);
66869d0ef43Smrg        size += copy_pitch;
669f1bc02b7Smrg        dst += dst_pitch;
670f1bc02b7Smrg    }
67169d0ef43Smrg    radeon_bo_unmap(copy_src);
672f1bc02b7Smrgout:
67369d0ef43Smrg    if (scratch)
674f1bc02b7Smrg	radeon_bo_unref(scratch);
675f1bc02b7Smrg    return r;
676f1bc02b7Smrg}
677f1bc02b7Smrg#endif
678209ff23fSmrg
679209ff23fSmrgstatic Bool
680b7e1c893SmrgRADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
681209ff23fSmrg				    char *dst, int dst_pitch)
682209ff23fSmrg{
683209ff23fSmrg    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
684209ff23fSmrg    uint8_t	  *src	     = info->FB + exaGetPixmapOffset(pSrc);
685209ff23fSmrg    int		   bpp	     = pSrc->drawable.bitsPerPixel;
686f1bc02b7Smrg    uint32_t datatype, src_pitch_offset, scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64), scratch_off = 0;
687209ff23fSmrg    drmBufPtr scratch;
688209ff23fSmrg
689209ff23fSmrg    TRACE;
690209ff23fSmrg
691209ff23fSmrg    /*
692209ff23fSmrg     * Try to accelerate download. Use an indirect buffer as scratch space,
693209ff23fSmrg     * blitting the bits to one half while copying them out of the other one and
694209ff23fSmrg     * then swapping the halves.
695209ff23fSmrg     */
696b7e1c893Smrg    if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
697209ff23fSmrg	RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
698209ff23fSmrg	(scratch = RADEONCPGetBuffer(pScrn)))
699209ff23fSmrg    {
700209ff23fSmrg	int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
701209ff23fSmrg	int hpass = min(h, scratch->total/2 / scratch_pitch);
702209ff23fSmrg	uint32_t scratch_pitch_offset = scratch_pitch << 16
703b7e1c893Smrg				    | (info->gartLocation + info->dri->bufStart
704209ff23fSmrg				       + scratch->idx * scratch->total) >> 10;
705b7e1c893Smrg	drm_radeon_indirect_t indirect;
706209ff23fSmrg	ACCEL_PREAMBLE();
707209ff23fSmrg
708209ff23fSmrg	RADEON_SWITCH_TO_2D();
709209ff23fSmrg
710209ff23fSmrg	/* Kick the first blit as early as possible */
711f1bc02b7Smrg	RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
712f1bc02b7Smrg                        scratch_pitch_offset, x, y, 0, 0, w, hpass, 0, 0);
713209ff23fSmrg	FLUSH_RING();
714209ff23fSmrg
715209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
716209ff23fSmrg	switch (bpp) {
717209ff23fSmrg	case 16:
718209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_16BIT;
719209ff23fSmrg	  break;
720209ff23fSmrg	case 32:
721209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_32BIT;
722209ff23fSmrg	  break;
723209ff23fSmrg	}
724209ff23fSmrg#endif
725209ff23fSmrg
726209ff23fSmrg	while (h) {
727209ff23fSmrg	    int oldhpass = hpass, i = 0;
728209ff23fSmrg
729209ff23fSmrg	    src = (uint8_t*)scratch->address + scratch_off;
730209ff23fSmrg
731209ff23fSmrg	    y += oldhpass;
732209ff23fSmrg	    h -= oldhpass;
733209ff23fSmrg	    hpass = min(h, scratch->total/2 / scratch_pitch);
734209ff23fSmrg
735209ff23fSmrg	    /* Prepare next blit if anything's left */
736209ff23fSmrg	    if (hpass) {
737209ff23fSmrg		scratch_off = scratch->total/2 - scratch_off;
738f1bc02b7Smrg		RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
739f1bc02b7Smrg                                scratch_pitch_offset + (scratch_off >> 10),
740f1bc02b7Smrg				x, y, 0, 0, w, hpass, 0, 0);
741209ff23fSmrg	    }
742209ff23fSmrg
743209ff23fSmrg	    /*
744209ff23fSmrg	     * Wait for previous blit to complete.
745209ff23fSmrg	     *
746209ff23fSmrg	     * XXX: Doing here essentially the same things this ioctl does in
747209ff23fSmrg	     * the DRM results in corruption with 'small' transfers, apparently
748209ff23fSmrg	     * because the data doesn't actually land in system RAM before the
749209ff23fSmrg	     * memcpy. I suspect the ioctl helps mostly due to its latency; what
750209ff23fSmrg	     * we'd really need is a way to reliably wait for the host interface
751209ff23fSmrg	     * to be done with pushing the data to the host.
752209ff23fSmrg	     */
753b7e1c893Smrg	    while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
754209ff23fSmrg		   && (i++ < RADEON_TIMEOUT))
755209ff23fSmrg		;
756209ff23fSmrg
757209ff23fSmrg	    /* Kick next blit */
758209ff23fSmrg	    if (hpass)
759209ff23fSmrg		FLUSH_RING();
760209ff23fSmrg
761209ff23fSmrg	    /* Copy out data from previous blit */
762209ff23fSmrg	    if (wpass == scratch_pitch && wpass == dst_pitch) {
763209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
764209ff23fSmrg		dst += dst_pitch * oldhpass;
765209ff23fSmrg	    } else while (oldhpass--) {
766209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
767209ff23fSmrg		src += scratch_pitch;
768209ff23fSmrg		dst += dst_pitch;
769209ff23fSmrg	    }
770209ff23fSmrg	}
771209ff23fSmrg
772209ff23fSmrg	indirect.idx = scratch->idx;
773209ff23fSmrg	indirect.start = indirect.end = 0;
774209ff23fSmrg	indirect.discard = 1;
775209ff23fSmrg
776b7e1c893Smrg	drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
777b7e1c893Smrg			    &indirect, sizeof(drm_radeon_indirect_t));
778209ff23fSmrg
779b7e1c893Smrg	info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
780209ff23fSmrg
781209ff23fSmrg	return TRUE;
782209ff23fSmrg    }
783209ff23fSmrg
784b7e1c893Smrg    return FALSE;
785b7e1c893Smrg}
786209ff23fSmrg
787b7e1c893Smrg#endif	/* def ACCEL_CP */
788209ff23fSmrg
789209ff23fSmrg
790209ff23fSmrgBool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
791209ff23fSmrg{
792209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
793209ff23fSmrg
794b7e1c893Smrg    if (info->accel_state->exa == NULL) {
795209ff23fSmrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
796209ff23fSmrg	return FALSE;
797209ff23fSmrg    }
798209ff23fSmrg
799b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
800b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
801209ff23fSmrg
802b7e1c893Smrg    info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
803b7e1c893Smrg    info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid);
804f1bc02b7Smrg    info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDone2D);
805209ff23fSmrg
806b7e1c893Smrg    info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
807b7e1c893Smrg    info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy);
808f1bc02b7Smrg    info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDone2D);
809209ff23fSmrg
810b7e1c893Smrg    info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync);
811b7e1c893Smrg    info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync);
812b7e1c893Smrg#ifdef ACCEL_CP
813f1bc02b7Smrg    if (!info->kms_enabled) {
814f1bc02b7Smrg	info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP;
815f1bc02b7Smrg	if (info->accelDFS)
816f1bc02b7Smrg	    info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP;
817f1bc02b7Smrg    }
818f1bc02b7Smrg# if defined(XF86DRM_MODE)
819f1bc02b7Smrg    else {
820f1bc02b7Smrg	info->accel_state->exa->UploadToScreen = &RADEONUploadToScreenCS;
821f1bc02b7Smrg        info->accel_state->exa->DownloadFromScreen = &RADEONDownloadFromScreenCS;
822f1bc02b7Smrg    }
823f1bc02b7Smrg# endif
824b7e1c893Smrg#endif
825209ff23fSmrg
826209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
827f1bc02b7Smrg    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE;
828f1bc02b7Smrg    info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE;
829f1bc02b7Smrg#endif
830209ff23fSmrg
831b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
832b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
833b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
834b7e1c893Smrg#endif
835f1bc02b7Smrg#ifdef EXA_SUPPORTS_OFFSCREEN_OVERLAPS
836f1bc02b7Smrg    /* The 2D engine supports overlapping memory areas */
837f1bc02b7Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_OFFSCREEN_OVERLAPS;
838f1bc02b7Smrg#endif
839f1bc02b7Smrg    info->accel_state->exa->pixmapOffsetAlign = RADEON_GPU_PAGE_SIZE;
840b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 64;
841209ff23fSmrg
842f1bc02b7Smrg#ifdef EXA_HANDLES_PIXMAPS
843f1bc02b7Smrg    if (info->cs) {
844f1bc02b7Smrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
845f1bc02b7Smrg#ifdef EXA_MIXED_PIXMAPS
846f1bc02b7Smrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
847f1bc02b7Smrg#endif
848f1bc02b7Smrg    }
849f1bc02b7Smrg#endif
850f1bc02b7Smrg
851209ff23fSmrg#ifdef RENDER
852209ff23fSmrg    if (info->RenderAccel) {
853f1bc02b7Smrg	if (IS_R300_3D || IS_R500_3D) {
854209ff23fSmrg	    if ((info->ChipFamily < CHIP_FAMILY_RS400)
855209ff23fSmrg#ifdef XF86DRI
856209ff23fSmrg		|| (info->directRenderingEnabled)
857209ff23fSmrg#endif
858209ff23fSmrg		) {
859209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
860209ff23fSmrg			       "enabled for R300/R400/R500 type cards.\n");
861b7e1c893Smrg		info->accel_state->exa->CheckComposite = R300CheckComposite;
862b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
863209ff23fSmrg		    FUNC_NAME(R300PrepareComposite);
864b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
865b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
866209ff23fSmrg	    } else
867209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
868f1bc02b7Smrg	} else if (IS_R200_3D) {
869209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
870209ff23fSmrg			       "enabled for R200 type cards.\n");
871b7e1c893Smrg		info->accel_state->exa->CheckComposite = R200CheckComposite;
872b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
873209ff23fSmrg		    FUNC_NAME(R200PrepareComposite);
874b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
875b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
876209ff23fSmrg	} else {
877209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
878209ff23fSmrg			       "enabled for R100 type cards.\n");
879b7e1c893Smrg		info->accel_state->exa->CheckComposite = R100CheckComposite;
880b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
881209ff23fSmrg		    FUNC_NAME(R100PrepareComposite);
882b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
883b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
884209ff23fSmrg	}
885209ff23fSmrg    }
886209ff23fSmrg#endif
887209ff23fSmrg
888f1bc02b7Smrg#ifdef XF86DRM_MODE
889f1bc02b7Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
890f1bc02b7Smrg    if (info->cs) {
891f1bc02b7Smrg        info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
892f1bc02b7Smrg        info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
893f1bc02b7Smrg        info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
894f1bc02b7Smrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
895f1bc02b7Smrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
896f1bc02b7Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
897f1bc02b7Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
898f1bc02b7Smrg#endif
899f1bc02b7Smrg    }
900f1bc02b7Smrg#endif
901f1bc02b7Smrg#endif
902f1bc02b7Smrg
903f1bc02b7Smrg
904209ff23fSmrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
905209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
906209ff23fSmrg
907b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 16320;
908a4f79855Smrg    info->accel_state->exa->maxX = 8191;
909209ff23fSmrg#else
910b7e1c893Smrg    info->accel_state->exa->maxX = 16320 / 4;
911209ff23fSmrg#endif
912a4f79855Smrg    info->accel_state->exa->maxY = 8191;
913b7e1c893Smrg
914b7e1c893Smrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
915b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
916b7e1c893Smrg	info->accel_state->vsync = TRUE;
917b7e1c893Smrg    } else
918b7e1c893Smrg	info->accel_state->vsync = FALSE;
919209ff23fSmrg
920209ff23fSmrg    RADEONEngineInit(pScrn);
921209ff23fSmrg
922b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
92351b40f85Smrg	free(info->accel_state->exa);
924209ff23fSmrg	return FALSE;
925209ff23fSmrg    }
926209ff23fSmrg    exaMarkSync(pScreen);
927209ff23fSmrg
928209ff23fSmrg    return TRUE;
929209ff23fSmrg}
930209ff23fSmrg
931209ff23fSmrg#undef FUNC_NAME
932