radeon_exa_funcs.c revision 70cce690
/*
 * Copyright 2005 Eric Anholt
 * Copyright 2005 Benjamin Herrenschmidt
 * Copyright 2006 Tungsten Graphics, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <anholt@FreeBSD.org>
 *    Zack Rusin <zrusin@trolltech.com>
 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
 *    Michel Dänzer <michel@tungstengraphics.com>
 *
 */
33
34#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
35#error Cannot define both MMIO and CP acceleration!
36#endif
37
38#if !defined(UNIXCPP) || defined(ANSICPP)
39#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
40#else
41#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
42#endif
43
44#ifdef ACCEL_MMIO
45#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
46#else
47#ifdef ACCEL_CP
48#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
49#else
50#error No accel type defined!
51#endif
52#endif
53
54#include <errno.h>
55#include <string.h>
56
57#include "radeon.h"
58
59#include "exa.h"
60
61static int
62FUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen)
63{
64    RINFO_FROM_SCREEN(pScreen);
65
66    TRACE;
67
68    return ++info->accel_state->exaSyncMarker;
69}
70
71static void
72FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
73{
74    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75    RADEONInfoPtr info = RADEONPTR(pScrn);
76
77    if (info->cs)
78	    return;
79
80    TRACE;
81
82    if (info->accel_state->exaMarkerSynced != marker) {
83	FUNC_NAME(RADEONWaitForIdle)(pScrn);
84	info->accel_state->exaMarkerSynced = marker;
85    }
86
87    RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
88}
89
90static void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op)
91{
92    RADEONInfoPtr info = RADEONPTR(pScrn);
93    int has_src;
94    ACCEL_PREAMBLE();
95
96    /* don't emit if no operation in progress */
97    if (info->state_2d.op == 0 && op == 0)
98	return;
99
100    has_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo);
101
102    if (has_src) {
103      BEGIN_ACCEL_RELOC(10, 2);
104    } else {
105      BEGIN_ACCEL_RELOC(9, 1);
106    }
107    OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
108    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
109    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
110    OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
111    OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
112    OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
113    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask);
114    OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl);
115
116    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
117    if (info->cs)
118	OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
119
120    if (has_src) {
121	    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
122	    if (info->cs)
123		OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
124
125    }
126    FINISH_ACCEL();
127
128    if (op)
129	info->state_2d.op = op;
130    if (info->cs)
131	info->reemit_current2d = FUNC_NAME(Emit2DState);
132}
133
134static void
135FUNC_NAME(RADEONDone2D)(PixmapPtr pPix)
136{
137    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
138    ACCEL_PREAMBLE();
139
140    TRACE;
141
142    info->state_2d.op = 0;
143    BEGIN_ACCEL(2);
144    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
145    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
146                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
147    FINISH_ACCEL();
148}
149
150static Bool
151FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
152{
153    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
154    uint32_t datatype, dst_pitch_offset;
155
156    TRACE;
157
158    if (pPix->drawable.bitsPerPixel == 24)
159	RADEON_FALLBACK(("24bpp unsupported\n"));
160    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
161	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
162    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
163	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
164
165    RADEON_SWITCH_TO_2D();
166
167#ifdef XF86DRM_MODE
168    if (info->cs) {
169	struct radeon_exa_pixmap_priv *driver_priv;
170	int ret;
171
172	radeon_cs_space_reset_bos(info->cs);
173
174	driver_priv = exaGetPixmapDriverPrivate(pPix);
175	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
176
177	ret = radeon_cs_space_check(info->cs);
178	if (ret)
179	    RADEON_FALLBACK(("Not enough RAM to hw accel solid operation\n"));
180
181	driver_priv = exaGetPixmapDriverPrivate(pPix);
182	if (driver_priv)
183	    info->state_2d.dst_bo = driver_priv->bo;
184    }
185#endif
186
187    info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX |
188					       RADEON_DEFAULT_SC_BOTTOM_MAX);
189    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
190    info->state_2d.dp_src_frgd_clr = 0xffffffff;
191    info->state_2d.dp_src_bkgd_clr = 0x00000000;
192    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
193					  RADEON_GMC_BRUSH_SOLID_COLOR |
194					  (datatype << 8) |
195					  RADEON_GMC_SRC_DATATYPE_COLOR |
196					  RADEON_ROP[alu].pattern |
197					  RADEON_GMC_CLR_CMP_CNTL_DIS);
198    info->state_2d.dp_brush_frgd_clr = fg;
199    info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM);
200    info->state_2d.dp_write_mask = pm;
201    info->state_2d.dst_pitch_offset = dst_pitch_offset;
202    info->state_2d.src_pitch_offset = 0;
203    info->state_2d.src_bo = NULL;
204
205    info->accel_state->dst_pix = pPix;
206
207    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID);
208
209    return TRUE;
210}
211
212
213static void
214FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
215{
216    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
217    ACCEL_PREAMBLE();
218
219    TRACE;
220
221#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
222    if (info->cs && CS_FULL(info->cs)) {
223	FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix);
224	radeon_cs_flush_indirect(pScrn);
225    }
226#endif
227
228    if (info->accel_state->vsync)
229	FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix,
230				      radeon_pick_best_crtc(pScrn, x1, x2, y1, y2),
231				      y1, y2);
232
233    BEGIN_ACCEL(2);
234    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
235    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
236    FINISH_ACCEL();
237}
238
239void
240FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
241			       uint32_t dst_pitch_offset, uint32_t datatype, int rop,
242			       Pixel planemask)
243{
244    RADEONInfoPtr info = RADEONPTR(pScrn);
245
246    /* setup 2D state */
247    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
248					  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
249					  RADEON_GMC_BRUSH_NONE |
250					  (datatype << 8) |
251					  RADEON_GMC_SRC_DATATYPE_COLOR |
252					  RADEON_ROP[rop].rop |
253					  RADEON_DP_SRC_SOURCE_MEMORY |
254					  RADEON_GMC_CLR_CMP_CNTL_DIS);
255    info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
256			       (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0));
257    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
258    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
259    info->state_2d.dp_src_frgd_clr = 0xffffffff;
260    info->state_2d.dp_src_bkgd_clr = 0x00000000;
261    info->state_2d.dp_write_mask = planemask;
262    info->state_2d.dst_pitch_offset = dst_pitch_offset;
263    info->state_2d.src_pitch_offset = src_pitch_offset;
264    info->state_2d.default_sc_bottom_right =  (RADEON_DEFAULT_SC_RIGHT_MAX
265						| RADEON_DEFAULT_SC_BOTTOM_MAX);
266
267    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY);
268}
269
270static Bool
271FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
272			     int xdir, int ydir,
273			     int rop,
274			     Pixel planemask)
275{
276    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
277    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
278    TRACE;
279
280    if (pDst->drawable.bitsPerPixel == 24)
281	RADEON_FALLBACK(("24bpp unsupported"));
282    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
283	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
284    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
285	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
286    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
287	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
288
289    RADEON_SWITCH_TO_2D();
290
291#ifdef XF86DRM_MODE
292    if (info->cs) {
293	struct radeon_exa_pixmap_priv *driver_priv;
294	int ret;
295
296	radeon_cs_space_reset_bos(info->cs);
297
298	driver_priv = exaGetPixmapDriverPrivate(pSrc);
299	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
300	info->state_2d.src_bo = driver_priv->bo;
301
302	driver_priv = exaGetPixmapDriverPrivate(pDst);
303	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
304	info->state_2d.dst_bo = driver_priv->bo;
305
306	ret = radeon_cs_space_check(info->cs);
307	if (ret)
308	    RADEON_FALLBACK(("Not enough RAM to hw accel copy operation\n"));
309    }
310#endif
311
312    info->accel_state->xdir = xdir;
313    info->accel_state->ydir = ydir;
314    info->accel_state->dst_pix = pDst;
315
316    FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
317				   datatype, rop, planemask);
318
319    return TRUE;
320}
321
322void
323FUNC_NAME(RADEONCopy)(PixmapPtr pDst,
324		      int srcX, int srcY,
325		      int dstX, int dstY,
326		      int w, int h)
327{
328    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
329    ACCEL_PREAMBLE();
330
331    TRACE;
332
333#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
334    if (info->cs && CS_FULL(info->cs)) {
335	FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix);
336	radeon_cs_flush_indirect(pScrn);
337    }
338#endif
339
340    if (info->accel_state->xdir < 0) {
341	srcX += w - 1;
342	dstX += w - 1;
343    }
344    if (info->accel_state->ydir < 0) {
345	srcY += h - 1;
346	dstY += h - 1;
347    }
348
349    if (info->accel_state->vsync)
350	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
351				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
352				      dstY, dstY + h);
353
354    BEGIN_ACCEL(3);
355
356    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
357    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
358    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
359
360    FINISH_ACCEL();
361}
362
363#ifdef ACCEL_CP
364
365static Bool
366RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h,
367		       char *src, int src_pitch)
368{
369    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
370    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
371    unsigned int   hpass;
372    uint32_t	   buf_pitch, dst_pitch_off;
373
374    TRACE;
375
376    if (bpp < 8)
377	return FALSE;
378
379    if (info->directRenderingEnabled &&
380	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) {
381	uint8_t *buf;
382	int cpp = bpp / 8;
383	ACCEL_PREAMBLE();
384
385	RADEON_SWITCH_TO_2D();
386
387	if (info->accel_state->vsync)
388	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
389					  radeon_pick_best_crtc(pScrn, x, x + w, y, y + h),
390					  y, y + h);
391
392	while ((buf = RADEONHostDataBlit(pScrn,
393					 cpp, w, dst_pitch_off, &buf_pitch,
394					 x, &y, (unsigned int*)&h, &hpass)) != 0) {
395	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src,
396				       hpass, buf_pitch, src_pitch);
397	    src += hpass * src_pitch;
398	}
399
400	exaMarkSync(pDst->drawable.pScreen);
401	return TRUE;
402    }
403
404    return FALSE;
405}
406
407/* Emit blit with arbitrary source and destination offsets and pitches */
408static void
409RADEONBlitChunk(ScrnInfoPtr pScrn, struct radeon_bo *src_bo,
410                struct radeon_bo *dst_bo, uint32_t datatype,
411                uint32_t src_pitch_offset, uint32_t dst_pitch_offset,
412                int srcX, int srcY, int dstX, int dstY, int w, int h,
413                uint32_t src_domain, uint32_t dst_domain)
414{
415    RADEONInfoPtr info = RADEONPTR(pScrn);
416    ACCEL_PREAMBLE();
417
418    if (src_bo && dst_bo) {
419        BEGIN_ACCEL_RELOC(6, 2);
420    } else if (src_bo && dst_bo == NULL) {
421        BEGIN_ACCEL_RELOC(6, 1);
422    } else {
423        BEGIN_ACCEL(6);
424    }
425    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
426		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
427		  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
428		  RADEON_GMC_BRUSH_NONE |
429		  (datatype << 8) |
430		  RADEON_GMC_SRC_DATATYPE_COLOR |
431		  RADEON_ROP3_S |
432		  RADEON_DP_SRC_SOURCE_MEMORY |
433		  RADEON_GMC_CLR_CMP_CNTL_DIS |
434		  RADEON_GMC_WR_MSK_DIS);
435    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
436    if (src_bo) {
437	OUT_RELOC(src_bo, src_domain, 0);
438    }
439    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
440    if (dst_bo) {
441	OUT_RELOC(dst_bo, 0, dst_domain);
442    }
443    OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX);
444    OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
445    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
446    FINISH_ACCEL();
447    BEGIN_ACCEL(2);
448    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
449    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
450                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
451    FINISH_ACCEL();
452}
453
454#if defined(XF86DRM_MODE)
455static Bool
456RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
457		       char *src, int src_pitch)
458{
459    ScreenPtr pScreen = pDst->drawable.pScreen;
460    RINFO_FROM_SCREEN(pScreen);
461    struct radeon_exa_pixmap_priv *driver_priv;
462    struct radeon_bo *scratch = NULL;
463    struct radeon_bo *copy_dst;
464    unsigned char *dst;
465    unsigned size;
466    uint32_t datatype = 0;
467    uint32_t dst_domain;
468    uint32_t dst_pitch_offset;
469    unsigned bpp = pDst->drawable.bitsPerPixel;
470    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
471    uint32_t copy_pitch;
472    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
473    int ret;
474    Bool flush = TRUE;
475    Bool r;
476    int i;
477    uint32_t tiling_flags = 0, pitch = 0;
478
479    if (bpp < 8)
480	return FALSE;
481
482    driver_priv = exaGetPixmapDriverPrivate(pDst);
483    if (!driver_priv || !driver_priv->bo)
484	return FALSE;
485
486    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
487    if (ret)
488	ErrorF("radeon_bo_get_tiling failed\n");
489
490#if X_BYTE_ORDER == X_BIG_ENDIAN
491    switch (bpp) {
492    case 32:
493	swap = RADEON_HOST_DATA_SWAP_32BIT;
494	break;
495    case 16:
496	swap = RADEON_HOST_DATA_SWAP_16BIT;
497	break;
498    }
499#endif
500
501    /* If we know the BO won't be busy, don't bother with a scratch */
502    copy_dst = driver_priv->bo;
503    copy_pitch = pDst->devKind;
504    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
505	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
506	    flush = FALSE;
507	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
508		goto copy;
509	}
510    }
511
512    size = scratch_pitch * h;
513    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
514    if (scratch == NULL) {
515	goto copy;
516    }
517    radeon_cs_space_reset_bos(info->cs);
518    radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
519    radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
520    ret = radeon_cs_space_check(info->cs);
521    if (ret) {
522	goto copy;
523    }
524    copy_dst = scratch;
525    copy_pitch = scratch_pitch;
526    flush = FALSE;
527
528copy:
529    if (flush)
530	radeon_cs_flush_indirect(pScrn);
531
532    ret = radeon_bo_map(copy_dst, 0);
533    if (ret) {
534        r = FALSE;
535        goto out;
536    }
537    r = TRUE;
538    size = w * bpp / 8;
539    dst = copy_dst->ptr;
540    if (copy_dst == driver_priv->bo)
541	dst += y * copy_pitch + x * bpp / 8;
542    for (i = 0; i < h; i++) {
543        RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap);
544        src += src_pitch;
545    }
546    radeon_bo_unmap(copy_dst);
547
548    if (copy_dst == scratch) {
549	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
550	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
551	ACCEL_PREAMBLE();
552	RADEON_SWITCH_TO_2D();
553	RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
554			dst_pitch_offset, 0, 0, x, y, w, h,
555			RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
556    }
557
558out:
559    if (scratch)
560	radeon_bo_unref(scratch);
561    return r;
562}
563
564static Bool
565RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
566                           int h, char *dst, int dst_pitch)
567{
568    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
569    struct radeon_exa_pixmap_priv *driver_priv;
570    struct radeon_bo *scratch = NULL;
571    struct radeon_bo *copy_src;
572    unsigned size;
573    uint32_t datatype = 0;
574    uint32_t src_domain = 0;
575    uint32_t src_pitch_offset;
576    unsigned bpp = pSrc->drawable.bitsPerPixel;
577    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
578    uint32_t copy_pitch;
579    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
580    int ret;
581    Bool flush = FALSE;
582    Bool r;
583    uint32_t tiling_flags = 0, pitch = 0;
584
585    if (bpp < 8)
586	return FALSE;
587
588    driver_priv = exaGetPixmapDriverPrivate(pSrc);
589    if (!driver_priv || !driver_priv->bo)
590	return FALSE;
591
592    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
593    if (ret)
594	ErrorF("radeon_bo_get_tiling failed\n");
595
596#if X_BYTE_ORDER == X_BIG_ENDIAN
597    switch (bpp) {
598    case 32:
599	swap = RADEON_HOST_DATA_SWAP_32BIT;
600	break;
601    case 16:
602	swap = RADEON_HOST_DATA_SWAP_16BIT;
603	break;
604    }
605#endif
606
607    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
608    copy_src = driver_priv->bo;
609    copy_pitch = pSrc->devKind;
610    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
611	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
612	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
613	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
614		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
615		src_domain = 0;
616	    else /* A write may be scheduled */
617		flush = TRUE;
618	}
619
620	if (!src_domain)
621	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
622
623	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
624	    goto copy;
625    }
626    size = scratch_pitch * h;
627    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
628    if (scratch == NULL) {
629	goto copy;
630    }
631    radeon_cs_space_reset_bos(info->cs);
632    radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
633    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
634    ret = radeon_cs_space_check(info->cs);
635    if (ret) {
636	goto copy;
637    }
638    RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
639    RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
640    ACCEL_PREAMBLE();
641    RADEON_SWITCH_TO_2D();
642    RADEONBlitChunk(pScrn, driver_priv->bo, scratch, datatype, src_pitch_offset,
643                    scratch_pitch << 16, x, y, 0, 0, w, h,
644                    RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
645                    RADEON_GEM_DOMAIN_GTT);
646    copy_src = scratch;
647    copy_pitch = scratch_pitch;
648    flush = TRUE;
649
650copy:
651    if (flush)
652	FLUSH_RING();
653
654    ret = radeon_bo_map(copy_src, 0);
655    if (ret) {
656	ErrorF("failed to map pixmap: %d\n", ret);
657        r = FALSE;
658        goto out;
659    }
660    r = TRUE;
661    w *= bpp / 8;
662    if (copy_src == driver_priv->bo)
663	size = y * copy_pitch + x * bpp / 8;
664    else
665	size = 0;
666    while (h--) {
667        RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap);
668        size += copy_pitch;
669        dst += dst_pitch;
670    }
671    radeon_bo_unmap(copy_src);
672out:
673    if (scratch)
674	radeon_bo_unref(scratch);
675    return r;
676}
677#endif
678
679static Bool
680RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
681				    char *dst, int dst_pitch)
682{
683    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
684    uint8_t	  *src	     = info->FB + exaGetPixmapOffset(pSrc);
685    int		   bpp	     = pSrc->drawable.bitsPerPixel;
686    uint32_t datatype, src_pitch_offset, scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64), scratch_off = 0;
687    drmBufPtr scratch;
688
689    TRACE;
690
691    /*
692     * Try to accelerate download. Use an indirect buffer as scratch space,
693     * blitting the bits to one half while copying them out of the other one and
694     * then swapping the halves.
695     */
696    if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
697	RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
698	(scratch = RADEONCPGetBuffer(pScrn)))
699    {
700	int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
701	int hpass = min(h, scratch->total/2 / scratch_pitch);
702	uint32_t scratch_pitch_offset = scratch_pitch << 16
703				    | (info->gartLocation + info->dri->bufStart
704				       + scratch->idx * scratch->total) >> 10;
705	drm_radeon_indirect_t indirect;
706	ACCEL_PREAMBLE();
707
708	RADEON_SWITCH_TO_2D();
709
710	/* Kick the first blit as early as possible */
711	RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
712                        scratch_pitch_offset, x, y, 0, 0, w, hpass, 0, 0);
713	FLUSH_RING();
714
715#if X_BYTE_ORDER == X_BIG_ENDIAN
716	switch (bpp) {
717	case 16:
718	  swap = RADEON_HOST_DATA_SWAP_16BIT;
719	  break;
720	case 32:
721	  swap = RADEON_HOST_DATA_SWAP_32BIT;
722	  break;
723	}
724#endif
725
726	while (h) {
727	    int oldhpass = hpass, i = 0;
728
729	    src = (uint8_t*)scratch->address + scratch_off;
730
731	    y += oldhpass;
732	    h -= oldhpass;
733	    hpass = min(h, scratch->total/2 / scratch_pitch);
734
735	    /* Prepare next blit if anything's left */
736	    if (hpass) {
737		scratch_off = scratch->total/2 - scratch_off;
738		RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
739                                scratch_pitch_offset + (scratch_off >> 10),
740				x, y, 0, 0, w, hpass, 0, 0);
741	    }
742
743	    /*
744	     * Wait for previous blit to complete.
745	     *
746	     * XXX: Doing here essentially the same things this ioctl does in
747	     * the DRM results in corruption with 'small' transfers, apparently
748	     * because the data doesn't actually land in system RAM before the
749	     * memcpy. I suspect the ioctl helps mostly due to its latency; what
750	     * we'd really need is a way to reliably wait for the host interface
751	     * to be done with pushing the data to the host.
752	     */
753	    while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
754		   && (i++ < RADEON_TIMEOUT))
755		;
756
757	    /* Kick next blit */
758	    if (hpass)
759		FLUSH_RING();
760
761	    /* Copy out data from previous blit */
762	    if (wpass == scratch_pitch && wpass == dst_pitch) {
763		RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
764		dst += dst_pitch * oldhpass;
765	    } else while (oldhpass--) {
766		RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
767		src += scratch_pitch;
768		dst += dst_pitch;
769	    }
770	}
771
772	indirect.idx = scratch->idx;
773	indirect.start = indirect.end = 0;
774	indirect.discard = 1;
775
776	drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
777			    &indirect, sizeof(drm_radeon_indirect_t));
778
779	info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
780
781	return TRUE;
782    }
783
784    return FALSE;
785}
786
787#endif	/* def ACCEL_CP */
788
789
790Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
791{
792    RINFO_FROM_SCREEN(pScreen);
793
794    if (info->accel_state->exa == NULL) {
795	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
796	return FALSE;
797    }
798
799    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
800    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
801
802    info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
803    info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid);
804    info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDone2D);
805
806    info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
807    info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy);
808    info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDone2D);
809
810    info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync);
811    info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync);
812#ifdef ACCEL_CP
813    if (!info->kms_enabled) {
814	info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP;
815	if (info->accelDFS)
816	    info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP;
817    }
818# if defined(XF86DRM_MODE)
819    else {
820	info->accel_state->exa->UploadToScreen = &RADEONUploadToScreenCS;
821        info->accel_state->exa->DownloadFromScreen = &RADEONDownloadFromScreenCS;
822    }
823# endif
824#endif
825
826#if X_BYTE_ORDER == X_BIG_ENDIAN
827    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE;
828    info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE;
829#endif
830
831    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
832#ifdef EXA_SUPPORTS_PREPARE_AUX
833    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
834#endif
835#ifdef EXA_SUPPORTS_OFFSCREEN_OVERLAPS
836    /* The 2D engine supports overlapping memory areas */
837    info->accel_state->exa->flags |= EXA_SUPPORTS_OFFSCREEN_OVERLAPS;
838#endif
839    info->accel_state->exa->pixmapOffsetAlign = RADEON_GPU_PAGE_SIZE;
840    info->accel_state->exa->pixmapPitchAlign = 64;
841
842#ifdef EXA_HANDLES_PIXMAPS
843    if (info->cs) {
844	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
845#ifdef EXA_MIXED_PIXMAPS
846	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
847#endif
848    }
849#endif
850
851#ifdef RENDER
852    if (info->RenderAccel) {
853	if (IS_R300_3D || IS_R500_3D) {
854	    if ((info->ChipFamily < CHIP_FAMILY_RS400)
855#ifdef XF86DRI
856		|| (info->directRenderingEnabled)
857#endif
858		) {
859		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
860			       "enabled for R300/R400/R500 type cards.\n");
861		info->accel_state->exa->CheckComposite = R300CheckComposite;
862		info->accel_state->exa->PrepareComposite =
863		    FUNC_NAME(R300PrepareComposite);
864		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
865		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
866	    } else
867		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
868	} else if (IS_R200_3D) {
869		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
870			       "enabled for R200 type cards.\n");
871		info->accel_state->exa->CheckComposite = R200CheckComposite;
872		info->accel_state->exa->PrepareComposite =
873		    FUNC_NAME(R200PrepareComposite);
874		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
875		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
876	} else {
877		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
878			       "enabled for R100 type cards.\n");
879		info->accel_state->exa->CheckComposite = R100CheckComposite;
880		info->accel_state->exa->PrepareComposite =
881		    FUNC_NAME(R100PrepareComposite);
882		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
883		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
884	}
885    }
886#endif
887
888#ifdef XF86DRM_MODE
889#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
890    if (info->cs) {
891        info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
892        info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
893        info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
894	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
895	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
896#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
897        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
898#endif
899    }
900#endif
901#endif
902
903
904#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
905    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
906
907    info->accel_state->exa->maxPitchBytes = 16320;
908    info->accel_state->exa->maxX = 8191;
909#else
910    info->accel_state->exa->maxX = 16320 / 4;
911#endif
912    info->accel_state->exa->maxY = 8191;
913
914    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
915	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
916	info->accel_state->vsync = TRUE;
917    } else
918	info->accel_state->vsync = FALSE;
919
920    RADEONEngineInit(pScrn);
921
922    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
923	free(info->accel_state->exa);
924	return FALSE;
925    }
926    exaMarkSync(pScreen);
927
928    return TRUE;
929}
930
931#undef FUNC_NAME
932