radeon_exa_funcs.c revision f3a0071a
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * Copyright 2006 Tungsten Graphics, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 * Authors:
27 *    Eric Anholt <anholt@FreeBSD.org>
28 *    Zack Rusin <zrusin@trolltech.com>
29 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
30 *    Michel Dänzer <michel@daenzer.net>
31 *
32 */
33
/* Exactly one acceleration backend (MMIO or CP) must be selected. */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/*
 * Token pasting helper: use the ISO C '##' operator unless a pre-ANSI
 * preprocessor is flagged (UNIXCPP defined without ANSICPP), in which case
 * the empty-comment trick is used instead.
 */
#if defined(UNIXCPP) && !defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#endif

/* FUNC_NAME(foo) expands to fooMMIO or fooCP depending on the backend. */
#if defined(ACCEL_MMIO)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#elif defined(ACCEL_CP)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
53
54#include <errno.h>
55#include <string.h>
56
57#include "radeon.h"
58
59#include "exa.h"
60
61static int
62FUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen)
63{
64    RINFO_FROM_SCREEN(pScreen);
65
66    TRACE;
67
68    return ++info->accel_state->exaSyncMarker;
69}
70
71static void
72FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
73{
74    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75    RADEONInfoPtr info = RADEONPTR(pScrn);
76
77    if (info->cs)
78	    return;
79
80    TRACE;
81
82    if (info->accel_state->exaMarkerSynced != marker) {
83	FUNC_NAME(RADEONWaitForIdle)(pScrn);
84	info->accel_state->exaMarkerSynced = marker;
85    }
86
87    RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
88}
89
90static void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op)
91{
92    RADEONInfoPtr info = RADEONPTR(pScrn);
93    int has_src;
94    ACCEL_PREAMBLE();
95
96    /* don't emit if no operation in progress */
97    if (info->state_2d.op == 0 && op == 0)
98	return;
99
100    has_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo);
101
102    if (has_src) {
103      BEGIN_ACCEL_RELOC(10, 2);
104    } else {
105      BEGIN_ACCEL_RELOC(9, 1);
106    }
107    OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
108    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
109    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
110    OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
111    OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
112    OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
113    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask);
114    OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl);
115
116    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
117    if (info->cs)
118	OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
119
120    if (has_src) {
121	    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
122	    if (info->cs)
123		OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
124
125    }
126    FINISH_ACCEL();
127
128    if (op)
129	info->state_2d.op = op;
130    if (info->cs)
131	info->reemit_current2d = FUNC_NAME(Emit2DState);
132}
133
134static void
135FUNC_NAME(RADEONFlush2D)(PixmapPtr pPix)
136{
137    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
138    ACCEL_PREAMBLE();
139
140    TRACE;
141
142    BEGIN_ACCEL(2);
143    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
144    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
145                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
146    FINISH_ACCEL();
147}
148
149static void
150FUNC_NAME(RADEONDone2D)(PixmapPtr pPix)
151{
152    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
153    info->state_2d.op = 0;
154
155    FUNC_NAME(RADEONFlush2D)(pPix);
156}
157
158static Bool
159FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
160{
161    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
162    uint32_t datatype, dst_pitch_offset;
163
164    TRACE;
165
166    if (pPix->drawable.bitsPerPixel == 24)
167	RADEON_FALLBACK(("24bpp unsupported\n"));
168    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
169	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
170    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
171	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
172
173    RADEON_SWITCH_TO_2D();
174
175#ifdef XF86DRM_MODE
176    if (info->cs) {
177	struct radeon_exa_pixmap_priv *driver_priv;
178	int ret;
179
180	radeon_cs_space_reset_bos(info->cs);
181
182	driver_priv = exaGetPixmapDriverPrivate(pPix);
183	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
184
185	ret = radeon_cs_space_check(info->cs);
186	if (ret)
187	    RADEON_FALLBACK(("Not enough RAM to hw accel solid operation\n"));
188
189	driver_priv = exaGetPixmapDriverPrivate(pPix);
190	if (driver_priv)
191	    info->state_2d.dst_bo = driver_priv->bo;
192    }
193#endif
194
195    info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX |
196					       RADEON_DEFAULT_SC_BOTTOM_MAX);
197    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
198    info->state_2d.dp_src_frgd_clr = 0xffffffff;
199    info->state_2d.dp_src_bkgd_clr = 0x00000000;
200    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
201					  RADEON_GMC_BRUSH_SOLID_COLOR |
202					  (datatype << 8) |
203					  RADEON_GMC_SRC_DATATYPE_COLOR |
204					  RADEON_ROP[alu].pattern |
205					  RADEON_GMC_CLR_CMP_CNTL_DIS);
206    info->state_2d.dp_brush_frgd_clr = fg;
207    info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM);
208    info->state_2d.dp_write_mask = pm;
209    info->state_2d.dst_pitch_offset = dst_pitch_offset;
210    info->state_2d.src_pitch_offset = 0;
211    info->state_2d.src_bo = NULL;
212
213    info->accel_state->dst_pix = pPix;
214
215    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID);
216
217    return TRUE;
218}
219
220
221static void
222FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
223{
224    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
225    ACCEL_PREAMBLE();
226
227    TRACE;
228
229#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
230    if (info->cs && CS_FULL(info->cs)) {
231	FUNC_NAME(RADEONFlush2D)(info->accel_state->dst_pix);
232	radeon_cs_flush_indirect(pScrn);
233    }
234#endif
235
236    if (info->accel_state->vsync)
237	FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix,
238				      radeon_pick_best_crtc(pScrn, x1, x2, y1, y2),
239				      y1, y2);
240
241    BEGIN_ACCEL(2);
242    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
243    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
244    FINISH_ACCEL();
245}
246
247void
248FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
249			       uint32_t dst_pitch_offset, uint32_t datatype, int rop,
250			       Pixel planemask)
251{
252    RADEONInfoPtr info = RADEONPTR(pScrn);
253
254    /* setup 2D state */
255    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
256					  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
257					  RADEON_GMC_BRUSH_NONE |
258					  (datatype << 8) |
259					  RADEON_GMC_SRC_DATATYPE_COLOR |
260					  RADEON_ROP[rop].rop |
261					  RADEON_DP_SRC_SOURCE_MEMORY |
262					  RADEON_GMC_CLR_CMP_CNTL_DIS);
263    info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
264			       (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0));
265    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
266    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
267    info->state_2d.dp_src_frgd_clr = 0xffffffff;
268    info->state_2d.dp_src_bkgd_clr = 0x00000000;
269    info->state_2d.dp_write_mask = planemask;
270    info->state_2d.dst_pitch_offset = dst_pitch_offset;
271    info->state_2d.src_pitch_offset = src_pitch_offset;
272    info->state_2d.default_sc_bottom_right =  (RADEON_DEFAULT_SC_RIGHT_MAX
273						| RADEON_DEFAULT_SC_BOTTOM_MAX);
274
275    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY);
276}
277
278static Bool
279FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
280			     int xdir, int ydir,
281			     int rop,
282			     Pixel planemask)
283{
284    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
285    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
286    TRACE;
287
288    if (pDst->drawable.bitsPerPixel == 24)
289	RADEON_FALLBACK(("24bpp unsupported"));
290    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
291	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
292    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
293	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
294    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
295	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
296
297    RADEON_SWITCH_TO_2D();
298
299#ifdef XF86DRM_MODE
300    if (info->cs) {
301	struct radeon_exa_pixmap_priv *driver_priv;
302	int ret;
303
304	radeon_cs_space_reset_bos(info->cs);
305
306	driver_priv = exaGetPixmapDriverPrivate(pSrc);
307	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
308	info->state_2d.src_bo = driver_priv->bo;
309
310	driver_priv = exaGetPixmapDriverPrivate(pDst);
311	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
312	info->state_2d.dst_bo = driver_priv->bo;
313
314	ret = radeon_cs_space_check(info->cs);
315	if (ret)
316	    RADEON_FALLBACK(("Not enough RAM to hw accel copy operation\n"));
317    }
318#endif
319
320    info->accel_state->xdir = xdir;
321    info->accel_state->ydir = ydir;
322    info->accel_state->dst_pix = pDst;
323
324    FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
325				   datatype, rop, planemask);
326
327    return TRUE;
328}
329
330void
331FUNC_NAME(RADEONCopy)(PixmapPtr pDst,
332		      int srcX, int srcY,
333		      int dstX, int dstY,
334		      int w, int h)
335{
336    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
337    ACCEL_PREAMBLE();
338
339    TRACE;
340
341#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
342    if (info->cs && CS_FULL(info->cs)) {
343	FUNC_NAME(RADEONFlush2D)(info->accel_state->dst_pix);
344	radeon_cs_flush_indirect(pScrn);
345    }
346#endif
347
348    if (info->accel_state->xdir < 0) {
349	srcX += w - 1;
350	dstX += w - 1;
351    }
352    if (info->accel_state->ydir < 0) {
353	srcY += h - 1;
354	dstY += h - 1;
355    }
356
357    if (info->accel_state->vsync)
358	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
359				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
360				      dstY, dstY + h);
361
362    BEGIN_ACCEL(3);
363
364    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
365    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
366    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
367
368    FINISH_ACCEL();
369}
370
371#ifdef ACCEL_CP
372
373static Bool
374RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h,
375		       char *src, int src_pitch)
376{
377    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
378    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
379    unsigned int   hpass;
380    uint32_t	   buf_pitch, dst_pitch_off;
381
382    TRACE;
383
384    if (bpp < 8)
385	return FALSE;
386
387    if (info->directRenderingEnabled &&
388	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) {
389	uint8_t *buf;
390	int cpp = bpp / 8;
391	ACCEL_PREAMBLE();
392
393	RADEON_SWITCH_TO_2D();
394
395	if (info->accel_state->vsync)
396	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
397					  radeon_pick_best_crtc(pScrn, x, x + w, y, y + h),
398					  y, y + h);
399
400	while ((buf = RADEONHostDataBlit(pScrn,
401					 cpp, w, dst_pitch_off, &buf_pitch,
402					 x, &y, (unsigned int*)&h, &hpass)) != 0) {
403	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src,
404				       hpass, buf_pitch, src_pitch);
405	    src += hpass * src_pitch;
406	}
407
408	exaMarkSync(pDst->drawable.pScreen);
409	return TRUE;
410    }
411
412    return FALSE;
413}
414
415/* Emit blit with arbitrary source and destination offsets and pitches */
416static void
417RADEONBlitChunk(ScrnInfoPtr pScrn, struct radeon_bo *src_bo,
418                struct radeon_bo *dst_bo, uint32_t datatype,
419                uint32_t src_pitch_offset, uint32_t dst_pitch_offset,
420                int srcX, int srcY, int dstX, int dstY, int w, int h,
421                uint32_t src_domain, uint32_t dst_domain)
422{
423    RADEONInfoPtr info = RADEONPTR(pScrn);
424    ACCEL_PREAMBLE();
425
426    if (src_bo && dst_bo) {
427        BEGIN_ACCEL_RELOC(6, 2);
428    } else if (src_bo && dst_bo == NULL) {
429        BEGIN_ACCEL_RELOC(6, 1);
430    } else {
431        BEGIN_ACCEL(6);
432    }
433    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
434		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
435		  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
436		  RADEON_GMC_BRUSH_NONE |
437		  (datatype << 8) |
438		  RADEON_GMC_SRC_DATATYPE_COLOR |
439		  RADEON_ROP3_S |
440		  RADEON_DP_SRC_SOURCE_MEMORY |
441		  RADEON_GMC_CLR_CMP_CNTL_DIS |
442		  RADEON_GMC_WR_MSK_DIS);
443    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
444    if (src_bo) {
445	OUT_RELOC(src_bo, src_domain, 0);
446    }
447    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
448    if (dst_bo) {
449	OUT_RELOC(dst_bo, 0, dst_domain);
450    }
451    OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX);
452    OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
453    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
454    FINISH_ACCEL();
455    BEGIN_ACCEL(2);
456    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
457    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
458                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
459    FINISH_ACCEL();
460}
461
462#if defined(XF86DRM_MODE)
463static Bool
464RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
465		       char *src, int src_pitch)
466{
467    ScreenPtr pScreen = pDst->drawable.pScreen;
468    RINFO_FROM_SCREEN(pScreen);
469    struct radeon_exa_pixmap_priv *driver_priv;
470    struct radeon_bo *scratch = NULL;
471    struct radeon_bo *copy_dst;
472    unsigned char *dst;
473    unsigned size;
474    uint32_t datatype = 0;
475    uint32_t dst_domain;
476    uint32_t dst_pitch_offset;
477    unsigned bpp = pDst->drawable.bitsPerPixel;
478    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
479    uint32_t copy_pitch;
480    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
481    int ret;
482    Bool flush = TRUE;
483    Bool r;
484    int i;
485
486    if (bpp < 8)
487	return FALSE;
488
489    driver_priv = exaGetPixmapDriverPrivate(pDst);
490    if (!driver_priv || !driver_priv->bo)
491	return FALSE;
492
493#if X_BYTE_ORDER == X_BIG_ENDIAN
494    switch (bpp) {
495    case 32:
496	swap = RADEON_HOST_DATA_SWAP_32BIT;
497	break;
498    case 16:
499	swap = RADEON_HOST_DATA_SWAP_16BIT;
500	break;
501    }
502#endif
503
504    /* If we know the BO won't be busy, don't bother with a scratch */
505    copy_dst = driver_priv->bo;
506    copy_pitch = pDst->devKind;
507    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
508	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
509	    flush = FALSE;
510	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
511		goto copy;
512	}
513    }
514
515    size = scratch_pitch * h;
516    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
517    if (scratch == NULL) {
518	goto copy;
519    }
520    radeon_cs_space_reset_bos(info->cs);
521    radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
522    radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
523    ret = radeon_cs_space_check(info->cs);
524    if (ret) {
525	goto copy;
526    }
527    copy_dst = scratch;
528    copy_pitch = scratch_pitch;
529    flush = FALSE;
530
531copy:
532    if (flush)
533	radeon_cs_flush_indirect(pScrn);
534
535    ret = radeon_bo_map(copy_dst, 0);
536    if (ret) {
537        r = FALSE;
538        goto out;
539    }
540    r = TRUE;
541    size = w * bpp / 8;
542    dst = copy_dst->ptr;
543    if (copy_dst == driver_priv->bo)
544	dst += y * copy_pitch + x * bpp / 8;
545    for (i = 0; i < h; i++) {
546        RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap);
547        src += src_pitch;
548    }
549    radeon_bo_unmap(copy_dst);
550
551    if (copy_dst == scratch) {
552	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
553	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
554	ACCEL_PREAMBLE();
555	RADEON_SWITCH_TO_2D();
556	RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
557			dst_pitch_offset, 0, 0, x, y, w, h,
558			RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
559    }
560
561out:
562    if (scratch)
563	radeon_bo_unref(scratch);
564    return r;
565}
566
567static Bool
568RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
569                           int h, char *dst, int dst_pitch)
570{
571    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
572    struct radeon_exa_pixmap_priv *driver_priv;
573    struct radeon_bo *scratch = NULL;
574    struct radeon_bo *copy_src;
575    unsigned size;
576    uint32_t datatype = 0;
577    uint32_t src_domain = 0;
578    uint32_t src_pitch_offset;
579    unsigned bpp = pSrc->drawable.bitsPerPixel;
580    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
581    uint32_t copy_pitch;
582    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
583    int ret;
584    Bool flush = FALSE;
585    Bool r;
586
587    if (bpp < 8)
588	return FALSE;
589
590    driver_priv = exaGetPixmapDriverPrivate(pSrc);
591    if (!driver_priv || !driver_priv->bo)
592	return FALSE;
593
594#if X_BYTE_ORDER == X_BIG_ENDIAN
595    switch (bpp) {
596    case 32:
597	swap = RADEON_HOST_DATA_SWAP_32BIT;
598	break;
599    case 16:
600	swap = RADEON_HOST_DATA_SWAP_16BIT;
601	break;
602    }
603#endif
604
605    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
606    copy_src = driver_priv->bo;
607    copy_pitch = pSrc->devKind;
608    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
609	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
610	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
611	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
612		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
613		src_domain = 0;
614	    else /* A write may be scheduled */
615		flush = TRUE;
616	}
617
618	if (!src_domain)
619	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
620
621	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
622	    goto copy;
623    }
624    size = scratch_pitch * h;
625    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
626    if (scratch == NULL) {
627	goto copy;
628    }
629    radeon_cs_space_reset_bos(info->cs);
630    radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
631    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
632    ret = radeon_cs_space_check(info->cs);
633    if (ret) {
634	goto copy;
635    }
636    RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
637    RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
638    ACCEL_PREAMBLE();
639    RADEON_SWITCH_TO_2D();
640    RADEONBlitChunk(pScrn, driver_priv->bo, scratch, datatype, src_pitch_offset,
641                    scratch_pitch << 16, x, y, 0, 0, w, h,
642                    RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
643                    RADEON_GEM_DOMAIN_GTT);
644    copy_src = scratch;
645    copy_pitch = scratch_pitch;
646    flush = TRUE;
647
648copy:
649    if (flush)
650	FLUSH_RING();
651
652    ret = radeon_bo_map(copy_src, 0);
653    if (ret) {
654	ErrorF("failed to map pixmap: %d\n", ret);
655        r = FALSE;
656        goto out;
657    }
658    r = TRUE;
659    w *= bpp / 8;
660    if (copy_src == driver_priv->bo)
661	size = y * copy_pitch + x * bpp / 8;
662    else
663	size = 0;
664    while (h--) {
665        RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap);
666        size += copy_pitch;
667        dst += dst_pitch;
668    }
669    radeon_bo_unmap(copy_src);
670out:
671    if (scratch)
672	radeon_bo_unref(scratch);
673    return r;
674}
675#endif
676
677static Bool
678RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
679				    char *dst, int dst_pitch)
680{
681    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
682    uint8_t	  *src	     = info->FB + exaGetPixmapOffset(pSrc);
683    int		   bpp	     = pSrc->drawable.bitsPerPixel;
684    uint32_t datatype, src_pitch_offset, scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64), scratch_off = 0;
685    drmBufPtr scratch;
686
687    TRACE;
688
689    /*
690     * Try to accelerate download. Use an indirect buffer as scratch space,
691     * blitting the bits to one half while copying them out of the other one and
692     * then swapping the halves.
693     */
694    if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
695	RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
696	(scratch = RADEONCPGetBuffer(pScrn)))
697    {
698	int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
699	int hpass = min(h, scratch->total/2 / scratch_pitch);
700	uint32_t scratch_pitch_offset = scratch_pitch << 16
701				    | (info->gartLocation + info->dri->bufStart
702				       + scratch->idx * scratch->total) >> 10;
703	drm_radeon_indirect_t indirect;
704	ACCEL_PREAMBLE();
705
706	RADEON_SWITCH_TO_2D();
707
708	/* Kick the first blit as early as possible */
709	RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
710                        scratch_pitch_offset, x, y, 0, 0, w, hpass, 0, 0);
711	FLUSH_RING();
712
713#if X_BYTE_ORDER == X_BIG_ENDIAN
714	switch (bpp) {
715	case 16:
716	  swap = RADEON_HOST_DATA_SWAP_16BIT;
717	  break;
718	case 32:
719	  swap = RADEON_HOST_DATA_SWAP_32BIT;
720	  break;
721	}
722#endif
723
724	while (h) {
725	    int oldhpass = hpass, i = 0;
726
727	    src = (uint8_t*)scratch->address + scratch_off;
728
729	    y += oldhpass;
730	    h -= oldhpass;
731	    hpass = min(h, scratch->total/2 / scratch_pitch);
732
733	    /* Prepare next blit if anything's left */
734	    if (hpass) {
735		scratch_off = scratch->total/2 - scratch_off;
736		RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset,
737                                scratch_pitch_offset + (scratch_off >> 10),
738				x, y, 0, 0, w, hpass, 0, 0);
739	    }
740
741	    /*
742	     * Wait for previous blit to complete.
743	     *
744	     * XXX: Doing here essentially the same things this ioctl does in
745	     * the DRM results in corruption with 'small' transfers, apparently
746	     * because the data doesn't actually land in system RAM before the
747	     * memcpy. I suspect the ioctl helps mostly due to its latency; what
748	     * we'd really need is a way to reliably wait for the host interface
749	     * to be done with pushing the data to the host.
750	     */
751	    while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
752		   && (i++ < RADEON_TIMEOUT))
753		;
754
755	    /* Kick next blit */
756	    if (hpass)
757		FLUSH_RING();
758
759	    /* Copy out data from previous blit */
760	    if (wpass == scratch_pitch && wpass == dst_pitch) {
761		RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
762		dst += dst_pitch * oldhpass;
763	    } else while (oldhpass--) {
764		RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
765		src += scratch_pitch;
766		dst += dst_pitch;
767	    }
768	}
769
770	indirect.idx = scratch->idx;
771	indirect.start = indirect.end = 0;
772	indirect.discard = 1;
773
774	drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
775			    &indirect, sizeof(drm_radeon_indirect_t));
776
777	info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
778
779	return TRUE;
780    }
781
782    return FALSE;
783}
784
785#endif	/* def ACCEL_CP */
786
787
788Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
789{
790    RINFO_FROM_SCREEN(pScreen);
791
792    if (info->accel_state->exa == NULL) {
793	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
794	return FALSE;
795    }
796
797    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
798    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
799
800    info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
801    info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid);
802    info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDone2D);
803
804    info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
805    info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy);
806    info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDone2D);
807
808    info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync);
809    info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync);
810#ifdef ACCEL_CP
811    if (!info->kms_enabled) {
812	info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP;
813	if (info->accelDFS)
814	    info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP;
815    }
816# if defined(XF86DRM_MODE)
817    else {
818	info->accel_state->exa->UploadToScreen = &RADEONUploadToScreenCS;
819        info->accel_state->exa->DownloadFromScreen = &RADEONDownloadFromScreenCS;
820    }
821# endif
822#endif
823
824#if X_BYTE_ORDER == X_BIG_ENDIAN
825    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE;
826    info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE;
827#endif
828
829    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
830#ifdef EXA_SUPPORTS_PREPARE_AUX
831    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
832#endif
833#ifdef EXA_SUPPORTS_OFFSCREEN_OVERLAPS
834    /* The 2D engine supports overlapping memory areas */
835    info->accel_state->exa->flags |= EXA_SUPPORTS_OFFSCREEN_OVERLAPS;
836#endif
837    info->accel_state->exa->pixmapOffsetAlign = RADEON_GPU_PAGE_SIZE;
838    info->accel_state->exa->pixmapPitchAlign = 64;
839
840#ifdef EXA_HANDLES_PIXMAPS
841    if (info->cs) {
842	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
843#ifdef EXA_MIXED_PIXMAPS
844	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
845#endif
846    }
847#endif
848
849#ifdef RENDER
850    if (info->RenderAccel) {
851	if (IS_R300_3D || IS_R500_3D) {
852	    if ((info->ChipFamily < CHIP_FAMILY_RS400)
853#ifdef XF86DRI
854		|| (info->directRenderingEnabled)
855#endif
856		) {
857		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
858			       "enabled for R300/R400/R500 type cards.\n");
859		info->accel_state->exa->CheckComposite = R300CheckComposite;
860		info->accel_state->exa->PrepareComposite =
861		    FUNC_NAME(R300PrepareComposite);
862		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
863		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
864	    } else
865		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
866	} else if (IS_R200_3D) {
867		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
868			       "enabled for R200 type cards.\n");
869		info->accel_state->exa->CheckComposite = R200CheckComposite;
870		info->accel_state->exa->PrepareComposite =
871		    FUNC_NAME(R200PrepareComposite);
872		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
873		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
874	} else {
875		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
876			       "enabled for R100 type cards.\n");
877		info->accel_state->exa->CheckComposite = R100CheckComposite;
878		info->accel_state->exa->PrepareComposite =
879		    FUNC_NAME(R100PrepareComposite);
880		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
881		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
882	}
883    }
884#endif
885
886#ifdef XF86DRM_MODE
887#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
888    if (info->cs) {
889        info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
890        info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
891        info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
892	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
893	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
894#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
895        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
896#endif
897    }
898#endif
899#endif
900
901
902#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
903    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
904
905    info->accel_state->exa->maxPitchBytes = 16320;
906    info->accel_state->exa->maxX = 8191;
907#else
908    info->accel_state->exa->maxX = 16320 / 4;
909#endif
910    info->accel_state->exa->maxY = 8191;
911
912    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
913	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
914	info->accel_state->vsync = TRUE;
915    } else
916	info->accel_state->vsync = FALSE;
917
918    RADEONEngineInit(pScrn);
919
920    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
921	free(info->accel_state->exa);
922	return FALSE;
923    }
924    exaMarkSync(pScreen);
925
926    return TRUE;
927}
928
929#undef FUNC_NAME
930