radeon_exa.c revision 209ff23f
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include "radeon.h"
37#include "radeon_reg.h"
38#ifdef XF86DRI
39#include "radeon_dri.h"
40#endif
41#include "radeon_macros.h"
42#include "radeon_probe.h"
43#include "radeon_version.h"
44#ifdef XF86DRI
45#include "radeon_sarea.h"
46#endif
47
48#include "xf86.h"
49
50
51/***********************************************************************/
/*
 * Declare the two locals almost every function in this file needs:
 *   pScrn - the ScrnInfoPtr looked up in xf86Screens[] via the screen's myNum
 *   info  - the driver-private record obtained from RADEONPTR(pScrn)
 *
 * This expands to two declarations, so it must appear where declarations
 * are allowed and can be used only once per scope.  The argument is
 * parenthesized so that any pointer-valued expression may be passed.
 */
#define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn =  xf86Screens[(pScr)->myNum]; \
    RADEONInfoPtr info   = RADEONPTR(pScrn)
54
/* Compile-time debug switches: set to 1 to enable the corresponding
 * ErrorF() logging in RADEON_FALLBACK and TRACE below.
 */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/* RADEON_FALLBACK((fmt, args...)) makes the *calling function* return FALSE.
 * The argument is a parenthesized ErrorF() argument list; it is printed
 * (prefixed with the function name) only when RADEON_TRACE_FALL is set and
 * is discarded otherwise.  Note the hidden control flow: nothing after the
 * macro in the same path is executed.
 */
#if RADEON_TRACE_FALL
#define RADEON_FALLBACK(x)     		\
do {					\
	ErrorF("%s: ", __FUNCTION__);	\
	ErrorF x;			\
	return FALSE;			\
} while (0)
#else
#define RADEON_FALLBACK(x) return FALSE
#endif

/* TRACE logs the enclosing function's name when RADEON_TRACE_DRAW is set and
 * expands to nothing otherwise.
 */
#if RADEON_TRACE_DRAW
#define TRACE do { ErrorF("TRACE: %s\n", __FUNCTION__); } while(0)
#else
#define TRACE
#endif
74
/* Raster-operation lookup table.  Entries are listed in the order of the X11
 * GC functions named in the per-row comments (GXclear first, GXset last), so
 * the table is presumably indexed by the GC "alu" value -- confirm against
 * the users in the included accel templates.
 *   rop     - ROP3 code combining Source and Destination
 *   pattern - ROP3 code combining Pattern and Destination
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
96
/*
 * Compute floor(log2(val)): the index of the most significant set bit.
 *
 * val is treated as an unsigned 32-bit pattern, so a negative value yields
 * 31 on every platform.  Zero has no set bit and returns -1; previously the
 * x86 path left the result unspecified for zero (BSR does not define its
 * destination in that case) while the portable path returned -1.
 */
static __inline__ int
RADEONLog2(int val)
{
	unsigned int v = (unsigned int)val;
	int bits;

	if (v == 0)
		return -1;

#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
	__asm volatile("bsrl	%1, %0"
		: "=r" (bits)
		: "r" (v)
		: "cc"
	);
	return bits;
#else
	/* Shift the unsigned copy so the loop always terminates. */
	for (bits = -1; v != 0; v >>= 1)
		++bits;
	return bits;
#endif
}
114
/* Reinterpret the bit pattern of a float as a 32-bit word (no numeric
 * conversion), so it can be written to a register or ring as a dword.
 */
static __inline__ uint32_t F_TO_DW(float val)
{
    union {
	float as_float;
	uint32_t as_bits;
    } pun = { val };	/* initializes the first member, as_float */

    return pun.as_bits;
}
124
125/* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we
126 * require src and dest datatypes to be equal.
127 */
128Bool RADEONGetDatatypeBpp(int bpp, uint32_t *type)
129{
130	switch (bpp) {
131	case 8:
132		*type = ATI_DATATYPE_CI8;
133		return TRUE;
134	case 16:
135		*type = ATI_DATATYPE_RGB565;
136		return TRUE;
137	case 24:
138		*type = ATI_DATATYPE_CI8;
139		return TRUE;
140	case 32:
141		*type = ATI_DATATYPE_ARGB8888;
142		return TRUE;
143	default:
144		RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp));
145		return FALSE;
146	}
147}
148
149static Bool RADEONPixmapIsColortiled(PixmapPtr pPix)
150{
151    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
152
153    /* This doesn't account for the back buffer, which we may want to wrap in
154     * a pixmap at some point for the purposes of DRI buffer moves.
155     */
156    if (info->tilingEnabled && exaGetPixmapOffset(pPix) == 0)
157	return TRUE;
158    else
159	return FALSE;
160}
161
162static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset,
163				 unsigned int offset, unsigned int pitch)
164{
165	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
166
167	if (pitch > 16320 || pitch % info->exa->pixmapPitchAlign != 0)
168		RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch));
169
170	if (offset % info->exa->pixmapOffsetAlign != 0)
171		RADEON_FALLBACK(("Bad offset 0x%08x\n", offset));
172
173	pitch = pitch >> 6;
174	*pitch_offset = (pitch << 22) | (offset >> 10);
175
176	/* If it's the front buffer, we've got to note that it's tiled? */
177	if (RADEONPixmapIsColortiled(pPix))
178		*pitch_offset |= RADEON_DST_TILE_MACRO;
179	return TRUE;
180}
181
182Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
183{
184	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
185	uint32_t pitch, offset;
186	int bpp;
187
188	bpp = pPix->drawable.bitsPerPixel;
189	if (bpp == 24)
190		bpp = 8;
191
192	offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
193	pitch = exaGetPixmapPitch(pPix);
194
195	return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
196}
197
198#if X_BYTE_ORDER == X_BIG_ENDIAN
199
200static unsigned long swapper_surfaces[3];
201
202static Bool RADEONPrepareAccess(PixmapPtr pPix, int index)
203{
204    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
205    unsigned char *RADEONMMIO = info->MMIO;
206    uint32_t offset = exaGetPixmapOffset(pPix);
207    int bpp, soff;
208    uint32_t size, flags;
209
210    /* Front buffer is always set with proper swappers */
211    if (offset == 0)
212        return TRUE;
213
214    /* If same bpp as front buffer, just do nothing as the main
215     * swappers will apply
216     */
217    bpp = pPix->drawable.bitsPerPixel;
218    if (bpp == pScrn->bitsPerPixel)
219        return TRUE;
220
221    /* We need to setup a separate swapper, let's request a
222     * surface. We need to align the size first
223     */
224    size = exaGetPixmapSize(pPix);
225    size = (size + RADEON_BUFFER_ALIGN) & ~(RADEON_BUFFER_ALIGN);
226
227    /* Set surface to tiling disabled with appropriate swapper */
228    switch (bpp) {
229    case 16:
230        flags = RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
231	break;
232    case 32:
233        flags = RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
234	break;
235    default:
236        flags = 0;
237    }
238#if defined(XF86DRI)
239    if (info->directRenderingEnabled && info->allowColorTiling) {
240	drmRadeonSurfaceAlloc drmsurfalloc;
241	int rc;
242
243        drmsurfalloc.address = offset;
244        drmsurfalloc.size = size;
245	drmsurfalloc.flags = flags | 1; /* bogus pitch to please DRM */
246
247        rc = drmCommandWrite(info->drmFD, DRM_RADEON_SURF_ALLOC,
248			     &drmsurfalloc, sizeof(drmsurfalloc));
249	if (rc < 0) {
250	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
251		       "drm: could not allocate surface for access"
252		       " swapper, err: %d!\n", rc);
253	    return FALSE;
254	}
255	swapper_surfaces[index] = offset;
256
257	return TRUE;
258    }
259#endif
260    soff = (index + 1) * 0x10;
261    OUTREG(RADEON_SURFACE0_INFO + soff, flags);
262    OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset);
263    OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1);
264    swapper_surfaces[index] = offset;
265    return TRUE;
266}
267
268static void RADEONFinishAccess(PixmapPtr pPix, int index)
269{
270    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
271    unsigned char *RADEONMMIO = info->MMIO;
272    uint32_t offset = exaGetPixmapOffset(pPix);
273    int soff;
274
275    /* Front buffer is always set with proper swappers */
276    if (offset == 0)
277        return;
278
279    if (swapper_surfaces[index] == 0)
280        return;
281#if defined(XF86DRI)
282    if (info->directRenderingEnabled && info->allowColorTiling) {
283	drmRadeonSurfaceFree drmsurffree;
284
285	drmsurffree.address = offset;
286	drmCommandWrite(info->drmFD, DRM_RADEON_SURF_FREE,
287			&drmsurffree, sizeof(drmsurffree));
288	swapper_surfaces[index] = 0;
289	return;
290    }
291#endif
292    soff = (index + 1) * 0x10;
293    OUTREG(RADEON_SURFACE0_INFO + soff, 0);
294    OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 0);
295    OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0);
296    swapper_surfaces[index] = 0;
297}
298
299#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
300
/*
 * Emit a RADEON_WAIT_UNTIL write appropriate for the current engine mode and
 * record that the engine is now in 2D mode.  The switch cases fall through
 * on purpose so the wait bits accumulate:
 *   UNKNOWN -> host idle-clean + 2D idle-clean + 3D idle-clean
 *   3D      -> 3D idle-clean
 *   2D      -> no wait bits
 * Requires `info` in scope plus whatever the active BEGIN_ACCEL /
 * OUT_ACCEL_REG / FINISH_ACCEL definitions need.
 *
 * The expansion deliberately does not end in a semicolon: invoke it as
 * `RADEON_SWITCH_TO_2D();` so that it behaves as a single statement, also in
 * an unbraced if/else.
 */
#define RADEON_SWITCH_TO_2D()						\
do {									\
	uint32_t wait_until = 0;					\
	BEGIN_ACCEL(1);							\
	switch (info->engineMode) {					\
	case EXA_ENGINEMODE_UNKNOWN:					\
	    wait_until |= RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN;	\
	    /* fallthrough */						\
	case EXA_ENGINEMODE_3D:						\
	    wait_until |= RADEON_WAIT_3D_IDLECLEAN;			\
	    /* fallthrough */						\
	case EXA_ENGINEMODE_2D:						\
	    break;							\
	}								\
	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, wait_until);			\
	FINISH_ACCEL();							\
	info->engineMode = EXA_ENGINEMODE_2D;				\
} while (0)
317
/*
 * Emit a RADEON_WAIT_UNTIL write appropriate for the current engine mode and
 * record that the engine is now in 3D mode.  The switch cases fall through
 * on purpose so the wait bits accumulate:
 *   UNKNOWN -> host idle-clean + 3D idle-clean + 2D idle-clean + DMA GUI idle
 *   2D      -> 2D idle-clean + DMA GUI idle
 *   3D      -> no wait bits
 * Requires `info` in scope plus whatever the active BEGIN_ACCEL /
 * OUT_ACCEL_REG / FINISH_ACCEL definitions need.
 *
 * The expansion deliberately does not end in a semicolon: invoke it as
 * `RADEON_SWITCH_TO_3D();` so that it behaves as a single statement, also in
 * an unbraced if/else.
 */
#define RADEON_SWITCH_TO_3D()						\
do {									\
	uint32_t wait_until = 0;					\
	BEGIN_ACCEL(1);							\
	switch (info->engineMode) {					\
	case EXA_ENGINEMODE_UNKNOWN:					\
	    wait_until |= RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN;	\
	    /* fallthrough */						\
	case EXA_ENGINEMODE_2D:						\
	    wait_until |= RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE;		\
	    /* fallthrough */						\
	case EXA_ENGINEMODE_3D:						\
	    break;							\
	}								\
	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, wait_until);			\
	FINISH_ACCEL();							\
	info->engineMode = EXA_ENGINEMODE_3D;				\
} while (0)
334
335#define ENTER_DRAW(x) TRACE
336#define LEAVE_DRAW(x) TRACE
337/***********************************************************************/
338
339#define ACCEL_MMIO
340#define ACCEL_PREAMBLE()	unsigned char *RADEONMMIO = info->MMIO
341#define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
342#define OUT_ACCEL_REG(reg, val)	OUTREG(reg, val)
343#define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val))
344#define FINISH_ACCEL()
345
346#ifdef RENDER
347#include "radeon_exa_render.c"
348#endif
349#include "radeon_exa_funcs.c"
350
351#undef ACCEL_MMIO
352#undef ACCEL_PREAMBLE
353#undef BEGIN_ACCEL
354#undef OUT_ACCEL_REG
355#undef FINISH_ACCEL
356
357#ifdef XF86DRI
358
359#define ACCEL_CP
360#define ACCEL_PREAMBLE()						\
361    RING_LOCALS;							\
362    RADEONCP_REFRESH(pScrn, info)
363#define BEGIN_ACCEL(n)		BEGIN_RING(2*(n))
364#define OUT_ACCEL_REG(reg, val)	OUT_RING_REG(reg, val)
365#define FINISH_ACCEL()		ADVANCE_RING()
366
367#define OUT_RING_F(x) OUT_RING(F_TO_DW(x))
368
369#ifdef RENDER
370#include "radeon_exa_render.c"
371#endif
372#include "radeon_exa_funcs.c"
373
374#endif /* XF86DRI */
375
376/*
377 * Once screen->off_screen_base is set, this function
378 * allocates the remaining memory appropriately
379 */
380Bool RADEONSetupMemEXA (ScreenPtr pScreen)
381{
382    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
383    RADEONInfoPtr info = RADEONPTR(pScrn);
384    xf86CrtcConfigPtr   xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
385    int cpp = info->CurrentLayout.pixel_bytes;
386    int screen_size;
387    int byteStride = pScrn->displayWidth * cpp;
388
389    if (info->exa != NULL) {
390	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map already initialized\n");
391	return FALSE;
392    }
393    info->exa = exaDriverAlloc();
394    if (info->exa == NULL)
395	return FALSE;
396
397    /* Need to adjust screen size for 16 line tiles, and then make it align to.
398     * the buffer alignment requirement.
399     */
400    if (info->allowColorTiling)
401	screen_size = RADEON_ALIGN(pScrn->virtualY, 16) * byteStride;
402    else
403	screen_size = pScrn->virtualY * byteStride;
404
405    info->exa->memoryBase = info->FB;
406    info->exa->memorySize = info->FbMapSize - info->FbSecureSize;
407    info->exa->offScreenBase = screen_size;
408
409    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating from a screen of %ld kb\n",
410	       info->exa->memorySize / 1024);
411
412
413    /* Reserve static area for hardware cursor */
414    if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) {
415	int cursor_size = 64 * 4 * 64;
416	int c;
417
418	for (c = 0; c < xf86_config->num_crtc; c++) {
419	    xf86CrtcPtr crtc = xf86_config->crtc[c];
420	    RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
421
422	    radeon_crtc->cursor_offset = info->exa->offScreenBase;
423	    info->exa->offScreenBase += cursor_size;
424
425	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
426		       "Will use %d kb for hardware cursor %d at offset 0x%08x\n",
427		       (cursor_size * xf86_config->num_crtc) / 1024,
428		       c,
429		       (unsigned int)radeon_crtc->cursor_offset);
430	}
431
432
433    }
434
435#if defined(XF86DRI)
436    if (info->directRenderingEnabled) {
437	int depthCpp = (info->depthBits - 8) / 4, l, next, depth_size;
438
439	info->frontOffset = 0;
440	info->frontPitch = pScrn->displayWidth;
441
442        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
443	       "Will use %d kb for front buffer at offset 0x%08x\n",
444	       screen_size / 1024, info->frontOffset);
445	RADEONDRIAllocatePCIGARTTable(pScreen);
446
447	if (info->cardType==CARD_PCIE)
448	  xf86DrvMsg(pScrn->scrnIndex, X_INFO,
449		     "Will use %d kb for PCI GART at offset 0x%08x\n",
450		     info->pciGartSize / 1024,
451		     (int)info->pciGartOffset);
452
453	/* Reserve a static area for the back buffer the same size as the
454	 * visible screen.  XXX: This would be better initialized in ati_dri.c
455	 * when GLX is set up, but the offscreen memory manager's allocations
456	 * don't last through VT switches, while the kernel's understanding of
457	 * offscreen locations does.
458	 */
459	info->backPitch = pScrn->displayWidth;
460	next = RADEON_ALIGN(info->exa->offScreenBase, RADEON_BUFFER_ALIGN);
461	if (!info->noBackBuffer &&
462	    next + screen_size <= info->exa->memorySize)
463	{
464	    info->backOffset = next;
465	    info->exa->offScreenBase = next + screen_size;
466	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
467		       "Will use %d kb for back buffer at offset 0x%08x\n",
468		       screen_size / 1024, info->backOffset);
469	}
470
471	/* Reserve the static depth buffer, and adjust pitch and height to
472	 * handle tiling.
473	 */
474	info->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
475	depth_size = RADEON_ALIGN(pScrn->virtualY, 16) * info->depthPitch * depthCpp;
476	next = RADEON_ALIGN(info->exa->offScreenBase, RADEON_BUFFER_ALIGN);
477	if (next + depth_size <= info->exa->memorySize)
478	{
479	    info->depthOffset = next;
480	    info->exa->offScreenBase = next + depth_size;
481	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
482		       "Will use %d kb for depth buffer at offset 0x%08x\n",
483		       depth_size / 1024, info->depthOffset);
484	}
485
486	info->textureSize *= (info->exa->memorySize -
487			      info->exa->offScreenBase) / 100;
488
489	l = RADEONLog2(info->textureSize / RADEON_NR_TEX_REGIONS);
490	if (l < RADEON_LOG_TEX_GRANULARITY)
491	    l = RADEON_LOG_TEX_GRANULARITY;
492	info->textureSize = (info->textureSize >> l) << l;
493	if (info->textureSize >= 512 * 1024) {
494	    info->textureOffset = info->exa->offScreenBase;
495	    info->exa->offScreenBase += info->textureSize;
496	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
497		       "Will use %d kb for textures at offset 0x%08x\n",
498		       info->textureSize / 1024, info->textureOffset);
499	} else {
500	    /* Minimum texture size is for 2 256x256x32bpp textures */
501	    info->textureSize = 0;
502	}
503    } else
504#endif /* XF86DRI */
505    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
506		       "Will use %d kb for front buffer at offset 0x%08x\n",
507		       screen_size / 1024, 0);
508
509    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
510	       "Will use %ld kb for X Server offscreen at offset 0x%08lx\n",
511	       (info->exa->memorySize - info->exa->offScreenBase) /
512	       1024, info->exa->offScreenBase);
513
514    return TRUE;
515}
516
517#ifdef XF86DRI
518
519#ifndef ExaOffscreenMarkUsed
520extern void ExaOffscreenMarkUsed(PixmapPtr);
521#endif
522
523unsigned long long
524RADEONTexOffsetStart(PixmapPtr pPix)
525{
526    exaMoveInPixmap(pPix);
527    ExaOffscreenMarkUsed(pPix);
528
529    return RADEONPTR(xf86Screens[pPix->drawable.pScreen->myNum])->fbLocation +
530	exaGetPixmapOffset(pPix);
531}
532#endif
533