radeon_exa.c revision b7e1c893
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include "radeon.h"
37#include "radeon_reg.h"
38#include "r600_reg.h"
39#ifdef XF86DRI
40#include "radeon_drm.h"
41#endif
42#include "radeon_macros.h"
43#include "radeon_probe.h"
44#include "radeon_version.h"
45
46#include "xf86.h"
47
48
49/***********************************************************************/
/*
 * Declares two locals in the calling scope: pScrn, looked up in
 * xf86Screens[] through the screen's myNum, and info, obtained from pScrn
 * with RADEONPTR().  It expands to declarations, so it has to be placed
 * where declarations are allowed and the caller supplies the final ';'.
 */
#define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn =  xf86Screens[pScr->myNum]; \
    RADEONInfoPtr info   = RADEONPTR(pScrn)

/* Compile-time debug switches; set to non-zero to enable the ErrorF output
 * of RADEON_FALLBACK and TRACE respectively.
 */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/*
 * RADEON_FALLBACK(x) always makes the enclosing function return FALSE.
 * x is a parenthesized ErrorF() argument list; it is printed, prefixed with
 * the function name, only when RADEON_TRACE_FALL is non-zero and is
 * discarded otherwise.
 */
#if RADEON_TRACE_FALL
#define RADEON_FALLBACK(x)     		\
do {					\
	ErrorF("%s: ", __FUNCTION__);	\
	ErrorF x;			\
	return FALSE;			\
} while (0)
#else
#define RADEON_FALLBACK(x) return FALSE
#endif

/* TRACE logs the name of the current function when RADEON_TRACE_DRAW is
 * non-zero and expands to nothing otherwise.
 */
#if RADEON_TRACE_DRAW
#define TRACE do { ErrorF("TRACE: %s\n", __FUNCTION__); } while(0)
#else
#define TRACE
#endif
72
/*
 * Raster-operation lookup table with one entry per GX function, in the order
 * given by the per-row comments (presumably the X11 GX* function codes
 * 0..15 are used as the index -- confirm against the users of this table).
 * Each entry holds two RADEON_ROP3_* codes: 'rop' is the variant named in
 * terms of source (S) and destination (D), 'pattern' the variant named in
 * terms of pattern (P) and destination (D).
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
94
/*
 * Compute the integer log base 2 of val, i.e. the index of its highest set
 * bit (floor(log2(val)) for positive val).
 *
 * Returns -1 when val is 0.  A negative val is treated as its unsigned bit
 * pattern, so the result is the index of the sign bit.  Both code paths
 * below give the same answer for every input.
 */
static __inline__ int
RADEONLog2(int val)
{
#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
	int bits;

	/* BSR leaves its destination undefined when the source is zero, so
	 * answer that case here instead of returning whatever happened to be
	 * in the output register.
	 */
	if (val == 0)
		return -1;

	__asm volatile("bsrl	%1, %0"
		: "=r" (bits)
		: "c" (val)
	);
	return bits;
#else
	/* Shift the unsigned bit pattern: right-shifting a negative int may
	 * replicate the sign bit forever and never reach zero.
	 */
	unsigned int rest = (unsigned int)val;
	int bits = -1;

	for (; rest != 0; rest >>= 1)
		++bits;
	return bits;
#endif
}
112
/* Return the 32-bit word that holds the object representation of val, so a
 * float can be handed to interfaces that take raw dwords.  The value is
 * reinterpreted through a union, not numerically converted.
 */
static __inline__ uint32_t F_TO_DW(float val)
{
    union {
	uint32_t dword;
	float    value;
    } pun;

    pun.value = val;

    return pun.dword;
}
122
123/* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we
124 * require src and dest datatypes to be equal.
125 */
126Bool RADEONGetDatatypeBpp(int bpp, uint32_t *type)
127{
128	switch (bpp) {
129	case 8:
130		*type = ATI_DATATYPE_CI8;
131		return TRUE;
132	case 16:
133		*type = ATI_DATATYPE_RGB565;
134		return TRUE;
135	case 24:
136		*type = ATI_DATATYPE_CI8;
137		return TRUE;
138	case 32:
139		*type = ATI_DATATYPE_ARGB8888;
140		return TRUE;
141	default:
142		RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp));
143		return FALSE;
144	}
145}
146
147static Bool RADEONPixmapIsColortiled(PixmapPtr pPix)
148{
149    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
150
151    /* This doesn't account for the back buffer, which we may want to wrap in
152     * a pixmap at some point for the purposes of DRI buffer moves.
153     */
154    if (info->tilingEnabled && exaGetPixmapOffset(pPix) == 0)
155	return TRUE;
156    else
157	return FALSE;
158}
159
160static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset,
161				 unsigned int offset, unsigned int pitch)
162{
163	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
164
165	if (pitch > 16320 || pitch % info->accel_state->exa->pixmapPitchAlign != 0)
166		RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch));
167
168	if (offset % info->accel_state->exa->pixmapOffsetAlign != 0)
169		RADEON_FALLBACK(("Bad offset 0x%08x\n", offset));
170
171	pitch = pitch >> 6;
172	*pitch_offset = (pitch << 22) | (offset >> 10);
173
174	/* If it's the front buffer, we've got to note that it's tiled? */
175	if (RADEONPixmapIsColortiled(pPix))
176		*pitch_offset |= RADEON_DST_TILE_MACRO;
177	return TRUE;
178}
179
180Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
181{
182	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
183	uint32_t pitch, offset;
184	int bpp;
185
186	bpp = pPix->drawable.bitsPerPixel;
187	if (bpp == 24)
188		bpp = 8;
189
190	offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
191	pitch = exaGetPixmapPitch(pPix);
192
193	return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
194}
195
196/*
197 * Used for vblank render stalling.
198 * Ideally we'd have one pixmap per crtc.
199 * syncing per-blit is unrealistic so,
200 * we sync to whichever crtc has a larger area.
201 */
202int RADEONBiggerCrtcArea(PixmapPtr pPix)
203{
204    ScrnInfoPtr pScrn =  xf86Screens[pPix->drawable.pScreen->myNum];
205    xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
206    int c, crtc_num = -1, area = 0;
207
208    for (c = 0; c < xf86_config->num_crtc; c++) {
209	xf86CrtcPtr crtc = xf86_config->crtc[c];
210
211	if (!crtc->enabled)
212	    continue;
213
214	if ((crtc->mode.HDisplay * crtc->mode.VDisplay) > area) {
215	    area = crtc->mode.HDisplay * crtc->mode.VDisplay;
216	    crtc_num = c;
217	}
218    }
219
220    return crtc_num;
221}
222
223#if X_BYTE_ORDER == X_BIG_ENDIAN
224
/* Per access index: the pixmap offset for which RADEONPrepareAccess() set up
 * a swapper surface, or 0 when none is set up.  RADEONFinishAccess() clears
 * the entry again.
 */
static unsigned long swapper_surfaces[6];
226
227static Bool RADEONPrepareAccess(PixmapPtr pPix, int index)
228{
229    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
230    unsigned char *RADEONMMIO = info->MMIO;
231    uint32_t offset = exaGetPixmapOffset(pPix);
232    int bpp, soff;
233    uint32_t size, flags;
234
235    /* Front buffer is always set with proper swappers */
236    if (offset == 0)
237        return TRUE;
238
239    /* If same bpp as front buffer, just do nothing as the main
240     * swappers will apply
241     */
242    bpp = pPix->drawable.bitsPerPixel;
243    if (bpp == pScrn->bitsPerPixel)
244        return TRUE;
245
246    /* We need to setup a separate swapper, let's request a
247     * surface. We need to align the size first
248     */
249    size = exaGetPixmapSize(pPix);
250    size = (size + RADEON_BUFFER_ALIGN) & ~(RADEON_BUFFER_ALIGN);
251
252    /* Set surface to tiling disabled with appropriate swapper */
253    switch (bpp) {
254    case 16:
255        flags = RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
256	break;
257    case 32:
258        flags = RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
259	break;
260    default:
261        flags = 0;
262    }
263#if defined(XF86DRI)
264    if (info->directRenderingEnabled && info->allowColorTiling) {
265	struct drm_radeon_surface_alloc drmsurfalloc;
266	int rc;
267
268        drmsurfalloc.address = offset;
269        drmsurfalloc.size = size;
270	drmsurfalloc.flags = flags | 1; /* bogus pitch to please DRM */
271
272        rc = drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_ALLOC,
273			     &drmsurfalloc, sizeof(drmsurfalloc));
274	if (rc < 0) {
275	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
276		       "drm: could not allocate surface for access"
277		       " swapper, err: %d!\n", rc);
278	    return FALSE;
279	}
280	swapper_surfaces[index] = offset;
281
282	return TRUE;
283    }
284#endif
285    soff = (index + 1) * 0x10;
286    OUTREG(RADEON_SURFACE0_INFO + soff, flags);
287    OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset);
288    OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1);
289    swapper_surfaces[index] = offset;
290    return TRUE;
291}
292
293static void RADEONFinishAccess(PixmapPtr pPix, int index)
294{
295    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
296    unsigned char *RADEONMMIO = info->MMIO;
297    uint32_t offset = exaGetPixmapOffset(pPix);
298    int soff;
299
300    /* Front buffer is always set with proper swappers */
301    if (offset == 0)
302        return;
303
304    if (swapper_surfaces[index] == 0)
305        return;
306#if defined(XF86DRI)
307    if (info->directRenderingEnabled && info->allowColorTiling) {
308	struct drm_radeon_surface_free drmsurffree;
309
310	drmsurffree.address = offset;
311	drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_FREE,
312			&drmsurffree, sizeof(drmsurffree));
313	swapper_surfaces[index] = 0;
314	return;
315    }
316#endif
317    soff = (index + 1) * 0x10;
318    OUTREG(RADEON_SURFACE0_INFO + soff, 0);
319    OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 0);
320    OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0);
321    swapper_surfaces[index] = 0;
322}
323
324#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
325
/* Both hooks ignore their argument and expand to TRACE. */
#define ENTER_DRAW(x) TRACE
#define LEAVE_DRAW(x) TRACE
/***********************************************************************/

/*
 * The acceleration sources below are included once per command submission
 * method, each time with a different set of ACCEL_* macro definitions.
 *
 * First pass, ACCEL_MMIO: register writes go straight out through OUTREG.
 * BEGIN_ACCEL(n) calls RADEONWaitForFifo() for n entries and FINISH_ACCEL()
 * expands to nothing.
 */
#define ACCEL_MMIO
#define ACCEL_PREAMBLE()	unsigned char *RADEONMMIO = info->MMIO
#define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
#define OUT_ACCEL_REG(reg, val)	OUTREG(reg, val)
#define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val))
#define FINISH_ACCEL()

#ifdef RENDER
#include "radeon_exa_render.c"
#endif
#include "radeon_exa_funcs.c"

#undef ACCEL_MMIO
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef OUT_ACCEL_REG_F
#undef FINISH_ACCEL

#ifdef XF86DRI

/*
 * Second pass, ACCEL_CP (DRI builds only): register writes are emitted with
 * the ring macros.  BEGIN_ACCEL(n) reserves 2*n ring entries (presumably one
 * header dword plus one value dword per register write -- confirm against
 * OUT_RING_REG) and FINISH_ACCEL() advances the ring.  This pass defines
 * OUT_RING_F instead of OUT_ACCEL_REG_F.
 */
#define ACCEL_CP
#define ACCEL_PREAMBLE()						\
    RING_LOCALS;							\
    RADEONCP_REFRESH(pScrn, info)
#define BEGIN_ACCEL(n)		BEGIN_RING(2*(n))
#define OUT_ACCEL_REG(reg, val)	OUT_RING_REG(reg, val)
#define FINISH_ACCEL()		ADVANCE_RING()

#define OUT_RING_F(x) OUT_RING(F_TO_DW(x))

#ifdef RENDER
#include "radeon_exa_render.c"
#endif
#include "radeon_exa_funcs.c"

#undef ACCEL_CP
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef FINISH_ACCEL
#undef OUT_RING_F

#endif /* XF86DRI */
374
375/*
376 * Once screen->off_screen_base is set, this function
377 * allocates the remaining memory appropriately
378 */
379Bool RADEONSetupMemEXA (ScreenPtr pScreen)
380{
381    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
382    RADEONInfoPtr info = RADEONPTR(pScrn);
383    xf86CrtcConfigPtr   xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
384    int cpp = info->CurrentLayout.pixel_bytes;
385    int screen_size;
386    int byteStride = pScrn->displayWidth * cpp;
387
388    if (info->accel_state->exa != NULL) {
389	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map already initialized\n");
390	return FALSE;
391    }
392    info->accel_state->exa = exaDriverAlloc();
393    if (info->accel_state->exa == NULL)
394	return FALSE;
395
396    /* Need to adjust screen size for 16 line tiles, and then make it align to.
397     * the buffer alignment requirement.
398     */
399    if (info->allowColorTiling)
400	screen_size = RADEON_ALIGN(pScrn->virtualY, 16) * byteStride;
401    else
402	screen_size = pScrn->virtualY * byteStride;
403
404    info->accel_state->exa->memoryBase = info->FB;
405    info->accel_state->exa->memorySize = info->FbMapSize - info->FbSecureSize;
406    info->accel_state->exa->offScreenBase = screen_size;
407
408    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating from a screen of %ld kb\n",
409	       info->accel_state->exa->memorySize / 1024);
410
411    /* Reserve static area for hardware cursor */
412    if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) {
413        int cursor_size = 64 * 4 * 64;
414        int align = IS_AVIVO_VARIANT ? 4096 : 256;
415        int c;
416
417        for (c = 0; c < xf86_config->num_crtc; c++) {
418            xf86CrtcPtr crtc = xf86_config->crtc[c];
419            RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
420
421            radeon_crtc->cursor_offset =
422                RADEON_ALIGN(info->accel_state->exa->offScreenBase, align);
423            info->accel_state->exa->offScreenBase = radeon_crtc->cursor_offset + cursor_size;
424
425            xf86DrvMsg(pScrn->scrnIndex, X_INFO,
426                       "Will use %d kb for hardware cursor %d at offset 0x%08x\n",
427                       (cursor_size * xf86_config->num_crtc) / 1024,
428                       c,
429                       (unsigned int)radeon_crtc->cursor_offset);
430        }
431    }
432
433#if defined(XF86DRI)
434    if (info->directRenderingEnabled) {
435	int depthCpp = (info->dri->depthBits - 8) / 4, l, next, depth_size;
436
437	info->dri->frontOffset = 0;
438	info->dri->frontPitch = pScrn->displayWidth;
439
440        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
441	       "Will use %d kb for front buffer at offset 0x%08x\n",
442	       screen_size / 1024, info->dri->frontOffset);
443	RADEONDRIAllocatePCIGARTTable(pScreen);
444
445	if (info->cardType==CARD_PCIE)
446	  xf86DrvMsg(pScrn->scrnIndex, X_INFO,
447		     "Will use %d kb for PCI GART at offset 0x%08x\n",
448		     info->dri->pciGartSize / 1024,
449		     (int)info->dri->pciGartOffset);
450
451	/* Reserve a static area for the back buffer the same size as the
452	 * visible screen.  XXX: This would be better initialized in ati_dri.c
453	 * when GLX is set up, but the offscreen memory manager's allocations
454	 * don't last through VT switches, while the kernel's understanding of
455	 * offscreen locations does.
456	 */
457	info->dri->backPitch = pScrn->displayWidth;
458	next = RADEON_ALIGN(info->accel_state->exa->offScreenBase, RADEON_BUFFER_ALIGN);
459	if (!info->dri->noBackBuffer &&
460	    next + screen_size <= info->accel_state->exa->memorySize)
461	{
462	    info->dri->backOffset = next;
463	    info->accel_state->exa->offScreenBase = next + screen_size;
464	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
465		       "Will use %d kb for back buffer at offset 0x%08x\n",
466		       screen_size / 1024, info->dri->backOffset);
467	}
468
469	/* Reserve the static depth buffer, and adjust pitch and height to
470	 * handle tiling.
471	 */
472	info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
473	depth_size = RADEON_ALIGN(pScrn->virtualY, 16) * info->dri->depthPitch * depthCpp;
474	next = RADEON_ALIGN(info->accel_state->exa->offScreenBase, RADEON_BUFFER_ALIGN);
475	if (next + depth_size <= info->accel_state->exa->memorySize)
476	{
477	    info->dri->depthOffset = next;
478	    info->accel_state->exa->offScreenBase = next + depth_size;
479	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
480		       "Will use %d kb for depth buffer at offset 0x%08x\n",
481		       depth_size / 1024, info->dri->depthOffset);
482	}
483
484	info->dri->textureSize *= (info->accel_state->exa->memorySize -
485				   info->accel_state->exa->offScreenBase) / 100;
486
487	l = RADEONLog2(info->dri->textureSize / RADEON_NR_TEX_REGIONS);
488	if (l < RADEON_LOG_TEX_GRANULARITY)
489	    l = RADEON_LOG_TEX_GRANULARITY;
490	info->dri->textureSize = (info->dri->textureSize >> l) << l;
491	if (info->dri->textureSize >= 512 * 1024) {
492	    info->dri->textureOffset = info->accel_state->exa->offScreenBase;
493	    info->accel_state->exa->offScreenBase += info->dri->textureSize;
494	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
495		       "Will use %d kb for textures at offset 0x%08x\n",
496		       info->dri->textureSize / 1024, info->dri->textureOffset);
497	} else {
498	    /* Minimum texture size is for 2 256x256x32bpp textures */
499	    info->dri->textureSize = 0;
500	}
501    } else
502#endif /* XF86DRI */
503    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
504		       "Will use %d kb for front buffer at offset 0x%08x\n",
505		       screen_size / 1024, 0);
506
507    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
508	       "Will use %ld kb for X Server offscreen at offset 0x%08lx\n",
509	       (info->accel_state->exa->memorySize - info->accel_state->exa->offScreenBase) /
510	       1024, info->accel_state->exa->offScreenBase);
511
512    return TRUE;
513}
514
515#ifdef XF86DRI
516
517#ifndef ExaOffscreenMarkUsed
518extern void ExaOffscreenMarkUsed(PixmapPtr);
519#endif
520
521unsigned long long
522RADEONTexOffsetStart(PixmapPtr pPix)
523{
524    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
525    unsigned long long offset;
526    exaMoveInPixmap(pPix);
527    ExaOffscreenMarkUsed(pPix);
528
529    offset = exaGetPixmapOffset(pPix);
530
531    if (offset > info->FbMapSize)
532	return ~0ULL;
533    else
534	return info->fbLocation + offset;
535}
536#endif
537