radeon_accel.c revision c503f109
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78				/* Driver data structures */
79#include "radeon.h"
80#include "radeon_reg.h"
81#include "r600_reg.h"
82#include "radeon_macros.h"
83#include "radeon_probe.h"
84#include "radeon_version.h"
85#ifdef XF86DRI
86#define _XF86DRI_SERVER_
87#include "radeon_drm.h"
88#endif
89
90#include "ati_pciids_gen.h"
91
92				/* Line support */
93#include "miline.h"
94
95				/* X and server generic header files */
96#include "xf86.h"
97
98static void R600EngineReset(ScrnInfoPtr pScrn);
99
100#ifdef USE_XAA
101static struct {
102    int rop;
103    int pattern;
104} RADEON_ROP[] = {
105    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
106    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
107    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
108    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
109    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
110    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
111    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
112    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
113    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
114    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
115    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
116    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
117    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
118    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
119    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
120    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
121};
122#endif
123
124/* The FIFO has 64 slots.  This routines waits until at least `entries'
125 * of these slots are empty.
126 */
127void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
128{
129    RADEONInfoPtr  info       = RADEONPTR(pScrn);
130    unsigned char *RADEONMMIO = info->MMIO;
131    int            i;
132
133    for (;;) {
134	for (i = 0; i < RADEON_TIMEOUT; i++) {
135	    info->accel_state->fifo_slots =
136		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
137	    if (info->accel_state->fifo_slots >= entries) return;
138	}
139	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
140		       "FIFO timed out: %u entries, stat=0x%08x\n",
141		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
142		       (unsigned int)INREG(RADEON_RBBM_STATUS));
143	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
144		   "FIFO timed out, resetting engine...\n");
145	RADEONEngineReset(pScrn);
146	RADEONEngineRestore(pScrn);
147#ifdef XF86DRI
148	if (info->directRenderingEnabled) {
149	    RADEONCP_RESET(pScrn, info);
150	    RADEONCP_START(pScrn, info);
151	}
152#endif
153    }
154}
155
156void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
157{
158    RADEONInfoPtr  info       = RADEONPTR(pScrn);
159    unsigned char *RADEONMMIO = info->MMIO;
160    int            i;
161
162    for (;;) {
163	for (i = 0; i < RADEON_TIMEOUT; i++) {
164	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
165		info->accel_state->fifo_slots =
166		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
167	    else
168		info->accel_state->fifo_slots =
169		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
170	    if (info->accel_state->fifo_slots >= entries) return;
171	}
172	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
173		       "FIFO timed out: stat=0x%08x\n",
174		       (unsigned int)INREG(R600_GRBM_STATUS));
175	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
176		   "FIFO timed out, resetting engine...\n");
177	R600EngineReset(pScrn);
178#ifdef XF86DRI
179	if (info->directRenderingEnabled) {
180	    RADEONCP_RESET(pScrn, info);
181	    RADEONCP_START(pScrn, info);
182	}
183#endif
184    }
185}
186
187/* Flush all dirty data in the Pixel Cache to memory */
188void RADEONEngineFlush(ScrnInfoPtr pScrn)
189{
190    RADEONInfoPtr  info       = RADEONPTR(pScrn);
191    unsigned char *RADEONMMIO = info->MMIO;
192    int            i;
193
194    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
195	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
196		RADEON_RB3D_DC_FLUSH_ALL,
197		~RADEON_RB3D_DC_FLUSH_ALL);
198	for (i = 0; i < RADEON_TIMEOUT; i++) {
199	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
200		break;
201	}
202	if (i == RADEON_TIMEOUT) {
203	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
204			   "DC flush timeout: %x\n",
205			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
206	}
207    } else {
208	OUTREGP(R300_DSTCACHE_CTLSTAT,
209		R300_RB2D_DC_FLUSH_ALL,
210		~R300_RB2D_DC_FLUSH_ALL);
211	for (i = 0; i < RADEON_TIMEOUT; i++) {
212	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
213		break;
214	}
215	if (i == RADEON_TIMEOUT) {
216	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
217			   "DC flush timeout: %x\n",
218			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
219	}
220    }
221}
222
223/* Reset graphics card to known state */
224void RADEONEngineReset(ScrnInfoPtr pScrn)
225{
226    RADEONInfoPtr  info       = RADEONPTR(pScrn);
227    unsigned char *RADEONMMIO = info->MMIO;
228    uint32_t       clock_cntl_index;
229    uint32_t       mclk_cntl;
230    uint32_t       rbbm_soft_reset;
231    uint32_t       host_path_cntl;
232
233    /* The following RBBM_SOFT_RESET sequence can help un-wedge
234     * an R300 after the command processor got stuck.
235     */
236    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
237    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
238                                   RADEON_SOFT_RESET_CP |
239                                   RADEON_SOFT_RESET_HI |
240                                   RADEON_SOFT_RESET_SE |
241                                   RADEON_SOFT_RESET_RE |
242                                   RADEON_SOFT_RESET_PP |
243                                   RADEON_SOFT_RESET_E2 |
244                                   RADEON_SOFT_RESET_RB));
245    INREG(RADEON_RBBM_SOFT_RESET);
246    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
247                                   ~(RADEON_SOFT_RESET_CP |
248                                     RADEON_SOFT_RESET_HI |
249                                     RADEON_SOFT_RESET_SE |
250                                     RADEON_SOFT_RESET_RE |
251                                     RADEON_SOFT_RESET_PP |
252                                     RADEON_SOFT_RESET_E2 |
253                                     RADEON_SOFT_RESET_RB)));
254    INREG(RADEON_RBBM_SOFT_RESET);
255    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
256    INREG(RADEON_RBBM_SOFT_RESET);
257
258    RADEONEngineFlush(pScrn);
259
260    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
261    RADEONPllErrataAfterIndex(info);
262
263#if 0 /* taken care of by new PM code */
264    /* Some ASICs have bugs with dynamic-on feature, which are
265     * ASIC-version dependent, so we force all blocks on for now
266     */
267    if (info->HasCRTC2) {
268	uint32_t tmp;
269
270	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
271	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
272				  RADEON_CP_MAX_DYN_STOP_LAT |
273				  RADEON_SCLK_FORCEON_MASK));
274
275	if (info->ChipFamily == CHIP_FAMILY_RV200) {
276	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
277	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
278	}
279    }
280#endif /* new PM code */
281
282    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
283
284#if 0 /* handled by new PM code */
285    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
286			      RADEON_FORCEON_MCLKA |
287			      RADEON_FORCEON_MCLKB |
288			      RADEON_FORCEON_YCLKA |
289			      RADEON_FORCEON_YCLKB |
290			      RADEON_FORCEON_MC |
291			      RADEON_FORCEON_AIC));
292#endif /* new PM code */
293
294    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
295     * unexpected behaviour on some machines.  Here we use
296     * RADEON_HOST_PATH_CNTL to reset it.
297     */
298    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
299    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
300
301    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
302	uint32_t tmp;
303
304	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
305					RADEON_SOFT_RESET_CP |
306					RADEON_SOFT_RESET_HI |
307					RADEON_SOFT_RESET_E2));
308	INREG(RADEON_RBBM_SOFT_RESET);
309	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
310	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
311	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
312    } else {
313	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
314					RADEON_SOFT_RESET_CP |
315					RADEON_SOFT_RESET_SE |
316					RADEON_SOFT_RESET_RE |
317					RADEON_SOFT_RESET_PP |
318					RADEON_SOFT_RESET_E2 |
319					RADEON_SOFT_RESET_RB));
320	INREG(RADEON_RBBM_SOFT_RESET);
321	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
322					~(RADEON_SOFT_RESET_CP |
323					  RADEON_SOFT_RESET_SE |
324					  RADEON_SOFT_RESET_RE |
325					  RADEON_SOFT_RESET_PP |
326					  RADEON_SOFT_RESET_E2 |
327					  RADEON_SOFT_RESET_RB)));
328	INREG(RADEON_RBBM_SOFT_RESET);
329    }
330
331    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
332    INREG(RADEON_HOST_PATH_CNTL);
333    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
334
335    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
336	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
337
338    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
339    RADEONPllErrataAfterIndex(info);
340    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
341}
342
343/* Reset graphics card to known state */
344static void R600EngineReset(ScrnInfoPtr pScrn)
345{
346    RADEONInfoPtr  info       = RADEONPTR(pScrn);
347    unsigned char *RADEONMMIO = info->MMIO;
348    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
349
350    cp_ptr = INREG(R600_CP_RB_WPTR);
351
352    cp_me_cntl = INREG(R600_CP_ME_CNTL);
353    OUTREG(R600_CP_ME_CNTL, 0x10000000);
354
355    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
356    INREG(R600_GRBM_SOFT_RESET);
357    usleep (50);
358    OUTREG(R600_GRBM_SOFT_RESET, 0);
359    INREG(R600_GRBM_SOFT_RESET);
360
361    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
362    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
363    OUTREG(R600_CP_RB_CNTL, 0x80000000);
364
365    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
366    OUTREG(R600_CP_RB_WPTR, cp_ptr);
367    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
368    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
369
370}
371
372/* Restore the acceleration hardware to its previous state */
373void RADEONEngineRestore(ScrnInfoPtr pScrn)
374{
375    RADEONInfoPtr  info       = RADEONPTR(pScrn);
376    unsigned char *RADEONMMIO = info->MMIO;
377
378    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
379		   "EngineRestore (%d/%d)\n",
380		   info->CurrentLayout.pixel_code,
381		   info->CurrentLayout.bitsPerPixel);
382
383    /* Setup engine location. This shouldn't be necessary since we
384     * set them appropriately before any accel ops, but let's avoid
385     * random bogus DMA in case we inadvertently trigger the engine
386     * in the wrong place (happened).
387     */
388    RADEONWaitForFifo(pScrn, 2);
389    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
391
392    RADEONWaitForFifo(pScrn, 1);
393#if X_BYTE_ORDER == X_BIG_ENDIAN
394    OUTREGP(RADEON_DP_DATATYPE,
395	    RADEON_HOST_BIG_ENDIAN_EN,
396	    ~RADEON_HOST_BIG_ENDIAN_EN);
397#else
398    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
399#endif
400
401    /* Restore SURFACE_CNTL */
402    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
403
404    RADEONWaitForFifo(pScrn, 1);
405    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
406					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
407    RADEONWaitForFifo(pScrn, 1);
408    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
409				       | RADEON_GMC_BRUSH_SOLID_COLOR
410				       | RADEON_GMC_SRC_DATATYPE_COLOR));
411
412    RADEONWaitForFifo(pScrn, 5);
413    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
414    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
415    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
416    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
417    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
418
419    RADEONWaitForIdleMMIO(pScrn);
420
421    info->accel_state->XInited3D = FALSE;
422}
423
424/* Initialize the acceleration hardware */
425void RADEONEngineInit(ScrnInfoPtr pScrn)
426{
427    RADEONInfoPtr  info       = RADEONPTR(pScrn);
428    unsigned char *RADEONMMIO = info->MMIO;
429    int datatype = 0;
430    info->accel_state->num_gb_pipes = 0;
431
432    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
433		   "EngineInit (%d/%d)\n",
434		   info->CurrentLayout.pixel_code,
435		   info->CurrentLayout.bitsPerPixel);
436
437#ifdef XF86DRI
438    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
439	drm_radeon_getparam_t np;
440	int num_pipes;
441
442	memset(&np, 0, sizeof(np));
443	np.param = RADEON_PARAM_NUM_GB_PIPES;
444	np.value = &num_pipes;
445
446	if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np,
447				sizeof(np)) < 0) {
448	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
449		       "Failed to determine num pipes from DRM, falling back to "
450		       "manual look-up!\n");
451	    info->accel_state->num_gb_pipes = 0;
452	} else {
453	    info->accel_state->num_gb_pipes = num_pipes;
454	}
455    }
456#endif
457
458    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
459	(info->ChipFamily == CHIP_FAMILY_R420)  ||
460	(info->ChipFamily == CHIP_FAMILY_RS600) ||
461	(info->ChipFamily == CHIP_FAMILY_RS690) ||
462	(info->ChipFamily == CHIP_FAMILY_RS740) ||
463	(info->ChipFamily == CHIP_FAMILY_RS400) ||
464	(info->ChipFamily == CHIP_FAMILY_RS480) ||
465	IS_R500_3D) {
466	if (info->accel_state->num_gb_pipes == 0) {
467	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
468
469	    info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
470	    if (IS_R500_3D)
471		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
472	}
473    } else {
474	if (info->accel_state->num_gb_pipes == 0) {
475	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
476		(info->ChipFamily == CHIP_FAMILY_R350)) {
477		/* R3xx chips */
478		info->accel_state->num_gb_pipes = 2;
479	    } else {
480		/* RV3xx chips */
481		info->accel_state->num_gb_pipes = 1;
482	    }
483	}
484    }
485
486    /* RV410 SE cards only have 1 quadpipe */
487    if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
488	(info->Chipset == PCI_CHIP_RV410_5E4F))
489	info->accel_state->num_gb_pipes = 1;
490
491    if (IS_R300_3D || IS_R500_3D)
492	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
493		   "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
494
495    if (IS_R300_3D || IS_R500_3D) {
496	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
497
498	switch(info->accel_state->num_gb_pipes) {
499	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
500	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
501	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
502	default:
503	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
504	}
505
506	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
507	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
508	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
509	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
510					 R300_DC_AUTOFLUSH_ENABLE |
511					 R300_DC_DC_DISABLE_IGNORE_PE));
512    } else
513	OUTREG(RADEON_RB3D_CNTL, 0);
514
515    RADEONEngineReset(pScrn);
516
517    switch (info->CurrentLayout.pixel_code) {
518    case 8:  datatype = 2; break;
519    case 15: datatype = 3; break;
520    case 16: datatype = 4; break;
521    case 24: datatype = 5; break;
522    case 32: datatype = 6; break;
523    default:
524	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
525		       "Unknown depth/bpp = %d/%d (code = %d)\n",
526		       info->CurrentLayout.depth,
527		       info->CurrentLayout.bitsPerPixel,
528		       info->CurrentLayout.pixel_code);
529    }
530
531    info->accel_state->dp_gui_master_cntl =
532	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
533	 | RADEON_GMC_CLR_CMP_CNTL_DIS
534	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
535
536    RADEONEngineRestore(pScrn);
537}
538
539
540#define ACCEL_MMIO
541#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
542#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
543#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
544#define FINISH_ACCEL()
545
546#include "radeon_commonfuncs.c"
547#if defined(RENDER) && defined(USE_XAA)
548#include "radeon_render.c"
549#endif
550#include "radeon_accelfuncs.c"
551
552#undef ACCEL_MMIO
553#undef ACCEL_PREAMBLE
554#undef BEGIN_ACCEL
555#undef OUT_ACCEL_REG
556#undef FINISH_ACCEL
557
558#ifdef XF86DRI
559
560#define ACCEL_CP
561#define ACCEL_PREAMBLE()						\
562    RING_LOCALS;							\
563    RADEONCP_REFRESH(pScrn, info)
564#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
565#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
566#define FINISH_ACCEL()          ADVANCE_RING()
567
568
569#include "radeon_commonfuncs.c"
570#if defined(RENDER) && defined(USE_XAA)
571#include "radeon_render.c"
572#endif
573#include "radeon_accelfuncs.c"
574
575#undef ACCEL_CP
576#undef ACCEL_PREAMBLE
577#undef BEGIN_ACCEL
578#undef OUT_ACCEL_REG
579#undef FINISH_ACCEL
580
581/* Stop the CP */
582int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
583{
584    drm_radeon_cp_stop_t  stop;
585    int              ret, i;
586
587    stop.flush = 1;
588    stop.idle  = 1;
589
590    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
591			  sizeof(drm_radeon_cp_stop_t));
592
593    if (ret == 0) {
594	return 0;
595    } else if (errno != EBUSY) {
596	return -errno;
597    }
598
599    stop.flush = 0;
600
601    i = 0;
602    do {
603	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
604			      sizeof(drm_radeon_cp_stop_t));
605    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
606
607    if (ret == 0) {
608	return 0;
609    } else if (errno != EBUSY) {
610	return -errno;
611    }
612
613    stop.idle = 0;
614
615    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
616			&stop, sizeof(drm_radeon_cp_stop_t))) {
617	return -errno;
618    } else {
619	return 0;
620    }
621}
622
623/* Get an indirect buffer for the CP 2D acceleration commands  */
624drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
625{
626    RADEONInfoPtr  info = RADEONPTR(pScrn);
627    drmDMAReq      dma;
628    drmBufPtr      buf = NULL;
629    int            indx = 0;
630    int            size = 0;
631    int            i = 0;
632    int            ret;
633
634#if 0
635    /* FIXME: pScrn->pScreen has not been initialized when this is first
636     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
637     * the screen index from pScrn, which is initialized, and then get
638     * the screen from screenInfo.screens[index], but that is a hack.
639     */
640    dma.context = DRIGetContext(pScrn->pScreen);
641#else
642    /* This is the X server's context */
643    dma.context = 0x00000001;
644#endif
645
646    dma.send_count    = 0;
647    dma.send_list     = NULL;
648    dma.send_sizes    = NULL;
649    dma.flags         = 0;
650    dma.request_count = 1;
651    dma.request_size  = RADEON_BUFFER_SIZE;
652    dma.request_list  = &indx;
653    dma.request_sizes = &size;
654    dma.granted_count = 0;
655
656    while (1) {
657	do {
658	    ret = drmDMA(info->dri->drmFD, &dma);
659	    if (ret && ret != -EBUSY) {
660		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
661			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
662	    }
663	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
664
665	if (ret == 0) {
666	    buf = &info->dri->buffers->list[indx];
667	    buf->used = 0;
668	    if (RADEON_VERBOSE) {
669		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
670			   "   GetBuffer returning %d %p\n",
671			   buf->idx, buf->address);
672	    }
673	    return buf;
674	}
675
676	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
677		   "GetBuffer timed out, resetting engine...\n");
678
679	if (info->ChipFamily < CHIP_FAMILY_R600) {
680	    RADEONEngineReset(pScrn);
681	    RADEONEngineRestore(pScrn);
682	} else
683	    R600EngineReset(pScrn);
684
685	/* Always restart the engine when doing CP 2D acceleration */
686	RADEONCP_RESET(pScrn, info);
687	RADEONCP_START(pScrn, info);
688    }
689}
690
691/* Flush the indirect buffer to the kernel for submission to the card */
692void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
693{
694    RADEONInfoPtr      info   = RADEONPTR(pScrn);
695    drmBufPtr          buffer = info->cp->indirectBuffer;
696    int                start  = info->cp->indirectStart;
697    drm_radeon_indirect_t  indirect;
698
699    if (!buffer) return;
700    if (start == buffer->used && !discard) return;
701
702    if (RADEON_VERBOSE) {
703	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
704		   buffer->idx);
705    }
706
707    if (info->ChipFamily >= CHIP_FAMILY_R600) {
708	if (buffer->used & 0x3c) {
709	    RING_LOCALS;
710
711	    while (buffer->used & 0x3c) {
712		BEGIN_RING(1);
713		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
714		ADVANCE_RING();
715	    }
716	}
717    }
718
719    indirect.idx     = buffer->idx;
720    indirect.start   = start;
721    indirect.end     = buffer->used;
722    indirect.discard = discard;
723
724    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
725			&indirect, sizeof(drm_radeon_indirect_t));
726
727    if (discard) {
728	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
729	info->cp->indirectStart  = 0;
730    } else {
731	/* Start on a double word boundary */
732	info->cp->indirectStart  = buffer->used = (buffer->used + 7) & ~7;
733	if (RADEON_VERBOSE) {
734	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
735		       info->cp->indirectStart);
736	}
737    }
738}
739
740/* Flush and release the indirect buffer */
741void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
742{
743    RADEONInfoPtr      info   = RADEONPTR(pScrn);
744    drmBufPtr          buffer = info->cp->indirectBuffer;
745    int                start  = info->cp->indirectStart;
746    drm_radeon_indirect_t  indirect;
747
748    if (info->ChipFamily >= CHIP_FAMILY_R600) {
749	if (buffer && (buffer->used & 0x3c)) {
750	    RING_LOCALS;
751
752	    while (buffer->used & 0x3c) {
753		BEGIN_RING(1);
754		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
755		ADVANCE_RING();
756	    }
757	}
758    }
759
760    info->cp->indirectBuffer = NULL;
761    info->cp->indirectStart  = 0;
762
763    if (!buffer) return;
764
765    if (RADEON_VERBOSE) {
766	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
767		   buffer->idx);
768    }
769
770    indirect.idx     = buffer->idx;
771    indirect.start   = start;
772    indirect.end     = buffer->used;
773    indirect.discard = 1;
774
775    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
776			&indirect, sizeof(drm_radeon_indirect_t));
777}
778
779/** \brief Calculate HostDataBlit parameters from pointer and pitch
780 *
781 * This is a helper for the trivial HostDataBlit users that don't need to worry
782 * about tiling etc.
783 */
784void
785RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
786		     uint32_t *dstPitchOff, int *x, int *y)
787{
788    RADEONInfoPtr info = RADEONPTR( pScrn );
789    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
790
791    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
792    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
793    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
794}
795
796/* Set up a hostdata blit to transfer data from system memory to the
797 * framebuffer. Returns the address where the data can be written to and sets
798 * the dstPitch and hpass variables as required.
799 */
800uint8_t*
801RADEONHostDataBlit(
802    ScrnInfoPtr pScrn,
803    unsigned int cpp,
804    unsigned int w,
805    uint32_t dstPitchOff,
806    uint32_t *bufPitch,
807    int x,
808    int *y,
809    unsigned int *h,
810    unsigned int *hpass
811){
812    RADEONInfoPtr info = RADEONPTR( pScrn );
813    uint32_t format, dwords;
814    uint8_t *ret;
815    RING_LOCALS;
816
817    if ( *h == 0 )
818    {
819	return NULL;
820    }
821
822    switch ( cpp )
823    {
824    case 4:
825	format = RADEON_GMC_DST_32BPP;
826	*bufPitch = 4 * w;
827	break;
828    case 2:
829	format = RADEON_GMC_DST_16BPP;
830	*bufPitch = 2 * ((w + 1) & ~1);
831	break;
832    case 1:
833	format = RADEON_GMC_DST_8BPP_CI;
834	*bufPitch = (w + 3) & ~3;
835	break;
836    default:
837	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
838		    "%s: Unsupported cpp %d!\n", __func__, cpp );
839	return NULL;
840    }
841
842#if X_BYTE_ORDER == X_BIG_ENDIAN
843    /* Swap doesn't work on R300 and later, it's handled during the
844     * copy to ind. buffer pass
845     */
846    if (info->ChipFamily < CHIP_FAMILY_R300) {
847        BEGIN_RING(2);
848	if (cpp == 2)
849	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
850			 RADEON_HOST_DATA_SWAP_HDW);
851	else if (cpp == 1)
852	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
853			 RADEON_HOST_DATA_SWAP_32BIT);
854	else
855	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
856			 RADEON_HOST_DATA_SWAP_NONE);
857	ADVANCE_RING();
858    }
859#endif
860
861    /*RADEON_PURGE_CACHE();
862      RADEON_WAIT_UNTIL_IDLE();*/
863
864    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
865    dwords = *hpass * *bufPitch / 4;
866
867    BEGIN_RING( dwords + 10 );
868    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
869    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
870	    | RADEON_GMC_DST_CLIPPING
871	    | RADEON_GMC_BRUSH_NONE
872	    | format
873	    | RADEON_GMC_SRC_DATATYPE_COLOR
874	    | RADEON_ROP3_S
875	    | RADEON_DP_SRC_SOURCE_HOST_DATA
876	    | RADEON_GMC_CLR_CMP_CNTL_DIS
877	    | RADEON_GMC_WR_MSK_DIS );
878    OUT_RING( dstPitchOff );
879    OUT_RING( (*y << 16) | x );
880    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
881    OUT_RING( 0xffffffff );
882    OUT_RING( 0xffffffff );
883    OUT_RING( *y << 16 | x );
884    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
885    OUT_RING( dwords );
886
887    ret = ( uint8_t* )&__head[__count];
888
889    __count += dwords;
890    ADVANCE_RING();
891
892    *y += *hpass;
893    *h -= *hpass;
894
895    return ret;
896}
897
898void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
899{
900    switch(swap) {
901    case RADEON_HOST_DATA_SWAP_HDW:
902        {
903	    unsigned int *d = (unsigned int *)dst;
904	    unsigned int *s = (unsigned int *)src;
905	    unsigned int nwords = size >> 2;
906
907	    for (; nwords > 0; --nwords, ++d, ++s)
908		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
909	    return;
910        }
911    case RADEON_HOST_DATA_SWAP_32BIT:
912        {
913	    unsigned int *d = (unsigned int *)dst;
914	    unsigned int *s = (unsigned int *)src;
915	    unsigned int nwords = size >> 2;
916
917	    for (; nwords > 0; --nwords, ++d, ++s)
918#ifdef __powerpc__
919		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
920#else
921		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
922			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
923#endif
924	    return;
925        }
926    case RADEON_HOST_DATA_SWAP_16BIT:
927        {
928	    unsigned short *d = (unsigned short *)dst;
929	    unsigned short *s = (unsigned short *)src;
930	    unsigned int nwords = size >> 1;
931
932	    for (; nwords > 0; --nwords, ++d, ++s)
933#ifdef __powerpc__
934		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
935#else
936	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
937			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
938#endif
939	    return;
940	}
941    }
942    if (src != dst)
943	    memmove(dst, src, size);
944}
945
946/* Copies a single pass worth of data for a hostdata blit set up by
947 * RADEONHostDataBlit().
948 */
949void
950RADEONHostDataBlitCopyPass(
951    ScrnInfoPtr pScrn,
952    unsigned int cpp,
953    uint8_t *dst,
954    uint8_t *src,
955    unsigned int hpass,
956    unsigned int dstPitch,
957    unsigned int srcPitch
958){
959
960#if X_BYTE_ORDER == X_BIG_ENDIAN
961    RADEONInfoPtr info = RADEONPTR( pScrn );
962#endif
963
964    /* RADEONHostDataBlitCopy can return NULL ! */
965    if( (dst==NULL) || (src==NULL)) return;
966
967    if ( dstPitch == srcPitch )
968    {
969#if X_BYTE_ORDER == X_BIG_ENDIAN
970        if (info->ChipFamily >= CHIP_FAMILY_R300) {
971	    switch(cpp) {
972	    case 1:
973		RADEONCopySwap(dst, src, hpass * dstPitch,
974			       RADEON_HOST_DATA_SWAP_32BIT);
975		return;
976	    case 2:
977	        RADEONCopySwap(dst, src, hpass * dstPitch,
978			       RADEON_HOST_DATA_SWAP_HDW);
979		return;
980	    }
981	}
982#endif
983	memcpy( dst, src, hpass * dstPitch );
984    }
985    else
986    {
987	unsigned int minPitch = min( dstPitch, srcPitch );
988	while ( hpass-- )
989	{
990#if X_BYTE_ORDER == X_BIG_ENDIAN
991            if (info->ChipFamily >= CHIP_FAMILY_R300) {
992		switch(cpp) {
993		case 1:
994		    RADEONCopySwap(dst, src, minPitch,
995				   RADEON_HOST_DATA_SWAP_32BIT);
996		    goto next;
997		case 2:
998	            RADEONCopySwap(dst, src, minPitch,
999				   RADEON_HOST_DATA_SWAP_HDW);
1000		    goto next;
1001		}
1002	    }
1003#endif
1004	    memcpy( dst, src, minPitch );
1005#if X_BYTE_ORDER == X_BIG_ENDIAN
1006	next:
1007#endif
1008	    src += srcPitch;
1009	    dst += dstPitch;
1010	}
1011    }
1012}
1013
1014#endif
1015
1016Bool RADEONAccelInit(ScreenPtr pScreen)
1017{
1018    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1019    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1020
1021#ifdef USE_EXA
1022    if (info->useEXA) {
1023# ifdef XF86DRI
1024	if (info->directRenderingEnabled) {
1025	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1026		if (!R600DrawInit(pScreen))
1027		    return FALSE;
1028	    } else {
1029		if (!RADEONDrawInitCP(pScreen))
1030		    return FALSE;
1031	    }
1032	} else
1033# endif /* XF86DRI */
1034	{
1035	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1036		return FALSE;
1037	    else {
1038		if (!RADEONDrawInitMMIO(pScreen))
1039		    return FALSE;
1040	    }
1041	}
1042    }
1043#endif /* USE_EXA */
1044#ifdef USE_XAA
1045    if (!info->useEXA) {
1046	XAAInfoRecPtr  a;
1047
1048	if (info->ChipFamily >= CHIP_FAMILY_R600)
1049	    return FALSE;
1050
1051	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1052	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1053	    return FALSE;
1054	}
1055
1056#ifdef XF86DRI
1057	if (info->directRenderingEnabled)
1058	    RADEONAccelInitCP(pScreen, a);
1059	else
1060#endif /* XF86DRI */
1061	    RADEONAccelInitMMIO(pScreen, a);
1062
1063	RADEONEngineInit(pScrn);
1064
1065	if (!XAAInit(pScreen, a)) {
1066	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1067	    return FALSE;
1068	}
1069    }
1070#endif /* USE_XAA */
1071    return TRUE;
1072}
1073
1074void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1075{
1076    RADEONInfoPtr info = RADEONPTR (pScrn);
1077
1078#ifdef XF86DRI
1079    if (info->directRenderingEnabled) {
1080	drm_radeon_sarea_t *pSAREAPriv;
1081
1082	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1083	pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1084	RADEONInit3DEngineCP(pScrn);
1085    } else
1086#endif
1087	RADEONInit3DEngineMMIO(pScrn);
1088
1089    info->accel_state->XInited3D = TRUE;
1090}
1091
1092#ifdef USE_XAA
1093#ifdef XF86DRI
/* Lay out video memory for XAA acceleration when DRI is in use.
 *
 * Computes the sizes and offsets of the back buffer, the depth buffer and
 * the local texture heap and stores them in info->dri (the front buffer is
 * placed at offset 0).  It then initialises the framebuffer manager, reserves
 * a few offscreen scanlines, calls RADEONDRIAllocatePCIGARTTable(), derives
 * the number of scanlines taken by the depth/texture and back buffer areas
 * and finally fills in the combined pitch/offset values for the front, back
 * and depth buffers.
 *
 * scrnIndex is only used as the screen index of some log messages.
 *
 * Returns TRUE on success, FALSE if the framebuffer manager could not be
 * initialised or the largest offscreen area could not be queried.
 */
Bool
RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
{
    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
    RADEONInfoPtr  info  = RADEONPTR(pScrn);
    int            cpp = info->CurrentLayout.pixel_bytes;
    /* Bytes per depth buffer pixel: yields 2 for 16 and 4 for 24 depth bits */
    int            depthCpp = (info->dri->depthBits - 8) / 4;
    int            width_bytes = pScrn->displayWidth * cpp;
    int            bufferSize;
    int            depthSize;
    int            l;
    int            scanlines;
    int            texsizerequest;
    BoxRec         MemBox;
    FBAreaPtr      fbarea;

    info->dri->frontOffset = 0;
    info->dri->frontPitch = pScrn->displayWidth;
    info->dri->backPitch = pScrn->displayWidth;

    /* make sure we use 16 line alignment for tiling (8 might be enough).
     * Might need that for non-XF86DRI too?
     */
    /* Size of one colour buffer, rounded up (this assumes RADEON_BUFFER_ALIGN
     * is an "alignment - 1" style mask - TODO confirm against its definition).
     */
    if (info->allowColorTiling) {
	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
    } else {
        bufferSize = (pScrn->virtualY * width_bytes
		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
    }

    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
     * which is always the case if color tiling is used due to color pitch
     * but not necessarily otherwise, and its height a multiple of 16 lines.
     */
    info->dri->depthPitch = (pScrn->displayWidth + 31) & ~31;
    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->dri->depthPitch
		  * depthCpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);

    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
	       "Using %d MB GART aperture\n", info->dri->gartSize);
    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
	       "Using %d MB for GART textures\n", info->dri->gartTexSize);

    /* Try for front, back, depth, and three framebuffers worth of
     * pixmap cache.  Should be enough for a fullscreen background
     * image plus some leftovers.
     * If the FBTexPercent option was used, try to achieve that percentage instead,
     * but still have at least one pixmap buffer (get problems with xvideo/render
     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
     * probably useless for XAA.
     */
    /* On entry a non-negative textureSize is used as a percentage of the
     * memory left after two colour buffers, the depth buffer and the
     * reserved areas; a negative value requests half of the mapped memory.
     */
    if (info->dri->textureSize >= 0) {
	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
			 - 2 * width_bytes - 16384 - info->FbSecureSize)
	/* first divide, then multiply or we'll get an overflow (been there...) */
			 / 100 * info->dri->textureSize;
    }
    else {
	texsizerequest = (int)info->FbMapSize / 2;
    }
    /* From here on textureSize holds a size in bytes.  First attempt:
     * everything not needed by five colour buffers, the depth buffer and
     * the secure area.
     */
    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;

    /* If that gives us less than the requested memory, let's
     * be greedy and grab some more.  Sorry, I care more about 3D
     * performance than playing nicely, and you'll get around a full
     * framebuffer's worth of pixmap cache anyway.
     */
    if (info->dri->textureSize < texsizerequest) {
        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
    }
    if (info->dri->textureSize < texsizerequest) {
        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
    }

    /* If there's still no space for textures, try without pixmap cache, but
     * never use the reserved space, the space hw cursor and PCIGART table might
     * use.
     */
    if (info->dri->textureSize < 0) {
	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
    }

    /* Check to see if there is more room available after the 8192nd
     * scanline for textures
     */
    /* FIXME: what's this good for? condition is pretty much impossible to meet */
    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
	> info->dri->textureSize) {
	info->dri->textureSize =
		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
    }

    /* If backbuffer is disabled, don't allocate memory for it */
    if (info->dri->noBackBuffer) {
	info->dri->textureSize += bufferSize;
    }

    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
       area otherwise).
       This might cause some space at the end of the video memory to be unused, since it
       can't be used (?) due to that log_tex_granularity thing???
       Could use different copyscreentoscreen function for the pageflip copies
       (which would use different src and dst offsets) to avoid this. */
    if (info->allowColorTiling && !info->dri->noBackBuffer) {
	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
    }
    if (info->dri->textureSize > 0) {
	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
	if (l < RADEON_LOG_TEX_GRANULARITY)
	    l = RADEON_LOG_TEX_GRANULARITY;
	/* Round the texture size up to the nearest whole number of
	 * texture regions.  Again, be greedy about this, don't
	 * round down.
	 */
	/* NOTE(review): the shift pair below clears the low l bits, i.e. it
	 * truncates textureSize to a multiple of (1 << l).
	 */
	info->dri->log2TexGran = l;
	info->dri->textureSize = (info->dri->textureSize >> l) << l;
    } else {
	info->dri->textureSize = 0;
    }

    /* Set a minimum usable local texture heap size.  This will fit
     * two 256x256x32bpp textures.
     */
    /* Note: textureOffset is assigned again unconditionally right below, so
     * only the zeroed textureSize survives this block.
     */
    if (info->dri->textureSize < 512 * 1024) {
	info->dri->textureOffset = 0;
	info->dri->textureSize = 0;
    }

    /* The texture heap sits at the end of the mapped memory; depth and back
     * buffer are stacked below it in that order.
     */
    if (info->allowColorTiling && !info->dri->noBackBuffer) {
	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
				    (width_bytes * 16)) * (width_bytes * 16);
    }
    else {
	/* Reserve space for textures */
	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize +
				     RADEON_BUFFER_ALIGN) &
				    ~(uint32_t)RADEON_BUFFER_ALIGN);
    }

    /* Reserve space for the shared depth
     * buffer.
     */
    info->dri->depthOffset = ((info->dri->textureOffset - depthSize +
			       RADEON_BUFFER_ALIGN) &
			      ~(uint32_t)RADEON_BUFFER_ALIGN);

    /* Reserve space for the shared back buffer */
    if (info->dri->noBackBuffer) {
       info->dri->backOffset = info->dri->depthOffset;
    } else {
       info->dri->backOffset = ((info->dri->depthOffset - bufferSize +
				 RADEON_BUFFER_ALIGN) &
				~(uint32_t)RADEON_BUFFER_ALIGN);
    }

    /* Back buffer start expressed as (x, y) in front buffer pitch units */
    info->dri->backY = info->dri->backOffset / width_bytes;
    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;

    /* Number of scanlines handed to the framebuffer manager, capped at 8191
     * (presumably the accelerator's coordinate limit - TODO confirm).
     */
    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
    if (scanlines > 8191)
	scanlines = 8191;

    MemBox.x1 = 0;
    MemBox.y1 = 0;
    MemBox.x2 = pScrn->displayWidth;
    MemBox.y2 = scanlines;

    if (!xf86InitFBManager(pScreen, &MemBox)) {
        xf86DrvMsg(scrnIndex, X_ERROR,
		   "Memory manager initialization to "
		   "(%d,%d) (%d,%d) failed\n",
		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
	return FALSE;
    } else {
	int  width, height;

	xf86DrvMsg(scrnIndex, X_INFO,
		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
	/* why oh why can't we just request modes which are guaranteed to be 16 lines
	   aligned... sigh */
	/* Reserve 2 lines, plus the padding up to the next multiple of 16
	 * lines when colour tiling is allowed.  A failure here is only
	 * logged, it is not treated as fatal.
	 */
	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
						pScrn->displayWidth,
						info->allowColorTiling ?
						((pScrn->virtualY + 15) & ~15)
						- pScrn->virtualY + 2 : 2,
						0, NULL, NULL,
						NULL))) {
	    xf86DrvMsg(scrnIndex, X_INFO,
		       "Reserved area from (%d,%d) to (%d,%d)\n",
		       fbarea->box.x1, fbarea->box.y1,
		       fbarea->box.x2, fbarea->box.y2);
	} else {
	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
	}

	RADEONDRIAllocatePCIGARTTable(pScreen);

	if (xf86QueryLargestOffscreenArea(pScreen, &width,
					  &height, 0, 0, 0)) {
	    xf86DrvMsg(scrnIndex, X_INFO,
		       "Largest offscreen area available: %d x %d\n",
		       width, height);

	    /* Lines in offscreen area needed for depth buffer and
	     * textures
	     */
	    info->dri->depthTexLines = (scanlines
					- info->dri->depthOffset / width_bytes);
	    info->dri->backLines	    = (scanlines
					       - info->dri->backOffset / width_bytes
					       - info->dri->depthTexLines);
	    info->dri->backArea	    = NULL;
	} else {
	    xf86DrvMsg(scrnIndex, X_ERROR,
		       "Unable to determine largest offscreen area "
		       "available\n");
	    return FALSE;
	}
    }

    xf86DrvMsg(scrnIndex, X_INFO,
	       "Will use front buffer at offset 0x%x\n",
	       info->dri->frontOffset);

    xf86DrvMsg(scrnIndex, X_INFO,
	       "Will use back buffer at offset 0x%x\n",
	       info->dri->backOffset);
    xf86DrvMsg(scrnIndex, X_INFO,
	       "Will use depth buffer at offset 0x%x\n",
	       info->dri->depthOffset);
    if (info->cardType==CARD_PCIE)
    	xf86DrvMsg(scrnIndex, X_INFO,
	           "Will use %d kb for PCI GART table at offset 0x%x\n",
		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
    xf86DrvMsg(scrnIndex, X_INFO,
	       "Will use %d kb for textures at offset 0x%x\n",
	       info->dri->textureSize/1024, info->dri->textureOffset);

    /* Combined pitch/offset values: the pitch in units of 64 bytes goes into
     * bits 22 and up, the buffer address (offset + fbLocation) in units of
     * 1 kb into the bits below.
     */
    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
				   ((info->dri->frontOffset + info->fbLocation) >> 10));

    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
				  ((info->dri->backOffset + info->fbLocation) >> 10));

    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
				   ((info->dri->depthOffset + info->fbLocation) >> 10));
    return TRUE;
}
1353#endif /* XF86DRI */
1354
1355Bool
1356RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1357{
1358    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1359    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1360    BoxRec         MemBox;
1361    int            y2;
1362
1363    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1364
1365    MemBox.x1 = 0;
1366    MemBox.y1 = 0;
1367    MemBox.x2 = pScrn->displayWidth;
1368    y2 = info->FbMapSize / width_bytes;
1369    if (y2 >= 32768)
1370	y2 = 32767; /* because MemBox.y2 is signed short */
1371    MemBox.y2 = y2;
1372
1373    /* The acceleration engine uses 14 bit
1374     * signed coordinates, so we can't have any
1375     * drawable caches beyond this region.
1376     */
1377    if (MemBox.y2 > 8191)
1378	MemBox.y2 = 8191;
1379
1380    if (!xf86InitFBManager(pScreen, &MemBox)) {
1381	xf86DrvMsg(scrnIndex, X_ERROR,
1382		   "Memory manager initialization to "
1383		   "(%d,%d) (%d,%d) failed\n",
1384		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1385	return FALSE;
1386    } else {
1387	int       width, height;
1388	FBAreaPtr fbarea;
1389
1390	xf86DrvMsg(scrnIndex, X_INFO,
1391		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1392		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1393	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1394						pScrn->displayWidth,
1395						info->allowColorTiling ?
1396						((pScrn->virtualY + 15) & ~15)
1397						- pScrn->virtualY + 2 : 2,
1398						0, NULL, NULL,
1399						NULL))) {
1400	    xf86DrvMsg(scrnIndex, X_INFO,
1401		       "Reserved area from (%d,%d) to (%d,%d)\n",
1402		       fbarea->box.x1, fbarea->box.y1,
1403		       fbarea->box.x2, fbarea->box.y2);
1404	} else {
1405	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1406	}
1407	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1408					      0, 0, 0)) {
1409	    xf86DrvMsg(scrnIndex, X_INFO,
1410		       "Largest offscreen area available: %d x %d\n",
1411		       width, height);
1412	}
1413	return TRUE;
1414    }
1415}
1416#endif /* USE_XAA */
1417