radeon_accel.c revision 2f39173d
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78				/* Driver data structures */
79#include "radeon.h"
80#include "radeon_reg.h"
81#include "r600_reg.h"
82#include "radeon_macros.h"
83#include "radeon_probe.h"
84#include "radeon_version.h"
85#ifdef XF86DRI
86#define _XF86DRI_SERVER_
87#include "radeon_drm.h"
88#endif
89
90#include "ati_pciids_gen.h"
91
92				/* Line support */
93#include "miline.h"
94
95				/* X and server generic header files */
96#include "xf86.h"
97
98static void R600EngineReset(ScrnInfoPtr pScrn);
99
100#ifdef USE_XAA
101static struct {
102    int rop;
103    int pattern;
104} RADEON_ROP[] = {
105    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
106    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
107    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
108    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
109    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
110    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
111    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
112    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
113    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
114    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
115    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
116    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
117    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
118    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
119    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
120    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
121};
122#endif
123
124/* The FIFO has 64 slots.  This routines waits until at least `entries'
125 * of these slots are empty.
126 */
127void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
128{
129    RADEONInfoPtr  info       = RADEONPTR(pScrn);
130    unsigned char *RADEONMMIO = info->MMIO;
131    int            i;
132
133    for (;;) {
134	for (i = 0; i < RADEON_TIMEOUT; i++) {
135	    info->accel_state->fifo_slots =
136		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
137	    if (info->accel_state->fifo_slots >= entries) return;
138	}
139	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
140		       "FIFO timed out: %u entries, stat=0x%08x\n",
141		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
142		       (unsigned int)INREG(RADEON_RBBM_STATUS));
143	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
144		   "FIFO timed out, resetting engine...\n");
145	RADEONEngineReset(pScrn);
146	RADEONEngineRestore(pScrn);
147#ifdef XF86DRI
148	if (info->directRenderingEnabled) {
149	    RADEONCP_RESET(pScrn, info);
150	    RADEONCP_START(pScrn, info);
151	}
152#endif
153    }
154}
155
156void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
157{
158    RADEONInfoPtr  info       = RADEONPTR(pScrn);
159    unsigned char *RADEONMMIO = info->MMIO;
160    int            i;
161
162    for (;;) {
163	for (i = 0; i < RADEON_TIMEOUT; i++) {
164	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
165		info->accel_state->fifo_slots =
166		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
167	    else
168		info->accel_state->fifo_slots =
169		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
170	    if (info->accel_state->fifo_slots >= entries) return;
171	}
172	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
173		       "FIFO timed out: stat=0x%08x\n",
174		       (unsigned int)INREG(R600_GRBM_STATUS));
175	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
176		   "FIFO timed out, resetting engine...\n");
177	R600EngineReset(pScrn);
178#ifdef XF86DRI
179	if (info->directRenderingEnabled) {
180	    RADEONCP_RESET(pScrn, info);
181	    RADEONCP_START(pScrn, info);
182	}
183#endif
184    }
185}
186
187/* Flush all dirty data in the Pixel Cache to memory */
188void RADEONEngineFlush(ScrnInfoPtr pScrn)
189{
190    RADEONInfoPtr  info       = RADEONPTR(pScrn);
191    unsigned char *RADEONMMIO = info->MMIO;
192    int            i;
193
194    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
195	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
196		RADEON_RB3D_DC_FLUSH_ALL,
197		~RADEON_RB3D_DC_FLUSH_ALL);
198	for (i = 0; i < RADEON_TIMEOUT; i++) {
199	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
200		break;
201	}
202	if (i == RADEON_TIMEOUT) {
203	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
204			   "DC flush timeout: %x\n",
205			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
206	}
207    } else {
208	OUTREGP(R300_DSTCACHE_CTLSTAT,
209		R300_RB2D_DC_FLUSH_ALL,
210		~R300_RB2D_DC_FLUSH_ALL);
211	for (i = 0; i < RADEON_TIMEOUT; i++) {
212	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
213		break;
214	}
215	if (i == RADEON_TIMEOUT) {
216	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
217			   "DC flush timeout: %x\n",
218			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
219	}
220    }
221}
222
223/* Reset graphics card to known state */
224void RADEONEngineReset(ScrnInfoPtr pScrn)
225{
226    RADEONInfoPtr  info       = RADEONPTR(pScrn);
227    unsigned char *RADEONMMIO = info->MMIO;
228    uint32_t       clock_cntl_index;
229    uint32_t       mclk_cntl;
230    uint32_t       rbbm_soft_reset;
231    uint32_t       host_path_cntl;
232
233    /* The following RBBM_SOFT_RESET sequence can help un-wedge
234     * an R300 after the command processor got stuck.
235     */
236    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
237    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
238                                   RADEON_SOFT_RESET_CP |
239                                   RADEON_SOFT_RESET_HI |
240                                   RADEON_SOFT_RESET_SE |
241                                   RADEON_SOFT_RESET_RE |
242                                   RADEON_SOFT_RESET_PP |
243                                   RADEON_SOFT_RESET_E2 |
244                                   RADEON_SOFT_RESET_RB));
245    INREG(RADEON_RBBM_SOFT_RESET);
246    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
247                                   ~(RADEON_SOFT_RESET_CP |
248                                     RADEON_SOFT_RESET_HI |
249                                     RADEON_SOFT_RESET_SE |
250                                     RADEON_SOFT_RESET_RE |
251                                     RADEON_SOFT_RESET_PP |
252                                     RADEON_SOFT_RESET_E2 |
253                                     RADEON_SOFT_RESET_RB)));
254    INREG(RADEON_RBBM_SOFT_RESET);
255    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
256    INREG(RADEON_RBBM_SOFT_RESET);
257
258    RADEONEngineFlush(pScrn);
259
260    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
261    RADEONPllErrataAfterIndex(info);
262
263#if 0 /* taken care of by new PM code */
264    /* Some ASICs have bugs with dynamic-on feature, which are
265     * ASIC-version dependent, so we force all blocks on for now
266     */
267    if (info->HasCRTC2) {
268	uint32_t tmp;
269
270	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
271	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
272				  RADEON_CP_MAX_DYN_STOP_LAT |
273				  RADEON_SCLK_FORCEON_MASK));
274
275	if (info->ChipFamily == CHIP_FAMILY_RV200) {
276	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
277	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
278	}
279    }
280#endif /* new PM code */
281
282    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
283
284#if 0 /* handled by new PM code */
285    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
286			      RADEON_FORCEON_MCLKA |
287			      RADEON_FORCEON_MCLKB |
288			      RADEON_FORCEON_YCLKA |
289			      RADEON_FORCEON_YCLKB |
290			      RADEON_FORCEON_MC |
291			      RADEON_FORCEON_AIC));
292#endif /* new PM code */
293
294    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
295     * unexpected behaviour on some machines.  Here we use
296     * RADEON_HOST_PATH_CNTL to reset it.
297     */
298    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
299    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
300
301    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
302	uint32_t tmp;
303
304	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
305					RADEON_SOFT_RESET_CP |
306					RADEON_SOFT_RESET_HI |
307					RADEON_SOFT_RESET_E2));
308	INREG(RADEON_RBBM_SOFT_RESET);
309	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
310	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
311	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
312    } else {
313	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
314					RADEON_SOFT_RESET_CP |
315					RADEON_SOFT_RESET_SE |
316					RADEON_SOFT_RESET_RE |
317					RADEON_SOFT_RESET_PP |
318					RADEON_SOFT_RESET_E2 |
319					RADEON_SOFT_RESET_RB));
320	INREG(RADEON_RBBM_SOFT_RESET);
321	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
322					~(RADEON_SOFT_RESET_CP |
323					  RADEON_SOFT_RESET_SE |
324					  RADEON_SOFT_RESET_RE |
325					  RADEON_SOFT_RESET_PP |
326					  RADEON_SOFT_RESET_E2 |
327					  RADEON_SOFT_RESET_RB)));
328	INREG(RADEON_RBBM_SOFT_RESET);
329    }
330
331    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
332	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
333
334    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
335    RADEONPllErrataAfterIndex(info);
336    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
337}
338
339/* Reset graphics card to known state */
340static void R600EngineReset(ScrnInfoPtr pScrn)
341{
342    RADEONInfoPtr  info       = RADEONPTR(pScrn);
343    unsigned char *RADEONMMIO = info->MMIO;
344    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
345
346    cp_ptr = INREG(R600_CP_RB_WPTR);
347
348    cp_me_cntl = INREG(R600_CP_ME_CNTL);
349    OUTREG(R600_CP_ME_CNTL, 0x10000000);
350
351    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
352    INREG(R600_GRBM_SOFT_RESET);
353    usleep (50);
354    OUTREG(R600_GRBM_SOFT_RESET, 0);
355    INREG(R600_GRBM_SOFT_RESET);
356
357    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
358    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
359    OUTREG(R600_CP_RB_CNTL, 0x80000000);
360
361    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
362    OUTREG(R600_CP_RB_WPTR, cp_ptr);
363    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
364    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
365
366}
367
368/* Restore the acceleration hardware to its previous state */
369void RADEONEngineRestore(ScrnInfoPtr pScrn)
370{
371    RADEONInfoPtr  info       = RADEONPTR(pScrn);
372    unsigned char *RADEONMMIO = info->MMIO;
373
374    if (info->cs)
375      return;
376
377    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
378		   "EngineRestore (%d/%d)\n",
379		   info->CurrentLayout.pixel_code,
380		   info->CurrentLayout.bitsPerPixel);
381
382    /* Setup engine location. This shouldn't be necessary since we
383     * set them appropriately before any accel ops, but let's avoid
384     * random bogus DMA in case we inadvertently trigger the engine
385     * in the wrong place (happened).
386     */
387    RADEONWaitForFifo(pScrn, 2);
388    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
389    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390
391    RADEONWaitForFifo(pScrn, 1);
392#if X_BYTE_ORDER == X_BIG_ENDIAN
393    OUTREGP(RADEON_DP_DATATYPE,
394	    RADEON_HOST_BIG_ENDIAN_EN,
395	    ~RADEON_HOST_BIG_ENDIAN_EN);
396#else
397    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
398#endif
399
400    /* Restore SURFACE_CNTL */
401    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
402
403    RADEONWaitForFifo(pScrn, 1);
404    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
405					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
406    RADEONWaitForFifo(pScrn, 1);
407    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
408				       | RADEON_GMC_BRUSH_SOLID_COLOR
409				       | RADEON_GMC_SRC_DATATYPE_COLOR));
410
411    RADEONWaitForFifo(pScrn, 5);
412    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
413    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
414    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
415    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
416    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
417
418    RADEONWaitForIdleMMIO(pScrn);
419
420    info->accel_state->XInited3D = FALSE;
421}
422
423static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
424{
425    RADEONInfoPtr info = RADEONPTR(pScrn);
426    if (info->dri->pKernelDRMVersion->version_major < 2) {
427        drm_radeon_getparam_t np;
428
429        memset(&np, 0, sizeof(np));
430        np.param = RADEON_PARAM_NUM_GB_PIPES;
431        np.value = num_pipes;
432        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
433    } else {
434        struct drm_radeon_info np2;
435        np2.value = (unsigned long)num_pipes;
436        np2.request = RADEON_INFO_NUM_GB_PIPES;
437        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
438    }
439}
440
441/* Initialize the acceleration hardware */
442void RADEONEngineInit(ScrnInfoPtr pScrn)
443{
444    RADEONInfoPtr  info       = RADEONPTR(pScrn);
445    unsigned char *RADEONMMIO = info->MMIO;
446    int datatype = 0;
447    info->accel_state->num_gb_pipes = 0;
448
449    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
450		   "EngineInit (%d/%d)\n",
451		   info->CurrentLayout.pixel_code,
452		   info->CurrentLayout.bitsPerPixel);
453
454#ifdef XF86DRI
455    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
456	int num_pipes;
457
458	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
459	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
460		       "Failed to determine num pipes from DRM, falling back to "
461		       "manual look-up!\n");
462	    info->accel_state->num_gb_pipes = 0;
463	} else {
464	    info->accel_state->num_gb_pipes = num_pipes;
465	}
466    }
467#endif
468
469    if (!info->cs) {
470	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
471	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
472	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
473	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
474	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
475	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
476	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
477	    IS_R500_3D) {
478	    if (info->accel_state->num_gb_pipes == 0) {
479		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
480
481		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
482		if (IS_R500_3D)
483		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
484	    }
485	} else {
486	    if (info->accel_state->num_gb_pipes == 0) {
487		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
488		    (info->ChipFamily == CHIP_FAMILY_R350)) {
489		    /* R3xx chips */
490		    info->accel_state->num_gb_pipes = 2;
491		} else {
492		    /* RV3xx chips */
493		    info->accel_state->num_gb_pipes = 1;
494		}
495	    }
496	}
497
498	/* SE cards only have 1 quadpipe */
499	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
500	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
501	    (info->Chipset == PCI_CHIP_R300_AD) ||
502	    (info->Chipset == PCI_CHIP_R350_AH))
503	    info->accel_state->num_gb_pipes = 1;
504
505	if (IS_R300_3D || IS_R500_3D)
506	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
507		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
508
509	if (IS_R300_3D || IS_R500_3D) {
510	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
511
512	    switch(info->accel_state->num_gb_pipes) {
513	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
514	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
515	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
516	    default:
517	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
518	    }
519
520	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
521	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
522	    if (info->ChipFamily >= CHIP_FAMILY_R420)
523		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
524	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
525					     R300_DC_AUTOFLUSH_ENABLE |
526					     R300_DC_DC_DISABLE_IGNORE_PE));
527	} else
528	    OUTREG(RADEON_RB3D_CNTL, 0);
529
530	RADEONEngineReset(pScrn);
531    }
532
533    switch (info->CurrentLayout.pixel_code) {
534    case 8:  datatype = 2; break;
535    case 15: datatype = 3; break;
536    case 16: datatype = 4; break;
537    case 24: datatype = 5; break;
538    case 32: datatype = 6; break;
539    default:
540	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
541		       "Unknown depth/bpp = %d/%d (code = %d)\n",
542		       info->CurrentLayout.depth,
543		       info->CurrentLayout.bitsPerPixel,
544		       info->CurrentLayout.pixel_code);
545    }
546
547    info->accel_state->dp_gui_master_cntl =
548	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
549	 | RADEON_GMC_CLR_CMP_CNTL_DIS
550	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
551
552    RADEONEngineRestore(pScrn);
553}
554
555uint32_t radeonGetPixmapOffset(PixmapPtr pPix)
556{
557    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
558    RADEONInfoPtr info = RADEONPTR(pScrn);
559    uint32_t offset = 0;
560    if (info->cs)
561	return 0;
562#ifdef USE_EXA
563    if (info->useEXA) {
564	offset = exaGetPixmapOffset(pPix);
565    } else
566#endif
567    {
568	offset = pPix->devPrivate.ptr - info->FB;
569    }
570    offset += info->fbLocation + pScrn->fbOffset;
571    return offset;
572}
573
574int radeon_cs_space_remaining(ScrnInfoPtr pScrn)
575{
576    RADEONInfoPtr info = RADEONPTR(pScrn);
577
578#ifdef XF86DRM_MODE
579    if (info->cs)
580	return (info->cs->ndw - info->cs->cdw);
581    else
582#endif
583        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
584}
585
/* MMIO flavour of the shared acceleration code.
 *
 * The ACCEL_* macros are defined for direct register access, the .c files
 * below are textually included so that they are compiled with these
 * definitions, and the macros are removed again afterwards.
 *
 *   ACCEL_PREAMBLE   declares the local RADEONMMIO pointer used by OUTREG
 *   BEGIN_ACCEL(n)   waits for n free FIFO slots
 *   OUT_ACCEL_REG    writes the register directly
 *   FINISH_ACCEL     expands to nothing
 */
#define ACCEL_MMIO
#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
#define FINISH_ACCEL()

#include "radeon_commonfuncs.c"
#if defined(RENDER) && defined(USE_XAA)
#include "radeon_render.c"
#endif
#include "radeon_accelfuncs.c"

/* Drop the MMIO definitions again */
#undef ACCEL_MMIO
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef FINISH_ACCEL
603
604#ifdef XF86DRI
605
/* CP flavour of the shared acceleration code (DRI builds only).
 *
 * The same .c files are textually included again, this time with the
 * ACCEL_* macros mapped onto the ring macros:
 *
 *   ACCEL_PREAMBLE   declares the ring locals and runs RADEONCP_REFRESH
 *   BEGIN_ACCEL(n)   opens a ring section of 2*n entries (presumably a
 *                    header plus a value per register write -- TODO confirm)
 *   OUT_ACCEL_REG    emits the register write through OUT_RING_REG
 *   FINISH_ACCEL     closes the section with ADVANCE_RING
 */
#define ACCEL_CP
#define ACCEL_PREAMBLE()						\
    RING_LOCALS;							\
    RADEONCP_REFRESH(pScrn, info)
#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
#define FINISH_ACCEL()          ADVANCE_RING()


#include "radeon_commonfuncs.c"
#if defined(RENDER) && defined(USE_XAA)
#include "radeon_render.c"
#endif
#include "radeon_accelfuncs.c"

/* Drop the CP definitions again */
#undef ACCEL_CP
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef FINISH_ACCEL
626
627/* Stop the CP */
628int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
629{
630    drm_radeon_cp_stop_t  stop;
631    int              ret, i;
632
633    stop.flush = 1;
634    stop.idle  = 1;
635
636    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
637			  sizeof(drm_radeon_cp_stop_t));
638
639    if (ret == 0) {
640	return 0;
641    } else if (errno != EBUSY) {
642	return -errno;
643    }
644
645    stop.flush = 0;
646
647    i = 0;
648    do {
649	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
650			      sizeof(drm_radeon_cp_stop_t));
651    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
652
653    if (ret == 0) {
654	return 0;
655    } else if (errno != EBUSY) {
656	return -errno;
657    }
658
659    stop.idle = 0;
660
661    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
662			&stop, sizeof(drm_radeon_cp_stop_t))) {
663	return -errno;
664    } else {
665	return 0;
666    }
667}
668
669#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
670
671/* Get an indirect buffer for the CP 2D acceleration commands  */
672drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
673{
674    RADEONInfoPtr  info = RADEONPTR(pScrn);
675    drmDMAReq      dma;
676    drmBufPtr      buf = NULL;
677    int            indx = 0;
678    int            size = 0;
679    int            i = 0;
680    int            ret;
681
682#if 0
683    /* FIXME: pScrn->pScreen has not been initialized when this is first
684     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
685     * the screen index from pScrn, which is initialized, and then get
686     * the screen from screenInfo.screens[index], but that is a hack.
687     */
688    dma.context = DRIGetContext(pScrn->pScreen);
689#else
690    /* This is the X server's context */
691    dma.context = 0x00000001;
692#endif
693
694    dma.send_count    = 0;
695    dma.send_list     = NULL;
696    dma.send_sizes    = NULL;
697    dma.flags         = 0;
698    dma.request_count = 1;
699    dma.request_size  = RADEON_BUFFER_SIZE;
700    dma.request_list  = &indx;
701    dma.request_sizes = &size;
702    dma.granted_count = 0;
703
704    while (1) {
705	do {
706	    ret = drmDMA(info->dri->drmFD, &dma);
707	    if (ret && ret != -EBUSY) {
708		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
709			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
710	    }
711	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
712
713	if (ret == 0) {
714	    buf = &info->dri->buffers->list[indx];
715	    buf->used = 0;
716	    if (RADEON_VERBOSE) {
717		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
718			   "   GetBuffer returning %d %p\n",
719			   buf->idx, buf->address);
720	    }
721	    return buf;
722	}
723
724	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
725		   "GetBuffer timed out, resetting engine...\n");
726
727	if (info->ChipFamily < CHIP_FAMILY_R600) {
728	    RADEONEngineReset(pScrn);
729	    RADEONEngineRestore(pScrn);
730	} else
731	    R600EngineReset(pScrn);
732
733	/* Always restart the engine when doing CP 2D acceleration */
734	RADEONCP_RESET(pScrn, info);
735	RADEONCP_START(pScrn, info);
736    }
737}
738
739/* Flush the indirect buffer to the kernel for submission to the card */
740void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
741{
742    RADEONInfoPtr      info   = RADEONPTR(pScrn);
743    drmBufPtr          buffer = info->cp->indirectBuffer;
744    int                start  = info->cp->indirectStart;
745    drm_radeon_indirect_t  indirect;
746
747    assert(!info->cs);
748    if (!buffer) return;
749    if (start == buffer->used && !discard) return;
750
751    if (RADEON_VERBOSE) {
752	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
753		   buffer->idx);
754    }
755
756    if (info->ChipFamily >= CHIP_FAMILY_R600) {
757	if (buffer->used & 0x3c) {
758	    RING_LOCALS;
759
760	    while (buffer->used & 0x3c) {
761		BEGIN_RING(1);
762		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
763		ADVANCE_RING();
764	    }
765	}
766    }
767
768    indirect.idx     = buffer->idx;
769    indirect.start   = start;
770    indirect.end     = buffer->used;
771    indirect.discard = discard;
772
773    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
774			&indirect, sizeof(drm_radeon_indirect_t));
775
776    if (discard) {
777	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
778	info->cp->indirectStart  = 0;
779    } else {
780	/* Start on a double word boundary */
781	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
782	if (RADEON_VERBOSE) {
783	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
784		       info->cp->indirectStart);
785	}
786    }
787}
788
789/* Flush and release the indirect buffer */
790void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
791{
792    RADEONInfoPtr      info   = RADEONPTR(pScrn);
793    drmBufPtr          buffer = info->cp->indirectBuffer;
794    int                start  = info->cp->indirectStart;
795    drm_radeon_indirect_t  indirect;
796
797    assert(!info->cs);
798    if (info->ChipFamily >= CHIP_FAMILY_R600) {
799	if (buffer && (buffer->used & 0x3c)) {
800	    RING_LOCALS;
801
802	    while (buffer->used & 0x3c) {
803		BEGIN_RING(1);
804		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
805		ADVANCE_RING();
806	    }
807	}
808    }
809
810    info->cp->indirectBuffer = NULL;
811    info->cp->indirectStart  = 0;
812
813    if (!buffer) return;
814
815    if (RADEON_VERBOSE) {
816	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
817		   buffer->idx);
818    }
819
820    indirect.idx     = buffer->idx;
821    indirect.start   = start;
822    indirect.end     = buffer->used;
823    indirect.discard = 1;
824
825    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
826			&indirect, sizeof(drm_radeon_indirect_t));
827}
828
829/** \brief Calculate HostDataBlit parameters from pointer and pitch
830 *
831 * This is a helper for the trivial HostDataBlit users that don't need to worry
832 * about tiling etc.
833 */
834void
835RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
836		     uint32_t *dstPitchOff, int *x, int *y)
837{
838    RADEONInfoPtr info = RADEONPTR( pScrn );
839    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
840
841    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
842    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
843    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
844}
845
846/* Set up a hostdata blit to transfer data from system memory to the
847 * framebuffer. Returns the address where the data can be written to and sets
848 * the dstPitch and hpass variables as required.
849 */
850uint8_t*
851RADEONHostDataBlit(
852    ScrnInfoPtr pScrn,
853    unsigned int cpp,
854    unsigned int w,
855    uint32_t dstPitchOff,
856    uint32_t *bufPitch,
857    int x,
858    int *y,
859    unsigned int *h,
860    unsigned int *hpass
861){
862    RADEONInfoPtr info = RADEONPTR( pScrn );
863    uint32_t format, dwords;
864    uint8_t *ret;
865    RING_LOCALS;
866
867    if ( *h == 0 )
868    {
869	return NULL;
870    }
871
872    switch ( cpp )
873    {
874    case 4:
875	format = RADEON_GMC_DST_32BPP;
876	*bufPitch = 4 * w;
877	break;
878    case 2:
879	format = RADEON_GMC_DST_16BPP;
880	*bufPitch = 2 * RADEON_ALIGN(w, 2);
881	break;
882    case 1:
883	format = RADEON_GMC_DST_8BPP_CI;
884	*bufPitch = RADEON_ALIGN(w, 4);
885	break;
886    default:
887	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
888		    "%s: Unsupported cpp %d!\n", __func__, cpp );
889	return NULL;
890    }
891
892#if X_BYTE_ORDER == X_BIG_ENDIAN
893    /* Swap doesn't work on R300 and later, it's handled during the
894     * copy to ind. buffer pass
895     */
896    if (info->ChipFamily < CHIP_FAMILY_R300) {
897        BEGIN_RING(2);
898	if (cpp == 2)
899	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
900			 RADEON_HOST_DATA_SWAP_HDW);
901	else if (cpp == 1)
902	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
903			 RADEON_HOST_DATA_SWAP_32BIT);
904	else
905	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
906			 RADEON_HOST_DATA_SWAP_NONE);
907	ADVANCE_RING();
908    }
909#endif
910
911    /*RADEON_PURGE_CACHE();
912      RADEON_WAIT_UNTIL_IDLE();*/
913
914    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
915    dwords = *hpass * *bufPitch / 4;
916
917    BEGIN_RING( dwords + 10 );
918    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
919    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
920	    | RADEON_GMC_DST_CLIPPING
921	    | RADEON_GMC_BRUSH_NONE
922	    | format
923	    | RADEON_GMC_SRC_DATATYPE_COLOR
924	    | RADEON_ROP3_S
925	    | RADEON_DP_SRC_SOURCE_HOST_DATA
926	    | RADEON_GMC_CLR_CMP_CNTL_DIS
927	    | RADEON_GMC_WR_MSK_DIS );
928    OUT_RING( dstPitchOff );
929    OUT_RING( (*y << 16) | x );
930    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
931    OUT_RING( 0xffffffff );
932    OUT_RING( 0xffffffff );
933    OUT_RING( *y << 16 | x );
934    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
935    OUT_RING( dwords );
936
937    ret = ( uint8_t* )&__head[__count];
938
939    __count += dwords;
940    ADVANCE_RING();
941
942    *y += *hpass;
943    *h -= *hpass;
944
945    return ret;
946}
947
/* Copy `size' bytes from src to dst, optionally swapping on the way.
 *
 *   RADEON_HOST_DATA_SWAP_HDW    exchanges the two 16 bit halves of every
 *                                32 bit word
 *   RADEON_HOST_DATA_SWAP_32BIT  reverses the four bytes of every 32 bit
 *                                word
 *   RADEON_HOST_DATA_SWAP_16BIT  exchanges the two bytes of every 16 bit
 *                                word
 *   anything else                plain copy (skipped when src == dst)
 *
 * In the swapping modes only whole words are processed; trailing bytes
 * that do not fill a word are left untouched.  src and dst are accessed
 * as 32 respectively 16 bit objects and must be aligned accordingly.
 */
void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
{
    switch(swap) {
    case RADEON_HOST_DATA_SWAP_HDW:
        {
	    unsigned int *d = (unsigned int *)dst;
	    unsigned int *s = (unsigned int *)src;
	    unsigned int nwords = size >> 2;

	    for (; nwords > 0; --nwords, ++d, ++s)
		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
	    return;
        }
    case RADEON_HOST_DATA_SWAP_32BIT:
        {
	    unsigned int *d = (unsigned int *)dst;
	    unsigned int *s = (unsigned int *)src;
	    unsigned int nwords = size >> 2;

	    for (; nwords > 0; --nwords, ++d, ++s)
#ifdef __powerpc__
		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
#else
		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
#endif
	    return;
        }
    case RADEON_HOST_DATA_SWAP_16BIT:
        {
	    unsigned short *d = (unsigned short *)dst;
	    unsigned short *s = (unsigned short *)src;
	    unsigned int nwords = size >> 1;

	    /* This has to be a genuine 16 bit swap: the 32 bit formula
	     * applied to a 16 bit value leaves nothing in the low half,
	     * and a byte-reversed *word* store would write four bytes.
	     */
	    for (; nwords > 0; --nwords, ++d, ++s)
#ifdef __powerpc__
		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
#else
		*d = (unsigned short)(((*s >> 8) & 0xff) | ((*s & 0xff) << 8));
#endif
	    return;
	}
    default:
	break;
    }

    /* No (known) swap requested: straight copy */
    if (src != dst)
	memcpy(dst, src, size);
}
995
996/* Copies a single pass worth of data for a hostdata blit set up by
997 * RADEONHostDataBlit().
998 */
999void
1000RADEONHostDataBlitCopyPass(
1001    ScrnInfoPtr pScrn,
1002    unsigned int cpp,
1003    uint8_t *dst,
1004    uint8_t *src,
1005    unsigned int hpass,
1006    unsigned int dstPitch,
1007    unsigned int srcPitch
1008){
1009
1010#if X_BYTE_ORDER == X_BIG_ENDIAN
1011    RADEONInfoPtr info = RADEONPTR( pScrn );
1012#endif
1013
1014    /* RADEONHostDataBlitCopy can return NULL ! */
1015    if( (dst==NULL) || (src==NULL)) return;
1016
1017    if ( dstPitch == srcPitch )
1018    {
1019#if X_BYTE_ORDER == X_BIG_ENDIAN
1020        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1021	    switch(cpp) {
1022	    case 1:
1023		RADEONCopySwap(dst, src, hpass * dstPitch,
1024			       RADEON_HOST_DATA_SWAP_32BIT);
1025		return;
1026	    case 2:
1027	        RADEONCopySwap(dst, src, hpass * dstPitch,
1028			       RADEON_HOST_DATA_SWAP_HDW);
1029		return;
1030	    }
1031	}
1032#endif
1033	memcpy( dst, src, hpass * dstPitch );
1034    }
1035    else
1036    {
1037	unsigned int minPitch = min( dstPitch, srcPitch );
1038	while ( hpass-- )
1039	{
1040#if X_BYTE_ORDER == X_BIG_ENDIAN
1041            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1042		switch(cpp) {
1043		case 1:
1044		    RADEONCopySwap(dst, src, minPitch,
1045				   RADEON_HOST_DATA_SWAP_32BIT);
1046		    goto next;
1047		case 2:
1048	            RADEONCopySwap(dst, src, minPitch,
1049				   RADEON_HOST_DATA_SWAP_HDW);
1050		    goto next;
1051		}
1052	    }
1053#endif
1054	    memcpy( dst, src, minPitch );
1055#if X_BYTE_ORDER == X_BIG_ENDIAN
1056	next:
1057#endif
1058	    src += srcPitch;
1059	    dst += dstPitch;
1060	}
1061    }
1062}
1063
1064#endif
1065
1066Bool RADEONAccelInit(ScreenPtr pScreen)
1067{
1068    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1069    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1070
1071#ifdef USE_EXA
1072    if (info->useEXA) {
1073# ifdef XF86DRI
1074	if (info->directRenderingEnabled) {
1075	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1076		if (!R600DrawInit(pScreen))
1077		    return FALSE;
1078	    } else {
1079		if (!RADEONDrawInitCP(pScreen))
1080		    return FALSE;
1081	    }
1082	} else
1083# endif /* XF86DRI */
1084	{
1085	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1086		return FALSE;
1087	    else {
1088		if (!RADEONDrawInitMMIO(pScreen))
1089		    return FALSE;
1090	    }
1091	}
1092    }
1093#endif /* USE_EXA */
1094#ifdef USE_XAA
1095    if (!info->useEXA) {
1096	XAAInfoRecPtr  a;
1097
1098	if (info->ChipFamily >= CHIP_FAMILY_R600)
1099	    return FALSE;
1100
1101	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1102	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1103	    return FALSE;
1104	}
1105
1106#ifdef XF86DRI
1107	if (info->directRenderingEnabled)
1108	    RADEONAccelInitCP(pScreen, a);
1109	else
1110#endif /* XF86DRI */
1111	    RADEONAccelInitMMIO(pScreen, a);
1112
1113	RADEONEngineInit(pScrn);
1114
1115	if (!XAAInit(pScreen, a)) {
1116	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1117	    return FALSE;
1118	}
1119    }
1120#endif /* USE_XAA */
1121    return TRUE;
1122}
1123
1124void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1125{
1126    RADEONInfoPtr info = RADEONPTR (pScrn);
1127
1128#ifdef XF86DRI
1129    if (info->directRenderingEnabled) {
1130	drm_radeon_sarea_t *pSAREAPriv;
1131
1132	if (!info->kms_enabled) {
1133	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1134	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1135	}
1136	RADEONInit3DEngineCP(pScrn);
1137    } else
1138#endif
1139	RADEONInit3DEngineMMIO(pScrn);
1140
1141    info->accel_state->XInited3D = TRUE;
1142}
1143
1144#ifdef USE_XAA
1145#ifdef XF86DRI
1146Bool
1147RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1148{
1149    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1150    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1151    int            cpp = info->CurrentLayout.pixel_bytes;
1152    int            depthCpp = (info->dri->depthBits - 8) / 4;
1153    int            width_bytes = pScrn->displayWidth * cpp;
1154    int            bufferSize;
1155    int            depthSize;
1156    int            l;
1157    int            scanlines;
1158    int            texsizerequest;
1159    BoxRec         MemBox;
1160    FBAreaPtr      fbarea;
1161
1162    info->dri->frontOffset = 0;
1163    info->dri->frontPitch = pScrn->displayWidth;
1164    info->dri->backPitch = pScrn->displayWidth;
1165
1166    /* make sure we use 16 line alignment for tiling (8 might be enough).
1167     * Might need that for non-XF86DRI too?
1168     */
1169    if (info->allowColorTiling) {
1170	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1171		      RADEON_GPU_PAGE_SIZE);
1172    } else {
1173        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1174		      RADEON_GPU_PAGE_SIZE);
1175    }
1176
1177    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1178     * which is always the case if color tiling is used due to color pitch
1179     * but not necessarily otherwise, and its height a multiple of 16 lines.
1180     */
1181    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1182    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1183		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1184
1185    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1186	       "Using %d MB GART aperture\n", info->dri->gartSize);
1187    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1188	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1189    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1190	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1191    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1192	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1193
1194    /* Try for front, back, depth, and three framebuffers worth of
1195     * pixmap cache.  Should be enough for a fullscreen background
1196     * image plus some leftovers.
1197     * If the FBTexPercent option was used, try to achieve that percentage instead,
1198     * but still have at least one pixmap buffer (get problems with xvideo/render
1199     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1200     * probably useless for XAA.
1201     */
1202    if (info->dri->textureSize >= 0) {
1203	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1204			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1205	/* first divide, then multiply or we'll get an overflow (been there...) */
1206			 / 100 * info->dri->textureSize;
1207    }
1208    else {
1209	texsizerequest = (int)info->FbMapSize / 2;
1210    }
1211    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1212
1213    /* If that gives us less than the requested memory, let's
1214     * be greedy and grab some more.  Sorry, I care more about 3D
1215     * performance than playing nicely, and you'll get around a full
1216     * framebuffer's worth of pixmap cache anyway.
1217     */
1218    if (info->dri->textureSize < texsizerequest) {
1219        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1220    }
1221    if (info->dri->textureSize < texsizerequest) {
1222        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1223    }
1224
1225    /* If there's still no space for textures, try without pixmap cache, but
1226     * never use the reserved space, the space hw cursor and PCIGART table might
1227     * use.
1228     */
1229    if (info->dri->textureSize < 0) {
1230	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1231	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1232    }
1233
1234    /* Check to see if there is more room available after the 8192nd
1235     * scanline for textures
1236     */
1237    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1238    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1239	> info->dri->textureSize) {
1240	info->dri->textureSize =
1241		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1242    }
1243
1244    /* If backbuffer is disabled, don't allocate memory for it */
1245    if (info->dri->noBackBuffer) {
1246	info->dri->textureSize += bufferSize;
1247    }
1248
1249    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1250       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1251       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1252       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1253       area otherwise).
1254       This might cause some space at the end of the video memory to be unused, since it
1255       can't be used (?) due to that log_tex_granularity thing???
1256       Could use different copyscreentoscreen function for the pageflip copies
1257       (which would use different src and dst offsets) to avoid this. */
1258    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1259	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1260			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1261    }
1262    if (info->dri->textureSize > 0) {
1263	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1264	if (l < RADEON_LOG_TEX_GRANULARITY)
1265	    l = RADEON_LOG_TEX_GRANULARITY;
1266	/* Round the texture size up to the nearest whole number of
1267	 * texture regions.  Again, be greedy about this, don't
1268	 * round down.
1269	 */
1270	info->dri->log2TexGran = l;
1271	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1272    } else {
1273	info->dri->textureSize = 0;
1274    }
1275
1276    /* Set a minimum usable local texture heap size.  This will fit
1277     * two 256x256x32bpp textures.
1278     */
1279    if (info->dri->textureSize < 512 * 1024) {
1280	info->dri->textureOffset = 0;
1281	info->dri->textureSize = 0;
1282    }
1283
1284    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1285	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1286				    (width_bytes * 16)) * (width_bytes * 16);
1287    }
1288    else {
1289	/* Reserve space for textures */
1290	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1291				     RADEON_GPU_PAGE_SIZE);
1292    }
1293
1294    /* Reserve space for the shared depth
1295     * buffer.
1296     */
1297    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1298			       RADEON_GPU_PAGE_SIZE);
1299
1300    /* Reserve space for the shared back buffer */
1301    if (info->dri->noBackBuffer) {
1302       info->dri->backOffset = info->dri->depthOffset;
1303    } else {
1304       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1305				 RADEON_GPU_PAGE_SIZE);
1306    }
1307
1308    info->dri->backY = info->dri->backOffset / width_bytes;
1309    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1310
1311    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1312    if (scanlines > 8191)
1313	scanlines = 8191;
1314
1315    MemBox.x1 = 0;
1316    MemBox.y1 = 0;
1317    MemBox.x2 = pScrn->displayWidth;
1318    MemBox.y2 = scanlines;
1319
1320    if (!xf86InitFBManager(pScreen, &MemBox)) {
1321        xf86DrvMsg(scrnIndex, X_ERROR,
1322		   "Memory manager initialization to "
1323		   "(%d,%d) (%d,%d) failed\n",
1324		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1325	return FALSE;
1326    } else {
1327	int  width, height;
1328
1329	xf86DrvMsg(scrnIndex, X_INFO,
1330		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1331		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1332	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1333	   aligned... sigh */
1334	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1335						pScrn->displayWidth,
1336						info->allowColorTiling ?
1337						(RADEON_ALIGN(pScrn->virtualY, 16))
1338						- pScrn->virtualY + 2 : 2,
1339						0, NULL, NULL,
1340						NULL))) {
1341	    xf86DrvMsg(scrnIndex, X_INFO,
1342		       "Reserved area from (%d,%d) to (%d,%d)\n",
1343		       fbarea->box.x1, fbarea->box.y1,
1344		       fbarea->box.x2, fbarea->box.y2);
1345	} else {
1346	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1347	}
1348
1349	RADEONDRIAllocatePCIGARTTable(pScreen);
1350
1351	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1352					  &height, 0, 0, 0)) {
1353	    xf86DrvMsg(scrnIndex, X_INFO,
1354		       "Largest offscreen area available: %d x %d\n",
1355		       width, height);
1356
1357	    /* Lines in offscreen area needed for depth buffer and
1358	     * textures
1359	     */
1360	    info->dri->depthTexLines = (scanlines
1361					- info->dri->depthOffset / width_bytes);
1362	    info->dri->backLines	    = (scanlines
1363					       - info->dri->backOffset / width_bytes
1364					       - info->dri->depthTexLines);
1365	    info->dri->backArea	    = NULL;
1366	} else {
1367	    xf86DrvMsg(scrnIndex, X_ERROR,
1368		       "Unable to determine largest offscreen area "
1369		       "available\n");
1370	    return FALSE;
1371	}
1372    }
1373
1374    xf86DrvMsg(scrnIndex, X_INFO,
1375	       "Will use front buffer at offset 0x%x\n",
1376	       info->dri->frontOffset);
1377
1378    xf86DrvMsg(scrnIndex, X_INFO,
1379	       "Will use back buffer at offset 0x%x\n",
1380	       info->dri->backOffset);
1381    xf86DrvMsg(scrnIndex, X_INFO,
1382	       "Will use depth buffer at offset 0x%x\n",
1383	       info->dri->depthOffset);
1384    if (info->cardType==CARD_PCIE)
1385    	xf86DrvMsg(scrnIndex, X_INFO,
1386	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1387		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1388    xf86DrvMsg(scrnIndex, X_INFO,
1389	       "Will use %d kb for textures at offset 0x%x\n",
1390	       info->dri->textureSize/1024, info->dri->textureOffset);
1391
1392    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1393				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1394
1395    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1396				  ((info->dri->backOffset + info->fbLocation) >> 10));
1397
1398    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1399				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1400    return TRUE;
1401}
1402#endif /* XF86DRI */
1403
1404Bool
1405RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1406{
1407    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1408    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1409    BoxRec         MemBox;
1410    int            y2;
1411
1412    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1413
1414    MemBox.x1 = 0;
1415    MemBox.y1 = 0;
1416    MemBox.x2 = pScrn->displayWidth;
1417    y2 = info->FbMapSize / width_bytes;
1418    if (y2 >= 32768)
1419	y2 = 32767; /* because MemBox.y2 is signed short */
1420    MemBox.y2 = y2;
1421
1422    /* The acceleration engine uses 14 bit
1423     * signed coordinates, so we can't have any
1424     * drawable caches beyond this region.
1425     */
1426    if (MemBox.y2 > 8191)
1427	MemBox.y2 = 8191;
1428
1429    if (!xf86InitFBManager(pScreen, &MemBox)) {
1430	xf86DrvMsg(scrnIndex, X_ERROR,
1431		   "Memory manager initialization to "
1432		   "(%d,%d) (%d,%d) failed\n",
1433		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1434	return FALSE;
1435    } else {
1436	int       width, height;
1437	FBAreaPtr fbarea;
1438
1439	xf86DrvMsg(scrnIndex, X_INFO,
1440		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1441		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1442	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1443						pScrn->displayWidth,
1444						info->allowColorTiling ?
1445						(RADEON_ALIGN(pScrn->virtualY, 16))
1446						- pScrn->virtualY + 2 : 2,
1447						0, NULL, NULL,
1448						NULL))) {
1449	    xf86DrvMsg(scrnIndex, X_INFO,
1450		       "Reserved area from (%d,%d) to (%d,%d)\n",
1451		       fbarea->box.x1, fbarea->box.y1,
1452		       fbarea->box.x2, fbarea->box.y2);
1453	} else {
1454	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1455	}
1456	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1457					      0, 0, 0)) {
1458	    xf86DrvMsg(scrnIndex, X_INFO,
1459		       "Largest offscreen area available: %d x %d\n",
1460		       width, height);
1461	}
1462	return TRUE;
1463    }
1464}
1465#endif /* USE_XAA */
1466