radeon_accel.c revision ad43ddac
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78				/* Driver data structures */
79#include "radeon.h"
80#include "radeon_reg.h"
81#include "r600_reg.h"
82#include "radeon_macros.h"
83#include "radeon_probe.h"
84#include "radeon_version.h"
85#ifdef XF86DRI
86#define _XF86DRI_SERVER_
87#include "radeon_drm.h"
88#endif
89
90#include "ati_pciids_gen.h"
91
92				/* Line support */
93#include "miline.h"
94
95				/* X and server generic header files */
96#include "xf86.h"
97
98static void R600EngineReset(ScrnInfoPtr pScrn);
99
#ifdef USE_XAA
/* Translation table from X11 raster operations to Radeon ROP3 codes.
 * Entries are listed in GX* order (GXclear first, GXset last), one per
 * raster op.  `rop' holds the S-operand form of the operation and
 * `pattern' the P-operand form of the same operation.
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    /* GXclear        */ { .rop = RADEON_ROP3_ZERO, .pattern = RADEON_ROP3_ZERO },
    /* GXand          */ { .rop = RADEON_ROP3_DSa,  .pattern = RADEON_ROP3_DPa  },
    /* GXandReverse   */ { .rop = RADEON_ROP3_SDna, .pattern = RADEON_ROP3_PDna },
    /* GXcopy         */ { .rop = RADEON_ROP3_S,    .pattern = RADEON_ROP3_P    },
    /* GXandInverted  */ { .rop = RADEON_ROP3_DSna, .pattern = RADEON_ROP3_DPna },
    /* GXnoop         */ { .rop = RADEON_ROP3_D,    .pattern = RADEON_ROP3_D    },
    /* GXxor          */ { .rop = RADEON_ROP3_DSx,  .pattern = RADEON_ROP3_DPx  },
    /* GXor           */ { .rop = RADEON_ROP3_DSo,  .pattern = RADEON_ROP3_DPo  },
    /* GXnor          */ { .rop = RADEON_ROP3_DSon, .pattern = RADEON_ROP3_DPon },
    /* GXequiv        */ { .rop = RADEON_ROP3_DSxn, .pattern = RADEON_ROP3_PDxn },
    /* GXinvert       */ { .rop = RADEON_ROP3_Dn,   .pattern = RADEON_ROP3_Dn   },
    /* GXorReverse    */ { .rop = RADEON_ROP3_SDno, .pattern = RADEON_ROP3_PDno },
    /* GXcopyInverted */ { .rop = RADEON_ROP3_Sn,   .pattern = RADEON_ROP3_Pn   },
    /* GXorInverted   */ { .rop = RADEON_ROP3_DSno, .pattern = RADEON_ROP3_DPno },
    /* GXnand         */ { .rop = RADEON_ROP3_DSan, .pattern = RADEON_ROP3_DPan },
    /* GXset          */ { .rop = RADEON_ROP3_ONE,  .pattern = RADEON_ROP3_ONE  }
};
#endif
123
124/* The FIFO has 64 slots.  This routines waits until at least `entries'
125 * of these slots are empty.
126 */
127void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
128{
129    RADEONInfoPtr  info       = RADEONPTR(pScrn);
130    unsigned char *RADEONMMIO = info->MMIO;
131    int            i;
132
133    for (;;) {
134	for (i = 0; i < RADEON_TIMEOUT; i++) {
135	    info->accel_state->fifo_slots =
136		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
137	    if (info->accel_state->fifo_slots >= entries) return;
138	}
139	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
140		       "FIFO timed out: %u entries, stat=0x%08x\n",
141		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
142		       (unsigned int)INREG(RADEON_RBBM_STATUS));
143	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
144		   "FIFO timed out, resetting engine...\n");
145	RADEONEngineReset(pScrn);
146	RADEONEngineRestore(pScrn);
147#ifdef XF86DRI
148	if (info->directRenderingEnabled) {
149	    RADEONCP_RESET(pScrn, info);
150	    RADEONCP_START(pScrn, info);
151	}
152#endif
153    }
154}
155
156void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
157{
158    RADEONInfoPtr  info       = RADEONPTR(pScrn);
159    unsigned char *RADEONMMIO = info->MMIO;
160    int            i;
161
162    for (;;) {
163	for (i = 0; i < RADEON_TIMEOUT; i++) {
164	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
165		info->accel_state->fifo_slots =
166		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
167	    else
168		info->accel_state->fifo_slots =
169		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
170	    if (info->accel_state->fifo_slots >= entries) return;
171	}
172	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
173		       "FIFO timed out: stat=0x%08x\n",
174		       (unsigned int)INREG(R600_GRBM_STATUS));
175	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
176		   "FIFO timed out, resetting engine...\n");
177	R600EngineReset(pScrn);
178#ifdef XF86DRI
179	if (info->directRenderingEnabled) {
180	    RADEONCP_RESET(pScrn, info);
181	    RADEONCP_START(pScrn, info);
182	}
183#endif
184    }
185}
186
187/* Flush all dirty data in the Pixel Cache to memory */
188void RADEONEngineFlush(ScrnInfoPtr pScrn)
189{
190    RADEONInfoPtr  info       = RADEONPTR(pScrn);
191    unsigned char *RADEONMMIO = info->MMIO;
192    int            i;
193
194    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
195	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
196		RADEON_RB3D_DC_FLUSH_ALL,
197		~RADEON_RB3D_DC_FLUSH_ALL);
198	for (i = 0; i < RADEON_TIMEOUT; i++) {
199	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
200		break;
201	}
202	if (i == RADEON_TIMEOUT) {
203	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
204			   "DC flush timeout: %x\n",
205			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
206	}
207    } else {
208	OUTREGP(R300_DSTCACHE_CTLSTAT,
209		R300_RB2D_DC_FLUSH_ALL,
210		~R300_RB2D_DC_FLUSH_ALL);
211	for (i = 0; i < RADEON_TIMEOUT; i++) {
212	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
213		break;
214	}
215	if (i == RADEON_TIMEOUT) {
216	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
217			   "DC flush timeout: %x\n",
218			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
219	}
220    }
221}
222
223/* Reset graphics card to known state */
224void RADEONEngineReset(ScrnInfoPtr pScrn)
225{
226    RADEONInfoPtr  info       = RADEONPTR(pScrn);
227    unsigned char *RADEONMMIO = info->MMIO;
228    uint32_t       clock_cntl_index;
229    uint32_t       mclk_cntl;
230    uint32_t       rbbm_soft_reset;
231    uint32_t       host_path_cntl;
232
233    /* The following RBBM_SOFT_RESET sequence can help un-wedge
234     * an R300 after the command processor got stuck.
235     */
236    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
237    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
238                                   RADEON_SOFT_RESET_CP |
239                                   RADEON_SOFT_RESET_HI |
240                                   RADEON_SOFT_RESET_SE |
241                                   RADEON_SOFT_RESET_RE |
242                                   RADEON_SOFT_RESET_PP |
243                                   RADEON_SOFT_RESET_E2 |
244                                   RADEON_SOFT_RESET_RB));
245    INREG(RADEON_RBBM_SOFT_RESET);
246    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
247                                   ~(RADEON_SOFT_RESET_CP |
248                                     RADEON_SOFT_RESET_HI |
249                                     RADEON_SOFT_RESET_SE |
250                                     RADEON_SOFT_RESET_RE |
251                                     RADEON_SOFT_RESET_PP |
252                                     RADEON_SOFT_RESET_E2 |
253                                     RADEON_SOFT_RESET_RB)));
254    INREG(RADEON_RBBM_SOFT_RESET);
255    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
256    INREG(RADEON_RBBM_SOFT_RESET);
257
258    RADEONEngineFlush(pScrn);
259
260    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
261    RADEONPllErrataAfterIndex(info);
262
263#if 0 /* taken care of by new PM code */
264    /* Some ASICs have bugs with dynamic-on feature, which are
265     * ASIC-version dependent, so we force all blocks on for now
266     */
267    if (info->HasCRTC2) {
268	uint32_t tmp;
269
270	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
271	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
272				  RADEON_CP_MAX_DYN_STOP_LAT |
273				  RADEON_SCLK_FORCEON_MASK));
274
275	if (info->ChipFamily == CHIP_FAMILY_RV200) {
276	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
277	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
278	}
279    }
280#endif /* new PM code */
281
282    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
283
284#if 0 /* handled by new PM code */
285    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
286			      RADEON_FORCEON_MCLKA |
287			      RADEON_FORCEON_MCLKB |
288			      RADEON_FORCEON_YCLKA |
289			      RADEON_FORCEON_YCLKB |
290			      RADEON_FORCEON_MC |
291			      RADEON_FORCEON_AIC));
292#endif /* new PM code */
293
294    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
295     * unexpected behaviour on some machines.  Here we use
296     * RADEON_HOST_PATH_CNTL to reset it.
297     */
298    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
299    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
300
301    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
302	uint32_t tmp;
303
304	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
305					RADEON_SOFT_RESET_CP |
306					RADEON_SOFT_RESET_HI |
307					RADEON_SOFT_RESET_E2));
308	INREG(RADEON_RBBM_SOFT_RESET);
309	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
310	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
311	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
312    } else {
313	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
314					RADEON_SOFT_RESET_CP |
315					RADEON_SOFT_RESET_SE |
316					RADEON_SOFT_RESET_RE |
317					RADEON_SOFT_RESET_PP |
318					RADEON_SOFT_RESET_E2 |
319					RADEON_SOFT_RESET_RB));
320	INREG(RADEON_RBBM_SOFT_RESET);
321	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
322					~(RADEON_SOFT_RESET_CP |
323					  RADEON_SOFT_RESET_SE |
324					  RADEON_SOFT_RESET_RE |
325					  RADEON_SOFT_RESET_PP |
326					  RADEON_SOFT_RESET_E2 |
327					  RADEON_SOFT_RESET_RB)));
328	INREG(RADEON_RBBM_SOFT_RESET);
329    }
330
331    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
332    INREG(RADEON_HOST_PATH_CNTL);
333    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
334
335    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
336	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
337
338    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
339    RADEONPllErrataAfterIndex(info);
340    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
341}
342
343/* Reset graphics card to known state */
344static void R600EngineReset(ScrnInfoPtr pScrn)
345{
346    RADEONInfoPtr  info       = RADEONPTR(pScrn);
347    unsigned char *RADEONMMIO = info->MMIO;
348    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
349
350    cp_ptr = INREG(R600_CP_RB_WPTR);
351
352    cp_me_cntl = INREG(R600_CP_ME_CNTL);
353    OUTREG(R600_CP_ME_CNTL, 0x10000000);
354
355    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
356    INREG(R600_GRBM_SOFT_RESET);
357    usleep (50);
358    OUTREG(R600_GRBM_SOFT_RESET, 0);
359    INREG(R600_GRBM_SOFT_RESET);
360
361    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
362    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
363    OUTREG(R600_CP_RB_CNTL, 0x80000000);
364
365    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
366    OUTREG(R600_CP_RB_WPTR, cp_ptr);
367    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
368    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
369
370}
371
372/* Restore the acceleration hardware to its previous state */
373void RADEONEngineRestore(ScrnInfoPtr pScrn)
374{
375    RADEONInfoPtr  info       = RADEONPTR(pScrn);
376    unsigned char *RADEONMMIO = info->MMIO;
377
378    if (info->cs)
379      return;
380
381    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
382		   "EngineRestore (%d/%d)\n",
383		   info->CurrentLayout.pixel_code,
384		   info->CurrentLayout.bitsPerPixel);
385
386    /* Setup engine location. This shouldn't be necessary since we
387     * set them appropriately before any accel ops, but let's avoid
388     * random bogus DMA in case we inadvertently trigger the engine
389     * in the wrong place (happened).
390     */
391    RADEONWaitForFifo(pScrn, 2);
392    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
393    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
394
395    RADEONWaitForFifo(pScrn, 1);
396#if X_BYTE_ORDER == X_BIG_ENDIAN
397    OUTREGP(RADEON_DP_DATATYPE,
398	    RADEON_HOST_BIG_ENDIAN_EN,
399	    ~RADEON_HOST_BIG_ENDIAN_EN);
400#else
401    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
402#endif
403
404    /* Restore SURFACE_CNTL */
405    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
406
407    RADEONWaitForFifo(pScrn, 1);
408    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
409					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
410    RADEONWaitForFifo(pScrn, 1);
411    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
412				       | RADEON_GMC_BRUSH_SOLID_COLOR
413				       | RADEON_GMC_SRC_DATATYPE_COLOR));
414
415    RADEONWaitForFifo(pScrn, 5);
416    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
417    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
418    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
419    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
420    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
421
422    RADEONWaitForIdleMMIO(pScrn);
423
424    info->accel_state->XInited3D = FALSE;
425}
426
427static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
428{
429    RADEONInfoPtr info = RADEONPTR(pScrn);
430    if (info->dri->pKernelDRMVersion->version_major < 2) {
431        drm_radeon_getparam_t np;
432
433        memset(&np, 0, sizeof(np));
434        np.param = RADEON_PARAM_NUM_GB_PIPES;
435        np.value = num_pipes;
436        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
437    } else {
438        struct drm_radeon_info np2;
439        np2.value = (unsigned long)num_pipes;
440        np2.request = RADEON_INFO_NUM_GB_PIPES;
441        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
442    }
443}
444
445/* Initialize the acceleration hardware */
446void RADEONEngineInit(ScrnInfoPtr pScrn)
447{
448    RADEONInfoPtr  info       = RADEONPTR(pScrn);
449    unsigned char *RADEONMMIO = info->MMIO;
450    int datatype = 0;
451    info->accel_state->num_gb_pipes = 0;
452
453    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
454		   "EngineInit (%d/%d)\n",
455		   info->CurrentLayout.pixel_code,
456		   info->CurrentLayout.bitsPerPixel);
457
458#ifdef XF86DRI
459    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
460	int num_pipes;
461
462	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
463	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
464		       "Failed to determine num pipes from DRM, falling back to "
465		       "manual look-up!\n");
466	    info->accel_state->num_gb_pipes = 0;
467	} else {
468	    info->accel_state->num_gb_pipes = num_pipes;
469	}
470    }
471#endif
472
473    if (!info->cs) {
474	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
475	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
476	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
477	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
478	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
479	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
480	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
481	    IS_R500_3D) {
482	    if (info->accel_state->num_gb_pipes == 0) {
483		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
484
485		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
486		if (IS_R500_3D)
487		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
488	    }
489	} else {
490	    if (info->accel_state->num_gb_pipes == 0) {
491		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
492		    (info->ChipFamily == CHIP_FAMILY_R350)) {
493		    /* R3xx chips */
494		    info->accel_state->num_gb_pipes = 2;
495		} else {
496		    /* RV3xx chips */
497		    info->accel_state->num_gb_pipes = 1;
498		}
499	    }
500	}
501
502	/* RV410 SE cards only have 1 quadpipe */
503	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
504	    (info->Chipset == PCI_CHIP_RV410_5E4F))
505	    info->accel_state->num_gb_pipes = 1;
506
507	if (IS_R300_3D || IS_R500_3D)
508	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
509		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
510
511	if (IS_R300_3D || IS_R500_3D) {
512	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
513
514	    switch(info->accel_state->num_gb_pipes) {
515	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
516	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
517	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
518	    default:
519	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
520	    }
521
522	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
523	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
524	    if (info->ChipFamily >= CHIP_FAMILY_R420)
525		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
526	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
527					     R300_DC_AUTOFLUSH_ENABLE |
528					     R300_DC_DC_DISABLE_IGNORE_PE));
529	} else
530	    OUTREG(RADEON_RB3D_CNTL, 0);
531
532	RADEONEngineReset(pScrn);
533    }
534
535    switch (info->CurrentLayout.pixel_code) {
536    case 8:  datatype = 2; break;
537    case 15: datatype = 3; break;
538    case 16: datatype = 4; break;
539    case 24: datatype = 5; break;
540    case 32: datatype = 6; break;
541    default:
542	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
543		       "Unknown depth/bpp = %d/%d (code = %d)\n",
544		       info->CurrentLayout.depth,
545		       info->CurrentLayout.bitsPerPixel,
546		       info->CurrentLayout.pixel_code);
547    }
548
549    info->accel_state->dp_gui_master_cntl =
550	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
551	 | RADEON_GMC_CLR_CMP_CNTL_DIS
552	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
553
554    RADEONEngineRestore(pScrn);
555}
556
557uint32_t radeonGetPixmapOffset(PixmapPtr pPix)
558{
559    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
560    RADEONInfoPtr info = RADEONPTR(pScrn);
561    uint32_t offset = 0;
562    if (info->cs)
563	return 0;
564#ifdef USE_EXA
565    if (info->useEXA) {
566	offset = exaGetPixmapOffset(pPix);
567    } else
568#endif
569    {
570	offset = pPix->devPrivate.ptr - info->FB;
571    }
572    offset += info->fbLocation + pScrn->fbOffset;
573    return offset;
574}
575
/* MMIO instantiation of the shared acceleration code.
 *
 * The .c files included below are compiled here with the ACCEL_* macros
 * bound to direct register access: ACCEL_PREAMBLE() sets up the
 * RADEONMMIO pointer, BEGIN_ACCEL(n) waits for n free FIFO slots,
 * OUT_ACCEL_REG() is a plain OUTREG() and FINISH_ACCEL() expands to
 * nothing.  ACCEL_MMIO is defined for the duration of the includes
 * (presumably tested by the included files to select this variant).
 * All five macros are undefined again afterwards so the same files can
 * be included a second time with different bindings.
 */
576#define ACCEL_MMIO
577#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
578#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
579#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
580#define FINISH_ACCEL()
581
582#include "radeon_commonfuncs.c"
583#if defined(RENDER) && defined(USE_XAA)
584#include "radeon_render.c"
585#endif
586#include "radeon_accelfuncs.c"
587
588#undef ACCEL_MMIO
589#undef ACCEL_PREAMBLE
590#undef BEGIN_ACCEL
591#undef OUT_ACCEL_REG
592#undef FINISH_ACCEL
593
594#ifdef XF86DRI
595
/* CP instantiation of the shared acceleration code (DRI builds only).
 *
 * The same .c files are included again, this time with the ACCEL_*
 * macros bound to the CP ring macros: ACCEL_PREAMBLE() declares the
 * ring locals and calls RADEONCP_REFRESH(), BEGIN_ACCEL(n) reserves
 * 2*n ring entries (OUT_RING_REG is used once per register write),
 * OUT_ACCEL_REG() maps to OUT_RING_REG() and FINISH_ACCEL() to
 * ADVANCE_RING().  ACCEL_CP is defined for the duration of the includes
 * (presumably tested by the included files to select this variant).
 */
596#define ACCEL_CP
597#define ACCEL_PREAMBLE()						\
598    RING_LOCALS;							\
599    RADEONCP_REFRESH(pScrn, info)
600#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
601#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
602#define FINISH_ACCEL()          ADVANCE_RING()
603
604
605#include "radeon_commonfuncs.c"
606#if defined(RENDER) && defined(USE_XAA)
607#include "radeon_render.c"
608#endif
609#include "radeon_accelfuncs.c"
610
611#undef ACCEL_CP
612#undef ACCEL_PREAMBLE
613#undef BEGIN_ACCEL
614#undef OUT_ACCEL_REG
615#undef FINISH_ACCEL
616
617/* Stop the CP */
618int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
619{
620    drm_radeon_cp_stop_t  stop;
621    int              ret, i;
622
623    stop.flush = 1;
624    stop.idle  = 1;
625
626    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
627			  sizeof(drm_radeon_cp_stop_t));
628
629    if (ret == 0) {
630	return 0;
631    } else if (errno != EBUSY) {
632	return -errno;
633    }
634
635    stop.flush = 0;
636
637    i = 0;
638    do {
639	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
640			      sizeof(drm_radeon_cp_stop_t));
641    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
642
643    if (ret == 0) {
644	return 0;
645    } else if (errno != EBUSY) {
646	return -errno;
647    }
648
649    stop.idle = 0;
650
651    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
652			&stop, sizeof(drm_radeon_cp_stop_t))) {
653	return -errno;
654    } else {
655	return 0;
656    }
657}
658
659#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
660
661/* Get an indirect buffer for the CP 2D acceleration commands  */
662drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
663{
664    RADEONInfoPtr  info = RADEONPTR(pScrn);
665    drmDMAReq      dma;
666    drmBufPtr      buf = NULL;
667    int            indx = 0;
668    int            size = 0;
669    int            i = 0;
670    int            ret;
671
672#if 0
673    /* FIXME: pScrn->pScreen has not been initialized when this is first
674     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
675     * the screen index from pScrn, which is initialized, and then get
676     * the screen from screenInfo.screens[index], but that is a hack.
677     */
678    dma.context = DRIGetContext(pScrn->pScreen);
679#else
680    /* This is the X server's context */
681    dma.context = 0x00000001;
682#endif
683
684    dma.send_count    = 0;
685    dma.send_list     = NULL;
686    dma.send_sizes    = NULL;
687    dma.flags         = 0;
688    dma.request_count = 1;
689    dma.request_size  = RADEON_BUFFER_SIZE;
690    dma.request_list  = &indx;
691    dma.request_sizes = &size;
692    dma.granted_count = 0;
693
694    while (1) {
695	do {
696	    ret = drmDMA(info->dri->drmFD, &dma);
697	    if (ret && ret != -EBUSY) {
698		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
699			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
700	    }
701	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
702
703	if (ret == 0) {
704	    buf = &info->dri->buffers->list[indx];
705	    buf->used = 0;
706	    if (RADEON_VERBOSE) {
707		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
708			   "   GetBuffer returning %d %p\n",
709			   buf->idx, buf->address);
710	    }
711	    return buf;
712	}
713
714	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
715		   "GetBuffer timed out, resetting engine...\n");
716
717	if (info->ChipFamily < CHIP_FAMILY_R600) {
718	    RADEONEngineReset(pScrn);
719	    RADEONEngineRestore(pScrn);
720	} else
721	    R600EngineReset(pScrn);
722
723	/* Always restart the engine when doing CP 2D acceleration */
724	RADEONCP_RESET(pScrn, info);
725	RADEONCP_START(pScrn, info);
726    }
727}
728
729/* Flush the indirect buffer to the kernel for submission to the card */
730void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
731{
732    RADEONInfoPtr      info   = RADEONPTR(pScrn);
733    drmBufPtr          buffer = info->cp->indirectBuffer;
734    int                start  = info->cp->indirectStart;
735    drm_radeon_indirect_t  indirect;
736
737    assert(!info->cs);
738    if (!buffer) return;
739    if (start == buffer->used && !discard) return;
740
741    if (RADEON_VERBOSE) {
742	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
743		   buffer->idx);
744    }
745
746    if (info->ChipFamily >= CHIP_FAMILY_R600) {
747	if (buffer->used & 0x3c) {
748	    RING_LOCALS;
749
750	    while (buffer->used & 0x3c) {
751		BEGIN_RING(1);
752		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
753		ADVANCE_RING();
754	    }
755	}
756    }
757
758    indirect.idx     = buffer->idx;
759    indirect.start   = start;
760    indirect.end     = buffer->used;
761    indirect.discard = discard;
762
763    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
764			&indirect, sizeof(drm_radeon_indirect_t));
765
766    if (discard) {
767	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
768	info->cp->indirectStart  = 0;
769    } else {
770	/* Start on a double word boundary */
771	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
772	if (RADEON_VERBOSE) {
773	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
774		       info->cp->indirectStart);
775	}
776    }
777}
778
779/* Flush and release the indirect buffer */
780void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
781{
782    RADEONInfoPtr      info   = RADEONPTR(pScrn);
783    drmBufPtr          buffer = info->cp->indirectBuffer;
784    int                start  = info->cp->indirectStart;
785    drm_radeon_indirect_t  indirect;
786
787    assert(!info->cs);
788    if (info->ChipFamily >= CHIP_FAMILY_R600) {
789	if (buffer && (buffer->used & 0x3c)) {
790	    RING_LOCALS;
791
792	    while (buffer->used & 0x3c) {
793		BEGIN_RING(1);
794		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
795		ADVANCE_RING();
796	    }
797	}
798    }
799
800    info->cp->indirectBuffer = NULL;
801    info->cp->indirectStart  = 0;
802
803    if (!buffer) return;
804
805    if (RADEON_VERBOSE) {
806	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
807		   buffer->idx);
808    }
809
810    indirect.idx     = buffer->idx;
811    indirect.start   = start;
812    indirect.end     = buffer->used;
813    indirect.discard = 1;
814
815    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
816			&indirect, sizeof(drm_radeon_indirect_t));
817}
818
819/** \brief Calculate HostDataBlit parameters from pointer and pitch
820 *
821 * This is a helper for the trivial HostDataBlit users that don't need to worry
822 * about tiling etc.
823 */
824void
825RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
826		     uint32_t *dstPitchOff, int *x, int *y)
827{
828    RADEONInfoPtr info = RADEONPTR( pScrn );
829    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
830
831    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
832    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
833    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
834}
835
836/* Set up a hostdata blit to transfer data from system memory to the
837 * framebuffer. Returns the address where the data can be written to and sets
838 * the dstPitch and hpass variables as required.
839 */
840uint8_t*
841RADEONHostDataBlit(
842    ScrnInfoPtr pScrn,
843    unsigned int cpp,
844    unsigned int w,
845    uint32_t dstPitchOff,
846    uint32_t *bufPitch,
847    int x,
848    int *y,
849    unsigned int *h,
850    unsigned int *hpass
851){
852    RADEONInfoPtr info = RADEONPTR( pScrn );
853    uint32_t format, dwords;
854    uint8_t *ret;
855    RING_LOCALS;
856
857    if ( *h == 0 )
858    {
859	return NULL;
860    }
861
862    switch ( cpp )
863    {
864    case 4:
865	format = RADEON_GMC_DST_32BPP;
866	*bufPitch = 4 * w;
867	break;
868    case 2:
869	format = RADEON_GMC_DST_16BPP;
870	*bufPitch = 2 * RADEON_ALIGN(w, 2);
871	break;
872    case 1:
873	format = RADEON_GMC_DST_8BPP_CI;
874	*bufPitch = RADEON_ALIGN(w, 4);
875	break;
876    default:
877	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
878		    "%s: Unsupported cpp %d!\n", __func__, cpp );
879	return NULL;
880    }
881
882#if X_BYTE_ORDER == X_BIG_ENDIAN
883    /* Swap doesn't work on R300 and later, it's handled during the
884     * copy to ind. buffer pass
885     */
886    if (info->ChipFamily < CHIP_FAMILY_R300) {
887        BEGIN_RING(2);
888	if (cpp == 2)
889	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
890			 RADEON_HOST_DATA_SWAP_HDW);
891	else if (cpp == 1)
892	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
893			 RADEON_HOST_DATA_SWAP_32BIT);
894	else
895	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
896			 RADEON_HOST_DATA_SWAP_NONE);
897	ADVANCE_RING();
898    }
899#endif
900
901    /*RADEON_PURGE_CACHE();
902      RADEON_WAIT_UNTIL_IDLE();*/
903
904    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
905    dwords = *hpass * *bufPitch / 4;
906
907    BEGIN_RING( dwords + 10 );
908    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
909    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
910	    | RADEON_GMC_DST_CLIPPING
911	    | RADEON_GMC_BRUSH_NONE
912	    | format
913	    | RADEON_GMC_SRC_DATATYPE_COLOR
914	    | RADEON_ROP3_S
915	    | RADEON_DP_SRC_SOURCE_HOST_DATA
916	    | RADEON_GMC_CLR_CMP_CNTL_DIS
917	    | RADEON_GMC_WR_MSK_DIS );
918    OUT_RING( dstPitchOff );
919    OUT_RING( (*y << 16) | x );
920    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
921    OUT_RING( 0xffffffff );
922    OUT_RING( 0xffffffff );
923    OUT_RING( *y << 16 | x );
924    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
925    OUT_RING( dwords );
926
927    ret = ( uint8_t* )&__head[__count];
928
929    __count += dwords;
930    ADVANCE_RING();
931
932    *y += *hpass;
933    *h -= *hpass;
934
935    return ret;
936}
937
938void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
939{
940    switch(swap) {
941    case RADEON_HOST_DATA_SWAP_HDW:
942        {
943	    unsigned int *d = (unsigned int *)dst;
944	    unsigned int *s = (unsigned int *)src;
945	    unsigned int nwords = size >> 2;
946
947	    for (; nwords > 0; --nwords, ++d, ++s)
948		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
949	    return;
950        }
951    case RADEON_HOST_DATA_SWAP_32BIT:
952        {
953	    unsigned int *d = (unsigned int *)dst;
954	    unsigned int *s = (unsigned int *)src;
955	    unsigned int nwords = size >> 2;
956
957	    for (; nwords > 0; --nwords, ++d, ++s)
958#ifdef __powerpc__
959		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
960#else
961		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
962			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
963#endif
964	    return;
965        }
966    case RADEON_HOST_DATA_SWAP_16BIT:
967        {
968	    unsigned short *d = (unsigned short *)dst;
969	    unsigned short *s = (unsigned short *)src;
970	    unsigned int nwords = size >> 1;
971
972	    for (; nwords > 0; --nwords, ++d, ++s)
973#ifdef __powerpc__
974		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
975#else
976	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
977			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
978#endif
979	    return;
980	}
981    }
982    if (src != dst)
983	memcpy(dst, src, size);
984}
985
986/* Copies a single pass worth of data for a hostdata blit set up by
987 * RADEONHostDataBlit().
988 */
989void
990RADEONHostDataBlitCopyPass(
991    ScrnInfoPtr pScrn,
992    unsigned int cpp,
993    uint8_t *dst,
994    uint8_t *src,
995    unsigned int hpass,
996    unsigned int dstPitch,
997    unsigned int srcPitch
998){
999
1000#if X_BYTE_ORDER == X_BIG_ENDIAN
1001    RADEONInfoPtr info = RADEONPTR( pScrn );
1002#endif
1003
1004    /* RADEONHostDataBlitCopy can return NULL ! */
1005    if( (dst==NULL) || (src==NULL)) return;
1006
1007    if ( dstPitch == srcPitch )
1008    {
1009#if X_BYTE_ORDER == X_BIG_ENDIAN
1010        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1011	    switch(cpp) {
1012	    case 1:
1013		RADEONCopySwap(dst, src, hpass * dstPitch,
1014			       RADEON_HOST_DATA_SWAP_32BIT);
1015		return;
1016	    case 2:
1017	        RADEONCopySwap(dst, src, hpass * dstPitch,
1018			       RADEON_HOST_DATA_SWAP_HDW);
1019		return;
1020	    }
1021	}
1022#endif
1023	memcpy( dst, src, hpass * dstPitch );
1024    }
1025    else
1026    {
1027	unsigned int minPitch = min( dstPitch, srcPitch );
1028	while ( hpass-- )
1029	{
1030#if X_BYTE_ORDER == X_BIG_ENDIAN
1031            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1032		switch(cpp) {
1033		case 1:
1034		    RADEONCopySwap(dst, src, minPitch,
1035				   RADEON_HOST_DATA_SWAP_32BIT);
1036		    goto next;
1037		case 2:
1038	            RADEONCopySwap(dst, src, minPitch,
1039				   RADEON_HOST_DATA_SWAP_HDW);
1040		    goto next;
1041		}
1042	    }
1043#endif
1044	    memcpy( dst, src, minPitch );
1045#if X_BYTE_ORDER == X_BIG_ENDIAN
1046	next:
1047#endif
1048	    src += srcPitch;
1049	    dst += dstPitch;
1050	}
1051    }
1052}
1053
1054#endif
1055
1056Bool RADEONAccelInit(ScreenPtr pScreen)
1057{
1058    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1059    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1060
1061#ifdef USE_EXA
1062    if (info->useEXA) {
1063# ifdef XF86DRI
1064	if (info->directRenderingEnabled) {
1065	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1066		if (!R600DrawInit(pScreen))
1067		    return FALSE;
1068	    } else {
1069		if (!RADEONDrawInitCP(pScreen))
1070		    return FALSE;
1071	    }
1072	} else
1073# endif /* XF86DRI */
1074	{
1075	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1076		return FALSE;
1077	    else {
1078		if (!RADEONDrawInitMMIO(pScreen))
1079		    return FALSE;
1080	    }
1081	}
1082    }
1083#endif /* USE_EXA */
1084#ifdef USE_XAA
1085    if (!info->useEXA) {
1086	XAAInfoRecPtr  a;
1087
1088	if (info->ChipFamily >= CHIP_FAMILY_R600)
1089	    return FALSE;
1090
1091	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1092	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1093	    return FALSE;
1094	}
1095
1096#ifdef XF86DRI
1097	if (info->directRenderingEnabled)
1098	    RADEONAccelInitCP(pScreen, a);
1099	else
1100#endif /* XF86DRI */
1101	    RADEONAccelInitMMIO(pScreen, a);
1102
1103	RADEONEngineInit(pScrn);
1104
1105	if (!XAAInit(pScreen, a)) {
1106	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1107	    return FALSE;
1108	}
1109    }
1110#endif /* USE_XAA */
1111    return TRUE;
1112}
1113
1114void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1115{
1116    RADEONInfoPtr info = RADEONPTR (pScrn);
1117
1118#ifdef XF86DRI
1119    if (info->directRenderingEnabled) {
1120	drm_radeon_sarea_t *pSAREAPriv;
1121
1122	if (!info->kms_enabled) {
1123	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1124	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1125	}
1126	RADEONInit3DEngineCP(pScrn);
1127    } else
1128#endif
1129	RADEONInit3DEngineMMIO(pScrn);
1130
1131    info->accel_state->XInited3D = TRUE;
1132}
1133
1134#ifdef USE_XAA
1135#ifdef XF86DRI
1136Bool
1137RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1138{
1139    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1140    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1141    int            cpp = info->CurrentLayout.pixel_bytes;
1142    int            depthCpp = (info->dri->depthBits - 8) / 4;
1143    int            width_bytes = pScrn->displayWidth * cpp;
1144    int            bufferSize;
1145    int            depthSize;
1146    int            l;
1147    int            scanlines;
1148    int            texsizerequest;
1149    BoxRec         MemBox;
1150    FBAreaPtr      fbarea;
1151
1152    info->dri->frontOffset = 0;
1153    info->dri->frontPitch = pScrn->displayWidth;
1154    info->dri->backPitch = pScrn->displayWidth;
1155
1156    /* make sure we use 16 line alignment for tiling (8 might be enough).
1157     * Might need that for non-XF86DRI too?
1158     */
1159    if (info->allowColorTiling) {
1160	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1161		      RADEON_GPU_PAGE_SIZE);
1162    } else {
1163        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1164		      RADEON_GPU_PAGE_SIZE);
1165    }
1166
1167    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1168     * which is always the case if color tiling is used due to color pitch
1169     * but not necessarily otherwise, and its height a multiple of 16 lines.
1170     */
1171    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1172    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1173		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1174
1175    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1176	       "Using %d MB GART aperture\n", info->dri->gartSize);
1177    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1178	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1179    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1180	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1181    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1182	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1183
1184    /* Try for front, back, depth, and three framebuffers worth of
1185     * pixmap cache.  Should be enough for a fullscreen background
1186     * image plus some leftovers.
1187     * If the FBTexPercent option was used, try to achieve that percentage instead,
1188     * but still have at least one pixmap buffer (get problems with xvideo/render
1189     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1190     * probably useless for XAA.
1191     */
1192    if (info->dri->textureSize >= 0) {
1193	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1194			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1195	/* first divide, then multiply or we'll get an overflow (been there...) */
1196			 / 100 * info->dri->textureSize;
1197    }
1198    else {
1199	texsizerequest = (int)info->FbMapSize / 2;
1200    }
1201    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1202
1203    /* If that gives us less than the requested memory, let's
1204     * be greedy and grab some more.  Sorry, I care more about 3D
1205     * performance than playing nicely, and you'll get around a full
1206     * framebuffer's worth of pixmap cache anyway.
1207     */
1208    if (info->dri->textureSize < texsizerequest) {
1209        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1210    }
1211    if (info->dri->textureSize < texsizerequest) {
1212        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1213    }
1214
1215    /* If there's still no space for textures, try without pixmap cache, but
1216     * never use the reserved space, the space hw cursor and PCIGART table might
1217     * use.
1218     */
1219    if (info->dri->textureSize < 0) {
1220	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1221	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1222    }
1223
1224    /* Check to see if there is more room available after the 8192nd
1225     * scanline for textures
1226     */
1227    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1228    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1229	> info->dri->textureSize) {
1230	info->dri->textureSize =
1231		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1232    }
1233
1234    /* If backbuffer is disabled, don't allocate memory for it */
1235    if (info->dri->noBackBuffer) {
1236	info->dri->textureSize += bufferSize;
1237    }
1238
1239    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1240       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1241       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1242       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1243       area otherwise).
1244       This might cause some space at the end of the video memory to be unused, since it
1245       can't be used (?) due to that log_tex_granularity thing???
1246       Could use different copyscreentoscreen function for the pageflip copies
1247       (which would use different src and dst offsets) to avoid this. */
1248    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1249	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1250			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1251    }
1252    if (info->dri->textureSize > 0) {
1253	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1254	if (l < RADEON_LOG_TEX_GRANULARITY)
1255	    l = RADEON_LOG_TEX_GRANULARITY;
1256	/* Round the texture size up to the nearest whole number of
1257	 * texture regions.  Again, be greedy about this, don't
1258	 * round down.
1259	 */
1260	info->dri->log2TexGran = l;
1261	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1262    } else {
1263	info->dri->textureSize = 0;
1264    }
1265
1266    /* Set a minimum usable local texture heap size.  This will fit
1267     * two 256x256x32bpp textures.
1268     */
1269    if (info->dri->textureSize < 512 * 1024) {
1270	info->dri->textureOffset = 0;
1271	info->dri->textureSize = 0;
1272    }
1273
1274    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1275	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1276				    (width_bytes * 16)) * (width_bytes * 16);
1277    }
1278    else {
1279	/* Reserve space for textures */
1280	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1281				     RADEON_GPU_PAGE_SIZE);
1282    }
1283
1284    /* Reserve space for the shared depth
1285     * buffer.
1286     */
1287    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1288			       RADEON_GPU_PAGE_SIZE);
1289
1290    /* Reserve space for the shared back buffer */
1291    if (info->dri->noBackBuffer) {
1292       info->dri->backOffset = info->dri->depthOffset;
1293    } else {
1294       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1295				 RADEON_GPU_PAGE_SIZE);
1296    }
1297
1298    info->dri->backY = info->dri->backOffset / width_bytes;
1299    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1300
1301    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1302    if (scanlines > 8191)
1303	scanlines = 8191;
1304
1305    MemBox.x1 = 0;
1306    MemBox.y1 = 0;
1307    MemBox.x2 = pScrn->displayWidth;
1308    MemBox.y2 = scanlines;
1309
1310    if (!xf86InitFBManager(pScreen, &MemBox)) {
1311        xf86DrvMsg(scrnIndex, X_ERROR,
1312		   "Memory manager initialization to "
1313		   "(%d,%d) (%d,%d) failed\n",
1314		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1315	return FALSE;
1316    } else {
1317	int  width, height;
1318
1319	xf86DrvMsg(scrnIndex, X_INFO,
1320		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1321		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1322	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1323	   aligned... sigh */
1324	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1325						pScrn->displayWidth,
1326						info->allowColorTiling ?
1327						(RADEON_ALIGN(pScrn->virtualY, 16))
1328						- pScrn->virtualY + 2 : 2,
1329						0, NULL, NULL,
1330						NULL))) {
1331	    xf86DrvMsg(scrnIndex, X_INFO,
1332		       "Reserved area from (%d,%d) to (%d,%d)\n",
1333		       fbarea->box.x1, fbarea->box.y1,
1334		       fbarea->box.x2, fbarea->box.y2);
1335	} else {
1336	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1337	}
1338
1339	RADEONDRIAllocatePCIGARTTable(pScreen);
1340
1341	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1342					  &height, 0, 0, 0)) {
1343	    xf86DrvMsg(scrnIndex, X_INFO,
1344		       "Largest offscreen area available: %d x %d\n",
1345		       width, height);
1346
1347	    /* Lines in offscreen area needed for depth buffer and
1348	     * textures
1349	     */
1350	    info->dri->depthTexLines = (scanlines
1351					- info->dri->depthOffset / width_bytes);
1352	    info->dri->backLines	    = (scanlines
1353					       - info->dri->backOffset / width_bytes
1354					       - info->dri->depthTexLines);
1355	    info->dri->backArea	    = NULL;
1356	} else {
1357	    xf86DrvMsg(scrnIndex, X_ERROR,
1358		       "Unable to determine largest offscreen area "
1359		       "available\n");
1360	    return FALSE;
1361	}
1362    }
1363
1364    xf86DrvMsg(scrnIndex, X_INFO,
1365	       "Will use front buffer at offset 0x%x\n",
1366	       info->dri->frontOffset);
1367
1368    xf86DrvMsg(scrnIndex, X_INFO,
1369	       "Will use back buffer at offset 0x%x\n",
1370	       info->dri->backOffset);
1371    xf86DrvMsg(scrnIndex, X_INFO,
1372	       "Will use depth buffer at offset 0x%x\n",
1373	       info->dri->depthOffset);
1374    if (info->cardType==CARD_PCIE)
1375    	xf86DrvMsg(scrnIndex, X_INFO,
1376	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1377		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1378    xf86DrvMsg(scrnIndex, X_INFO,
1379	       "Will use %d kb for textures at offset 0x%x\n",
1380	       info->dri->textureSize/1024, info->dri->textureOffset);
1381
1382    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1383				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1384
1385    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1386				  ((info->dri->backOffset + info->fbLocation) >> 10));
1387
1388    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1389				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1390    return TRUE;
1391}
1392#endif /* XF86DRI */
1393
1394Bool
1395RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1396{
1397    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1398    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1399    BoxRec         MemBox;
1400    int            y2;
1401
1402    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1403
1404    MemBox.x1 = 0;
1405    MemBox.y1 = 0;
1406    MemBox.x2 = pScrn->displayWidth;
1407    y2 = info->FbMapSize / width_bytes;
1408    if (y2 >= 32768)
1409	y2 = 32767; /* because MemBox.y2 is signed short */
1410    MemBox.y2 = y2;
1411
1412    /* The acceleration engine uses 14 bit
1413     * signed coordinates, so we can't have any
1414     * drawable caches beyond this region.
1415     */
1416    if (MemBox.y2 > 8191)
1417	MemBox.y2 = 8191;
1418
1419    if (!xf86InitFBManager(pScreen, &MemBox)) {
1420	xf86DrvMsg(scrnIndex, X_ERROR,
1421		   "Memory manager initialization to "
1422		   "(%d,%d) (%d,%d) failed\n",
1423		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1424	return FALSE;
1425    } else {
1426	int       width, height;
1427	FBAreaPtr fbarea;
1428
1429	xf86DrvMsg(scrnIndex, X_INFO,
1430		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1431		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1432	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1433						pScrn->displayWidth,
1434						info->allowColorTiling ?
1435						(RADEON_ALIGN(pScrn->virtualY, 16))
1436						- pScrn->virtualY + 2 : 2,
1437						0, NULL, NULL,
1438						NULL))) {
1439	    xf86DrvMsg(scrnIndex, X_INFO,
1440		       "Reserved area from (%d,%d) to (%d,%d)\n",
1441		       fbarea->box.x1, fbarea->box.y1,
1442		       fbarea->box.x2, fbarea->box.y2);
1443	} else {
1444	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1445	}
1446	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1447					      0, 0, 0)) {
1448	    xf86DrvMsg(scrnIndex, X_INFO,
1449		       "Largest offscreen area available: %d x %d\n",
1450		       width, height);
1451	}
1452	return TRUE;
1453    }
1454}
1455#endif /* USE_XAA */
1456