radeon_accel.c revision 43df4709
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78#include <assert.h>
79				/* Driver data structures */
80#include "radeon.h"
81#include "radeon_reg.h"
82#include "r600_reg.h"
83#include "radeon_macros.h"
84#include "radeon_probe.h"
85#include "radeon_version.h"
86#ifdef XF86DRI
87#define _XF86DRI_SERVER_
88#include "radeon_drm.h"
89#endif
90
91#include "ati_pciids_gen.h"
92
93				/* Line support */
94#include "miline.h"
95
96				/* X and server generic header files */
97#include "xf86.h"
98
99static void R600EngineReset(ScrnInfoPtr pScrn);
100
101#ifdef USE_XAA
102static struct {
103    int rop;
104    int pattern;
105} RADEON_ROP[] = {
106    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
107    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
108    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
109    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
110    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
111    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
112    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
113    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
114    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
115    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
116    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
117    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
118    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
119    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
120    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
121    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
122};
123#endif
124
125/* The FIFO has 64 slots.  This routines waits until at least `entries'
126 * of these slots are empty.
127 */
128void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
129{
130    RADEONInfoPtr  info       = RADEONPTR(pScrn);
131    unsigned char *RADEONMMIO = info->MMIO;
132    int            i;
133
134    for (;;) {
135	for (i = 0; i < RADEON_TIMEOUT; i++) {
136	    info->accel_state->fifo_slots =
137		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
138	    if (info->accel_state->fifo_slots >= entries) return;
139	}
140	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
141		       "FIFO timed out: %u entries, stat=0x%08x\n",
142		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
143		       (unsigned int)INREG(RADEON_RBBM_STATUS));
144	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
145		   "FIFO timed out, resetting engine...\n");
146	RADEONEngineReset(pScrn);
147	RADEONEngineRestore(pScrn);
148#ifdef XF86DRI
149	if (info->directRenderingEnabled) {
150	    RADEONCP_RESET(pScrn, info);
151	    RADEONCP_START(pScrn, info);
152	}
153#endif
154    }
155}
156
157void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
158{
159    RADEONInfoPtr  info       = RADEONPTR(pScrn);
160    unsigned char *RADEONMMIO = info->MMIO;
161    int            i;
162
163    for (;;) {
164	for (i = 0; i < RADEON_TIMEOUT; i++) {
165	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
166		info->accel_state->fifo_slots =
167		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
168	    else
169		info->accel_state->fifo_slots =
170		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
171	    if (info->accel_state->fifo_slots >= entries) return;
172	}
173	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
174		       "FIFO timed out: stat=0x%08x\n",
175		       (unsigned int)INREG(R600_GRBM_STATUS));
176	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
177		   "FIFO timed out, resetting engine...\n");
178	R600EngineReset(pScrn);
179#ifdef XF86DRI
180	if (info->directRenderingEnabled) {
181	    RADEONCP_RESET(pScrn, info);
182	    RADEONCP_START(pScrn, info);
183	}
184#endif
185    }
186}
187
188/* Flush all dirty data in the Pixel Cache to memory */
189void RADEONEngineFlush(ScrnInfoPtr pScrn)
190{
191    RADEONInfoPtr  info       = RADEONPTR(pScrn);
192    unsigned char *RADEONMMIO = info->MMIO;
193    int            i;
194
195    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
196	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
197		RADEON_RB3D_DC_FLUSH_ALL,
198		~RADEON_RB3D_DC_FLUSH_ALL);
199	for (i = 0; i < RADEON_TIMEOUT; i++) {
200	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
201		break;
202	}
203	if (i == RADEON_TIMEOUT) {
204	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
205			   "DC flush timeout: %x\n",
206			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
207	}
208    } else {
209	OUTREGP(R300_DSTCACHE_CTLSTAT,
210		R300_RB2D_DC_FLUSH_ALL,
211		~R300_RB2D_DC_FLUSH_ALL);
212	for (i = 0; i < RADEON_TIMEOUT; i++) {
213	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
214		break;
215	}
216	if (i == RADEON_TIMEOUT) {
217	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
218			   "DC flush timeout: %x\n",
219			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
220	}
221    }
222}
223
224/* Reset graphics card to known state */
225void RADEONEngineReset(ScrnInfoPtr pScrn)
226{
227    RADEONInfoPtr  info       = RADEONPTR(pScrn);
228    unsigned char *RADEONMMIO = info->MMIO;
229    uint32_t       clock_cntl_index;
230    uint32_t       mclk_cntl;
231    uint32_t       rbbm_soft_reset;
232    uint32_t       host_path_cntl;
233
234    /* The following RBBM_SOFT_RESET sequence can help un-wedge
235     * an R300 after the command processor got stuck.
236     */
237    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
238    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
239                                   RADEON_SOFT_RESET_CP |
240                                   RADEON_SOFT_RESET_HI |
241                                   RADEON_SOFT_RESET_SE |
242                                   RADEON_SOFT_RESET_RE |
243                                   RADEON_SOFT_RESET_PP |
244                                   RADEON_SOFT_RESET_E2 |
245                                   RADEON_SOFT_RESET_RB));
246    INREG(RADEON_RBBM_SOFT_RESET);
247    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
248                                   ~(RADEON_SOFT_RESET_CP |
249                                     RADEON_SOFT_RESET_HI |
250                                     RADEON_SOFT_RESET_SE |
251                                     RADEON_SOFT_RESET_RE |
252                                     RADEON_SOFT_RESET_PP |
253                                     RADEON_SOFT_RESET_E2 |
254                                     RADEON_SOFT_RESET_RB)));
255    INREG(RADEON_RBBM_SOFT_RESET);
256    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
257    INREG(RADEON_RBBM_SOFT_RESET);
258
259    RADEONEngineFlush(pScrn);
260
261    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
262    RADEONPllErrataAfterIndex(info);
263
264#if 0 /* taken care of by new PM code */
265    /* Some ASICs have bugs with dynamic-on feature, which are
266     * ASIC-version dependent, so we force all blocks on for now
267     */
268    if (info->HasCRTC2) {
269	uint32_t tmp;
270
271	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
272	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
273				  RADEON_CP_MAX_DYN_STOP_LAT |
274				  RADEON_SCLK_FORCEON_MASK));
275
276	if (info->ChipFamily == CHIP_FAMILY_RV200) {
277	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
278	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
279	}
280    }
281#endif /* new PM code */
282
283    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
284
285#if 0 /* handled by new PM code */
286    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
287			      RADEON_FORCEON_MCLKA |
288			      RADEON_FORCEON_MCLKB |
289			      RADEON_FORCEON_YCLKA |
290			      RADEON_FORCEON_YCLKB |
291			      RADEON_FORCEON_MC |
292			      RADEON_FORCEON_AIC));
293#endif /* new PM code */
294
295    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
296     * unexpected behaviour on some machines.  Here we use
297     * RADEON_HOST_PATH_CNTL to reset it.
298     */
299    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
300    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
301
302    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
303	uint32_t tmp;
304
305	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
306					RADEON_SOFT_RESET_CP |
307					RADEON_SOFT_RESET_HI |
308					RADEON_SOFT_RESET_E2));
309	INREG(RADEON_RBBM_SOFT_RESET);
310	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
311	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
312	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
313    } else {
314	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
315					RADEON_SOFT_RESET_CP |
316					RADEON_SOFT_RESET_SE |
317					RADEON_SOFT_RESET_RE |
318					RADEON_SOFT_RESET_PP |
319					RADEON_SOFT_RESET_E2 |
320					RADEON_SOFT_RESET_RB));
321	INREG(RADEON_RBBM_SOFT_RESET);
322	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
323					~(RADEON_SOFT_RESET_CP |
324					  RADEON_SOFT_RESET_SE |
325					  RADEON_SOFT_RESET_RE |
326					  RADEON_SOFT_RESET_PP |
327					  RADEON_SOFT_RESET_E2 |
328					  RADEON_SOFT_RESET_RB)));
329	INREG(RADEON_RBBM_SOFT_RESET);
330    }
331
332    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
333	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
334
335    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
336    RADEONPllErrataAfterIndex(info);
337    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
338}
339
340/* Reset graphics card to known state */
341static void R600EngineReset(ScrnInfoPtr pScrn)
342{
343    RADEONInfoPtr  info       = RADEONPTR(pScrn);
344    unsigned char *RADEONMMIO = info->MMIO;
345    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
346
347    cp_ptr = INREG(R600_CP_RB_WPTR);
348
349    cp_me_cntl = INREG(R600_CP_ME_CNTL);
350    OUTREG(R600_CP_ME_CNTL, 0x10000000);
351
352    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
353    INREG(R600_GRBM_SOFT_RESET);
354    usleep (50);
355    OUTREG(R600_GRBM_SOFT_RESET, 0);
356    INREG(R600_GRBM_SOFT_RESET);
357
358    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
359    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
360    OUTREG(R600_CP_RB_CNTL, 0x80000000);
361
362    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
363    OUTREG(R600_CP_RB_WPTR, cp_ptr);
364    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
365    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
366
367}
368
369/* Restore the acceleration hardware to its previous state */
370void RADEONEngineRestore(ScrnInfoPtr pScrn)
371{
372    RADEONInfoPtr  info       = RADEONPTR(pScrn);
373    unsigned char *RADEONMMIO = info->MMIO;
374
375    if (info->cs)
376      return;
377
378    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
379		   "EngineRestore (%d/%d)\n",
380		   info->CurrentLayout.pixel_code,
381		   info->CurrentLayout.bitsPerPixel);
382
383    /* Setup engine location. This shouldn't be necessary since we
384     * set them appropriately before any accel ops, but let's avoid
385     * random bogus DMA in case we inadvertently trigger the engine
386     * in the wrong place (happened).
387     */
388    RADEONWaitForFifo(pScrn, 2);
389    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
391
392    RADEONWaitForFifo(pScrn, 1);
393#if X_BYTE_ORDER == X_BIG_ENDIAN
394    OUTREGP(RADEON_DP_DATATYPE,
395	    RADEON_HOST_BIG_ENDIAN_EN,
396	    ~RADEON_HOST_BIG_ENDIAN_EN);
397#else
398    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
399#endif
400
401    /* Restore SURFACE_CNTL */
402    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
403
404    RADEONWaitForFifo(pScrn, 1);
405    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
406					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
407    RADEONWaitForFifo(pScrn, 1);
408    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
409				       | RADEON_GMC_BRUSH_SOLID_COLOR
410				       | RADEON_GMC_SRC_DATATYPE_COLOR));
411
412    RADEONWaitForFifo(pScrn, 5);
413    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
414    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
415    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
416    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
417    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
418
419    RADEONWaitForIdleMMIO(pScrn);
420
421    info->accel_state->XInited3D = FALSE;
422}
423
424static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
425{
426    RADEONInfoPtr info = RADEONPTR(pScrn);
427    if (info->dri->pKernelDRMVersion->version_major < 2) {
428        drm_radeon_getparam_t np;
429
430        memset(&np, 0, sizeof(np));
431        np.param = RADEON_PARAM_NUM_GB_PIPES;
432        np.value = num_pipes;
433        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
434    } else {
435        struct drm_radeon_info np2;
436        np2.value = (unsigned long)num_pipes;
437        np2.request = RADEON_INFO_NUM_GB_PIPES;
438        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
439    }
440}
441
442/* Initialize the acceleration hardware */
443void RADEONEngineInit(ScrnInfoPtr pScrn)
444{
445    RADEONInfoPtr  info       = RADEONPTR(pScrn);
446    unsigned char *RADEONMMIO = info->MMIO;
447    int datatype = 0;
448    info->accel_state->num_gb_pipes = 0;
449
450    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
451		   "EngineInit (%d/%d)\n",
452		   info->CurrentLayout.pixel_code,
453		   info->CurrentLayout.bitsPerPixel);
454
455#ifdef XF86DRI
456    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
457	int num_pipes;
458
459	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
460	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
461		       "Failed to determine num pipes from DRM, falling back to "
462		       "manual look-up!\n");
463	    info->accel_state->num_gb_pipes = 0;
464	} else {
465	    info->accel_state->num_gb_pipes = num_pipes;
466	}
467    }
468#endif
469
470    if (!info->cs) {
471	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
472	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
473	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
474	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
475	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
476	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
477	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
478	    IS_R500_3D) {
479	    if (info->accel_state->num_gb_pipes == 0) {
480		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
481
482		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
483		if (IS_R500_3D)
484		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
485	    }
486	} else {
487	    if (info->accel_state->num_gb_pipes == 0) {
488		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
489		    (info->ChipFamily == CHIP_FAMILY_R350)) {
490		    /* R3xx chips */
491		    info->accel_state->num_gb_pipes = 2;
492		} else {
493		    /* RV3xx chips */
494		    info->accel_state->num_gb_pipes = 1;
495		}
496	    }
497	}
498
499	/* SE cards only have 1 quadpipe */
500	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
501	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
502	    (info->Chipset == PCI_CHIP_R300_AD) ||
503	    (info->Chipset == PCI_CHIP_R350_AH))
504	    info->accel_state->num_gb_pipes = 1;
505
506	if (IS_R300_3D || IS_R500_3D)
507	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
508		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
509
510	if (IS_R300_3D || IS_R500_3D) {
511	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
512
513	    switch(info->accel_state->num_gb_pipes) {
514	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
515	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
516	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
517	    default:
518	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
519	    }
520
521	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
522	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
523	    if (info->ChipFamily >= CHIP_FAMILY_R420)
524		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
525	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
526					     R300_DC_AUTOFLUSH_ENABLE |
527					     R300_DC_DC_DISABLE_IGNORE_PE));
528	} else
529	    OUTREG(RADEON_RB3D_CNTL, 0);
530
531	RADEONEngineReset(pScrn);
532    }
533
534    switch (info->CurrentLayout.pixel_code) {
535    case 8:  datatype = 2; break;
536    case 15: datatype = 3; break;
537    case 16: datatype = 4; break;
538    case 24: datatype = 5; break;
539    case 32: datatype = 6; break;
540    default:
541	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
542		       "Unknown depth/bpp = %d/%d (code = %d)\n",
543		       info->CurrentLayout.depth,
544		       info->CurrentLayout.bitsPerPixel,
545		       info->CurrentLayout.pixel_code);
546    }
547
548    info->accel_state->dp_gui_master_cntl =
549	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
550	 | RADEON_GMC_CLR_CMP_CNTL_DIS
551	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
552
553    RADEONEngineRestore(pScrn);
554}
555
556uint32_t radeonGetPixmapOffset(PixmapPtr pPix)
557{
558    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
559    RADEONInfoPtr info = RADEONPTR(pScrn);
560    uint32_t offset = 0;
561    if (info->cs)
562	return 0;
563#ifdef USE_EXA
564    if (info->useEXA) {
565	offset = exaGetPixmapOffset(pPix);
566    } else
567#endif
568    {
569	offset = pPix->devPrivate.ptr - info->FB;
570    }
571    offset += info->fbLocation + pScrn->fbOffset;
572    return offset;
573}
574
575int radeon_cs_space_remaining(ScrnInfoPtr pScrn)
576{
577    RADEONInfoPtr info = RADEONPTR(pScrn);
578
579#ifdef XF86DRM_MODE
580    if (info->cs)
581	return (info->cs->ndw - info->cs->cdw);
582    else
583#endif
584        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
585}
586
587#define ACCEL_MMIO
588#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
589#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
590#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
591#define FINISH_ACCEL()
592
593#include "radeon_commonfuncs.c"
594#if defined(RENDER) && defined(USE_XAA)
595#include "radeon_render.c"
596#endif
597#include "radeon_accelfuncs.c"
598
599#undef ACCEL_MMIO
600#undef ACCEL_PREAMBLE
601#undef BEGIN_ACCEL
602#undef OUT_ACCEL_REG
603#undef FINISH_ACCEL
604
605#ifdef XF86DRI
606
607#define ACCEL_CP
608#define ACCEL_PREAMBLE()						\
609    RING_LOCALS;							\
610    RADEONCP_REFRESH(pScrn, info)
611#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
612#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
613#define FINISH_ACCEL()          ADVANCE_RING()
614
615
616#include "radeon_commonfuncs.c"
617#if defined(RENDER) && defined(USE_XAA)
618#include "radeon_render.c"
619#endif
620#include "radeon_accelfuncs.c"
621
622#undef ACCEL_CP
623#undef ACCEL_PREAMBLE
624#undef BEGIN_ACCEL
625#undef OUT_ACCEL_REG
626#undef FINISH_ACCEL
627
628/* Stop the CP */
629int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
630{
631    drm_radeon_cp_stop_t  stop;
632    int              ret, i;
633
634    stop.flush = 1;
635    stop.idle  = 1;
636
637    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
638			  sizeof(drm_radeon_cp_stop_t));
639
640    if (ret == 0) {
641	return 0;
642    } else if (errno != EBUSY) {
643	return -errno;
644    }
645
646    stop.flush = 0;
647
648    i = 0;
649    do {
650	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
651			      sizeof(drm_radeon_cp_stop_t));
652    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
653
654    if (ret == 0) {
655	return 0;
656    } else if (errno != EBUSY) {
657	return -errno;
658    }
659
660    stop.idle = 0;
661
662    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
663			&stop, sizeof(drm_radeon_cp_stop_t))) {
664	return -errno;
665    } else {
666	return 0;
667    }
668}
669
670#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
671
672/* Get an indirect buffer for the CP 2D acceleration commands  */
673drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
674{
675    RADEONInfoPtr  info = RADEONPTR(pScrn);
676    drmDMAReq      dma;
677    drmBufPtr      buf = NULL;
678    int            indx = 0;
679    int            size = 0;
680    int            i = 0;
681    int            ret;
682
683#if 0
684    /* FIXME: pScrn->pScreen has not been initialized when this is first
685     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
686     * the screen index from pScrn, which is initialized, and then get
687     * the screen from screenInfo.screens[index], but that is a hack.
688     */
689    dma.context = DRIGetContext(pScrn->pScreen);
690#else
691    /* This is the X server's context */
692    dma.context = 0x00000001;
693#endif
694
695    dma.send_count    = 0;
696    dma.send_list     = NULL;
697    dma.send_sizes    = NULL;
698    dma.flags         = 0;
699    dma.request_count = 1;
700    dma.request_size  = RADEON_BUFFER_SIZE;
701    dma.request_list  = &indx;
702    dma.request_sizes = &size;
703    dma.granted_count = 0;
704
705    while (1) {
706	do {
707	    ret = drmDMA(info->dri->drmFD, &dma);
708	    if (ret && ret != -EBUSY) {
709		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
710			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
711	    }
712	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
713
714	if (ret == 0) {
715	    buf = &info->dri->buffers->list[indx];
716	    buf->used = 0;
717	    if (RADEON_VERBOSE) {
718		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
719			   "   GetBuffer returning %d %p\n",
720			   buf->idx, buf->address);
721	    }
722	    return buf;
723	}
724
725	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
726		   "GetBuffer timed out, resetting engine...\n");
727
728	if (info->ChipFamily < CHIP_FAMILY_R600) {
729	    RADEONEngineReset(pScrn);
730	    RADEONEngineRestore(pScrn);
731	} else
732	    R600EngineReset(pScrn);
733
734	/* Always restart the engine when doing CP 2D acceleration */
735	RADEONCP_RESET(pScrn, info);
736	RADEONCP_START(pScrn, info);
737    }
738}
739
740/* Flush the indirect buffer to the kernel for submission to the card */
741void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
742{
743    RADEONInfoPtr      info   = RADEONPTR(pScrn);
744    drmBufPtr          buffer = info->cp->indirectBuffer;
745    int                start  = info->cp->indirectStart;
746    drm_radeon_indirect_t  indirect;
747
748    assert(!info->cs);
749    if (!buffer) return;
750    if (start == buffer->used && !discard) return;
751
752    if (RADEON_VERBOSE) {
753	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
754		   buffer->idx);
755    }
756
757    if (info->ChipFamily >= CHIP_FAMILY_R600) {
758	if (buffer->used & 0x3c) {
759	    RING_LOCALS;
760
761	    while (buffer->used & 0x3c) {
762		BEGIN_RING(1);
763		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
764		ADVANCE_RING();
765	    }
766	}
767    }
768
769    indirect.idx     = buffer->idx;
770    indirect.start   = start;
771    indirect.end     = buffer->used;
772    indirect.discard = discard;
773
774    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
775			&indirect, sizeof(drm_radeon_indirect_t));
776
777    if (discard) {
778	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
779	info->cp->indirectStart  = 0;
780    } else {
781	/* Start on a double word boundary */
782	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
783	if (RADEON_VERBOSE) {
784	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
785		       info->cp->indirectStart);
786	}
787    }
788}
789
790/* Flush and release the indirect buffer */
791void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
792{
793    RADEONInfoPtr      info   = RADEONPTR(pScrn);
794    drmBufPtr          buffer = info->cp->indirectBuffer;
795    int                start  = info->cp->indirectStart;
796    drm_radeon_indirect_t  indirect;
797
798    assert(!info->cs);
799    if (info->ChipFamily >= CHIP_FAMILY_R600) {
800	if (buffer && (buffer->used & 0x3c)) {
801	    RING_LOCALS;
802
803	    while (buffer->used & 0x3c) {
804		BEGIN_RING(1);
805		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
806		ADVANCE_RING();
807	    }
808	}
809    }
810
811    info->cp->indirectBuffer = NULL;
812    info->cp->indirectStart  = 0;
813
814    if (!buffer) return;
815
816    if (RADEON_VERBOSE) {
817	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
818		   buffer->idx);
819    }
820
821    indirect.idx     = buffer->idx;
822    indirect.start   = start;
823    indirect.end     = buffer->used;
824    indirect.discard = 1;
825
826    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
827			&indirect, sizeof(drm_radeon_indirect_t));
828}
829
830/** \brief Calculate HostDataBlit parameters from pointer and pitch
831 *
832 * This is a helper for the trivial HostDataBlit users that don't need to worry
833 * about tiling etc.
834 */
835void
836RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
837		     uint32_t *dstPitchOff, int *x, int *y)
838{
839    RADEONInfoPtr info = RADEONPTR( pScrn );
840    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
841
842    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
843    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
844    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
845}
846
847/* Set up a hostdata blit to transfer data from system memory to the
848 * framebuffer. Returns the address where the data can be written to and sets
849 * the dstPitch and hpass variables as required.
850 */
851uint8_t*
852RADEONHostDataBlit(
853    ScrnInfoPtr pScrn,
854    unsigned int cpp,
855    unsigned int w,
856    uint32_t dstPitchOff,
857    uint32_t *bufPitch,
858    int x,
859    int *y,
860    unsigned int *h,
861    unsigned int *hpass
862){
863    RADEONInfoPtr info = RADEONPTR( pScrn );
864    uint32_t format, dwords;
865    uint8_t *ret;
866    RING_LOCALS;
867
868    if ( *h == 0 )
869    {
870	return NULL;
871    }
872
873    switch ( cpp )
874    {
875    case 4:
876	format = RADEON_GMC_DST_32BPP;
877	*bufPitch = 4 * w;
878	break;
879    case 2:
880	format = RADEON_GMC_DST_16BPP;
881	*bufPitch = 2 * RADEON_ALIGN(w, 2);
882	break;
883    case 1:
884	format = RADEON_GMC_DST_8BPP_CI;
885	*bufPitch = RADEON_ALIGN(w, 4);
886	break;
887    default:
888	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
889		    "%s: Unsupported cpp %d!\n", __func__, cpp );
890	return NULL;
891    }
892
893#if X_BYTE_ORDER == X_BIG_ENDIAN
894    /* Swap doesn't work on R300 and later, it's handled during the
895     * copy to ind. buffer pass
896     */
897    if (info->ChipFamily < CHIP_FAMILY_R300) {
898        BEGIN_RING(2);
899	if (cpp == 2)
900	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
901			 RADEON_HOST_DATA_SWAP_HDW);
902	else if (cpp == 1)
903	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
904			 RADEON_HOST_DATA_SWAP_32BIT);
905	else
906	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
907			 RADEON_HOST_DATA_SWAP_NONE);
908	ADVANCE_RING();
909    }
910#endif
911
912    /*RADEON_PURGE_CACHE();
913      RADEON_WAIT_UNTIL_IDLE();*/
914
915    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
916    dwords = *hpass * *bufPitch / 4;
917
918    BEGIN_RING( dwords + 10 );
919    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
920    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
921	    | RADEON_GMC_DST_CLIPPING
922	    | RADEON_GMC_BRUSH_NONE
923	    | format
924	    | RADEON_GMC_SRC_DATATYPE_COLOR
925	    | RADEON_ROP3_S
926	    | RADEON_DP_SRC_SOURCE_HOST_DATA
927	    | RADEON_GMC_CLR_CMP_CNTL_DIS
928	    | RADEON_GMC_WR_MSK_DIS );
929    OUT_RING( dstPitchOff );
930    OUT_RING( (*y << 16) | x );
931    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
932    OUT_RING( 0xffffffff );
933    OUT_RING( 0xffffffff );
934    OUT_RING( *y << 16 | x );
935    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
936    OUT_RING( dwords );
937
938    ret = ( uint8_t* )&__head[__count];
939
940    __count += dwords;
941    ADVANCE_RING();
942
943    *y += *hpass;
944    *h -= *hpass;
945
946    return ret;
947}
948
949void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
950{
951    switch(swap) {
952    case RADEON_HOST_DATA_SWAP_HDW:
953        {
954	    unsigned int *d = (unsigned int *)dst;
955	    unsigned int *s = (unsigned int *)src;
956	    unsigned int nwords = size >> 2;
957
958	    for (; nwords > 0; --nwords, ++d, ++s)
959		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
960	    return;
961        }
962    case RADEON_HOST_DATA_SWAP_32BIT:
963        {
964	    unsigned int *d = (unsigned int *)dst;
965	    unsigned int *s = (unsigned int *)src;
966	    unsigned int nwords = size >> 2;
967
968	    for (; nwords > 0; --nwords, ++d, ++s)
969#ifdef __powerpc__
970		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
971#else
972		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
973			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
974#endif
975	    return;
976        }
977    case RADEON_HOST_DATA_SWAP_16BIT:
978        {
979	    unsigned short *d = (unsigned short *)dst;
980	    unsigned short *s = (unsigned short *)src;
981	    unsigned int nwords = size >> 1;
982
983	    for (; nwords > 0; --nwords, ++d, ++s)
984#ifdef __powerpc__
985		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
986#else
987	        *d = (*s >> 8) | (*s << 8);
988#endif
989	    return;
990	}
991    }
992    if (src != dst)
993	memcpy(dst, src, size);
994}
995
996/* Copies a single pass worth of data for a hostdata blit set up by
997 * RADEONHostDataBlit().
998 */
999void
1000RADEONHostDataBlitCopyPass(
1001    ScrnInfoPtr pScrn,
1002    unsigned int cpp,
1003    uint8_t *dst,
1004    uint8_t *src,
1005    unsigned int hpass,
1006    unsigned int dstPitch,
1007    unsigned int srcPitch
1008){
1009
1010#if X_BYTE_ORDER == X_BIG_ENDIAN
1011    RADEONInfoPtr info = RADEONPTR( pScrn );
1012#endif
1013
1014    /* RADEONHostDataBlitCopy can return NULL ! */
1015    if( (dst==NULL) || (src==NULL)) return;
1016
1017    if ( dstPitch == srcPitch )
1018    {
1019#if X_BYTE_ORDER == X_BIG_ENDIAN
1020        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1021	    switch(cpp) {
1022	    case 1:
1023		RADEONCopySwap(dst, src, hpass * dstPitch,
1024			       RADEON_HOST_DATA_SWAP_32BIT);
1025		return;
1026	    case 2:
1027	        RADEONCopySwap(dst, src, hpass * dstPitch,
1028			       RADEON_HOST_DATA_SWAP_HDW);
1029		return;
1030	    }
1031	}
1032#endif
1033	memcpy( dst, src, hpass * dstPitch );
1034    }
1035    else
1036    {
1037	unsigned int minPitch = min( dstPitch, srcPitch );
1038	while ( hpass-- )
1039	{
1040#if X_BYTE_ORDER == X_BIG_ENDIAN
1041            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1042		switch(cpp) {
1043		case 1:
1044		    RADEONCopySwap(dst, src, minPitch,
1045				   RADEON_HOST_DATA_SWAP_32BIT);
1046		    goto next;
1047		case 2:
1048	            RADEONCopySwap(dst, src, minPitch,
1049				   RADEON_HOST_DATA_SWAP_HDW);
1050		    goto next;
1051		}
1052	    }
1053#endif
1054	    memcpy( dst, src, minPitch );
1055#if X_BYTE_ORDER == X_BIG_ENDIAN
1056	next:
1057#endif
1058	    src += srcPitch;
1059	    dst += dstPitch;
1060	}
1061    }
1062}
1063
1064#endif
1065
1066Bool RADEONAccelInit(ScreenPtr pScreen)
1067{
1068    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1069    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1070
1071#ifdef USE_EXA
1072    if (info->useEXA) {
1073# ifdef XF86DRI
1074	if (info->directRenderingEnabled) {
1075#ifdef XF86DRM_MODE
1076	    if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
1077		if (!EVERGREENDrawInit(pScreen))
1078		    return FALSE;
1079	    } else
1080#endif
1081	      if (info->ChipFamily >= CHIP_FAMILY_R600) {
1082		if (!R600DrawInit(pScreen))
1083		    return FALSE;
1084	    } else {
1085		if (!RADEONDrawInitCP(pScreen))
1086		    return FALSE;
1087	    }
1088	} else
1089# endif /* XF86DRI */
1090	{
1091	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1092		return FALSE;
1093	    else {
1094		if (!RADEONDrawInitMMIO(pScreen))
1095		    return FALSE;
1096	    }
1097	}
1098    }
1099#endif /* USE_EXA */
1100#ifdef USE_XAA
1101    if (!info->useEXA) {
1102	XAAInfoRecPtr  a;
1103
1104	if (info->ChipFamily >= CHIP_FAMILY_R600)
1105	    return FALSE;
1106
1107	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1108	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1109	    return FALSE;
1110	}
1111
1112#ifdef XF86DRI
1113	if (info->directRenderingEnabled)
1114	    RADEONAccelInitCP(pScreen, a);
1115	else
1116#endif /* XF86DRI */
1117	    RADEONAccelInitMMIO(pScreen, a);
1118
1119	RADEONEngineInit(pScrn);
1120
1121	if (!XAAInit(pScreen, a)) {
1122	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1123	    return FALSE;
1124	}
1125    }
1126#endif /* USE_XAA */
1127    return TRUE;
1128}
1129
1130void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1131{
1132    RADEONInfoPtr info = RADEONPTR (pScrn);
1133
1134#ifdef XF86DRI
1135    if (info->directRenderingEnabled) {
1136	drm_radeon_sarea_t *pSAREAPriv;
1137
1138	if (!info->kms_enabled) {
1139	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1140	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1141	}
1142	RADEONInit3DEngineCP(pScrn);
1143    } else
1144#endif
1145	RADEONInit3DEngineMMIO(pScrn);
1146
1147    info->accel_state->XInited3D = TRUE;
1148}
1149
1150#ifdef USE_XAA
1151#ifdef XF86DRI
1152Bool
1153RADEONSetupMemXAA_DRI(ScreenPtr pScreen)
1154{
1155    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1156    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1157    int            cpp = info->CurrentLayout.pixel_bytes;
1158    int            depthCpp = (info->dri->depthBits - 8) / 4;
1159    int            width_bytes = pScrn->displayWidth * cpp;
1160    int            bufferSize;
1161    int            depthSize;
1162    int            l;
1163    int            scanlines;
1164    int            texsizerequest;
1165    BoxRec         MemBox;
1166    FBAreaPtr      fbarea;
1167
1168    info->dri->frontOffset = 0;
1169    info->dri->frontPitch = pScrn->displayWidth;
1170    info->dri->backPitch = pScrn->displayWidth;
1171
1172    /* make sure we use 16 line alignment for tiling (8 might be enough).
1173     * Might need that for non-XF86DRI too?
1174     */
1175    if (info->allowColorTiling) {
1176	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1177		      RADEON_GPU_PAGE_SIZE);
1178    } else {
1179        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1180		      RADEON_GPU_PAGE_SIZE);
1181    }
1182
1183    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1184     * which is always the case if color tiling is used due to color pitch
1185     * but not necessarily otherwise, and its height a multiple of 16 lines.
1186     */
1187    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1188    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1189		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1190
1191    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1192	       "Using %d MB GART aperture\n", info->dri->gartSize);
1193    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1194	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1195    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1196	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1197    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1198	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1199
1200    /* Try for front, back, depth, and three framebuffers worth of
1201     * pixmap cache.  Should be enough for a fullscreen background
1202     * image plus some leftovers.
1203     * If the FBTexPercent option was used, try to achieve that percentage instead,
1204     * but still have at least one pixmap buffer (get problems with xvideo/render
1205     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1206     * probably useless for XAA.
1207     */
1208    if (info->dri->textureSize >= 0) {
1209	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1210			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1211	/* first divide, then multiply or we'll get an overflow (been there...) */
1212			 / 100 * info->dri->textureSize;
1213    }
1214    else {
1215	texsizerequest = (int)info->FbMapSize / 2;
1216    }
1217    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1218
1219    /* If that gives us less than the requested memory, let's
1220     * be greedy and grab some more.  Sorry, I care more about 3D
1221     * performance than playing nicely, and you'll get around a full
1222     * framebuffer's worth of pixmap cache anyway.
1223     */
1224    if (info->dri->textureSize < texsizerequest) {
1225        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1226    }
1227    if (info->dri->textureSize < texsizerequest) {
1228        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1229    }
1230
1231    /* If there's still no space for textures, try without pixmap cache, but
1232     * never use the reserved space, the space hw cursor and PCIGART table might
1233     * use.
1234     */
1235    if (info->dri->textureSize < 0) {
1236	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1237	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1238    }
1239
1240    /* Check to see if there is more room available after the 8192nd
1241     * scanline for textures
1242     */
1243    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1244    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1245	> info->dri->textureSize) {
1246	info->dri->textureSize =
1247		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1248    }
1249
1250    /* If backbuffer is disabled, don't allocate memory for it */
1251    if (info->dri->noBackBuffer) {
1252	info->dri->textureSize += bufferSize;
1253    }
1254
1255    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1256       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1257       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1258       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1259       area otherwise).
1260       This might cause some space at the end of the video memory to be unused, since it
1261       can't be used (?) due to that log_tex_granularity thing???
1262       Could use different copyscreentoscreen function for the pageflip copies
1263       (which would use different src and dst offsets) to avoid this. */
1264    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1265	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1266			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1267    }
1268    if (info->dri->textureSize > 0) {
1269	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1270	if (l < RADEON_LOG_TEX_GRANULARITY)
1271	    l = RADEON_LOG_TEX_GRANULARITY;
1272	/* Round the texture size up to the nearest whole number of
1273	 * texture regions.  Again, be greedy about this, don't
1274	 * round down.
1275	 */
1276	info->dri->log2TexGran = l;
1277	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1278    } else {
1279	info->dri->textureSize = 0;
1280    }
1281
1282    /* Set a minimum usable local texture heap size.  This will fit
1283     * two 256x256x32bpp textures.
1284     */
1285    if (info->dri->textureSize < 512 * 1024) {
1286	info->dri->textureOffset = 0;
1287	info->dri->textureSize = 0;
1288    }
1289
1290    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1291	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1292				    (width_bytes * 16)) * (width_bytes * 16);
1293    }
1294    else {
1295	/* Reserve space for textures */
1296	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1297				     RADEON_GPU_PAGE_SIZE);
1298    }
1299
1300    /* Reserve space for the shared depth
1301     * buffer.
1302     */
1303    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1304			       RADEON_GPU_PAGE_SIZE);
1305
1306    /* Reserve space for the shared back buffer */
1307    if (info->dri->noBackBuffer) {
1308       info->dri->backOffset = info->dri->depthOffset;
1309    } else {
1310       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1311				 RADEON_GPU_PAGE_SIZE);
1312    }
1313
1314    info->dri->backY = info->dri->backOffset / width_bytes;
1315    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1316
1317    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1318    if (scanlines > 8191)
1319	scanlines = 8191;
1320
1321    MemBox.x1 = 0;
1322    MemBox.y1 = 0;
1323    MemBox.x2 = pScrn->displayWidth;
1324    MemBox.y2 = scanlines;
1325
1326    if (!xf86InitFBManager(pScreen, &MemBox)) {
1327        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1328		   "Memory manager initialization to "
1329		   "(%d,%d) (%d,%d) failed\n",
1330		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1331	return FALSE;
1332    } else {
1333	int  width, height;
1334
1335	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1336		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1337		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1338	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1339	   aligned... sigh */
1340	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1341						pScrn->displayWidth,
1342						info->allowColorTiling ?
1343						(RADEON_ALIGN(pScrn->virtualY, 16))
1344						- pScrn->virtualY + 2 : 2,
1345						0, NULL, NULL,
1346						NULL))) {
1347	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1348		       "Reserved area from (%d,%d) to (%d,%d)\n",
1349		       fbarea->box.x1, fbarea->box.y1,
1350		       fbarea->box.x2, fbarea->box.y2);
1351	} else {
1352	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1353	}
1354
1355	RADEONDRIAllocatePCIGARTTable(pScreen);
1356
1357	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1358					  &height, 0, 0, 0)) {
1359	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1360		       "Largest offscreen area available: %d x %d\n",
1361		       width, height);
1362
1363	    /* Lines in offscreen area needed for depth buffer and
1364	     * textures
1365	     */
1366	    info->dri->depthTexLines = (scanlines
1367					- info->dri->depthOffset / width_bytes);
1368	    info->dri->backLines	    = (scanlines
1369					       - info->dri->backOffset / width_bytes
1370					       - info->dri->depthTexLines);
1371	    info->dri->backArea	    = NULL;
1372	} else {
1373	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1374		       "Unable to determine largest offscreen area "
1375		       "available\n");
1376	    return FALSE;
1377	}
1378    }
1379
1380    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1381	       "Will use front buffer at offset 0x%x\n",
1382	       info->dri->frontOffset);
1383
1384    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1385	       "Will use back buffer at offset 0x%x\n",
1386	       info->dri->backOffset);
1387    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1388	       "Will use depth buffer at offset 0x%x\n",
1389	       info->dri->depthOffset);
1390    if (info->cardType==CARD_PCIE)
1391    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1392	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1393		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1394    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1395	       "Will use %d kb for textures at offset 0x%x\n",
1396	       info->dri->textureSize/1024, info->dri->textureOffset);
1397
1398    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1399				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1400
1401    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1402				  ((info->dri->backOffset + info->fbLocation) >> 10));
1403
1404    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1405				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1406    return TRUE;
1407}
1408#endif /* XF86DRI */
1409
1410Bool
1411RADEONSetupMemXAA(ScreenPtr pScreen)
1412{
1413    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1414    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1415    BoxRec         MemBox;
1416    int            y2;
1417
1418    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1419
1420    MemBox.x1 = 0;
1421    MemBox.y1 = 0;
1422    MemBox.x2 = pScrn->displayWidth;
1423    y2 = info->FbMapSize / width_bytes;
1424    if (y2 >= 32768)
1425	y2 = 32767; /* because MemBox.y2 is signed short */
1426    MemBox.y2 = y2;
1427
1428    /* The acceleration engine uses 14 bit
1429     * signed coordinates, so we can't have any
1430     * drawable caches beyond this region.
1431     */
1432    if (MemBox.y2 > 8191)
1433	MemBox.y2 = 8191;
1434
1435    if (!xf86InitFBManager(pScreen, &MemBox)) {
1436	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1437		   "Memory manager initialization to "
1438		   "(%d,%d) (%d,%d) failed\n",
1439		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1440	return FALSE;
1441    } else {
1442	int       width, height;
1443	FBAreaPtr fbarea;
1444
1445	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1446		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1447		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1448	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1449						pScrn->displayWidth,
1450						info->allowColorTiling ?
1451						(RADEON_ALIGN(pScrn->virtualY, 16))
1452						- pScrn->virtualY + 2 : 2,
1453						0, NULL, NULL,
1454						NULL))) {
1455	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1456		       "Reserved area from (%d,%d) to (%d,%d)\n",
1457		       fbarea->box.x1, fbarea->box.y1,
1458		       fbarea->box.x2, fbarea->box.y2);
1459	} else {
1460	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1461	}
1462	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1463					      0, 0, 0)) {
1464	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1465		       "Largest offscreen area available: %d x %d\n",
1466		       width, height);
1467	}
1468	return TRUE;
1469    }
1470}
1471#endif /* USE_XAA */
1472