radeon_accel.c revision 209ff23f
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78				/* Driver data structures */
79#include "radeon.h"
80#include "radeon_reg.h"
81#include "radeon_macros.h"
82#include "radeon_probe.h"
83#include "radeon_version.h"
84#ifdef XF86DRI
85#define _XF86DRI_SERVER_
86#include "radeon_dri.h"
87#include "radeon_common.h"
88#include "radeon_sarea.h"
89#endif
90
91				/* Line support */
92#include "miline.h"
93
94				/* X and server generic header files */
95#include "xf86.h"
96
97
98#ifdef USE_XAA
99static struct {
100    int rop;
101    int pattern;
102} RADEON_ROP[] = {
103    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
104    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
105    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
106    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
107    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
108    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
109    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
110    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
111    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
112    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
113    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
114    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
115    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
116    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
117    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
118    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
119};
120#endif
121
122/* The FIFO has 64 slots.  This routines waits until at least `entries'
123 * of these slots are empty.
124 */
125void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
126{
127    RADEONInfoPtr  info       = RADEONPTR(pScrn);
128    unsigned char *RADEONMMIO = info->MMIO;
129    int            i;
130
131    for (;;) {
132	for (i = 0; i < RADEON_TIMEOUT; i++) {
133	    info->fifo_slots =
134		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
135	    if (info->fifo_slots >= entries) return;
136	}
137	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
138		       "FIFO timed out: %u entries, stat=0x%08x\n",
139		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
140		       (unsigned int)INREG(RADEON_RBBM_STATUS));
141	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
142		   "FIFO timed out, resetting engine...\n");
143	RADEONEngineReset(pScrn);
144	RADEONEngineRestore(pScrn);
145#ifdef XF86DRI
146	if (info->directRenderingEnabled) {
147	    RADEONCP_RESET(pScrn, info);
148	    RADEONCP_START(pScrn, info);
149	}
150#endif
151    }
152}
153
154/* Flush all dirty data in the Pixel Cache to memory */
155void RADEONEngineFlush(ScrnInfoPtr pScrn)
156{
157    RADEONInfoPtr  info       = RADEONPTR(pScrn);
158    unsigned char *RADEONMMIO = info->MMIO;
159    int            i;
160
161    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
162	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
163		RADEON_RB3D_DC_FLUSH_ALL,
164		~RADEON_RB3D_DC_FLUSH_ALL);
165	for (i = 0; i < RADEON_TIMEOUT; i++) {
166	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
167		break;
168	}
169	if (i == RADEON_TIMEOUT) {
170	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
171			   "DC flush timeout: %x\n",
172			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
173	}
174    } else {
175	OUTREGP(R300_DSTCACHE_CTLSTAT,
176		R300_RB2D_DC_FLUSH_ALL,
177		~R300_RB2D_DC_FLUSH_ALL);
178	for (i = 0; i < RADEON_TIMEOUT; i++) {
179	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
180		break;
181	}
182	if (i == RADEON_TIMEOUT) {
183	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
184			   "DC flush timeout: %x\n",
185			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
186	}
187    }
188}
189
190/* Reset graphics card to known state */
191void RADEONEngineReset(ScrnInfoPtr pScrn)
192{
193    RADEONInfoPtr  info       = RADEONPTR(pScrn);
194    unsigned char *RADEONMMIO = info->MMIO;
195    uint32_t       clock_cntl_index;
196    uint32_t       mclk_cntl;
197    uint32_t       rbbm_soft_reset;
198    uint32_t       host_path_cntl;
199
200    /* The following RBBM_SOFT_RESET sequence can help un-wedge
201     * an R300 after the command processor got stuck.
202     */
203    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
204    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
205                                   RADEON_SOFT_RESET_CP |
206                                   RADEON_SOFT_RESET_HI |
207                                   RADEON_SOFT_RESET_SE |
208                                   RADEON_SOFT_RESET_RE |
209                                   RADEON_SOFT_RESET_PP |
210                                   RADEON_SOFT_RESET_E2 |
211                                   RADEON_SOFT_RESET_RB));
212    INREG(RADEON_RBBM_SOFT_RESET);
213    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
214                                   ~(RADEON_SOFT_RESET_CP |
215                                     RADEON_SOFT_RESET_HI |
216                                     RADEON_SOFT_RESET_SE |
217                                     RADEON_SOFT_RESET_RE |
218                                     RADEON_SOFT_RESET_PP |
219                                     RADEON_SOFT_RESET_E2 |
220                                     RADEON_SOFT_RESET_RB)));
221    INREG(RADEON_RBBM_SOFT_RESET);
222    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
223    INREG(RADEON_RBBM_SOFT_RESET);
224
225    RADEONEngineFlush(pScrn);
226
227    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
228    RADEONPllErrataAfterIndex(info);
229
230#if 0 /* taken care of by new PM code */
231    /* Some ASICs have bugs with dynamic-on feature, which are
232     * ASIC-version dependent, so we force all blocks on for now
233     */
234    if (info->HasCRTC2) {
235	uint32_t tmp;
236
237	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
238	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
239				  RADEON_CP_MAX_DYN_STOP_LAT |
240				  RADEON_SCLK_FORCEON_MASK));
241
242	if (info->ChipFamily == CHIP_FAMILY_RV200) {
243	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
244	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
245	}
246    }
247#endif /* new PM code */
248
249    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
250
251#if 0 /* handled by new PM code */
252    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
253			      RADEON_FORCEON_MCLKA |
254			      RADEON_FORCEON_MCLKB |
255			      RADEON_FORCEON_YCLKA |
256			      RADEON_FORCEON_YCLKB |
257			      RADEON_FORCEON_MC |
258			      RADEON_FORCEON_AIC));
259#endif /* new PM code */
260
261    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
262     * unexpected behaviour on some machines.  Here we use
263     * RADEON_HOST_PATH_CNTL to reset it.
264     */
265    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
266    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
267
268    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
269	uint32_t tmp;
270
271	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
272					RADEON_SOFT_RESET_CP |
273					RADEON_SOFT_RESET_HI |
274					RADEON_SOFT_RESET_E2));
275	INREG(RADEON_RBBM_SOFT_RESET);
276	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
277	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
278	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
279    } else {
280	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
281					RADEON_SOFT_RESET_CP |
282					RADEON_SOFT_RESET_SE |
283					RADEON_SOFT_RESET_RE |
284					RADEON_SOFT_RESET_PP |
285					RADEON_SOFT_RESET_E2 |
286					RADEON_SOFT_RESET_RB));
287	INREG(RADEON_RBBM_SOFT_RESET);
288	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
289					~(RADEON_SOFT_RESET_CP |
290					  RADEON_SOFT_RESET_SE |
291					  RADEON_SOFT_RESET_RE |
292					  RADEON_SOFT_RESET_PP |
293					  RADEON_SOFT_RESET_E2 |
294					  RADEON_SOFT_RESET_RB)));
295	INREG(RADEON_RBBM_SOFT_RESET);
296    }
297
298    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
299    INREG(RADEON_HOST_PATH_CNTL);
300    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
301
302    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
303	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
304
305    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
306    RADEONPllErrataAfterIndex(info);
307    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
308}
309
310/* Restore the acceleration hardware to its previous state */
311void RADEONEngineRestore(ScrnInfoPtr pScrn)
312{
313    RADEONInfoPtr  info       = RADEONPTR(pScrn);
314    unsigned char *RADEONMMIO = info->MMIO;
315
316    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
317		   "EngineRestore (%d/%d)\n",
318		   info->CurrentLayout.pixel_code,
319		   info->CurrentLayout.bitsPerPixel);
320
321    /* Setup engine location. This shouldn't be necessary since we
322     * set them appropriately before any accel ops, but let's avoid
323     * random bogus DMA in case we inadvertently trigger the engine
324     * in the wrong place (happened).
325     */
326    RADEONWaitForFifo(pScrn, 2);
327    OUTREG(RADEON_DST_PITCH_OFFSET, info->dst_pitch_offset);
328    OUTREG(RADEON_SRC_PITCH_OFFSET, info->dst_pitch_offset);
329
330    RADEONWaitForFifo(pScrn, 1);
331#if X_BYTE_ORDER == X_BIG_ENDIAN
332    OUTREGP(RADEON_DP_DATATYPE,
333	    RADEON_HOST_BIG_ENDIAN_EN,
334	    ~RADEON_HOST_BIG_ENDIAN_EN);
335#else
336    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
337#endif
338
339    /* Restore SURFACE_CNTL */
340    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
341
342    RADEONWaitForFifo(pScrn, 1);
343    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
344					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
345    RADEONWaitForFifo(pScrn, 1);
346    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
347				       | RADEON_GMC_BRUSH_SOLID_COLOR
348				       | RADEON_GMC_SRC_DATATYPE_COLOR));
349
350    RADEONWaitForFifo(pScrn, 5);
351    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
352    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
353    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
354    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
355    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
356
357    RADEONWaitForIdleMMIO(pScrn);
358
359    info->XInited3D = FALSE;
360}
361
362/* Initialize the acceleration hardware */
363void RADEONEngineInit(ScrnInfoPtr pScrn)
364{
365    RADEONInfoPtr  info       = RADEONPTR(pScrn);
366    unsigned char *RADEONMMIO = info->MMIO;
367
368    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
369		   "EngineInit (%d/%d)\n",
370		   info->CurrentLayout.pixel_code,
371		   info->CurrentLayout.bitsPerPixel);
372
373#ifdef XF86DRI
374    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
375	drmRadeonGetParam np;
376	int num_pipes;
377
378	memset(&np, 0, sizeof(np));
379	np.param = RADEON_PARAM_NUM_GB_PIPES;
380	np.value = &num_pipes;
381
382	if (drmCommandWriteRead(info->drmFD, DRM_RADEON_GETPARAM, &np,
383				sizeof(np)) < 0) {
384	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
385		       "Failed to determine num pipes from DRM, falling back to "
386		       "manual look-up!\n");
387	    info->num_gb_pipes = 0;
388	} else {
389	    info->num_gb_pipes = num_pipes;
390	}
391    }
392#endif
393
394    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
395	(info->ChipFamily == CHIP_FAMILY_R420)  ||
396	(info->ChipFamily == CHIP_FAMILY_RS600) ||
397	(info->ChipFamily == CHIP_FAMILY_RS690) ||
398	(info->ChipFamily == CHIP_FAMILY_RS740) ||
399	(info->ChipFamily == CHIP_FAMILY_RS400) ||
400	(info->ChipFamily == CHIP_FAMILY_RS480) ||
401	IS_R500_3D) {
402	if (info->num_gb_pipes == 0) {
403	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
404
405	    info->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
406	    if (IS_R500_3D)
407		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
408	}
409    } else {
410	if (info->num_gb_pipes == 0) {
411	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
412		(info->ChipFamily == CHIP_FAMILY_R350)) {
413		/* R3xx chips */
414		info->num_gb_pipes = 2;
415	    } else {
416		/* RV3xx chips */
417		info->num_gb_pipes = 1;
418	    }
419	}
420    }
421
422    if (IS_R300_3D || IS_R500_3D)
423	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
424		   "num pipes is %d\n", info->num_gb_pipes);
425
426    if (IS_R300_3D || IS_R500_3D) {
427	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16);
428
429	switch(info->num_gb_pipes) {
430	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
431	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
432	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
433	default:
434	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
435	}
436
437	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
438	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
439	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
440	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
441					 R300_DC_AUTOFLUSH_ENABLE |
442					 R300_DC_DC_DISABLE_IGNORE_PE));
443    } else
444	OUTREG(RADEON_RB3D_CNTL, 0);
445
446    RADEONEngineReset(pScrn);
447
448    switch (info->CurrentLayout.pixel_code) {
449    case 8:  info->datatype = 2; break;
450    case 15: info->datatype = 3; break;
451    case 16: info->datatype = 4; break;
452    case 24: info->datatype = 5; break;
453    case 32: info->datatype = 6; break;
454    default:
455	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
456		       "Unknown depth/bpp = %d/%d (code = %d)\n",
457		       info->CurrentLayout.depth,
458		       info->CurrentLayout.bitsPerPixel,
459		       info->CurrentLayout.pixel_code);
460    }
461    info->pitch = ((info->CurrentLayout.displayWidth / 8) *
462		   (info->CurrentLayout.pixel_bytes == 3 ? 3 : 1));
463
464    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
465		   "Pitch for acceleration = %d\n", info->pitch);
466
467    info->dp_gui_master_cntl =
468	((info->datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
469	 | RADEON_GMC_CLR_CMP_CNTL_DIS
470	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
471
472#ifdef XF86DRI
473    info->sc_left         = 0x00000000;
474    info->sc_right        = RADEON_DEFAULT_SC_RIGHT_MAX;
475    info->sc_top          = 0x00000000;
476    info->sc_bottom       = RADEON_DEFAULT_SC_BOTTOM_MAX;
477
478    info->re_top_left     = 0x00000000;
479    if (info->ChipFamily <= CHIP_FAMILY_RV280)
480	info->re_width_height = ((0x7ff << RADEON_RE_WIDTH_SHIFT) |
481				 (0x7ff << RADEON_RE_HEIGHT_SHIFT));
482    else
483	info->re_width_height = ((8191 << R300_SCISSOR_X_SHIFT) |
484				 (8191 << R300_SCISSOR_Y_SHIFT));
485
486    info->aux_sc_cntl     = 0x00000000;
487#endif
488
489    RADEONEngineRestore(pScrn);
490}
491
492
493#define ACCEL_MMIO
494#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
495#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
496#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
497#define FINISH_ACCEL()
498
499#include "radeon_commonfuncs.c"
500#if defined(RENDER) && defined(USE_XAA)
501#include "radeon_render.c"
502#endif
503#include "radeon_accelfuncs.c"
504
505#undef ACCEL_MMIO
506#undef ACCEL_PREAMBLE
507#undef BEGIN_ACCEL
508#undef OUT_ACCEL_REG
509#undef FINISH_ACCEL
510
511#ifdef XF86DRI
512
513#define ACCEL_CP
514#define ACCEL_PREAMBLE()						\
515    RING_LOCALS;							\
516    RADEONCP_REFRESH(pScrn, info)
517#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
518#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
519#define FINISH_ACCEL()          ADVANCE_RING()
520
521
522#include "radeon_commonfuncs.c"
523#if defined(RENDER) && defined(USE_XAA)
524#include "radeon_render.c"
525#endif
526#include "radeon_accelfuncs.c"
527
528#undef ACCEL_CP
529#undef ACCEL_PREAMBLE
530#undef BEGIN_ACCEL
531#undef OUT_ACCEL_REG
532#undef FINISH_ACCEL
533
534/* Stop the CP */
535int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
536{
537    drmRadeonCPStop  stop;
538    int              ret, i;
539
540    stop.flush = 1;
541    stop.idle  = 1;
542
543    ret = drmCommandWrite(info->drmFD, DRM_RADEON_CP_STOP, &stop,
544			  sizeof(drmRadeonCPStop));
545
546    if (ret == 0) {
547	return 0;
548    } else if (errno != EBUSY) {
549	return -errno;
550    }
551
552    stop.flush = 0;
553
554    i = 0;
555    do {
556	ret = drmCommandWrite(info->drmFD, DRM_RADEON_CP_STOP, &stop,
557			      sizeof(drmRadeonCPStop));
558    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
559
560    if (ret == 0) {
561	return 0;
562    } else if (errno != EBUSY) {
563	return -errno;
564    }
565
566    stop.idle = 0;
567
568    if (drmCommandWrite(info->drmFD, DRM_RADEON_CP_STOP,
569			&stop, sizeof(drmRadeonCPStop))) {
570	return -errno;
571    } else {
572	return 0;
573    }
574}
575
576/* Get an indirect buffer for the CP 2D acceleration commands  */
577drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
578{
579    RADEONInfoPtr  info = RADEONPTR(pScrn);
580    drmDMAReq      dma;
581    drmBufPtr      buf = NULL;
582    int            indx = 0;
583    int            size = 0;
584    int            i = 0;
585    int            ret;
586
587#if 0
588    /* FIXME: pScrn->pScreen has not been initialized when this is first
589     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
590     * the screen index from pScrn, which is initialized, and then get
591     * the screen from screenInfo.screens[index], but that is a hack.
592     */
593    dma.context = DRIGetContext(pScrn->pScreen);
594#else
595    /* This is the X server's context */
596    dma.context = 0x00000001;
597#endif
598
599    dma.send_count    = 0;
600    dma.send_list     = NULL;
601    dma.send_sizes    = NULL;
602    dma.flags         = 0;
603    dma.request_count = 1;
604    dma.request_size  = RADEON_BUFFER_SIZE;
605    dma.request_list  = &indx;
606    dma.request_sizes = &size;
607    dma.granted_count = 0;
608
609    while (1) {
610	do {
611	    ret = drmDMA(info->drmFD, &dma);
612	    if (ret && ret != -EBUSY) {
613		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
614			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
615	    }
616	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
617
618	if (ret == 0) {
619	    buf = &info->buffers->list[indx];
620	    buf->used = 0;
621	    if (RADEON_VERBOSE) {
622		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
623			   "   GetBuffer returning %d %p\n",
624			   buf->idx, buf->address);
625	    }
626	    return buf;
627	}
628
629	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
630		   "GetBuffer timed out, resetting engine...\n");
631	RADEONEngineReset(pScrn);
632	RADEONEngineRestore(pScrn);
633
634	/* Always restart the engine when doing CP 2D acceleration */
635	RADEONCP_RESET(pScrn, info);
636	RADEONCP_START(pScrn, info);
637    }
638}
639
640/* Flush the indirect buffer to the kernel for submission to the card */
641void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
642{
643    RADEONInfoPtr      info   = RADEONPTR(pScrn);
644    drmBufPtr          buffer = info->indirectBuffer;
645    int                start  = info->indirectStart;
646    drmRadeonIndirect  indirect;
647
648    if (!buffer) return;
649    if (start == buffer->used && !discard) return;
650
651    if (RADEON_VERBOSE) {
652	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
653		   buffer->idx);
654    }
655
656    indirect.idx     = buffer->idx;
657    indirect.start   = start;
658    indirect.end     = buffer->used;
659    indirect.discard = discard;
660
661    drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT,
662			&indirect, sizeof(drmRadeonIndirect));
663
664    if (discard) {
665	info->indirectBuffer = RADEONCPGetBuffer(pScrn);
666	info->indirectStart  = 0;
667    } else {
668	/* Start on a double word boundary */
669	info->indirectStart  = buffer->used = (buffer->used + 7) & ~7;
670	if (RADEON_VERBOSE) {
671	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
672		       info->indirectStart);
673	}
674    }
675}
676
677/* Flush and release the indirect buffer */
678void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
679{
680    RADEONInfoPtr      info   = RADEONPTR(pScrn);
681    drmBufPtr          buffer = info->indirectBuffer;
682    int                start  = info->indirectStart;
683    drmRadeonIndirect  indirect;
684
685    info->indirectBuffer = NULL;
686    info->indirectStart  = 0;
687
688    if (!buffer) return;
689
690    if (RADEON_VERBOSE) {
691	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
692		   buffer->idx);
693    }
694
695    indirect.idx     = buffer->idx;
696    indirect.start   = start;
697    indirect.end     = buffer->used;
698    indirect.discard = 1;
699
700    drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT,
701			&indirect, sizeof(drmRadeonIndirect));
702}
703
704/** \brief Calculate HostDataBlit parameters from pointer and pitch
705 *
706 * This is a helper for the trivial HostDataBlit users that don't need to worry
707 * about tiling etc.
708 */
709void
710RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
711		     uint32_t *dstPitchOff, int *x, int *y)
712{
713    RADEONInfoPtr info = RADEONPTR( pScrn );
714    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
715
716    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
717    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
718    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
719}
720
721/* Set up a hostdata blit to transfer data from system memory to the
722 * framebuffer. Returns the address where the data can be written to and sets
723 * the dstPitch and hpass variables as required.
724 */
725uint8_t*
726RADEONHostDataBlit(
727    ScrnInfoPtr pScrn,
728    unsigned int cpp,
729    unsigned int w,
730    uint32_t dstPitchOff,
731    uint32_t *bufPitch,
732    int x,
733    int *y,
734    unsigned int *h,
735    unsigned int *hpass
736){
737    RADEONInfoPtr info = RADEONPTR( pScrn );
738    uint32_t format, dwords;
739    uint8_t *ret;
740    RING_LOCALS;
741
742    if ( *h == 0 )
743    {
744	return NULL;
745    }
746
747    switch ( cpp )
748    {
749    case 4:
750	format = RADEON_GMC_DST_32BPP;
751	*bufPitch = 4 * w;
752	break;
753    case 2:
754	format = RADEON_GMC_DST_16BPP;
755	*bufPitch = 2 * ((w + 1) & ~1);
756	break;
757    case 1:
758	format = RADEON_GMC_DST_8BPP_CI;
759	*bufPitch = (w + 3) & ~3;
760	break;
761    default:
762	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
763		    "%s: Unsupported cpp %d!\n", __func__, cpp );
764	return NULL;
765    }
766
767#if X_BYTE_ORDER == X_BIG_ENDIAN
768    /* Swap doesn't work on R300 and later, it's handled during the
769     * copy to ind. buffer pass
770     */
771    if (info->ChipFamily < CHIP_FAMILY_R300) {
772        BEGIN_RING(2);
773	if (cpp == 2)
774	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
775			 RADEON_HOST_DATA_SWAP_HDW);
776	else if (cpp == 1)
777	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
778			 RADEON_HOST_DATA_SWAP_32BIT);
779	else
780	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
781			 RADEON_HOST_DATA_SWAP_NONE);
782	ADVANCE_RING();
783    }
784#endif
785
786    /*RADEON_PURGE_CACHE();
787      RADEON_WAIT_UNTIL_IDLE();*/
788
789    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
790    dwords = *hpass * *bufPitch / 4;
791
792    BEGIN_RING( dwords + 10 );
793    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
794    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
795	    | RADEON_GMC_DST_CLIPPING
796	    | RADEON_GMC_BRUSH_NONE
797	    | format
798	    | RADEON_GMC_SRC_DATATYPE_COLOR
799	    | RADEON_ROP3_S
800	    | RADEON_DP_SRC_SOURCE_HOST_DATA
801	    | RADEON_GMC_CLR_CMP_CNTL_DIS
802	    | RADEON_GMC_WR_MSK_DIS );
803    OUT_RING( dstPitchOff );
804    OUT_RING( (*y << 16) | x );
805    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
806    OUT_RING( 0xffffffff );
807    OUT_RING( 0xffffffff );
808    OUT_RING( *y << 16 | x );
809    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
810    OUT_RING( dwords );
811
812    ret = ( uint8_t* )&__head[__count];
813
814    __count += dwords;
815    ADVANCE_RING();
816
817    *y += *hpass;
818    *h -= *hpass;
819
820    return ret;
821}
822
/* Copy `size' bytes from src to dst, applying the byte swap selected by
 * `swap':
 *
 *   RADEON_HOST_DATA_SWAP_HDW   - exchange the two 16-bit halves of every
 *                                 32-bit word
 *   RADEON_HOST_DATA_SWAP_32BIT - reverse the four bytes of every 32-bit word
 *   RADEON_HOST_DATA_SWAP_16BIT - exchange the two bytes of every 16-bit word
 *   anything else               - plain copy (memmove, skipped if src == dst)
 *
 * For the swapping modes only whole units are processed: trailing bytes
 * beyond the last complete 32-bit (or 16-bit) unit are left untouched.
 * src and dst are accessed as arrays of that unit type, so they are
 * expected to be suitably aligned.
 */
void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
{
    switch(swap) {
    case RADEON_HOST_DATA_SWAP_HDW:
        {
	    unsigned int *d = (unsigned int *)dst;
	    unsigned int *s = (unsigned int *)src;
	    unsigned int nwords = size >> 2;

	    for (; nwords > 0; --nwords, ++d, ++s)
		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
	    return;
        }
    case RADEON_HOST_DATA_SWAP_32BIT:
        {
	    unsigned int *d = (unsigned int *)dst;
	    unsigned int *s = (unsigned int *)src;
	    unsigned int nwords = size >> 2;

	    for (; nwords > 0; --nwords, ++d, ++s) {
#ifdef __powerpc__
		/* Byte-reversed word store; the clobber tells the compiler
		 * that memory is written behind its back.
		 */
		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d) : "memory");
#else
		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
#endif
	    }
	    return;
        }
    case RADEON_HOST_DATA_SWAP_16BIT:
        {
	    unsigned short *d = (unsigned short *)dst;
	    unsigned short *s = (unsigned short *)src;
	    unsigned int nwords = size >> 1;

	    for (; nwords > 0; --nwords, ++d, ++s) {
#ifdef __powerpc__
		/* Halfword store (sthbrx): a word store would write two
		 * bytes past the 16-bit destination element.
		 */
		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d) : "memory");
#else
		/* Swap the two bytes of the 16-bit value */
		*d = (unsigned short)(((*s >> 8) & 0xff) | ((*s & 0xff) << 8));
#endif
	    }
	    return;
	}
    }
    if (src != dst)
	    memmove(dst, src, size);
}
870
871/* Copies a single pass worth of data for a hostdata blit set up by
872 * RADEONHostDataBlit().
873 */
874void
875RADEONHostDataBlitCopyPass(
876    ScrnInfoPtr pScrn,
877    unsigned int cpp,
878    uint8_t *dst,
879    uint8_t *src,
880    unsigned int hpass,
881    unsigned int dstPitch,
882    unsigned int srcPitch
883){
884
885#if X_BYTE_ORDER == X_BIG_ENDIAN
886    RADEONInfoPtr info = RADEONPTR( pScrn );
887#endif
888
889    /* RADEONHostDataBlitCopy can return NULL ! */
890    if( (dst==NULL) || (src==NULL)) return;
891
892    if ( dstPitch == srcPitch )
893    {
894#if X_BYTE_ORDER == X_BIG_ENDIAN
895        if (info->ChipFamily >= CHIP_FAMILY_R300) {
896	    switch(cpp) {
897	    case 1:
898		RADEONCopySwap(dst, src, hpass * dstPitch,
899			       RADEON_HOST_DATA_SWAP_32BIT);
900		return;
901	    case 2:
902	        RADEONCopySwap(dst, src, hpass * dstPitch,
903			       RADEON_HOST_DATA_SWAP_HDW);
904		return;
905	    }
906	}
907#endif
908	memcpy( dst, src, hpass * dstPitch );
909    }
910    else
911    {
912	unsigned int minPitch = min( dstPitch, srcPitch );
913	while ( hpass-- )
914	{
915#if X_BYTE_ORDER == X_BIG_ENDIAN
916            if (info->ChipFamily >= CHIP_FAMILY_R300) {
917		switch(cpp) {
918		case 1:
919		    RADEONCopySwap(dst, src, minPitch,
920				   RADEON_HOST_DATA_SWAP_32BIT);
921		    goto next;
922		case 2:
923	            RADEONCopySwap(dst, src, minPitch,
924				   RADEON_HOST_DATA_SWAP_HDW);
925		    goto next;
926		}
927	    }
928#endif
929	    memcpy( dst, src, minPitch );
930#if X_BYTE_ORDER == X_BIG_ENDIAN
931	next:
932#endif
933	    src += srcPitch;
934	    dst += dstPitch;
935	}
936    }
937}
938
939#endif
940
941Bool RADEONAccelInit(ScreenPtr pScreen)
942{
943    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
944    RADEONInfoPtr  info  = RADEONPTR(pScrn);
945
946    if (info->ChipFamily >= CHIP_FAMILY_R600)
947	return FALSE;
948
949#ifdef USE_EXA
950    if (info->useEXA) {
951# ifdef XF86DRI
952	if (info->directRenderingEnabled) {
953	    if (!RADEONDrawInitCP(pScreen))
954		return FALSE;
955	} else
956# endif /* XF86DRI */
957	{
958	    if (!RADEONDrawInitMMIO(pScreen))
959		return FALSE;
960	}
961    }
962#endif /* USE_EXA */
963#ifdef USE_XAA
964    if (!info->useEXA) {
965	XAAInfoRecPtr  a;
966
967	if (!(a = info->accel = XAACreateInfoRec())) {
968	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
969	    return FALSE;
970	}
971
972#ifdef XF86DRI
973	if (info->directRenderingEnabled)
974	    RADEONAccelInitCP(pScreen, a);
975	else
976#endif /* XF86DRI */
977	    RADEONAccelInitMMIO(pScreen, a);
978
979	RADEONEngineInit(pScrn);
980
981	if (!XAAInit(pScreen, a)) {
982	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
983	    return FALSE;
984	}
985    }
986#endif /* USE_XAA */
987    return TRUE;
988}
989
990void RADEONInit3DEngine(ScrnInfoPtr pScrn)
991{
992    RADEONInfoPtr info = RADEONPTR (pScrn);
993
994#ifdef XF86DRI
995    if (info->directRenderingEnabled) {
996	RADEONSAREAPrivPtr pSAREAPriv;
997
998	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
999	pSAREAPriv->ctxOwner = DRIGetContext(pScrn->pScreen);
1000	RADEONInit3DEngineCP(pScrn);
1001    } else
1002#endif
1003	RADEONInit3DEngineMMIO(pScrn);
1004
1005    info->XInited3D = TRUE;
1006}
1007
1008#ifdef USE_XAA
1009#ifdef XF86DRI
/*
 * Lay out video memory for XAA when DRI is compiled in.
 *
 * Computes the sizes and offsets of the back buffer, the depth buffer
 * and the local texture heap and stores them in info, initializes the
 * offscreen framebuffer manager, reserves a small offscreen area, calls
 * RADEONDRIAllocatePCIGARTTable(), logs the resulting layout and finally
 * computes the front/back/depth pitch-offset values.
 *
 * scrnIndex is only used as the first argument of some xf86DrvMsg()
 * calls; pScreen is the screen being set up.
 *
 * Returns FALSE if xf86InitFBManager() or
 * xf86QueryLargestOffscreenArea() fails, TRUE otherwise.  Failing to
 * reserve the small offscreen area is logged but is not fatal.
 */
1010Bool
1011RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1012{
1013    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1014    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1015    int            cpp = info->CurrentLayout.pixel_bytes;
1016    int            depthCpp = (info->depthBits - 8) / 4; /* 16 bits -> 2, 24 bits -> 4 */
1017    int            width_bytes = pScrn->displayWidth * cpp;
1018    int            bufferSize;
1019    int            depthSize;
1020    int            l;
1021    int            scanlines;
1022    int            texsizerequest;
1023    BoxRec         MemBox;
1024    FBAreaPtr      fbarea;
1025
1026    info->frontOffset = 0;
1027    info->frontPitch = pScrn->displayWidth;
1028    info->backPitch = pScrn->displayWidth;
1029
1030    /* make sure we use 16 line alignment for tiling (8 might be enough).
1031     * Might need that for non-XF86DRI too?
1032     */
    /* NOTE(review): "(x + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN" rounds
     * x up only if RADEON_BUFFER_ALIGN is an (alignment - 1) bit mask --
     * confirm against its definition.
     */
1033    if (info->allowColorTiling) {
1034	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
1035		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1036    } else {
1037        bufferSize = (pScrn->virtualY * width_bytes
1038		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1039    }
1040
1041    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1042     * which is always the case if color tiling is used due to color pitch
1043     * but not necessarily otherwise, and its height a multiple of 16 lines.
1044     */
1045    info->depthPitch = (pScrn->displayWidth + 31) & ~31;
1046    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->depthPitch
1047		  * depthCpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
1048
    /* Informational logging only; nothing below depends on the CP mode. */
1049    switch (info->CPMode) {
1050    case RADEON_DEFAULT_CP_PIO_MODE:
1051	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in PIO mode\n");
1052	break;
1053    case RADEON_DEFAULT_CP_BM_MODE:
1054	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in BM mode\n");
1055	break;
1056    default:
1057        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in UNKNOWN mode\n");
1058	break;
1059    }
1060
1061    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1062	       "Using %d MB GART aperture\n", info->gartSize);
1063    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1064	       "Using %d MB for the ring buffer\n", info->ringSize);
1065    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1066	       "Using %d MB for vertex/indirect buffers\n", info->bufSize);
1067    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1068	       "Using %d MB for GART textures\n", info->gartTexSize);
1069
1070    /* Try for front, back, depth, and three framebuffers worth of
1071     * pixmap cache.  Should be enough for a fullscreen background
1072     * image plus some leftovers.
1073     * If the FBTexPercent option was used, try to achieve that percentage instead,
1074     * but still have at least one pixmap buffer (get problems with xvideo/render
1075     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1076     * probably useless for XAA.
1077     */
    /* On entry a non-negative info->textureSize is treated as a percentage;
     * a negative value selects the default request of half of FbMapSize.
     */
1078    if (info->textureSize >= 0) {
1079	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1080			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1081	/* first divide, then multiply or we'll get an overflow (been there...) */
1082			 / 100 * info->textureSize;
1083    }
1084    else {
1085	texsizerequest = (int)info->FbMapSize / 2;
1086    }
    /* From here on info->textureSize holds a size in bytes.  First attempt:
     * leave five bufferSize units, the depth buffer and the secure area
     * outside the texture heap.
     */
1087    info->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1088
1089    /* If that gives us less than the requested memory, let's
1090     * be greedy and grab some more.  Sorry, I care more about 3D
1091     * performance than playing nicely, and you'll get around a full
1092     * framebuffer's worth of pixmap cache anyway.
1093     */
    /* NOTE(review): unlike the first attempt, these two fallbacks do not
     * subtract info->FbSecureSize -- confirm that this is intended.
     */
1094    if (info->textureSize < texsizerequest) {
1095        info->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1096    }
1097    if (info->textureSize < texsizerequest) {
1098        info->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1099    }
1100
1101    /* If there's still no space for textures, try without pixmap cache, but
1102     * never use the reserved space, the space hw cursor and PCIGART table might
1103     * use.
1104     */
1105    if (info->textureSize < 0) {
1106	info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1107	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1108    }
1109
1110    /* Check to see if there is more room available after the 8192nd
1111     * scanline for textures
1112     */
1113    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1114    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1115	> info->textureSize) {
1116	info->textureSize =
1117		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1118    }
1119
1120    /* If backbuffer is disabled, don't allocate memory for it */
1121    if (info->noBackBuffer) {
1122	info->textureSize += bufferSize;
1123    }
1124
1125    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1126       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1127       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1128       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1129       area otherwise).
1130       This might cause some space at the end of the video memory to be unused, since it
1131       can't be used (?) due to that log_tex_granularity thing???
1132       Could use different copyscreentoscreen function for the pageflip copies
1133       (which would use different src and dst offsets) to avoid this. */
1134    if (info->allowColorTiling && !info->noBackBuffer) {
1135	info->textureSize = info->FbMapSize - ((info->FbMapSize - info->textureSize +
1136			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1137    }
1138    if (info->textureSize > 0) {
1139	l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS);
1140	if (l < RADEON_LOG_TEX_GRANULARITY)
1141	    l = RADEON_LOG_TEX_GRANULARITY;
1142	/* Truncate the texture size to a whole multiple of 2^l bytes:
1143	 * the shift pair below clears the low l bits, i.e. it rounds
1144	 * down.  NOTE(review): this comment used to say "round up ...
1145	 * don't round down", which is not what the code does --
	 * confirm which behavior is intended.
	 */
1146	info->log2TexGran = l;
1147	info->textureSize = (info->textureSize >> l) << l;
1148    } else {
1149	info->textureSize = 0;
1150    }
1151
1152    /* Set a minimum usable local texture heap size.  This will fit
1153     * two 256x256x32bpp textures.
1154     */
    /* Note: the textureOffset store here is always overwritten by the
     * if/else that follows; only the textureSize reset has an effect.
     */
1155    if (info->textureSize < 512 * 1024) {
1156	info->textureOffset = 0;
1157	info->textureSize = 0;
1158    }
1159
    /* With tiling and a back buffer the texture offset is rounded down to a
     * multiple of 16 scanlines (width_bytes * 16 bytes); otherwise the
     * RADEON_BUFFER_ALIGN mask is applied.
     */
1160    if (info->allowColorTiling && !info->noBackBuffer) {
1161	info->textureOffset = ((info->FbMapSize - info->textureSize) /
1162			       (width_bytes * 16)) * (width_bytes * 16);
1163    }
1164    else {
1165	/* Reserve space for textures */
1166	info->textureOffset = ((info->FbMapSize - info->textureSize +
1167				RADEON_BUFFER_ALIGN) &
1168			       ~(uint32_t)RADEON_BUFFER_ALIGN);
1169    }
1170
1171    /* Reserve space for the shared depth
1172     * buffer.
1173     */
1174    info->depthOffset = ((info->textureOffset - depthSize +
1175			  RADEON_BUFFER_ALIGN) &
1176			 ~(uint32_t)RADEON_BUFFER_ALIGN);
1177
1178    /* Reserve space for the shared back buffer */
1179    if (info->noBackBuffer) {
1180       info->backOffset = info->depthOffset;
1181    } else {
1182       info->backOffset = ((info->depthOffset - bufferSize +
1183			    RADEON_BUFFER_ALIGN) &
1184			   ~(uint32_t)RADEON_BUFFER_ALIGN);
1185    }
1186
    /* Express the back buffer offset as a scanline (backY) plus a pixel
     * column within that scanline (backX).
     */
1187    info->backY = info->backOffset / width_bytes;
1188    info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp;
1189
    /* The managed area covers the framebuffer minus the secure area, capped
     * at 8191 scanlines.
     */
1190    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1191    if (scanlines > 8191)
1192	scanlines = 8191;
1193
1194    MemBox.x1 = 0;
1195    MemBox.y1 = 0;
1196    MemBox.x2 = pScrn->displayWidth;
1197    MemBox.y2 = scanlines;
1198
1199    if (!xf86InitFBManager(pScreen, &MemBox)) {
1200        xf86DrvMsg(scrnIndex, X_ERROR,
1201		   "Memory manager initialization to "
1202		   "(%d,%d) (%d,%d) failed\n",
1203		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1204	return FALSE;
1205    } else {
1206	int  width, height;
1207
1208	xf86DrvMsg(scrnIndex, X_INFO,
1209		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1210		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1211	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1212	   aligned... sigh */
	/* Reserve 2 lines, plus (when tiling) the lines needed to pad
	 * virtualY up to a multiple of 16.  Failure is logged only.
	 */
1213	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1214						pScrn->displayWidth,
1215						info->allowColorTiling ?
1216						((pScrn->virtualY + 15) & ~15)
1217						- pScrn->virtualY + 2 : 2,
1218						0, NULL, NULL,
1219						NULL))) {
1220	    xf86DrvMsg(scrnIndex, X_INFO,
1221		       "Reserved area from (%d,%d) to (%d,%d)\n",
1222		       fbarea->box.x1, fbarea->box.y1,
1223		       fbarea->box.x2, fbarea->box.y2);
1224	} else {
1225	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1226	}
1227
1228	RADEONDRIAllocatePCIGARTTable(pScreen);
1229
1230	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1231					  &height, 0, 0, 0)) {
1232	    xf86DrvMsg(scrnIndex, X_INFO,
1233		       "Largest offscreen area available: %d x %d\n",
1234		       width, height);
1235
1236	    /* Lines in offscreen area needed for depth buffer and
1237	     * textures
1238	     */
1239	    info->depthTexLines = (scanlines
1240				   - info->depthOffset / width_bytes);
1241	    info->backLines	    = (scanlines
1242				       - info->backOffset / width_bytes
1243				       - info->depthTexLines);
1244	    info->backArea	    = NULL;
1245	} else {
1246	    xf86DrvMsg(scrnIndex, X_ERROR,
1247		       "Unable to determine largest offscreen area "
1248		       "available\n");
1249	    return FALSE;
1250	}
1251    }
1252
1253    xf86DrvMsg(scrnIndex, X_INFO,
1254	       "Will use front buffer at offset 0x%x\n",
1255	       info->frontOffset);
1256
1257    xf86DrvMsg(scrnIndex, X_INFO,
1258	       "Will use back buffer at offset 0x%x\n",
1259	       info->backOffset);
1260    xf86DrvMsg(scrnIndex, X_INFO,
1261	       "Will use depth buffer at offset 0x%x\n",
1262	       info->depthOffset);
1263    if (info->cardType==CARD_PCIE)
1264    	xf86DrvMsg(scrnIndex, X_INFO,
1265	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1266		   info->pciGartSize/1024, (unsigned)info->pciGartOffset);
1267    xf86DrvMsg(scrnIndex, X_INFO,
1268	       "Will use %d kb for textures at offset 0x%x\n",
1269	       info->textureSize/1024, info->textureOffset);
1270
    /* Each pitch-offset value packs (pitch in bytes / 64) shifted left by 22
     * together with (buffer offset + info->fbLocation) shifted right by 10.
     */
1271    info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) |
1272			      ((info->frontOffset + info->fbLocation) >> 10));
1273
1274    info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) |
1275			     ((info->backOffset + info->fbLocation) >> 10));
1276
1277    info->depthPitchOffset = (((info->depthPitch * depthCpp / 64) << 22) |
1278			      ((info->depthOffset + info->fbLocation) >> 10));
1279    return TRUE;
1280}
1281#endif /* XF86DRI */
1282
1283Bool
1284RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1285{
1286    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1287    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1288    BoxRec         MemBox;
1289    int            y2;
1290
1291    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1292
1293    MemBox.x1 = 0;
1294    MemBox.y1 = 0;
1295    MemBox.x2 = pScrn->displayWidth;
1296    y2 = info->FbMapSize / width_bytes;
1297    if (y2 >= 32768)
1298	y2 = 32767; /* because MemBox.y2 is signed short */
1299    MemBox.y2 = y2;
1300
1301    /* The acceleration engine uses 14 bit
1302     * signed coordinates, so we can't have any
1303     * drawable caches beyond this region.
1304     */
1305    if (MemBox.y2 > 8191)
1306	MemBox.y2 = 8191;
1307
1308    if (!xf86InitFBManager(pScreen, &MemBox)) {
1309	xf86DrvMsg(scrnIndex, X_ERROR,
1310		   "Memory manager initialization to "
1311		   "(%d,%d) (%d,%d) failed\n",
1312		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1313	return FALSE;
1314    } else {
1315	int       width, height;
1316	FBAreaPtr fbarea;
1317
1318	xf86DrvMsg(scrnIndex, X_INFO,
1319		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1320		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1321	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1322						pScrn->displayWidth,
1323						info->allowColorTiling ?
1324						((pScrn->virtualY + 15) & ~15)
1325						- pScrn->virtualY + 2 : 2,
1326						0, NULL, NULL,
1327						NULL))) {
1328	    xf86DrvMsg(scrnIndex, X_INFO,
1329		       "Reserved area from (%d,%d) to (%d,%d)\n",
1330		       fbarea->box.x1, fbarea->box.y1,
1331		       fbarea->box.x2, fbarea->box.y2);
1332	} else {
1333	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1334	}
1335	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1336					      0, 0, 0)) {
1337	    xf86DrvMsg(scrnIndex, X_INFO,
1338		       "Largest offscreen area available: %d x %d\n",
1339		       width, height);
1340	}
1341	return TRUE;
1342    }
1343}
1344#endif /* USE_XAA */
1345