radeon_accel.c revision b7e1c893
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78				/* Driver data structures */
79#include "radeon.h"
80#include "radeon_reg.h"
81#include "r600_reg.h"
82#include "radeon_macros.h"
83#include "radeon_probe.h"
84#include "radeon_version.h"
85#ifdef XF86DRI
86#define _XF86DRI_SERVER_
87#include "radeon_drm.h"
88#endif
89
90				/* Line support */
91#include "miline.h"
92
93				/* X and server generic header files */
94#include "xf86.h"
95
96static void R600EngineReset(ScrnInfoPtr pScrn);
97
#ifdef USE_XAA
/* Raster-op translation table, one entry per X11 GX* function, listed in
 * the order GXclear ... GXset (presumably so that it can be indexed directly
 * by the X raster-op code -- confirm against the users of this table).
 *
 *   rop      ROP3 code combining destination and source (the *S* codes)
 *   pattern  ROP3 code combining destination and pattern (the *P* codes)
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    /* GXclear        */ { .rop = RADEON_ROP3_ZERO, .pattern = RADEON_ROP3_ZERO },
    /* GXand          */ { .rop = RADEON_ROP3_DSa,  .pattern = RADEON_ROP3_DPa  },
    /* GXandReverse   */ { .rop = RADEON_ROP3_SDna, .pattern = RADEON_ROP3_PDna },
    /* GXcopy         */ { .rop = RADEON_ROP3_S,    .pattern = RADEON_ROP3_P    },
    /* GXandInverted  */ { .rop = RADEON_ROP3_DSna, .pattern = RADEON_ROP3_DPna },
    /* GXnoop         */ { .rop = RADEON_ROP3_D,    .pattern = RADEON_ROP3_D    },
    /* GXxor          */ { .rop = RADEON_ROP3_DSx,  .pattern = RADEON_ROP3_DPx  },
    /* GXor           */ { .rop = RADEON_ROP3_DSo,  .pattern = RADEON_ROP3_DPo  },
    /* GXnor          */ { .rop = RADEON_ROP3_DSon, .pattern = RADEON_ROP3_DPon },
    /* GXequiv        */ { .rop = RADEON_ROP3_DSxn, .pattern = RADEON_ROP3_PDxn },
    /* GXinvert       */ { .rop = RADEON_ROP3_Dn,   .pattern = RADEON_ROP3_Dn   },
    /* GXorReverse    */ { .rop = RADEON_ROP3_SDno, .pattern = RADEON_ROP3_PDno },
    /* GXcopyInverted */ { .rop = RADEON_ROP3_Sn,   .pattern = RADEON_ROP3_Pn   },
    /* GXorInverted   */ { .rop = RADEON_ROP3_DSno, .pattern = RADEON_ROP3_DPno },
    /* GXnand         */ { .rop = RADEON_ROP3_DSan, .pattern = RADEON_ROP3_DPan },
    /* GXset          */ { .rop = RADEON_ROP3_ONE,  .pattern = RADEON_ROP3_ONE  }
};
#endif
121
122/* The FIFO has 64 slots.  This routines waits until at least `entries'
123 * of these slots are empty.
124 */
125void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
126{
127    RADEONInfoPtr  info       = RADEONPTR(pScrn);
128    unsigned char *RADEONMMIO = info->MMIO;
129    int            i;
130
131    for (;;) {
132	for (i = 0; i < RADEON_TIMEOUT; i++) {
133	    info->accel_state->fifo_slots =
134		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
135	    if (info->accel_state->fifo_slots >= entries) return;
136	}
137	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
138		       "FIFO timed out: %u entries, stat=0x%08x\n",
139		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
140		       (unsigned int)INREG(RADEON_RBBM_STATUS));
141	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
142		   "FIFO timed out, resetting engine...\n");
143	RADEONEngineReset(pScrn);
144	RADEONEngineRestore(pScrn);
145#ifdef XF86DRI
146	if (info->directRenderingEnabled) {
147	    RADEONCP_RESET(pScrn, info);
148	    RADEONCP_START(pScrn, info);
149	}
150#endif
151    }
152}
153
154void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
155{
156    RADEONInfoPtr  info       = RADEONPTR(pScrn);
157    unsigned char *RADEONMMIO = info->MMIO;
158    int            i;
159
160    for (;;) {
161	for (i = 0; i < RADEON_TIMEOUT; i++) {
162	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
163		info->accel_state->fifo_slots =
164		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
165	    else
166		info->accel_state->fifo_slots =
167		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
168	    if (info->accel_state->fifo_slots >= entries) return;
169	}
170	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
171		       "FIFO timed out: stat=0x%08x\n",
172		       (unsigned int)INREG(R600_GRBM_STATUS));
173	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
174		   "FIFO timed out, resetting engine...\n");
175	R600EngineReset(pScrn);
176#ifdef XF86DRI
177	if (info->directRenderingEnabled) {
178	    RADEONCP_RESET(pScrn, info);
179	    RADEONCP_START(pScrn, info);
180	}
181#endif
182    }
183}
184
185/* Flush all dirty data in the Pixel Cache to memory */
186void RADEONEngineFlush(ScrnInfoPtr pScrn)
187{
188    RADEONInfoPtr  info       = RADEONPTR(pScrn);
189    unsigned char *RADEONMMIO = info->MMIO;
190    int            i;
191
192    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
193	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
194		RADEON_RB3D_DC_FLUSH_ALL,
195		~RADEON_RB3D_DC_FLUSH_ALL);
196	for (i = 0; i < RADEON_TIMEOUT; i++) {
197	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
198		break;
199	}
200	if (i == RADEON_TIMEOUT) {
201	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
202			   "DC flush timeout: %x\n",
203			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
204	}
205    } else {
206	OUTREGP(R300_DSTCACHE_CTLSTAT,
207		R300_RB2D_DC_FLUSH_ALL,
208		~R300_RB2D_DC_FLUSH_ALL);
209	for (i = 0; i < RADEON_TIMEOUT; i++) {
210	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
211		break;
212	}
213	if (i == RADEON_TIMEOUT) {
214	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
215			   "DC flush timeout: %x\n",
216			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
217	}
218    }
219}
220
221/* Reset graphics card to known state */
222void RADEONEngineReset(ScrnInfoPtr pScrn)
223{
224    RADEONInfoPtr  info       = RADEONPTR(pScrn);
225    unsigned char *RADEONMMIO = info->MMIO;
226    uint32_t       clock_cntl_index;
227    uint32_t       mclk_cntl;
228    uint32_t       rbbm_soft_reset;
229    uint32_t       host_path_cntl;
230
231    /* The following RBBM_SOFT_RESET sequence can help un-wedge
232     * an R300 after the command processor got stuck.
233     */
234    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
235    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
236                                   RADEON_SOFT_RESET_CP |
237                                   RADEON_SOFT_RESET_HI |
238                                   RADEON_SOFT_RESET_SE |
239                                   RADEON_SOFT_RESET_RE |
240                                   RADEON_SOFT_RESET_PP |
241                                   RADEON_SOFT_RESET_E2 |
242                                   RADEON_SOFT_RESET_RB));
243    INREG(RADEON_RBBM_SOFT_RESET);
244    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
245                                   ~(RADEON_SOFT_RESET_CP |
246                                     RADEON_SOFT_RESET_HI |
247                                     RADEON_SOFT_RESET_SE |
248                                     RADEON_SOFT_RESET_RE |
249                                     RADEON_SOFT_RESET_PP |
250                                     RADEON_SOFT_RESET_E2 |
251                                     RADEON_SOFT_RESET_RB)));
252    INREG(RADEON_RBBM_SOFT_RESET);
253    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
254    INREG(RADEON_RBBM_SOFT_RESET);
255
256    RADEONEngineFlush(pScrn);
257
258    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
259    RADEONPllErrataAfterIndex(info);
260
261#if 0 /* taken care of by new PM code */
262    /* Some ASICs have bugs with dynamic-on feature, which are
263     * ASIC-version dependent, so we force all blocks on for now
264     */
265    if (info->HasCRTC2) {
266	uint32_t tmp;
267
268	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
269	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
270				  RADEON_CP_MAX_DYN_STOP_LAT |
271				  RADEON_SCLK_FORCEON_MASK));
272
273	if (info->ChipFamily == CHIP_FAMILY_RV200) {
274	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
275	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
276	}
277    }
278#endif /* new PM code */
279
280    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
281
282#if 0 /* handled by new PM code */
283    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
284			      RADEON_FORCEON_MCLKA |
285			      RADEON_FORCEON_MCLKB |
286			      RADEON_FORCEON_YCLKA |
287			      RADEON_FORCEON_YCLKB |
288			      RADEON_FORCEON_MC |
289			      RADEON_FORCEON_AIC));
290#endif /* new PM code */
291
292    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
293     * unexpected behaviour on some machines.  Here we use
294     * RADEON_HOST_PATH_CNTL to reset it.
295     */
296    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
297    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
298
299    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
300	uint32_t tmp;
301
302	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
303					RADEON_SOFT_RESET_CP |
304					RADEON_SOFT_RESET_HI |
305					RADEON_SOFT_RESET_E2));
306	INREG(RADEON_RBBM_SOFT_RESET);
307	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
308	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
309	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
310    } else {
311	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
312					RADEON_SOFT_RESET_CP |
313					RADEON_SOFT_RESET_SE |
314					RADEON_SOFT_RESET_RE |
315					RADEON_SOFT_RESET_PP |
316					RADEON_SOFT_RESET_E2 |
317					RADEON_SOFT_RESET_RB));
318	INREG(RADEON_RBBM_SOFT_RESET);
319	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
320					~(RADEON_SOFT_RESET_CP |
321					  RADEON_SOFT_RESET_SE |
322					  RADEON_SOFT_RESET_RE |
323					  RADEON_SOFT_RESET_PP |
324					  RADEON_SOFT_RESET_E2 |
325					  RADEON_SOFT_RESET_RB)));
326	INREG(RADEON_RBBM_SOFT_RESET);
327    }
328
329    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
330    INREG(RADEON_HOST_PATH_CNTL);
331    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
332
333    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
334	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
335
336    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
337    RADEONPllErrataAfterIndex(info);
338    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
339}
340
341/* Reset graphics card to known state */
342static void R600EngineReset(ScrnInfoPtr pScrn)
343{
344    RADEONInfoPtr  info       = RADEONPTR(pScrn);
345    unsigned char *RADEONMMIO = info->MMIO;
346    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
347
348    cp_ptr = INREG(R600_CP_RB_WPTR);
349
350    cp_me_cntl = INREG(R600_CP_ME_CNTL);
351    OUTREG(R600_CP_ME_CNTL, 0x10000000);
352
353    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
354    INREG(R600_GRBM_SOFT_RESET);
355    usleep (50);
356    OUTREG(R600_GRBM_SOFT_RESET, 0);
357    INREG(R600_GRBM_SOFT_RESET);
358
359    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
360    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
361    OUTREG(R600_CP_RB_CNTL, 0x80000000);
362
363    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
364    OUTREG(R600_CP_RB_WPTR, cp_ptr);
365    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
366    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
367
368}
369
370/* Restore the acceleration hardware to its previous state */
371void RADEONEngineRestore(ScrnInfoPtr pScrn)
372{
373    RADEONInfoPtr  info       = RADEONPTR(pScrn);
374    unsigned char *RADEONMMIO = info->MMIO;
375
376    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
377		   "EngineRestore (%d/%d)\n",
378		   info->CurrentLayout.pixel_code,
379		   info->CurrentLayout.bitsPerPixel);
380
381    /* Setup engine location. This shouldn't be necessary since we
382     * set them appropriately before any accel ops, but let's avoid
383     * random bogus DMA in case we inadvertently trigger the engine
384     * in the wrong place (happened).
385     */
386    RADEONWaitForFifo(pScrn, 2);
387    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
388    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
389
390    RADEONWaitForFifo(pScrn, 1);
391#if X_BYTE_ORDER == X_BIG_ENDIAN
392    OUTREGP(RADEON_DP_DATATYPE,
393	    RADEON_HOST_BIG_ENDIAN_EN,
394	    ~RADEON_HOST_BIG_ENDIAN_EN);
395#else
396    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
397#endif
398
399    /* Restore SURFACE_CNTL */
400    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
401
402    RADEONWaitForFifo(pScrn, 1);
403    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
404					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
405    RADEONWaitForFifo(pScrn, 1);
406    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
407				       | RADEON_GMC_BRUSH_SOLID_COLOR
408				       | RADEON_GMC_SRC_DATATYPE_COLOR));
409
410    RADEONWaitForFifo(pScrn, 5);
411    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
412    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
413    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
414    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
415    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
416
417    RADEONWaitForIdleMMIO(pScrn);
418
419    info->accel_state->XInited3D = FALSE;
420}
421
422/* Initialize the acceleration hardware */
423void RADEONEngineInit(ScrnInfoPtr pScrn)
424{
425    RADEONInfoPtr  info       = RADEONPTR(pScrn);
426    unsigned char *RADEONMMIO = info->MMIO;
427    int datatype = 0;
428    info->accel_state->num_gb_pipes = 0;
429
430    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
431		   "EngineInit (%d/%d)\n",
432		   info->CurrentLayout.pixel_code,
433		   info->CurrentLayout.bitsPerPixel);
434
435#ifdef XF86DRI
436    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
437	drm_radeon_getparam_t np;
438	int num_pipes;
439
440	memset(&np, 0, sizeof(np));
441	np.param = RADEON_PARAM_NUM_GB_PIPES;
442	np.value = &num_pipes;
443
444	if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np,
445				sizeof(np)) < 0) {
446	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
447		       "Failed to determine num pipes from DRM, falling back to "
448		       "manual look-up!\n");
449	    info->accel_state->num_gb_pipes = 0;
450	} else {
451	    info->accel_state->num_gb_pipes = num_pipes;
452	}
453    }
454#endif
455
456    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
457	(info->ChipFamily == CHIP_FAMILY_R420)  ||
458	(info->ChipFamily == CHIP_FAMILY_RS600) ||
459	(info->ChipFamily == CHIP_FAMILY_RS690) ||
460	(info->ChipFamily == CHIP_FAMILY_RS740) ||
461	(info->ChipFamily == CHIP_FAMILY_RS400) ||
462	(info->ChipFamily == CHIP_FAMILY_RS480) ||
463	IS_R500_3D) {
464	if (info->accel_state->num_gb_pipes == 0) {
465	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
466
467	    info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
468	    if (IS_R500_3D)
469		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
470	}
471    } else {
472	if (info->accel_state->num_gb_pipes == 0) {
473	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
474		(info->ChipFamily == CHIP_FAMILY_R350)) {
475		/* R3xx chips */
476		info->accel_state->num_gb_pipes = 2;
477	    } else {
478		/* RV3xx chips */
479		info->accel_state->num_gb_pipes = 1;
480	    }
481	}
482    }
483
484    if (IS_R300_3D || IS_R500_3D)
485	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
486		   "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
487
488    if (IS_R300_3D || IS_R500_3D) {
489	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16);
490
491	switch(info->accel_state->num_gb_pipes) {
492	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
493	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
494	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
495	default:
496	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
497	}
498
499	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
500	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
501	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
502	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
503					 R300_DC_AUTOFLUSH_ENABLE |
504					 R300_DC_DC_DISABLE_IGNORE_PE));
505    } else
506	OUTREG(RADEON_RB3D_CNTL, 0);
507
508    RADEONEngineReset(pScrn);
509
510    switch (info->CurrentLayout.pixel_code) {
511    case 8:  datatype = 2; break;
512    case 15: datatype = 3; break;
513    case 16: datatype = 4; break;
514    case 24: datatype = 5; break;
515    case 32: datatype = 6; break;
516    default:
517	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
518		       "Unknown depth/bpp = %d/%d (code = %d)\n",
519		       info->CurrentLayout.depth,
520		       info->CurrentLayout.bitsPerPixel,
521		       info->CurrentLayout.pixel_code);
522    }
523
524    info->accel_state->dp_gui_master_cntl =
525	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
526	 | RADEON_GMC_CLR_CMP_CNTL_DIS
527	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
528
529    RADEONEngineRestore(pScrn);
530}
531
532
533#define ACCEL_MMIO
534#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
535#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
536#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
537#define FINISH_ACCEL()
538
539#include "radeon_commonfuncs.c"
540#if defined(RENDER) && defined(USE_XAA)
541#include "radeon_render.c"
542#endif
543#include "radeon_accelfuncs.c"
544
545#undef ACCEL_MMIO
546#undef ACCEL_PREAMBLE
547#undef BEGIN_ACCEL
548#undef OUT_ACCEL_REG
549#undef FINISH_ACCEL
550
551#ifdef XF86DRI
552
553#define ACCEL_CP
554#define ACCEL_PREAMBLE()						\
555    RING_LOCALS;							\
556    RADEONCP_REFRESH(pScrn, info)
557#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
558#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
559#define FINISH_ACCEL()          ADVANCE_RING()
560
561
562#include "radeon_commonfuncs.c"
563#if defined(RENDER) && defined(USE_XAA)
564#include "radeon_render.c"
565#endif
566#include "radeon_accelfuncs.c"
567
568#undef ACCEL_CP
569#undef ACCEL_PREAMBLE
570#undef BEGIN_ACCEL
571#undef OUT_ACCEL_REG
572#undef FINISH_ACCEL
573
574/* Stop the CP */
575int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
576{
577    drm_radeon_cp_stop_t  stop;
578    int              ret, i;
579
580    stop.flush = 1;
581    stop.idle  = 1;
582
583    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
584			  sizeof(drm_radeon_cp_stop_t));
585
586    if (ret == 0) {
587	return 0;
588    } else if (errno != EBUSY) {
589	return -errno;
590    }
591
592    stop.flush = 0;
593
594    i = 0;
595    do {
596	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
597			      sizeof(drm_radeon_cp_stop_t));
598    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
599
600    if (ret == 0) {
601	return 0;
602    } else if (errno != EBUSY) {
603	return -errno;
604    }
605
606    stop.idle = 0;
607
608    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
609			&stop, sizeof(drm_radeon_cp_stop_t))) {
610	return -errno;
611    } else {
612	return 0;
613    }
614}
615
616/* Get an indirect buffer for the CP 2D acceleration commands  */
617drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
618{
619    RADEONInfoPtr  info = RADEONPTR(pScrn);
620    drmDMAReq      dma;
621    drmBufPtr      buf = NULL;
622    int            indx = 0;
623    int            size = 0;
624    int            i = 0;
625    int            ret;
626
627#if 0
628    /* FIXME: pScrn->pScreen has not been initialized when this is first
629     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
630     * the screen index from pScrn, which is initialized, and then get
631     * the screen from screenInfo.screens[index], but that is a hack.
632     */
633    dma.context = DRIGetContext(pScrn->pScreen);
634#else
635    /* This is the X server's context */
636    dma.context = 0x00000001;
637#endif
638
639    dma.send_count    = 0;
640    dma.send_list     = NULL;
641    dma.send_sizes    = NULL;
642    dma.flags         = 0;
643    dma.request_count = 1;
644    dma.request_size  = RADEON_BUFFER_SIZE;
645    dma.request_list  = &indx;
646    dma.request_sizes = &size;
647    dma.granted_count = 0;
648
649    while (1) {
650	do {
651	    ret = drmDMA(info->dri->drmFD, &dma);
652	    if (ret && ret != -EBUSY) {
653		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
654			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
655	    }
656	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
657
658	if (ret == 0) {
659	    buf = &info->dri->buffers->list[indx];
660	    buf->used = 0;
661	    if (RADEON_VERBOSE) {
662		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
663			   "   GetBuffer returning %d %p\n",
664			   buf->idx, buf->address);
665	    }
666	    return buf;
667	}
668
669	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
670		   "GetBuffer timed out, resetting engine...\n");
671
672	if (info->ChipFamily < CHIP_FAMILY_R600) {
673	    RADEONEngineReset(pScrn);
674	    RADEONEngineRestore(pScrn);
675	} else
676	    R600EngineReset(pScrn);
677
678	/* Always restart the engine when doing CP 2D acceleration */
679	RADEONCP_RESET(pScrn, info);
680	RADEONCP_START(pScrn, info);
681    }
682}
683
684/* Flush the indirect buffer to the kernel for submission to the card */
685void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
686{
687    RADEONInfoPtr      info   = RADEONPTR(pScrn);
688    drmBufPtr          buffer = info->cp->indirectBuffer;
689    int                start  = info->cp->indirectStart;
690    drm_radeon_indirect_t  indirect;
691
692    if (!buffer) return;
693    if (start == buffer->used && !discard) return;
694
695    if (RADEON_VERBOSE) {
696	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
697		   buffer->idx);
698    }
699
700    if (info->ChipFamily >= CHIP_FAMILY_R600) {
701	if (buffer->used & 0x3c) {
702	    RING_LOCALS;
703
704	    while (buffer->used & 0x3c) {
705		BEGIN_RING(1);
706		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
707		ADVANCE_RING();
708	    }
709	}
710    }
711
712    indirect.idx     = buffer->idx;
713    indirect.start   = start;
714    indirect.end     = buffer->used;
715    indirect.discard = discard;
716
717    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
718			&indirect, sizeof(drm_radeon_indirect_t));
719
720    if (discard) {
721	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
722	info->cp->indirectStart  = 0;
723    } else {
724	/* Start on a double word boundary */
725	info->cp->indirectStart  = buffer->used = (buffer->used + 7) & ~7;
726	if (RADEON_VERBOSE) {
727	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
728		       info->cp->indirectStart);
729	}
730    }
731}
732
733/* Flush and release the indirect buffer */
734void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
735{
736    RADEONInfoPtr      info   = RADEONPTR(pScrn);
737    drmBufPtr          buffer = info->cp->indirectBuffer;
738    int                start  = info->cp->indirectStart;
739    drm_radeon_indirect_t  indirect;
740
741    if (info->ChipFamily >= CHIP_FAMILY_R600) {
742	if (buffer && (buffer->used & 0x3c)) {
743	    RING_LOCALS;
744
745	    while (buffer->used & 0x3c) {
746		BEGIN_RING(1);
747		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
748		ADVANCE_RING();
749	    }
750	}
751    }
752
753    info->cp->indirectBuffer = NULL;
754    info->cp->indirectStart  = 0;
755
756    if (!buffer) return;
757
758    if (RADEON_VERBOSE) {
759	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
760		   buffer->idx);
761    }
762
763    indirect.idx     = buffer->idx;
764    indirect.start   = start;
765    indirect.end     = buffer->used;
766    indirect.discard = 1;
767
768    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
769			&indirect, sizeof(drm_radeon_indirect_t));
770}
771
772/** \brief Calculate HostDataBlit parameters from pointer and pitch
773 *
774 * This is a helper for the trivial HostDataBlit users that don't need to worry
775 * about tiling etc.
776 */
777void
778RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
779		     uint32_t *dstPitchOff, int *x, int *y)
780{
781    RADEONInfoPtr info = RADEONPTR( pScrn );
782    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
783
784    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
785    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
786    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
787}
788
789/* Set up a hostdata blit to transfer data from system memory to the
790 * framebuffer. Returns the address where the data can be written to and sets
791 * the dstPitch and hpass variables as required.
792 */
793uint8_t*
794RADEONHostDataBlit(
795    ScrnInfoPtr pScrn,
796    unsigned int cpp,
797    unsigned int w,
798    uint32_t dstPitchOff,
799    uint32_t *bufPitch,
800    int x,
801    int *y,
802    unsigned int *h,
803    unsigned int *hpass
804){
805    RADEONInfoPtr info = RADEONPTR( pScrn );
806    uint32_t format, dwords;
807    uint8_t *ret;
808    RING_LOCALS;
809
810    if ( *h == 0 )
811    {
812	return NULL;
813    }
814
815    switch ( cpp )
816    {
817    case 4:
818	format = RADEON_GMC_DST_32BPP;
819	*bufPitch = 4 * w;
820	break;
821    case 2:
822	format = RADEON_GMC_DST_16BPP;
823	*bufPitch = 2 * ((w + 1) & ~1);
824	break;
825    case 1:
826	format = RADEON_GMC_DST_8BPP_CI;
827	*bufPitch = (w + 3) & ~3;
828	break;
829    default:
830	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
831		    "%s: Unsupported cpp %d!\n", __func__, cpp );
832	return NULL;
833    }
834
835#if X_BYTE_ORDER == X_BIG_ENDIAN
836    /* Swap doesn't work on R300 and later, it's handled during the
837     * copy to ind. buffer pass
838     */
839    if (info->ChipFamily < CHIP_FAMILY_R300) {
840        BEGIN_RING(2);
841	if (cpp == 2)
842	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
843			 RADEON_HOST_DATA_SWAP_HDW);
844	else if (cpp == 1)
845	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
846			 RADEON_HOST_DATA_SWAP_32BIT);
847	else
848	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
849			 RADEON_HOST_DATA_SWAP_NONE);
850	ADVANCE_RING();
851    }
852#endif
853
854    /*RADEON_PURGE_CACHE();
855      RADEON_WAIT_UNTIL_IDLE();*/
856
857    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
858    dwords = *hpass * *bufPitch / 4;
859
860    BEGIN_RING( dwords + 10 );
861    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
862    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
863	    | RADEON_GMC_DST_CLIPPING
864	    | RADEON_GMC_BRUSH_NONE
865	    | format
866	    | RADEON_GMC_SRC_DATATYPE_COLOR
867	    | RADEON_ROP3_S
868	    | RADEON_DP_SRC_SOURCE_HOST_DATA
869	    | RADEON_GMC_CLR_CMP_CNTL_DIS
870	    | RADEON_GMC_WR_MSK_DIS );
871    OUT_RING( dstPitchOff );
872    OUT_RING( (*y << 16) | x );
873    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
874    OUT_RING( 0xffffffff );
875    OUT_RING( 0xffffffff );
876    OUT_RING( *y << 16 | x );
877    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
878    OUT_RING( dwords );
879
880    ret = ( uint8_t* )&__head[__count];
881
882    __count += dwords;
883    ADVANCE_RING();
884
885    *y += *hpass;
886    *h -= *hpass;
887
888    return ret;
889}
890
891void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
892{
893    switch(swap) {
894    case RADEON_HOST_DATA_SWAP_HDW:
895        {
896	    unsigned int *d = (unsigned int *)dst;
897	    unsigned int *s = (unsigned int *)src;
898	    unsigned int nwords = size >> 2;
899
900	    for (; nwords > 0; --nwords, ++d, ++s)
901		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
902	    return;
903        }
904    case RADEON_HOST_DATA_SWAP_32BIT:
905        {
906	    unsigned int *d = (unsigned int *)dst;
907	    unsigned int *s = (unsigned int *)src;
908	    unsigned int nwords = size >> 2;
909
910	    for (; nwords > 0; --nwords, ++d, ++s)
911#ifdef __powerpc__
912		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
913#else
914		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
915			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
916#endif
917	    return;
918        }
919    case RADEON_HOST_DATA_SWAP_16BIT:
920        {
921	    unsigned short *d = (unsigned short *)dst;
922	    unsigned short *s = (unsigned short *)src;
923	    unsigned int nwords = size >> 1;
924
925	    for (; nwords > 0; --nwords, ++d, ++s)
926#ifdef __powerpc__
927		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
928#else
929	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
930			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
931#endif
932	    return;
933	}
934    }
935    if (src != dst)
936	    memmove(dst, src, size);
937}
938
939/* Copies a single pass worth of data for a hostdata blit set up by
940 * RADEONHostDataBlit().
941 */
942void
943RADEONHostDataBlitCopyPass(
944    ScrnInfoPtr pScrn,
945    unsigned int cpp,
946    uint8_t *dst,
947    uint8_t *src,
948    unsigned int hpass,
949    unsigned int dstPitch,
950    unsigned int srcPitch
951){
952
953#if X_BYTE_ORDER == X_BIG_ENDIAN
954    RADEONInfoPtr info = RADEONPTR( pScrn );
955#endif
956
957    /* RADEONHostDataBlitCopy can return NULL ! */
958    if( (dst==NULL) || (src==NULL)) return;
959
960    if ( dstPitch == srcPitch )
961    {
962#if X_BYTE_ORDER == X_BIG_ENDIAN
963        if (info->ChipFamily >= CHIP_FAMILY_R300) {
964	    switch(cpp) {
965	    case 1:
966		RADEONCopySwap(dst, src, hpass * dstPitch,
967			       RADEON_HOST_DATA_SWAP_32BIT);
968		return;
969	    case 2:
970	        RADEONCopySwap(dst, src, hpass * dstPitch,
971			       RADEON_HOST_DATA_SWAP_HDW);
972		return;
973	    }
974	}
975#endif
976	memcpy( dst, src, hpass * dstPitch );
977    }
978    else
979    {
980	unsigned int minPitch = min( dstPitch, srcPitch );
981	while ( hpass-- )
982	{
983#if X_BYTE_ORDER == X_BIG_ENDIAN
984            if (info->ChipFamily >= CHIP_FAMILY_R300) {
985		switch(cpp) {
986		case 1:
987		    RADEONCopySwap(dst, src, minPitch,
988				   RADEON_HOST_DATA_SWAP_32BIT);
989		    goto next;
990		case 2:
991	            RADEONCopySwap(dst, src, minPitch,
992				   RADEON_HOST_DATA_SWAP_HDW);
993		    goto next;
994		}
995	    }
996#endif
997	    memcpy( dst, src, minPitch );
998#if X_BYTE_ORDER == X_BIG_ENDIAN
999	next:
1000#endif
1001	    src += srcPitch;
1002	    dst += dstPitch;
1003	}
1004    }
1005}
1006
1007#endif
1008
1009Bool RADEONAccelInit(ScreenPtr pScreen)
1010{
1011    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1012    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1013
1014#ifdef USE_EXA
1015    if (info->useEXA) {
1016# ifdef XF86DRI
1017	if (info->directRenderingEnabled) {
1018	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1019		if (!R600DrawInit(pScreen))
1020		    return FALSE;
1021	    } else {
1022		if (!RADEONDrawInitCP(pScreen))
1023		    return FALSE;
1024	    }
1025	} else
1026# endif /* XF86DRI */
1027	{
1028	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1029		return FALSE;
1030	    else {
1031		if (!RADEONDrawInitMMIO(pScreen))
1032		    return FALSE;
1033	    }
1034	}
1035    }
1036#endif /* USE_EXA */
1037#ifdef USE_XAA
1038    if (!info->useEXA) {
1039	XAAInfoRecPtr  a;
1040
1041	if (info->ChipFamily >= CHIP_FAMILY_R600)
1042	    return FALSE;
1043
1044	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1045	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1046	    return FALSE;
1047	}
1048
1049#ifdef XF86DRI
1050	if (info->directRenderingEnabled)
1051	    RADEONAccelInitCP(pScreen, a);
1052	else
1053#endif /* XF86DRI */
1054	    RADEONAccelInitMMIO(pScreen, a);
1055
1056	RADEONEngineInit(pScrn);
1057
1058	if (!XAAInit(pScreen, a)) {
1059	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1060	    return FALSE;
1061	}
1062    }
1063#endif /* USE_XAA */
1064    return TRUE;
1065}
1066
1067void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1068{
1069    RADEONInfoPtr info = RADEONPTR (pScrn);
1070
1071#ifdef XF86DRI
1072    if (info->directRenderingEnabled) {
1073	drm_radeon_sarea_t *pSAREAPriv;
1074
1075	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1076	pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1077	RADEONInit3DEngineCP(pScrn);
1078    } else
1079#endif
1080	RADEONInit3DEngineMMIO(pScrn);
1081
1082    info->accel_state->XInited3D = TRUE;
1083}
1084
1085#ifdef USE_XAA
1086#ifdef XF86DRI
1087Bool
1088RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1089{
1090    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1091    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1092    int            cpp = info->CurrentLayout.pixel_bytes;
1093    int            depthCpp = (info->dri->depthBits - 8) / 4;
1094    int            width_bytes = pScrn->displayWidth * cpp;
1095    int            bufferSize;
1096    int            depthSize;
1097    int            l;
1098    int            scanlines;
1099    int            texsizerequest;
1100    BoxRec         MemBox;
1101    FBAreaPtr      fbarea;
1102
1103    info->dri->frontOffset = 0;
1104    info->dri->frontPitch = pScrn->displayWidth;
1105    info->dri->backPitch = pScrn->displayWidth;
1106
1107    /* make sure we use 16 line alignment for tiling (8 might be enough).
1108     * Might need that for non-XF86DRI too?
1109     */
1110    if (info->allowColorTiling) {
1111	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
1112		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1113    } else {
1114        bufferSize = (pScrn->virtualY * width_bytes
1115		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1116    }
1117
1118    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1119     * which is always the case if color tiling is used due to color pitch
1120     * but not necessarily otherwise, and its height a multiple of 16 lines.
1121     */
1122    info->dri->depthPitch = (pScrn->displayWidth + 31) & ~31;
1123    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->dri->depthPitch
1124		  * depthCpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
1125
1126    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1127	       "Using %d MB GART aperture\n", info->dri->gartSize);
1128    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1129	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1130    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1131	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1132    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1133	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1134
1135    /* Try for front, back, depth, and three framebuffers worth of
1136     * pixmap cache.  Should be enough for a fullscreen background
1137     * image plus some leftovers.
1138     * If the FBTexPercent option was used, try to achieve that percentage instead,
1139     * but still have at least one pixmap buffer (get problems with xvideo/render
1140     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1141     * probably useless for XAA.
1142     */
1143    if (info->dri->textureSize >= 0) {
1144	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1145			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1146	/* first divide, then multiply or we'll get an overflow (been there...) */
1147			 / 100 * info->dri->textureSize;
1148    }
1149    else {
1150	texsizerequest = (int)info->FbMapSize / 2;
1151    }
1152    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1153
1154    /* If that gives us less than the requested memory, let's
1155     * be greedy and grab some more.  Sorry, I care more about 3D
1156     * performance than playing nicely, and you'll get around a full
1157     * framebuffer's worth of pixmap cache anyway.
1158     */
1159    if (info->dri->textureSize < texsizerequest) {
1160        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1161    }
1162    if (info->dri->textureSize < texsizerequest) {
1163        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1164    }
1165
1166    /* If there's still no space for textures, try without pixmap cache, but
1167     * never use the reserved space, the space hw cursor and PCIGART table might
1168     * use.
1169     */
1170    if (info->dri->textureSize < 0) {
1171	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1172	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1173    }
1174
1175    /* Check to see if there is more room available after the 8192nd
1176     * scanline for textures
1177     */
1178    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1179    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1180	> info->dri->textureSize) {
1181	info->dri->textureSize =
1182		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1183    }
1184
1185    /* If backbuffer is disabled, don't allocate memory for it */
1186    if (info->dri->noBackBuffer) {
1187	info->dri->textureSize += bufferSize;
1188    }
1189
1190    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1191       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1192       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1193       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1194       area otherwise).
1195       This might cause some space at the end of the video memory to be unused, since it
1196       can't be used (?) due to that log_tex_granularity thing???
1197       Could use different copyscreentoscreen function for the pageflip copies
1198       (which would use different src and dst offsets) to avoid this. */
1199    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1200	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1201			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1202    }
1203    if (info->dri->textureSize > 0) {
1204	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1205	if (l < RADEON_LOG_TEX_GRANULARITY)
1206	    l = RADEON_LOG_TEX_GRANULARITY;
1207	/* Round the texture size up to the nearest whole number of
1208	 * texture regions.  Again, be greedy about this, don't
1209	 * round down.
1210	 */
1211	info->dri->log2TexGran = l;
1212	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1213    } else {
1214	info->dri->textureSize = 0;
1215    }
1216
1217    /* Set a minimum usable local texture heap size.  This will fit
1218     * two 256x256x32bpp textures.
1219     */
1220    if (info->dri->textureSize < 512 * 1024) {
1221	info->dri->textureOffset = 0;
1222	info->dri->textureSize = 0;
1223    }
1224
1225    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1226	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1227				    (width_bytes * 16)) * (width_bytes * 16);
1228    }
1229    else {
1230	/* Reserve space for textures */
1231	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize +
1232				     RADEON_BUFFER_ALIGN) &
1233				    ~(uint32_t)RADEON_BUFFER_ALIGN);
1234    }
1235
1236    /* Reserve space for the shared depth
1237     * buffer.
1238     */
1239    info->dri->depthOffset = ((info->dri->textureOffset - depthSize +
1240			       RADEON_BUFFER_ALIGN) &
1241			      ~(uint32_t)RADEON_BUFFER_ALIGN);
1242
1243    /* Reserve space for the shared back buffer */
1244    if (info->dri->noBackBuffer) {
1245       info->dri->backOffset = info->dri->depthOffset;
1246    } else {
1247       info->dri->backOffset = ((info->dri->depthOffset - bufferSize +
1248				 RADEON_BUFFER_ALIGN) &
1249				~(uint32_t)RADEON_BUFFER_ALIGN);
1250    }
1251
1252    info->dri->backY = info->dri->backOffset / width_bytes;
1253    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1254
1255    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1256    if (scanlines > 8191)
1257	scanlines = 8191;
1258
1259    MemBox.x1 = 0;
1260    MemBox.y1 = 0;
1261    MemBox.x2 = pScrn->displayWidth;
1262    MemBox.y2 = scanlines;
1263
1264    if (!xf86InitFBManager(pScreen, &MemBox)) {
1265        xf86DrvMsg(scrnIndex, X_ERROR,
1266		   "Memory manager initialization to "
1267		   "(%d,%d) (%d,%d) failed\n",
1268		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1269	return FALSE;
1270    } else {
1271	int  width, height;
1272
1273	xf86DrvMsg(scrnIndex, X_INFO,
1274		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1275		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1276	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1277	   aligned... sigh */
1278	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1279						pScrn->displayWidth,
1280						info->allowColorTiling ?
1281						((pScrn->virtualY + 15) & ~15)
1282						- pScrn->virtualY + 2 : 2,
1283						0, NULL, NULL,
1284						NULL))) {
1285	    xf86DrvMsg(scrnIndex, X_INFO,
1286		       "Reserved area from (%d,%d) to (%d,%d)\n",
1287		       fbarea->box.x1, fbarea->box.y1,
1288		       fbarea->box.x2, fbarea->box.y2);
1289	} else {
1290	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1291	}
1292
1293	RADEONDRIAllocatePCIGARTTable(pScreen);
1294
1295	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1296					  &height, 0, 0, 0)) {
1297	    xf86DrvMsg(scrnIndex, X_INFO,
1298		       "Largest offscreen area available: %d x %d\n",
1299		       width, height);
1300
1301	    /* Lines in offscreen area needed for depth buffer and
1302	     * textures
1303	     */
1304	    info->dri->depthTexLines = (scanlines
1305					- info->dri->depthOffset / width_bytes);
1306	    info->dri->backLines	    = (scanlines
1307					       - info->dri->backOffset / width_bytes
1308					       - info->dri->depthTexLines);
1309	    info->dri->backArea	    = NULL;
1310	} else {
1311	    xf86DrvMsg(scrnIndex, X_ERROR,
1312		       "Unable to determine largest offscreen area "
1313		       "available\n");
1314	    return FALSE;
1315	}
1316    }
1317
1318    xf86DrvMsg(scrnIndex, X_INFO,
1319	       "Will use front buffer at offset 0x%x\n",
1320	       info->dri->frontOffset);
1321
1322    xf86DrvMsg(scrnIndex, X_INFO,
1323	       "Will use back buffer at offset 0x%x\n",
1324	       info->dri->backOffset);
1325    xf86DrvMsg(scrnIndex, X_INFO,
1326	       "Will use depth buffer at offset 0x%x\n",
1327	       info->dri->depthOffset);
1328    if (info->cardType==CARD_PCIE)
1329    	xf86DrvMsg(scrnIndex, X_INFO,
1330	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1331		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1332    xf86DrvMsg(scrnIndex, X_INFO,
1333	       "Will use %d kb for textures at offset 0x%x\n",
1334	       info->dri->textureSize/1024, info->dri->textureOffset);
1335
1336    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1337				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1338
1339    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1340				  ((info->dri->backOffset + info->fbLocation) >> 10));
1341
1342    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1343				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1344    return TRUE;
1345}
1346#endif /* XF86DRI */
1347
1348Bool
1349RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1350{
1351    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1352    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1353    BoxRec         MemBox;
1354    int            y2;
1355
1356    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1357
1358    MemBox.x1 = 0;
1359    MemBox.y1 = 0;
1360    MemBox.x2 = pScrn->displayWidth;
1361    y2 = info->FbMapSize / width_bytes;
1362    if (y2 >= 32768)
1363	y2 = 32767; /* because MemBox.y2 is signed short */
1364    MemBox.y2 = y2;
1365
1366    /* The acceleration engine uses 14 bit
1367     * signed coordinates, so we can't have any
1368     * drawable caches beyond this region.
1369     */
1370    if (MemBox.y2 > 8191)
1371	MemBox.y2 = 8191;
1372
1373    if (!xf86InitFBManager(pScreen, &MemBox)) {
1374	xf86DrvMsg(scrnIndex, X_ERROR,
1375		   "Memory manager initialization to "
1376		   "(%d,%d) (%d,%d) failed\n",
1377		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1378	return FALSE;
1379    } else {
1380	int       width, height;
1381	FBAreaPtr fbarea;
1382
1383	xf86DrvMsg(scrnIndex, X_INFO,
1384		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1385		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1386	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1387						pScrn->displayWidth,
1388						info->allowColorTiling ?
1389						((pScrn->virtualY + 15) & ~15)
1390						- pScrn->virtualY + 2 : 2,
1391						0, NULL, NULL,
1392						NULL))) {
1393	    xf86DrvMsg(scrnIndex, X_INFO,
1394		       "Reserved area from (%d,%d) to (%d,%d)\n",
1395		       fbarea->box.x1, fbarea->box.y1,
1396		       fbarea->box.x2, fbarea->box.y2);
1397	} else {
1398	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1399	}
1400	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1401					      0, 0, 0)) {
1402	    xf86DrvMsg(scrnIndex, X_INFO,
1403		       "Largest offscreen area available: %d x %d\n",
1404		       width, height);
1405	}
1406	return TRUE;
1407    }
1408}
1409#endif /* USE_XAA */
1410