radeon_accel.c revision 72f1971a
1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *
39 * Credits:
40 *
41 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42 *   code to his Radeon driver.  Portions of this file are based on the
43 *   initialization code for that driver.
44 *
45 * References:
46 *
47 * !!!! FIXME !!!!
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   This has been removed as XAA expects 16bit registers
58 *                  for full clipping.
59 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60 *   DashedLine with non-power-of-two pattern length: Apparently, there is
61 *                  no way to set the length of the pattern -- it is always
62 *                  assumed to be 8 or 32 (or 1024?).
63 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64 *                  Manual where it states that monochrome expansion of frame
65 *                  buffer data is not supported.
66 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67 *                  direct/indirect method.  If we had more data registers,
68 *                  then we could do better.  If XAA supported a trigger write
69 *                  address, the code would be simpler.
70 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71 *                  pattern from frame buffer memory.
72 *   ImageWrites:   Same as CPUToScreenColorExpandFill
73 *
74 */
75
76#include <errno.h>
77#include <string.h>
78#include <assert.h>
79				/* Driver data structures */
80#include "radeon.h"
81#include "radeon_reg.h"
82#include "r600_reg.h"
83#include "radeon_macros.h"
84#include "radeon_probe.h"
85#include "radeon_version.h"
86#ifdef XF86DRI
87#define _XF86DRI_SERVER_
88#include "radeon_drm.h"
89#endif
90
91#include "ati_pciids_gen.h"
92
93				/* Line support */
94#include "miline.h"
95
96				/* X and server generic header files */
97#include "xf86.h"
98
99static void R600EngineReset(ScrnInfoPtr pScrn);
100
101#ifdef USE_XAA
102static struct {
103    int rop;
104    int pattern;
105} RADEON_ROP[] = {
106    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
107    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
108    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
109    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
110    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
111    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
112    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
113    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
114    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
115    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
116    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
117    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
118    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
119    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
120    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
121    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
122};
123#endif
124
125/* The FIFO has 64 slots.  This routines waits until at least `entries'
126 * of these slots are empty.
127 */
128void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
129{
130    RADEONInfoPtr  info       = RADEONPTR(pScrn);
131    unsigned char *RADEONMMIO = info->MMIO;
132    int            i;
133
134    for (;;) {
135	for (i = 0; i < RADEON_TIMEOUT; i++) {
136	    info->accel_state->fifo_slots =
137		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
138	    if (info->accel_state->fifo_slots >= entries) return;
139	}
140	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
141		       "FIFO timed out: %u entries, stat=0x%08x\n",
142		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
143		       (unsigned int)INREG(RADEON_RBBM_STATUS));
144	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
145		   "FIFO timed out, resetting engine...\n");
146	RADEONEngineReset(pScrn);
147	RADEONEngineRestore(pScrn);
148#ifdef XF86DRI
149	if (info->directRenderingEnabled) {
150	    RADEONCP_RESET(pScrn, info);
151	    RADEONCP_START(pScrn, info);
152	}
153#endif
154    }
155}
156
157void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
158{
159    RADEONInfoPtr  info       = RADEONPTR(pScrn);
160    unsigned char *RADEONMMIO = info->MMIO;
161    int            i;
162
163    for (;;) {
164	for (i = 0; i < RADEON_TIMEOUT; i++) {
165	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
166		info->accel_state->fifo_slots =
167		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
168	    else
169		info->accel_state->fifo_slots =
170		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
171	    if (info->accel_state->fifo_slots >= entries) return;
172	}
173	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
174		       "FIFO timed out: stat=0x%08x\n",
175		       (unsigned int)INREG(R600_GRBM_STATUS));
176	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
177		   "FIFO timed out, resetting engine...\n");
178	R600EngineReset(pScrn);
179#ifdef XF86DRI
180	if (info->directRenderingEnabled) {
181	    RADEONCP_RESET(pScrn, info);
182	    RADEONCP_START(pScrn, info);
183	}
184#endif
185    }
186}
187
188/* Flush all dirty data in the Pixel Cache to memory */
189void RADEONEngineFlush(ScrnInfoPtr pScrn)
190{
191    RADEONInfoPtr  info       = RADEONPTR(pScrn);
192    unsigned char *RADEONMMIO = info->MMIO;
193    int            i;
194
195    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
196	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
197		RADEON_RB3D_DC_FLUSH_ALL,
198		~RADEON_RB3D_DC_FLUSH_ALL);
199	for (i = 0; i < RADEON_TIMEOUT; i++) {
200	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
201		break;
202	}
203	if (i == RADEON_TIMEOUT) {
204	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
205			   "DC flush timeout: %x\n",
206			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
207	}
208    } else {
209	OUTREGP(R300_DSTCACHE_CTLSTAT,
210		R300_RB2D_DC_FLUSH_ALL,
211		~R300_RB2D_DC_FLUSH_ALL);
212	for (i = 0; i < RADEON_TIMEOUT; i++) {
213	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
214		break;
215	}
216	if (i == RADEON_TIMEOUT) {
217	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
218			   "DC flush timeout: %x\n",
219			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
220	}
221    }
222}
223
224/* Reset graphics card to known state */
225void RADEONEngineReset(ScrnInfoPtr pScrn)
226{
227    RADEONInfoPtr  info       = RADEONPTR(pScrn);
228    unsigned char *RADEONMMIO = info->MMIO;
229    uint32_t       clock_cntl_index;
230    uint32_t       mclk_cntl;
231    uint32_t       rbbm_soft_reset;
232    uint32_t       host_path_cntl;
233
234    /* The following RBBM_SOFT_RESET sequence can help un-wedge
235     * an R300 after the command processor got stuck.
236     */
237    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
238    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
239                                   RADEON_SOFT_RESET_CP |
240                                   RADEON_SOFT_RESET_HI |
241                                   RADEON_SOFT_RESET_SE |
242                                   RADEON_SOFT_RESET_RE |
243                                   RADEON_SOFT_RESET_PP |
244                                   RADEON_SOFT_RESET_E2 |
245                                   RADEON_SOFT_RESET_RB));
246    INREG(RADEON_RBBM_SOFT_RESET);
247    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
248                                   ~(RADEON_SOFT_RESET_CP |
249                                     RADEON_SOFT_RESET_HI |
250                                     RADEON_SOFT_RESET_SE |
251                                     RADEON_SOFT_RESET_RE |
252                                     RADEON_SOFT_RESET_PP |
253                                     RADEON_SOFT_RESET_E2 |
254                                     RADEON_SOFT_RESET_RB)));
255    INREG(RADEON_RBBM_SOFT_RESET);
256    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
257    INREG(RADEON_RBBM_SOFT_RESET);
258
259    RADEONEngineFlush(pScrn);
260
261    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
262    RADEONPllErrataAfterIndex(info);
263
264#if 0 /* taken care of by new PM code */
265    /* Some ASICs have bugs with dynamic-on feature, which are
266     * ASIC-version dependent, so we force all blocks on for now
267     */
268    if (info->HasCRTC2) {
269	uint32_t tmp;
270
271	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
272	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
273				  RADEON_CP_MAX_DYN_STOP_LAT |
274				  RADEON_SCLK_FORCEON_MASK));
275
276	if (info->ChipFamily == CHIP_FAMILY_RV200) {
277	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
278	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
279	}
280    }
281#endif /* new PM code */
282
283    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
284
285#if 0 /* handled by new PM code */
286    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
287			      RADEON_FORCEON_MCLKA |
288			      RADEON_FORCEON_MCLKB |
289			      RADEON_FORCEON_YCLKA |
290			      RADEON_FORCEON_YCLKB |
291			      RADEON_FORCEON_MC |
292			      RADEON_FORCEON_AIC));
293#endif /* new PM code */
294
295    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
296     * unexpected behaviour on some machines.  Here we use
297     * RADEON_HOST_PATH_CNTL to reset it.
298     */
299    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
300    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
301
302    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
303	uint32_t tmp;
304
305	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
306					RADEON_SOFT_RESET_CP |
307					RADEON_SOFT_RESET_HI |
308					RADEON_SOFT_RESET_E2));
309	INREG(RADEON_RBBM_SOFT_RESET);
310	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
311	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
312	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
313    } else {
314	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
315					RADEON_SOFT_RESET_CP |
316					RADEON_SOFT_RESET_SE |
317					RADEON_SOFT_RESET_RE |
318					RADEON_SOFT_RESET_PP |
319					RADEON_SOFT_RESET_E2 |
320					RADEON_SOFT_RESET_RB));
321	INREG(RADEON_RBBM_SOFT_RESET);
322	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
323					~(RADEON_SOFT_RESET_CP |
324					  RADEON_SOFT_RESET_SE |
325					  RADEON_SOFT_RESET_RE |
326					  RADEON_SOFT_RESET_PP |
327					  RADEON_SOFT_RESET_E2 |
328					  RADEON_SOFT_RESET_RB)));
329	INREG(RADEON_RBBM_SOFT_RESET);
330    }
331
332    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
333	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
334
335    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
336    RADEONPllErrataAfterIndex(info);
337    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
338}
339
340/* Reset graphics card to known state */
341static void R600EngineReset(ScrnInfoPtr pScrn)
342{
343    RADEONInfoPtr  info       = RADEONPTR(pScrn);
344    unsigned char *RADEONMMIO = info->MMIO;
345    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
346
347    cp_ptr = INREG(R600_CP_RB_WPTR);
348
349    cp_me_cntl = INREG(R600_CP_ME_CNTL);
350    OUTREG(R600_CP_ME_CNTL, 0x10000000);
351
352    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
353    INREG(R600_GRBM_SOFT_RESET);
354    usleep (50);
355    OUTREG(R600_GRBM_SOFT_RESET, 0);
356    INREG(R600_GRBM_SOFT_RESET);
357
358    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
359    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
360    OUTREG(R600_CP_RB_CNTL, 0x80000000);
361
362    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
363    OUTREG(R600_CP_RB_WPTR, cp_ptr);
364    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
365    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
366
367}
368
369/* Restore the acceleration hardware to its previous state */
370void RADEONEngineRestore(ScrnInfoPtr pScrn)
371{
372    RADEONInfoPtr  info       = RADEONPTR(pScrn);
373    unsigned char *RADEONMMIO = info->MMIO;
374
375    if (info->cs)
376      return;
377
378    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
379		   "EngineRestore (%d/%d)\n",
380		   info->CurrentLayout.pixel_code,
381		   info->CurrentLayout.bitsPerPixel);
382
383    /* Setup engine location. This shouldn't be necessary since we
384     * set them appropriately before any accel ops, but let's avoid
385     * random bogus DMA in case we inadvertently trigger the engine
386     * in the wrong place (happened).
387     */
388    RADEONWaitForFifo(pScrn, 2);
389    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
391
392    RADEONWaitForFifo(pScrn, 1);
393#if X_BYTE_ORDER == X_BIG_ENDIAN
394    OUTREGP(RADEON_DP_DATATYPE,
395	    RADEON_HOST_BIG_ENDIAN_EN,
396	    ~RADEON_HOST_BIG_ENDIAN_EN);
397#else
398    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
399#endif
400
401    /* Restore SURFACE_CNTL */
402    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
403
404    RADEONWaitForFifo(pScrn, 1);
405    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
406					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
407    RADEONWaitForFifo(pScrn, 1);
408    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
409				       | RADEON_GMC_BRUSH_SOLID_COLOR
410				       | RADEON_GMC_SRC_DATATYPE_COLOR));
411
412    RADEONWaitForFifo(pScrn, 5);
413    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
414    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
415    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
416    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
417    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
418
419    RADEONWaitForIdleMMIO(pScrn);
420
421    info->accel_state->XInited3D = FALSE;
422}
423
424static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
425{
426    RADEONInfoPtr info = RADEONPTR(pScrn);
427    if (info->dri->pKernelDRMVersion->version_major < 2) {
428        drm_radeon_getparam_t np;
429
430        memset(&np, 0, sizeof(np));
431        np.param = RADEON_PARAM_NUM_GB_PIPES;
432        np.value = num_pipes;
433        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
434    } else {
435        struct drm_radeon_info np2;
436        np2.value = (unsigned long)num_pipes;
437        np2.request = RADEON_INFO_NUM_GB_PIPES;
438        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
439    }
440}
441
442/* Initialize the acceleration hardware */
443void RADEONEngineInit(ScrnInfoPtr pScrn)
444{
445    RADEONInfoPtr  info       = RADEONPTR(pScrn);
446    unsigned char *RADEONMMIO = info->MMIO;
447    int datatype = 0;
448    info->accel_state->num_gb_pipes = 0;
449
450    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
451		   "EngineInit (%d/%d)\n",
452		   info->CurrentLayout.pixel_code,
453		   info->CurrentLayout.bitsPerPixel);
454
455#ifdef XF86DRI
456    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
457	int num_pipes;
458
459	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
460	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
461		       "Failed to determine num pipes from DRM, falling back to "
462		       "manual look-up!\n");
463	    info->accel_state->num_gb_pipes = 0;
464	} else {
465	    info->accel_state->num_gb_pipes = num_pipes;
466	}
467    }
468#endif
469
470    if (!info->cs) {
471	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
472	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
473	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
474	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
475	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
476	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
477	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
478	    IS_R500_3D) {
479	    if (info->accel_state->num_gb_pipes == 0) {
480		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
481
482		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
483		if (IS_R500_3D)
484		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
485	    }
486	} else {
487	    if (info->accel_state->num_gb_pipes == 0) {
488		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
489		    (info->ChipFamily == CHIP_FAMILY_R350)) {
490		    /* R3xx chips */
491		    info->accel_state->num_gb_pipes = 2;
492		} else {
493		    /* RV3xx chips */
494		    info->accel_state->num_gb_pipes = 1;
495		}
496	    }
497	}
498
499	/* SE cards only have 1 quadpipe */
500	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
501	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
502	    (info->Chipset == PCI_CHIP_R300_AD) ||
503	    (info->Chipset == PCI_CHIP_R350_AH))
504	    info->accel_state->num_gb_pipes = 1;
505
506	if (IS_R300_3D || IS_R500_3D)
507	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
508		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
509
510	if (IS_R300_3D || IS_R500_3D) {
511	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
512
513	    switch(info->accel_state->num_gb_pipes) {
514	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
515	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
516	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
517	    default:
518	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
519	    }
520
521	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
522	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
523	    if (info->ChipFamily >= CHIP_FAMILY_R420)
524		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
525	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
526					     R300_DC_AUTOFLUSH_ENABLE |
527					     R300_DC_DC_DISABLE_IGNORE_PE));
528	} else
529	    OUTREG(RADEON_RB3D_CNTL, 0);
530
531	RADEONEngineReset(pScrn);
532    }
533
534    switch (info->CurrentLayout.pixel_code) {
535    case 8:  datatype = 2; break;
536    case 15: datatype = 3; break;
537    case 16: datatype = 4; break;
538    case 24: datatype = 5; break;
539    case 32: datatype = 6; break;
540    default:
541	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
542		       "Unknown depth/bpp = %d/%d (code = %d)\n",
543		       info->CurrentLayout.depth,
544		       info->CurrentLayout.bitsPerPixel,
545		       info->CurrentLayout.pixel_code);
546    }
547
548    info->accel_state->dp_gui_master_cntl =
549	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
550	 | RADEON_GMC_CLR_CMP_CNTL_DIS
551	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
552
553    RADEONEngineRestore(pScrn);
554}
555
556uint32_t radeonGetPixmapOffset(PixmapPtr pPix)
557{
558    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
559    RADEONInfoPtr info = RADEONPTR(pScrn);
560    uint32_t offset = 0;
561    if (info->cs)
562	return 0;
563#ifdef USE_EXA
564    if (info->useEXA) {
565	offset = exaGetPixmapOffset(pPix);
566    } else
567#endif
568    {
569	offset = pPix->devPrivate.ptr - info->FB;
570    }
571    offset += info->fbLocation + pScrn->fbOffset;
572    return offset;
573}
574
575int radeon_cs_space_remaining(ScrnInfoPtr pScrn)
576{
577    RADEONInfoPtr info = RADEONPTR(pScrn);
578
579#ifdef XF86DRM_MODE
580    if (info->cs)
581	return (info->cs->ndw - info->cs->cdw);
582    else
583#endif
584        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
585}
586
587#define ACCEL_MMIO
588#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
589#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
590#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
591#define FINISH_ACCEL()
592
593#include "radeon_commonfuncs.c"
594#if defined(RENDER) && defined(USE_XAA)
595#include "radeon_render.c"
596#endif
597#include "radeon_accelfuncs.c"
598
599#undef ACCEL_MMIO
600#undef ACCEL_PREAMBLE
601#undef BEGIN_ACCEL
602#undef OUT_ACCEL_REG
603#undef FINISH_ACCEL
604
605#ifdef XF86DRI
606
607#define ACCEL_CP
608#define ACCEL_PREAMBLE()						\
609    RING_LOCALS;							\
610    RADEONCP_REFRESH(pScrn, info)
611#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
612#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
613#define FINISH_ACCEL()          ADVANCE_RING()
614
615
616#include "radeon_commonfuncs.c"
617#if defined(RENDER) && defined(USE_XAA)
618#include "radeon_render.c"
619#endif
620#include "radeon_accelfuncs.c"
621
622#undef ACCEL_CP
623#undef ACCEL_PREAMBLE
624#undef BEGIN_ACCEL
625#undef OUT_ACCEL_REG
626#undef FINISH_ACCEL
627
628/* Stop the CP */
629int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
630{
631    drm_radeon_cp_stop_t  stop;
632    int              ret, i;
633
634    stop.flush = 1;
635    stop.idle  = 1;
636
637    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
638			  sizeof(drm_radeon_cp_stop_t));
639
640    if (ret == 0) {
641	return 0;
642    } else if (errno != EBUSY) {
643	return -errno;
644    }
645
646    stop.flush = 0;
647
648    i = 0;
649    do {
650	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
651			      sizeof(drm_radeon_cp_stop_t));
652    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
653
654    if (ret == 0) {
655	return 0;
656    } else if (errno != EBUSY) {
657	return -errno;
658    }
659
660    stop.idle = 0;
661
662    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
663			&stop, sizeof(drm_radeon_cp_stop_t))) {
664	return -errno;
665    } else {
666	return 0;
667    }
668}
669
670#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
671
672/* Get an indirect buffer for the CP 2D acceleration commands  */
673drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
674{
675    RADEONInfoPtr  info = RADEONPTR(pScrn);
676    drmDMAReq      dma;
677    drmBufPtr      buf = NULL;
678    int            indx = 0;
679    int            size = 0;
680    int            i = 0;
681    int            ret;
682
683#if 0
684    /* FIXME: pScrn->pScreen has not been initialized when this is first
685     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
686     * the screen index from pScrn, which is initialized, and then get
687     * the screen from screenInfo.screens[index], but that is a hack.
688     */
689    dma.context = DRIGetContext(pScrn->pScreen);
690#else
691    /* This is the X server's context */
692    dma.context = 0x00000001;
693#endif
694
695    dma.send_count    = 0;
696    dma.send_list     = NULL;
697    dma.send_sizes    = NULL;
698    dma.flags         = 0;
699    dma.request_count = 1;
700    dma.request_size  = RADEON_BUFFER_SIZE;
701    dma.request_list  = &indx;
702    dma.request_sizes = &size;
703    dma.granted_count = 0;
704
705    while (1) {
706	do {
707	    ret = drmDMA(info->dri->drmFD, &dma);
708	    if (ret && ret != -EBUSY) {
709		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
710			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
711	    }
712	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
713
714	if (ret == 0) {
715	    buf = &info->dri->buffers->list[indx];
716	    buf->used = 0;
717	    if (RADEON_VERBOSE) {
718		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
719			   "   GetBuffer returning %d %p\n",
720			   buf->idx, buf->address);
721	    }
722	    return buf;
723	}
724
725	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
726		   "GetBuffer timed out, resetting engine...\n");
727
728	if (info->ChipFamily < CHIP_FAMILY_R600) {
729	    RADEONEngineReset(pScrn);
730	    RADEONEngineRestore(pScrn);
731	} else
732	    R600EngineReset(pScrn);
733
734	/* Always restart the engine when doing CP 2D acceleration */
735	RADEONCP_RESET(pScrn, info);
736	RADEONCP_START(pScrn, info);
737    }
738}
739
740/* Flush the indirect buffer to the kernel for submission to the card */
741void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
742{
743    RADEONInfoPtr      info   = RADEONPTR(pScrn);
744    drmBufPtr          buffer = info->cp->indirectBuffer;
745    int                start  = info->cp->indirectStart;
746    drm_radeon_indirect_t  indirect;
747
748    assert(!info->cs);
749    if (!buffer) return;
750    if (start == buffer->used && !discard) return;
751
752    if (RADEON_VERBOSE) {
753	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
754		   buffer->idx);
755    }
756
757    if (info->ChipFamily >= CHIP_FAMILY_R600) {
758	if (buffer->used & 0x3c) {
759	    RING_LOCALS;
760
761	    while (buffer->used & 0x3c) {
762		BEGIN_RING(1);
763		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
764		ADVANCE_RING();
765	    }
766	}
767    }
768
769    indirect.idx     = buffer->idx;
770    indirect.start   = start;
771    indirect.end     = buffer->used;
772    indirect.discard = discard;
773
774    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
775			&indirect, sizeof(drm_radeon_indirect_t));
776
777    if (discard) {
778	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
779	info->cp->indirectStart  = 0;
780    } else {
781	/* Start on a double word boundary */
782	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
783	if (RADEON_VERBOSE) {
784	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
785		       info->cp->indirectStart);
786	}
787    }
788}
789
790/* Flush and release the indirect buffer */
791void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
792{
793    RADEONInfoPtr      info   = RADEONPTR(pScrn);
794    drmBufPtr          buffer = info->cp->indirectBuffer;
795    int                start  = info->cp->indirectStart;
796    drm_radeon_indirect_t  indirect;
797
798    assert(!info->cs);
799    if (info->ChipFamily >= CHIP_FAMILY_R600) {
800	if (buffer && (buffer->used & 0x3c)) {
801	    RING_LOCALS;
802
803	    while (buffer->used & 0x3c) {
804		BEGIN_RING(1);
805		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
806		ADVANCE_RING();
807	    }
808	}
809    }
810
811    info->cp->indirectBuffer = NULL;
812    info->cp->indirectStart  = 0;
813
814    if (!buffer) return;
815
816    if (RADEON_VERBOSE) {
817	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
818		   buffer->idx);
819    }
820
821    indirect.idx     = buffer->idx;
822    indirect.start   = start;
823    indirect.end     = buffer->used;
824    indirect.discard = 1;
825
826    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
827			&indirect, sizeof(drm_radeon_indirect_t));
828}
829
830/** \brief Calculate HostDataBlit parameters from pointer and pitch
831 *
832 * This is a helper for the trivial HostDataBlit users that don't need to worry
833 * about tiling etc.
834 */
835void
836RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
837		     uint32_t *dstPitchOff, int *x, int *y)
838{
839    RADEONInfoPtr info = RADEONPTR( pScrn );
840    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
841
842    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
843    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
844    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
845}
846
847/* Set up a hostdata blit to transfer data from system memory to the
848 * framebuffer. Returns the address where the data can be written to and sets
849 * the dstPitch and hpass variables as required.
850 */
851uint8_t*
852RADEONHostDataBlit(
853    ScrnInfoPtr pScrn,
854    unsigned int cpp,
855    unsigned int w,
856    uint32_t dstPitchOff,
857    uint32_t *bufPitch,
858    int x,
859    int *y,
860    unsigned int *h,
861    unsigned int *hpass
862){
863    RADEONInfoPtr info = RADEONPTR( pScrn );
864    uint32_t format, dwords;
865    uint8_t *ret;
866    RING_LOCALS;
867
868    if ( *h == 0 )
869    {
870	return NULL;
871    }
872
873    switch ( cpp )
874    {
875    case 4:
876	format = RADEON_GMC_DST_32BPP;
877	*bufPitch = 4 * w;
878	break;
879    case 2:
880	format = RADEON_GMC_DST_16BPP;
881	*bufPitch = 2 * RADEON_ALIGN(w, 2);
882	break;
883    case 1:
884	format = RADEON_GMC_DST_8BPP_CI;
885	*bufPitch = RADEON_ALIGN(w, 4);
886	break;
887    default:
888	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
889		    "%s: Unsupported cpp %d!\n", __func__, cpp );
890	return NULL;
891    }
892
893#if X_BYTE_ORDER == X_BIG_ENDIAN
894    /* Swap doesn't work on R300 and later, it's handled during the
895     * copy to ind. buffer pass
896     */
897    if (info->ChipFamily < CHIP_FAMILY_R300) {
898        BEGIN_RING(2);
899	if (cpp == 2)
900	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
901			 RADEON_HOST_DATA_SWAP_HDW);
902	else if (cpp == 1)
903	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
904			 RADEON_HOST_DATA_SWAP_32BIT);
905	else
906	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
907			 RADEON_HOST_DATA_SWAP_NONE);
908	ADVANCE_RING();
909    }
910#endif
911
912    /*RADEON_PURGE_CACHE();
913      RADEON_WAIT_UNTIL_IDLE();*/
914
915    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
916    dwords = *hpass * *bufPitch / 4;
917
918    BEGIN_RING( dwords + 10 );
919    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
920    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
921	    | RADEON_GMC_DST_CLIPPING
922	    | RADEON_GMC_BRUSH_NONE
923	    | format
924	    | RADEON_GMC_SRC_DATATYPE_COLOR
925	    | RADEON_ROP3_S
926	    | RADEON_DP_SRC_SOURCE_HOST_DATA
927	    | RADEON_GMC_CLR_CMP_CNTL_DIS
928	    | RADEON_GMC_WR_MSK_DIS );
929    OUT_RING( dstPitchOff );
930    OUT_RING( (*y << 16) | x );
931    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
932    OUT_RING( 0xffffffff );
933    OUT_RING( 0xffffffff );
934    OUT_RING( *y << 16 | x );
935    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
936    OUT_RING( dwords );
937
938    ret = ( uint8_t* )&__head[__count];
939
940    __count += dwords;
941    ADVANCE_RING();
942
943    *y += *hpass;
944    *h -= *hpass;
945
946    return ret;
947}
948
949void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
950{
951    switch(swap) {
952    case RADEON_HOST_DATA_SWAP_HDW:
953        {
954	    unsigned int *d = (unsigned int *)dst;
955	    unsigned int *s = (unsigned int *)src;
956	    unsigned int nwords = size >> 2;
957
958	    for (; nwords > 0; --nwords, ++d, ++s)
959		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
960	    return;
961        }
962    case RADEON_HOST_DATA_SWAP_32BIT:
963	if (((uintptr_t)dst & 1) || ((uintptr_t)src & 1)) {
964	    uint8_t *d = (uint8_t *)dst;
965	    uint8_t *s = (uint8_t *)src;
966	    unsigned int nwords = size >> 2;
967
968	    for (; nwords > 0; --nwords, d+=4, s+=4) {
969	        d[0] = s[3];
970		d[1] = s[2];
971		d[2] = s[1];
972		d[3] = s[0];
973	    }
974	    return;
975        } else if (((uintptr_t)dst & 3) || ((uintptr_t)src & 3)) {
976	    /* copy 16bit wise */
977	    uint16_t *d = (uint16_t *)dst;
978	    uint16_t *s = (uint16_t *)src;
979	    unsigned int nwords = size >> 2;
980
981	    for (; nwords > 0; --nwords, d+=2, s+=2) {
982	        d[0] = ((s[1] >> 8) & 0xff) | ((s[1] & 0xff) << 8);
983	        d[1] = ((s[0] >> 8) & 0xff) | ((s[0] & 0xff) << 8);
984	    }
985	    return;
986	} else {
987	    unsigned int *d = (unsigned int *)dst;
988	    unsigned int *s = (unsigned int *)src;
989	    unsigned int nwords = size >> 2;
990
991	    for (; nwords > 0; --nwords, ++d, ++s)
992#ifdef __powerpc__
993		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
994#else
995		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
996			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
997#endif
998	    return;
999        }
1000    case RADEON_HOST_DATA_SWAP_16BIT:
1001        {
1002	    unsigned short *d = (unsigned short *)dst;
1003	    unsigned short *s = (unsigned short *)src;
1004	    unsigned int nwords = size >> 1;
1005
1006	    for (; nwords > 0; --nwords, ++d, ++s)
1007#ifdef __powerpc__
1008		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
1009#else
1010	        *d = (*s >> 8) | (*s << 8);
1011#endif
1012	    return;
1013	}
1014    }
1015    if (src != dst)
1016	memcpy(dst, src, size);
1017}
1018
1019/* Copies a single pass worth of data for a hostdata blit set up by
1020 * RADEONHostDataBlit().
1021 */
1022void
1023RADEONHostDataBlitCopyPass(
1024    ScrnInfoPtr pScrn,
1025    unsigned int cpp,
1026    uint8_t *dst,
1027    uint8_t *src,
1028    unsigned int hpass,
1029    unsigned int dstPitch,
1030    unsigned int srcPitch
1031){
1032
1033#if X_BYTE_ORDER == X_BIG_ENDIAN
1034    RADEONInfoPtr info = RADEONPTR( pScrn );
1035#endif
1036
1037    /* RADEONHostDataBlitCopy can return NULL ! */
1038    if( (dst==NULL) || (src==NULL)) return;
1039
1040    if ( dstPitch == srcPitch )
1041    {
1042#if X_BYTE_ORDER == X_BIG_ENDIAN
1043        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1044	    switch(cpp) {
1045	    case 1:
1046		RADEONCopySwap(dst, src, hpass * dstPitch,
1047			       RADEON_HOST_DATA_SWAP_32BIT);
1048		return;
1049	    case 2:
1050	        RADEONCopySwap(dst, src, hpass * dstPitch,
1051			       RADEON_HOST_DATA_SWAP_HDW);
1052		return;
1053	    }
1054	}
1055#endif
1056	memcpy( dst, src, hpass * dstPitch );
1057    }
1058    else
1059    {
1060	unsigned int minPitch = min( dstPitch, srcPitch );
1061	while ( hpass-- )
1062	{
1063#if X_BYTE_ORDER == X_BIG_ENDIAN
1064            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1065		switch(cpp) {
1066		case 1:
1067		    RADEONCopySwap(dst, src, minPitch,
1068				   RADEON_HOST_DATA_SWAP_32BIT);
1069		    goto next;
1070		case 2:
1071	            RADEONCopySwap(dst, src, minPitch,
1072				   RADEON_HOST_DATA_SWAP_HDW);
1073		    goto next;
1074		}
1075	    }
1076#endif
1077	    memcpy( dst, src, minPitch );
1078#if X_BYTE_ORDER == X_BIG_ENDIAN
1079	next:
1080#endif
1081	    src += srcPitch;
1082	    dst += dstPitch;
1083	}
1084    }
1085}
1086
1087#endif
1088
1089Bool RADEONAccelInit(ScreenPtr pScreen)
1090{
1091    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1092    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1093
1094#ifdef USE_EXA
1095    if (info->useEXA) {
1096# ifdef XF86DRI
1097	if (info->directRenderingEnabled) {
1098#ifdef XF86DRM_MODE
1099	    if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
1100		if (!EVERGREENDrawInit(pScreen))
1101		    return FALSE;
1102	    } else
1103#endif
1104	      if (info->ChipFamily >= CHIP_FAMILY_R600) {
1105		if (!R600DrawInit(pScreen))
1106		    return FALSE;
1107	    } else {
1108		if (!RADEONDrawInitCP(pScreen))
1109		    return FALSE;
1110	    }
1111	} else
1112# endif /* XF86DRI */
1113	{
1114	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1115		return FALSE;
1116	    else {
1117		if (!RADEONDrawInitMMIO(pScreen))
1118		    return FALSE;
1119	    }
1120	}
1121    }
1122#endif /* USE_EXA */
1123#ifdef USE_XAA
1124    if (!info->useEXA) {
1125	XAAInfoRecPtr  a;
1126
1127	if (info->ChipFamily >= CHIP_FAMILY_R600)
1128	    return FALSE;
1129
1130	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1131	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1132	    return FALSE;
1133	}
1134
1135#ifdef XF86DRI
1136	if (info->directRenderingEnabled)
1137	    RADEONAccelInitCP(pScreen, a);
1138	else
1139#endif /* XF86DRI */
1140	    RADEONAccelInitMMIO(pScreen, a);
1141
1142	RADEONEngineInit(pScrn);
1143
1144	if (!XAAInit(pScreen, a)) {
1145	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1146	    return FALSE;
1147	}
1148    }
1149#endif /* USE_XAA */
1150    return TRUE;
1151}
1152
1153void RADEONInit3DEngine(ScrnInfoPtr pScrn)
1154{
1155    RADEONInfoPtr info = RADEONPTR (pScrn);
1156
1157#ifdef XF86DRI
1158    if (info->directRenderingEnabled) {
1159	drm_radeon_sarea_t *pSAREAPriv;
1160
1161	if (!info->kms_enabled) {
1162	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1163	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1164	}
1165	RADEONInit3DEngineCP(pScrn);
1166    } else
1167#endif
1168	RADEONInit3DEngineMMIO(pScrn);
1169
1170    info->accel_state->XInited3D = TRUE;
1171}
1172
1173#ifdef USE_XAA
1174#ifdef XF86DRI
1175Bool
1176RADEONSetupMemXAA_DRI(ScreenPtr pScreen)
1177{
1178    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1179    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1180    int            cpp = info->CurrentLayout.pixel_bytes;
1181    int            depthCpp = (info->dri->depthBits - 8) / 4;
1182    int            width_bytes = pScrn->displayWidth * cpp;
1183    int            bufferSize;
1184    int            depthSize;
1185    int            l;
1186    int            scanlines;
1187    int            texsizerequest;
1188    BoxRec         MemBox;
1189    FBAreaPtr      fbarea;
1190
1191    info->dri->frontOffset = 0;
1192    info->dri->frontPitch = pScrn->displayWidth;
1193    info->dri->backPitch = pScrn->displayWidth;
1194
1195    /* make sure we use 16 line alignment for tiling (8 might be enough).
1196     * Might need that for non-XF86DRI too?
1197     */
1198    if (info->allowColorTiling) {
1199	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1200		      RADEON_GPU_PAGE_SIZE);
1201    } else {
1202        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1203		      RADEON_GPU_PAGE_SIZE);
1204    }
1205
1206    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1207     * which is always the case if color tiling is used due to color pitch
1208     * but not necessarily otherwise, and its height a multiple of 16 lines.
1209     */
1210    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1211    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1212		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1213
1214    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1215	       "Using %d MB GART aperture\n", info->dri->gartSize);
1216    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1217	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1218    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1219	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1220    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1221	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1222
1223    /* Try for front, back, depth, and three framebuffers worth of
1224     * pixmap cache.  Should be enough for a fullscreen background
1225     * image plus some leftovers.
1226     * If the FBTexPercent option was used, try to achieve that percentage instead,
1227     * but still have at least one pixmap buffer (get problems with xvideo/render
1228     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1229     * probably useless for XAA.
1230     */
1231    if (info->dri->textureSize >= 0) {
1232	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1233			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1234	/* first divide, then multiply or we'll get an overflow (been there...) */
1235			 / 100 * info->dri->textureSize;
1236    }
1237    else {
1238	texsizerequest = (int)info->FbMapSize / 2;
1239    }
1240    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1241
1242    /* If that gives us less than the requested memory, let's
1243     * be greedy and grab some more.  Sorry, I care more about 3D
1244     * performance than playing nicely, and you'll get around a full
1245     * framebuffer's worth of pixmap cache anyway.
1246     */
1247    if (info->dri->textureSize < texsizerequest) {
1248        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1249    }
1250    if (info->dri->textureSize < texsizerequest) {
1251        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1252    }
1253
1254    /* If there's still no space for textures, try without pixmap cache, but
1255     * never use the reserved space, the space hw cursor and PCIGART table might
1256     * use.
1257     */
1258    if (info->dri->textureSize < 0) {
1259	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1260	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1261    }
1262
1263    /* Check to see if there is more room available after the 8192nd
1264     * scanline for textures
1265     */
1266    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1267    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1268	> info->dri->textureSize) {
1269	info->dri->textureSize =
1270		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1271    }
1272
1273    /* If backbuffer is disabled, don't allocate memory for it */
1274    if (info->dri->noBackBuffer) {
1275	info->dri->textureSize += bufferSize;
1276    }
1277
1278    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1279       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1280       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1281       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1282       area otherwise).
1283       This might cause some space at the end of the video memory to be unused, since it
1284       can't be used (?) due to that log_tex_granularity thing???
1285       Could use different copyscreentoscreen function for the pageflip copies
1286       (which would use different src and dst offsets) to avoid this. */
1287    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1288	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1289			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1290    }
1291    if (info->dri->textureSize > 0) {
1292	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1293	if (l < RADEON_LOG_TEX_GRANULARITY)
1294	    l = RADEON_LOG_TEX_GRANULARITY;
1295	/* Round the texture size up to the nearest whole number of
1296	 * texture regions.  Again, be greedy about this, don't
1297	 * round down.
1298	 */
1299	info->dri->log2TexGran = l;
1300	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1301    } else {
1302	info->dri->textureSize = 0;
1303    }
1304
1305    /* Set a minimum usable local texture heap size.  This will fit
1306     * two 256x256x32bpp textures.
1307     */
1308    if (info->dri->textureSize < 512 * 1024) {
1309	info->dri->textureOffset = 0;
1310	info->dri->textureSize = 0;
1311    }
1312
1313    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1314	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1315				    (width_bytes * 16)) * (width_bytes * 16);
1316    }
1317    else {
1318	/* Reserve space for textures */
1319	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1320				     RADEON_GPU_PAGE_SIZE);
1321    }
1322
1323    /* Reserve space for the shared depth
1324     * buffer.
1325     */
1326    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1327			       RADEON_GPU_PAGE_SIZE);
1328
1329    /* Reserve space for the shared back buffer */
1330    if (info->dri->noBackBuffer) {
1331       info->dri->backOffset = info->dri->depthOffset;
1332    } else {
1333       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1334				 RADEON_GPU_PAGE_SIZE);
1335    }
1336
1337    info->dri->backY = info->dri->backOffset / width_bytes;
1338    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1339
1340    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1341    if (scanlines > 8191)
1342	scanlines = 8191;
1343
1344    MemBox.x1 = 0;
1345    MemBox.y1 = 0;
1346    MemBox.x2 = pScrn->displayWidth;
1347    MemBox.y2 = scanlines;
1348
1349    if (!xf86InitFBManager(pScreen, &MemBox)) {
1350        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1351		   "Memory manager initialization to "
1352		   "(%d,%d) (%d,%d) failed\n",
1353		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1354	return FALSE;
1355    } else {
1356	int  width, height;
1357
1358	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1359		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1360		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1361	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1362	   aligned... sigh */
1363	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1364						pScrn->displayWidth,
1365						info->allowColorTiling ?
1366						(RADEON_ALIGN(pScrn->virtualY, 16))
1367						- pScrn->virtualY + 2 : 2,
1368						0, NULL, NULL,
1369						NULL))) {
1370	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1371		       "Reserved area from (%d,%d) to (%d,%d)\n",
1372		       fbarea->box.x1, fbarea->box.y1,
1373		       fbarea->box.x2, fbarea->box.y2);
1374	} else {
1375	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1376	}
1377
1378	RADEONDRIAllocatePCIGARTTable(pScreen);
1379
1380	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1381					  &height, 0, 0, 0)) {
1382	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1383		       "Largest offscreen area available: %d x %d\n",
1384		       width, height);
1385
1386	    /* Lines in offscreen area needed for depth buffer and
1387	     * textures
1388	     */
1389	    info->dri->depthTexLines = (scanlines
1390					- info->dri->depthOffset / width_bytes);
1391	    info->dri->backLines	    = (scanlines
1392					       - info->dri->backOffset / width_bytes
1393					       - info->dri->depthTexLines);
1394	    info->dri->backArea	    = NULL;
1395	} else {
1396	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1397		       "Unable to determine largest offscreen area "
1398		       "available\n");
1399	    return FALSE;
1400	}
1401    }
1402
1403    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1404	       "Will use front buffer at offset 0x%x\n",
1405	       info->dri->frontOffset);
1406
1407    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1408	       "Will use back buffer at offset 0x%x\n",
1409	       info->dri->backOffset);
1410    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1411	       "Will use depth buffer at offset 0x%x\n",
1412	       info->dri->depthOffset);
1413    if (info->cardType==CARD_PCIE)
1414    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1415	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1416		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1417    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1418	       "Will use %d kb for textures at offset 0x%x\n",
1419	       info->dri->textureSize/1024, info->dri->textureOffset);
1420
1421    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1422				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1423
1424    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1425				  ((info->dri->backOffset + info->fbLocation) >> 10));
1426
1427    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1428				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1429    return TRUE;
1430}
1431#endif /* XF86DRI */
1432
1433Bool
1434RADEONSetupMemXAA(ScreenPtr pScreen)
1435{
1436    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1437    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1438    BoxRec         MemBox;
1439    int            y2;
1440
1441    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1442
1443    MemBox.x1 = 0;
1444    MemBox.y1 = 0;
1445    MemBox.x2 = pScrn->displayWidth;
1446    y2 = info->FbMapSize / width_bytes;
1447    if (y2 >= 32768)
1448	y2 = 32767; /* because MemBox.y2 is signed short */
1449    MemBox.y2 = y2;
1450
1451    /* The acceleration engine uses 14 bit
1452     * signed coordinates, so we can't have any
1453     * drawable caches beyond this region.
1454     */
1455    if (MemBox.y2 > 8191)
1456	MemBox.y2 = 8191;
1457
1458    if (!xf86InitFBManager(pScreen, &MemBox)) {
1459	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1460		   "Memory manager initialization to "
1461		   "(%d,%d) (%d,%d) failed\n",
1462		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1463	return FALSE;
1464    } else {
1465	int       width, height;
1466	FBAreaPtr fbarea;
1467
1468	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1469		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1470		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1471	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1472						pScrn->displayWidth,
1473						info->allowColorTiling ?
1474						(RADEON_ALIGN(pScrn->virtualY, 16))
1475						- pScrn->virtualY + 2 : 2,
1476						0, NULL, NULL,
1477						NULL))) {
1478	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1479		       "Reserved area from (%d,%d) to (%d,%d)\n",
1480		       fbarea->box.x1, fbarea->box.y1,
1481		       fbarea->box.x2, fbarea->box.y2);
1482	} else {
1483	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1484	}
1485	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1486					      0, 0, 0)) {
1487	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1488		       "Largest offscreen area available: %d x %d\n",
1489		       width, height);
1490	}
1491	return TRUE;
1492    }
1493}
1494#endif /* USE_XAA */
1495