radeon_accel.c revision b7e1c893
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3209ff23fSmrg *                VA Linux Systems Inc., Fremont, California.
4209ff23fSmrg *
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining
8209ff23fSmrg * a copy of this software and associated documentation files (the
9209ff23fSmrg * "Software"), to deal in the Software without restriction, including
10209ff23fSmrg * without limitation on the rights to use, copy, modify, merge,
11209ff23fSmrg * publish, distribute, sublicense, and/or sell copies of the Software,
12209ff23fSmrg * and to permit persons to whom the Software is furnished to do so,
13209ff23fSmrg * subject to the following conditions:
14209ff23fSmrg *
15209ff23fSmrg * The above copyright notice and this permission notice (including the
16209ff23fSmrg * next paragraph) shall be included in all copies or substantial
17209ff23fSmrg * portions of the Software.
18209ff23fSmrg *
19209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20209ff23fSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21209ff23fSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22209ff23fSmrg * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23209ff23fSmrg * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24209ff23fSmrg * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26209ff23fSmrg * DEALINGS IN THE SOFTWARE.
27209ff23fSmrg */
28209ff23fSmrg
29209ff23fSmrg#ifdef HAVE_CONFIG_H
30209ff23fSmrg#include "config.h"
31209ff23fSmrg#endif
32209ff23fSmrg
33209ff23fSmrg/*
34209ff23fSmrg * Authors:
35209ff23fSmrg *   Kevin E. Martin <martin@xfree86.org>
36209ff23fSmrg *   Rickard E. Faith <faith@valinux.com>
37209ff23fSmrg *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38209ff23fSmrg *
39209ff23fSmrg * Credits:
40209ff23fSmrg *
41209ff23fSmrg *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42209ff23fSmrg *   code to his Radeon driver.  Portions of this file are based on the
43209ff23fSmrg *   initialization code for that driver.
44209ff23fSmrg *
45209ff23fSmrg * References:
46209ff23fSmrg *
47209ff23fSmrg * !!!! FIXME !!!!
48209ff23fSmrg *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49209ff23fSmrg *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50209ff23fSmrg *   1999.
51209ff23fSmrg *
52209ff23fSmrg *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53209ff23fSmrg *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54209ff23fSmrg *
55209ff23fSmrg * Notes on unimplemented XAA optimizations:
56209ff23fSmrg *
57209ff23fSmrg *   SetClipping:   This has been removed as XAA expects 16bit registers
58209ff23fSmrg *                  for full clipping.
59209ff23fSmrg *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60209ff23fSmrg *   DashedLine with non-power-of-two pattern length: Apparently, there is
61209ff23fSmrg *                  no way to set the length of the pattern -- it is always
62209ff23fSmrg *                  assumed to be 8 or 32 (or 1024?).
63209ff23fSmrg *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64209ff23fSmrg *                  Manual where it states that monochrome expansion of frame
65209ff23fSmrg *                  buffer data is not supported.
66209ff23fSmrg *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67209ff23fSmrg *                  direct/indirect method.  If we had more data registers,
68209ff23fSmrg *                  then we could do better.  If XAA supported a trigger write
69209ff23fSmrg *                  address, the code would be simpler.
70209ff23fSmrg *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71209ff23fSmrg *                  pattern from frame buffer memory.
72209ff23fSmrg *   ImageWrites:   Same as CPUToScreenColorExpandFill
73209ff23fSmrg *
74209ff23fSmrg */
75209ff23fSmrg
76209ff23fSmrg#include <errno.h>
77209ff23fSmrg#include <string.h>
78209ff23fSmrg				/* Driver data structures */
79209ff23fSmrg#include "radeon.h"
80209ff23fSmrg#include "radeon_reg.h"
81b7e1c893Smrg#include "r600_reg.h"
82209ff23fSmrg#include "radeon_macros.h"
83209ff23fSmrg#include "radeon_probe.h"
84209ff23fSmrg#include "radeon_version.h"
85209ff23fSmrg#ifdef XF86DRI
86209ff23fSmrg#define _XF86DRI_SERVER_
87b7e1c893Smrg#include "radeon_drm.h"
88209ff23fSmrg#endif
89209ff23fSmrg
90209ff23fSmrg				/* Line support */
91209ff23fSmrg#include "miline.h"
92209ff23fSmrg
93209ff23fSmrg				/* X and server generic header files */
94209ff23fSmrg#include "xf86.h"
95209ff23fSmrg
96b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn);
97209ff23fSmrg
#ifdef USE_XAA
/*
 * Hardware ROP3 codes, one row per X11 raster function in the order named
 * by the per-row comments (GXclear first, GXset last).
 *
 *   rop     - code built from the source operand (RADEON_ROP3_*S*)
 *   pattern - code built from the pattern/brush operand (RADEON_ROP3_*P*)
 *
 * The table is pure lookup data and is never written, hence const.
 */
static const struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
#endif
121209ff23fSmrg
122209ff23fSmrg/* The FIFO has 64 slots.  This routines waits until at least `entries'
123209ff23fSmrg * of these slots are empty.
124209ff23fSmrg */
125209ff23fSmrgvoid RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
126209ff23fSmrg{
127209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
128209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
129209ff23fSmrg    int            i;
130209ff23fSmrg
131209ff23fSmrg    for (;;) {
132209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
133b7e1c893Smrg	    info->accel_state->fifo_slots =
134209ff23fSmrg		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
135b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
136209ff23fSmrg	}
137209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
138209ff23fSmrg		       "FIFO timed out: %u entries, stat=0x%08x\n",
139209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
140209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS));
141209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
142209ff23fSmrg		   "FIFO timed out, resetting engine...\n");
143209ff23fSmrg	RADEONEngineReset(pScrn);
144209ff23fSmrg	RADEONEngineRestore(pScrn);
145209ff23fSmrg#ifdef XF86DRI
146209ff23fSmrg	if (info->directRenderingEnabled) {
147209ff23fSmrg	    RADEONCP_RESET(pScrn, info);
148209ff23fSmrg	    RADEONCP_START(pScrn, info);
149209ff23fSmrg	}
150209ff23fSmrg#endif
151209ff23fSmrg    }
152209ff23fSmrg}
153209ff23fSmrg
154b7e1c893Smrgvoid R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
155b7e1c893Smrg{
156b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
157b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
158b7e1c893Smrg    int            i;
159b7e1c893Smrg
160b7e1c893Smrg    for (;;) {
161b7e1c893Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
162b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
163b7e1c893Smrg		info->accel_state->fifo_slots =
164b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
165b7e1c893Smrg	    else
166b7e1c893Smrg		info->accel_state->fifo_slots =
167b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
168b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
169b7e1c893Smrg	}
170b7e1c893Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
171b7e1c893Smrg		       "FIFO timed out: stat=0x%08x\n",
172b7e1c893Smrg		       (unsigned int)INREG(R600_GRBM_STATUS));
173b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
174b7e1c893Smrg		   "FIFO timed out, resetting engine...\n");
175b7e1c893Smrg	R600EngineReset(pScrn);
176b7e1c893Smrg#ifdef XF86DRI
177b7e1c893Smrg	if (info->directRenderingEnabled) {
178b7e1c893Smrg	    RADEONCP_RESET(pScrn, info);
179b7e1c893Smrg	    RADEONCP_START(pScrn, info);
180b7e1c893Smrg	}
181b7e1c893Smrg#endif
182b7e1c893Smrg    }
183b7e1c893Smrg}
184b7e1c893Smrg
185209ff23fSmrg/* Flush all dirty data in the Pixel Cache to memory */
186209ff23fSmrgvoid RADEONEngineFlush(ScrnInfoPtr pScrn)
187209ff23fSmrg{
188209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
189209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
190209ff23fSmrg    int            i;
191209ff23fSmrg
192209ff23fSmrg    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
193209ff23fSmrg	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
194209ff23fSmrg		RADEON_RB3D_DC_FLUSH_ALL,
195209ff23fSmrg		~RADEON_RB3D_DC_FLUSH_ALL);
196209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
197209ff23fSmrg	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
198209ff23fSmrg		break;
199209ff23fSmrg	}
200209ff23fSmrg	if (i == RADEON_TIMEOUT) {
201209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
202209ff23fSmrg			   "DC flush timeout: %x\n",
203209ff23fSmrg			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
204209ff23fSmrg	}
205209ff23fSmrg    } else {
206209ff23fSmrg	OUTREGP(R300_DSTCACHE_CTLSTAT,
207209ff23fSmrg		R300_RB2D_DC_FLUSH_ALL,
208209ff23fSmrg		~R300_RB2D_DC_FLUSH_ALL);
209209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
210209ff23fSmrg	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
211209ff23fSmrg		break;
212209ff23fSmrg	}
213209ff23fSmrg	if (i == RADEON_TIMEOUT) {
214209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
215209ff23fSmrg			   "DC flush timeout: %x\n",
216209ff23fSmrg			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
217209ff23fSmrg	}
218209ff23fSmrg    }
219209ff23fSmrg}
220209ff23fSmrg
221209ff23fSmrg/* Reset graphics card to known state */
222209ff23fSmrgvoid RADEONEngineReset(ScrnInfoPtr pScrn)
223209ff23fSmrg{
224209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
225209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
226209ff23fSmrg    uint32_t       clock_cntl_index;
227209ff23fSmrg    uint32_t       mclk_cntl;
228209ff23fSmrg    uint32_t       rbbm_soft_reset;
229209ff23fSmrg    uint32_t       host_path_cntl;
230209ff23fSmrg
231209ff23fSmrg    /* The following RBBM_SOFT_RESET sequence can help un-wedge
232209ff23fSmrg     * an R300 after the command processor got stuck.
233209ff23fSmrg     */
234209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
235209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
236209ff23fSmrg                                   RADEON_SOFT_RESET_CP |
237209ff23fSmrg                                   RADEON_SOFT_RESET_HI |
238209ff23fSmrg                                   RADEON_SOFT_RESET_SE |
239209ff23fSmrg                                   RADEON_SOFT_RESET_RE |
240209ff23fSmrg                                   RADEON_SOFT_RESET_PP |
241209ff23fSmrg                                   RADEON_SOFT_RESET_E2 |
242209ff23fSmrg                                   RADEON_SOFT_RESET_RB));
243209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
244209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
245209ff23fSmrg                                   ~(RADEON_SOFT_RESET_CP |
246209ff23fSmrg                                     RADEON_SOFT_RESET_HI |
247209ff23fSmrg                                     RADEON_SOFT_RESET_SE |
248209ff23fSmrg                                     RADEON_SOFT_RESET_RE |
249209ff23fSmrg                                     RADEON_SOFT_RESET_PP |
250209ff23fSmrg                                     RADEON_SOFT_RESET_E2 |
251209ff23fSmrg                                     RADEON_SOFT_RESET_RB)));
252209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
253209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
254209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
255209ff23fSmrg
256209ff23fSmrg    RADEONEngineFlush(pScrn);
257209ff23fSmrg
258209ff23fSmrg    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
259209ff23fSmrg    RADEONPllErrataAfterIndex(info);
260209ff23fSmrg
261209ff23fSmrg#if 0 /* taken care of by new PM code */
262209ff23fSmrg    /* Some ASICs have bugs with dynamic-on feature, which are
263209ff23fSmrg     * ASIC-version dependent, so we force all blocks on for now
264209ff23fSmrg     */
265209ff23fSmrg    if (info->HasCRTC2) {
266209ff23fSmrg	uint32_t tmp;
267209ff23fSmrg
268209ff23fSmrg	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
269209ff23fSmrg	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
270209ff23fSmrg				  RADEON_CP_MAX_DYN_STOP_LAT |
271209ff23fSmrg				  RADEON_SCLK_FORCEON_MASK));
272209ff23fSmrg
273209ff23fSmrg	if (info->ChipFamily == CHIP_FAMILY_RV200) {
274209ff23fSmrg	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
275209ff23fSmrg	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
276209ff23fSmrg	}
277209ff23fSmrg    }
278209ff23fSmrg#endif /* new PM code */
279209ff23fSmrg
280209ff23fSmrg    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
281209ff23fSmrg
282209ff23fSmrg#if 0 /* handled by new PM code */
283209ff23fSmrg    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
284209ff23fSmrg			      RADEON_FORCEON_MCLKA |
285209ff23fSmrg			      RADEON_FORCEON_MCLKB |
286209ff23fSmrg			      RADEON_FORCEON_YCLKA |
287209ff23fSmrg			      RADEON_FORCEON_YCLKB |
288209ff23fSmrg			      RADEON_FORCEON_MC |
289209ff23fSmrg			      RADEON_FORCEON_AIC));
290209ff23fSmrg#endif /* new PM code */
291209ff23fSmrg
292209ff23fSmrg    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
293209ff23fSmrg     * unexpected behaviour on some machines.  Here we use
294209ff23fSmrg     * RADEON_HOST_PATH_CNTL to reset it.
295209ff23fSmrg     */
296209ff23fSmrg    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
297209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
298209ff23fSmrg
299209ff23fSmrg    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
300209ff23fSmrg	uint32_t tmp;
301209ff23fSmrg
302209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
303209ff23fSmrg					RADEON_SOFT_RESET_CP |
304209ff23fSmrg					RADEON_SOFT_RESET_HI |
305209ff23fSmrg					RADEON_SOFT_RESET_E2));
306209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
307209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
308209ff23fSmrg	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
309209ff23fSmrg	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
310209ff23fSmrg    } else {
311209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
312209ff23fSmrg					RADEON_SOFT_RESET_CP |
313209ff23fSmrg					RADEON_SOFT_RESET_SE |
314209ff23fSmrg					RADEON_SOFT_RESET_RE |
315209ff23fSmrg					RADEON_SOFT_RESET_PP |
316209ff23fSmrg					RADEON_SOFT_RESET_E2 |
317209ff23fSmrg					RADEON_SOFT_RESET_RB));
318209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
319209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
320209ff23fSmrg					~(RADEON_SOFT_RESET_CP |
321209ff23fSmrg					  RADEON_SOFT_RESET_SE |
322209ff23fSmrg					  RADEON_SOFT_RESET_RE |
323209ff23fSmrg					  RADEON_SOFT_RESET_PP |
324209ff23fSmrg					  RADEON_SOFT_RESET_E2 |
325209ff23fSmrg					  RADEON_SOFT_RESET_RB)));
326209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
327209ff23fSmrg    }
328209ff23fSmrg
329209ff23fSmrg    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
330209ff23fSmrg    INREG(RADEON_HOST_PATH_CNTL);
331209ff23fSmrg    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
332209ff23fSmrg
333209ff23fSmrg    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
334209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
335209ff23fSmrg
336209ff23fSmrg    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
337209ff23fSmrg    RADEONPllErrataAfterIndex(info);
338209ff23fSmrg    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
339209ff23fSmrg}
340209ff23fSmrg
341b7e1c893Smrg/* Reset graphics card to known state */
342b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn)
343b7e1c893Smrg{
344b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
345b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
346b7e1c893Smrg    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
347b7e1c893Smrg
348b7e1c893Smrg    cp_ptr = INREG(R600_CP_RB_WPTR);
349b7e1c893Smrg
350b7e1c893Smrg    cp_me_cntl = INREG(R600_CP_ME_CNTL);
351b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, 0x10000000);
352b7e1c893Smrg
353b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
354b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
355b7e1c893Smrg    usleep (50);
356b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0);
357b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
358b7e1c893Smrg
359b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
360b7e1c893Smrg    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
361b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, 0x80000000);
362b7e1c893Smrg
363b7e1c893Smrg    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
364b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR, cp_ptr);
365b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
366b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
367b7e1c893Smrg
368b7e1c893Smrg}
369b7e1c893Smrg
370209ff23fSmrg/* Restore the acceleration hardware to its previous state */
371209ff23fSmrgvoid RADEONEngineRestore(ScrnInfoPtr pScrn)
372209ff23fSmrg{
373209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
374209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
375209ff23fSmrg
376209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
377209ff23fSmrg		   "EngineRestore (%d/%d)\n",
378209ff23fSmrg		   info->CurrentLayout.pixel_code,
379209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
380209ff23fSmrg
381209ff23fSmrg    /* Setup engine location. This shouldn't be necessary since we
382209ff23fSmrg     * set them appropriately before any accel ops, but let's avoid
383209ff23fSmrg     * random bogus DMA in case we inadvertently trigger the engine
384209ff23fSmrg     * in the wrong place (happened).
385209ff23fSmrg     */
386209ff23fSmrg    RADEONWaitForFifo(pScrn, 2);
387b7e1c893Smrg    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
388b7e1c893Smrg    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
389209ff23fSmrg
390209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
391209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
392209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE,
393209ff23fSmrg	    RADEON_HOST_BIG_ENDIAN_EN,
394209ff23fSmrg	    ~RADEON_HOST_BIG_ENDIAN_EN);
395209ff23fSmrg#else
396209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
397209ff23fSmrg#endif
398209ff23fSmrg
399209ff23fSmrg    /* Restore SURFACE_CNTL */
400209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
401209ff23fSmrg
402209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
403209ff23fSmrg    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
404209ff23fSmrg					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
405209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
406b7e1c893Smrg    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
407209ff23fSmrg				       | RADEON_GMC_BRUSH_SOLID_COLOR
408209ff23fSmrg				       | RADEON_GMC_SRC_DATATYPE_COLOR));
409209ff23fSmrg
410209ff23fSmrg    RADEONWaitForFifo(pScrn, 5);
411209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
412209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
413209ff23fSmrg    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
414209ff23fSmrg    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
415209ff23fSmrg    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
416209ff23fSmrg
417209ff23fSmrg    RADEONWaitForIdleMMIO(pScrn);
418209ff23fSmrg
419b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
420209ff23fSmrg}
421209ff23fSmrg
422209ff23fSmrg/* Initialize the acceleration hardware */
423209ff23fSmrgvoid RADEONEngineInit(ScrnInfoPtr pScrn)
424209ff23fSmrg{
425209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
426209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
427b7e1c893Smrg    int datatype = 0;
428b7e1c893Smrg    info->accel_state->num_gb_pipes = 0;
429209ff23fSmrg
430209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
431209ff23fSmrg		   "EngineInit (%d/%d)\n",
432209ff23fSmrg		   info->CurrentLayout.pixel_code,
433209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
434209ff23fSmrg
435209ff23fSmrg#ifdef XF86DRI
436209ff23fSmrg    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
437b7e1c893Smrg	drm_radeon_getparam_t np;
438209ff23fSmrg	int num_pipes;
439209ff23fSmrg
440209ff23fSmrg	memset(&np, 0, sizeof(np));
441209ff23fSmrg	np.param = RADEON_PARAM_NUM_GB_PIPES;
442209ff23fSmrg	np.value = &num_pipes;
443209ff23fSmrg
444b7e1c893Smrg	if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np,
445209ff23fSmrg				sizeof(np)) < 0) {
446209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
447209ff23fSmrg		       "Failed to determine num pipes from DRM, falling back to "
448209ff23fSmrg		       "manual look-up!\n");
449b7e1c893Smrg	    info->accel_state->num_gb_pipes = 0;
450209ff23fSmrg	} else {
451b7e1c893Smrg	    info->accel_state->num_gb_pipes = num_pipes;
452209ff23fSmrg	}
453209ff23fSmrg    }
454209ff23fSmrg#endif
455209ff23fSmrg
456209ff23fSmrg    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
457209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_R420)  ||
458209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS600) ||
459209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS690) ||
460209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS740) ||
461209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS400) ||
462209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS480) ||
463209ff23fSmrg	IS_R500_3D) {
464b7e1c893Smrg	if (info->accel_state->num_gb_pipes == 0) {
465209ff23fSmrg	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
466209ff23fSmrg
467b7e1c893Smrg	    info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
468209ff23fSmrg	    if (IS_R500_3D)
469209ff23fSmrg		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
470209ff23fSmrg	}
471209ff23fSmrg    } else {
472b7e1c893Smrg	if (info->accel_state->num_gb_pipes == 0) {
473209ff23fSmrg	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
474209ff23fSmrg		(info->ChipFamily == CHIP_FAMILY_R350)) {
475209ff23fSmrg		/* R3xx chips */
476b7e1c893Smrg		info->accel_state->num_gb_pipes = 2;
477209ff23fSmrg	    } else {
478209ff23fSmrg		/* RV3xx chips */
479b7e1c893Smrg		info->accel_state->num_gb_pipes = 1;
480209ff23fSmrg	    }
481209ff23fSmrg	}
482209ff23fSmrg    }
483209ff23fSmrg
484209ff23fSmrg    if (IS_R300_3D || IS_R500_3D)
485209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
486b7e1c893Smrg		   "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
487209ff23fSmrg
488209ff23fSmrg    if (IS_R300_3D || IS_R500_3D) {
489209ff23fSmrg	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16);
490209ff23fSmrg
491b7e1c893Smrg	switch(info->accel_state->num_gb_pipes) {
492209ff23fSmrg	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
493209ff23fSmrg	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
494209ff23fSmrg	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
495209ff23fSmrg	default:
496209ff23fSmrg	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
497209ff23fSmrg	}
498209ff23fSmrg
499209ff23fSmrg	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
500209ff23fSmrg	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
501209ff23fSmrg	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
502209ff23fSmrg	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
503209ff23fSmrg					 R300_DC_AUTOFLUSH_ENABLE |
504209ff23fSmrg					 R300_DC_DC_DISABLE_IGNORE_PE));
505209ff23fSmrg    } else
506209ff23fSmrg	OUTREG(RADEON_RB3D_CNTL, 0);
507209ff23fSmrg
508209ff23fSmrg    RADEONEngineReset(pScrn);
509209ff23fSmrg
510209ff23fSmrg    switch (info->CurrentLayout.pixel_code) {
511b7e1c893Smrg    case 8:  datatype = 2; break;
512b7e1c893Smrg    case 15: datatype = 3; break;
513b7e1c893Smrg    case 16: datatype = 4; break;
514b7e1c893Smrg    case 24: datatype = 5; break;
515b7e1c893Smrg    case 32: datatype = 6; break;
516209ff23fSmrg    default:
517209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
518209ff23fSmrg		       "Unknown depth/bpp = %d/%d (code = %d)\n",
519209ff23fSmrg		       info->CurrentLayout.depth,
520209ff23fSmrg		       info->CurrentLayout.bitsPerPixel,
521209ff23fSmrg		       info->CurrentLayout.pixel_code);
522209ff23fSmrg    }
523209ff23fSmrg
524b7e1c893Smrg    info->accel_state->dp_gui_master_cntl =
525b7e1c893Smrg	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
526209ff23fSmrg	 | RADEON_GMC_CLR_CMP_CNTL_DIS
527209ff23fSmrg	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
528209ff23fSmrg
529209ff23fSmrg    RADEONEngineRestore(pScrn);
530209ff23fSmrg}
531209ff23fSmrg
532209ff23fSmrg
533209ff23fSmrg#define ACCEL_MMIO
534209ff23fSmrg#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
535209ff23fSmrg#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
536209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
537209ff23fSmrg#define FINISH_ACCEL()
538209ff23fSmrg
539209ff23fSmrg#include "radeon_commonfuncs.c"
540209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
541209ff23fSmrg#include "radeon_render.c"
542209ff23fSmrg#endif
543209ff23fSmrg#include "radeon_accelfuncs.c"
544209ff23fSmrg
545209ff23fSmrg#undef ACCEL_MMIO
546209ff23fSmrg#undef ACCEL_PREAMBLE
547209ff23fSmrg#undef BEGIN_ACCEL
548209ff23fSmrg#undef OUT_ACCEL_REG
549209ff23fSmrg#undef FINISH_ACCEL
550209ff23fSmrg
551209ff23fSmrg#ifdef XF86DRI
552209ff23fSmrg
553209ff23fSmrg#define ACCEL_CP
554209ff23fSmrg#define ACCEL_PREAMBLE()						\
555209ff23fSmrg    RING_LOCALS;							\
556209ff23fSmrg    RADEONCP_REFRESH(pScrn, info)
557209ff23fSmrg#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
558209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
559209ff23fSmrg#define FINISH_ACCEL()          ADVANCE_RING()
560209ff23fSmrg
561209ff23fSmrg
562209ff23fSmrg#include "radeon_commonfuncs.c"
563209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
564209ff23fSmrg#include "radeon_render.c"
565209ff23fSmrg#endif
566209ff23fSmrg#include "radeon_accelfuncs.c"
567209ff23fSmrg
568209ff23fSmrg#undef ACCEL_CP
569209ff23fSmrg#undef ACCEL_PREAMBLE
570209ff23fSmrg#undef BEGIN_ACCEL
571209ff23fSmrg#undef OUT_ACCEL_REG
572209ff23fSmrg#undef FINISH_ACCEL
573209ff23fSmrg
574209ff23fSmrg/* Stop the CP */
575209ff23fSmrgint RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
576209ff23fSmrg{
577b7e1c893Smrg    drm_radeon_cp_stop_t  stop;
578209ff23fSmrg    int              ret, i;
579209ff23fSmrg
580209ff23fSmrg    stop.flush = 1;
581209ff23fSmrg    stop.idle  = 1;
582209ff23fSmrg
583b7e1c893Smrg    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
584b7e1c893Smrg			  sizeof(drm_radeon_cp_stop_t));
585209ff23fSmrg
586209ff23fSmrg    if (ret == 0) {
587209ff23fSmrg	return 0;
588209ff23fSmrg    } else if (errno != EBUSY) {
589209ff23fSmrg	return -errno;
590209ff23fSmrg    }
591209ff23fSmrg
592209ff23fSmrg    stop.flush = 0;
593209ff23fSmrg
594209ff23fSmrg    i = 0;
595209ff23fSmrg    do {
596b7e1c893Smrg	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
597b7e1c893Smrg			      sizeof(drm_radeon_cp_stop_t));
598209ff23fSmrg    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
599209ff23fSmrg
600209ff23fSmrg    if (ret == 0) {
601209ff23fSmrg	return 0;
602209ff23fSmrg    } else if (errno != EBUSY) {
603209ff23fSmrg	return -errno;
604209ff23fSmrg    }
605209ff23fSmrg
606209ff23fSmrg    stop.idle = 0;
607209ff23fSmrg
608b7e1c893Smrg    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
609b7e1c893Smrg			&stop, sizeof(drm_radeon_cp_stop_t))) {
610209ff23fSmrg	return -errno;
611209ff23fSmrg    } else {
612209ff23fSmrg	return 0;
613209ff23fSmrg    }
614209ff23fSmrg}
615209ff23fSmrg
616209ff23fSmrg/* Get an indirect buffer for the CP 2D acceleration commands  */
617209ff23fSmrgdrmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
618209ff23fSmrg{
619209ff23fSmrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
620209ff23fSmrg    drmDMAReq      dma;
621209ff23fSmrg    drmBufPtr      buf = NULL;
622209ff23fSmrg    int            indx = 0;
623209ff23fSmrg    int            size = 0;
624209ff23fSmrg    int            i = 0;
625209ff23fSmrg    int            ret;
626209ff23fSmrg
627209ff23fSmrg#if 0
628209ff23fSmrg    /* FIXME: pScrn->pScreen has not been initialized when this is first
629209ff23fSmrg     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
630209ff23fSmrg     * the screen index from pScrn, which is initialized, and then get
631209ff23fSmrg     * the screen from screenInfo.screens[index], but that is a hack.
632209ff23fSmrg     */
633209ff23fSmrg    dma.context = DRIGetContext(pScrn->pScreen);
634209ff23fSmrg#else
635209ff23fSmrg    /* This is the X server's context */
636209ff23fSmrg    dma.context = 0x00000001;
637209ff23fSmrg#endif
638209ff23fSmrg
639209ff23fSmrg    dma.send_count    = 0;
640209ff23fSmrg    dma.send_list     = NULL;
641209ff23fSmrg    dma.send_sizes    = NULL;
642209ff23fSmrg    dma.flags         = 0;
643209ff23fSmrg    dma.request_count = 1;
644209ff23fSmrg    dma.request_size  = RADEON_BUFFER_SIZE;
645209ff23fSmrg    dma.request_list  = &indx;
646209ff23fSmrg    dma.request_sizes = &size;
647209ff23fSmrg    dma.granted_count = 0;
648209ff23fSmrg
649209ff23fSmrg    while (1) {
650209ff23fSmrg	do {
651b7e1c893Smrg	    ret = drmDMA(info->dri->drmFD, &dma);
652209ff23fSmrg	    if (ret && ret != -EBUSY) {
653209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
654209ff23fSmrg			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
655209ff23fSmrg	    }
656209ff23fSmrg	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
657209ff23fSmrg
658209ff23fSmrg	if (ret == 0) {
659b7e1c893Smrg	    buf = &info->dri->buffers->list[indx];
660209ff23fSmrg	    buf->used = 0;
661209ff23fSmrg	    if (RADEON_VERBOSE) {
662209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
663209ff23fSmrg			   "   GetBuffer returning %d %p\n",
664209ff23fSmrg			   buf->idx, buf->address);
665209ff23fSmrg	    }
666209ff23fSmrg	    return buf;
667209ff23fSmrg	}
668209ff23fSmrg
669209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
670209ff23fSmrg		   "GetBuffer timed out, resetting engine...\n");
671b7e1c893Smrg
672b7e1c893Smrg	if (info->ChipFamily < CHIP_FAMILY_R600) {
673b7e1c893Smrg	    RADEONEngineReset(pScrn);
674b7e1c893Smrg	    RADEONEngineRestore(pScrn);
675b7e1c893Smrg	} else
676b7e1c893Smrg	    R600EngineReset(pScrn);
677209ff23fSmrg
678209ff23fSmrg	/* Always restart the engine when doing CP 2D acceleration */
679209ff23fSmrg	RADEONCP_RESET(pScrn, info);
680209ff23fSmrg	RADEONCP_START(pScrn, info);
681209ff23fSmrg    }
682209ff23fSmrg}
683209ff23fSmrg
684209ff23fSmrg/* Flush the indirect buffer to the kernel for submission to the card */
685209ff23fSmrgvoid RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
686209ff23fSmrg{
687209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
688b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
689b7e1c893Smrg    int                start  = info->cp->indirectStart;
690b7e1c893Smrg    drm_radeon_indirect_t  indirect;
691209ff23fSmrg
692209ff23fSmrg    if (!buffer) return;
693209ff23fSmrg    if (start == buffer->used && !discard) return;
694209ff23fSmrg
695209ff23fSmrg    if (RADEON_VERBOSE) {
696209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
697209ff23fSmrg		   buffer->idx);
698209ff23fSmrg    }
699209ff23fSmrg
700b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
701b7e1c893Smrg	if (buffer->used & 0x3c) {
702b7e1c893Smrg	    RING_LOCALS;
703b7e1c893Smrg
704b7e1c893Smrg	    while (buffer->used & 0x3c) {
705b7e1c893Smrg		BEGIN_RING(1);
706b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
707b7e1c893Smrg		ADVANCE_RING();
708b7e1c893Smrg	    }
709b7e1c893Smrg	}
710b7e1c893Smrg    }
711b7e1c893Smrg
712209ff23fSmrg    indirect.idx     = buffer->idx;
713209ff23fSmrg    indirect.start   = start;
714209ff23fSmrg    indirect.end     = buffer->used;
715209ff23fSmrg    indirect.discard = discard;
716209ff23fSmrg
717b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
718b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
719209ff23fSmrg
720209ff23fSmrg    if (discard) {
721b7e1c893Smrg	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
722b7e1c893Smrg	info->cp->indirectStart  = 0;
723209ff23fSmrg    } else {
724209ff23fSmrg	/* Start on a double word boundary */
725b7e1c893Smrg	info->cp->indirectStart  = buffer->used = (buffer->used + 7) & ~7;
726209ff23fSmrg	if (RADEON_VERBOSE) {
727209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
728b7e1c893Smrg		       info->cp->indirectStart);
729209ff23fSmrg	}
730209ff23fSmrg    }
731209ff23fSmrg}
732209ff23fSmrg
733209ff23fSmrg/* Flush and release the indirect buffer */
734209ff23fSmrgvoid RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
735209ff23fSmrg{
736209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
737b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
738b7e1c893Smrg    int                start  = info->cp->indirectStart;
739b7e1c893Smrg    drm_radeon_indirect_t  indirect;
740b7e1c893Smrg
741b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
742b7e1c893Smrg	if (buffer && (buffer->used & 0x3c)) {
743b7e1c893Smrg	    RING_LOCALS;
744b7e1c893Smrg
745b7e1c893Smrg	    while (buffer->used & 0x3c) {
746b7e1c893Smrg		BEGIN_RING(1);
747b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
748b7e1c893Smrg		ADVANCE_RING();
749b7e1c893Smrg	    }
750b7e1c893Smrg	}
751b7e1c893Smrg    }
752209ff23fSmrg
753b7e1c893Smrg    info->cp->indirectBuffer = NULL;
754b7e1c893Smrg    info->cp->indirectStart  = 0;
755209ff23fSmrg
756209ff23fSmrg    if (!buffer) return;
757209ff23fSmrg
758209ff23fSmrg    if (RADEON_VERBOSE) {
759209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
760209ff23fSmrg		   buffer->idx);
761209ff23fSmrg    }
762209ff23fSmrg
763209ff23fSmrg    indirect.idx     = buffer->idx;
764209ff23fSmrg    indirect.start   = start;
765209ff23fSmrg    indirect.end     = buffer->used;
766209ff23fSmrg    indirect.discard = 1;
767209ff23fSmrg
768b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
769b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
770209ff23fSmrg}
771209ff23fSmrg
772209ff23fSmrg/** \brief Calculate HostDataBlit parameters from pointer and pitch
773209ff23fSmrg *
774209ff23fSmrg * This is a helper for the trivial HostDataBlit users that don't need to worry
775209ff23fSmrg * about tiling etc.
776209ff23fSmrg */
777209ff23fSmrgvoid
778209ff23fSmrgRADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
779209ff23fSmrg		     uint32_t *dstPitchOff, int *x, int *y)
780209ff23fSmrg{
781209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
782209ff23fSmrg    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
783209ff23fSmrg
784209ff23fSmrg    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
785209ff23fSmrg    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
786209ff23fSmrg    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
787209ff23fSmrg}
788209ff23fSmrg
789209ff23fSmrg/* Set up a hostdata blit to transfer data from system memory to the
790209ff23fSmrg * framebuffer. Returns the address where the data can be written to and sets
791209ff23fSmrg * the dstPitch and hpass variables as required.
792209ff23fSmrg */
793209ff23fSmrguint8_t*
794209ff23fSmrgRADEONHostDataBlit(
795209ff23fSmrg    ScrnInfoPtr pScrn,
796209ff23fSmrg    unsigned int cpp,
797209ff23fSmrg    unsigned int w,
798209ff23fSmrg    uint32_t dstPitchOff,
799209ff23fSmrg    uint32_t *bufPitch,
800209ff23fSmrg    int x,
801209ff23fSmrg    int *y,
802209ff23fSmrg    unsigned int *h,
803209ff23fSmrg    unsigned int *hpass
804209ff23fSmrg){
805209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
806209ff23fSmrg    uint32_t format, dwords;
807209ff23fSmrg    uint8_t *ret;
808209ff23fSmrg    RING_LOCALS;
809209ff23fSmrg
810209ff23fSmrg    if ( *h == 0 )
811209ff23fSmrg    {
812209ff23fSmrg	return NULL;
813209ff23fSmrg    }
814209ff23fSmrg
815209ff23fSmrg    switch ( cpp )
816209ff23fSmrg    {
817209ff23fSmrg    case 4:
818209ff23fSmrg	format = RADEON_GMC_DST_32BPP;
819209ff23fSmrg	*bufPitch = 4 * w;
820209ff23fSmrg	break;
821209ff23fSmrg    case 2:
822209ff23fSmrg	format = RADEON_GMC_DST_16BPP;
823209ff23fSmrg	*bufPitch = 2 * ((w + 1) & ~1);
824209ff23fSmrg	break;
825209ff23fSmrg    case 1:
826209ff23fSmrg	format = RADEON_GMC_DST_8BPP_CI;
827209ff23fSmrg	*bufPitch = (w + 3) & ~3;
828209ff23fSmrg	break;
829209ff23fSmrg    default:
830209ff23fSmrg	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
831209ff23fSmrg		    "%s: Unsupported cpp %d!\n", __func__, cpp );
832209ff23fSmrg	return NULL;
833209ff23fSmrg    }
834209ff23fSmrg
835209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
836209ff23fSmrg    /* Swap doesn't work on R300 and later, it's handled during the
837209ff23fSmrg     * copy to ind. buffer pass
838209ff23fSmrg     */
839209ff23fSmrg    if (info->ChipFamily < CHIP_FAMILY_R300) {
840209ff23fSmrg        BEGIN_RING(2);
841209ff23fSmrg	if (cpp == 2)
842209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
843209ff23fSmrg			 RADEON_HOST_DATA_SWAP_HDW);
844209ff23fSmrg	else if (cpp == 1)
845209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
846209ff23fSmrg			 RADEON_HOST_DATA_SWAP_32BIT);
847209ff23fSmrg	else
848209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
849209ff23fSmrg			 RADEON_HOST_DATA_SWAP_NONE);
850209ff23fSmrg	ADVANCE_RING();
851209ff23fSmrg    }
852209ff23fSmrg#endif
853209ff23fSmrg
854209ff23fSmrg    /*RADEON_PURGE_CACHE();
855209ff23fSmrg      RADEON_WAIT_UNTIL_IDLE();*/
856209ff23fSmrg
857209ff23fSmrg    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
858209ff23fSmrg    dwords = *hpass * *bufPitch / 4;
859209ff23fSmrg
860209ff23fSmrg    BEGIN_RING( dwords + 10 );
861209ff23fSmrg    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
862209ff23fSmrg    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
863209ff23fSmrg	    | RADEON_GMC_DST_CLIPPING
864209ff23fSmrg	    | RADEON_GMC_BRUSH_NONE
865209ff23fSmrg	    | format
866209ff23fSmrg	    | RADEON_GMC_SRC_DATATYPE_COLOR
867209ff23fSmrg	    | RADEON_ROP3_S
868209ff23fSmrg	    | RADEON_DP_SRC_SOURCE_HOST_DATA
869209ff23fSmrg	    | RADEON_GMC_CLR_CMP_CNTL_DIS
870209ff23fSmrg	    | RADEON_GMC_WR_MSK_DIS );
871209ff23fSmrg    OUT_RING( dstPitchOff );
872209ff23fSmrg    OUT_RING( (*y << 16) | x );
873209ff23fSmrg    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
874209ff23fSmrg    OUT_RING( 0xffffffff );
875209ff23fSmrg    OUT_RING( 0xffffffff );
876209ff23fSmrg    OUT_RING( *y << 16 | x );
877209ff23fSmrg    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
878209ff23fSmrg    OUT_RING( dwords );
879209ff23fSmrg
880209ff23fSmrg    ret = ( uint8_t* )&__head[__count];
881209ff23fSmrg
882209ff23fSmrg    __count += dwords;
883209ff23fSmrg    ADVANCE_RING();
884209ff23fSmrg
885209ff23fSmrg    *y += *hpass;
886209ff23fSmrg    *h -= *hpass;
887209ff23fSmrg
888209ff23fSmrg    return ret;
889209ff23fSmrg}
890209ff23fSmrg
891209ff23fSmrgvoid RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
892209ff23fSmrg{
893209ff23fSmrg    switch(swap) {
894209ff23fSmrg    case RADEON_HOST_DATA_SWAP_HDW:
895209ff23fSmrg        {
896209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
897209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
898209ff23fSmrg	    unsigned int nwords = size >> 2;
899209ff23fSmrg
900209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
901209ff23fSmrg		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
902209ff23fSmrg	    return;
903209ff23fSmrg        }
904209ff23fSmrg    case RADEON_HOST_DATA_SWAP_32BIT:
905209ff23fSmrg        {
906209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
907209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
908209ff23fSmrg	    unsigned int nwords = size >> 2;
909209ff23fSmrg
910209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
911209ff23fSmrg#ifdef __powerpc__
912209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
913209ff23fSmrg#else
914209ff23fSmrg		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
915209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
916209ff23fSmrg#endif
917209ff23fSmrg	    return;
918209ff23fSmrg        }
919209ff23fSmrg    case RADEON_HOST_DATA_SWAP_16BIT:
920209ff23fSmrg        {
921209ff23fSmrg	    unsigned short *d = (unsigned short *)dst;
922209ff23fSmrg	    unsigned short *s = (unsigned short *)src;
923209ff23fSmrg	    unsigned int nwords = size >> 1;
924209ff23fSmrg
925209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
926209ff23fSmrg#ifdef __powerpc__
927209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
928209ff23fSmrg#else
929209ff23fSmrg	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
930209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
931209ff23fSmrg#endif
932209ff23fSmrg	    return;
933209ff23fSmrg	}
934209ff23fSmrg    }
935209ff23fSmrg    if (src != dst)
936209ff23fSmrg	    memmove(dst, src, size);
937209ff23fSmrg}
938209ff23fSmrg
939209ff23fSmrg/* Copies a single pass worth of data for a hostdata blit set up by
940209ff23fSmrg * RADEONHostDataBlit().
941209ff23fSmrg */
942209ff23fSmrgvoid
943209ff23fSmrgRADEONHostDataBlitCopyPass(
944209ff23fSmrg    ScrnInfoPtr pScrn,
945209ff23fSmrg    unsigned int cpp,
946209ff23fSmrg    uint8_t *dst,
947209ff23fSmrg    uint8_t *src,
948209ff23fSmrg    unsigned int hpass,
949209ff23fSmrg    unsigned int dstPitch,
950209ff23fSmrg    unsigned int srcPitch
951209ff23fSmrg){
952209ff23fSmrg
953209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
954209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
955209ff23fSmrg#endif
956209ff23fSmrg
957209ff23fSmrg    /* RADEONHostDataBlitCopy can return NULL ! */
958209ff23fSmrg    if( (dst==NULL) || (src==NULL)) return;
959209ff23fSmrg
960209ff23fSmrg    if ( dstPitch == srcPitch )
961209ff23fSmrg    {
962209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
963209ff23fSmrg        if (info->ChipFamily >= CHIP_FAMILY_R300) {
964209ff23fSmrg	    switch(cpp) {
965209ff23fSmrg	    case 1:
966209ff23fSmrg		RADEONCopySwap(dst, src, hpass * dstPitch,
967209ff23fSmrg			       RADEON_HOST_DATA_SWAP_32BIT);
968209ff23fSmrg		return;
969209ff23fSmrg	    case 2:
970209ff23fSmrg	        RADEONCopySwap(dst, src, hpass * dstPitch,
971209ff23fSmrg			       RADEON_HOST_DATA_SWAP_HDW);
972209ff23fSmrg		return;
973209ff23fSmrg	    }
974209ff23fSmrg	}
975209ff23fSmrg#endif
976209ff23fSmrg	memcpy( dst, src, hpass * dstPitch );
977209ff23fSmrg    }
978209ff23fSmrg    else
979209ff23fSmrg    {
980209ff23fSmrg	unsigned int minPitch = min( dstPitch, srcPitch );
981209ff23fSmrg	while ( hpass-- )
982209ff23fSmrg	{
983209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
984209ff23fSmrg            if (info->ChipFamily >= CHIP_FAMILY_R300) {
985209ff23fSmrg		switch(cpp) {
986209ff23fSmrg		case 1:
987209ff23fSmrg		    RADEONCopySwap(dst, src, minPitch,
988209ff23fSmrg				   RADEON_HOST_DATA_SWAP_32BIT);
989209ff23fSmrg		    goto next;
990209ff23fSmrg		case 2:
991209ff23fSmrg	            RADEONCopySwap(dst, src, minPitch,
992209ff23fSmrg				   RADEON_HOST_DATA_SWAP_HDW);
993209ff23fSmrg		    goto next;
994209ff23fSmrg		}
995209ff23fSmrg	    }
996209ff23fSmrg#endif
997209ff23fSmrg	    memcpy( dst, src, minPitch );
998209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
999209ff23fSmrg	next:
1000209ff23fSmrg#endif
1001209ff23fSmrg	    src += srcPitch;
1002209ff23fSmrg	    dst += dstPitch;
1003209ff23fSmrg	}
1004209ff23fSmrg    }
1005209ff23fSmrg}
1006209ff23fSmrg
1007209ff23fSmrg#endif
1008209ff23fSmrg
1009209ff23fSmrgBool RADEONAccelInit(ScreenPtr pScreen)
1010209ff23fSmrg{
1011209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1012209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1013209ff23fSmrg
1014209ff23fSmrg#ifdef USE_EXA
1015209ff23fSmrg    if (info->useEXA) {
1016209ff23fSmrg# ifdef XF86DRI
1017209ff23fSmrg	if (info->directRenderingEnabled) {
1018b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1019b7e1c893Smrg		if (!R600DrawInit(pScreen))
1020b7e1c893Smrg		    return FALSE;
1021b7e1c893Smrg	    } else {
1022b7e1c893Smrg		if (!RADEONDrawInitCP(pScreen))
1023b7e1c893Smrg		    return FALSE;
1024b7e1c893Smrg	    }
1025209ff23fSmrg	} else
1026209ff23fSmrg# endif /* XF86DRI */
1027209ff23fSmrg	{
1028b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1029209ff23fSmrg		return FALSE;
1030b7e1c893Smrg	    else {
1031b7e1c893Smrg		if (!RADEONDrawInitMMIO(pScreen))
1032b7e1c893Smrg		    return FALSE;
1033b7e1c893Smrg	    }
1034209ff23fSmrg	}
1035209ff23fSmrg    }
1036209ff23fSmrg#endif /* USE_EXA */
1037209ff23fSmrg#ifdef USE_XAA
1038209ff23fSmrg    if (!info->useEXA) {
1039209ff23fSmrg	XAAInfoRecPtr  a;
1040209ff23fSmrg
1041b7e1c893Smrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
1042b7e1c893Smrg	    return FALSE;
1043b7e1c893Smrg
1044b7e1c893Smrg	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1045209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1046209ff23fSmrg	    return FALSE;
1047209ff23fSmrg	}
1048209ff23fSmrg
1049209ff23fSmrg#ifdef XF86DRI
1050209ff23fSmrg	if (info->directRenderingEnabled)
1051209ff23fSmrg	    RADEONAccelInitCP(pScreen, a);
1052209ff23fSmrg	else
1053209ff23fSmrg#endif /* XF86DRI */
1054209ff23fSmrg	    RADEONAccelInitMMIO(pScreen, a);
1055209ff23fSmrg
1056209ff23fSmrg	RADEONEngineInit(pScrn);
1057209ff23fSmrg
1058209ff23fSmrg	if (!XAAInit(pScreen, a)) {
1059209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1060209ff23fSmrg	    return FALSE;
1061209ff23fSmrg	}
1062209ff23fSmrg    }
1063209ff23fSmrg#endif /* USE_XAA */
1064209ff23fSmrg    return TRUE;
1065209ff23fSmrg}
1066209ff23fSmrg
1067209ff23fSmrgvoid RADEONInit3DEngine(ScrnInfoPtr pScrn)
1068209ff23fSmrg{
1069209ff23fSmrg    RADEONInfoPtr info = RADEONPTR (pScrn);
1070209ff23fSmrg
1071209ff23fSmrg#ifdef XF86DRI
1072209ff23fSmrg    if (info->directRenderingEnabled) {
1073b7e1c893Smrg	drm_radeon_sarea_t *pSAREAPriv;
1074209ff23fSmrg
1075209ff23fSmrg	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1076b7e1c893Smrg	pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1077209ff23fSmrg	RADEONInit3DEngineCP(pScrn);
1078209ff23fSmrg    } else
1079209ff23fSmrg#endif
1080209ff23fSmrg	RADEONInit3DEngineMMIO(pScrn);
1081209ff23fSmrg
1082b7e1c893Smrg    info->accel_state->XInited3D = TRUE;
1083209ff23fSmrg}
1084209ff23fSmrg
1085209ff23fSmrg#ifdef USE_XAA
1086209ff23fSmrg#ifdef XF86DRI
1087209ff23fSmrgBool
1088209ff23fSmrgRADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1089209ff23fSmrg{
1090209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1091209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1092209ff23fSmrg    int            cpp = info->CurrentLayout.pixel_bytes;
1093b7e1c893Smrg    int            depthCpp = (info->dri->depthBits - 8) / 4;
1094209ff23fSmrg    int            width_bytes = pScrn->displayWidth * cpp;
1095209ff23fSmrg    int            bufferSize;
1096209ff23fSmrg    int            depthSize;
1097209ff23fSmrg    int            l;
1098209ff23fSmrg    int            scanlines;
1099209ff23fSmrg    int            texsizerequest;
1100209ff23fSmrg    BoxRec         MemBox;
1101209ff23fSmrg    FBAreaPtr      fbarea;
1102209ff23fSmrg
1103b7e1c893Smrg    info->dri->frontOffset = 0;
1104b7e1c893Smrg    info->dri->frontPitch = pScrn->displayWidth;
1105b7e1c893Smrg    info->dri->backPitch = pScrn->displayWidth;
1106209ff23fSmrg
1107209ff23fSmrg    /* make sure we use 16 line alignment for tiling (8 might be enough).
1108209ff23fSmrg     * Might need that for non-XF86DRI too?
1109209ff23fSmrg     */
1110209ff23fSmrg    if (info->allowColorTiling) {
1111209ff23fSmrg	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
1112209ff23fSmrg		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1113209ff23fSmrg    } else {
1114209ff23fSmrg        bufferSize = (pScrn->virtualY * width_bytes
1115209ff23fSmrg		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1116209ff23fSmrg    }
1117209ff23fSmrg
1118209ff23fSmrg    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1119209ff23fSmrg     * which is always the case if color tiling is used due to color pitch
1120209ff23fSmrg     * but not necessarily otherwise, and its height a multiple of 16 lines.
1121209ff23fSmrg     */
1122b7e1c893Smrg    info->dri->depthPitch = (pScrn->displayWidth + 31) & ~31;
1123b7e1c893Smrg    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->dri->depthPitch
1124209ff23fSmrg		  * depthCpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
1125209ff23fSmrg
1126209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1127b7e1c893Smrg	       "Using %d MB GART aperture\n", info->dri->gartSize);
1128209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1129b7e1c893Smrg	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1130209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1131b7e1c893Smrg	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1132209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1133b7e1c893Smrg	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1134209ff23fSmrg
1135209ff23fSmrg    /* Try for front, back, depth, and three framebuffers worth of
1136209ff23fSmrg     * pixmap cache.  Should be enough for a fullscreen background
1137209ff23fSmrg     * image plus some leftovers.
1138209ff23fSmrg     * If the FBTexPercent option was used, try to achieve that percentage instead,
1139209ff23fSmrg     * but still have at least one pixmap buffer (get problems with xvideo/render
1140209ff23fSmrg     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1141209ff23fSmrg     * probably useless for XAA.
1142209ff23fSmrg     */
1143b7e1c893Smrg    if (info->dri->textureSize >= 0) {
1144209ff23fSmrg	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1145209ff23fSmrg			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1146209ff23fSmrg	/* first divide, then multiply or we'll get an overflow (been there...) */
1147b7e1c893Smrg			 / 100 * info->dri->textureSize;
1148209ff23fSmrg    }
1149209ff23fSmrg    else {
1150209ff23fSmrg	texsizerequest = (int)info->FbMapSize / 2;
1151209ff23fSmrg    }
1152b7e1c893Smrg    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1153209ff23fSmrg
1154209ff23fSmrg    /* If that gives us less than the requested memory, let's
1155209ff23fSmrg     * be greedy and grab some more.  Sorry, I care more about 3D
1156209ff23fSmrg     * performance than playing nicely, and you'll get around a full
1157209ff23fSmrg     * framebuffer's worth of pixmap cache anyway.
1158209ff23fSmrg     */
1159b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1160b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1161209ff23fSmrg    }
1162b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1163b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1164209ff23fSmrg    }
1165209ff23fSmrg
1166209ff23fSmrg    /* If there's still no space for textures, try without pixmap cache, but
1167209ff23fSmrg     * never use the reserved space, the space hw cursor and PCIGART table might
1168209ff23fSmrg     * use.
1169209ff23fSmrg     */
1170b7e1c893Smrg    if (info->dri->textureSize < 0) {
1171b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1172209ff23fSmrg	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1173209ff23fSmrg    }
1174209ff23fSmrg
1175209ff23fSmrg    /* Check to see if there is more room available after the 8192nd
1176209ff23fSmrg     * scanline for textures
1177209ff23fSmrg     */
1178209ff23fSmrg    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1179209ff23fSmrg    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1180b7e1c893Smrg	> info->dri->textureSize) {
1181b7e1c893Smrg	info->dri->textureSize =
1182209ff23fSmrg		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1183209ff23fSmrg    }
1184209ff23fSmrg
1185209ff23fSmrg    /* If backbuffer is disabled, don't allocate memory for it */
1186b7e1c893Smrg    if (info->dri->noBackBuffer) {
1187b7e1c893Smrg	info->dri->textureSize += bufferSize;
1188209ff23fSmrg    }
1189209ff23fSmrg
1190209ff23fSmrg    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1191209ff23fSmrg       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1192209ff23fSmrg       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1193209ff23fSmrg       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1194209ff23fSmrg       area otherwise).
1195209ff23fSmrg       This might cause some space at the end of the video memory to be unused, since it
1196209ff23fSmrg       can't be used (?) due to that log_tex_granularity thing???
1197209ff23fSmrg       Could use different copyscreentoscreen function for the pageflip copies
1198209ff23fSmrg       (which would use different src and dst offsets) to avoid this. */
1199b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1200b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1201209ff23fSmrg			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1202209ff23fSmrg    }
1203b7e1c893Smrg    if (info->dri->textureSize > 0) {
1204b7e1c893Smrg	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1205209ff23fSmrg	if (l < RADEON_LOG_TEX_GRANULARITY)
1206209ff23fSmrg	    l = RADEON_LOG_TEX_GRANULARITY;
1207209ff23fSmrg	/* Round the texture size up to the nearest whole number of
1208209ff23fSmrg	 * texture regions.  Again, be greedy about this, don't
1209209ff23fSmrg	 * round down.
1210209ff23fSmrg	 */
1211b7e1c893Smrg	info->dri->log2TexGran = l;
1212b7e1c893Smrg	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1213209ff23fSmrg    } else {
1214b7e1c893Smrg	info->dri->textureSize = 0;
1215209ff23fSmrg    }
1216209ff23fSmrg
1217209ff23fSmrg    /* Set a minimum usable local texture heap size.  This will fit
1218209ff23fSmrg     * two 256x256x32bpp textures.
1219209ff23fSmrg     */
1220b7e1c893Smrg    if (info->dri->textureSize < 512 * 1024) {
1221b7e1c893Smrg	info->dri->textureOffset = 0;
1222b7e1c893Smrg	info->dri->textureSize = 0;
1223209ff23fSmrg    }
1224209ff23fSmrg
1225b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1226b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1227b7e1c893Smrg				    (width_bytes * 16)) * (width_bytes * 16);
1228209ff23fSmrg    }
1229209ff23fSmrg    else {
1230209ff23fSmrg	/* Reserve space for textures */
1231b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize +
1232b7e1c893Smrg				     RADEON_BUFFER_ALIGN) &
1233b7e1c893Smrg				    ~(uint32_t)RADEON_BUFFER_ALIGN);
1234209ff23fSmrg    }
1235209ff23fSmrg
1236209ff23fSmrg    /* Reserve space for the shared depth
1237209ff23fSmrg     * buffer.
1238209ff23fSmrg     */
1239b7e1c893Smrg    info->dri->depthOffset = ((info->dri->textureOffset - depthSize +
1240b7e1c893Smrg			       RADEON_BUFFER_ALIGN) &
1241b7e1c893Smrg			      ~(uint32_t)RADEON_BUFFER_ALIGN);
1242209ff23fSmrg
1243209ff23fSmrg    /* Reserve space for the shared back buffer */
1244b7e1c893Smrg    if (info->dri->noBackBuffer) {
1245b7e1c893Smrg       info->dri->backOffset = info->dri->depthOffset;
1246209ff23fSmrg    } else {
1247b7e1c893Smrg       info->dri->backOffset = ((info->dri->depthOffset - bufferSize +
1248b7e1c893Smrg				 RADEON_BUFFER_ALIGN) &
1249b7e1c893Smrg				~(uint32_t)RADEON_BUFFER_ALIGN);
1250209ff23fSmrg    }
1251209ff23fSmrg
1252b7e1c893Smrg    info->dri->backY = info->dri->backOffset / width_bytes;
1253b7e1c893Smrg    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1254209ff23fSmrg
1255209ff23fSmrg    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1256209ff23fSmrg    if (scanlines > 8191)
1257209ff23fSmrg	scanlines = 8191;
1258209ff23fSmrg
1259209ff23fSmrg    MemBox.x1 = 0;
1260209ff23fSmrg    MemBox.y1 = 0;
1261209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1262209ff23fSmrg    MemBox.y2 = scanlines;
1263209ff23fSmrg
1264209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1265209ff23fSmrg        xf86DrvMsg(scrnIndex, X_ERROR,
1266209ff23fSmrg		   "Memory manager initialization to "
1267209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1268209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1269209ff23fSmrg	return FALSE;
1270209ff23fSmrg    } else {
1271209ff23fSmrg	int  width, height;
1272209ff23fSmrg
1273209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1274209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1275209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1276209ff23fSmrg	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1277209ff23fSmrg	   aligned... sigh */
1278209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1279209ff23fSmrg						pScrn->displayWidth,
1280209ff23fSmrg						info->allowColorTiling ?
1281209ff23fSmrg						((pScrn->virtualY + 15) & ~15)
1282209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1283209ff23fSmrg						0, NULL, NULL,
1284209ff23fSmrg						NULL))) {
1285209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1286209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1287209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1288209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1289209ff23fSmrg	} else {
1290209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1291209ff23fSmrg	}
1292209ff23fSmrg
1293209ff23fSmrg	RADEONDRIAllocatePCIGARTTable(pScreen);
1294209ff23fSmrg
1295209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1296209ff23fSmrg					  &height, 0, 0, 0)) {
1297209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1298209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1299209ff23fSmrg		       width, height);
1300209ff23fSmrg
1301209ff23fSmrg	    /* Lines in offscreen area needed for depth buffer and
1302209ff23fSmrg	     * textures
1303209ff23fSmrg	     */
1304b7e1c893Smrg	    info->dri->depthTexLines = (scanlines
1305b7e1c893Smrg					- info->dri->depthOffset / width_bytes);
1306b7e1c893Smrg	    info->dri->backLines	    = (scanlines
1307b7e1c893Smrg					       - info->dri->backOffset / width_bytes
1308b7e1c893Smrg					       - info->dri->depthTexLines);
1309b7e1c893Smrg	    info->dri->backArea	    = NULL;
1310209ff23fSmrg	} else {
1311209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR,
1312209ff23fSmrg		       "Unable to determine largest offscreen area "
1313209ff23fSmrg		       "available\n");
1314209ff23fSmrg	    return FALSE;
1315209ff23fSmrg	}
1316209ff23fSmrg    }
1317209ff23fSmrg
1318209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1319209ff23fSmrg	       "Will use front buffer at offset 0x%x\n",
1320b7e1c893Smrg	       info->dri->frontOffset);
1321209ff23fSmrg
1322209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1323209ff23fSmrg	       "Will use back buffer at offset 0x%x\n",
1324b7e1c893Smrg	       info->dri->backOffset);
1325209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1326209ff23fSmrg	       "Will use depth buffer at offset 0x%x\n",
1327b7e1c893Smrg	       info->dri->depthOffset);
1328209ff23fSmrg    if (info->cardType==CARD_PCIE)
1329209ff23fSmrg    	xf86DrvMsg(scrnIndex, X_INFO,
1330209ff23fSmrg	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1331b7e1c893Smrg		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1332209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1333209ff23fSmrg	       "Will use %d kb for textures at offset 0x%x\n",
1334b7e1c893Smrg	       info->dri->textureSize/1024, info->dri->textureOffset);
1335209ff23fSmrg
1336b7e1c893Smrg    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1337b7e1c893Smrg				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1338209ff23fSmrg
1339b7e1c893Smrg    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1340b7e1c893Smrg				  ((info->dri->backOffset + info->fbLocation) >> 10));
1341209ff23fSmrg
1342b7e1c893Smrg    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1343b7e1c893Smrg				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1344209ff23fSmrg    return TRUE;
1345209ff23fSmrg}
1346209ff23fSmrg#endif /* XF86DRI */
1347209ff23fSmrg
1348209ff23fSmrgBool
1349209ff23fSmrgRADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1350209ff23fSmrg{
1351209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1352209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1353209ff23fSmrg    BoxRec         MemBox;
1354209ff23fSmrg    int            y2;
1355209ff23fSmrg
1356209ff23fSmrg    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1357209ff23fSmrg
1358209ff23fSmrg    MemBox.x1 = 0;
1359209ff23fSmrg    MemBox.y1 = 0;
1360209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1361209ff23fSmrg    y2 = info->FbMapSize / width_bytes;
1362209ff23fSmrg    if (y2 >= 32768)
1363209ff23fSmrg	y2 = 32767; /* because MemBox.y2 is signed short */
1364209ff23fSmrg    MemBox.y2 = y2;
1365209ff23fSmrg
1366209ff23fSmrg    /* The acceleration engine uses 14 bit
1367209ff23fSmrg     * signed coordinates, so we can't have any
1368209ff23fSmrg     * drawable caches beyond this region.
1369209ff23fSmrg     */
1370209ff23fSmrg    if (MemBox.y2 > 8191)
1371209ff23fSmrg	MemBox.y2 = 8191;
1372209ff23fSmrg
1373209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1374209ff23fSmrg	xf86DrvMsg(scrnIndex, X_ERROR,
1375209ff23fSmrg		   "Memory manager initialization to "
1376209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1377209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1378209ff23fSmrg	return FALSE;
1379209ff23fSmrg    } else {
1380209ff23fSmrg	int       width, height;
1381209ff23fSmrg	FBAreaPtr fbarea;
1382209ff23fSmrg
1383209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1384209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1385209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1386209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1387209ff23fSmrg						pScrn->displayWidth,
1388209ff23fSmrg						info->allowColorTiling ?
1389209ff23fSmrg						((pScrn->virtualY + 15) & ~15)
1390209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1391209ff23fSmrg						0, NULL, NULL,
1392209ff23fSmrg						NULL))) {
1393209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1394209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1395209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1396209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1397209ff23fSmrg	} else {
1398209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1399209ff23fSmrg	}
1400209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1401209ff23fSmrg					      0, 0, 0)) {
1402209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1403209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1404209ff23fSmrg		       width, height);
1405209ff23fSmrg	}
1406209ff23fSmrg	return TRUE;
1407209ff23fSmrg    }
1408209ff23fSmrg}
1409209ff23fSmrg#endif /* USE_XAA */
1410