radeon_accel.c revision c503f109
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3209ff23fSmrg *                VA Linux Systems Inc., Fremont, California.
4209ff23fSmrg *
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining
8209ff23fSmrg * a copy of this software and associated documentation files (the
9209ff23fSmrg * "Software"), to deal in the Software without restriction, including
10209ff23fSmrg * without limitation on the rights to use, copy, modify, merge,
11209ff23fSmrg * publish, distribute, sublicense, and/or sell copies of the Software,
12209ff23fSmrg * and to permit persons to whom the Software is furnished to do so,
13209ff23fSmrg * subject to the following conditions:
14209ff23fSmrg *
15209ff23fSmrg * The above copyright notice and this permission notice (including the
16209ff23fSmrg * next paragraph) shall be included in all copies or substantial
17209ff23fSmrg * portions of the Software.
18209ff23fSmrg *
19209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20209ff23fSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21209ff23fSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22209ff23fSmrg * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23209ff23fSmrg * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24209ff23fSmrg * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26209ff23fSmrg * DEALINGS IN THE SOFTWARE.
27209ff23fSmrg */
28209ff23fSmrg
29209ff23fSmrg#ifdef HAVE_CONFIG_H
30209ff23fSmrg#include "config.h"
31209ff23fSmrg#endif
32209ff23fSmrg
33209ff23fSmrg/*
34209ff23fSmrg * Authors:
35209ff23fSmrg *   Kevin E. Martin <martin@xfree86.org>
36209ff23fSmrg *   Rickard E. Faith <faith@valinux.com>
37209ff23fSmrg *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38209ff23fSmrg *
39209ff23fSmrg * Credits:
40209ff23fSmrg *
41209ff23fSmrg *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42209ff23fSmrg *   code to his Radeon driver.  Portions of this file are based on the
43209ff23fSmrg *   initialization code for that driver.
44209ff23fSmrg *
45209ff23fSmrg * References:
46209ff23fSmrg *
47209ff23fSmrg * !!!! FIXME !!!!
48209ff23fSmrg *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49209ff23fSmrg *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50209ff23fSmrg *   1999.
51209ff23fSmrg *
52209ff23fSmrg *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53209ff23fSmrg *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54209ff23fSmrg *
55209ff23fSmrg * Notes on unimplemented XAA optimizations:
56209ff23fSmrg *
57209ff23fSmrg *   SetClipping:   This has been removed as XAA expects 16bit registers
58209ff23fSmrg *                  for full clipping.
59209ff23fSmrg *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60209ff23fSmrg *   DashedLine with non-power-of-two pattern length: Apparently, there is
61209ff23fSmrg *                  no way to set the length of the pattern -- it is always
62209ff23fSmrg *                  assumed to be 8 or 32 (or 1024?).
63209ff23fSmrg *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64209ff23fSmrg *                  Manual where it states that monochrome expansion of frame
65209ff23fSmrg *                  buffer data is not supported.
66209ff23fSmrg *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67209ff23fSmrg *                  direct/indirect method.  If we had more data registers,
68209ff23fSmrg *                  then we could do better.  If XAA supported a trigger write
69209ff23fSmrg *                  address, the code would be simpler.
70209ff23fSmrg *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71209ff23fSmrg *                  pattern from frame buffer memory.
72209ff23fSmrg *   ImageWrites:   Same as CPUToScreenColorExpandFill
73209ff23fSmrg *
74209ff23fSmrg */
75209ff23fSmrg
76209ff23fSmrg#include <errno.h>
77209ff23fSmrg#include <string.h>
78209ff23fSmrg				/* Driver data structures */
79209ff23fSmrg#include "radeon.h"
80209ff23fSmrg#include "radeon_reg.h"
81b7e1c893Smrg#include "r600_reg.h"
82209ff23fSmrg#include "radeon_macros.h"
83209ff23fSmrg#include "radeon_probe.h"
84209ff23fSmrg#include "radeon_version.h"
85209ff23fSmrg#ifdef XF86DRI
86209ff23fSmrg#define _XF86DRI_SERVER_
87b7e1c893Smrg#include "radeon_drm.h"
88209ff23fSmrg#endif
89209ff23fSmrg
90c503f109Smrg#include "ati_pciids_gen.h"
91c503f109Smrg
92209ff23fSmrg				/* Line support */
93209ff23fSmrg#include "miline.h"
94209ff23fSmrg
95209ff23fSmrg				/* X and server generic header files */
96209ff23fSmrg#include "xf86.h"
97209ff23fSmrg
98b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn);
99209ff23fSmrg
100209ff23fSmrg#ifdef USE_XAA
101209ff23fSmrgstatic struct {
102209ff23fSmrg    int rop;
103209ff23fSmrg    int pattern;
104209ff23fSmrg} RADEON_ROP[] = {
105209ff23fSmrg    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
106209ff23fSmrg    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
107209ff23fSmrg    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
108209ff23fSmrg    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
109209ff23fSmrg    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
110209ff23fSmrg    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
111209ff23fSmrg    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
112209ff23fSmrg    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
113209ff23fSmrg    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
114209ff23fSmrg    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
115209ff23fSmrg    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
116209ff23fSmrg    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
117209ff23fSmrg    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
118209ff23fSmrg    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
119209ff23fSmrg    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
120209ff23fSmrg    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
121209ff23fSmrg};
122209ff23fSmrg#endif
123209ff23fSmrg
124209ff23fSmrg/* The FIFO has 64 slots.  This routines waits until at least `entries'
125209ff23fSmrg * of these slots are empty.
126209ff23fSmrg */
127209ff23fSmrgvoid RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
128209ff23fSmrg{
129209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
130209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
131209ff23fSmrg    int            i;
132209ff23fSmrg
133209ff23fSmrg    for (;;) {
134209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
135b7e1c893Smrg	    info->accel_state->fifo_slots =
136209ff23fSmrg		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
137b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
138209ff23fSmrg	}
139209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
140209ff23fSmrg		       "FIFO timed out: %u entries, stat=0x%08x\n",
141209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
142209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS));
143209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
144209ff23fSmrg		   "FIFO timed out, resetting engine...\n");
145209ff23fSmrg	RADEONEngineReset(pScrn);
146209ff23fSmrg	RADEONEngineRestore(pScrn);
147209ff23fSmrg#ifdef XF86DRI
148209ff23fSmrg	if (info->directRenderingEnabled) {
149209ff23fSmrg	    RADEONCP_RESET(pScrn, info);
150209ff23fSmrg	    RADEONCP_START(pScrn, info);
151209ff23fSmrg	}
152209ff23fSmrg#endif
153209ff23fSmrg    }
154209ff23fSmrg}
155209ff23fSmrg
156b7e1c893Smrgvoid R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
157b7e1c893Smrg{
158b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
159b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
160b7e1c893Smrg    int            i;
161b7e1c893Smrg
162b7e1c893Smrg    for (;;) {
163b7e1c893Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
164b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
165b7e1c893Smrg		info->accel_state->fifo_slots =
166b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
167b7e1c893Smrg	    else
168b7e1c893Smrg		info->accel_state->fifo_slots =
169b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
170b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
171b7e1c893Smrg	}
172b7e1c893Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
173b7e1c893Smrg		       "FIFO timed out: stat=0x%08x\n",
174b7e1c893Smrg		       (unsigned int)INREG(R600_GRBM_STATUS));
175b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
176b7e1c893Smrg		   "FIFO timed out, resetting engine...\n");
177b7e1c893Smrg	R600EngineReset(pScrn);
178b7e1c893Smrg#ifdef XF86DRI
179b7e1c893Smrg	if (info->directRenderingEnabled) {
180b7e1c893Smrg	    RADEONCP_RESET(pScrn, info);
181b7e1c893Smrg	    RADEONCP_START(pScrn, info);
182b7e1c893Smrg	}
183b7e1c893Smrg#endif
184b7e1c893Smrg    }
185b7e1c893Smrg}
186b7e1c893Smrg
187209ff23fSmrg/* Flush all dirty data in the Pixel Cache to memory */
188209ff23fSmrgvoid RADEONEngineFlush(ScrnInfoPtr pScrn)
189209ff23fSmrg{
190209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
191209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
192209ff23fSmrg    int            i;
193209ff23fSmrg
194209ff23fSmrg    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
195209ff23fSmrg	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
196209ff23fSmrg		RADEON_RB3D_DC_FLUSH_ALL,
197209ff23fSmrg		~RADEON_RB3D_DC_FLUSH_ALL);
198209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
199209ff23fSmrg	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
200209ff23fSmrg		break;
201209ff23fSmrg	}
202209ff23fSmrg	if (i == RADEON_TIMEOUT) {
203209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
204209ff23fSmrg			   "DC flush timeout: %x\n",
205209ff23fSmrg			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
206209ff23fSmrg	}
207209ff23fSmrg    } else {
208209ff23fSmrg	OUTREGP(R300_DSTCACHE_CTLSTAT,
209209ff23fSmrg		R300_RB2D_DC_FLUSH_ALL,
210209ff23fSmrg		~R300_RB2D_DC_FLUSH_ALL);
211209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
212209ff23fSmrg	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
213209ff23fSmrg		break;
214209ff23fSmrg	}
215209ff23fSmrg	if (i == RADEON_TIMEOUT) {
216209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
217209ff23fSmrg			   "DC flush timeout: %x\n",
218209ff23fSmrg			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
219209ff23fSmrg	}
220209ff23fSmrg    }
221209ff23fSmrg}
222209ff23fSmrg
223209ff23fSmrg/* Reset graphics card to known state */
224209ff23fSmrgvoid RADEONEngineReset(ScrnInfoPtr pScrn)
225209ff23fSmrg{
226209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
227209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
228209ff23fSmrg    uint32_t       clock_cntl_index;
229209ff23fSmrg    uint32_t       mclk_cntl;
230209ff23fSmrg    uint32_t       rbbm_soft_reset;
231209ff23fSmrg    uint32_t       host_path_cntl;
232209ff23fSmrg
233209ff23fSmrg    /* The following RBBM_SOFT_RESET sequence can help un-wedge
234209ff23fSmrg     * an R300 after the command processor got stuck.
235209ff23fSmrg     */
236209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
237209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
238209ff23fSmrg                                   RADEON_SOFT_RESET_CP |
239209ff23fSmrg                                   RADEON_SOFT_RESET_HI |
240209ff23fSmrg                                   RADEON_SOFT_RESET_SE |
241209ff23fSmrg                                   RADEON_SOFT_RESET_RE |
242209ff23fSmrg                                   RADEON_SOFT_RESET_PP |
243209ff23fSmrg                                   RADEON_SOFT_RESET_E2 |
244209ff23fSmrg                                   RADEON_SOFT_RESET_RB));
245209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
246209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
247209ff23fSmrg                                   ~(RADEON_SOFT_RESET_CP |
248209ff23fSmrg                                     RADEON_SOFT_RESET_HI |
249209ff23fSmrg                                     RADEON_SOFT_RESET_SE |
250209ff23fSmrg                                     RADEON_SOFT_RESET_RE |
251209ff23fSmrg                                     RADEON_SOFT_RESET_PP |
252209ff23fSmrg                                     RADEON_SOFT_RESET_E2 |
253209ff23fSmrg                                     RADEON_SOFT_RESET_RB)));
254209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
255209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
256209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
257209ff23fSmrg
258209ff23fSmrg    RADEONEngineFlush(pScrn);
259209ff23fSmrg
260209ff23fSmrg    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
261209ff23fSmrg    RADEONPllErrataAfterIndex(info);
262209ff23fSmrg
263209ff23fSmrg#if 0 /* taken care of by new PM code */
264209ff23fSmrg    /* Some ASICs have bugs with dynamic-on feature, which are
265209ff23fSmrg     * ASIC-version dependent, so we force all blocks on for now
266209ff23fSmrg     */
267209ff23fSmrg    if (info->HasCRTC2) {
268209ff23fSmrg	uint32_t tmp;
269209ff23fSmrg
270209ff23fSmrg	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
271209ff23fSmrg	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
272209ff23fSmrg				  RADEON_CP_MAX_DYN_STOP_LAT |
273209ff23fSmrg				  RADEON_SCLK_FORCEON_MASK));
274209ff23fSmrg
275209ff23fSmrg	if (info->ChipFamily == CHIP_FAMILY_RV200) {
276209ff23fSmrg	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
277209ff23fSmrg	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
278209ff23fSmrg	}
279209ff23fSmrg    }
280209ff23fSmrg#endif /* new PM code */
281209ff23fSmrg
282209ff23fSmrg    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
283209ff23fSmrg
284209ff23fSmrg#if 0 /* handled by new PM code */
285209ff23fSmrg    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
286209ff23fSmrg			      RADEON_FORCEON_MCLKA |
287209ff23fSmrg			      RADEON_FORCEON_MCLKB |
288209ff23fSmrg			      RADEON_FORCEON_YCLKA |
289209ff23fSmrg			      RADEON_FORCEON_YCLKB |
290209ff23fSmrg			      RADEON_FORCEON_MC |
291209ff23fSmrg			      RADEON_FORCEON_AIC));
292209ff23fSmrg#endif /* new PM code */
293209ff23fSmrg
294209ff23fSmrg    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
295209ff23fSmrg     * unexpected behaviour on some machines.  Here we use
296209ff23fSmrg     * RADEON_HOST_PATH_CNTL to reset it.
297209ff23fSmrg     */
298209ff23fSmrg    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
299209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
300209ff23fSmrg
301209ff23fSmrg    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
302209ff23fSmrg	uint32_t tmp;
303209ff23fSmrg
304209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
305209ff23fSmrg					RADEON_SOFT_RESET_CP |
306209ff23fSmrg					RADEON_SOFT_RESET_HI |
307209ff23fSmrg					RADEON_SOFT_RESET_E2));
308209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
309209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
310209ff23fSmrg	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
311209ff23fSmrg	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
312209ff23fSmrg    } else {
313209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
314209ff23fSmrg					RADEON_SOFT_RESET_CP |
315209ff23fSmrg					RADEON_SOFT_RESET_SE |
316209ff23fSmrg					RADEON_SOFT_RESET_RE |
317209ff23fSmrg					RADEON_SOFT_RESET_PP |
318209ff23fSmrg					RADEON_SOFT_RESET_E2 |
319209ff23fSmrg					RADEON_SOFT_RESET_RB));
320209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
321209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
322209ff23fSmrg					~(RADEON_SOFT_RESET_CP |
323209ff23fSmrg					  RADEON_SOFT_RESET_SE |
324209ff23fSmrg					  RADEON_SOFT_RESET_RE |
325209ff23fSmrg					  RADEON_SOFT_RESET_PP |
326209ff23fSmrg					  RADEON_SOFT_RESET_E2 |
327209ff23fSmrg					  RADEON_SOFT_RESET_RB)));
328209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
329209ff23fSmrg    }
330209ff23fSmrg
331209ff23fSmrg    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET);
332209ff23fSmrg    INREG(RADEON_HOST_PATH_CNTL);
333209ff23fSmrg    OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl);
334209ff23fSmrg
335209ff23fSmrg    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
336209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
337209ff23fSmrg
338209ff23fSmrg    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
339209ff23fSmrg    RADEONPllErrataAfterIndex(info);
340209ff23fSmrg    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
341209ff23fSmrg}
342209ff23fSmrg
343b7e1c893Smrg/* Reset graphics card to known state */
344b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn)
345b7e1c893Smrg{
346b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
347b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
348b7e1c893Smrg    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
349b7e1c893Smrg
350b7e1c893Smrg    cp_ptr = INREG(R600_CP_RB_WPTR);
351b7e1c893Smrg
352b7e1c893Smrg    cp_me_cntl = INREG(R600_CP_ME_CNTL);
353b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, 0x10000000);
354b7e1c893Smrg
355b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
356b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
357b7e1c893Smrg    usleep (50);
358b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0);
359b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
360b7e1c893Smrg
361b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
362b7e1c893Smrg    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
363b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, 0x80000000);
364b7e1c893Smrg
365b7e1c893Smrg    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
366b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR, cp_ptr);
367b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
368b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
369b7e1c893Smrg
370b7e1c893Smrg}
371b7e1c893Smrg
372209ff23fSmrg/* Restore the acceleration hardware to its previous state */
373209ff23fSmrgvoid RADEONEngineRestore(ScrnInfoPtr pScrn)
374209ff23fSmrg{
375209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
376209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
377209ff23fSmrg
378209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
379209ff23fSmrg		   "EngineRestore (%d/%d)\n",
380209ff23fSmrg		   info->CurrentLayout.pixel_code,
381209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
382209ff23fSmrg
383209ff23fSmrg    /* Setup engine location. This shouldn't be necessary since we
384209ff23fSmrg     * set them appropriately before any accel ops, but let's avoid
385209ff23fSmrg     * random bogus DMA in case we inadvertently trigger the engine
386209ff23fSmrg     * in the wrong place (happened).
387209ff23fSmrg     */
388209ff23fSmrg    RADEONWaitForFifo(pScrn, 2);
389b7e1c893Smrg    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390b7e1c893Smrg    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
391209ff23fSmrg
392209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
393209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
394209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE,
395209ff23fSmrg	    RADEON_HOST_BIG_ENDIAN_EN,
396209ff23fSmrg	    ~RADEON_HOST_BIG_ENDIAN_EN);
397209ff23fSmrg#else
398209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
399209ff23fSmrg#endif
400209ff23fSmrg
401209ff23fSmrg    /* Restore SURFACE_CNTL */
402209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
403209ff23fSmrg
404209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
405209ff23fSmrg    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
406209ff23fSmrg					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
407209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
408b7e1c893Smrg    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
409209ff23fSmrg				       | RADEON_GMC_BRUSH_SOLID_COLOR
410209ff23fSmrg				       | RADEON_GMC_SRC_DATATYPE_COLOR));
411209ff23fSmrg
412209ff23fSmrg    RADEONWaitForFifo(pScrn, 5);
413209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
414209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
415209ff23fSmrg    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
416209ff23fSmrg    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
417209ff23fSmrg    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
418209ff23fSmrg
419209ff23fSmrg    RADEONWaitForIdleMMIO(pScrn);
420209ff23fSmrg
421b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
422209ff23fSmrg}
423209ff23fSmrg
424209ff23fSmrg/* Initialize the acceleration hardware */
425209ff23fSmrgvoid RADEONEngineInit(ScrnInfoPtr pScrn)
426209ff23fSmrg{
427209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
428209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
429b7e1c893Smrg    int datatype = 0;
430b7e1c893Smrg    info->accel_state->num_gb_pipes = 0;
431209ff23fSmrg
432209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
433209ff23fSmrg		   "EngineInit (%d/%d)\n",
434209ff23fSmrg		   info->CurrentLayout.pixel_code,
435209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
436209ff23fSmrg
437209ff23fSmrg#ifdef XF86DRI
438209ff23fSmrg    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
439b7e1c893Smrg	drm_radeon_getparam_t np;
440209ff23fSmrg	int num_pipes;
441209ff23fSmrg
442209ff23fSmrg	memset(&np, 0, sizeof(np));
443209ff23fSmrg	np.param = RADEON_PARAM_NUM_GB_PIPES;
444209ff23fSmrg	np.value = &num_pipes;
445209ff23fSmrg
446b7e1c893Smrg	if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np,
447209ff23fSmrg				sizeof(np)) < 0) {
448209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
449209ff23fSmrg		       "Failed to determine num pipes from DRM, falling back to "
450209ff23fSmrg		       "manual look-up!\n");
451b7e1c893Smrg	    info->accel_state->num_gb_pipes = 0;
452209ff23fSmrg	} else {
453b7e1c893Smrg	    info->accel_state->num_gb_pipes = num_pipes;
454209ff23fSmrg	}
455209ff23fSmrg    }
456209ff23fSmrg#endif
457209ff23fSmrg
458209ff23fSmrg    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
459209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_R420)  ||
460209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS600) ||
461209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS690) ||
462209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS740) ||
463209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS400) ||
464209ff23fSmrg	(info->ChipFamily == CHIP_FAMILY_RS480) ||
465209ff23fSmrg	IS_R500_3D) {
466b7e1c893Smrg	if (info->accel_state->num_gb_pipes == 0) {
467209ff23fSmrg	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
468209ff23fSmrg
469b7e1c893Smrg	    info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
470209ff23fSmrg	    if (IS_R500_3D)
471209ff23fSmrg		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
472209ff23fSmrg	}
473209ff23fSmrg    } else {
474b7e1c893Smrg	if (info->accel_state->num_gb_pipes == 0) {
475209ff23fSmrg	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
476209ff23fSmrg		(info->ChipFamily == CHIP_FAMILY_R350)) {
477209ff23fSmrg		/* R3xx chips */
478b7e1c893Smrg		info->accel_state->num_gb_pipes = 2;
479209ff23fSmrg	    } else {
480209ff23fSmrg		/* RV3xx chips */
481b7e1c893Smrg		info->accel_state->num_gb_pipes = 1;
482209ff23fSmrg	    }
483209ff23fSmrg	}
484209ff23fSmrg    }
485209ff23fSmrg
486c503f109Smrg    /* RV410 SE cards only have 1 quadpipe */
487c503f109Smrg    if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
488c503f109Smrg	(info->Chipset == PCI_CHIP_RV410_5E4F))
489c503f109Smrg	info->accel_state->num_gb_pipes = 1;
490c503f109Smrg
491209ff23fSmrg    if (IS_R300_3D || IS_R500_3D)
492209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
493b7e1c893Smrg		   "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
494209ff23fSmrg
495209ff23fSmrg    if (IS_R300_3D || IS_R500_3D) {
496c503f109Smrg	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
497209ff23fSmrg
498b7e1c893Smrg	switch(info->accel_state->num_gb_pipes) {
499209ff23fSmrg	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
500209ff23fSmrg	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
501209ff23fSmrg	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
502209ff23fSmrg	default:
503209ff23fSmrg	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
504209ff23fSmrg	}
505209ff23fSmrg
506209ff23fSmrg	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
507209ff23fSmrg	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
508209ff23fSmrg	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
509209ff23fSmrg	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
510209ff23fSmrg					 R300_DC_AUTOFLUSH_ENABLE |
511209ff23fSmrg					 R300_DC_DC_DISABLE_IGNORE_PE));
512209ff23fSmrg    } else
513209ff23fSmrg	OUTREG(RADEON_RB3D_CNTL, 0);
514209ff23fSmrg
515209ff23fSmrg    RADEONEngineReset(pScrn);
516209ff23fSmrg
517209ff23fSmrg    switch (info->CurrentLayout.pixel_code) {
518b7e1c893Smrg    case 8:  datatype = 2; break;
519b7e1c893Smrg    case 15: datatype = 3; break;
520b7e1c893Smrg    case 16: datatype = 4; break;
521b7e1c893Smrg    case 24: datatype = 5; break;
522b7e1c893Smrg    case 32: datatype = 6; break;
523209ff23fSmrg    default:
524209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
525209ff23fSmrg		       "Unknown depth/bpp = %d/%d (code = %d)\n",
526209ff23fSmrg		       info->CurrentLayout.depth,
527209ff23fSmrg		       info->CurrentLayout.bitsPerPixel,
528209ff23fSmrg		       info->CurrentLayout.pixel_code);
529209ff23fSmrg    }
530209ff23fSmrg
531b7e1c893Smrg    info->accel_state->dp_gui_master_cntl =
532b7e1c893Smrg	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
533209ff23fSmrg	 | RADEON_GMC_CLR_CMP_CNTL_DIS
534209ff23fSmrg	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
535209ff23fSmrg
536209ff23fSmrg    RADEONEngineRestore(pScrn);
537209ff23fSmrg}
538209ff23fSmrg
539209ff23fSmrg
540209ff23fSmrg#define ACCEL_MMIO
541209ff23fSmrg#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
542209ff23fSmrg#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
543209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
544209ff23fSmrg#define FINISH_ACCEL()
545209ff23fSmrg
546209ff23fSmrg#include "radeon_commonfuncs.c"
547209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
548209ff23fSmrg#include "radeon_render.c"
549209ff23fSmrg#endif
550209ff23fSmrg#include "radeon_accelfuncs.c"
551209ff23fSmrg
552209ff23fSmrg#undef ACCEL_MMIO
553209ff23fSmrg#undef ACCEL_PREAMBLE
554209ff23fSmrg#undef BEGIN_ACCEL
555209ff23fSmrg#undef OUT_ACCEL_REG
556209ff23fSmrg#undef FINISH_ACCEL
557209ff23fSmrg
558209ff23fSmrg#ifdef XF86DRI
559209ff23fSmrg
560209ff23fSmrg#define ACCEL_CP
561209ff23fSmrg#define ACCEL_PREAMBLE()						\
562209ff23fSmrg    RING_LOCALS;							\
563209ff23fSmrg    RADEONCP_REFRESH(pScrn, info)
564209ff23fSmrg#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
565209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
566209ff23fSmrg#define FINISH_ACCEL()          ADVANCE_RING()
567209ff23fSmrg
568209ff23fSmrg
569209ff23fSmrg#include "radeon_commonfuncs.c"
570209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
571209ff23fSmrg#include "radeon_render.c"
572209ff23fSmrg#endif
573209ff23fSmrg#include "radeon_accelfuncs.c"
574209ff23fSmrg
575209ff23fSmrg#undef ACCEL_CP
576209ff23fSmrg#undef ACCEL_PREAMBLE
577209ff23fSmrg#undef BEGIN_ACCEL
578209ff23fSmrg#undef OUT_ACCEL_REG
579209ff23fSmrg#undef FINISH_ACCEL
580209ff23fSmrg
581209ff23fSmrg/* Stop the CP */
582209ff23fSmrgint RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
583209ff23fSmrg{
584b7e1c893Smrg    drm_radeon_cp_stop_t  stop;
585209ff23fSmrg    int              ret, i;
586209ff23fSmrg
587209ff23fSmrg    stop.flush = 1;
588209ff23fSmrg    stop.idle  = 1;
589209ff23fSmrg
590b7e1c893Smrg    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
591b7e1c893Smrg			  sizeof(drm_radeon_cp_stop_t));
592209ff23fSmrg
593209ff23fSmrg    if (ret == 0) {
594209ff23fSmrg	return 0;
595209ff23fSmrg    } else if (errno != EBUSY) {
596209ff23fSmrg	return -errno;
597209ff23fSmrg    }
598209ff23fSmrg
599209ff23fSmrg    stop.flush = 0;
600209ff23fSmrg
601209ff23fSmrg    i = 0;
602209ff23fSmrg    do {
603b7e1c893Smrg	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
604b7e1c893Smrg			      sizeof(drm_radeon_cp_stop_t));
605209ff23fSmrg    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
606209ff23fSmrg
607209ff23fSmrg    if (ret == 0) {
608209ff23fSmrg	return 0;
609209ff23fSmrg    } else if (errno != EBUSY) {
610209ff23fSmrg	return -errno;
611209ff23fSmrg    }
612209ff23fSmrg
613209ff23fSmrg    stop.idle = 0;
614209ff23fSmrg
615b7e1c893Smrg    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
616b7e1c893Smrg			&stop, sizeof(drm_radeon_cp_stop_t))) {
617209ff23fSmrg	return -errno;
618209ff23fSmrg    } else {
619209ff23fSmrg	return 0;
620209ff23fSmrg    }
621209ff23fSmrg}
622209ff23fSmrg
623209ff23fSmrg/* Get an indirect buffer for the CP 2D acceleration commands  */
624209ff23fSmrgdrmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
625209ff23fSmrg{
626209ff23fSmrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
627209ff23fSmrg    drmDMAReq      dma;
628209ff23fSmrg    drmBufPtr      buf = NULL;
629209ff23fSmrg    int            indx = 0;
630209ff23fSmrg    int            size = 0;
631209ff23fSmrg    int            i = 0;
632209ff23fSmrg    int            ret;
633209ff23fSmrg
634209ff23fSmrg#if 0
635209ff23fSmrg    /* FIXME: pScrn->pScreen has not been initialized when this is first
636209ff23fSmrg     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
637209ff23fSmrg     * the screen index from pScrn, which is initialized, and then get
638209ff23fSmrg     * the screen from screenInfo.screens[index], but that is a hack.
639209ff23fSmrg     */
640209ff23fSmrg    dma.context = DRIGetContext(pScrn->pScreen);
641209ff23fSmrg#else
642209ff23fSmrg    /* This is the X server's context */
643209ff23fSmrg    dma.context = 0x00000001;
644209ff23fSmrg#endif
645209ff23fSmrg
646209ff23fSmrg    dma.send_count    = 0;
647209ff23fSmrg    dma.send_list     = NULL;
648209ff23fSmrg    dma.send_sizes    = NULL;
649209ff23fSmrg    dma.flags         = 0;
650209ff23fSmrg    dma.request_count = 1;
651209ff23fSmrg    dma.request_size  = RADEON_BUFFER_SIZE;
652209ff23fSmrg    dma.request_list  = &indx;
653209ff23fSmrg    dma.request_sizes = &size;
654209ff23fSmrg    dma.granted_count = 0;
655209ff23fSmrg
656209ff23fSmrg    while (1) {
657209ff23fSmrg	do {
658b7e1c893Smrg	    ret = drmDMA(info->dri->drmFD, &dma);
659209ff23fSmrg	    if (ret && ret != -EBUSY) {
660209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
661209ff23fSmrg			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
662209ff23fSmrg	    }
663209ff23fSmrg	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
664209ff23fSmrg
665209ff23fSmrg	if (ret == 0) {
666b7e1c893Smrg	    buf = &info->dri->buffers->list[indx];
667209ff23fSmrg	    buf->used = 0;
668209ff23fSmrg	    if (RADEON_VERBOSE) {
669209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
670209ff23fSmrg			   "   GetBuffer returning %d %p\n",
671209ff23fSmrg			   buf->idx, buf->address);
672209ff23fSmrg	    }
673209ff23fSmrg	    return buf;
674209ff23fSmrg	}
675209ff23fSmrg
676209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
677209ff23fSmrg		   "GetBuffer timed out, resetting engine...\n");
678b7e1c893Smrg
679b7e1c893Smrg	if (info->ChipFamily < CHIP_FAMILY_R600) {
680b7e1c893Smrg	    RADEONEngineReset(pScrn);
681b7e1c893Smrg	    RADEONEngineRestore(pScrn);
682b7e1c893Smrg	} else
683b7e1c893Smrg	    R600EngineReset(pScrn);
684209ff23fSmrg
685209ff23fSmrg	/* Always restart the engine when doing CP 2D acceleration */
686209ff23fSmrg	RADEONCP_RESET(pScrn, info);
687209ff23fSmrg	RADEONCP_START(pScrn, info);
688209ff23fSmrg    }
689209ff23fSmrg}
690209ff23fSmrg
691209ff23fSmrg/* Flush the indirect buffer to the kernel for submission to the card */
692209ff23fSmrgvoid RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
693209ff23fSmrg{
694209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
695b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
696b7e1c893Smrg    int                start  = info->cp->indirectStart;
697b7e1c893Smrg    drm_radeon_indirect_t  indirect;
698209ff23fSmrg
699209ff23fSmrg    if (!buffer) return;
700209ff23fSmrg    if (start == buffer->used && !discard) return;
701209ff23fSmrg
702209ff23fSmrg    if (RADEON_VERBOSE) {
703209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
704209ff23fSmrg		   buffer->idx);
705209ff23fSmrg    }
706209ff23fSmrg
707b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
708b7e1c893Smrg	if (buffer->used & 0x3c) {
709b7e1c893Smrg	    RING_LOCALS;
710b7e1c893Smrg
711b7e1c893Smrg	    while (buffer->used & 0x3c) {
712b7e1c893Smrg		BEGIN_RING(1);
713b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
714b7e1c893Smrg		ADVANCE_RING();
715b7e1c893Smrg	    }
716b7e1c893Smrg	}
717b7e1c893Smrg    }
718b7e1c893Smrg
719209ff23fSmrg    indirect.idx     = buffer->idx;
720209ff23fSmrg    indirect.start   = start;
721209ff23fSmrg    indirect.end     = buffer->used;
722209ff23fSmrg    indirect.discard = discard;
723209ff23fSmrg
724b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
725b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
726209ff23fSmrg
727209ff23fSmrg    if (discard) {
728b7e1c893Smrg	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
729b7e1c893Smrg	info->cp->indirectStart  = 0;
730209ff23fSmrg    } else {
731209ff23fSmrg	/* Start on a double word boundary */
732b7e1c893Smrg	info->cp->indirectStart  = buffer->used = (buffer->used + 7) & ~7;
733209ff23fSmrg	if (RADEON_VERBOSE) {
734209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
735b7e1c893Smrg		       info->cp->indirectStart);
736209ff23fSmrg	}
737209ff23fSmrg    }
738209ff23fSmrg}
739209ff23fSmrg
740209ff23fSmrg/* Flush and release the indirect buffer */
741209ff23fSmrgvoid RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
742209ff23fSmrg{
743209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
744b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
745b7e1c893Smrg    int                start  = info->cp->indirectStart;
746b7e1c893Smrg    drm_radeon_indirect_t  indirect;
747b7e1c893Smrg
748b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
749b7e1c893Smrg	if (buffer && (buffer->used & 0x3c)) {
750b7e1c893Smrg	    RING_LOCALS;
751b7e1c893Smrg
752b7e1c893Smrg	    while (buffer->used & 0x3c) {
753b7e1c893Smrg		BEGIN_RING(1);
754b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
755b7e1c893Smrg		ADVANCE_RING();
756b7e1c893Smrg	    }
757b7e1c893Smrg	}
758b7e1c893Smrg    }
759209ff23fSmrg
760b7e1c893Smrg    info->cp->indirectBuffer = NULL;
761b7e1c893Smrg    info->cp->indirectStart  = 0;
762209ff23fSmrg
763209ff23fSmrg    if (!buffer) return;
764209ff23fSmrg
765209ff23fSmrg    if (RADEON_VERBOSE) {
766209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
767209ff23fSmrg		   buffer->idx);
768209ff23fSmrg    }
769209ff23fSmrg
770209ff23fSmrg    indirect.idx     = buffer->idx;
771209ff23fSmrg    indirect.start   = start;
772209ff23fSmrg    indirect.end     = buffer->used;
773209ff23fSmrg    indirect.discard = 1;
774209ff23fSmrg
775b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
776b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
777209ff23fSmrg}
778209ff23fSmrg
779209ff23fSmrg/** \brief Calculate HostDataBlit parameters from pointer and pitch
780209ff23fSmrg *
781209ff23fSmrg * This is a helper for the trivial HostDataBlit users that don't need to worry
782209ff23fSmrg * about tiling etc.
783209ff23fSmrg */
784209ff23fSmrgvoid
785209ff23fSmrgRADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
786209ff23fSmrg		     uint32_t *dstPitchOff, int *x, int *y)
787209ff23fSmrg{
788209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
789209ff23fSmrg    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
790209ff23fSmrg
791209ff23fSmrg    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
792209ff23fSmrg    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
793209ff23fSmrg    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
794209ff23fSmrg}
795209ff23fSmrg
796209ff23fSmrg/* Set up a hostdata blit to transfer data from system memory to the
797209ff23fSmrg * framebuffer. Returns the address where the data can be written to and sets
798209ff23fSmrg * the dstPitch and hpass variables as required.
799209ff23fSmrg */
800209ff23fSmrguint8_t*
801209ff23fSmrgRADEONHostDataBlit(
802209ff23fSmrg    ScrnInfoPtr pScrn,
803209ff23fSmrg    unsigned int cpp,
804209ff23fSmrg    unsigned int w,
805209ff23fSmrg    uint32_t dstPitchOff,
806209ff23fSmrg    uint32_t *bufPitch,
807209ff23fSmrg    int x,
808209ff23fSmrg    int *y,
809209ff23fSmrg    unsigned int *h,
810209ff23fSmrg    unsigned int *hpass
811209ff23fSmrg){
812209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
813209ff23fSmrg    uint32_t format, dwords;
814209ff23fSmrg    uint8_t *ret;
815209ff23fSmrg    RING_LOCALS;
816209ff23fSmrg
817209ff23fSmrg    if ( *h == 0 )
818209ff23fSmrg    {
819209ff23fSmrg	return NULL;
820209ff23fSmrg    }
821209ff23fSmrg
822209ff23fSmrg    switch ( cpp )
823209ff23fSmrg    {
824209ff23fSmrg    case 4:
825209ff23fSmrg	format = RADEON_GMC_DST_32BPP;
826209ff23fSmrg	*bufPitch = 4 * w;
827209ff23fSmrg	break;
828209ff23fSmrg    case 2:
829209ff23fSmrg	format = RADEON_GMC_DST_16BPP;
830209ff23fSmrg	*bufPitch = 2 * ((w + 1) & ~1);
831209ff23fSmrg	break;
832209ff23fSmrg    case 1:
833209ff23fSmrg	format = RADEON_GMC_DST_8BPP_CI;
834209ff23fSmrg	*bufPitch = (w + 3) & ~3;
835209ff23fSmrg	break;
836209ff23fSmrg    default:
837209ff23fSmrg	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
838209ff23fSmrg		    "%s: Unsupported cpp %d!\n", __func__, cpp );
839209ff23fSmrg	return NULL;
840209ff23fSmrg    }
841209ff23fSmrg
842209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
843209ff23fSmrg    /* Swap doesn't work on R300 and later, it's handled during the
844209ff23fSmrg     * copy to ind. buffer pass
845209ff23fSmrg     */
846209ff23fSmrg    if (info->ChipFamily < CHIP_FAMILY_R300) {
847209ff23fSmrg        BEGIN_RING(2);
848209ff23fSmrg	if (cpp == 2)
849209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
850209ff23fSmrg			 RADEON_HOST_DATA_SWAP_HDW);
851209ff23fSmrg	else if (cpp == 1)
852209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
853209ff23fSmrg			 RADEON_HOST_DATA_SWAP_32BIT);
854209ff23fSmrg	else
855209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
856209ff23fSmrg			 RADEON_HOST_DATA_SWAP_NONE);
857209ff23fSmrg	ADVANCE_RING();
858209ff23fSmrg    }
859209ff23fSmrg#endif
860209ff23fSmrg
861209ff23fSmrg    /*RADEON_PURGE_CACHE();
862209ff23fSmrg      RADEON_WAIT_UNTIL_IDLE();*/
863209ff23fSmrg
864209ff23fSmrg    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
865209ff23fSmrg    dwords = *hpass * *bufPitch / 4;
866209ff23fSmrg
867209ff23fSmrg    BEGIN_RING( dwords + 10 );
868209ff23fSmrg    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
869209ff23fSmrg    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
870209ff23fSmrg	    | RADEON_GMC_DST_CLIPPING
871209ff23fSmrg	    | RADEON_GMC_BRUSH_NONE
872209ff23fSmrg	    | format
873209ff23fSmrg	    | RADEON_GMC_SRC_DATATYPE_COLOR
874209ff23fSmrg	    | RADEON_ROP3_S
875209ff23fSmrg	    | RADEON_DP_SRC_SOURCE_HOST_DATA
876209ff23fSmrg	    | RADEON_GMC_CLR_CMP_CNTL_DIS
877209ff23fSmrg	    | RADEON_GMC_WR_MSK_DIS );
878209ff23fSmrg    OUT_RING( dstPitchOff );
879209ff23fSmrg    OUT_RING( (*y << 16) | x );
880209ff23fSmrg    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
881209ff23fSmrg    OUT_RING( 0xffffffff );
882209ff23fSmrg    OUT_RING( 0xffffffff );
883209ff23fSmrg    OUT_RING( *y << 16 | x );
884209ff23fSmrg    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
885209ff23fSmrg    OUT_RING( dwords );
886209ff23fSmrg
887209ff23fSmrg    ret = ( uint8_t* )&__head[__count];
888209ff23fSmrg
889209ff23fSmrg    __count += dwords;
890209ff23fSmrg    ADVANCE_RING();
891209ff23fSmrg
892209ff23fSmrg    *y += *hpass;
893209ff23fSmrg    *h -= *hpass;
894209ff23fSmrg
895209ff23fSmrg    return ret;
896209ff23fSmrg}
897209ff23fSmrg
898209ff23fSmrgvoid RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
899209ff23fSmrg{
900209ff23fSmrg    switch(swap) {
901209ff23fSmrg    case RADEON_HOST_DATA_SWAP_HDW:
902209ff23fSmrg        {
903209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
904209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
905209ff23fSmrg	    unsigned int nwords = size >> 2;
906209ff23fSmrg
907209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
908209ff23fSmrg		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
909209ff23fSmrg	    return;
910209ff23fSmrg        }
911209ff23fSmrg    case RADEON_HOST_DATA_SWAP_32BIT:
912209ff23fSmrg        {
913209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
914209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
915209ff23fSmrg	    unsigned int nwords = size >> 2;
916209ff23fSmrg
917209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
918209ff23fSmrg#ifdef __powerpc__
919209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
920209ff23fSmrg#else
921209ff23fSmrg		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
922209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
923209ff23fSmrg#endif
924209ff23fSmrg	    return;
925209ff23fSmrg        }
926209ff23fSmrg    case RADEON_HOST_DATA_SWAP_16BIT:
927209ff23fSmrg        {
928209ff23fSmrg	    unsigned short *d = (unsigned short *)dst;
929209ff23fSmrg	    unsigned short *s = (unsigned short *)src;
930209ff23fSmrg	    unsigned int nwords = size >> 1;
931209ff23fSmrg
932209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
933209ff23fSmrg#ifdef __powerpc__
934209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
935209ff23fSmrg#else
936209ff23fSmrg	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
937209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
938209ff23fSmrg#endif
939209ff23fSmrg	    return;
940209ff23fSmrg	}
941209ff23fSmrg    }
942209ff23fSmrg    if (src != dst)
943209ff23fSmrg	    memmove(dst, src, size);
944209ff23fSmrg}
945209ff23fSmrg
946209ff23fSmrg/* Copies a single pass worth of data for a hostdata blit set up by
947209ff23fSmrg * RADEONHostDataBlit().
948209ff23fSmrg */
949209ff23fSmrgvoid
950209ff23fSmrgRADEONHostDataBlitCopyPass(
951209ff23fSmrg    ScrnInfoPtr pScrn,
952209ff23fSmrg    unsigned int cpp,
953209ff23fSmrg    uint8_t *dst,
954209ff23fSmrg    uint8_t *src,
955209ff23fSmrg    unsigned int hpass,
956209ff23fSmrg    unsigned int dstPitch,
957209ff23fSmrg    unsigned int srcPitch
958209ff23fSmrg){
959209ff23fSmrg
960209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
961209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
962209ff23fSmrg#endif
963209ff23fSmrg
964209ff23fSmrg    /* RADEONHostDataBlitCopy can return NULL ! */
965209ff23fSmrg    if( (dst==NULL) || (src==NULL)) return;
966209ff23fSmrg
967209ff23fSmrg    if ( dstPitch == srcPitch )
968209ff23fSmrg    {
969209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
970209ff23fSmrg        if (info->ChipFamily >= CHIP_FAMILY_R300) {
971209ff23fSmrg	    switch(cpp) {
972209ff23fSmrg	    case 1:
973209ff23fSmrg		RADEONCopySwap(dst, src, hpass * dstPitch,
974209ff23fSmrg			       RADEON_HOST_DATA_SWAP_32BIT);
975209ff23fSmrg		return;
976209ff23fSmrg	    case 2:
977209ff23fSmrg	        RADEONCopySwap(dst, src, hpass * dstPitch,
978209ff23fSmrg			       RADEON_HOST_DATA_SWAP_HDW);
979209ff23fSmrg		return;
980209ff23fSmrg	    }
981209ff23fSmrg	}
982209ff23fSmrg#endif
983209ff23fSmrg	memcpy( dst, src, hpass * dstPitch );
984209ff23fSmrg    }
985209ff23fSmrg    else
986209ff23fSmrg    {
987209ff23fSmrg	unsigned int minPitch = min( dstPitch, srcPitch );
988209ff23fSmrg	while ( hpass-- )
989209ff23fSmrg	{
990209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
991209ff23fSmrg            if (info->ChipFamily >= CHIP_FAMILY_R300) {
992209ff23fSmrg		switch(cpp) {
993209ff23fSmrg		case 1:
994209ff23fSmrg		    RADEONCopySwap(dst, src, minPitch,
995209ff23fSmrg				   RADEON_HOST_DATA_SWAP_32BIT);
996209ff23fSmrg		    goto next;
997209ff23fSmrg		case 2:
998209ff23fSmrg	            RADEONCopySwap(dst, src, minPitch,
999209ff23fSmrg				   RADEON_HOST_DATA_SWAP_HDW);
1000209ff23fSmrg		    goto next;
1001209ff23fSmrg		}
1002209ff23fSmrg	    }
1003209ff23fSmrg#endif
1004209ff23fSmrg	    memcpy( dst, src, minPitch );
1005209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1006209ff23fSmrg	next:
1007209ff23fSmrg#endif
1008209ff23fSmrg	    src += srcPitch;
1009209ff23fSmrg	    dst += dstPitch;
1010209ff23fSmrg	}
1011209ff23fSmrg    }
1012209ff23fSmrg}
1013209ff23fSmrg
1014209ff23fSmrg#endif
1015209ff23fSmrg
1016209ff23fSmrgBool RADEONAccelInit(ScreenPtr pScreen)
1017209ff23fSmrg{
1018209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1019209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1020209ff23fSmrg
1021209ff23fSmrg#ifdef USE_EXA
1022209ff23fSmrg    if (info->useEXA) {
1023209ff23fSmrg# ifdef XF86DRI
1024209ff23fSmrg	if (info->directRenderingEnabled) {
1025b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1026b7e1c893Smrg		if (!R600DrawInit(pScreen))
1027b7e1c893Smrg		    return FALSE;
1028b7e1c893Smrg	    } else {
1029b7e1c893Smrg		if (!RADEONDrawInitCP(pScreen))
1030b7e1c893Smrg		    return FALSE;
1031b7e1c893Smrg	    }
1032209ff23fSmrg	} else
1033209ff23fSmrg# endif /* XF86DRI */
1034209ff23fSmrg	{
1035b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1036209ff23fSmrg		return FALSE;
1037b7e1c893Smrg	    else {
1038b7e1c893Smrg		if (!RADEONDrawInitMMIO(pScreen))
1039b7e1c893Smrg		    return FALSE;
1040b7e1c893Smrg	    }
1041209ff23fSmrg	}
1042209ff23fSmrg    }
1043209ff23fSmrg#endif /* USE_EXA */
1044209ff23fSmrg#ifdef USE_XAA
1045209ff23fSmrg    if (!info->useEXA) {
1046209ff23fSmrg	XAAInfoRecPtr  a;
1047209ff23fSmrg
1048b7e1c893Smrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
1049b7e1c893Smrg	    return FALSE;
1050b7e1c893Smrg
1051b7e1c893Smrg	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1052209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1053209ff23fSmrg	    return FALSE;
1054209ff23fSmrg	}
1055209ff23fSmrg
1056209ff23fSmrg#ifdef XF86DRI
1057209ff23fSmrg	if (info->directRenderingEnabled)
1058209ff23fSmrg	    RADEONAccelInitCP(pScreen, a);
1059209ff23fSmrg	else
1060209ff23fSmrg#endif /* XF86DRI */
1061209ff23fSmrg	    RADEONAccelInitMMIO(pScreen, a);
1062209ff23fSmrg
1063209ff23fSmrg	RADEONEngineInit(pScrn);
1064209ff23fSmrg
1065209ff23fSmrg	if (!XAAInit(pScreen, a)) {
1066209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1067209ff23fSmrg	    return FALSE;
1068209ff23fSmrg	}
1069209ff23fSmrg    }
1070209ff23fSmrg#endif /* USE_XAA */
1071209ff23fSmrg    return TRUE;
1072209ff23fSmrg}
1073209ff23fSmrg
1074209ff23fSmrgvoid RADEONInit3DEngine(ScrnInfoPtr pScrn)
1075209ff23fSmrg{
1076209ff23fSmrg    RADEONInfoPtr info = RADEONPTR (pScrn);
1077209ff23fSmrg
1078209ff23fSmrg#ifdef XF86DRI
1079209ff23fSmrg    if (info->directRenderingEnabled) {
1080b7e1c893Smrg	drm_radeon_sarea_t *pSAREAPriv;
1081209ff23fSmrg
1082209ff23fSmrg	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1083b7e1c893Smrg	pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1084209ff23fSmrg	RADEONInit3DEngineCP(pScrn);
1085209ff23fSmrg    } else
1086209ff23fSmrg#endif
1087209ff23fSmrg	RADEONInit3DEngineMMIO(pScrn);
1088209ff23fSmrg
1089b7e1c893Smrg    info->accel_state->XInited3D = TRUE;
1090209ff23fSmrg}
1091209ff23fSmrg
1092209ff23fSmrg#ifdef USE_XAA
1093209ff23fSmrg#ifdef XF86DRI
1094209ff23fSmrgBool
1095209ff23fSmrgRADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1096209ff23fSmrg{
1097209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1098209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1099209ff23fSmrg    int            cpp = info->CurrentLayout.pixel_bytes;
1100b7e1c893Smrg    int            depthCpp = (info->dri->depthBits - 8) / 4;
1101209ff23fSmrg    int            width_bytes = pScrn->displayWidth * cpp;
1102209ff23fSmrg    int            bufferSize;
1103209ff23fSmrg    int            depthSize;
1104209ff23fSmrg    int            l;
1105209ff23fSmrg    int            scanlines;
1106209ff23fSmrg    int            texsizerequest;
1107209ff23fSmrg    BoxRec         MemBox;
1108209ff23fSmrg    FBAreaPtr      fbarea;
1109209ff23fSmrg
1110b7e1c893Smrg    info->dri->frontOffset = 0;
1111b7e1c893Smrg    info->dri->frontPitch = pScrn->displayWidth;
1112b7e1c893Smrg    info->dri->backPitch = pScrn->displayWidth;
1113209ff23fSmrg
1114209ff23fSmrg    /* make sure we use 16 line alignment for tiling (8 might be enough).
1115209ff23fSmrg     * Might need that for non-XF86DRI too?
1116209ff23fSmrg     */
1117209ff23fSmrg    if (info->allowColorTiling) {
1118209ff23fSmrg	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
1119209ff23fSmrg		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1120209ff23fSmrg    } else {
1121209ff23fSmrg        bufferSize = (pScrn->virtualY * width_bytes
1122209ff23fSmrg		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
1123209ff23fSmrg    }
1124209ff23fSmrg
1125209ff23fSmrg    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1126209ff23fSmrg     * which is always the case if color tiling is used due to color pitch
1127209ff23fSmrg     * but not necessarily otherwise, and its height a multiple of 16 lines.
1128209ff23fSmrg     */
1129b7e1c893Smrg    info->dri->depthPitch = (pScrn->displayWidth + 31) & ~31;
1130b7e1c893Smrg    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->dri->depthPitch
1131209ff23fSmrg		  * depthCpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
1132209ff23fSmrg
1133209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1134b7e1c893Smrg	       "Using %d MB GART aperture\n", info->dri->gartSize);
1135209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1136b7e1c893Smrg	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1137209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1138b7e1c893Smrg	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1139209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1140b7e1c893Smrg	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1141209ff23fSmrg
1142209ff23fSmrg    /* Try for front, back, depth, and three framebuffers worth of
1143209ff23fSmrg     * pixmap cache.  Should be enough for a fullscreen background
1144209ff23fSmrg     * image plus some leftovers.
1145209ff23fSmrg     * If the FBTexPercent option was used, try to achieve that percentage instead,
1146209ff23fSmrg     * but still have at least one pixmap buffer (get problems with xvideo/render
1147209ff23fSmrg     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1148209ff23fSmrg     * probably useless for XAA.
1149209ff23fSmrg     */
1150b7e1c893Smrg    if (info->dri->textureSize >= 0) {
1151209ff23fSmrg	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1152209ff23fSmrg			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1153209ff23fSmrg	/* first divide, then multiply or we'll get an overflow (been there...) */
1154b7e1c893Smrg			 / 100 * info->dri->textureSize;
1155209ff23fSmrg    }
1156209ff23fSmrg    else {
1157209ff23fSmrg	texsizerequest = (int)info->FbMapSize / 2;
1158209ff23fSmrg    }
1159b7e1c893Smrg    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1160209ff23fSmrg
1161209ff23fSmrg    /* If that gives us less than the requested memory, let's
1162209ff23fSmrg     * be greedy and grab some more.  Sorry, I care more about 3D
1163209ff23fSmrg     * performance than playing nicely, and you'll get around a full
1164209ff23fSmrg     * framebuffer's worth of pixmap cache anyway.
1165209ff23fSmrg     */
1166b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1167b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1168209ff23fSmrg    }
1169b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1170b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1171209ff23fSmrg    }
1172209ff23fSmrg
1173209ff23fSmrg    /* If there's still no space for textures, try without pixmap cache, but
1174209ff23fSmrg     * never use the reserved space, the space hw cursor and PCIGART table might
1175209ff23fSmrg     * use.
1176209ff23fSmrg     */
1177b7e1c893Smrg    if (info->dri->textureSize < 0) {
1178b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1179209ff23fSmrg	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1180209ff23fSmrg    }
1181209ff23fSmrg
1182209ff23fSmrg    /* Check to see if there is more room available after the 8192nd
1183209ff23fSmrg     * scanline for textures
1184209ff23fSmrg     */
1185209ff23fSmrg    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1186209ff23fSmrg    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1187b7e1c893Smrg	> info->dri->textureSize) {
1188b7e1c893Smrg	info->dri->textureSize =
1189209ff23fSmrg		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1190209ff23fSmrg    }
1191209ff23fSmrg
1192209ff23fSmrg    /* If backbuffer is disabled, don't allocate memory for it */
1193b7e1c893Smrg    if (info->dri->noBackBuffer) {
1194b7e1c893Smrg	info->dri->textureSize += bufferSize;
1195209ff23fSmrg    }
1196209ff23fSmrg
1197209ff23fSmrg    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1198209ff23fSmrg       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1199209ff23fSmrg       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1200209ff23fSmrg       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1201209ff23fSmrg       area otherwise).
1202209ff23fSmrg       This might cause some space at the end of the video memory to be unused, since it
1203209ff23fSmrg       can't be used (?) due to that log_tex_granularity thing???
1204209ff23fSmrg       Could use different copyscreentoscreen function for the pageflip copies
1205209ff23fSmrg       (which would use different src and dst offsets) to avoid this. */
1206b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1207b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1208209ff23fSmrg			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1209209ff23fSmrg    }
1210b7e1c893Smrg    if (info->dri->textureSize > 0) {
1211b7e1c893Smrg	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1212209ff23fSmrg	if (l < RADEON_LOG_TEX_GRANULARITY)
1213209ff23fSmrg	    l = RADEON_LOG_TEX_GRANULARITY;
1214209ff23fSmrg	/* Round the texture size up to the nearest whole number of
1215209ff23fSmrg	 * texture regions.  Again, be greedy about this, don't
1216209ff23fSmrg	 * round down.
1217209ff23fSmrg	 */
1218b7e1c893Smrg	info->dri->log2TexGran = l;
1219b7e1c893Smrg	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1220209ff23fSmrg    } else {
1221b7e1c893Smrg	info->dri->textureSize = 0;
1222209ff23fSmrg    }
1223209ff23fSmrg
1224209ff23fSmrg    /* Set a minimum usable local texture heap size.  This will fit
1225209ff23fSmrg     * two 256x256x32bpp textures.
1226209ff23fSmrg     */
1227b7e1c893Smrg    if (info->dri->textureSize < 512 * 1024) {
1228b7e1c893Smrg	info->dri->textureOffset = 0;
1229b7e1c893Smrg	info->dri->textureSize = 0;
1230209ff23fSmrg    }
1231209ff23fSmrg
1232b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1233b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1234b7e1c893Smrg				    (width_bytes * 16)) * (width_bytes * 16);
1235209ff23fSmrg    }
1236209ff23fSmrg    else {
1237209ff23fSmrg	/* Reserve space for textures */
1238b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize +
1239b7e1c893Smrg				     RADEON_BUFFER_ALIGN) &
1240b7e1c893Smrg				    ~(uint32_t)RADEON_BUFFER_ALIGN);
1241209ff23fSmrg    }
1242209ff23fSmrg
1243209ff23fSmrg    /* Reserve space for the shared depth
1244209ff23fSmrg     * buffer.
1245209ff23fSmrg     */
1246b7e1c893Smrg    info->dri->depthOffset = ((info->dri->textureOffset - depthSize +
1247b7e1c893Smrg			       RADEON_BUFFER_ALIGN) &
1248b7e1c893Smrg			      ~(uint32_t)RADEON_BUFFER_ALIGN);
1249209ff23fSmrg
1250209ff23fSmrg    /* Reserve space for the shared back buffer */
1251b7e1c893Smrg    if (info->dri->noBackBuffer) {
1252b7e1c893Smrg       info->dri->backOffset = info->dri->depthOffset;
1253209ff23fSmrg    } else {
1254b7e1c893Smrg       info->dri->backOffset = ((info->dri->depthOffset - bufferSize +
1255b7e1c893Smrg				 RADEON_BUFFER_ALIGN) &
1256b7e1c893Smrg				~(uint32_t)RADEON_BUFFER_ALIGN);
1257209ff23fSmrg    }
1258209ff23fSmrg
1259b7e1c893Smrg    info->dri->backY = info->dri->backOffset / width_bytes;
1260b7e1c893Smrg    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1261209ff23fSmrg
1262209ff23fSmrg    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1263209ff23fSmrg    if (scanlines > 8191)
1264209ff23fSmrg	scanlines = 8191;
1265209ff23fSmrg
1266209ff23fSmrg    MemBox.x1 = 0;
1267209ff23fSmrg    MemBox.y1 = 0;
1268209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1269209ff23fSmrg    MemBox.y2 = scanlines;
1270209ff23fSmrg
1271209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1272209ff23fSmrg        xf86DrvMsg(scrnIndex, X_ERROR,
1273209ff23fSmrg		   "Memory manager initialization to "
1274209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1275209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1276209ff23fSmrg	return FALSE;
1277209ff23fSmrg    } else {
1278209ff23fSmrg	int  width, height;
1279209ff23fSmrg
1280209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1281209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1282209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1283209ff23fSmrg	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1284209ff23fSmrg	   aligned... sigh */
1285209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1286209ff23fSmrg						pScrn->displayWidth,
1287209ff23fSmrg						info->allowColorTiling ?
1288209ff23fSmrg						((pScrn->virtualY + 15) & ~15)
1289209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1290209ff23fSmrg						0, NULL, NULL,
1291209ff23fSmrg						NULL))) {
1292209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1293209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1294209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1295209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1296209ff23fSmrg	} else {
1297209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1298209ff23fSmrg	}
1299209ff23fSmrg
1300209ff23fSmrg	RADEONDRIAllocatePCIGARTTable(pScreen);
1301209ff23fSmrg
1302209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1303209ff23fSmrg					  &height, 0, 0, 0)) {
1304209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1305209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1306209ff23fSmrg		       width, height);
1307209ff23fSmrg
1308209ff23fSmrg	    /* Lines in offscreen area needed for depth buffer and
1309209ff23fSmrg	     * textures
1310209ff23fSmrg	     */
1311b7e1c893Smrg	    info->dri->depthTexLines = (scanlines
1312b7e1c893Smrg					- info->dri->depthOffset / width_bytes);
1313b7e1c893Smrg	    info->dri->backLines	    = (scanlines
1314b7e1c893Smrg					       - info->dri->backOffset / width_bytes
1315b7e1c893Smrg					       - info->dri->depthTexLines);
1316b7e1c893Smrg	    info->dri->backArea	    = NULL;
1317209ff23fSmrg	} else {
1318209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR,
1319209ff23fSmrg		       "Unable to determine largest offscreen area "
1320209ff23fSmrg		       "available\n");
1321209ff23fSmrg	    return FALSE;
1322209ff23fSmrg	}
1323209ff23fSmrg    }
1324209ff23fSmrg
1325209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1326209ff23fSmrg	       "Will use front buffer at offset 0x%x\n",
1327b7e1c893Smrg	       info->dri->frontOffset);
1328209ff23fSmrg
1329209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1330209ff23fSmrg	       "Will use back buffer at offset 0x%x\n",
1331b7e1c893Smrg	       info->dri->backOffset);
1332209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1333209ff23fSmrg	       "Will use depth buffer at offset 0x%x\n",
1334b7e1c893Smrg	       info->dri->depthOffset);
1335209ff23fSmrg    if (info->cardType==CARD_PCIE)
1336209ff23fSmrg    	xf86DrvMsg(scrnIndex, X_INFO,
1337209ff23fSmrg	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1338b7e1c893Smrg		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1339209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1340209ff23fSmrg	       "Will use %d kb for textures at offset 0x%x\n",
1341b7e1c893Smrg	       info->dri->textureSize/1024, info->dri->textureOffset);
1342209ff23fSmrg
1343b7e1c893Smrg    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1344b7e1c893Smrg				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1345209ff23fSmrg
1346b7e1c893Smrg    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1347b7e1c893Smrg				  ((info->dri->backOffset + info->fbLocation) >> 10));
1348209ff23fSmrg
1349b7e1c893Smrg    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1350b7e1c893Smrg				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1351209ff23fSmrg    return TRUE;
1352209ff23fSmrg}
1353209ff23fSmrg#endif /* XF86DRI */
1354209ff23fSmrg
1355209ff23fSmrgBool
1356209ff23fSmrgRADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1357209ff23fSmrg{
1358209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1359209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1360209ff23fSmrg    BoxRec         MemBox;
1361209ff23fSmrg    int            y2;
1362209ff23fSmrg
1363209ff23fSmrg    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1364209ff23fSmrg
1365209ff23fSmrg    MemBox.x1 = 0;
1366209ff23fSmrg    MemBox.y1 = 0;
1367209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1368209ff23fSmrg    y2 = info->FbMapSize / width_bytes;
1369209ff23fSmrg    if (y2 >= 32768)
1370209ff23fSmrg	y2 = 32767; /* because MemBox.y2 is signed short */
1371209ff23fSmrg    MemBox.y2 = y2;
1372209ff23fSmrg
1373209ff23fSmrg    /* The acceleration engine uses 14 bit
1374209ff23fSmrg     * signed coordinates, so we can't have any
1375209ff23fSmrg     * drawable caches beyond this region.
1376209ff23fSmrg     */
1377209ff23fSmrg    if (MemBox.y2 > 8191)
1378209ff23fSmrg	MemBox.y2 = 8191;
1379209ff23fSmrg
1380209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1381209ff23fSmrg	xf86DrvMsg(scrnIndex, X_ERROR,
1382209ff23fSmrg		   "Memory manager initialization to "
1383209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1384209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1385209ff23fSmrg	return FALSE;
1386209ff23fSmrg    } else {
1387209ff23fSmrg	int       width, height;
1388209ff23fSmrg	FBAreaPtr fbarea;
1389209ff23fSmrg
1390209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1391209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1392209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1393209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1394209ff23fSmrg						pScrn->displayWidth,
1395209ff23fSmrg						info->allowColorTiling ?
1396209ff23fSmrg						((pScrn->virtualY + 15) & ~15)
1397209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1398209ff23fSmrg						0, NULL, NULL,
1399209ff23fSmrg						NULL))) {
1400209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1401209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1402209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1403209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1404209ff23fSmrg	} else {
1405209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1406209ff23fSmrg	}
1407209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1408209ff23fSmrg					      0, 0, 0)) {
1409209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1410209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1411209ff23fSmrg		       width, height);
1412209ff23fSmrg	}
1413209ff23fSmrg	return TRUE;
1414209ff23fSmrg    }
1415209ff23fSmrg}
1416209ff23fSmrg#endif /* USE_XAA */
1417