radeon_accel.c revision 68105dcb
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3209ff23fSmrg *                VA Linux Systems Inc., Fremont, California.
4209ff23fSmrg *
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining
8209ff23fSmrg * a copy of this software and associated documentation files (the
9209ff23fSmrg * "Software"), to deal in the Software without restriction, including
10209ff23fSmrg * without limitation on the rights to use, copy, modify, merge,
11209ff23fSmrg * publish, distribute, sublicense, and/or sell copies of the Software,
12209ff23fSmrg * and to permit persons to whom the Software is furnished to do so,
13209ff23fSmrg * subject to the following conditions:
14209ff23fSmrg *
15209ff23fSmrg * The above copyright notice and this permission notice (including the
16209ff23fSmrg * next paragraph) shall be included in all copies or substantial
17209ff23fSmrg * portions of the Software.
18209ff23fSmrg *
19209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20209ff23fSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21209ff23fSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22209ff23fSmrg * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23209ff23fSmrg * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24209ff23fSmrg * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26209ff23fSmrg * DEALINGS IN THE SOFTWARE.
27209ff23fSmrg */
28209ff23fSmrg
29209ff23fSmrg#ifdef HAVE_CONFIG_H
30209ff23fSmrg#include "config.h"
31209ff23fSmrg#endif
32209ff23fSmrg
33209ff23fSmrg/*
34209ff23fSmrg * Authors:
35209ff23fSmrg *   Kevin E. Martin <martin@xfree86.org>
36209ff23fSmrg *   Rickard E. Faith <faith@valinux.com>
37209ff23fSmrg *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38209ff23fSmrg *
39209ff23fSmrg * Credits:
40209ff23fSmrg *
41209ff23fSmrg *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42209ff23fSmrg *   code to his Radeon driver.  Portions of this file are based on the
43209ff23fSmrg *   initialization code for that driver.
44209ff23fSmrg *
45209ff23fSmrg * References:
46209ff23fSmrg *
47209ff23fSmrg * !!!! FIXME !!!!
48209ff23fSmrg *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49209ff23fSmrg *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50209ff23fSmrg *   1999.
51209ff23fSmrg *
52209ff23fSmrg *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53209ff23fSmrg *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54209ff23fSmrg *
55209ff23fSmrg * Notes on unimplemented XAA optimizations:
56209ff23fSmrg *
57209ff23fSmrg *   SetClipping:   This has been removed as XAA expects 16bit registers
58209ff23fSmrg *                  for full clipping.
59209ff23fSmrg *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60209ff23fSmrg *   DashedLine with non-power-of-two pattern length: Apparently, there is
61209ff23fSmrg *                  no way to set the length of the pattern -- it is always
62209ff23fSmrg *                  assumed to be 8 or 32 (or 1024?).
63209ff23fSmrg *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64209ff23fSmrg *                  Manual where it states that monochrome expansion of frame
65209ff23fSmrg *                  buffer data is not supported.
66209ff23fSmrg *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67209ff23fSmrg *                  direct/indirect method.  If we had more data registers,
68209ff23fSmrg *                  then we could do better.  If XAA supported a trigger write
69209ff23fSmrg *                  address, the code would be simpler.
70209ff23fSmrg *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71209ff23fSmrg *                  pattern from frame buffer memory.
72209ff23fSmrg *   ImageWrites:   Same as CPUToScreenColorExpandFill
73209ff23fSmrg *
74209ff23fSmrg */
75209ff23fSmrg
76209ff23fSmrg#include <errno.h>
77209ff23fSmrg#include <string.h>
78921a55d8Smrg#include <assert.h>
79209ff23fSmrg				/* Driver data structures */
80209ff23fSmrg#include "radeon.h"
81209ff23fSmrg#include "radeon_reg.h"
82b7e1c893Smrg#include "r600_reg.h"
83209ff23fSmrg#include "radeon_macros.h"
84209ff23fSmrg#include "radeon_probe.h"
85209ff23fSmrg#include "radeon_version.h"
86209ff23fSmrg#ifdef XF86DRI
87209ff23fSmrg#define _XF86DRI_SERVER_
88b7e1c893Smrg#include "radeon_drm.h"
89209ff23fSmrg#endif
90209ff23fSmrg
91c503f109Smrg#include "ati_pciids_gen.h"
92c503f109Smrg
93209ff23fSmrg				/* Line support */
94209ff23fSmrg#include "miline.h"
95209ff23fSmrg
96209ff23fSmrg				/* X and server generic header files */
97209ff23fSmrg#include "xf86.h"
98209ff23fSmrg
99b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn);
100209ff23fSmrg
101209ff23fSmrg#ifdef USE_XAA
102209ff23fSmrgstatic struct {
103209ff23fSmrg    int rop;
104209ff23fSmrg    int pattern;
105209ff23fSmrg} RADEON_ROP[] = {
106209ff23fSmrg    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
107209ff23fSmrg    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* Gxand          */
108209ff23fSmrg    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
109209ff23fSmrg    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
110209ff23fSmrg    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
111209ff23fSmrg    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
112209ff23fSmrg    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
113209ff23fSmrg    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
114209ff23fSmrg    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
115209ff23fSmrg    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
116209ff23fSmrg    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
117209ff23fSmrg    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
118209ff23fSmrg    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
119209ff23fSmrg    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
120209ff23fSmrg    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
121209ff23fSmrg    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
122209ff23fSmrg};
123209ff23fSmrg#endif
124209ff23fSmrg
125209ff23fSmrg/* The FIFO has 64 slots.  This routines waits until at least `entries'
126209ff23fSmrg * of these slots are empty.
127209ff23fSmrg */
128209ff23fSmrgvoid RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
129209ff23fSmrg{
130209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
131209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
132209ff23fSmrg    int            i;
133209ff23fSmrg
134209ff23fSmrg    for (;;) {
135209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
136b7e1c893Smrg	    info->accel_state->fifo_slots =
137209ff23fSmrg		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
138b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
139209ff23fSmrg	}
140209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
141209ff23fSmrg		       "FIFO timed out: %u entries, stat=0x%08x\n",
142209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
143209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS));
144209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
145209ff23fSmrg		   "FIFO timed out, resetting engine...\n");
146209ff23fSmrg	RADEONEngineReset(pScrn);
147209ff23fSmrg	RADEONEngineRestore(pScrn);
148209ff23fSmrg#ifdef XF86DRI
149209ff23fSmrg	if (info->directRenderingEnabled) {
150209ff23fSmrg	    RADEONCP_RESET(pScrn, info);
151209ff23fSmrg	    RADEONCP_START(pScrn, info);
152209ff23fSmrg	}
153209ff23fSmrg#endif
154209ff23fSmrg    }
155209ff23fSmrg}
156209ff23fSmrg
157b7e1c893Smrgvoid R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
158b7e1c893Smrg{
159b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
160b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
161b7e1c893Smrg    int            i;
162b7e1c893Smrg
163b7e1c893Smrg    for (;;) {
164b7e1c893Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
165b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
166b7e1c893Smrg		info->accel_state->fifo_slots =
167b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
168b7e1c893Smrg	    else
169b7e1c893Smrg		info->accel_state->fifo_slots =
170b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
171b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
172b7e1c893Smrg	}
173b7e1c893Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
174b7e1c893Smrg		       "FIFO timed out: stat=0x%08x\n",
175b7e1c893Smrg		       (unsigned int)INREG(R600_GRBM_STATUS));
176b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
177b7e1c893Smrg		   "FIFO timed out, resetting engine...\n");
178b7e1c893Smrg	R600EngineReset(pScrn);
179b7e1c893Smrg#ifdef XF86DRI
180b7e1c893Smrg	if (info->directRenderingEnabled) {
181b7e1c893Smrg	    RADEONCP_RESET(pScrn, info);
182b7e1c893Smrg	    RADEONCP_START(pScrn, info);
183b7e1c893Smrg	}
184b7e1c893Smrg#endif
185b7e1c893Smrg    }
186b7e1c893Smrg}
187b7e1c893Smrg
188209ff23fSmrg/* Flush all dirty data in the Pixel Cache to memory */
189209ff23fSmrgvoid RADEONEngineFlush(ScrnInfoPtr pScrn)
190209ff23fSmrg{
191209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
192209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
193209ff23fSmrg    int            i;
194209ff23fSmrg
195209ff23fSmrg    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
196209ff23fSmrg	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
197209ff23fSmrg		RADEON_RB3D_DC_FLUSH_ALL,
198209ff23fSmrg		~RADEON_RB3D_DC_FLUSH_ALL);
199209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
200209ff23fSmrg	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
201209ff23fSmrg		break;
202209ff23fSmrg	}
203209ff23fSmrg	if (i == RADEON_TIMEOUT) {
204209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
205209ff23fSmrg			   "DC flush timeout: %x\n",
206209ff23fSmrg			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
207209ff23fSmrg	}
208209ff23fSmrg    } else {
209209ff23fSmrg	OUTREGP(R300_DSTCACHE_CTLSTAT,
210209ff23fSmrg		R300_RB2D_DC_FLUSH_ALL,
211209ff23fSmrg		~R300_RB2D_DC_FLUSH_ALL);
212209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
213209ff23fSmrg	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
214209ff23fSmrg		break;
215209ff23fSmrg	}
216209ff23fSmrg	if (i == RADEON_TIMEOUT) {
217209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
218209ff23fSmrg			   "DC flush timeout: %x\n",
219209ff23fSmrg			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
220209ff23fSmrg	}
221209ff23fSmrg    }
222209ff23fSmrg}
223209ff23fSmrg
224209ff23fSmrg/* Reset graphics card to known state */
225209ff23fSmrgvoid RADEONEngineReset(ScrnInfoPtr pScrn)
226209ff23fSmrg{
227209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
228209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
229209ff23fSmrg    uint32_t       clock_cntl_index;
230209ff23fSmrg    uint32_t       mclk_cntl;
231209ff23fSmrg    uint32_t       rbbm_soft_reset;
232209ff23fSmrg    uint32_t       host_path_cntl;
233209ff23fSmrg
234209ff23fSmrg    /* The following RBBM_SOFT_RESET sequence can help un-wedge
235209ff23fSmrg     * an R300 after the command processor got stuck.
236209ff23fSmrg     */
237209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
238209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
239209ff23fSmrg                                   RADEON_SOFT_RESET_CP |
240209ff23fSmrg                                   RADEON_SOFT_RESET_HI |
241209ff23fSmrg                                   RADEON_SOFT_RESET_SE |
242209ff23fSmrg                                   RADEON_SOFT_RESET_RE |
243209ff23fSmrg                                   RADEON_SOFT_RESET_PP |
244209ff23fSmrg                                   RADEON_SOFT_RESET_E2 |
245209ff23fSmrg                                   RADEON_SOFT_RESET_RB));
246209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
247209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
248209ff23fSmrg                                   ~(RADEON_SOFT_RESET_CP |
249209ff23fSmrg                                     RADEON_SOFT_RESET_HI |
250209ff23fSmrg                                     RADEON_SOFT_RESET_SE |
251209ff23fSmrg                                     RADEON_SOFT_RESET_RE |
252209ff23fSmrg                                     RADEON_SOFT_RESET_PP |
253209ff23fSmrg                                     RADEON_SOFT_RESET_E2 |
254209ff23fSmrg                                     RADEON_SOFT_RESET_RB)));
255209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
256209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
257209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
258209ff23fSmrg
259209ff23fSmrg    RADEONEngineFlush(pScrn);
260209ff23fSmrg
261209ff23fSmrg    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
262209ff23fSmrg    RADEONPllErrataAfterIndex(info);
263209ff23fSmrg
264209ff23fSmrg#if 0 /* taken care of by new PM code */
265209ff23fSmrg    /* Some ASICs have bugs with dynamic-on feature, which are
266209ff23fSmrg     * ASIC-version dependent, so we force all blocks on for now
267209ff23fSmrg     */
268209ff23fSmrg    if (info->HasCRTC2) {
269209ff23fSmrg	uint32_t tmp;
270209ff23fSmrg
271209ff23fSmrg	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
272209ff23fSmrg	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
273209ff23fSmrg				  RADEON_CP_MAX_DYN_STOP_LAT |
274209ff23fSmrg				  RADEON_SCLK_FORCEON_MASK));
275209ff23fSmrg
276209ff23fSmrg	if (info->ChipFamily == CHIP_FAMILY_RV200) {
277209ff23fSmrg	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
278209ff23fSmrg	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
279209ff23fSmrg	}
280209ff23fSmrg    }
281209ff23fSmrg#endif /* new PM code */
282209ff23fSmrg
283209ff23fSmrg    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
284209ff23fSmrg
285209ff23fSmrg#if 0 /* handled by new PM code */
286209ff23fSmrg    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
287209ff23fSmrg			      RADEON_FORCEON_MCLKA |
288209ff23fSmrg			      RADEON_FORCEON_MCLKB |
289209ff23fSmrg			      RADEON_FORCEON_YCLKA |
290209ff23fSmrg			      RADEON_FORCEON_YCLKB |
291209ff23fSmrg			      RADEON_FORCEON_MC |
292209ff23fSmrg			      RADEON_FORCEON_AIC));
293209ff23fSmrg#endif /* new PM code */
294209ff23fSmrg
295209ff23fSmrg    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
296209ff23fSmrg     * unexpected behaviour on some machines.  Here we use
297209ff23fSmrg     * RADEON_HOST_PATH_CNTL to reset it.
298209ff23fSmrg     */
299209ff23fSmrg    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
300209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
301209ff23fSmrg
302209ff23fSmrg    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
303209ff23fSmrg	uint32_t tmp;
304209ff23fSmrg
305209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
306209ff23fSmrg					RADEON_SOFT_RESET_CP |
307209ff23fSmrg					RADEON_SOFT_RESET_HI |
308209ff23fSmrg					RADEON_SOFT_RESET_E2));
309209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
310209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
311209ff23fSmrg	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
312209ff23fSmrg	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
313209ff23fSmrg    } else {
314209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
315209ff23fSmrg					RADEON_SOFT_RESET_CP |
316209ff23fSmrg					RADEON_SOFT_RESET_SE |
317209ff23fSmrg					RADEON_SOFT_RESET_RE |
318209ff23fSmrg					RADEON_SOFT_RESET_PP |
319209ff23fSmrg					RADEON_SOFT_RESET_E2 |
320209ff23fSmrg					RADEON_SOFT_RESET_RB));
321209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
322209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
323209ff23fSmrg					~(RADEON_SOFT_RESET_CP |
324209ff23fSmrg					  RADEON_SOFT_RESET_SE |
325209ff23fSmrg					  RADEON_SOFT_RESET_RE |
326209ff23fSmrg					  RADEON_SOFT_RESET_PP |
327209ff23fSmrg					  RADEON_SOFT_RESET_E2 |
328209ff23fSmrg					  RADEON_SOFT_RESET_RB)));
329209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
330209ff23fSmrg    }
331209ff23fSmrg
332209ff23fSmrg    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
333209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
334209ff23fSmrg
335209ff23fSmrg    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
336209ff23fSmrg    RADEONPllErrataAfterIndex(info);
337209ff23fSmrg    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
338209ff23fSmrg}
339209ff23fSmrg
340b7e1c893Smrg/* Reset graphics card to known state */
341b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn)
342b7e1c893Smrg{
343b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
344b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
345b7e1c893Smrg    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
346b7e1c893Smrg
347b7e1c893Smrg    cp_ptr = INREG(R600_CP_RB_WPTR);
348b7e1c893Smrg
349b7e1c893Smrg    cp_me_cntl = INREG(R600_CP_ME_CNTL);
350b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, 0x10000000);
351b7e1c893Smrg
352b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
353b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
354b7e1c893Smrg    usleep (50);
355b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0);
356b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
357b7e1c893Smrg
358b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
359b7e1c893Smrg    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
360b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, 0x80000000);
361b7e1c893Smrg
362b7e1c893Smrg    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
363b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR, cp_ptr);
364b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
365b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
366b7e1c893Smrg
367b7e1c893Smrg}
368b7e1c893Smrg
369209ff23fSmrg/* Restore the acceleration hardware to its previous state */
370209ff23fSmrgvoid RADEONEngineRestore(ScrnInfoPtr pScrn)
371209ff23fSmrg{
372209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
373209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
374209ff23fSmrg
375ad43ddacSmrg    if (info->cs)
376ad43ddacSmrg      return;
377ad43ddacSmrg
378209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
379209ff23fSmrg		   "EngineRestore (%d/%d)\n",
380209ff23fSmrg		   info->CurrentLayout.pixel_code,
381209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
382209ff23fSmrg
383209ff23fSmrg    /* Setup engine location. This shouldn't be necessary since we
384209ff23fSmrg     * set them appropriately before any accel ops, but let's avoid
385209ff23fSmrg     * random bogus DMA in case we inadvertently trigger the engine
386209ff23fSmrg     * in the wrong place (happened).
387209ff23fSmrg     */
388209ff23fSmrg    RADEONWaitForFifo(pScrn, 2);
389b7e1c893Smrg    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390b7e1c893Smrg    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
391209ff23fSmrg
392209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
393209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
394209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE,
395209ff23fSmrg	    RADEON_HOST_BIG_ENDIAN_EN,
396209ff23fSmrg	    ~RADEON_HOST_BIG_ENDIAN_EN);
397209ff23fSmrg#else
398209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
399209ff23fSmrg#endif
400209ff23fSmrg
401209ff23fSmrg    /* Restore SURFACE_CNTL */
402209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
403209ff23fSmrg
404209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
405209ff23fSmrg    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
406209ff23fSmrg					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
407209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
408b7e1c893Smrg    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
409209ff23fSmrg				       | RADEON_GMC_BRUSH_SOLID_COLOR
410209ff23fSmrg				       | RADEON_GMC_SRC_DATATYPE_COLOR));
411209ff23fSmrg
412209ff23fSmrg    RADEONWaitForFifo(pScrn, 5);
413209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
414209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
415209ff23fSmrg    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
416209ff23fSmrg    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
417209ff23fSmrg    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
418209ff23fSmrg
419209ff23fSmrg    RADEONWaitForIdleMMIO(pScrn);
420209ff23fSmrg
421b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
422209ff23fSmrg}
423209ff23fSmrg
424ad43ddacSmrgstatic int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
425ad43ddacSmrg{
426ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
427ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_major < 2) {
428ad43ddacSmrg        drm_radeon_getparam_t np;
429ad43ddacSmrg
430ad43ddacSmrg        memset(&np, 0, sizeof(np));
431ad43ddacSmrg        np.param = RADEON_PARAM_NUM_GB_PIPES;
432ad43ddacSmrg        np.value = num_pipes;
433ad43ddacSmrg        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
434ad43ddacSmrg    } else {
435ad43ddacSmrg        struct drm_radeon_info np2;
436ad43ddacSmrg        np2.value = (unsigned long)num_pipes;
437ad43ddacSmrg        np2.request = RADEON_INFO_NUM_GB_PIPES;
438ad43ddacSmrg        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
439ad43ddacSmrg    }
440ad43ddacSmrg}
441ad43ddacSmrg
442209ff23fSmrg/* Initialize the acceleration hardware */
443209ff23fSmrgvoid RADEONEngineInit(ScrnInfoPtr pScrn)
444209ff23fSmrg{
445209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
446209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
447b7e1c893Smrg    int datatype = 0;
448b7e1c893Smrg    info->accel_state->num_gb_pipes = 0;
449209ff23fSmrg
450209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
451209ff23fSmrg		   "EngineInit (%d/%d)\n",
452209ff23fSmrg		   info->CurrentLayout.pixel_code,
453209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
454209ff23fSmrg
455209ff23fSmrg#ifdef XF86DRI
456209ff23fSmrg    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
457209ff23fSmrg	int num_pipes;
458209ff23fSmrg
459ad43ddacSmrg	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
460209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
461209ff23fSmrg		       "Failed to determine num pipes from DRM, falling back to "
462209ff23fSmrg		       "manual look-up!\n");
463b7e1c893Smrg	    info->accel_state->num_gb_pipes = 0;
464209ff23fSmrg	} else {
465b7e1c893Smrg	    info->accel_state->num_gb_pipes = num_pipes;
466209ff23fSmrg	}
467209ff23fSmrg    }
468209ff23fSmrg#endif
469209ff23fSmrg
470ad43ddacSmrg    if (!info->cs) {
471ad43ddacSmrg	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
472ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
473ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
474ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
475ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
476ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
477ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
478ad43ddacSmrg	    IS_R500_3D) {
479ad43ddacSmrg	    if (info->accel_state->num_gb_pipes == 0) {
480ad43ddacSmrg		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
481ad43ddacSmrg
482ad43ddacSmrg		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
483ad43ddacSmrg		if (IS_R500_3D)
484ad43ddacSmrg		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
485ad43ddacSmrg	    }
486ad43ddacSmrg	} else {
487ad43ddacSmrg	    if (info->accel_state->num_gb_pipes == 0) {
488ad43ddacSmrg		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
489ad43ddacSmrg		    (info->ChipFamily == CHIP_FAMILY_R350)) {
490ad43ddacSmrg		    /* R3xx chips */
491ad43ddacSmrg		    info->accel_state->num_gb_pipes = 2;
492ad43ddacSmrg		} else {
493ad43ddacSmrg		    /* RV3xx chips */
494ad43ddacSmrg		    info->accel_state->num_gb_pipes = 1;
495ad43ddacSmrg		}
496209ff23fSmrg	    }
497209ff23fSmrg	}
498209ff23fSmrg
4992f39173dSmrg	/* SE cards only have 1 quadpipe */
500ad43ddacSmrg	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
5012f39173dSmrg	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
5022f39173dSmrg	    (info->Chipset == PCI_CHIP_R300_AD) ||
5032f39173dSmrg	    (info->Chipset == PCI_CHIP_R350_AH))
504ad43ddacSmrg	    info->accel_state->num_gb_pipes = 1;
505ad43ddacSmrg
506ad43ddacSmrg	if (IS_R300_3D || IS_R500_3D)
507ad43ddacSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
508ad43ddacSmrg		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
509ad43ddacSmrg
510ad43ddacSmrg	if (IS_R300_3D || IS_R500_3D) {
511ad43ddacSmrg	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
512ad43ddacSmrg
513ad43ddacSmrg	    switch(info->accel_state->num_gb_pipes) {
514ad43ddacSmrg	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
515ad43ddacSmrg	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
516ad43ddacSmrg	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
517ad43ddacSmrg	    default:
518ad43ddacSmrg	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
519ad43ddacSmrg	    }
520209ff23fSmrg
521ad43ddacSmrg	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
522ad43ddacSmrg	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
523ad43ddacSmrg	    if (info->ChipFamily >= CHIP_FAMILY_R420)
524ad43ddacSmrg		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
525ad43ddacSmrg	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
526ad43ddacSmrg					     R300_DC_AUTOFLUSH_ENABLE |
527ad43ddacSmrg					     R300_DC_DC_DISABLE_IGNORE_PE));
528ad43ddacSmrg	} else
529ad43ddacSmrg	    OUTREG(RADEON_RB3D_CNTL, 0);
530ad43ddacSmrg
531ad43ddacSmrg	RADEONEngineReset(pScrn);
532ad43ddacSmrg    }
533209ff23fSmrg
534209ff23fSmrg    switch (info->CurrentLayout.pixel_code) {
535b7e1c893Smrg    case 8:  datatype = 2; break;
536b7e1c893Smrg    case 15: datatype = 3; break;
537b7e1c893Smrg    case 16: datatype = 4; break;
538b7e1c893Smrg    case 24: datatype = 5; break;
539b7e1c893Smrg    case 32: datatype = 6; break;
540209ff23fSmrg    default:
541209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
542209ff23fSmrg		       "Unknown depth/bpp = %d/%d (code = %d)\n",
543209ff23fSmrg		       info->CurrentLayout.depth,
544209ff23fSmrg		       info->CurrentLayout.bitsPerPixel,
545209ff23fSmrg		       info->CurrentLayout.pixel_code);
546209ff23fSmrg    }
547209ff23fSmrg
548b7e1c893Smrg    info->accel_state->dp_gui_master_cntl =
549b7e1c893Smrg	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
550209ff23fSmrg	 | RADEON_GMC_CLR_CMP_CNTL_DIS
551209ff23fSmrg	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
552209ff23fSmrg
553209ff23fSmrg    RADEONEngineRestore(pScrn);
554209ff23fSmrg}
555209ff23fSmrg
556ad43ddacSmrguint32_t radeonGetPixmapOffset(PixmapPtr pPix)
557ad43ddacSmrg{
55868105dcbSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
559ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
560ad43ddacSmrg    uint32_t offset = 0;
561ad43ddacSmrg    if (info->cs)
562ad43ddacSmrg	return 0;
563ad43ddacSmrg#ifdef USE_EXA
564ad43ddacSmrg    if (info->useEXA) {
565ad43ddacSmrg	offset = exaGetPixmapOffset(pPix);
566ad43ddacSmrg    } else
567ad43ddacSmrg#endif
568ad43ddacSmrg    {
569ad43ddacSmrg	offset = pPix->devPrivate.ptr - info->FB;
570ad43ddacSmrg    }
571ad43ddacSmrg    offset += info->fbLocation + pScrn->fbOffset;
572ad43ddacSmrg    return offset;
573ad43ddacSmrg}
574209ff23fSmrg
5752f39173dSmrgint radeon_cs_space_remaining(ScrnInfoPtr pScrn)
5762f39173dSmrg{
5772f39173dSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
5782f39173dSmrg
5792f39173dSmrg#ifdef XF86DRM_MODE
5802f39173dSmrg    if (info->cs)
5812f39173dSmrg	return (info->cs->ndw - info->cs->cdw);
5822f39173dSmrg    else
5832f39173dSmrg#endif
5842f39173dSmrg        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
5852f39173dSmrg}
5862f39173dSmrg
587209ff23fSmrg#define ACCEL_MMIO
588209ff23fSmrg#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
589209ff23fSmrg#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
590209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
591209ff23fSmrg#define FINISH_ACCEL()
592209ff23fSmrg
593209ff23fSmrg#include "radeon_commonfuncs.c"
594209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
595209ff23fSmrg#include "radeon_render.c"
596209ff23fSmrg#endif
597209ff23fSmrg#include "radeon_accelfuncs.c"
598209ff23fSmrg
599209ff23fSmrg#undef ACCEL_MMIO
600209ff23fSmrg#undef ACCEL_PREAMBLE
601209ff23fSmrg#undef BEGIN_ACCEL
602209ff23fSmrg#undef OUT_ACCEL_REG
603209ff23fSmrg#undef FINISH_ACCEL
604209ff23fSmrg
605209ff23fSmrg#ifdef XF86DRI
606209ff23fSmrg
607209ff23fSmrg#define ACCEL_CP
608209ff23fSmrg#define ACCEL_PREAMBLE()						\
609209ff23fSmrg    RING_LOCALS;							\
610209ff23fSmrg    RADEONCP_REFRESH(pScrn, info)
611209ff23fSmrg#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
612209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
613209ff23fSmrg#define FINISH_ACCEL()          ADVANCE_RING()
614209ff23fSmrg
615209ff23fSmrg
616209ff23fSmrg#include "radeon_commonfuncs.c"
617209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
618209ff23fSmrg#include "radeon_render.c"
619209ff23fSmrg#endif
620209ff23fSmrg#include "radeon_accelfuncs.c"
621209ff23fSmrg
622209ff23fSmrg#undef ACCEL_CP
623209ff23fSmrg#undef ACCEL_PREAMBLE
624209ff23fSmrg#undef BEGIN_ACCEL
625209ff23fSmrg#undef OUT_ACCEL_REG
626209ff23fSmrg#undef FINISH_ACCEL
627209ff23fSmrg
628209ff23fSmrg/* Stop the CP */
629209ff23fSmrgint RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
630209ff23fSmrg{
631b7e1c893Smrg    drm_radeon_cp_stop_t  stop;
632209ff23fSmrg    int              ret, i;
633209ff23fSmrg
634209ff23fSmrg    stop.flush = 1;
635209ff23fSmrg    stop.idle  = 1;
636209ff23fSmrg
637b7e1c893Smrg    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
638b7e1c893Smrg			  sizeof(drm_radeon_cp_stop_t));
639209ff23fSmrg
640209ff23fSmrg    if (ret == 0) {
641209ff23fSmrg	return 0;
642209ff23fSmrg    } else if (errno != EBUSY) {
643209ff23fSmrg	return -errno;
644209ff23fSmrg    }
645209ff23fSmrg
646209ff23fSmrg    stop.flush = 0;
647209ff23fSmrg
648209ff23fSmrg    i = 0;
649209ff23fSmrg    do {
650b7e1c893Smrg	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
651b7e1c893Smrg			      sizeof(drm_radeon_cp_stop_t));
652209ff23fSmrg    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
653209ff23fSmrg
654209ff23fSmrg    if (ret == 0) {
655209ff23fSmrg	return 0;
656209ff23fSmrg    } else if (errno != EBUSY) {
657209ff23fSmrg	return -errno;
658209ff23fSmrg    }
659209ff23fSmrg
660209ff23fSmrg    stop.idle = 0;
661209ff23fSmrg
662b7e1c893Smrg    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
663b7e1c893Smrg			&stop, sizeof(drm_radeon_cp_stop_t))) {
664209ff23fSmrg	return -errno;
665209ff23fSmrg    } else {
666209ff23fSmrg	return 0;
667209ff23fSmrg    }
668209ff23fSmrg}
669209ff23fSmrg
/* Size in bytes of sixteen 32-bit words.
 * NOTE(review): presumably space held back in an indirect buffer; the
 * user of this macro is outside this part of the file -- confirm.
 */
#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
671ad43ddacSmrg
672209ff23fSmrg/* Get an indirect buffer for the CP 2D acceleration commands  */
673209ff23fSmrgdrmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
674209ff23fSmrg{
675209ff23fSmrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
676209ff23fSmrg    drmDMAReq      dma;
677209ff23fSmrg    drmBufPtr      buf = NULL;
678209ff23fSmrg    int            indx = 0;
679209ff23fSmrg    int            size = 0;
680209ff23fSmrg    int            i = 0;
681209ff23fSmrg    int            ret;
682209ff23fSmrg
683209ff23fSmrg#if 0
684209ff23fSmrg    /* FIXME: pScrn->pScreen has not been initialized when this is first
685209ff23fSmrg     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
686209ff23fSmrg     * the screen index from pScrn, which is initialized, and then get
687209ff23fSmrg     * the screen from screenInfo.screens[index], but that is a hack.
688209ff23fSmrg     */
689209ff23fSmrg    dma.context = DRIGetContext(pScrn->pScreen);
690209ff23fSmrg#else
691209ff23fSmrg    /* This is the X server's context */
692209ff23fSmrg    dma.context = 0x00000001;
693209ff23fSmrg#endif
694209ff23fSmrg
695209ff23fSmrg    dma.send_count    = 0;
696209ff23fSmrg    dma.send_list     = NULL;
697209ff23fSmrg    dma.send_sizes    = NULL;
698209ff23fSmrg    dma.flags         = 0;
699209ff23fSmrg    dma.request_count = 1;
700209ff23fSmrg    dma.request_size  = RADEON_BUFFER_SIZE;
701209ff23fSmrg    dma.request_list  = &indx;
702209ff23fSmrg    dma.request_sizes = &size;
703209ff23fSmrg    dma.granted_count = 0;
704209ff23fSmrg
705209ff23fSmrg    while (1) {
706209ff23fSmrg	do {
707b7e1c893Smrg	    ret = drmDMA(info->dri->drmFD, &dma);
708209ff23fSmrg	    if (ret && ret != -EBUSY) {
709209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
710209ff23fSmrg			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
711209ff23fSmrg	    }
712209ff23fSmrg	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
713209ff23fSmrg
714209ff23fSmrg	if (ret == 0) {
715b7e1c893Smrg	    buf = &info->dri->buffers->list[indx];
716209ff23fSmrg	    buf->used = 0;
717209ff23fSmrg	    if (RADEON_VERBOSE) {
718209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
719209ff23fSmrg			   "   GetBuffer returning %d %p\n",
720209ff23fSmrg			   buf->idx, buf->address);
721209ff23fSmrg	    }
722209ff23fSmrg	    return buf;
723209ff23fSmrg	}
724209ff23fSmrg
725209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
726209ff23fSmrg		   "GetBuffer timed out, resetting engine...\n");
727b7e1c893Smrg
728b7e1c893Smrg	if (info->ChipFamily < CHIP_FAMILY_R600) {
729b7e1c893Smrg	    RADEONEngineReset(pScrn);
730b7e1c893Smrg	    RADEONEngineRestore(pScrn);
731b7e1c893Smrg	} else
732b7e1c893Smrg	    R600EngineReset(pScrn);
733209ff23fSmrg
734209ff23fSmrg	/* Always restart the engine when doing CP 2D acceleration */
735209ff23fSmrg	RADEONCP_RESET(pScrn, info);
736209ff23fSmrg	RADEONCP_START(pScrn, info);
737209ff23fSmrg    }
738209ff23fSmrg}
739209ff23fSmrg
740209ff23fSmrg/* Flush the indirect buffer to the kernel for submission to the card */
741209ff23fSmrgvoid RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
742209ff23fSmrg{
743209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
744b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
745b7e1c893Smrg    int                start  = info->cp->indirectStart;
746b7e1c893Smrg    drm_radeon_indirect_t  indirect;
747209ff23fSmrg
748ad43ddacSmrg    assert(!info->cs);
749209ff23fSmrg    if (!buffer) return;
750209ff23fSmrg    if (start == buffer->used && !discard) return;
751209ff23fSmrg
752209ff23fSmrg    if (RADEON_VERBOSE) {
753209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
754209ff23fSmrg		   buffer->idx);
755209ff23fSmrg    }
756209ff23fSmrg
757b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
758b7e1c893Smrg	if (buffer->used & 0x3c) {
759b7e1c893Smrg	    RING_LOCALS;
760b7e1c893Smrg
761b7e1c893Smrg	    while (buffer->used & 0x3c) {
762b7e1c893Smrg		BEGIN_RING(1);
763b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
764b7e1c893Smrg		ADVANCE_RING();
765b7e1c893Smrg	    }
766b7e1c893Smrg	}
767b7e1c893Smrg    }
768b7e1c893Smrg
769209ff23fSmrg    indirect.idx     = buffer->idx;
770209ff23fSmrg    indirect.start   = start;
771209ff23fSmrg    indirect.end     = buffer->used;
772209ff23fSmrg    indirect.discard = discard;
773209ff23fSmrg
774b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
775b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
776209ff23fSmrg
777209ff23fSmrg    if (discard) {
778b7e1c893Smrg	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
779b7e1c893Smrg	info->cp->indirectStart  = 0;
780209ff23fSmrg    } else {
781209ff23fSmrg	/* Start on a double word boundary */
782ad43ddacSmrg	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
783209ff23fSmrg	if (RADEON_VERBOSE) {
784209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
785b7e1c893Smrg		       info->cp->indirectStart);
786209ff23fSmrg	}
787209ff23fSmrg    }
788209ff23fSmrg}
789209ff23fSmrg
790209ff23fSmrg/* Flush and release the indirect buffer */
791209ff23fSmrgvoid RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
792209ff23fSmrg{
793209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
794b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
795b7e1c893Smrg    int                start  = info->cp->indirectStart;
796b7e1c893Smrg    drm_radeon_indirect_t  indirect;
797b7e1c893Smrg
798ad43ddacSmrg    assert(!info->cs);
799b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
800b7e1c893Smrg	if (buffer && (buffer->used & 0x3c)) {
801b7e1c893Smrg	    RING_LOCALS;
802b7e1c893Smrg
803b7e1c893Smrg	    while (buffer->used & 0x3c) {
804b7e1c893Smrg		BEGIN_RING(1);
805b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
806b7e1c893Smrg		ADVANCE_RING();
807b7e1c893Smrg	    }
808b7e1c893Smrg	}
809b7e1c893Smrg    }
810209ff23fSmrg
811b7e1c893Smrg    info->cp->indirectBuffer = NULL;
812b7e1c893Smrg    info->cp->indirectStart  = 0;
813209ff23fSmrg
814209ff23fSmrg    if (!buffer) return;
815209ff23fSmrg
816209ff23fSmrg    if (RADEON_VERBOSE) {
817209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
818209ff23fSmrg		   buffer->idx);
819209ff23fSmrg    }
820209ff23fSmrg
821209ff23fSmrg    indirect.idx     = buffer->idx;
822209ff23fSmrg    indirect.start   = start;
823209ff23fSmrg    indirect.end     = buffer->used;
824209ff23fSmrg    indirect.discard = 1;
825209ff23fSmrg
826b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
827b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
828209ff23fSmrg}
829209ff23fSmrg
830209ff23fSmrg/** \brief Calculate HostDataBlit parameters from pointer and pitch
831209ff23fSmrg *
832209ff23fSmrg * This is a helper for the trivial HostDataBlit users that don't need to worry
833209ff23fSmrg * about tiling etc.
834209ff23fSmrg */
835209ff23fSmrgvoid
836209ff23fSmrgRADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
837209ff23fSmrg		     uint32_t *dstPitchOff, int *x, int *y)
838209ff23fSmrg{
839209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
840209ff23fSmrg    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
841209ff23fSmrg
842209ff23fSmrg    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
843209ff23fSmrg    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
844209ff23fSmrg    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
845209ff23fSmrg}
846209ff23fSmrg
847209ff23fSmrg/* Set up a hostdata blit to transfer data from system memory to the
848209ff23fSmrg * framebuffer. Returns the address where the data can be written to and sets
849209ff23fSmrg * the dstPitch and hpass variables as required.
850209ff23fSmrg */
851209ff23fSmrguint8_t*
852209ff23fSmrgRADEONHostDataBlit(
853209ff23fSmrg    ScrnInfoPtr pScrn,
854209ff23fSmrg    unsigned int cpp,
855209ff23fSmrg    unsigned int w,
856209ff23fSmrg    uint32_t dstPitchOff,
857209ff23fSmrg    uint32_t *bufPitch,
858209ff23fSmrg    int x,
859209ff23fSmrg    int *y,
860209ff23fSmrg    unsigned int *h,
861209ff23fSmrg    unsigned int *hpass
862209ff23fSmrg){
863209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
864209ff23fSmrg    uint32_t format, dwords;
865209ff23fSmrg    uint8_t *ret;
866209ff23fSmrg    RING_LOCALS;
867209ff23fSmrg
868209ff23fSmrg    if ( *h == 0 )
869209ff23fSmrg    {
870209ff23fSmrg	return NULL;
871209ff23fSmrg    }
872209ff23fSmrg
873209ff23fSmrg    switch ( cpp )
874209ff23fSmrg    {
875209ff23fSmrg    case 4:
876209ff23fSmrg	format = RADEON_GMC_DST_32BPP;
877209ff23fSmrg	*bufPitch = 4 * w;
878209ff23fSmrg	break;
879209ff23fSmrg    case 2:
880209ff23fSmrg	format = RADEON_GMC_DST_16BPP;
881ad43ddacSmrg	*bufPitch = 2 * RADEON_ALIGN(w, 2);
882209ff23fSmrg	break;
883209ff23fSmrg    case 1:
884209ff23fSmrg	format = RADEON_GMC_DST_8BPP_CI;
885ad43ddacSmrg	*bufPitch = RADEON_ALIGN(w, 4);
886209ff23fSmrg	break;
887209ff23fSmrg    default:
888209ff23fSmrg	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
889209ff23fSmrg		    "%s: Unsupported cpp %d!\n", __func__, cpp );
890209ff23fSmrg	return NULL;
891209ff23fSmrg    }
892209ff23fSmrg
893209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
894209ff23fSmrg    /* Swap doesn't work on R300 and later, it's handled during the
895209ff23fSmrg     * copy to ind. buffer pass
896209ff23fSmrg     */
897209ff23fSmrg    if (info->ChipFamily < CHIP_FAMILY_R300) {
898209ff23fSmrg        BEGIN_RING(2);
899209ff23fSmrg	if (cpp == 2)
900209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
901209ff23fSmrg			 RADEON_HOST_DATA_SWAP_HDW);
902209ff23fSmrg	else if (cpp == 1)
903209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
904209ff23fSmrg			 RADEON_HOST_DATA_SWAP_32BIT);
905209ff23fSmrg	else
906209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
907209ff23fSmrg			 RADEON_HOST_DATA_SWAP_NONE);
908209ff23fSmrg	ADVANCE_RING();
909209ff23fSmrg    }
910209ff23fSmrg#endif
911209ff23fSmrg
912209ff23fSmrg    /*RADEON_PURGE_CACHE();
913209ff23fSmrg      RADEON_WAIT_UNTIL_IDLE();*/
914209ff23fSmrg
915209ff23fSmrg    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
916209ff23fSmrg    dwords = *hpass * *bufPitch / 4;
917209ff23fSmrg
918209ff23fSmrg    BEGIN_RING( dwords + 10 );
919209ff23fSmrg    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
920209ff23fSmrg    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
921209ff23fSmrg	    | RADEON_GMC_DST_CLIPPING
922209ff23fSmrg	    | RADEON_GMC_BRUSH_NONE
923209ff23fSmrg	    | format
924209ff23fSmrg	    | RADEON_GMC_SRC_DATATYPE_COLOR
925209ff23fSmrg	    | RADEON_ROP3_S
926209ff23fSmrg	    | RADEON_DP_SRC_SOURCE_HOST_DATA
927209ff23fSmrg	    | RADEON_GMC_CLR_CMP_CNTL_DIS
928209ff23fSmrg	    | RADEON_GMC_WR_MSK_DIS );
929209ff23fSmrg    OUT_RING( dstPitchOff );
930209ff23fSmrg    OUT_RING( (*y << 16) | x );
931209ff23fSmrg    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
932209ff23fSmrg    OUT_RING( 0xffffffff );
933209ff23fSmrg    OUT_RING( 0xffffffff );
934209ff23fSmrg    OUT_RING( *y << 16 | x );
935209ff23fSmrg    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
936209ff23fSmrg    OUT_RING( dwords );
937209ff23fSmrg
938209ff23fSmrg    ret = ( uint8_t* )&__head[__count];
939209ff23fSmrg
940209ff23fSmrg    __count += dwords;
941209ff23fSmrg    ADVANCE_RING();
942209ff23fSmrg
943209ff23fSmrg    *y += *hpass;
944209ff23fSmrg    *h -= *hpass;
945209ff23fSmrg
946209ff23fSmrg    return ret;
947209ff23fSmrg}
948209ff23fSmrg
949209ff23fSmrgvoid RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
950209ff23fSmrg{
951209ff23fSmrg    switch(swap) {
952209ff23fSmrg    case RADEON_HOST_DATA_SWAP_HDW:
953209ff23fSmrg        {
954209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
955209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
956209ff23fSmrg	    unsigned int nwords = size >> 2;
957209ff23fSmrg
958209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
959209ff23fSmrg		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
960209ff23fSmrg	    return;
961209ff23fSmrg        }
962209ff23fSmrg    case RADEON_HOST_DATA_SWAP_32BIT:
963209ff23fSmrg        {
964209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
965209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
966209ff23fSmrg	    unsigned int nwords = size >> 2;
967209ff23fSmrg
968209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
969209ff23fSmrg#ifdef __powerpc__
970209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
971209ff23fSmrg#else
972209ff23fSmrg		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
973209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
974209ff23fSmrg#endif
975209ff23fSmrg	    return;
976209ff23fSmrg        }
977209ff23fSmrg    case RADEON_HOST_DATA_SWAP_16BIT:
978209ff23fSmrg        {
979209ff23fSmrg	    unsigned short *d = (unsigned short *)dst;
980209ff23fSmrg	    unsigned short *s = (unsigned short *)src;
981209ff23fSmrg	    unsigned int nwords = size >> 1;
982209ff23fSmrg
983209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
984209ff23fSmrg#ifdef __powerpc__
98568105dcbSveego		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
986209ff23fSmrg#else
98768105dcbSveego	        *d = (*s >> 8) | (*s << 8);
988209ff23fSmrg#endif
989209ff23fSmrg	    return;
990209ff23fSmrg	}
991209ff23fSmrg    }
992209ff23fSmrg    if (src != dst)
993ad43ddacSmrg	memcpy(dst, src, size);
994209ff23fSmrg}
995209ff23fSmrg
996209ff23fSmrg/* Copies a single pass worth of data for a hostdata blit set up by
997209ff23fSmrg * RADEONHostDataBlit().
998209ff23fSmrg */
999209ff23fSmrgvoid
1000209ff23fSmrgRADEONHostDataBlitCopyPass(
1001209ff23fSmrg    ScrnInfoPtr pScrn,
1002209ff23fSmrg    unsigned int cpp,
1003209ff23fSmrg    uint8_t *dst,
1004209ff23fSmrg    uint8_t *src,
1005209ff23fSmrg    unsigned int hpass,
1006209ff23fSmrg    unsigned int dstPitch,
1007209ff23fSmrg    unsigned int srcPitch
1008209ff23fSmrg){
1009209ff23fSmrg
1010209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1011209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
1012209ff23fSmrg#endif
1013209ff23fSmrg
1014209ff23fSmrg    /* RADEONHostDataBlitCopy can return NULL ! */
1015209ff23fSmrg    if( (dst==NULL) || (src==NULL)) return;
1016209ff23fSmrg
1017209ff23fSmrg    if ( dstPitch == srcPitch )
1018209ff23fSmrg    {
1019209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1020209ff23fSmrg        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1021209ff23fSmrg	    switch(cpp) {
1022209ff23fSmrg	    case 1:
1023209ff23fSmrg		RADEONCopySwap(dst, src, hpass * dstPitch,
1024209ff23fSmrg			       RADEON_HOST_DATA_SWAP_32BIT);
1025209ff23fSmrg		return;
1026209ff23fSmrg	    case 2:
1027209ff23fSmrg	        RADEONCopySwap(dst, src, hpass * dstPitch,
1028209ff23fSmrg			       RADEON_HOST_DATA_SWAP_HDW);
1029209ff23fSmrg		return;
1030209ff23fSmrg	    }
1031209ff23fSmrg	}
1032209ff23fSmrg#endif
1033209ff23fSmrg	memcpy( dst, src, hpass * dstPitch );
1034209ff23fSmrg    }
1035209ff23fSmrg    else
1036209ff23fSmrg    {
1037209ff23fSmrg	unsigned int minPitch = min( dstPitch, srcPitch );
1038209ff23fSmrg	while ( hpass-- )
1039209ff23fSmrg	{
1040209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1041209ff23fSmrg            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1042209ff23fSmrg		switch(cpp) {
1043209ff23fSmrg		case 1:
1044209ff23fSmrg		    RADEONCopySwap(dst, src, minPitch,
1045209ff23fSmrg				   RADEON_HOST_DATA_SWAP_32BIT);
1046209ff23fSmrg		    goto next;
1047209ff23fSmrg		case 2:
1048209ff23fSmrg	            RADEONCopySwap(dst, src, minPitch,
1049209ff23fSmrg				   RADEON_HOST_DATA_SWAP_HDW);
1050209ff23fSmrg		    goto next;
1051209ff23fSmrg		}
1052209ff23fSmrg	    }
1053209ff23fSmrg#endif
1054209ff23fSmrg	    memcpy( dst, src, minPitch );
1055209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1056209ff23fSmrg	next:
1057209ff23fSmrg#endif
1058209ff23fSmrg	    src += srcPitch;
1059209ff23fSmrg	    dst += dstPitch;
1060209ff23fSmrg	}
1061209ff23fSmrg    }
1062209ff23fSmrg}
1063209ff23fSmrg
1064209ff23fSmrg#endif
1065209ff23fSmrg
1066209ff23fSmrgBool RADEONAccelInit(ScreenPtr pScreen)
1067209ff23fSmrg{
106868105dcbSveego    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1069209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1070209ff23fSmrg
1071209ff23fSmrg#ifdef USE_EXA
1072209ff23fSmrg    if (info->useEXA) {
1073209ff23fSmrg# ifdef XF86DRI
1074209ff23fSmrg	if (info->directRenderingEnabled) {
1075921a55d8Smrg#ifdef XF86DRM_MODE
1076921a55d8Smrg	    if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
1077921a55d8Smrg		if (!EVERGREENDrawInit(pScreen))
1078921a55d8Smrg		    return FALSE;
1079921a55d8Smrg	    } else
1080921a55d8Smrg#endif
1081921a55d8Smrg	      if (info->ChipFamily >= CHIP_FAMILY_R600) {
1082b7e1c893Smrg		if (!R600DrawInit(pScreen))
1083b7e1c893Smrg		    return FALSE;
1084b7e1c893Smrg	    } else {
1085b7e1c893Smrg		if (!RADEONDrawInitCP(pScreen))
1086b7e1c893Smrg		    return FALSE;
1087b7e1c893Smrg	    }
1088209ff23fSmrg	} else
1089209ff23fSmrg# endif /* XF86DRI */
1090209ff23fSmrg	{
1091b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1092209ff23fSmrg		return FALSE;
1093b7e1c893Smrg	    else {
1094b7e1c893Smrg		if (!RADEONDrawInitMMIO(pScreen))
1095b7e1c893Smrg		    return FALSE;
1096b7e1c893Smrg	    }
1097209ff23fSmrg	}
1098209ff23fSmrg    }
1099209ff23fSmrg#endif /* USE_EXA */
1100209ff23fSmrg#ifdef USE_XAA
1101209ff23fSmrg    if (!info->useEXA) {
1102209ff23fSmrg	XAAInfoRecPtr  a;
1103209ff23fSmrg
1104b7e1c893Smrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
1105b7e1c893Smrg	    return FALSE;
1106b7e1c893Smrg
1107b7e1c893Smrg	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1108209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1109209ff23fSmrg	    return FALSE;
1110209ff23fSmrg	}
1111209ff23fSmrg
1112209ff23fSmrg#ifdef XF86DRI
1113209ff23fSmrg	if (info->directRenderingEnabled)
1114209ff23fSmrg	    RADEONAccelInitCP(pScreen, a);
1115209ff23fSmrg	else
1116209ff23fSmrg#endif /* XF86DRI */
1117209ff23fSmrg	    RADEONAccelInitMMIO(pScreen, a);
1118209ff23fSmrg
1119209ff23fSmrg	RADEONEngineInit(pScrn);
1120209ff23fSmrg
1121209ff23fSmrg	if (!XAAInit(pScreen, a)) {
1122209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1123209ff23fSmrg	    return FALSE;
1124209ff23fSmrg	}
1125209ff23fSmrg    }
1126209ff23fSmrg#endif /* USE_XAA */
1127209ff23fSmrg    return TRUE;
1128209ff23fSmrg}
1129209ff23fSmrg
1130209ff23fSmrgvoid RADEONInit3DEngine(ScrnInfoPtr pScrn)
1131209ff23fSmrg{
1132209ff23fSmrg    RADEONInfoPtr info = RADEONPTR (pScrn);
1133209ff23fSmrg
1134209ff23fSmrg#ifdef XF86DRI
1135209ff23fSmrg    if (info->directRenderingEnabled) {
1136b7e1c893Smrg	drm_radeon_sarea_t *pSAREAPriv;
1137209ff23fSmrg
1138ad43ddacSmrg	if (!info->kms_enabled) {
1139ad43ddacSmrg	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1140ad43ddacSmrg	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1141ad43ddacSmrg	}
1142209ff23fSmrg	RADEONInit3DEngineCP(pScrn);
1143209ff23fSmrg    } else
1144209ff23fSmrg#endif
1145209ff23fSmrg	RADEONInit3DEngineMMIO(pScrn);
1146209ff23fSmrg
1147b7e1c893Smrg    info->accel_state->XInited3D = TRUE;
1148209ff23fSmrg}
1149209ff23fSmrg
1150209ff23fSmrg#ifdef USE_XAA
1151209ff23fSmrg#ifdef XF86DRI
1152209ff23fSmrgBool
115368105dcbSveegoRADEONSetupMemXAA_DRI(ScreenPtr pScreen)
1154209ff23fSmrg{
115568105dcbSveego    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1156209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1157209ff23fSmrg    int            cpp = info->CurrentLayout.pixel_bytes;
1158b7e1c893Smrg    int            depthCpp = (info->dri->depthBits - 8) / 4;
1159209ff23fSmrg    int            width_bytes = pScrn->displayWidth * cpp;
1160209ff23fSmrg    int            bufferSize;
1161209ff23fSmrg    int            depthSize;
1162209ff23fSmrg    int            l;
1163209ff23fSmrg    int            scanlines;
1164209ff23fSmrg    int            texsizerequest;
1165209ff23fSmrg    BoxRec         MemBox;
1166209ff23fSmrg    FBAreaPtr      fbarea;
1167209ff23fSmrg
1168b7e1c893Smrg    info->dri->frontOffset = 0;
1169b7e1c893Smrg    info->dri->frontPitch = pScrn->displayWidth;
1170b7e1c893Smrg    info->dri->backPitch = pScrn->displayWidth;
1171209ff23fSmrg
1172209ff23fSmrg    /* make sure we use 16 line alignment for tiling (8 might be enough).
1173209ff23fSmrg     * Might need that for non-XF86DRI too?
1174209ff23fSmrg     */
1175209ff23fSmrg    if (info->allowColorTiling) {
1176ad43ddacSmrg	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1177ad43ddacSmrg		      RADEON_GPU_PAGE_SIZE);
1178209ff23fSmrg    } else {
1179ad43ddacSmrg        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1180ad43ddacSmrg		      RADEON_GPU_PAGE_SIZE);
1181209ff23fSmrg    }
1182209ff23fSmrg
1183209ff23fSmrg    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1184209ff23fSmrg     * which is always the case if color tiling is used due to color pitch
1185209ff23fSmrg     * but not necessarily otherwise, and its height a multiple of 16 lines.
1186209ff23fSmrg     */
1187ad43ddacSmrg    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1188ad43ddacSmrg    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1189ad43ddacSmrg		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1190209ff23fSmrg
1191209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1192b7e1c893Smrg	       "Using %d MB GART aperture\n", info->dri->gartSize);
1193209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1194b7e1c893Smrg	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1195209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1196b7e1c893Smrg	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1197209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1198b7e1c893Smrg	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1199209ff23fSmrg
1200209ff23fSmrg    /* Try for front, back, depth, and three framebuffers worth of
1201209ff23fSmrg     * pixmap cache.  Should be enough for a fullscreen background
1202209ff23fSmrg     * image plus some leftovers.
1203209ff23fSmrg     * If the FBTexPercent option was used, try to achieve that percentage instead,
1204209ff23fSmrg     * but still have at least one pixmap buffer (get problems with xvideo/render
1205209ff23fSmrg     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1206209ff23fSmrg     * probably useless for XAA.
1207209ff23fSmrg     */
1208b7e1c893Smrg    if (info->dri->textureSize >= 0) {
1209209ff23fSmrg	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1210209ff23fSmrg			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1211209ff23fSmrg	/* first divide, then multiply or we'll get an overflow (been there...) */
1212b7e1c893Smrg			 / 100 * info->dri->textureSize;
1213209ff23fSmrg    }
1214209ff23fSmrg    else {
1215209ff23fSmrg	texsizerequest = (int)info->FbMapSize / 2;
1216209ff23fSmrg    }
1217b7e1c893Smrg    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1218209ff23fSmrg
1219209ff23fSmrg    /* If that gives us less than the requested memory, let's
1220209ff23fSmrg     * be greedy and grab some more.  Sorry, I care more about 3D
1221209ff23fSmrg     * performance than playing nicely, and you'll get around a full
1222209ff23fSmrg     * framebuffer's worth of pixmap cache anyway.
1223209ff23fSmrg     */
1224b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1225b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1226209ff23fSmrg    }
1227b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1228b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1229209ff23fSmrg    }
1230209ff23fSmrg
1231209ff23fSmrg    /* If there's still no space for textures, try without pixmap cache, but
1232209ff23fSmrg     * never use the reserved space, the space hw cursor and PCIGART table might
1233209ff23fSmrg     * use.
1234209ff23fSmrg     */
1235b7e1c893Smrg    if (info->dri->textureSize < 0) {
1236b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1237209ff23fSmrg	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1238209ff23fSmrg    }
1239209ff23fSmrg
1240209ff23fSmrg    /* Check to see if there is more room available after the 8192nd
1241209ff23fSmrg     * scanline for textures
1242209ff23fSmrg     */
1243209ff23fSmrg    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1244209ff23fSmrg    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1245b7e1c893Smrg	> info->dri->textureSize) {
1246b7e1c893Smrg	info->dri->textureSize =
1247209ff23fSmrg		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1248209ff23fSmrg    }
1249209ff23fSmrg
1250209ff23fSmrg    /* If backbuffer is disabled, don't allocate memory for it */
1251b7e1c893Smrg    if (info->dri->noBackBuffer) {
1252b7e1c893Smrg	info->dri->textureSize += bufferSize;
1253209ff23fSmrg    }
1254209ff23fSmrg
1255209ff23fSmrg    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1256209ff23fSmrg       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1257209ff23fSmrg       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1258209ff23fSmrg       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1259209ff23fSmrg       area otherwise).
1260209ff23fSmrg       This might cause some space at the end of the video memory to be unused, since it
1261209ff23fSmrg       can't be used (?) due to that log_tex_granularity thing???
1262209ff23fSmrg       Could use different copyscreentoscreen function for the pageflip copies
1263209ff23fSmrg       (which would use different src and dst offsets) to avoid this. */
1264b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1265b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1266209ff23fSmrg			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1267209ff23fSmrg    }
1268b7e1c893Smrg    if (info->dri->textureSize > 0) {
1269b7e1c893Smrg	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1270209ff23fSmrg	if (l < RADEON_LOG_TEX_GRANULARITY)
1271209ff23fSmrg	    l = RADEON_LOG_TEX_GRANULARITY;
1272209ff23fSmrg	/* Round the texture size up to the nearest whole number of
1273209ff23fSmrg	 * texture regions.  Again, be greedy about this, don't
1274209ff23fSmrg	 * round down.
1275209ff23fSmrg	 */
1276b7e1c893Smrg	info->dri->log2TexGran = l;
1277b7e1c893Smrg	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1278209ff23fSmrg    } else {
1279b7e1c893Smrg	info->dri->textureSize = 0;
1280209ff23fSmrg    }
1281209ff23fSmrg
1282209ff23fSmrg    /* Set a minimum usable local texture heap size.  This will fit
1283209ff23fSmrg     * two 256x256x32bpp textures.
1284209ff23fSmrg     */
1285b7e1c893Smrg    if (info->dri->textureSize < 512 * 1024) {
1286b7e1c893Smrg	info->dri->textureOffset = 0;
1287b7e1c893Smrg	info->dri->textureSize = 0;
1288209ff23fSmrg    }
1289209ff23fSmrg
1290b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1291b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1292b7e1c893Smrg				    (width_bytes * 16)) * (width_bytes * 16);
1293209ff23fSmrg    }
1294209ff23fSmrg    else {
1295209ff23fSmrg	/* Reserve space for textures */
1296ad43ddacSmrg	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1297ad43ddacSmrg				     RADEON_GPU_PAGE_SIZE);
1298209ff23fSmrg    }
1299209ff23fSmrg
1300209ff23fSmrg    /* Reserve space for the shared depth
1301209ff23fSmrg     * buffer.
1302209ff23fSmrg     */
1303ad43ddacSmrg    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1304ad43ddacSmrg			       RADEON_GPU_PAGE_SIZE);
1305209ff23fSmrg
1306209ff23fSmrg    /* Reserve space for the shared back buffer */
1307b7e1c893Smrg    if (info->dri->noBackBuffer) {
1308b7e1c893Smrg       info->dri->backOffset = info->dri->depthOffset;
1309209ff23fSmrg    } else {
1310ad43ddacSmrg       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1311ad43ddacSmrg				 RADEON_GPU_PAGE_SIZE);
1312209ff23fSmrg    }
1313209ff23fSmrg
1314b7e1c893Smrg    info->dri->backY = info->dri->backOffset / width_bytes;
1315b7e1c893Smrg    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1316209ff23fSmrg
1317209ff23fSmrg    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1318209ff23fSmrg    if (scanlines > 8191)
1319209ff23fSmrg	scanlines = 8191;
1320209ff23fSmrg
1321209ff23fSmrg    MemBox.x1 = 0;
1322209ff23fSmrg    MemBox.y1 = 0;
1323209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1324209ff23fSmrg    MemBox.y2 = scanlines;
1325209ff23fSmrg
1326209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
132768105dcbSveego        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1328209ff23fSmrg		   "Memory manager initialization to "
1329209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1330209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1331209ff23fSmrg	return FALSE;
1332209ff23fSmrg    } else {
1333209ff23fSmrg	int  width, height;
1334209ff23fSmrg
133568105dcbSveego	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1336209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1337209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1338209ff23fSmrg	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1339209ff23fSmrg	   aligned... sigh */
1340209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1341209ff23fSmrg						pScrn->displayWidth,
1342209ff23fSmrg						info->allowColorTiling ?
1343ad43ddacSmrg						(RADEON_ALIGN(pScrn->virtualY, 16))
1344209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1345209ff23fSmrg						0, NULL, NULL,
1346209ff23fSmrg						NULL))) {
134768105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1348209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1349209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1350209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1351209ff23fSmrg	} else {
135268105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1353209ff23fSmrg	}
1354209ff23fSmrg
1355209ff23fSmrg	RADEONDRIAllocatePCIGARTTable(pScreen);
1356209ff23fSmrg
1357209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1358209ff23fSmrg					  &height, 0, 0, 0)) {
135968105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1360209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1361209ff23fSmrg		       width, height);
1362209ff23fSmrg
1363209ff23fSmrg	    /* Lines in offscreen area needed for depth buffer and
1364209ff23fSmrg	     * textures
1365209ff23fSmrg	     */
1366b7e1c893Smrg	    info->dri->depthTexLines = (scanlines
1367b7e1c893Smrg					- info->dri->depthOffset / width_bytes);
1368b7e1c893Smrg	    info->dri->backLines	    = (scanlines
1369b7e1c893Smrg					       - info->dri->backOffset / width_bytes
1370b7e1c893Smrg					       - info->dri->depthTexLines);
1371b7e1c893Smrg	    info->dri->backArea	    = NULL;
1372209ff23fSmrg	} else {
137368105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1374209ff23fSmrg		       "Unable to determine largest offscreen area "
1375209ff23fSmrg		       "available\n");
1376209ff23fSmrg	    return FALSE;
1377209ff23fSmrg	}
1378209ff23fSmrg    }
1379209ff23fSmrg
138068105dcbSveego    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1381209ff23fSmrg	       "Will use front buffer at offset 0x%x\n",
1382b7e1c893Smrg	       info->dri->frontOffset);
1383209ff23fSmrg
138468105dcbSveego    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1385209ff23fSmrg	       "Will use back buffer at offset 0x%x\n",
1386b7e1c893Smrg	       info->dri->backOffset);
138768105dcbSveego    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1388209ff23fSmrg	       "Will use depth buffer at offset 0x%x\n",
1389b7e1c893Smrg	       info->dri->depthOffset);
1390209ff23fSmrg    if (info->cardType==CARD_PCIE)
139168105dcbSveego    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1392209ff23fSmrg	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1393b7e1c893Smrg		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
139468105dcbSveego    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1395209ff23fSmrg	       "Will use %d kb for textures at offset 0x%x\n",
1396b7e1c893Smrg	       info->dri->textureSize/1024, info->dri->textureOffset);
1397209ff23fSmrg
1398b7e1c893Smrg    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1399b7e1c893Smrg				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1400209ff23fSmrg
1401b7e1c893Smrg    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1402b7e1c893Smrg				  ((info->dri->backOffset + info->fbLocation) >> 10));
1403209ff23fSmrg
1404b7e1c893Smrg    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1405b7e1c893Smrg				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1406209ff23fSmrg    return TRUE;
1407209ff23fSmrg}
1408209ff23fSmrg#endif /* XF86DRI */
1409209ff23fSmrg
1410209ff23fSmrgBool
141168105dcbSveegoRADEONSetupMemXAA(ScreenPtr pScreen)
1412209ff23fSmrg{
141368105dcbSveego    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1414209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1415209ff23fSmrg    BoxRec         MemBox;
1416209ff23fSmrg    int            y2;
1417209ff23fSmrg
1418209ff23fSmrg    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1419209ff23fSmrg
1420209ff23fSmrg    MemBox.x1 = 0;
1421209ff23fSmrg    MemBox.y1 = 0;
1422209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1423209ff23fSmrg    y2 = info->FbMapSize / width_bytes;
1424209ff23fSmrg    if (y2 >= 32768)
1425209ff23fSmrg	y2 = 32767; /* because MemBox.y2 is signed short */
1426209ff23fSmrg    MemBox.y2 = y2;
1427209ff23fSmrg
1428209ff23fSmrg    /* The acceleration engine uses 14 bit
1429209ff23fSmrg     * signed coordinates, so we can't have any
1430209ff23fSmrg     * drawable caches beyond this region.
1431209ff23fSmrg     */
1432209ff23fSmrg    if (MemBox.y2 > 8191)
1433209ff23fSmrg	MemBox.y2 = 8191;
1434209ff23fSmrg
1435209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
143668105dcbSveego	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
1437209ff23fSmrg		   "Memory manager initialization to "
1438209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1439209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1440209ff23fSmrg	return FALSE;
1441209ff23fSmrg    } else {
1442209ff23fSmrg	int       width, height;
1443209ff23fSmrg	FBAreaPtr fbarea;
1444209ff23fSmrg
144568105dcbSveego	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1446209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1447209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1448209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1449209ff23fSmrg						pScrn->displayWidth,
1450209ff23fSmrg						info->allowColorTiling ?
1451ad43ddacSmrg						(RADEON_ALIGN(pScrn->virtualY, 16))
1452209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1453209ff23fSmrg						0, NULL, NULL,
1454209ff23fSmrg						NULL))) {
145568105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1456209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1457209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1458209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1459209ff23fSmrg	} else {
146068105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
1461209ff23fSmrg	}
1462209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1463209ff23fSmrg					      0, 0, 0)) {
146468105dcbSveego	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1465209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1466209ff23fSmrg		       width, height);
1467209ff23fSmrg	}
1468209ff23fSmrg	return TRUE;
1469209ff23fSmrg    }
1470209ff23fSmrg}
1471209ff23fSmrg#endif /* USE_XAA */
1472