radeon_accel.c revision 2f39173d
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3209ff23fSmrg *                VA Linux Systems Inc., Fremont, California.
4209ff23fSmrg *
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining
8209ff23fSmrg * a copy of this software and associated documentation files (the
9209ff23fSmrg * "Software"), to deal in the Software without restriction, including
10209ff23fSmrg * without limitation on the rights to use, copy, modify, merge,
11209ff23fSmrg * publish, distribute, sublicense, and/or sell copies of the Software,
12209ff23fSmrg * and to permit persons to whom the Software is furnished to do so,
13209ff23fSmrg * subject to the following conditions:
14209ff23fSmrg *
15209ff23fSmrg * The above copyright notice and this permission notice (including the
16209ff23fSmrg * next paragraph) shall be included in all copies or substantial
17209ff23fSmrg * portions of the Software.
18209ff23fSmrg *
19209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20209ff23fSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21209ff23fSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22209ff23fSmrg * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23209ff23fSmrg * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24209ff23fSmrg * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26209ff23fSmrg * DEALINGS IN THE SOFTWARE.
27209ff23fSmrg */
28209ff23fSmrg
29209ff23fSmrg#ifdef HAVE_CONFIG_H
30209ff23fSmrg#include "config.h"
31209ff23fSmrg#endif
32209ff23fSmrg
33209ff23fSmrg/*
34209ff23fSmrg * Authors:
35209ff23fSmrg *   Kevin E. Martin <martin@xfree86.org>
36209ff23fSmrg *   Rickard E. Faith <faith@valinux.com>
37209ff23fSmrg *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38209ff23fSmrg *
39209ff23fSmrg * Credits:
40209ff23fSmrg *
41209ff23fSmrg *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
42209ff23fSmrg *   code to his Radeon driver.  Portions of this file are based on the
43209ff23fSmrg *   initialization code for that driver.
44209ff23fSmrg *
45209ff23fSmrg * References:
46209ff23fSmrg *
47209ff23fSmrg * !!!! FIXME !!!!
48209ff23fSmrg *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49209ff23fSmrg *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50209ff23fSmrg *   1999.
51209ff23fSmrg *
52209ff23fSmrg *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53209ff23fSmrg *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54209ff23fSmrg *
55209ff23fSmrg * Notes on unimplemented XAA optimizations:
56209ff23fSmrg *
57209ff23fSmrg *   SetClipping:   This has been removed as XAA expects 16bit registers
58209ff23fSmrg *                  for full clipping.
59209ff23fSmrg *   TwoPointLine:  The Radeon supports this. Not Bresenham.
60209ff23fSmrg *   DashedLine with non-power-of-two pattern length: Apparently, there is
61209ff23fSmrg *                  no way to set the length of the pattern -- it is always
62209ff23fSmrg *                  assumed to be 8 or 32 (or 1024?).
63209ff23fSmrg *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
64209ff23fSmrg *                  Manual where it states that monochrome expansion of frame
65209ff23fSmrg *                  buffer data is not supported.
66209ff23fSmrg *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
67209ff23fSmrg *                  direct/indirect method.  If we had more data registers,
68209ff23fSmrg *                  then we could do better.  If XAA supported a trigger write
69209ff23fSmrg *                  address, the code would be simpler.
70209ff23fSmrg *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
71209ff23fSmrg *                  pattern from frame buffer memory.
72209ff23fSmrg *   ImageWrites:   Same as CPUToScreenColorExpandFill
73209ff23fSmrg *
74209ff23fSmrg */
75209ff23fSmrg
76209ff23fSmrg#include <errno.h>
77209ff23fSmrg#include <string.h>
78209ff23fSmrg				/* Driver data structures */
79209ff23fSmrg#include "radeon.h"
80209ff23fSmrg#include "radeon_reg.h"
81b7e1c893Smrg#include "r600_reg.h"
82209ff23fSmrg#include "radeon_macros.h"
83209ff23fSmrg#include "radeon_probe.h"
84209ff23fSmrg#include "radeon_version.h"
85209ff23fSmrg#ifdef XF86DRI
86209ff23fSmrg#define _XF86DRI_SERVER_
87b7e1c893Smrg#include "radeon_drm.h"
88209ff23fSmrg#endif
89209ff23fSmrg
90c503f109Smrg#include "ati_pciids_gen.h"
91c503f109Smrg
92209ff23fSmrg				/* Line support */
93209ff23fSmrg#include "miline.h"
94209ff23fSmrg
95209ff23fSmrg				/* X and server generic header files */
96209ff23fSmrg#include "xf86.h"
97209ff23fSmrg
98b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn);
99209ff23fSmrg
100209ff23fSmrg#ifdef USE_XAA
/*
 * Raster-op translation table with one entry per X11 GX* function, in the
 * order named by the per-entry comments (GXclear first, GXset last).
 * Each entry carries two hardware ROP3 codes: `rop' and `pattern'.
 * NOTE(review): judging by the S/P letters in the constant names, `rop' is
 * the source-operand form and `pattern' the pattern/brush-operand form of
 * the same operation -- confirm against the users of this table.
 */
101209ff23fSmrgstatic struct {
102209ff23fSmrg    int rop;
103209ff23fSmrg    int pattern;
104209ff23fSmrg} RADEON_ROP[] = {
105209ff23fSmrg    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
106209ff23fSmrg    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
107209ff23fSmrg    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
108209ff23fSmrg    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
109209ff23fSmrg    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
110209ff23fSmrg    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
111209ff23fSmrg    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
112209ff23fSmrg    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
113209ff23fSmrg    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
114209ff23fSmrg    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
115209ff23fSmrg    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
116209ff23fSmrg    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
117209ff23fSmrg    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
118209ff23fSmrg    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
119209ff23fSmrg    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
120209ff23fSmrg    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
121209ff23fSmrg};
122209ff23fSmrg#endif
123209ff23fSmrg
124209ff23fSmrg/* The FIFO has 64 slots.  This routines waits until at least `entries'
125209ff23fSmrg * of these slots are empty.
126209ff23fSmrg */
127209ff23fSmrgvoid RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
128209ff23fSmrg{
129209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
130209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
131209ff23fSmrg    int            i;
132209ff23fSmrg
133209ff23fSmrg    for (;;) {
134209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
135b7e1c893Smrg	    info->accel_state->fifo_slots =
136209ff23fSmrg		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
137b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
138209ff23fSmrg	}
139209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
140209ff23fSmrg		       "FIFO timed out: %u entries, stat=0x%08x\n",
141209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
142209ff23fSmrg		       (unsigned int)INREG(RADEON_RBBM_STATUS));
143209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
144209ff23fSmrg		   "FIFO timed out, resetting engine...\n");
145209ff23fSmrg	RADEONEngineReset(pScrn);
146209ff23fSmrg	RADEONEngineRestore(pScrn);
147209ff23fSmrg#ifdef XF86DRI
148209ff23fSmrg	if (info->directRenderingEnabled) {
149209ff23fSmrg	    RADEONCP_RESET(pScrn, info);
150209ff23fSmrg	    RADEONCP_START(pScrn, info);
151209ff23fSmrg	}
152209ff23fSmrg#endif
153209ff23fSmrg    }
154209ff23fSmrg}
155209ff23fSmrg
156b7e1c893Smrgvoid R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
157b7e1c893Smrg{
158b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
159b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
160b7e1c893Smrg    int            i;
161b7e1c893Smrg
162b7e1c893Smrg    for (;;) {
163b7e1c893Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
164b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
165b7e1c893Smrg		info->accel_state->fifo_slots =
166b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
167b7e1c893Smrg	    else
168b7e1c893Smrg		info->accel_state->fifo_slots =
169b7e1c893Smrg		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
170b7e1c893Smrg	    if (info->accel_state->fifo_slots >= entries) return;
171b7e1c893Smrg	}
172b7e1c893Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
173b7e1c893Smrg		       "FIFO timed out: stat=0x%08x\n",
174b7e1c893Smrg		       (unsigned int)INREG(R600_GRBM_STATUS));
175b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
176b7e1c893Smrg		   "FIFO timed out, resetting engine...\n");
177b7e1c893Smrg	R600EngineReset(pScrn);
178b7e1c893Smrg#ifdef XF86DRI
179b7e1c893Smrg	if (info->directRenderingEnabled) {
180b7e1c893Smrg	    RADEONCP_RESET(pScrn, info);
181b7e1c893Smrg	    RADEONCP_START(pScrn, info);
182b7e1c893Smrg	}
183b7e1c893Smrg#endif
184b7e1c893Smrg    }
185b7e1c893Smrg}
186b7e1c893Smrg
187209ff23fSmrg/* Flush all dirty data in the Pixel Cache to memory */
188209ff23fSmrgvoid RADEONEngineFlush(ScrnInfoPtr pScrn)
189209ff23fSmrg{
190209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
191209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
192209ff23fSmrg    int            i;
193209ff23fSmrg
194209ff23fSmrg    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
195209ff23fSmrg	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
196209ff23fSmrg		RADEON_RB3D_DC_FLUSH_ALL,
197209ff23fSmrg		~RADEON_RB3D_DC_FLUSH_ALL);
198209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
199209ff23fSmrg	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
200209ff23fSmrg		break;
201209ff23fSmrg	}
202209ff23fSmrg	if (i == RADEON_TIMEOUT) {
203209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
204209ff23fSmrg			   "DC flush timeout: %x\n",
205209ff23fSmrg			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
206209ff23fSmrg	}
207209ff23fSmrg    } else {
208209ff23fSmrg	OUTREGP(R300_DSTCACHE_CTLSTAT,
209209ff23fSmrg		R300_RB2D_DC_FLUSH_ALL,
210209ff23fSmrg		~R300_RB2D_DC_FLUSH_ALL);
211209ff23fSmrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
212209ff23fSmrg	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
213209ff23fSmrg		break;
214209ff23fSmrg	}
215209ff23fSmrg	if (i == RADEON_TIMEOUT) {
216209ff23fSmrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
217209ff23fSmrg			   "DC flush timeout: %x\n",
218209ff23fSmrg			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
219209ff23fSmrg	}
220209ff23fSmrg    }
221209ff23fSmrg}
222209ff23fSmrg
223209ff23fSmrg/* Reset graphics card to known state */
224209ff23fSmrgvoid RADEONEngineReset(ScrnInfoPtr pScrn)
225209ff23fSmrg{
226209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
227209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
228209ff23fSmrg    uint32_t       clock_cntl_index;
229209ff23fSmrg    uint32_t       mclk_cntl;
230209ff23fSmrg    uint32_t       rbbm_soft_reset;
231209ff23fSmrg    uint32_t       host_path_cntl;
232209ff23fSmrg
233209ff23fSmrg    /* The following RBBM_SOFT_RESET sequence can help un-wedge
234209ff23fSmrg     * an R300 after the command processor got stuck.
235209ff23fSmrg     */
236209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
237209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
238209ff23fSmrg                                   RADEON_SOFT_RESET_CP |
239209ff23fSmrg                                   RADEON_SOFT_RESET_HI |
240209ff23fSmrg                                   RADEON_SOFT_RESET_SE |
241209ff23fSmrg                                   RADEON_SOFT_RESET_RE |
242209ff23fSmrg                                   RADEON_SOFT_RESET_PP |
243209ff23fSmrg                                   RADEON_SOFT_RESET_E2 |
244209ff23fSmrg                                   RADEON_SOFT_RESET_RB));
245209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
246209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
247209ff23fSmrg                                   ~(RADEON_SOFT_RESET_CP |
248209ff23fSmrg                                     RADEON_SOFT_RESET_HI |
249209ff23fSmrg                                     RADEON_SOFT_RESET_SE |
250209ff23fSmrg                                     RADEON_SOFT_RESET_RE |
251209ff23fSmrg                                     RADEON_SOFT_RESET_PP |
252209ff23fSmrg                                     RADEON_SOFT_RESET_E2 |
253209ff23fSmrg                                     RADEON_SOFT_RESET_RB)));
254209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
255209ff23fSmrg    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
256209ff23fSmrg    INREG(RADEON_RBBM_SOFT_RESET);
257209ff23fSmrg
258209ff23fSmrg    RADEONEngineFlush(pScrn);
259209ff23fSmrg
260209ff23fSmrg    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
261209ff23fSmrg    RADEONPllErrataAfterIndex(info);
262209ff23fSmrg
263209ff23fSmrg#if 0 /* taken care of by new PM code */
264209ff23fSmrg    /* Some ASICs have bugs with dynamic-on feature, which are
265209ff23fSmrg     * ASIC-version dependent, so we force all blocks on for now
266209ff23fSmrg     */
267209ff23fSmrg    if (info->HasCRTC2) {
268209ff23fSmrg	uint32_t tmp;
269209ff23fSmrg
270209ff23fSmrg	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
271209ff23fSmrg	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
272209ff23fSmrg				  RADEON_CP_MAX_DYN_STOP_LAT |
273209ff23fSmrg				  RADEON_SCLK_FORCEON_MASK));
274209ff23fSmrg
275209ff23fSmrg	if (info->ChipFamily == CHIP_FAMILY_RV200) {
276209ff23fSmrg	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
277209ff23fSmrg	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
278209ff23fSmrg	}
279209ff23fSmrg    }
280209ff23fSmrg#endif /* new PM code */
281209ff23fSmrg
282209ff23fSmrg    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
283209ff23fSmrg
284209ff23fSmrg#if 0 /* handled by new PM code */
285209ff23fSmrg    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
286209ff23fSmrg			      RADEON_FORCEON_MCLKA |
287209ff23fSmrg			      RADEON_FORCEON_MCLKB |
288209ff23fSmrg			      RADEON_FORCEON_YCLKA |
289209ff23fSmrg			      RADEON_FORCEON_YCLKB |
290209ff23fSmrg			      RADEON_FORCEON_MC |
291209ff23fSmrg			      RADEON_FORCEON_AIC));
292209ff23fSmrg#endif /* new PM code */
293209ff23fSmrg
294209ff23fSmrg    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
295209ff23fSmrg     * unexpected behaviour on some machines.  Here we use
296209ff23fSmrg     * RADEON_HOST_PATH_CNTL to reset it.
297209ff23fSmrg     */
298209ff23fSmrg    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
299209ff23fSmrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
300209ff23fSmrg
301209ff23fSmrg    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
302209ff23fSmrg	uint32_t tmp;
303209ff23fSmrg
304209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
305209ff23fSmrg					RADEON_SOFT_RESET_CP |
306209ff23fSmrg					RADEON_SOFT_RESET_HI |
307209ff23fSmrg					RADEON_SOFT_RESET_E2));
308209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
309209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
310209ff23fSmrg	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
311209ff23fSmrg	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
312209ff23fSmrg    } else {
313209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
314209ff23fSmrg					RADEON_SOFT_RESET_CP |
315209ff23fSmrg					RADEON_SOFT_RESET_SE |
316209ff23fSmrg					RADEON_SOFT_RESET_RE |
317209ff23fSmrg					RADEON_SOFT_RESET_PP |
318209ff23fSmrg					RADEON_SOFT_RESET_E2 |
319209ff23fSmrg					RADEON_SOFT_RESET_RB));
320209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
321209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
322209ff23fSmrg					~(RADEON_SOFT_RESET_CP |
323209ff23fSmrg					  RADEON_SOFT_RESET_SE |
324209ff23fSmrg					  RADEON_SOFT_RESET_RE |
325209ff23fSmrg					  RADEON_SOFT_RESET_PP |
326209ff23fSmrg					  RADEON_SOFT_RESET_E2 |
327209ff23fSmrg					  RADEON_SOFT_RESET_RB)));
328209ff23fSmrg	INREG(RADEON_RBBM_SOFT_RESET);
329209ff23fSmrg    }
330209ff23fSmrg
331209ff23fSmrg    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
332209ff23fSmrg	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
333209ff23fSmrg
334209ff23fSmrg    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
335209ff23fSmrg    RADEONPllErrataAfterIndex(info);
336209ff23fSmrg    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
337209ff23fSmrg}
338209ff23fSmrg
339b7e1c893Smrg/* Reset graphics card to known state */
340b7e1c893Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn)
341b7e1c893Smrg{
342b7e1c893Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
343b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
344b7e1c893Smrg    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
345b7e1c893Smrg
346b7e1c893Smrg    cp_ptr = INREG(R600_CP_RB_WPTR);
347b7e1c893Smrg
348b7e1c893Smrg    cp_me_cntl = INREG(R600_CP_ME_CNTL);
349b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, 0x10000000);
350b7e1c893Smrg
351b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
352b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
353b7e1c893Smrg    usleep (50);
354b7e1c893Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0);
355b7e1c893Smrg    INREG(R600_GRBM_SOFT_RESET);
356b7e1c893Smrg
357b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
358b7e1c893Smrg    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
359b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, 0x80000000);
360b7e1c893Smrg
361b7e1c893Smrg    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
362b7e1c893Smrg    OUTREG(R600_CP_RB_WPTR, cp_ptr);
363b7e1c893Smrg    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
364b7e1c893Smrg    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
365b7e1c893Smrg
366b7e1c893Smrg}
367b7e1c893Smrg
368209ff23fSmrg/* Restore the acceleration hardware to its previous state */
369209ff23fSmrgvoid RADEONEngineRestore(ScrnInfoPtr pScrn)
370209ff23fSmrg{
371209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
372209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
373209ff23fSmrg
374ad43ddacSmrg    if (info->cs)
375ad43ddacSmrg      return;
376ad43ddacSmrg
377209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
378209ff23fSmrg		   "EngineRestore (%d/%d)\n",
379209ff23fSmrg		   info->CurrentLayout.pixel_code,
380209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
381209ff23fSmrg
382209ff23fSmrg    /* Setup engine location. This shouldn't be necessary since we
383209ff23fSmrg     * set them appropriately before any accel ops, but let's avoid
384209ff23fSmrg     * random bogus DMA in case we inadvertently trigger the engine
385209ff23fSmrg     * in the wrong place (happened).
386209ff23fSmrg     */
387209ff23fSmrg    RADEONWaitForFifo(pScrn, 2);
388b7e1c893Smrg    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
389b7e1c893Smrg    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
390209ff23fSmrg
391209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
392209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
393209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE,
394209ff23fSmrg	    RADEON_HOST_BIG_ENDIAN_EN,
395209ff23fSmrg	    ~RADEON_HOST_BIG_ENDIAN_EN);
396209ff23fSmrg#else
397209ff23fSmrg    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
398209ff23fSmrg#endif
399209ff23fSmrg
400209ff23fSmrg    /* Restore SURFACE_CNTL */
401209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
402209ff23fSmrg
403209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
404209ff23fSmrg    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
405209ff23fSmrg					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
406209ff23fSmrg    RADEONWaitForFifo(pScrn, 1);
407b7e1c893Smrg    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
408209ff23fSmrg				       | RADEON_GMC_BRUSH_SOLID_COLOR
409209ff23fSmrg				       | RADEON_GMC_SRC_DATATYPE_COLOR));
410209ff23fSmrg
411209ff23fSmrg    RADEONWaitForFifo(pScrn, 5);
412209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
413209ff23fSmrg    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
414209ff23fSmrg    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
415209ff23fSmrg    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
416209ff23fSmrg    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
417209ff23fSmrg
418209ff23fSmrg    RADEONWaitForIdleMMIO(pScrn);
419209ff23fSmrg
420b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
421209ff23fSmrg}
422209ff23fSmrg
423ad43ddacSmrgstatic int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
424ad43ddacSmrg{
425ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
426ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_major < 2) {
427ad43ddacSmrg        drm_radeon_getparam_t np;
428ad43ddacSmrg
429ad43ddacSmrg        memset(&np, 0, sizeof(np));
430ad43ddacSmrg        np.param = RADEON_PARAM_NUM_GB_PIPES;
431ad43ddacSmrg        np.value = num_pipes;
432ad43ddacSmrg        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
433ad43ddacSmrg    } else {
434ad43ddacSmrg        struct drm_radeon_info np2;
435ad43ddacSmrg        np2.value = (unsigned long)num_pipes;
436ad43ddacSmrg        np2.request = RADEON_INFO_NUM_GB_PIPES;
437ad43ddacSmrg        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
438ad43ddacSmrg    }
439ad43ddacSmrg}
440ad43ddacSmrg
441209ff23fSmrg/* Initialize the acceleration hardware */
442209ff23fSmrgvoid RADEONEngineInit(ScrnInfoPtr pScrn)
443209ff23fSmrg{
444209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
445209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
446b7e1c893Smrg    int datatype = 0;
447b7e1c893Smrg    info->accel_state->num_gb_pipes = 0;
448209ff23fSmrg
449209ff23fSmrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
450209ff23fSmrg		   "EngineInit (%d/%d)\n",
451209ff23fSmrg		   info->CurrentLayout.pixel_code,
452209ff23fSmrg		   info->CurrentLayout.bitsPerPixel);
453209ff23fSmrg
454209ff23fSmrg#ifdef XF86DRI
455209ff23fSmrg    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
456209ff23fSmrg	int num_pipes;
457209ff23fSmrg
458ad43ddacSmrg	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
459209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
460209ff23fSmrg		       "Failed to determine num pipes from DRM, falling back to "
461209ff23fSmrg		       "manual look-up!\n");
462b7e1c893Smrg	    info->accel_state->num_gb_pipes = 0;
463209ff23fSmrg	} else {
464b7e1c893Smrg	    info->accel_state->num_gb_pipes = num_pipes;
465209ff23fSmrg	}
466209ff23fSmrg    }
467209ff23fSmrg#endif
468209ff23fSmrg
469ad43ddacSmrg    if (!info->cs) {
470ad43ddacSmrg	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
471ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
472ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
473ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
474ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
475ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
476ad43ddacSmrg	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
477ad43ddacSmrg	    IS_R500_3D) {
478ad43ddacSmrg	    if (info->accel_state->num_gb_pipes == 0) {
479ad43ddacSmrg		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
480ad43ddacSmrg
481ad43ddacSmrg		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
482ad43ddacSmrg		if (IS_R500_3D)
483ad43ddacSmrg		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
484ad43ddacSmrg	    }
485ad43ddacSmrg	} else {
486ad43ddacSmrg	    if (info->accel_state->num_gb_pipes == 0) {
487ad43ddacSmrg		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
488ad43ddacSmrg		    (info->ChipFamily == CHIP_FAMILY_R350)) {
489ad43ddacSmrg		    /* R3xx chips */
490ad43ddacSmrg		    info->accel_state->num_gb_pipes = 2;
491ad43ddacSmrg		} else {
492ad43ddacSmrg		    /* RV3xx chips */
493ad43ddacSmrg		    info->accel_state->num_gb_pipes = 1;
494ad43ddacSmrg		}
495209ff23fSmrg	    }
496209ff23fSmrg	}
497209ff23fSmrg
4982f39173dSmrg	/* SE cards only have 1 quadpipe */
499ad43ddacSmrg	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
5002f39173dSmrg	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
5012f39173dSmrg	    (info->Chipset == PCI_CHIP_R300_AD) ||
5022f39173dSmrg	    (info->Chipset == PCI_CHIP_R350_AH))
503ad43ddacSmrg	    info->accel_state->num_gb_pipes = 1;
504ad43ddacSmrg
505ad43ddacSmrg	if (IS_R300_3D || IS_R500_3D)
506ad43ddacSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
507ad43ddacSmrg		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
508ad43ddacSmrg
509ad43ddacSmrg	if (IS_R300_3D || IS_R500_3D) {
510ad43ddacSmrg	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
511ad43ddacSmrg
512ad43ddacSmrg	    switch(info->accel_state->num_gb_pipes) {
513ad43ddacSmrg	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
514ad43ddacSmrg	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
515ad43ddacSmrg	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
516ad43ddacSmrg	    default:
517ad43ddacSmrg	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
518ad43ddacSmrg	    }
519209ff23fSmrg
520ad43ddacSmrg	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
521ad43ddacSmrg	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
522ad43ddacSmrg	    if (info->ChipFamily >= CHIP_FAMILY_R420)
523ad43ddacSmrg		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
524ad43ddacSmrg	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
525ad43ddacSmrg					     R300_DC_AUTOFLUSH_ENABLE |
526ad43ddacSmrg					     R300_DC_DC_DISABLE_IGNORE_PE));
527ad43ddacSmrg	} else
528ad43ddacSmrg	    OUTREG(RADEON_RB3D_CNTL, 0);
529ad43ddacSmrg
530ad43ddacSmrg	RADEONEngineReset(pScrn);
531ad43ddacSmrg    }
532209ff23fSmrg
533209ff23fSmrg    switch (info->CurrentLayout.pixel_code) {
534b7e1c893Smrg    case 8:  datatype = 2; break;
535b7e1c893Smrg    case 15: datatype = 3; break;
536b7e1c893Smrg    case 16: datatype = 4; break;
537b7e1c893Smrg    case 24: datatype = 5; break;
538b7e1c893Smrg    case 32: datatype = 6; break;
539209ff23fSmrg    default:
540209ff23fSmrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
541209ff23fSmrg		       "Unknown depth/bpp = %d/%d (code = %d)\n",
542209ff23fSmrg		       info->CurrentLayout.depth,
543209ff23fSmrg		       info->CurrentLayout.bitsPerPixel,
544209ff23fSmrg		       info->CurrentLayout.pixel_code);
545209ff23fSmrg    }
546209ff23fSmrg
547b7e1c893Smrg    info->accel_state->dp_gui_master_cntl =
548b7e1c893Smrg	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
549209ff23fSmrg	 | RADEON_GMC_CLR_CMP_CNTL_DIS
550209ff23fSmrg	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
551209ff23fSmrg
552209ff23fSmrg    RADEONEngineRestore(pScrn);
553209ff23fSmrg}
554209ff23fSmrg
555ad43ddacSmrguint32_t radeonGetPixmapOffset(PixmapPtr pPix)
556ad43ddacSmrg{
557ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
558ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
559ad43ddacSmrg    uint32_t offset = 0;
560ad43ddacSmrg    if (info->cs)
561ad43ddacSmrg	return 0;
562ad43ddacSmrg#ifdef USE_EXA
563ad43ddacSmrg    if (info->useEXA) {
564ad43ddacSmrg	offset = exaGetPixmapOffset(pPix);
565ad43ddacSmrg    } else
566ad43ddacSmrg#endif
567ad43ddacSmrg    {
568ad43ddacSmrg	offset = pPix->devPrivate.ptr - info->FB;
569ad43ddacSmrg    }
570ad43ddacSmrg    offset += info->fbLocation + pScrn->fbOffset;
571ad43ddacSmrg    return offset;
572ad43ddacSmrg}
573209ff23fSmrg
5742f39173dSmrgint radeon_cs_space_remaining(ScrnInfoPtr pScrn)
5752f39173dSmrg{
5762f39173dSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
5772f39173dSmrg
5782f39173dSmrg#ifdef XF86DRM_MODE
5792f39173dSmrg    if (info->cs)
5802f39173dSmrg	return (info->cs->ndw - info->cs->cdw);
5812f39173dSmrg    else
5822f39173dSmrg#endif
5832f39173dSmrg        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
5842f39173dSmrg}
5852f39173dSmrg
/*
 * MMIO instantiation of the shared acceleration sources.
 *
 * ACCEL_MMIO is defined and the ACCEL_* helper macros are mapped onto direct
 * register access: ACCEL_PREAMBLE() declares RADEONMMIO from info->MMIO,
 * BEGIN_ACCEL(n) waits for n FIFO slots via RADEONWaitForFifo(),
 * OUT_ACCEL_REG() is a plain OUTREG() and FINISH_ACCEL() expands to nothing.
 * The .c files below are then textually included (radeon_render.c only when
 * both RENDER and USE_XAA are defined) and all five macros are undefined
 * again afterwards.
 * NOTE(review): presumably the included files use these macros to emit the
 * MMIO flavour of each accel function -- confirm in those files.
 */
586209ff23fSmrg#define ACCEL_MMIO
587209ff23fSmrg#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
588209ff23fSmrg#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
589209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
590209ff23fSmrg#define FINISH_ACCEL()
591209ff23fSmrg
592209ff23fSmrg#include "radeon_commonfuncs.c"
593209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
594209ff23fSmrg#include "radeon_render.c"
595209ff23fSmrg#endif
596209ff23fSmrg#include "radeon_accelfuncs.c"
597209ff23fSmrg
598209ff23fSmrg#undef ACCEL_MMIO
599209ff23fSmrg#undef ACCEL_PREAMBLE
600209ff23fSmrg#undef BEGIN_ACCEL
601209ff23fSmrg#undef OUT_ACCEL_REG
602209ff23fSmrg#undef FINISH_ACCEL
603209ff23fSmrg
604209ff23fSmrg#ifdef XF86DRI
605209ff23fSmrg
/*
 * CP instantiation of the shared acceleration sources (XF86DRI builds only).
 *
 * The same .c files are included a second time, now with ACCEL_CP defined
 * and the ACCEL_* helper macros mapped onto the ring macros:
 * ACCEL_PREAMBLE() expands to RING_LOCALS plus RADEONCP_REFRESH(),
 * BEGIN_ACCEL(n) to BEGIN_RING(2*(n)), OUT_ACCEL_REG() to OUT_RING_REG() and
 * FINISH_ACCEL() to ADVANCE_RING().  All five macros are undefined again
 * afterwards.
 * NOTE(review): the 2*(n) presumably reserves two ring entries per register
 * write -- confirm against BEGIN_RING/OUT_RING_REG.
 */
606209ff23fSmrg#define ACCEL_CP
607209ff23fSmrg#define ACCEL_PREAMBLE()						\
608209ff23fSmrg    RING_LOCALS;							\
609209ff23fSmrg    RADEONCP_REFRESH(pScrn, info)
610209ff23fSmrg#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
611209ff23fSmrg#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
612209ff23fSmrg#define FINISH_ACCEL()          ADVANCE_RING()
613209ff23fSmrg
614209ff23fSmrg
615209ff23fSmrg#include "radeon_commonfuncs.c"
616209ff23fSmrg#if defined(RENDER) && defined(USE_XAA)
617209ff23fSmrg#include "radeon_render.c"
618209ff23fSmrg#endif
619209ff23fSmrg#include "radeon_accelfuncs.c"
620209ff23fSmrg
621209ff23fSmrg#undef ACCEL_CP
622209ff23fSmrg#undef ACCEL_PREAMBLE
623209ff23fSmrg#undef BEGIN_ACCEL
624209ff23fSmrg#undef OUT_ACCEL_REG
625209ff23fSmrg#undef FINISH_ACCEL
626209ff23fSmrg
627209ff23fSmrg/* Stop the CP */
628209ff23fSmrgint RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
629209ff23fSmrg{
630b7e1c893Smrg    drm_radeon_cp_stop_t  stop;
631209ff23fSmrg    int              ret, i;
632209ff23fSmrg
633209ff23fSmrg    stop.flush = 1;
634209ff23fSmrg    stop.idle  = 1;
635209ff23fSmrg
636b7e1c893Smrg    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
637b7e1c893Smrg			  sizeof(drm_radeon_cp_stop_t));
638209ff23fSmrg
639209ff23fSmrg    if (ret == 0) {
640209ff23fSmrg	return 0;
641209ff23fSmrg    } else if (errno != EBUSY) {
642209ff23fSmrg	return -errno;
643209ff23fSmrg    }
644209ff23fSmrg
645209ff23fSmrg    stop.flush = 0;
646209ff23fSmrg
647209ff23fSmrg    i = 0;
648209ff23fSmrg    do {
649b7e1c893Smrg	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
650b7e1c893Smrg			      sizeof(drm_radeon_cp_stop_t));
651209ff23fSmrg    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
652209ff23fSmrg
653209ff23fSmrg    if (ret == 0) {
654209ff23fSmrg	return 0;
655209ff23fSmrg    } else if (errno != EBUSY) {
656209ff23fSmrg	return -errno;
657209ff23fSmrg    }
658209ff23fSmrg
659209ff23fSmrg    stop.idle = 0;
660209ff23fSmrg
661b7e1c893Smrg    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
662b7e1c893Smrg			&stop, sizeof(drm_radeon_cp_stop_t))) {
663209ff23fSmrg	return -errno;
664209ff23fSmrg    } else {
665209ff23fSmrg	return 0;
666209ff23fSmrg    }
667209ff23fSmrg}
668209ff23fSmrg
669ad43ddacSmrg#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
670ad43ddacSmrg
671209ff23fSmrg/* Get an indirect buffer for the CP 2D acceleration commands  */
672209ff23fSmrgdrmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
673209ff23fSmrg{
674209ff23fSmrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
675209ff23fSmrg    drmDMAReq      dma;
676209ff23fSmrg    drmBufPtr      buf = NULL;
677209ff23fSmrg    int            indx = 0;
678209ff23fSmrg    int            size = 0;
679209ff23fSmrg    int            i = 0;
680209ff23fSmrg    int            ret;
681209ff23fSmrg
682209ff23fSmrg#if 0
683209ff23fSmrg    /* FIXME: pScrn->pScreen has not been initialized when this is first
684209ff23fSmrg     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
685209ff23fSmrg     * the screen index from pScrn, which is initialized, and then get
686209ff23fSmrg     * the screen from screenInfo.screens[index], but that is a hack.
687209ff23fSmrg     */
688209ff23fSmrg    dma.context = DRIGetContext(pScrn->pScreen);
689209ff23fSmrg#else
690209ff23fSmrg    /* This is the X server's context */
691209ff23fSmrg    dma.context = 0x00000001;
692209ff23fSmrg#endif
693209ff23fSmrg
694209ff23fSmrg    dma.send_count    = 0;
695209ff23fSmrg    dma.send_list     = NULL;
696209ff23fSmrg    dma.send_sizes    = NULL;
697209ff23fSmrg    dma.flags         = 0;
698209ff23fSmrg    dma.request_count = 1;
699209ff23fSmrg    dma.request_size  = RADEON_BUFFER_SIZE;
700209ff23fSmrg    dma.request_list  = &indx;
701209ff23fSmrg    dma.request_sizes = &size;
702209ff23fSmrg    dma.granted_count = 0;
703209ff23fSmrg
704209ff23fSmrg    while (1) {
705209ff23fSmrg	do {
706b7e1c893Smrg	    ret = drmDMA(info->dri->drmFD, &dma);
707209ff23fSmrg	    if (ret && ret != -EBUSY) {
708209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
709209ff23fSmrg			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
710209ff23fSmrg	    }
711209ff23fSmrg	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
712209ff23fSmrg
713209ff23fSmrg	if (ret == 0) {
714b7e1c893Smrg	    buf = &info->dri->buffers->list[indx];
715209ff23fSmrg	    buf->used = 0;
716209ff23fSmrg	    if (RADEON_VERBOSE) {
717209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
718209ff23fSmrg			   "   GetBuffer returning %d %p\n",
719209ff23fSmrg			   buf->idx, buf->address);
720209ff23fSmrg	    }
721209ff23fSmrg	    return buf;
722209ff23fSmrg	}
723209ff23fSmrg
724209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
725209ff23fSmrg		   "GetBuffer timed out, resetting engine...\n");
726b7e1c893Smrg
727b7e1c893Smrg	if (info->ChipFamily < CHIP_FAMILY_R600) {
728b7e1c893Smrg	    RADEONEngineReset(pScrn);
729b7e1c893Smrg	    RADEONEngineRestore(pScrn);
730b7e1c893Smrg	} else
731b7e1c893Smrg	    R600EngineReset(pScrn);
732209ff23fSmrg
733209ff23fSmrg	/* Always restart the engine when doing CP 2D acceleration */
734209ff23fSmrg	RADEONCP_RESET(pScrn, info);
735209ff23fSmrg	RADEONCP_START(pScrn, info);
736209ff23fSmrg    }
737209ff23fSmrg}
738209ff23fSmrg
739209ff23fSmrg/* Flush the indirect buffer to the kernel for submission to the card */
740209ff23fSmrgvoid RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
741209ff23fSmrg{
742209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
743b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
744b7e1c893Smrg    int                start  = info->cp->indirectStart;
745b7e1c893Smrg    drm_radeon_indirect_t  indirect;
746209ff23fSmrg
747ad43ddacSmrg    assert(!info->cs);
748209ff23fSmrg    if (!buffer) return;
749209ff23fSmrg    if (start == buffer->used && !discard) return;
750209ff23fSmrg
751209ff23fSmrg    if (RADEON_VERBOSE) {
752209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
753209ff23fSmrg		   buffer->idx);
754209ff23fSmrg    }
755209ff23fSmrg
756b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
757b7e1c893Smrg	if (buffer->used & 0x3c) {
758b7e1c893Smrg	    RING_LOCALS;
759b7e1c893Smrg
760b7e1c893Smrg	    while (buffer->used & 0x3c) {
761b7e1c893Smrg		BEGIN_RING(1);
762b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
763b7e1c893Smrg		ADVANCE_RING();
764b7e1c893Smrg	    }
765b7e1c893Smrg	}
766b7e1c893Smrg    }
767b7e1c893Smrg
768209ff23fSmrg    indirect.idx     = buffer->idx;
769209ff23fSmrg    indirect.start   = start;
770209ff23fSmrg    indirect.end     = buffer->used;
771209ff23fSmrg    indirect.discard = discard;
772209ff23fSmrg
773b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
774b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
775209ff23fSmrg
776209ff23fSmrg    if (discard) {
777b7e1c893Smrg	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
778b7e1c893Smrg	info->cp->indirectStart  = 0;
779209ff23fSmrg    } else {
780209ff23fSmrg	/* Start on a double word boundary */
781ad43ddacSmrg	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
782209ff23fSmrg	if (RADEON_VERBOSE) {
783209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
784b7e1c893Smrg		       info->cp->indirectStart);
785209ff23fSmrg	}
786209ff23fSmrg    }
787209ff23fSmrg}
788209ff23fSmrg
789209ff23fSmrg/* Flush and release the indirect buffer */
790209ff23fSmrgvoid RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
791209ff23fSmrg{
792209ff23fSmrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
793b7e1c893Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
794b7e1c893Smrg    int                start  = info->cp->indirectStart;
795b7e1c893Smrg    drm_radeon_indirect_t  indirect;
796b7e1c893Smrg
797ad43ddacSmrg    assert(!info->cs);
798b7e1c893Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
799b7e1c893Smrg	if (buffer && (buffer->used & 0x3c)) {
800b7e1c893Smrg	    RING_LOCALS;
801b7e1c893Smrg
802b7e1c893Smrg	    while (buffer->used & 0x3c) {
803b7e1c893Smrg		BEGIN_RING(1);
804b7e1c893Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
805b7e1c893Smrg		ADVANCE_RING();
806b7e1c893Smrg	    }
807b7e1c893Smrg	}
808b7e1c893Smrg    }
809209ff23fSmrg
810b7e1c893Smrg    info->cp->indirectBuffer = NULL;
811b7e1c893Smrg    info->cp->indirectStart  = 0;
812209ff23fSmrg
813209ff23fSmrg    if (!buffer) return;
814209ff23fSmrg
815209ff23fSmrg    if (RADEON_VERBOSE) {
816209ff23fSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
817209ff23fSmrg		   buffer->idx);
818209ff23fSmrg    }
819209ff23fSmrg
820209ff23fSmrg    indirect.idx     = buffer->idx;
821209ff23fSmrg    indirect.start   = start;
822209ff23fSmrg    indirect.end     = buffer->used;
823209ff23fSmrg    indirect.discard = 1;
824209ff23fSmrg
825b7e1c893Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
826b7e1c893Smrg			&indirect, sizeof(drm_radeon_indirect_t));
827209ff23fSmrg}
828209ff23fSmrg
829209ff23fSmrg/** \brief Calculate HostDataBlit parameters from pointer and pitch
830209ff23fSmrg *
831209ff23fSmrg * This is a helper for the trivial HostDataBlit users that don't need to worry
832209ff23fSmrg * about tiling etc.
833209ff23fSmrg */
834209ff23fSmrgvoid
835209ff23fSmrgRADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
836209ff23fSmrg		     uint32_t *dstPitchOff, int *x, int *y)
837209ff23fSmrg{
838209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
839209ff23fSmrg    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
840209ff23fSmrg
841209ff23fSmrg    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
842209ff23fSmrg    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
843209ff23fSmrg    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
844209ff23fSmrg}
845209ff23fSmrg
846209ff23fSmrg/* Set up a hostdata blit to transfer data from system memory to the
847209ff23fSmrg * framebuffer. Returns the address where the data can be written to and sets
848209ff23fSmrg * the dstPitch and hpass variables as required.
849209ff23fSmrg */
850209ff23fSmrguint8_t*
851209ff23fSmrgRADEONHostDataBlit(
852209ff23fSmrg    ScrnInfoPtr pScrn,
853209ff23fSmrg    unsigned int cpp,
854209ff23fSmrg    unsigned int w,
855209ff23fSmrg    uint32_t dstPitchOff,
856209ff23fSmrg    uint32_t *bufPitch,
857209ff23fSmrg    int x,
858209ff23fSmrg    int *y,
859209ff23fSmrg    unsigned int *h,
860209ff23fSmrg    unsigned int *hpass
861209ff23fSmrg){
862209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
863209ff23fSmrg    uint32_t format, dwords;
864209ff23fSmrg    uint8_t *ret;
865209ff23fSmrg    RING_LOCALS;
866209ff23fSmrg
867209ff23fSmrg    if ( *h == 0 )
868209ff23fSmrg    {
869209ff23fSmrg	return NULL;
870209ff23fSmrg    }
871209ff23fSmrg
872209ff23fSmrg    switch ( cpp )
873209ff23fSmrg    {
874209ff23fSmrg    case 4:
875209ff23fSmrg	format = RADEON_GMC_DST_32BPP;
876209ff23fSmrg	*bufPitch = 4 * w;
877209ff23fSmrg	break;
878209ff23fSmrg    case 2:
879209ff23fSmrg	format = RADEON_GMC_DST_16BPP;
880ad43ddacSmrg	*bufPitch = 2 * RADEON_ALIGN(w, 2);
881209ff23fSmrg	break;
882209ff23fSmrg    case 1:
883209ff23fSmrg	format = RADEON_GMC_DST_8BPP_CI;
884ad43ddacSmrg	*bufPitch = RADEON_ALIGN(w, 4);
885209ff23fSmrg	break;
886209ff23fSmrg    default:
887209ff23fSmrg	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
888209ff23fSmrg		    "%s: Unsupported cpp %d!\n", __func__, cpp );
889209ff23fSmrg	return NULL;
890209ff23fSmrg    }
891209ff23fSmrg
892209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
893209ff23fSmrg    /* Swap doesn't work on R300 and later, it's handled during the
894209ff23fSmrg     * copy to ind. buffer pass
895209ff23fSmrg     */
896209ff23fSmrg    if (info->ChipFamily < CHIP_FAMILY_R300) {
897209ff23fSmrg        BEGIN_RING(2);
898209ff23fSmrg	if (cpp == 2)
899209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
900209ff23fSmrg			 RADEON_HOST_DATA_SWAP_HDW);
901209ff23fSmrg	else if (cpp == 1)
902209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
903209ff23fSmrg			 RADEON_HOST_DATA_SWAP_32BIT);
904209ff23fSmrg	else
905209ff23fSmrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
906209ff23fSmrg			 RADEON_HOST_DATA_SWAP_NONE);
907209ff23fSmrg	ADVANCE_RING();
908209ff23fSmrg    }
909209ff23fSmrg#endif
910209ff23fSmrg
911209ff23fSmrg    /*RADEON_PURGE_CACHE();
912209ff23fSmrg      RADEON_WAIT_UNTIL_IDLE();*/
913209ff23fSmrg
914209ff23fSmrg    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
915209ff23fSmrg    dwords = *hpass * *bufPitch / 4;
916209ff23fSmrg
917209ff23fSmrg    BEGIN_RING( dwords + 10 );
918209ff23fSmrg    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
919209ff23fSmrg    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
920209ff23fSmrg	    | RADEON_GMC_DST_CLIPPING
921209ff23fSmrg	    | RADEON_GMC_BRUSH_NONE
922209ff23fSmrg	    | format
923209ff23fSmrg	    | RADEON_GMC_SRC_DATATYPE_COLOR
924209ff23fSmrg	    | RADEON_ROP3_S
925209ff23fSmrg	    | RADEON_DP_SRC_SOURCE_HOST_DATA
926209ff23fSmrg	    | RADEON_GMC_CLR_CMP_CNTL_DIS
927209ff23fSmrg	    | RADEON_GMC_WR_MSK_DIS );
928209ff23fSmrg    OUT_RING( dstPitchOff );
929209ff23fSmrg    OUT_RING( (*y << 16) | x );
930209ff23fSmrg    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
931209ff23fSmrg    OUT_RING( 0xffffffff );
932209ff23fSmrg    OUT_RING( 0xffffffff );
933209ff23fSmrg    OUT_RING( *y << 16 | x );
934209ff23fSmrg    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
935209ff23fSmrg    OUT_RING( dwords );
936209ff23fSmrg
937209ff23fSmrg    ret = ( uint8_t* )&__head[__count];
938209ff23fSmrg
939209ff23fSmrg    __count += dwords;
940209ff23fSmrg    ADVANCE_RING();
941209ff23fSmrg
942209ff23fSmrg    *y += *hpass;
943209ff23fSmrg    *h -= *hpass;
944209ff23fSmrg
945209ff23fSmrg    return ret;
946209ff23fSmrg}
947209ff23fSmrg
948209ff23fSmrgvoid RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
949209ff23fSmrg{
950209ff23fSmrg    switch(swap) {
951209ff23fSmrg    case RADEON_HOST_DATA_SWAP_HDW:
952209ff23fSmrg        {
953209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
954209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
955209ff23fSmrg	    unsigned int nwords = size >> 2;
956209ff23fSmrg
957209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
958209ff23fSmrg		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
959209ff23fSmrg	    return;
960209ff23fSmrg        }
961209ff23fSmrg    case RADEON_HOST_DATA_SWAP_32BIT:
962209ff23fSmrg        {
963209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
964209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
965209ff23fSmrg	    unsigned int nwords = size >> 2;
966209ff23fSmrg
967209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
968209ff23fSmrg#ifdef __powerpc__
969209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
970209ff23fSmrg#else
971209ff23fSmrg		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
972209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
973209ff23fSmrg#endif
974209ff23fSmrg	    return;
975209ff23fSmrg        }
976209ff23fSmrg    case RADEON_HOST_DATA_SWAP_16BIT:
977209ff23fSmrg        {
978209ff23fSmrg	    unsigned short *d = (unsigned short *)dst;
979209ff23fSmrg	    unsigned short *s = (unsigned short *)src;
980209ff23fSmrg	    unsigned int nwords = size >> 1;
981209ff23fSmrg
982209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
983209ff23fSmrg#ifdef __powerpc__
984209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
985209ff23fSmrg#else
986209ff23fSmrg	        *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
987209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
988209ff23fSmrg#endif
989209ff23fSmrg	    return;
990209ff23fSmrg	}
991209ff23fSmrg    }
992209ff23fSmrg    if (src != dst)
993ad43ddacSmrg	memcpy(dst, src, size);
994209ff23fSmrg}
995209ff23fSmrg
996209ff23fSmrg/* Copies a single pass worth of data for a hostdata blit set up by
997209ff23fSmrg * RADEONHostDataBlit().
998209ff23fSmrg */
999209ff23fSmrgvoid
1000209ff23fSmrgRADEONHostDataBlitCopyPass(
1001209ff23fSmrg    ScrnInfoPtr pScrn,
1002209ff23fSmrg    unsigned int cpp,
1003209ff23fSmrg    uint8_t *dst,
1004209ff23fSmrg    uint8_t *src,
1005209ff23fSmrg    unsigned int hpass,
1006209ff23fSmrg    unsigned int dstPitch,
1007209ff23fSmrg    unsigned int srcPitch
1008209ff23fSmrg){
1009209ff23fSmrg
1010209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1011209ff23fSmrg    RADEONInfoPtr info = RADEONPTR( pScrn );
1012209ff23fSmrg#endif
1013209ff23fSmrg
1014209ff23fSmrg    /* RADEONHostDataBlitCopy can return NULL ! */
1015209ff23fSmrg    if( (dst==NULL) || (src==NULL)) return;
1016209ff23fSmrg
1017209ff23fSmrg    if ( dstPitch == srcPitch )
1018209ff23fSmrg    {
1019209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1020209ff23fSmrg        if (info->ChipFamily >= CHIP_FAMILY_R300) {
1021209ff23fSmrg	    switch(cpp) {
1022209ff23fSmrg	    case 1:
1023209ff23fSmrg		RADEONCopySwap(dst, src, hpass * dstPitch,
1024209ff23fSmrg			       RADEON_HOST_DATA_SWAP_32BIT);
1025209ff23fSmrg		return;
1026209ff23fSmrg	    case 2:
1027209ff23fSmrg	        RADEONCopySwap(dst, src, hpass * dstPitch,
1028209ff23fSmrg			       RADEON_HOST_DATA_SWAP_HDW);
1029209ff23fSmrg		return;
1030209ff23fSmrg	    }
1031209ff23fSmrg	}
1032209ff23fSmrg#endif
1033209ff23fSmrg	memcpy( dst, src, hpass * dstPitch );
1034209ff23fSmrg    }
1035209ff23fSmrg    else
1036209ff23fSmrg    {
1037209ff23fSmrg	unsigned int minPitch = min( dstPitch, srcPitch );
1038209ff23fSmrg	while ( hpass-- )
1039209ff23fSmrg	{
1040209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1041209ff23fSmrg            if (info->ChipFamily >= CHIP_FAMILY_R300) {
1042209ff23fSmrg		switch(cpp) {
1043209ff23fSmrg		case 1:
1044209ff23fSmrg		    RADEONCopySwap(dst, src, minPitch,
1045209ff23fSmrg				   RADEON_HOST_DATA_SWAP_32BIT);
1046209ff23fSmrg		    goto next;
1047209ff23fSmrg		case 2:
1048209ff23fSmrg	            RADEONCopySwap(dst, src, minPitch,
1049209ff23fSmrg				   RADEON_HOST_DATA_SWAP_HDW);
1050209ff23fSmrg		    goto next;
1051209ff23fSmrg		}
1052209ff23fSmrg	    }
1053209ff23fSmrg#endif
1054209ff23fSmrg	    memcpy( dst, src, minPitch );
1055209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1056209ff23fSmrg	next:
1057209ff23fSmrg#endif
1058209ff23fSmrg	    src += srcPitch;
1059209ff23fSmrg	    dst += dstPitch;
1060209ff23fSmrg	}
1061209ff23fSmrg    }
1062209ff23fSmrg}
1063209ff23fSmrg
1064209ff23fSmrg#endif
1065209ff23fSmrg
1066209ff23fSmrgBool RADEONAccelInit(ScreenPtr pScreen)
1067209ff23fSmrg{
1068209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1069209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1070209ff23fSmrg
1071209ff23fSmrg#ifdef USE_EXA
1072209ff23fSmrg    if (info->useEXA) {
1073209ff23fSmrg# ifdef XF86DRI
1074209ff23fSmrg	if (info->directRenderingEnabled) {
1075b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
1076b7e1c893Smrg		if (!R600DrawInit(pScreen))
1077b7e1c893Smrg		    return FALSE;
1078b7e1c893Smrg	    } else {
1079b7e1c893Smrg		if (!RADEONDrawInitCP(pScreen))
1080b7e1c893Smrg		    return FALSE;
1081b7e1c893Smrg	    }
1082209ff23fSmrg	} else
1083209ff23fSmrg# endif /* XF86DRI */
1084209ff23fSmrg	{
1085b7e1c893Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600)
1086209ff23fSmrg		return FALSE;
1087b7e1c893Smrg	    else {
1088b7e1c893Smrg		if (!RADEONDrawInitMMIO(pScreen))
1089b7e1c893Smrg		    return FALSE;
1090b7e1c893Smrg	    }
1091209ff23fSmrg	}
1092209ff23fSmrg    }
1093209ff23fSmrg#endif /* USE_EXA */
1094209ff23fSmrg#ifdef USE_XAA
1095209ff23fSmrg    if (!info->useEXA) {
1096209ff23fSmrg	XAAInfoRecPtr  a;
1097209ff23fSmrg
1098b7e1c893Smrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
1099b7e1c893Smrg	    return FALSE;
1100b7e1c893Smrg
1101b7e1c893Smrg	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
1102209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
1103209ff23fSmrg	    return FALSE;
1104209ff23fSmrg	}
1105209ff23fSmrg
1106209ff23fSmrg#ifdef XF86DRI
1107209ff23fSmrg	if (info->directRenderingEnabled)
1108209ff23fSmrg	    RADEONAccelInitCP(pScreen, a);
1109209ff23fSmrg	else
1110209ff23fSmrg#endif /* XF86DRI */
1111209ff23fSmrg	    RADEONAccelInitMMIO(pScreen, a);
1112209ff23fSmrg
1113209ff23fSmrg	RADEONEngineInit(pScrn);
1114209ff23fSmrg
1115209ff23fSmrg	if (!XAAInit(pScreen, a)) {
1116209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
1117209ff23fSmrg	    return FALSE;
1118209ff23fSmrg	}
1119209ff23fSmrg    }
1120209ff23fSmrg#endif /* USE_XAA */
1121209ff23fSmrg    return TRUE;
1122209ff23fSmrg}
1123209ff23fSmrg
1124209ff23fSmrgvoid RADEONInit3DEngine(ScrnInfoPtr pScrn)
1125209ff23fSmrg{
1126209ff23fSmrg    RADEONInfoPtr info = RADEONPTR (pScrn);
1127209ff23fSmrg
1128209ff23fSmrg#ifdef XF86DRI
1129209ff23fSmrg    if (info->directRenderingEnabled) {
1130b7e1c893Smrg	drm_radeon_sarea_t *pSAREAPriv;
1131209ff23fSmrg
1132ad43ddacSmrg	if (!info->kms_enabled) {
1133ad43ddacSmrg	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
1134ad43ddacSmrg	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1135ad43ddacSmrg	}
1136209ff23fSmrg	RADEONInit3DEngineCP(pScrn);
1137209ff23fSmrg    } else
1138209ff23fSmrg#endif
1139209ff23fSmrg	RADEONInit3DEngineMMIO(pScrn);
1140209ff23fSmrg
1141b7e1c893Smrg    info->accel_state->XInited3D = TRUE;
1142209ff23fSmrg}
1143209ff23fSmrg
1144209ff23fSmrg#ifdef USE_XAA
1145209ff23fSmrg#ifdef XF86DRI
/*
 * Lay out video memory for XAA when DRI is in use.
 *
 * Computes sizes and offsets of the front, back and depth buffers and of
 * the local texture heap (all stored in info->dri), initialises the
 * framebuffer manager, reserves a small offscreen area, allocates the PCI
 * GART table and finally derives the packed pitch/offset words for the
 * front, back and depth buffers.
 *
 * Offsets are assigned from the end of the framebuffer downwards:
 * textures, then the depth buffer below them, then the back buffer below
 * that (or on top of the depth buffer when the back buffer is disabled).
 *
 * Returns FALSE when xf86InitFBManager() or
 * xf86QueryLargestOffscreenArea() fails, TRUE otherwise.  Failure to
 * reserve the offscreen area is only logged.
 */
1146209ff23fSmrgBool
1147209ff23fSmrgRADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
1148209ff23fSmrg{
1149209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1150209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1151209ff23fSmrg    int            cpp = info->CurrentLayout.pixel_bytes;
    /* Bytes per depth sample: yields 2 for 16 depth bits, 4 for 24. */
1152b7e1c893Smrg    int            depthCpp = (info->dri->depthBits - 8) / 4;
1153209ff23fSmrg    int            width_bytes = pScrn->displayWidth * cpp;
1154209ff23fSmrg    int            bufferSize;
1155209ff23fSmrg    int            depthSize;
1156209ff23fSmrg    int            l;
1157209ff23fSmrg    int            scanlines;
1158209ff23fSmrg    int            texsizerequest;
1159209ff23fSmrg    BoxRec         MemBox;
1160209ff23fSmrg    FBAreaPtr      fbarea;
1161209ff23fSmrg
    /* The front buffer sits at offset 0; front and back buffer both use
     * the display width as pitch.
     */
1162b7e1c893Smrg    info->dri->frontOffset = 0;
1163b7e1c893Smrg    info->dri->frontPitch = pScrn->displayWidth;
1164b7e1c893Smrg    info->dri->backPitch = pScrn->displayWidth;
1165209ff23fSmrg
1166209ff23fSmrg    /* make sure we use 16 line alignment for tiling (8 might be enough).
1167209ff23fSmrg     * Might need that for non-XF86DRI too?
1168209ff23fSmrg     */
1169209ff23fSmrg    if (info->allowColorTiling) {
1170ad43ddacSmrg	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
1171ad43ddacSmrg		      RADEON_GPU_PAGE_SIZE);
1172209ff23fSmrg    } else {
1173ad43ddacSmrg        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
1174ad43ddacSmrg		      RADEON_GPU_PAGE_SIZE);
1175209ff23fSmrg    }
1176209ff23fSmrg
1177209ff23fSmrg    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
1178209ff23fSmrg     * which is always the case if color tiling is used due to color pitch
1179209ff23fSmrg     * but not necessarily otherwise, and its height a multiple of 16 lines.
1180209ff23fSmrg     */
1181ad43ddacSmrg    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
1182ad43ddacSmrg    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
1183ad43ddacSmrg		  * depthCpp, RADEON_GPU_PAGE_SIZE);
1184209ff23fSmrg
1185209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1186b7e1c893Smrg	       "Using %d MB GART aperture\n", info->dri->gartSize);
1187209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1188b7e1c893Smrg	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
1189209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1190b7e1c893Smrg	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
1191209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1192b7e1c893Smrg	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
1193209ff23fSmrg
1194209ff23fSmrg    /* Try for front, back, depth, and three framebuffers worth of
1195209ff23fSmrg     * pixmap cache.  Should be enough for a fullscreen background
1196209ff23fSmrg     * image plus some leftovers.
1197209ff23fSmrg     * If the FBTexPercent option was used, try to achieve that percentage instead,
1198209ff23fSmrg     * but still have at least one pixmap buffer (get problems with xvideo/render
1199209ff23fSmrg     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
1200209ff23fSmrg     * probably useless for XAA.
1201209ff23fSmrg     */
    /* A non-negative textureSize on entry is used as a percentage here;
     * the field is overwritten with a byte count right after.
     */
1202b7e1c893Smrg    if (info->dri->textureSize >= 0) {
1203209ff23fSmrg	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
1204209ff23fSmrg			 - 2 * width_bytes - 16384 - info->FbSecureSize)
1205209ff23fSmrg	/* first divide, then multiply or we'll get an overflow (been there...) */
1206b7e1c893Smrg			 / 100 * info->dri->textureSize;
1207209ff23fSmrg    }
1208209ff23fSmrg    else {
1209209ff23fSmrg	texsizerequest = (int)info->FbMapSize / 2;
1210209ff23fSmrg    }
1211b7e1c893Smrg    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
1212209ff23fSmrg
1213209ff23fSmrg    /* If that gives us less than the requested memory, let's
1214209ff23fSmrg     * be greedy and grab some more.  Sorry, I care more about 3D
1215209ff23fSmrg     * performance than playing nicely, and you'll get around a full
1216209ff23fSmrg     * framebuffer's worth of pixmap cache anyway.
1217209ff23fSmrg     */
1218b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1219b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
1220209ff23fSmrg    }
1221b7e1c893Smrg    if (info->dri->textureSize < texsizerequest) {
1222b7e1c893Smrg        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
1223209ff23fSmrg    }
1224209ff23fSmrg
1225209ff23fSmrg    /* If there's still no space for textures, try without pixmap cache, but
1226209ff23fSmrg     * never use the reserved space, the space hw cursor and PCIGART table might
1227209ff23fSmrg     * use.
1228209ff23fSmrg     */
1229b7e1c893Smrg    if (info->dri->textureSize < 0) {
1230b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
1231209ff23fSmrg	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
1232209ff23fSmrg    }
1233209ff23fSmrg
1234209ff23fSmrg    /* Check to see if there is more room available after the 8192nd
1235209ff23fSmrg     * scanline for textures
1236209ff23fSmrg     */
1237209ff23fSmrg    /* FIXME: what's this good for? condition is pretty much impossible to meet */
1238209ff23fSmrg    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
1239b7e1c893Smrg	> info->dri->textureSize) {
1240b7e1c893Smrg	info->dri->textureSize =
1241209ff23fSmrg		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
1242209ff23fSmrg    }
1243209ff23fSmrg
1244209ff23fSmrg    /* If backbuffer is disabled, don't allocate memory for it */
1245b7e1c893Smrg    if (info->dri->noBackBuffer) {
1246b7e1c893Smrg	info->dri->textureSize += bufferSize;
1247209ff23fSmrg    }
1248209ff23fSmrg
1249209ff23fSmrg    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
1250209ff23fSmrg       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
1251209ff23fSmrg       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
1252209ff23fSmrg       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
1253209ff23fSmrg       area otherwise).
1254209ff23fSmrg       This might cause some space at the end of the video memory to be unused, since it
1255209ff23fSmrg       can't be used (?) due to that log_tex_granularity thing???
1256209ff23fSmrg       Could use different copyscreentoscreen function for the pageflip copies
1257209ff23fSmrg       (which would use different src and dst offsets) to avoid this. */
1258b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1259b7e1c893Smrg	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
1260209ff23fSmrg			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
1261209ff23fSmrg    }
1262b7e1c893Smrg    if (info->dri->textureSize > 0) {
1263b7e1c893Smrg	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
1264209ff23fSmrg	if (l < RADEON_LOG_TEX_GRANULARITY)
1265209ff23fSmrg	    l = RADEON_LOG_TEX_GRANULARITY;
1266209ff23fSmrg	/* Round the texture size up to the nearest whole number of
1267209ff23fSmrg	 * texture regions.  Again, be greedy about this, don't
1268209ff23fSmrg	 * round down.
1269209ff23fSmrg	 */
	/* NOTE(review): the shift pair below clears the low l bits, i.e. it
	 * rounds textureSize down, which does not match the comment above.
	 */
1270b7e1c893Smrg	info->dri->log2TexGran = l;
1271b7e1c893Smrg	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1272209ff23fSmrg    } else {
1273b7e1c893Smrg	info->dri->textureSize = 0;
1274209ff23fSmrg    }
1275209ff23fSmrg
1276209ff23fSmrg    /* Set a minimum usable local texture heap size.  This will fit
1277209ff23fSmrg     * two 256x256x32bpp textures.
1278209ff23fSmrg     */
    /* The textureOffset written here is overwritten unconditionally by the
     * if/else that follows; only the textureSize reset takes effect.
     */
1279b7e1c893Smrg    if (info->dri->textureSize < 512 * 1024) {
1280b7e1c893Smrg	info->dri->textureOffset = 0;
1281b7e1c893Smrg	info->dri->textureSize = 0;
1282209ff23fSmrg    }
1283209ff23fSmrg
1284b7e1c893Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
1285b7e1c893Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
1286b7e1c893Smrg				    (width_bytes * 16)) * (width_bytes * 16);
1287209ff23fSmrg    }
1288209ff23fSmrg    else {
1289209ff23fSmrg	/* Reserve space for textures */
1290ad43ddacSmrg	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
1291ad43ddacSmrg				     RADEON_GPU_PAGE_SIZE);
1292209ff23fSmrg    }
1293209ff23fSmrg
1294209ff23fSmrg    /* Reserve space for the shared depth
1295209ff23fSmrg     * buffer.
1296209ff23fSmrg     */
1297ad43ddacSmrg    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
1298ad43ddacSmrg			       RADEON_GPU_PAGE_SIZE);
1299209ff23fSmrg
1300209ff23fSmrg    /* Reserve space for the shared back buffer */
1301b7e1c893Smrg    if (info->dri->noBackBuffer) {
1302b7e1c893Smrg       info->dri->backOffset = info->dri->depthOffset;
1303209ff23fSmrg    } else {
1304ad43ddacSmrg       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
1305ad43ddacSmrg				 RADEON_GPU_PAGE_SIZE);
1306209ff23fSmrg    }
1307209ff23fSmrg
    /* Express the back buffer offset as line and pixel position within a
     * surface of the front buffer's pitch.
     */
1308b7e1c893Smrg    info->dri->backY = info->dri->backOffset / width_bytes;
1309b7e1c893Smrg    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1310209ff23fSmrg
    /* Lines handed to the FB manager: everything below the secure area,
     * capped at 8191.
     */
1311209ff23fSmrg    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
1312209ff23fSmrg    if (scanlines > 8191)
1313209ff23fSmrg	scanlines = 8191;
1314209ff23fSmrg
1315209ff23fSmrg    MemBox.x1 = 0;
1316209ff23fSmrg    MemBox.y1 = 0;
1317209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1318209ff23fSmrg    MemBox.y2 = scanlines;
1319209ff23fSmrg
1320209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1321209ff23fSmrg        xf86DrvMsg(scrnIndex, X_ERROR,
1322209ff23fSmrg		   "Memory manager initialization to "
1323209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1324209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1325209ff23fSmrg	return FALSE;
1326209ff23fSmrg    } else {
1327209ff23fSmrg	int  width, height;
1328209ff23fSmrg
1329209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1330209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1331209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1332209ff23fSmrg	/* why oh why can't we just request modes which are guaranteed to be 16 lines
1333209ff23fSmrg	   aligned... sigh */
	/* Reserve 2 lines, plus the lines needed to pad virtualY up to a
	 * multiple of 16 when color tiling is allowed.
	 */
1334209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1335209ff23fSmrg						pScrn->displayWidth,
1336209ff23fSmrg						info->allowColorTiling ?
1337ad43ddacSmrg						(RADEON_ALIGN(pScrn->virtualY, 16))
1338209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1339209ff23fSmrg						0, NULL, NULL,
1340209ff23fSmrg						NULL))) {
1341209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1342209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1343209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1344209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1345209ff23fSmrg	} else {
1346209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1347209ff23fSmrg	}
1348209ff23fSmrg
1349209ff23fSmrg	RADEONDRIAllocatePCIGARTTable(pScreen);
1350209ff23fSmrg
1351209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width,
1352209ff23fSmrg					  &height, 0, 0, 0)) {
1353209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1354209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1355209ff23fSmrg		       width, height);
1356209ff23fSmrg
1357209ff23fSmrg	    /* Lines in offscreen area needed for depth buffer and
1358209ff23fSmrg	     * textures
1359209ff23fSmrg	     */
1360b7e1c893Smrg	    info->dri->depthTexLines = (scanlines
1361b7e1c893Smrg					- info->dri->depthOffset / width_bytes);
1362b7e1c893Smrg	    info->dri->backLines	    = (scanlines
1363b7e1c893Smrg					       - info->dri->backOffset / width_bytes
1364b7e1c893Smrg					       - info->dri->depthTexLines);
1365b7e1c893Smrg	    info->dri->backArea	    = NULL;
1366209ff23fSmrg	} else {
1367209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR,
1368209ff23fSmrg		       "Unable to determine largest offscreen area "
1369209ff23fSmrg		       "available\n");
1370209ff23fSmrg	    return FALSE;
1371209ff23fSmrg	}
1372209ff23fSmrg    }
1373209ff23fSmrg
1374209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1375209ff23fSmrg	       "Will use front buffer at offset 0x%x\n",
1376b7e1c893Smrg	       info->dri->frontOffset);
1377209ff23fSmrg
1378209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1379209ff23fSmrg	       "Will use back buffer at offset 0x%x\n",
1380b7e1c893Smrg	       info->dri->backOffset);
1381209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1382209ff23fSmrg	       "Will use depth buffer at offset 0x%x\n",
1383b7e1c893Smrg	       info->dri->depthOffset);
1384209ff23fSmrg    if (info->cardType==CARD_PCIE)
1385209ff23fSmrg    	xf86DrvMsg(scrnIndex, X_INFO,
1386209ff23fSmrg	           "Will use %d kb for PCI GART table at offset 0x%x\n",
1387b7e1c893Smrg		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
1388209ff23fSmrg    xf86DrvMsg(scrnIndex, X_INFO,
1389209ff23fSmrg	       "Will use %d kb for textures at offset 0x%x\n",
1390b7e1c893Smrg	       info->dri->textureSize/1024, info->dri->textureOffset);
1391209ff23fSmrg
    /* Packed pitch/offset words: pitch in 64-byte units shifted to bit 22,
     * card address (buffer offset + fbLocation) in 1 KB units below it.
     */
1392b7e1c893Smrg    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
1393b7e1c893Smrg				   ((info->dri->frontOffset + info->fbLocation) >> 10));
1394209ff23fSmrg
1395b7e1c893Smrg    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
1396b7e1c893Smrg				  ((info->dri->backOffset + info->fbLocation) >> 10));
1397209ff23fSmrg
1398b7e1c893Smrg    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
1399b7e1c893Smrg				   ((info->dri->depthOffset + info->fbLocation) >> 10));
1400209ff23fSmrg    return TRUE;
1401209ff23fSmrg}
1402209ff23fSmrg#endif /* XF86DRI */
1403209ff23fSmrg
1404209ff23fSmrgBool
1405209ff23fSmrgRADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
1406209ff23fSmrg{
1407209ff23fSmrg    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
1408209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1409209ff23fSmrg    BoxRec         MemBox;
1410209ff23fSmrg    int            y2;
1411209ff23fSmrg
1412209ff23fSmrg    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
1413209ff23fSmrg
1414209ff23fSmrg    MemBox.x1 = 0;
1415209ff23fSmrg    MemBox.y1 = 0;
1416209ff23fSmrg    MemBox.x2 = pScrn->displayWidth;
1417209ff23fSmrg    y2 = info->FbMapSize / width_bytes;
1418209ff23fSmrg    if (y2 >= 32768)
1419209ff23fSmrg	y2 = 32767; /* because MemBox.y2 is signed short */
1420209ff23fSmrg    MemBox.y2 = y2;
1421209ff23fSmrg
1422209ff23fSmrg    /* The acceleration engine uses 14 bit
1423209ff23fSmrg     * signed coordinates, so we can't have any
1424209ff23fSmrg     * drawable caches beyond this region.
1425209ff23fSmrg     */
1426209ff23fSmrg    if (MemBox.y2 > 8191)
1427209ff23fSmrg	MemBox.y2 = 8191;
1428209ff23fSmrg
1429209ff23fSmrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
1430209ff23fSmrg	xf86DrvMsg(scrnIndex, X_ERROR,
1431209ff23fSmrg		   "Memory manager initialization to "
1432209ff23fSmrg		   "(%d,%d) (%d,%d) failed\n",
1433209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1434209ff23fSmrg	return FALSE;
1435209ff23fSmrg    } else {
1436209ff23fSmrg	int       width, height;
1437209ff23fSmrg	FBAreaPtr fbarea;
1438209ff23fSmrg
1439209ff23fSmrg	xf86DrvMsg(scrnIndex, X_INFO,
1440209ff23fSmrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
1441209ff23fSmrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
1442209ff23fSmrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
1443209ff23fSmrg						pScrn->displayWidth,
1444209ff23fSmrg						info->allowColorTiling ?
1445ad43ddacSmrg						(RADEON_ALIGN(pScrn->virtualY, 16))
1446209ff23fSmrg						- pScrn->virtualY + 2 : 2,
1447209ff23fSmrg						0, NULL, NULL,
1448209ff23fSmrg						NULL))) {
1449209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1450209ff23fSmrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
1451209ff23fSmrg		       fbarea->box.x1, fbarea->box.y1,
1452209ff23fSmrg		       fbarea->box.x2, fbarea->box.y2);
1453209ff23fSmrg	} else {
1454209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
1455209ff23fSmrg	}
1456209ff23fSmrg	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
1457209ff23fSmrg					      0, 0, 0)) {
1458209ff23fSmrg	    xf86DrvMsg(scrnIndex, X_INFO,
1459209ff23fSmrg		       "Largest offscreen area available: %d x %d\n",
1460209ff23fSmrg		       width, height);
1461209ff23fSmrg	}
1462209ff23fSmrg	return TRUE;
1463209ff23fSmrg    }
1464209ff23fSmrg}
1465209ff23fSmrg#endif /* USE_XAA */
1466