/*
 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
 *                VA Linux Systems Inc., Fremont, California.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation on the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
28209ff23fSmrg
29209ff23fSmrg#ifdef HAVE_CONFIG_H
30209ff23fSmrg#include "config.h"
31209ff23fSmrg#endif
32209ff23fSmrg
/*
 * Authors:
 *   Kevin E. Martin <martin@xfree86.org>
 *   Rickard E. Faith <faith@valinux.com>
 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
 *
 * Credits:
 *
 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
 *   code to his Radeon driver.  Portions of this file are based on the
 *   initialization code for that driver.
 *
 * References:
 *
 * !!!! FIXME !!!!
 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
 *   1999.
 *
 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
 *
 * Notes on unimplemented XAA optimizations:
 *
 *   SetClipping:   This has been removed as XAA expects 16bit registers
 *                  for full clipping.
 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
 *   DashedLine with non-power-of-two pattern length: Apparently, there is
 *                  no way to set the length of the pattern -- it is always
 *                  assumed to be 8 or 32 (or 1024?).
 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
 *                  Manual where it states that monochrome expansion of frame
 *                  buffer data is not supported.
 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
 *                  direct/indirect method.  If we had more data registers,
 *                  then we could do better.  If XAA supported a trigger write
 *                  address, the code would be simpler.
 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
 *                  pattern from frame buffer memory.
 *   ImageWrites:   Same as CPUToScreenColorExpandFill
 *
 */
75209ff23fSmrg
76209ff23fSmrg#include <errno.h>
77209ff23fSmrg#include <string.h>
78921a55d8Smrg#include <assert.h>
79209ff23fSmrg				/* Driver data structures */
80209ff23fSmrg#include "radeon.h"
81209ff23fSmrg#include "radeon_reg.h"
82b7e1c893Smrg#include "r600_reg.h"
8343df4709Smrg#include "radeon_macros.h"
84209ff23fSmrg#include "radeon_probe.h"
85209ff23fSmrg#include "radeon_version.h"
8643df4709Smrg#ifdef XF86DRI
8743df4709Smrg#define _XF86DRI_SERVER_
8843df4709Smrg#include "radeon_drm.h"
8943df4709Smrg#endif
90209ff23fSmrg
91c503f109Smrg#include "ati_pciids_gen.h"
92c503f109Smrg
93209ff23fSmrg				/* Line support */
94209ff23fSmrg#include "miline.h"
95209ff23fSmrg
96209ff23fSmrg				/* X and server generic header files */
97209ff23fSmrg#include "xf86.h"
98209ff23fSmrg
/* Forward declaration: R600WaitForFifoFunction() calls this before its
 * definition appears further down in the file.
 */
static void R600EngineReset(ScrnInfoPtr pScrn);
10043df4709Smrg
#ifdef USE_XAA
/* Raster-op translation table, one row per X11 GC function in the order
 * given by the row comments (GXclear first, GXset last).  Each row holds two
 * hardware ROP3 codes: `rop' uses the source (S) operand forms and `pattern'
 * uses the pattern (P) operand forms.
 * NOTE(review): presumably indexed directly by the GC alu value -- the users
 * of this table are not visible in this part of the file; confirm there.
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
#endif
12443df4709Smrg
12543df4709Smrg/* The FIFO has 64 slots.  This routines waits until at least `entries'
12643df4709Smrg * of these slots are empty.
12743df4709Smrg */
12843df4709Smrgvoid RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
12943df4709Smrg{
13043df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
13143df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
13243df4709Smrg    int            i;
13343df4709Smrg
13443df4709Smrg    for (;;) {
13543df4709Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
13643df4709Smrg	    info->accel_state->fifo_slots =
13743df4709Smrg		INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
13843df4709Smrg	    if (info->accel_state->fifo_slots >= entries) return;
13943df4709Smrg	}
14043df4709Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
14143df4709Smrg		       "FIFO timed out: %u entries, stat=0x%08x\n",
14243df4709Smrg		       (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
14343df4709Smrg		       (unsigned int)INREG(RADEON_RBBM_STATUS));
14443df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
14543df4709Smrg		   "FIFO timed out, resetting engine...\n");
14643df4709Smrg	RADEONEngineReset(pScrn);
14743df4709Smrg	RADEONEngineRestore(pScrn);
14843df4709Smrg#ifdef XF86DRI
14943df4709Smrg	if (info->directRenderingEnabled) {
15043df4709Smrg	    RADEONCP_RESET(pScrn, info);
15143df4709Smrg	    RADEONCP_START(pScrn, info);
15243df4709Smrg	}
15343df4709Smrg#endif
15443df4709Smrg    }
15543df4709Smrg}
15643df4709Smrg
15743df4709Smrgvoid R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
15843df4709Smrg{
15943df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
16043df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
16143df4709Smrg    int            i;
16243df4709Smrg
16343df4709Smrg    for (;;) {
16443df4709Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
16543df4709Smrg	    if (info->ChipFamily >= CHIP_FAMILY_RV770)
16643df4709Smrg		info->accel_state->fifo_slots =
16743df4709Smrg		    INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK;
16843df4709Smrg	    else
16943df4709Smrg		info->accel_state->fifo_slots =
17043df4709Smrg		    INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK;
17143df4709Smrg	    if (info->accel_state->fifo_slots >= entries) return;
17243df4709Smrg	}
17343df4709Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
17443df4709Smrg		       "FIFO timed out: stat=0x%08x\n",
17543df4709Smrg		       (unsigned int)INREG(R600_GRBM_STATUS));
17643df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
17743df4709Smrg		   "FIFO timed out, resetting engine...\n");
17843df4709Smrg	R600EngineReset(pScrn);
17943df4709Smrg#ifdef XF86DRI
18043df4709Smrg	if (info->directRenderingEnabled) {
18143df4709Smrg	    RADEONCP_RESET(pScrn, info);
18243df4709Smrg	    RADEONCP_START(pScrn, info);
18343df4709Smrg	}
18443df4709Smrg#endif
18543df4709Smrg    }
18643df4709Smrg}
18743df4709Smrg
18843df4709Smrg/* Flush all dirty data in the Pixel Cache to memory */
18943df4709Smrgvoid RADEONEngineFlush(ScrnInfoPtr pScrn)
19043df4709Smrg{
19143df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
19243df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
19343df4709Smrg    int            i;
19443df4709Smrg
19543df4709Smrg    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
19643df4709Smrg	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
19743df4709Smrg		RADEON_RB3D_DC_FLUSH_ALL,
19843df4709Smrg		~RADEON_RB3D_DC_FLUSH_ALL);
19943df4709Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
20043df4709Smrg	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
20143df4709Smrg		break;
20243df4709Smrg	}
20343df4709Smrg	if (i == RADEON_TIMEOUT) {
20443df4709Smrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
20543df4709Smrg			   "DC flush timeout: %x\n",
20643df4709Smrg			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
20743df4709Smrg	}
20843df4709Smrg    } else {
20943df4709Smrg	OUTREGP(R300_DSTCACHE_CTLSTAT,
21043df4709Smrg		R300_RB2D_DC_FLUSH_ALL,
21143df4709Smrg		~R300_RB2D_DC_FLUSH_ALL);
21243df4709Smrg	for (i = 0; i < RADEON_TIMEOUT; i++) {
21343df4709Smrg	    if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
21443df4709Smrg		break;
21543df4709Smrg	}
21643df4709Smrg	if (i == RADEON_TIMEOUT) {
21743df4709Smrg	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
21843df4709Smrg			   "DC flush timeout: %x\n",
21943df4709Smrg			   (unsigned int)INREG(R300_DSTCACHE_CTLSTAT));
22043df4709Smrg	}
22143df4709Smrg    }
22243df4709Smrg}
22343df4709Smrg
22443df4709Smrg/* Reset graphics card to known state */
22543df4709Smrgvoid RADEONEngineReset(ScrnInfoPtr pScrn)
22643df4709Smrg{
22743df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
22843df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
22943df4709Smrg    uint32_t       clock_cntl_index;
23043df4709Smrg    uint32_t       mclk_cntl;
23143df4709Smrg    uint32_t       rbbm_soft_reset;
23243df4709Smrg    uint32_t       host_path_cntl;
23343df4709Smrg
23443df4709Smrg    /* The following RBBM_SOFT_RESET sequence can help un-wedge
23543df4709Smrg     * an R300 after the command processor got stuck.
23643df4709Smrg     */
23743df4709Smrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
23843df4709Smrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
23943df4709Smrg                                   RADEON_SOFT_RESET_CP |
24043df4709Smrg                                   RADEON_SOFT_RESET_HI |
24143df4709Smrg                                   RADEON_SOFT_RESET_SE |
24243df4709Smrg                                   RADEON_SOFT_RESET_RE |
24343df4709Smrg                                   RADEON_SOFT_RESET_PP |
24443df4709Smrg                                   RADEON_SOFT_RESET_E2 |
24543df4709Smrg                                   RADEON_SOFT_RESET_RB));
24643df4709Smrg    INREG(RADEON_RBBM_SOFT_RESET);
24743df4709Smrg    OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
24843df4709Smrg                                   ~(RADEON_SOFT_RESET_CP |
24943df4709Smrg                                     RADEON_SOFT_RESET_HI |
25043df4709Smrg                                     RADEON_SOFT_RESET_SE |
25143df4709Smrg                                     RADEON_SOFT_RESET_RE |
25243df4709Smrg                                     RADEON_SOFT_RESET_PP |
25343df4709Smrg                                     RADEON_SOFT_RESET_E2 |
25443df4709Smrg                                     RADEON_SOFT_RESET_RB)));
25543df4709Smrg    INREG(RADEON_RBBM_SOFT_RESET);
25643df4709Smrg    OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
25743df4709Smrg    INREG(RADEON_RBBM_SOFT_RESET);
25843df4709Smrg
25943df4709Smrg    RADEONEngineFlush(pScrn);
26043df4709Smrg
26143df4709Smrg    clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX);
26243df4709Smrg    RADEONPllErrataAfterIndex(info);
26343df4709Smrg
26443df4709Smrg#if 0 /* taken care of by new PM code */
26543df4709Smrg    /* Some ASICs have bugs with dynamic-on feature, which are
26643df4709Smrg     * ASIC-version dependent, so we force all blocks on for now
26743df4709Smrg     */
26843df4709Smrg    if (info->HasCRTC2) {
26943df4709Smrg	uint32_t tmp;
27043df4709Smrg
27143df4709Smrg	tmp = INPLL(pScrn, RADEON_SCLK_CNTL);
27243df4709Smrg	OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) |
27343df4709Smrg				  RADEON_CP_MAX_DYN_STOP_LAT |
27443df4709Smrg				  RADEON_SCLK_FORCEON_MASK));
27543df4709Smrg
27643df4709Smrg	if (info->ChipFamily == CHIP_FAMILY_RV200) {
27743df4709Smrg	    tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL);
27843df4709Smrg	    OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON);
27943df4709Smrg	}
28043df4709Smrg    }
28143df4709Smrg#endif /* new PM code */
28243df4709Smrg
28343df4709Smrg    mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL);
28443df4709Smrg
28543df4709Smrg#if 0 /* handled by new PM code */
28643df4709Smrg    OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl |
28743df4709Smrg			      RADEON_FORCEON_MCLKA |
28843df4709Smrg			      RADEON_FORCEON_MCLKB |
28943df4709Smrg			      RADEON_FORCEON_YCLKA |
29043df4709Smrg			      RADEON_FORCEON_YCLKB |
29143df4709Smrg			      RADEON_FORCEON_MC |
29243df4709Smrg			      RADEON_FORCEON_AIC));
29343df4709Smrg#endif /* new PM code */
29443df4709Smrg
29543df4709Smrg    /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some
29643df4709Smrg     * unexpected behaviour on some machines.  Here we use
29743df4709Smrg     * RADEON_HOST_PATH_CNTL to reset it.
29843df4709Smrg     */
29943df4709Smrg    host_path_cntl = INREG(RADEON_HOST_PATH_CNTL);
30043df4709Smrg    rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET);
30143df4709Smrg
30243df4709Smrg    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
30343df4709Smrg	uint32_t tmp;
30443df4709Smrg
30543df4709Smrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
30643df4709Smrg					RADEON_SOFT_RESET_CP |
30743df4709Smrg					RADEON_SOFT_RESET_HI |
30843df4709Smrg					RADEON_SOFT_RESET_E2));
30943df4709Smrg	INREG(RADEON_RBBM_SOFT_RESET);
31043df4709Smrg	OUTREG(RADEON_RBBM_SOFT_RESET, 0);
31143df4709Smrg	tmp = INREG(RADEON_RB3D_DSTCACHE_MODE);
31243df4709Smrg	OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
31343df4709Smrg    } else {
31443df4709Smrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
31543df4709Smrg					RADEON_SOFT_RESET_CP |
31643df4709Smrg					RADEON_SOFT_RESET_SE |
31743df4709Smrg					RADEON_SOFT_RESET_RE |
31843df4709Smrg					RADEON_SOFT_RESET_PP |
31943df4709Smrg					RADEON_SOFT_RESET_E2 |
32043df4709Smrg					RADEON_SOFT_RESET_RB));
32143df4709Smrg	INREG(RADEON_RBBM_SOFT_RESET);
32243df4709Smrg	OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t)
32343df4709Smrg					~(RADEON_SOFT_RESET_CP |
32443df4709Smrg					  RADEON_SOFT_RESET_SE |
32543df4709Smrg					  RADEON_SOFT_RESET_RE |
32643df4709Smrg					  RADEON_SOFT_RESET_PP |
32743df4709Smrg					  RADEON_SOFT_RESET_E2 |
32843df4709Smrg					  RADEON_SOFT_RESET_RB)));
32943df4709Smrg	INREG(RADEON_RBBM_SOFT_RESET);
33043df4709Smrg    }
33143df4709Smrg
33243df4709Smrg    if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT)
33343df4709Smrg	OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);
33443df4709Smrg
33543df4709Smrg    OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index);
33643df4709Smrg    RADEONPllErrataAfterIndex(info);
33743df4709Smrg    OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
33843df4709Smrg}
33943df4709Smrg
34043df4709Smrg/* Reset graphics card to known state */
34143df4709Smrgstatic void R600EngineReset(ScrnInfoPtr pScrn)
34243df4709Smrg{
34343df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
34443df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
34543df4709Smrg    uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
34643df4709Smrg
34743df4709Smrg    cp_ptr = INREG(R600_CP_RB_WPTR);
34843df4709Smrg
34943df4709Smrg    cp_me_cntl = INREG(R600_CP_ME_CNTL);
35043df4709Smrg    OUTREG(R600_CP_ME_CNTL, 0x10000000);
35143df4709Smrg
35243df4709Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0x7fff);
35343df4709Smrg    INREG(R600_GRBM_SOFT_RESET);
35443df4709Smrg    usleep (50);
35543df4709Smrg    OUTREG(R600_GRBM_SOFT_RESET, 0);
35643df4709Smrg    INREG(R600_GRBM_SOFT_RESET);
35743df4709Smrg
35843df4709Smrg    OUTREG(R600_CP_RB_WPTR_DELAY, 0);
35943df4709Smrg    cp_rb_cntl = INREG(R600_CP_RB_CNTL);
36043df4709Smrg    OUTREG(R600_CP_RB_CNTL, 0x80000000);
36143df4709Smrg
36243df4709Smrg    OUTREG(R600_CP_RB_RPTR_WR, cp_ptr);
36343df4709Smrg    OUTREG(R600_CP_RB_WPTR, cp_ptr);
36443df4709Smrg    OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
36543df4709Smrg    OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
36643df4709Smrg
36743df4709Smrg}
36843df4709Smrg
36943df4709Smrg/* Restore the acceleration hardware to its previous state */
37043df4709Smrgvoid RADEONEngineRestore(ScrnInfoPtr pScrn)
37143df4709Smrg{
37243df4709Smrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
37343df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
37443df4709Smrg
37543df4709Smrg    if (info->cs)
37643df4709Smrg      return;
37743df4709Smrg
37843df4709Smrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
37943df4709Smrg		   "EngineRestore (%d/%d)\n",
38043df4709Smrg		   info->CurrentLayout.pixel_code,
38143df4709Smrg		   info->CurrentLayout.bitsPerPixel);
38243df4709Smrg
38343df4709Smrg    /* Setup engine location. This shouldn't be necessary since we
38443df4709Smrg     * set them appropriately before any accel ops, but let's avoid
38543df4709Smrg     * random bogus DMA in case we inadvertently trigger the engine
38643df4709Smrg     * in the wrong place (happened).
38743df4709Smrg     */
38843df4709Smrg    RADEONWaitForFifo(pScrn, 2);
38943df4709Smrg    OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
39043df4709Smrg    OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset);
39143df4709Smrg
39243df4709Smrg    RADEONWaitForFifo(pScrn, 1);
39343df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
39443df4709Smrg    OUTREGP(RADEON_DP_DATATYPE,
39543df4709Smrg	    RADEON_HOST_BIG_ENDIAN_EN,
39643df4709Smrg	    ~RADEON_HOST_BIG_ENDIAN_EN);
39743df4709Smrg#else
39843df4709Smrg    OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN);
39943df4709Smrg#endif
40043df4709Smrg
40143df4709Smrg    /* Restore SURFACE_CNTL */
40243df4709Smrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
40343df4709Smrg
40443df4709Smrg    RADEONWaitForFifo(pScrn, 1);
40543df4709Smrg    OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX
40643df4709Smrg					    | RADEON_DEFAULT_SC_BOTTOM_MAX));
40743df4709Smrg    RADEONWaitForFifo(pScrn, 1);
40843df4709Smrg    OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl
40943df4709Smrg				       | RADEON_GMC_BRUSH_SOLID_COLOR
41043df4709Smrg				       | RADEON_GMC_SRC_DATATYPE_COLOR));
41143df4709Smrg
41243df4709Smrg    RADEONWaitForFifo(pScrn, 5);
41343df4709Smrg    OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff);
41443df4709Smrg    OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000);
41543df4709Smrg    OUTREG(RADEON_DP_SRC_FRGD_CLR,   0xffffffff);
41643df4709Smrg    OUTREG(RADEON_DP_SRC_BKGD_CLR,   0x00000000);
41743df4709Smrg    OUTREG(RADEON_DP_WRITE_MASK,     0xffffffff);
41843df4709Smrg
41943df4709Smrg    RADEONWaitForIdleMMIO(pScrn);
42043df4709Smrg
42143df4709Smrg    info->accel_state->XInited3D = FALSE;
42243df4709Smrg}
42343df4709Smrg
#ifdef XF86DRI
/* Query the kernel DRM driver for the number of GB (quad) pipes.
 *
 * Kernel DRM major versions below 2 are asked through the GETPARAM request;
 * newer ones through the INFO request.  In both cases the kernel is handed
 * the address of *num_pipes to store the result in.
 *
 * Returns the result of drmCommandWriteRead(); the caller treats a negative
 * value as failure.
 *
 * The DRM types used here come from radeon_drm.h, which this file only
 * includes when XF86DRI is defined, and the only caller is likewise under
 * XF86DRI, so the function is compiled under the same condition.
 */
static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->dri->pKernelDRMVersion->version_major < 2) {
        drm_radeon_getparam_t np;

        memset(&np, 0, sizeof(np));
        np.param = RADEON_PARAM_NUM_GB_PIPES;
        np.value = num_pipes;
        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
    } else {
        struct drm_radeon_info np2;

        /* Zero the whole request, as the GETPARAM branch does, so that no
         * uninitialised stack bytes are handed to the kernel.
         */
        memset(&np2, 0, sizeof(np2));
        np2.value = (unsigned long)num_pipes;
        np2.request = RADEON_INFO_NUM_GB_PIPES;
        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
    }
}
#endif /* XF86DRI */
441ad43ddacSmrg
442209ff23fSmrg/* Initialize the acceleration hardware */
443209ff23fSmrgvoid RADEONEngineInit(ScrnInfoPtr pScrn)
444209ff23fSmrg{
445209ff23fSmrg    RADEONInfoPtr  info       = RADEONPTR(pScrn);
44643df4709Smrg    unsigned char *RADEONMMIO = info->MMIO;
44743df4709Smrg    int datatype = 0;
448b7e1c893Smrg    info->accel_state->num_gb_pipes = 0;
449209ff23fSmrg
45043df4709Smrg    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
45143df4709Smrg		   "EngineInit (%d/%d)\n",
45243df4709Smrg		   info->CurrentLayout.pixel_code,
45343df4709Smrg		   info->CurrentLayout.bitsPerPixel);
45443df4709Smrg
45543df4709Smrg#ifdef XF86DRI
456209ff23fSmrg    if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
457209ff23fSmrg	int num_pipes;
458209ff23fSmrg
459ad43ddacSmrg	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
460209ff23fSmrg	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
461209ff23fSmrg		       "Failed to determine num pipes from DRM, falling back to "
462209ff23fSmrg		       "manual look-up!\n");
463b7e1c893Smrg	    info->accel_state->num_gb_pipes = 0;
464209ff23fSmrg	} else {
465b7e1c893Smrg	    info->accel_state->num_gb_pipes = num_pipes;
466209ff23fSmrg	}
467209ff23fSmrg    }
46843df4709Smrg#endif
46943df4709Smrg
47043df4709Smrg    if (!info->cs) {
47143df4709Smrg	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
47243df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
47343df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
47443df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
47543df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
47643df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
47743df4709Smrg	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
47843df4709Smrg	    IS_R500_3D) {
47943df4709Smrg	    if (info->accel_state->num_gb_pipes == 0) {
48043df4709Smrg		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
48143df4709Smrg
48243df4709Smrg		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
48343df4709Smrg		if (IS_R500_3D)
48443df4709Smrg		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
48543df4709Smrg	    }
48643df4709Smrg	} else {
48743df4709Smrg	    if (info->accel_state->num_gb_pipes == 0) {
48843df4709Smrg		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
48943df4709Smrg		    (info->ChipFamily == CHIP_FAMILY_R350)) {
49043df4709Smrg		    /* R3xx chips */
49143df4709Smrg		    info->accel_state->num_gb_pipes = 2;
49243df4709Smrg		} else {
49343df4709Smrg		    /* RV3xx chips */
49443df4709Smrg		    info->accel_state->num_gb_pipes = 1;
49543df4709Smrg		}
49643df4709Smrg	    }
49743df4709Smrg	}
49843df4709Smrg
49943df4709Smrg	/* SE cards only have 1 quadpipe */
50043df4709Smrg	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
50143df4709Smrg	    (info->Chipset == PCI_CHIP_RV410_5E4F) ||
50243df4709Smrg	    (info->Chipset == PCI_CHIP_R300_AD) ||
50343df4709Smrg	    (info->Chipset == PCI_CHIP_R350_AH))
50443df4709Smrg	    info->accel_state->num_gb_pipes = 1;
50543df4709Smrg
50643df4709Smrg	if (IS_R300_3D || IS_R500_3D)
50743df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
50843df4709Smrg		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
50943df4709Smrg
51043df4709Smrg	if (IS_R300_3D || IS_R500_3D) {
51143df4709Smrg	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
51243df4709Smrg
51343df4709Smrg	    switch(info->accel_state->num_gb_pipes) {
51443df4709Smrg	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
51543df4709Smrg	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
51643df4709Smrg	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
51743df4709Smrg	    default:
51843df4709Smrg	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
51943df4709Smrg	    }
52043df4709Smrg
52143df4709Smrg	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
52243df4709Smrg	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
52343df4709Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R420)
52443df4709Smrg		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
52543df4709Smrg	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
52643df4709Smrg					     R300_DC_AUTOFLUSH_ENABLE |
52743df4709Smrg					     R300_DC_DC_DISABLE_IGNORE_PE));
52843df4709Smrg	} else
52943df4709Smrg	    OUTREG(RADEON_RB3D_CNTL, 0);
53043df4709Smrg
53143df4709Smrg	RADEONEngineReset(pScrn);
53243df4709Smrg    }
53343df4709Smrg
53443df4709Smrg    switch (info->CurrentLayout.pixel_code) {
53543df4709Smrg    case 8:  datatype = 2; break;
53643df4709Smrg    case 15: datatype = 3; break;
53743df4709Smrg    case 16: datatype = 4; break;
53843df4709Smrg    case 24: datatype = 5; break;
53943df4709Smrg    case 32: datatype = 6; break;
54043df4709Smrg    default:
54143df4709Smrg	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
54243df4709Smrg		       "Unknown depth/bpp = %d/%d (code = %d)\n",
54343df4709Smrg		       info->CurrentLayout.depth,
54443df4709Smrg		       info->CurrentLayout.bitsPerPixel,
54543df4709Smrg		       info->CurrentLayout.pixel_code);
54643df4709Smrg    }
54743df4709Smrg
54843df4709Smrg    info->accel_state->dp_gui_master_cntl =
54943df4709Smrg	((datatype << RADEON_GMC_DST_DATATYPE_SHIFT)
55043df4709Smrg	 | RADEON_GMC_CLR_CMP_CNTL_DIS
55143df4709Smrg	 | RADEON_GMC_DST_PITCH_OFFSET_CNTL);
55243df4709Smrg
55343df4709Smrg    RADEONEngineRestore(pScrn);
55443df4709Smrg}
55543df4709Smrg
55643df4709Smrguint32_t radeonGetPixmapOffset(PixmapPtr pPix)
55743df4709Smrg{
55843df4709Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
55943df4709Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
56043df4709Smrg    uint32_t offset = 0;
56143df4709Smrg    if (info->cs)
56243df4709Smrg	return 0;
56343df4709Smrg#ifdef USE_EXA
56443df4709Smrg    if (info->useEXA) {
56543df4709Smrg	offset = exaGetPixmapOffset(pPix);
56643df4709Smrg    } else
56743df4709Smrg#endif
56843df4709Smrg    {
56943df4709Smrg	offset = pPix->devPrivate.ptr - info->FB;
57043df4709Smrg    }
57143df4709Smrg    offset += info->fbLocation + pScrn->fbOffset;
57243df4709Smrg    return offset;
573ad43ddacSmrg}
574209ff23fSmrg
5752f39173dSmrgint radeon_cs_space_remaining(ScrnInfoPtr pScrn)
5762f39173dSmrg{
5772f39173dSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
5782f39173dSmrg
57943df4709Smrg#ifdef XF86DRM_MODE
58043df4709Smrg    if (info->cs)
58143df4709Smrg	return (info->cs->ndw - info->cs->cdw);
58243df4709Smrg    else
58343df4709Smrg#endif
58443df4709Smrg        return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t);
58543df4709Smrg}
58643df4709Smrg
/* MMIO instantiation of the shared acceleration sources.
 *
 * The ACCEL_* macros are defined so that each register write waits for FIFO
 * space (BEGIN_ACCEL) and then goes straight to the register through OUTREG
 * (OUT_ACCEL_REG); FINISH_ACCEL expands to nothing.  The .c files below are
 * then included and the macros undefined again so that the same sources can
 * be included a second time with different definitions.
 */
#define ACCEL_MMIO
#define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
#define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
#define FINISH_ACCEL()

#include "radeon_commonfuncs.c"
#if defined(RENDER) && defined(USE_XAA)
#include "radeon_render.c"
#endif
#include "radeon_accelfuncs.c"

#undef ACCEL_MMIO
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef FINISH_ACCEL
60443df4709Smrg
60543df4709Smrg#ifdef XF86DRI
60643df4709Smrg
/* CP (ring buffer) instantiation of the shared acceleration sources.
 *
 * Here the ACCEL_* macros map onto the ring macros: BEGIN_ACCEL(n) reserves
 * 2*n ring entries (presumably one header and one value per register write
 * -- TODO confirm against OUT_RING_REG), OUT_ACCEL_REG emits a register
 * write into the ring and FINISH_ACCEL advances the ring.  The same .c files
 * as in the MMIO case are then included and the macros undefined again.
 */
#define ACCEL_CP
#define ACCEL_PREAMBLE()						\
    RING_LOCALS;							\
    RADEONCP_REFRESH(pScrn, info)
#define BEGIN_ACCEL(n)          BEGIN_RING(2*(n))
#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
#define FINISH_ACCEL()          ADVANCE_RING()


#include "radeon_commonfuncs.c"
#if defined(RENDER) && defined(USE_XAA)
#include "radeon_render.c"
#endif
#include "radeon_accelfuncs.c"

#undef ACCEL_CP
#undef ACCEL_PREAMBLE
#undef BEGIN_ACCEL
#undef OUT_ACCEL_REG
#undef FINISH_ACCEL
62743df4709Smrg
62843df4709Smrg/* Stop the CP */
62943df4709Smrgint RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
63043df4709Smrg{
63143df4709Smrg    drm_radeon_cp_stop_t  stop;
63243df4709Smrg    int              ret, i;
63343df4709Smrg
63443df4709Smrg    stop.flush = 1;
63543df4709Smrg    stop.idle  = 1;
63643df4709Smrg
63743df4709Smrg    ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
63843df4709Smrg			  sizeof(drm_radeon_cp_stop_t));
63943df4709Smrg
64043df4709Smrg    if (ret == 0) {
64143df4709Smrg	return 0;
64243df4709Smrg    } else if (errno != EBUSY) {
64343df4709Smrg	return -errno;
64443df4709Smrg    }
64543df4709Smrg
64643df4709Smrg    stop.flush = 0;
64743df4709Smrg
64843df4709Smrg    i = 0;
64943df4709Smrg    do {
65043df4709Smrg	ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop,
65143df4709Smrg			      sizeof(drm_radeon_cp_stop_t));
65243df4709Smrg    } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY);
65343df4709Smrg
65443df4709Smrg    if (ret == 0) {
65543df4709Smrg	return 0;
65643df4709Smrg    } else if (errno != EBUSY) {
65743df4709Smrg	return -errno;
65843df4709Smrg    }
65943df4709Smrg
66043df4709Smrg    stop.idle = 0;
66143df4709Smrg
66243df4709Smrg    if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP,
66343df4709Smrg			&stop, sizeof(drm_radeon_cp_stop_t))) {
66443df4709Smrg	return -errno;
66543df4709Smrg    } else {
66643df4709Smrg	return 0;
66743df4709Smrg    }
66843df4709Smrg}
66943df4709Smrg
/* Size in bytes of sixteen 32-bit words.
 * NOTE(review): presumably the space held back in each indirect buffer --
 * the users of this macro are not visible in this part of the file; confirm.
 */
#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
67143df4709Smrg
67243df4709Smrg/* Get an indirect buffer for the CP 2D acceleration commands  */
67343df4709SmrgdrmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
67443df4709Smrg{
67543df4709Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
67643df4709Smrg    drmDMAReq      dma;
67743df4709Smrg    drmBufPtr      buf = NULL;
67843df4709Smrg    int            indx = 0;
67943df4709Smrg    int            size = 0;
68043df4709Smrg    int            i = 0;
68143df4709Smrg    int            ret;
68243df4709Smrg
68343df4709Smrg#if 0
68443df4709Smrg    /* FIXME: pScrn->pScreen has not been initialized when this is first
68543df4709Smrg     * called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
68643df4709Smrg     * the screen index from pScrn, which is initialized, and then get
68743df4709Smrg     * the screen from screenInfo.screens[index], but that is a hack.
68843df4709Smrg     */
68943df4709Smrg    dma.context = DRIGetContext(pScrn->pScreen);
69043df4709Smrg#else
69143df4709Smrg    /* This is the X server's context */
69243df4709Smrg    dma.context = 0x00000001;
69343df4709Smrg#endif
69443df4709Smrg
69543df4709Smrg    dma.send_count    = 0;
69643df4709Smrg    dma.send_list     = NULL;
69743df4709Smrg    dma.send_sizes    = NULL;
69843df4709Smrg    dma.flags         = 0;
69943df4709Smrg    dma.request_count = 1;
70043df4709Smrg    dma.request_size  = RADEON_BUFFER_SIZE;
70143df4709Smrg    dma.request_list  = &indx;
70243df4709Smrg    dma.request_sizes = &size;
70343df4709Smrg    dma.granted_count = 0;
70443df4709Smrg
70543df4709Smrg    while (1) {
70643df4709Smrg	do {
70743df4709Smrg	    ret = drmDMA(info->dri->drmFD, &dma);
70843df4709Smrg	    if (ret && ret != -EBUSY) {
70943df4709Smrg		xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
71043df4709Smrg			   "%s: CP GetBuffer %d\n", __FUNCTION__, ret);
71143df4709Smrg	    }
71243df4709Smrg	} while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
71343df4709Smrg
71443df4709Smrg	if (ret == 0) {
71543df4709Smrg	    buf = &info->dri->buffers->list[indx];
71643df4709Smrg	    buf->used = 0;
71743df4709Smrg	    if (RADEON_VERBOSE) {
71843df4709Smrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
71943df4709Smrg			   "   GetBuffer returning %d %p\n",
72043df4709Smrg			   buf->idx, buf->address);
72143df4709Smrg	    }
72243df4709Smrg	    return buf;
72343df4709Smrg	}
72443df4709Smrg
72543df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
72643df4709Smrg		   "GetBuffer timed out, resetting engine...\n");
72743df4709Smrg
72843df4709Smrg	if (info->ChipFamily < CHIP_FAMILY_R600) {
72943df4709Smrg	    RADEONEngineReset(pScrn);
73043df4709Smrg	    RADEONEngineRestore(pScrn);
73143df4709Smrg	} else
73243df4709Smrg	    R600EngineReset(pScrn);
73343df4709Smrg
73443df4709Smrg	/* Always restart the engine when doing CP 2D acceleration */
73543df4709Smrg	RADEONCP_RESET(pScrn, info);
73643df4709Smrg	RADEONCP_START(pScrn, info);
73743df4709Smrg    }
73843df4709Smrg}
73943df4709Smrg
74043df4709Smrg/* Flush the indirect buffer to the kernel for submission to the card */
74143df4709Smrgvoid RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
74243df4709Smrg{
74343df4709Smrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
74443df4709Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
74543df4709Smrg    int                start  = info->cp->indirectStart;
74643df4709Smrg    drm_radeon_indirect_t  indirect;
74743df4709Smrg
74843df4709Smrg    assert(!info->cs);
74943df4709Smrg    if (!buffer) return;
75043df4709Smrg    if (start == buffer->used && !discard) return;
75143df4709Smrg
75243df4709Smrg    if (RADEON_VERBOSE) {
75343df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
75443df4709Smrg		   buffer->idx);
75543df4709Smrg    }
75643df4709Smrg
75743df4709Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
75843df4709Smrg	if (buffer->used & 0x3c) {
75943df4709Smrg	    RING_LOCALS;
76043df4709Smrg
76143df4709Smrg	    while (buffer->used & 0x3c) {
76243df4709Smrg		BEGIN_RING(1);
76343df4709Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
76443df4709Smrg		ADVANCE_RING();
76543df4709Smrg	    }
76643df4709Smrg	}
76743df4709Smrg    }
76843df4709Smrg
76943df4709Smrg    indirect.idx     = buffer->idx;
77043df4709Smrg    indirect.start   = start;
77143df4709Smrg    indirect.end     = buffer->used;
77243df4709Smrg    indirect.discard = discard;
77343df4709Smrg
77443df4709Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
77543df4709Smrg			&indirect, sizeof(drm_radeon_indirect_t));
77643df4709Smrg
77743df4709Smrg    if (discard) {
77843df4709Smrg	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);
77943df4709Smrg	info->cp->indirectStart  = 0;
78043df4709Smrg    } else {
78143df4709Smrg	/* Start on a double word boundary */
78243df4709Smrg	info->cp->indirectStart  = buffer->used = RADEON_ALIGN(buffer->used, 8);
78343df4709Smrg	if (RADEON_VERBOSE) {
78443df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "   Starting at %d\n",
78543df4709Smrg		       info->cp->indirectStart);
78643df4709Smrg	}
78743df4709Smrg    }
78843df4709Smrg}
78943df4709Smrg
79043df4709Smrg/* Flush and release the indirect buffer */
79143df4709Smrgvoid RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
79243df4709Smrg{
79343df4709Smrg    RADEONInfoPtr      info   = RADEONPTR(pScrn);
79443df4709Smrg    drmBufPtr          buffer = info->cp->indirectBuffer;
79543df4709Smrg    int                start  = info->cp->indirectStart;
79643df4709Smrg    drm_radeon_indirect_t  indirect;
79743df4709Smrg
79843df4709Smrg    assert(!info->cs);
79943df4709Smrg    if (info->ChipFamily >= CHIP_FAMILY_R600) {
80043df4709Smrg	if (buffer && (buffer->used & 0x3c)) {
80143df4709Smrg	    RING_LOCALS;
80243df4709Smrg
80343df4709Smrg	    while (buffer->used & 0x3c) {
80443df4709Smrg		BEGIN_RING(1);
80543df4709Smrg		OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
80643df4709Smrg		ADVANCE_RING();
80743df4709Smrg	    }
80843df4709Smrg	}
80943df4709Smrg    }
81043df4709Smrg
81143df4709Smrg    info->cp->indirectBuffer = NULL;
81243df4709Smrg    info->cp->indirectStart  = 0;
81343df4709Smrg
81443df4709Smrg    if (!buffer) return;
81543df4709Smrg
81643df4709Smrg    if (RADEON_VERBOSE) {
81743df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n",
81843df4709Smrg		   buffer->idx);
81943df4709Smrg    }
82043df4709Smrg
82143df4709Smrg    indirect.idx     = buffer->idx;
82243df4709Smrg    indirect.start   = start;
82343df4709Smrg    indirect.end     = buffer->used;
82443df4709Smrg    indirect.discard = 1;
82543df4709Smrg
82643df4709Smrg    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
82743df4709Smrg			&indirect, sizeof(drm_radeon_indirect_t));
82843df4709Smrg}
82943df4709Smrg
83043df4709Smrg/** \brief Calculate HostDataBlit parameters from pointer and pitch
83143df4709Smrg *
83243df4709Smrg * This is a helper for the trivial HostDataBlit users that don't need to worry
83343df4709Smrg * about tiling etc.
83443df4709Smrg */
83543df4709Smrgvoid
83643df4709SmrgRADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp,
83743df4709Smrg		     uint32_t *dstPitchOff, int *x, int *y)
83843df4709Smrg{
83943df4709Smrg    RADEONInfoPtr info = RADEONPTR( pScrn );
84043df4709Smrg    uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation;
84143df4709Smrg
84243df4709Smrg    *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10;
84343df4709Smrg    *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch;
84443df4709Smrg    *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp;
84543df4709Smrg}
84643df4709Smrg
84743df4709Smrg/* Set up a hostdata blit to transfer data from system memory to the
84843df4709Smrg * framebuffer. Returns the address where the data can be written to and sets
84943df4709Smrg * the dstPitch and hpass variables as required.
85043df4709Smrg */
85143df4709Smrguint8_t*
85243df4709SmrgRADEONHostDataBlit(
85343df4709Smrg    ScrnInfoPtr pScrn,
85443df4709Smrg    unsigned int cpp,
85543df4709Smrg    unsigned int w,
85643df4709Smrg    uint32_t dstPitchOff,
85743df4709Smrg    uint32_t *bufPitch,
85843df4709Smrg    int x,
85943df4709Smrg    int *y,
86043df4709Smrg    unsigned int *h,
86143df4709Smrg    unsigned int *hpass
86243df4709Smrg){
86343df4709Smrg    RADEONInfoPtr info = RADEONPTR( pScrn );
86443df4709Smrg    uint32_t format, dwords;
86543df4709Smrg    uint8_t *ret;
86643df4709Smrg    RING_LOCALS;
86743df4709Smrg
86843df4709Smrg    if ( *h == 0 )
86943df4709Smrg    {
87043df4709Smrg	return NULL;
87143df4709Smrg    }
87243df4709Smrg
87343df4709Smrg    switch ( cpp )
87443df4709Smrg    {
87543df4709Smrg    case 4:
87643df4709Smrg	format = RADEON_GMC_DST_32BPP;
87743df4709Smrg	*bufPitch = 4 * w;
87843df4709Smrg	break;
87943df4709Smrg    case 2:
88043df4709Smrg	format = RADEON_GMC_DST_16BPP;
88143df4709Smrg	*bufPitch = 2 * RADEON_ALIGN(w, 2);
88243df4709Smrg	break;
88343df4709Smrg    case 1:
88443df4709Smrg	format = RADEON_GMC_DST_8BPP_CI;
88543df4709Smrg	*bufPitch = RADEON_ALIGN(w, 4);
88643df4709Smrg	break;
88743df4709Smrg    default:
88843df4709Smrg	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
88943df4709Smrg		    "%s: Unsupported cpp %d!\n", __func__, cpp );
89043df4709Smrg	return NULL;
89143df4709Smrg    }
89243df4709Smrg
89343df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
89443df4709Smrg    /* Swap doesn't work on R300 and later, it's handled during the
89543df4709Smrg     * copy to ind. buffer pass
89643df4709Smrg     */
89743df4709Smrg    if (info->ChipFamily < CHIP_FAMILY_R300) {
89843df4709Smrg        BEGIN_RING(2);
89943df4709Smrg	if (cpp == 2)
90043df4709Smrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
90143df4709Smrg			 RADEON_HOST_DATA_SWAP_HDW);
90243df4709Smrg	else if (cpp == 1)
90343df4709Smrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
90443df4709Smrg			 RADEON_HOST_DATA_SWAP_32BIT);
90543df4709Smrg	else
90643df4709Smrg	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
90743df4709Smrg			 RADEON_HOST_DATA_SWAP_NONE);
90843df4709Smrg	ADVANCE_RING();
90943df4709Smrg    }
91043df4709Smrg#endif
91143df4709Smrg
91243df4709Smrg    /*RADEON_PURGE_CACHE();
91343df4709Smrg      RADEON_WAIT_UNTIL_IDLE();*/
91443df4709Smrg
91543df4709Smrg    *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) );
91643df4709Smrg    dwords = *hpass * *bufPitch / 4;
91743df4709Smrg
91843df4709Smrg    BEGIN_RING( dwords + 10 );
91943df4709Smrg    OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) );
92043df4709Smrg    OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
92143df4709Smrg	    | RADEON_GMC_DST_CLIPPING
92243df4709Smrg	    | RADEON_GMC_BRUSH_NONE
92343df4709Smrg	    | format
92443df4709Smrg	    | RADEON_GMC_SRC_DATATYPE_COLOR
92543df4709Smrg	    | RADEON_ROP3_S
92643df4709Smrg	    | RADEON_DP_SRC_SOURCE_HOST_DATA
92743df4709Smrg	    | RADEON_GMC_CLR_CMP_CNTL_DIS
92843df4709Smrg	    | RADEON_GMC_WR_MSK_DIS );
92943df4709Smrg    OUT_RING( dstPitchOff );
93043df4709Smrg    OUT_RING( (*y << 16) | x );
93143df4709Smrg    OUT_RING( ((*y + *hpass) << 16) | (x + w) );
93243df4709Smrg    OUT_RING( 0xffffffff );
93343df4709Smrg    OUT_RING( 0xffffffff );
93443df4709Smrg    OUT_RING( *y << 16 | x );
93543df4709Smrg    OUT_RING( *hpass << 16 | (*bufPitch / cpp) );
93643df4709Smrg    OUT_RING( dwords );
93743df4709Smrg
93843df4709Smrg    ret = ( uint8_t* )&__head[__count];
93943df4709Smrg
94043df4709Smrg    __count += dwords;
94143df4709Smrg    ADVANCE_RING();
94243df4709Smrg
94343df4709Smrg    *y += *hpass;
94443df4709Smrg    *h -= *hpass;
94543df4709Smrg
94643df4709Smrg    return ret;
947209ff23fSmrg}
948209ff23fSmrg
949209ff23fSmrgvoid RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
950209ff23fSmrg{
951209ff23fSmrg    switch(swap) {
952209ff23fSmrg    case RADEON_HOST_DATA_SWAP_HDW:
953209ff23fSmrg        {
954209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
955209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
956209ff23fSmrg	    unsigned int nwords = size >> 2;
957209ff23fSmrg
958209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
959209ff23fSmrg		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
960209ff23fSmrg	    return;
961209ff23fSmrg        }
962209ff23fSmrg    case RADEON_HOST_DATA_SWAP_32BIT:
96372f1971aSmartin	if (((uintptr_t)dst & 1) || ((uintptr_t)src & 1)) {
96472f1971aSmartin	    uint8_t *d = (uint8_t *)dst;
96572f1971aSmartin	    uint8_t *s = (uint8_t *)src;
96672f1971aSmartin	    unsigned int nwords = size >> 2;
96772f1971aSmartin
96872f1971aSmartin	    for (; nwords > 0; --nwords, d+=4, s+=4) {
96972f1971aSmartin	        d[0] = s[3];
97072f1971aSmartin		d[1] = s[2];
97172f1971aSmartin		d[2] = s[1];
97272f1971aSmartin		d[3] = s[0];
97372f1971aSmartin	    }
97472f1971aSmartin	    return;
97572f1971aSmartin        } else if (((uintptr_t)dst & 3) || ((uintptr_t)src & 3)) {
97672f1971aSmartin	    /* copy 16bit wise */
97772f1971aSmartin	    uint16_t *d = (uint16_t *)dst;
97872f1971aSmartin	    uint16_t *s = (uint16_t *)src;
97972f1971aSmartin	    unsigned int nwords = size >> 2;
98072f1971aSmartin
98172f1971aSmartin	    for (; nwords > 0; --nwords, d+=2, s+=2) {
98272f1971aSmartin	        d[0] = ((s[1] >> 8) & 0xff) | ((s[1] & 0xff) << 8);
98372f1971aSmartin	        d[1] = ((s[0] >> 8) & 0xff) | ((s[0] & 0xff) << 8);
98472f1971aSmartin	    }
98572f1971aSmartin	    return;
98672f1971aSmartin	} else {
987209ff23fSmrg	    unsigned int *d = (unsigned int *)dst;
988209ff23fSmrg	    unsigned int *s = (unsigned int *)src;
989209ff23fSmrg	    unsigned int nwords = size >> 2;
990209ff23fSmrg
991209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
992209ff23fSmrg#ifdef __powerpc__
993209ff23fSmrg		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
994209ff23fSmrg#else
995209ff23fSmrg		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
996209ff23fSmrg			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
997209ff23fSmrg#endif
998209ff23fSmrg	    return;
999209ff23fSmrg        }
1000209ff23fSmrg    case RADEON_HOST_DATA_SWAP_16BIT:
1001209ff23fSmrg        {
1002209ff23fSmrg	    unsigned short *d = (unsigned short *)dst;
1003209ff23fSmrg	    unsigned short *s = (unsigned short *)src;
1004209ff23fSmrg	    unsigned int nwords = size >> 1;
1005209ff23fSmrg
1006209ff23fSmrg	    for (; nwords > 0; --nwords, ++d, ++s)
1007209ff23fSmrg#ifdef __powerpc__
100868105dcbSveego		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
1009209ff23fSmrg#else
101068105dcbSveego	        *d = (*s >> 8) | (*s << 8);
1011209ff23fSmrg#endif
1012209ff23fSmrg	    return;
1013209ff23fSmrg	}
1014209ff23fSmrg    }
1015209ff23fSmrg    if (src != dst)
1016ad43ddacSmrg	memcpy(dst, src, size);
1017209ff23fSmrg}
1018209ff23fSmrg
101943df4709Smrg/* Copies a single pass worth of data for a hostdata blit set up by
102043df4709Smrg * RADEONHostDataBlit().
102143df4709Smrg */
102243df4709Smrgvoid
102343df4709SmrgRADEONHostDataBlitCopyPass(
102443df4709Smrg    ScrnInfoPtr pScrn,
102543df4709Smrg    unsigned int cpp,
102643df4709Smrg    uint8_t *dst,
102743df4709Smrg    uint8_t *src,
102843df4709Smrg    unsigned int hpass,
102943df4709Smrg    unsigned int dstPitch,
103043df4709Smrg    unsigned int srcPitch
103143df4709Smrg){
103243df4709Smrg
103343df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
103443df4709Smrg    RADEONInfoPtr info = RADEONPTR( pScrn );
103543df4709Smrg#endif
1036209ff23fSmrg
103743df4709Smrg    /* RADEONHostDataBlitCopy can return NULL ! */
103843df4709Smrg    if( (dst==NULL) || (src==NULL)) return;
103943df4709Smrg
104043df4709Smrg    if ( dstPitch == srcPitch )
104143df4709Smrg    {
104243df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
104343df4709Smrg        if (info->ChipFamily >= CHIP_FAMILY_R300) {
104443df4709Smrg	    switch(cpp) {
104543df4709Smrg	    case 1:
104643df4709Smrg		RADEONCopySwap(dst, src, hpass * dstPitch,
104743df4709Smrg			       RADEON_HOST_DATA_SWAP_32BIT);
104843df4709Smrg		return;
104943df4709Smrg	    case 2:
105043df4709Smrg	        RADEONCopySwap(dst, src, hpass * dstPitch,
105143df4709Smrg			       RADEON_HOST_DATA_SWAP_HDW);
105243df4709Smrg		return;
105343df4709Smrg	    }
105443df4709Smrg	}
105543df4709Smrg#endif
105643df4709Smrg	memcpy( dst, src, hpass * dstPitch );
105743df4709Smrg    }
105843df4709Smrg    else
105943df4709Smrg    {
106043df4709Smrg	unsigned int minPitch = min( dstPitch, srcPitch );
106143df4709Smrg	while ( hpass-- )
106243df4709Smrg	{
106343df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
106443df4709Smrg            if (info->ChipFamily >= CHIP_FAMILY_R300) {
106543df4709Smrg		switch(cpp) {
106643df4709Smrg		case 1:
106743df4709Smrg		    RADEONCopySwap(dst, src, minPitch,
106843df4709Smrg				   RADEON_HOST_DATA_SWAP_32BIT);
106943df4709Smrg		    goto next;
107043df4709Smrg		case 2:
107143df4709Smrg	            RADEONCopySwap(dst, src, minPitch,
107243df4709Smrg				   RADEON_HOST_DATA_SWAP_HDW);
107343df4709Smrg		    goto next;
107443df4709Smrg		}
107543df4709Smrg	    }
107643df4709Smrg#endif
107743df4709Smrg	    memcpy( dst, src, minPitch );
107843df4709Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
107943df4709Smrg	next:
108043df4709Smrg#endif
108143df4709Smrg	    src += srcPitch;
108243df4709Smrg	    dst += dstPitch;
108343df4709Smrg	}
108443df4709Smrg    }
108543df4709Smrg}
108643df4709Smrg
108743df4709Smrg#endif
1088209ff23fSmrg
1089209ff23fSmrgBool RADEONAccelInit(ScreenPtr pScreen)
1090209ff23fSmrg{
109168105dcbSveego    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1092209ff23fSmrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1093209ff23fSmrg
109443df4709Smrg#ifdef USE_EXA
109543df4709Smrg    if (info->useEXA) {
109643df4709Smrg# ifdef XF86DRI
109743df4709Smrg	if (info->directRenderingEnabled) {
109843df4709Smrg#ifdef XF86DRM_MODE
109943df4709Smrg	    if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
110043df4709Smrg		if (!EVERGREENDrawInit(pScreen))
110143df4709Smrg		    return FALSE;
110243df4709Smrg	    } else
110343df4709Smrg#endif
110443df4709Smrg	      if (info->ChipFamily >= CHIP_FAMILY_R600) {
1105b7e1c893Smrg		if (!R600DrawInit(pScreen))
1106b7e1c893Smrg		    return FALSE;
1107b7e1c893Smrg	    } else {
110843df4709Smrg		if (!RADEONDrawInitCP(pScreen))
1109b7e1c893Smrg		    return FALSE;
1110b7e1c893Smrg	    }
111143df4709Smrg	} else
111243df4709Smrg# endif /* XF86DRI */
111343df4709Smrg	{
111443df4709Smrg	    if (info->ChipFamily >= CHIP_FAMILY_R600)
111543df4709Smrg		return FALSE;
111643df4709Smrg	    else {
111743df4709Smrg		if (!RADEONDrawInitMMIO(pScreen))
111843df4709Smrg		    return FALSE;
111943df4709Smrg	    }
112043df4709Smrg	}
1121209ff23fSmrg    }
112243df4709Smrg#endif /* USE_EXA */
112343df4709Smrg#ifdef USE_XAA
112443df4709Smrg    if (!info->useEXA) {
112543df4709Smrg	XAAInfoRecPtr  a;
1126209ff23fSmrg
112743df4709Smrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
112843df4709Smrg	    return FALSE;
1129b7e1c893Smrg
113043df4709Smrg	if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
113143df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
113243df4709Smrg	    return FALSE;
1133209ff23fSmrg	}
1134209ff23fSmrg
113543df4709Smrg#ifdef XF86DRI
113643df4709Smrg	if (info->directRenderingEnabled)
113743df4709Smrg	    RADEONAccelInitCP(pScreen, a);
1138209ff23fSmrg	else
113943df4709Smrg#endif /* XF86DRI */
114043df4709Smrg	    RADEONAccelInitMMIO(pScreen, a);
1141209ff23fSmrg
114243df4709Smrg	RADEONEngineInit(pScrn);
1143209ff23fSmrg
114443df4709Smrg	if (!XAAInit(pScreen, a)) {
114543df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
114643df4709Smrg	    return FALSE;
1147209ff23fSmrg	}
114843df4709Smrg    }
114943df4709Smrg#endif /* USE_XAA */
115043df4709Smrg    return TRUE;
115143df4709Smrg}
1152209ff23fSmrg
115343df4709Smrgvoid RADEONInit3DEngine(ScrnInfoPtr pScrn)
115443df4709Smrg{
115543df4709Smrg    RADEONInfoPtr info = RADEONPTR (pScrn);
1156209ff23fSmrg
115743df4709Smrg#ifdef XF86DRI
115843df4709Smrg    if (info->directRenderingEnabled) {
115943df4709Smrg	drm_radeon_sarea_t *pSAREAPriv;
1160209ff23fSmrg
116143df4709Smrg	if (!info->kms_enabled) {
116243df4709Smrg	    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
116343df4709Smrg	    pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen);
1164ad43ddacSmrg	}
116543df4709Smrg	RADEONInit3DEngineCP(pScrn);
116643df4709Smrg    } else
116743df4709Smrg#endif
116843df4709Smrg	RADEONInit3DEngineMMIO(pScrn);
1169209ff23fSmrg
117043df4709Smrg    info->accel_state->XInited3D = TRUE;
117143df4709Smrg}
1172209ff23fSmrg
117343df4709Smrg#ifdef USE_XAA
117443df4709Smrg#ifdef XF86DRI
117543df4709SmrgBool
117643df4709SmrgRADEONSetupMemXAA_DRI(ScreenPtr pScreen)
117743df4709Smrg{
117843df4709Smrg    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
117943df4709Smrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
118043df4709Smrg    int            cpp = info->CurrentLayout.pixel_bytes;
118143df4709Smrg    int            depthCpp = (info->dri->depthBits - 8) / 4;
118243df4709Smrg    int            width_bytes = pScrn->displayWidth * cpp;
118343df4709Smrg    int            bufferSize;
118443df4709Smrg    int            depthSize;
118543df4709Smrg    int            l;
118643df4709Smrg    int            scanlines;
118743df4709Smrg    int            texsizerequest;
118843df4709Smrg    BoxRec         MemBox;
118943df4709Smrg    FBAreaPtr      fbarea;
119043df4709Smrg
119143df4709Smrg    info->dri->frontOffset = 0;
119243df4709Smrg    info->dri->frontPitch = pScrn->displayWidth;
119343df4709Smrg    info->dri->backPitch = pScrn->displayWidth;
119443df4709Smrg
119543df4709Smrg    /* make sure we use 16 line alignment for tiling (8 might be enough).
119643df4709Smrg     * Might need that for non-XF86DRI too?
119743df4709Smrg     */
119843df4709Smrg    if (info->allowColorTiling) {
119943df4709Smrg	bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes,
120043df4709Smrg		      RADEON_GPU_PAGE_SIZE);
120143df4709Smrg    } else {
120243df4709Smrg        bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes,
120343df4709Smrg		      RADEON_GPU_PAGE_SIZE);
120443df4709Smrg    }
1205209ff23fSmrg
120643df4709Smrg    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
120743df4709Smrg     * which is always the case if color tiling is used due to color pitch
120843df4709Smrg     * but not necessarily otherwise, and its height a multiple of 16 lines.
120943df4709Smrg     */
121043df4709Smrg    info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32);
121143df4709Smrg    depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch
121243df4709Smrg		  * depthCpp, RADEON_GPU_PAGE_SIZE);
121343df4709Smrg
121443df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
121543df4709Smrg	       "Using %d MB GART aperture\n", info->dri->gartSize);
121643df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
121743df4709Smrg	       "Using %d MB for the ring buffer\n", info->dri->ringSize);
121843df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
121943df4709Smrg	       "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize);
122043df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
122143df4709Smrg	       "Using %d MB for GART textures\n", info->dri->gartTexSize);
122243df4709Smrg
122343df4709Smrg    /* Try for front, back, depth, and three framebuffers worth of
122443df4709Smrg     * pixmap cache.  Should be enough for a fullscreen background
122543df4709Smrg     * image plus some leftovers.
122643df4709Smrg     * If the FBTexPercent option was used, try to achieve that percentage instead,
122743df4709Smrg     * but still have at least one pixmap buffer (get problems with xvideo/render
122843df4709Smrg     * otherwise probably), and never reserve more than 3 offscreen buffers as it's
122943df4709Smrg     * probably useless for XAA.
123043df4709Smrg     */
123143df4709Smrg    if (info->dri->textureSize >= 0) {
123243df4709Smrg	texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize
123343df4709Smrg			 - 2 * width_bytes - 16384 - info->FbSecureSize)
123443df4709Smrg	/* first divide, then multiply or we'll get an overflow (been there...) */
123543df4709Smrg			 / 100 * info->dri->textureSize;
123643df4709Smrg    }
123743df4709Smrg    else {
123843df4709Smrg	texsizerequest = (int)info->FbMapSize / 2;
123943df4709Smrg    }
124043df4709Smrg    info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
124143df4709Smrg
124243df4709Smrg    /* If that gives us less than the requested memory, let's
124343df4709Smrg     * be greedy and grab some more.  Sorry, I care more about 3D
124443df4709Smrg     * performance than playing nicely, and you'll get around a full
124543df4709Smrg     * framebuffer's worth of pixmap cache anyway.
124643df4709Smrg     */
124743df4709Smrg    if (info->dri->textureSize < texsizerequest) {
124843df4709Smrg        info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
124943df4709Smrg    }
125043df4709Smrg    if (info->dri->textureSize < texsizerequest) {
125143df4709Smrg        info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
125243df4709Smrg    }
1253209ff23fSmrg
125443df4709Smrg    /* If there's still no space for textures, try without pixmap cache, but
125543df4709Smrg     * never use the reserved space, the space hw cursor and PCIGART table might
125643df4709Smrg     * use.
125743df4709Smrg     */
125843df4709Smrg    if (info->dri->textureSize < 0) {
125943df4709Smrg	info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
126043df4709Smrg	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
126143df4709Smrg    }
1262209ff23fSmrg
126343df4709Smrg    /* Check to see if there is more room available after the 8192nd
126443df4709Smrg     * scanline for textures
126543df4709Smrg     */
126643df4709Smrg    /* FIXME: what's this good for? condition is pretty much impossible to meet */
126743df4709Smrg    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
126843df4709Smrg	> info->dri->textureSize) {
126943df4709Smrg	info->dri->textureSize =
127043df4709Smrg		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
127143df4709Smrg    }
1272209ff23fSmrg
127343df4709Smrg    /* If backbuffer is disabled, don't allocate memory for it */
127443df4709Smrg    if (info->dri->noBackBuffer) {
127543df4709Smrg	info->dri->textureSize += bufferSize;
127643df4709Smrg    }
1277209ff23fSmrg
127843df4709Smrg    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
127943df4709Smrg       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
128043df4709Smrg       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
128143df4709Smrg       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
128243df4709Smrg       area otherwise).
128343df4709Smrg       This might cause some space at the end of the video memory to be unused, since it
128443df4709Smrg       can't be used (?) due to that log_tex_granularity thing???
128543df4709Smrg       Could use different copyscreentoscreen function for the pageflip copies
128643df4709Smrg       (which would use different src and dst offsets) to avoid this. */
128743df4709Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
128843df4709Smrg	info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize +
128943df4709Smrg			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
129043df4709Smrg    }
129143df4709Smrg    if (info->dri->textureSize > 0) {
129243df4709Smrg	l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS);
129343df4709Smrg	if (l < RADEON_LOG_TEX_GRANULARITY)
129443df4709Smrg	    l = RADEON_LOG_TEX_GRANULARITY;
129543df4709Smrg	/* Round the texture size up to the nearest whole number of
129643df4709Smrg	 * texture regions.  Again, be greedy about this, don't
129743df4709Smrg	 * round down.
129843df4709Smrg	 */
129943df4709Smrg	info->dri->log2TexGran = l;
130043df4709Smrg	info->dri->textureSize = (info->dri->textureSize >> l) << l;
1301209ff23fSmrg    } else {
130243df4709Smrg	info->dri->textureSize = 0;
130343df4709Smrg    }
1304209ff23fSmrg
130543df4709Smrg    /* Set a minimum usable local texture heap size.  This will fit
130643df4709Smrg     * two 256x256x32bpp textures.
130743df4709Smrg     */
130843df4709Smrg    if (info->dri->textureSize < 512 * 1024) {
130943df4709Smrg	info->dri->textureOffset = 0;
131043df4709Smrg	info->dri->textureSize = 0;
1311209ff23fSmrg    }
1312209ff23fSmrg
131343df4709Smrg    if (info->allowColorTiling && !info->dri->noBackBuffer) {
131443df4709Smrg	info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) /
131543df4709Smrg				    (width_bytes * 16)) * (width_bytes * 16);
131643df4709Smrg    }
131743df4709Smrg    else {
131843df4709Smrg	/* Reserve space for textures */
131943df4709Smrg	info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize,
132043df4709Smrg				     RADEON_GPU_PAGE_SIZE);
132143df4709Smrg    }
1322209ff23fSmrg
132343df4709Smrg    /* Reserve space for the shared depth
132443df4709Smrg     * buffer.
132543df4709Smrg     */
132643df4709Smrg    info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize,
132743df4709Smrg			       RADEON_GPU_PAGE_SIZE);
1328209ff23fSmrg
132943df4709Smrg    /* Reserve space for the shared back buffer */
133043df4709Smrg    if (info->dri->noBackBuffer) {
133143df4709Smrg       info->dri->backOffset = info->dri->depthOffset;
133243df4709Smrg    } else {
133343df4709Smrg       info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize,
133443df4709Smrg				 RADEON_GPU_PAGE_SIZE);
133543df4709Smrg    }
1336209ff23fSmrg
133743df4709Smrg    info->dri->backY = info->dri->backOffset / width_bytes;
133843df4709Smrg    info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp;
1339209ff23fSmrg
134043df4709Smrg    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
134143df4709Smrg    if (scanlines > 8191)
134243df4709Smrg	scanlines = 8191;
1343209ff23fSmrg
134443df4709Smrg    MemBox.x1 = 0;
134543df4709Smrg    MemBox.y1 = 0;
134643df4709Smrg    MemBox.x2 = pScrn->displayWidth;
134743df4709Smrg    MemBox.y2 = scanlines;
1348209ff23fSmrg
134943df4709Smrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
135043df4709Smrg        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
135143df4709Smrg		   "Memory manager initialization to "
135243df4709Smrg		   "(%d,%d) (%d,%d) failed\n",
135343df4709Smrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
135443df4709Smrg	return FALSE;
135543df4709Smrg    } else {
135643df4709Smrg	int  width, height;
135743df4709Smrg
135843df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
135943df4709Smrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
136043df4709Smrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
136143df4709Smrg	/* why oh why can't we just request modes which are guaranteed to be 16 lines
136243df4709Smrg	   aligned... sigh */
136343df4709Smrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
136443df4709Smrg						pScrn->displayWidth,
136543df4709Smrg						info->allowColorTiling ?
136643df4709Smrg						(RADEON_ALIGN(pScrn->virtualY, 16))
136743df4709Smrg						- pScrn->virtualY + 2 : 2,
136843df4709Smrg						0, NULL, NULL,
136943df4709Smrg						NULL))) {
137043df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
137143df4709Smrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
137243df4709Smrg		       fbarea->box.x1, fbarea->box.y1,
137343df4709Smrg		       fbarea->box.x2, fbarea->box.y2);
137443df4709Smrg	} else {
137543df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
137643df4709Smrg	}
1377209ff23fSmrg
137843df4709Smrg	RADEONDRIAllocatePCIGARTTable(pScreen);
1379209ff23fSmrg
138043df4709Smrg	if (xf86QueryLargestOffscreenArea(pScreen, &width,
138143df4709Smrg					  &height, 0, 0, 0)) {
138243df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
138343df4709Smrg		       "Largest offscreen area available: %d x %d\n",
138443df4709Smrg		       width, height);
13856322c902Smrg
138643df4709Smrg	    /* Lines in offscreen area needed for depth buffer and
138743df4709Smrg	     * textures
138843df4709Smrg	     */
138943df4709Smrg	    info->dri->depthTexLines = (scanlines
139043df4709Smrg					- info->dri->depthOffset / width_bytes);
139143df4709Smrg	    info->dri->backLines	    = (scanlines
139243df4709Smrg					       - info->dri->backOffset / width_bytes
139343df4709Smrg					       - info->dri->depthTexLines);
139443df4709Smrg	    info->dri->backArea	    = NULL;
139543df4709Smrg	} else {
139643df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
139743df4709Smrg		       "Unable to determine largest offscreen area "
139843df4709Smrg		       "available\n");
139943df4709Smrg	    return FALSE;
140043df4709Smrg	}
140143df4709Smrg    }
1402209ff23fSmrg
140343df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
140443df4709Smrg	       "Will use front buffer at offset 0x%x\n",
140543df4709Smrg	       info->dri->frontOffset);
140643df4709Smrg
140743df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
140843df4709Smrg	       "Will use back buffer at offset 0x%x\n",
140943df4709Smrg	       info->dri->backOffset);
141043df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
141143df4709Smrg	       "Will use depth buffer at offset 0x%x\n",
141243df4709Smrg	       info->dri->depthOffset);
141343df4709Smrg    if (info->cardType==CARD_PCIE)
141443df4709Smrg    	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
141543df4709Smrg	           "Will use %d kb for PCI GART table at offset 0x%x\n",
141643df4709Smrg		   info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset);
141743df4709Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
141843df4709Smrg	       "Will use %d kb for textures at offset 0x%x\n",
141943df4709Smrg	       info->dri->textureSize/1024, info->dri->textureOffset);
142043df4709Smrg
142143df4709Smrg    info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) |
142243df4709Smrg				   ((info->dri->frontOffset + info->fbLocation) >> 10));
142343df4709Smrg
142443df4709Smrg    info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) |
142543df4709Smrg				  ((info->dri->backOffset + info->fbLocation) >> 10));
142643df4709Smrg
142743df4709Smrg    info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) |
142843df4709Smrg				   ((info->dri->depthOffset + info->fbLocation) >> 10));
142943df4709Smrg    return TRUE;
143043df4709Smrg}
143143df4709Smrg#endif /* XF86DRI */
14326322c902Smrg
143343df4709SmrgBool
143443df4709SmrgRADEONSetupMemXAA(ScreenPtr pScreen)
1435209ff23fSmrg{
143643df4709Smrg    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
143743df4709Smrg    RADEONInfoPtr  info  = RADEONPTR(pScrn);
143843df4709Smrg    BoxRec         MemBox;
143943df4709Smrg    int            y2;
144043df4709Smrg
144143df4709Smrg    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
144243df4709Smrg
144343df4709Smrg    MemBox.x1 = 0;
144443df4709Smrg    MemBox.y1 = 0;
144543df4709Smrg    MemBox.x2 = pScrn->displayWidth;
144643df4709Smrg    y2 = info->FbMapSize / width_bytes;
144743df4709Smrg    if (y2 >= 32768)
144843df4709Smrg	y2 = 32767; /* because MemBox.y2 is signed short */
144943df4709Smrg    MemBox.y2 = y2;
145043df4709Smrg
145143df4709Smrg    /* The acceleration engine uses 14 bit
145243df4709Smrg     * signed coordinates, so we can't have any
145343df4709Smrg     * drawable caches beyond this region.
145443df4709Smrg     */
145543df4709Smrg    if (MemBox.y2 > 8191)
145643df4709Smrg	MemBox.y2 = 8191;
145743df4709Smrg
145843df4709Smrg    if (!xf86InitFBManager(pScreen, &MemBox)) {
145943df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
146043df4709Smrg		   "Memory manager initialization to "
146143df4709Smrg		   "(%d,%d) (%d,%d) failed\n",
146243df4709Smrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
146343df4709Smrg	return FALSE;
146443df4709Smrg    } else {
146543df4709Smrg	int       width, height;
146643df4709Smrg	FBAreaPtr fbarea;
146743df4709Smrg
146843df4709Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
146943df4709Smrg		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
147043df4709Smrg		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
147143df4709Smrg	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
147243df4709Smrg						pScrn->displayWidth,
147343df4709Smrg						info->allowColorTiling ?
147443df4709Smrg						(RADEON_ALIGN(pScrn->virtualY, 16))
147543df4709Smrg						- pScrn->virtualY + 2 : 2,
147643df4709Smrg						0, NULL, NULL,
147743df4709Smrg						NULL))) {
147843df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
147943df4709Smrg		       "Reserved area from (%d,%d) to (%d,%d)\n",
148043df4709Smrg		       fbarea->box.x1, fbarea->box.y1,
148143df4709Smrg		       fbarea->box.x2, fbarea->box.y2);
148243df4709Smrg	} else {
148343df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
148443df4709Smrg	}
148543df4709Smrg	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
148643df4709Smrg					      0, 0, 0)) {
148743df4709Smrg	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
148843df4709Smrg		       "Largest offscreen area available: %d x %d\n",
148943df4709Smrg		       width, height);
149043df4709Smrg	}
149143df4709Smrg	return TRUE;
149243df4709Smrg    }
1493209ff23fSmrg}
149443df4709Smrg#endif /* USE_XAA */
1495