radeon_accel.c revision ad43ddac
1/* 2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and 3 * VA Linux Systems Inc., Fremont, California. 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation on the rights to use, copy, modify, merge, 11 * publish, distribute, sublicense, and/or sell copies of the Software, 12 * and to permit persons to whom the Software is furnished to do so, 13 * subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR 23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 * DEALINGS IN THE SOFTWARE. 27 */ 28 29#ifdef HAVE_CONFIG_H 30#include "config.h" 31#endif 32 33/* 34 * Authors: 35 * Kevin E. Martin <martin@xfree86.org> 36 * Rickard E. Faith <faith@valinux.com> 37 * Alan Hourihane <alanh@fairlite.demon.co.uk> 38 * 39 * Credits: 40 * 41 * Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source 42 * code to his Radeon driver. Portions of this file are based on the 43 * initialization code for that driver. 44 * 45 * References: 46 * 47 * !!!! FIXME !!!! 48 * RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical 49 * Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April 50 * 1999. 
51 * 52 * RAGE 128 Software Development Manual (Technical Reference Manual P/N 53 * SDK-G04000 Rev. 0.01), ATI Technologies: June 1999. 54 * 55 * Notes on unimplemented XAA optimizations: 56 * 57 * SetClipping: This has been removed as XAA expects 16bit registers 58 * for full clipping. 59 * TwoPointLine: The Radeon supports this. Not Bresenham. 60 * DashedLine with non-power-of-two pattern length: Apparently, there is 61 * no way to set the length of the pattern -- it is always 62 * assumed to be 8 or 32 (or 1024?). 63 * ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference 64 * Manual where it states that monochrome expansion of frame 65 * buffer data is not supported. 66 * CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid 67 * direct/indirect method. If we had more data registers, 68 * then we could do better. If XAA supported a trigger write 69 * address, the code would be simpler. 70 * Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8 71 * pattern from frame buffer memory. 
 *   ImageWrites: Same as CPUToScreenColorExpandFill
 *
 */

#include <errno.h>
#include <string.h>
				/* Driver data structures */
#include "radeon.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_probe.h"
#include "radeon_version.h"
#ifdef XF86DRI
#define _XF86DRI_SERVER_
#include "radeon_drm.h"
#endif

#include "ati_pciids_gen.h"

				/* Line support */
#include "miline.h"

				/* X and server generic header files */
#include "xf86.h"

/* Forward declaration: used by the wait/buffer routines before its
 * definition further down in this file.
 */
static void R600EngineReset(ScrnInfoPtr pScrn);

#ifdef USE_XAA
/* Raster-op translation table (XAA builds only).  Each row is annotated
 * with the X11 GX* function it corresponds to, in GXclear..GXset order.
 * `rop' holds the ROP3 code written in terms of source (S) and
 * destination (D); `pattern' holds the counterpart written in terms of
 * pattern (P) and destination.
 * NOTE(review): presumably indexed directly by the GX* value -- confirm
 * against the users in the included accel sources.
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
#endif

/* The FIFO has 64 slots.  This routine waits until at least `entries'
 * of these slots are empty.
126 */ 127void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries) 128{ 129 RADEONInfoPtr info = RADEONPTR(pScrn); 130 unsigned char *RADEONMMIO = info->MMIO; 131 int i; 132 133 for (;;) { 134 for (i = 0; i < RADEON_TIMEOUT; i++) { 135 info->accel_state->fifo_slots = 136 INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 137 if (info->accel_state->fifo_slots >= entries) return; 138 } 139 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 140 "FIFO timed out: %u entries, stat=0x%08x\n", 141 (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, 142 (unsigned int)INREG(RADEON_RBBM_STATUS)); 143 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 144 "FIFO timed out, resetting engine...\n"); 145 RADEONEngineReset(pScrn); 146 RADEONEngineRestore(pScrn); 147#ifdef XF86DRI 148 if (info->directRenderingEnabled) { 149 RADEONCP_RESET(pScrn, info); 150 RADEONCP_START(pScrn, info); 151 } 152#endif 153 } 154} 155 156void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries) 157{ 158 RADEONInfoPtr info = RADEONPTR(pScrn); 159 unsigned char *RADEONMMIO = info->MMIO; 160 int i; 161 162 for (;;) { 163 for (i = 0; i < RADEON_TIMEOUT; i++) { 164 if (info->ChipFamily >= CHIP_FAMILY_RV770) 165 info->accel_state->fifo_slots = 166 INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK; 167 else 168 info->accel_state->fifo_slots = 169 INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK; 170 if (info->accel_state->fifo_slots >= entries) return; 171 } 172 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 173 "FIFO timed out: stat=0x%08x\n", 174 (unsigned int)INREG(R600_GRBM_STATUS)); 175 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 176 "FIFO timed out, resetting engine...\n"); 177 R600EngineReset(pScrn); 178#ifdef XF86DRI 179 if (info->directRenderingEnabled) { 180 RADEONCP_RESET(pScrn, info); 181 RADEONCP_START(pScrn, info); 182 } 183#endif 184 } 185} 186 187/* Flush all dirty data in the Pixel Cache to memory */ 188void RADEONEngineFlush(ScrnInfoPtr pScrn) 189{ 190 
RADEONInfoPtr info = RADEONPTR(pScrn); 191 unsigned char *RADEONMMIO = info->MMIO; 192 int i; 193 194 if (info->ChipFamily <= CHIP_FAMILY_RV280) { 195 OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, 196 RADEON_RB3D_DC_FLUSH_ALL, 197 ~RADEON_RB3D_DC_FLUSH_ALL); 198 for (i = 0; i < RADEON_TIMEOUT; i++) { 199 if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY)) 200 break; 201 } 202 if (i == RADEON_TIMEOUT) { 203 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 204 "DC flush timeout: %x\n", 205 (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT)); 206 } 207 } else { 208 OUTREGP(R300_DSTCACHE_CTLSTAT, 209 R300_RB2D_DC_FLUSH_ALL, 210 ~R300_RB2D_DC_FLUSH_ALL); 211 for (i = 0; i < RADEON_TIMEOUT; i++) { 212 if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY)) 213 break; 214 } 215 if (i == RADEON_TIMEOUT) { 216 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 217 "DC flush timeout: %x\n", 218 (unsigned int)INREG(R300_DSTCACHE_CTLSTAT)); 219 } 220 } 221} 222 223/* Reset graphics card to known state */ 224void RADEONEngineReset(ScrnInfoPtr pScrn) 225{ 226 RADEONInfoPtr info = RADEONPTR(pScrn); 227 unsigned char *RADEONMMIO = info->MMIO; 228 uint32_t clock_cntl_index; 229 uint32_t mclk_cntl; 230 uint32_t rbbm_soft_reset; 231 uint32_t host_path_cntl; 232 233 /* The following RBBM_SOFT_RESET sequence can help un-wedge 234 * an R300 after the command processor got stuck. 
235 */ 236 rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET); 237 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 238 RADEON_SOFT_RESET_CP | 239 RADEON_SOFT_RESET_HI | 240 RADEON_SOFT_RESET_SE | 241 RADEON_SOFT_RESET_RE | 242 RADEON_SOFT_RESET_PP | 243 RADEON_SOFT_RESET_E2 | 244 RADEON_SOFT_RESET_RB)); 245 INREG(RADEON_RBBM_SOFT_RESET); 246 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t) 247 ~(RADEON_SOFT_RESET_CP | 248 RADEON_SOFT_RESET_HI | 249 RADEON_SOFT_RESET_SE | 250 RADEON_SOFT_RESET_RE | 251 RADEON_SOFT_RESET_PP | 252 RADEON_SOFT_RESET_E2 | 253 RADEON_SOFT_RESET_RB))); 254 INREG(RADEON_RBBM_SOFT_RESET); 255 OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); 256 INREG(RADEON_RBBM_SOFT_RESET); 257 258 RADEONEngineFlush(pScrn); 259 260 clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX); 261 RADEONPllErrataAfterIndex(info); 262 263#if 0 /* taken care of by new PM code */ 264 /* Some ASICs have bugs with dynamic-on feature, which are 265 * ASIC-version dependent, so we force all blocks on for now 266 */ 267 if (info->HasCRTC2) { 268 uint32_t tmp; 269 270 tmp = INPLL(pScrn, RADEON_SCLK_CNTL); 271 OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) | 272 RADEON_CP_MAX_DYN_STOP_LAT | 273 RADEON_SCLK_FORCEON_MASK)); 274 275 if (info->ChipFamily == CHIP_FAMILY_RV200) { 276 tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); 277 OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON); 278 } 279 } 280#endif /* new PM code */ 281 282 mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL); 283 284#if 0 /* handled by new PM code */ 285 OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl | 286 RADEON_FORCEON_MCLKA | 287 RADEON_FORCEON_MCLKB | 288 RADEON_FORCEON_YCLKA | 289 RADEON_FORCEON_YCLKB | 290 RADEON_FORCEON_MC | 291 RADEON_FORCEON_AIC)); 292#endif /* new PM code */ 293 294 /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some 295 * unexpected behaviour on some machines. Here we use 296 * RADEON_HOST_PATH_CNTL to reset it. 
297 */ 298 host_path_cntl = INREG(RADEON_HOST_PATH_CNTL); 299 rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET); 300 301 if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { 302 uint32_t tmp; 303 304 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 305 RADEON_SOFT_RESET_CP | 306 RADEON_SOFT_RESET_HI | 307 RADEON_SOFT_RESET_E2)); 308 INREG(RADEON_RBBM_SOFT_RESET); 309 OUTREG(RADEON_RBBM_SOFT_RESET, 0); 310 tmp = INREG(RADEON_RB3D_DSTCACHE_MODE); 311 OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */ 312 } else { 313 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 314 RADEON_SOFT_RESET_CP | 315 RADEON_SOFT_RESET_SE | 316 RADEON_SOFT_RESET_RE | 317 RADEON_SOFT_RESET_PP | 318 RADEON_SOFT_RESET_E2 | 319 RADEON_SOFT_RESET_RB)); 320 INREG(RADEON_RBBM_SOFT_RESET); 321 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t) 322 ~(RADEON_SOFT_RESET_CP | 323 RADEON_SOFT_RESET_SE | 324 RADEON_SOFT_RESET_RE | 325 RADEON_SOFT_RESET_PP | 326 RADEON_SOFT_RESET_E2 | 327 RADEON_SOFT_RESET_RB))); 328 INREG(RADEON_RBBM_SOFT_RESET); 329 } 330 331 OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET); 332 INREG(RADEON_HOST_PATH_CNTL); 333 OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl); 334 335 if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT) 336 OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); 337 338 OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index); 339 RADEONPllErrataAfterIndex(info); 340 OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl); 341} 342 343/* Reset graphics card to known state */ 344static void R600EngineReset(ScrnInfoPtr pScrn) 345{ 346 RADEONInfoPtr info = RADEONPTR(pScrn); 347 unsigned char *RADEONMMIO = info->MMIO; 348 uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl; 349 350 cp_ptr = INREG(R600_CP_RB_WPTR); 351 352 cp_me_cntl = INREG(R600_CP_ME_CNTL); 353 OUTREG(R600_CP_ME_CNTL, 0x10000000); 354 355 OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); 356 INREG(R600_GRBM_SOFT_RESET); 357 usleep (50); 358 OUTREG(R600_GRBM_SOFT_RESET, 0); 359 INREG(R600_GRBM_SOFT_RESET); 360 361 
OUTREG(R600_CP_RB_WPTR_DELAY, 0); 362 cp_rb_cntl = INREG(R600_CP_RB_CNTL); 363 OUTREG(R600_CP_RB_CNTL, 0x80000000); 364 365 OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); 366 OUTREG(R600_CP_RB_WPTR, cp_ptr); 367 OUTREG(R600_CP_RB_CNTL, cp_rb_cntl); 368 OUTREG(R600_CP_ME_CNTL, cp_me_cntl); 369 370} 371 372/* Restore the acceleration hardware to its previous state */ 373void RADEONEngineRestore(ScrnInfoPtr pScrn) 374{ 375 RADEONInfoPtr info = RADEONPTR(pScrn); 376 unsigned char *RADEONMMIO = info->MMIO; 377 378 if (info->cs) 379 return; 380 381 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 382 "EngineRestore (%d/%d)\n", 383 info->CurrentLayout.pixel_code, 384 info->CurrentLayout.bitsPerPixel); 385 386 /* Setup engine location. This shouldn't be necessary since we 387 * set them appropriately before any accel ops, but let's avoid 388 * random bogus DMA in case we inadvertently trigger the engine 389 * in the wrong place (happened). 390 */ 391 RADEONWaitForFifo(pScrn, 2); 392 OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset); 393 OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset); 394 395 RADEONWaitForFifo(pScrn, 1); 396#if X_BYTE_ORDER == X_BIG_ENDIAN 397 OUTREGP(RADEON_DP_DATATYPE, 398 RADEON_HOST_BIG_ENDIAN_EN, 399 ~RADEON_HOST_BIG_ENDIAN_EN); 400#else 401 OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN); 402#endif 403 404 /* Restore SURFACE_CNTL */ 405 OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); 406 407 RADEONWaitForFifo(pScrn, 1); 408 OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX 409 | RADEON_DEFAULT_SC_BOTTOM_MAX)); 410 RADEONWaitForFifo(pScrn, 1); 411 OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl 412 | RADEON_GMC_BRUSH_SOLID_COLOR 413 | RADEON_GMC_SRC_DATATYPE_COLOR)); 414 415 RADEONWaitForFifo(pScrn, 5); 416 OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); 417 OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000); 418 OUTREG(RADEON_DP_SRC_FRGD_CLR, 
0xffffffff); 419 OUTREG(RADEON_DP_SRC_BKGD_CLR, 0x00000000); 420 OUTREG(RADEON_DP_WRITE_MASK, 0xffffffff); 421 422 RADEONWaitForIdleMMIO(pScrn); 423 424 info->accel_state->XInited3D = FALSE; 425} 426 427static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes) 428{ 429 RADEONInfoPtr info = RADEONPTR(pScrn); 430 if (info->dri->pKernelDRMVersion->version_major < 2) { 431 drm_radeon_getparam_t np; 432 433 memset(&np, 0, sizeof(np)); 434 np.param = RADEON_PARAM_NUM_GB_PIPES; 435 np.value = num_pipes; 436 return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np)); 437 } else { 438 struct drm_radeon_info np2; 439 np2.value = (unsigned long)num_pipes; 440 np2.request = RADEON_INFO_NUM_GB_PIPES; 441 return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2)); 442 } 443} 444 445/* Initialize the acceleration hardware */ 446void RADEONEngineInit(ScrnInfoPtr pScrn) 447{ 448 RADEONInfoPtr info = RADEONPTR(pScrn); 449 unsigned char *RADEONMMIO = info->MMIO; 450 int datatype = 0; 451 info->accel_state->num_gb_pipes = 0; 452 453 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 454 "EngineInit (%d/%d)\n", 455 info->CurrentLayout.pixel_code, 456 info->CurrentLayout.bitsPerPixel); 457 458#ifdef XF86DRI 459 if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) { 460 int num_pipes; 461 462 if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) { 463 xf86DrvMsg(pScrn->scrnIndex, X_WARNING, 464 "Failed to determine num pipes from DRM, falling back to " 465 "manual look-up!\n"); 466 info->accel_state->num_gb_pipes = 0; 467 } else { 468 info->accel_state->num_gb_pipes = num_pipes; 469 } 470 } 471#endif 472 473 if (!info->cs) { 474 if ((info->ChipFamily == CHIP_FAMILY_RV410) || 475 (info->ChipFamily == CHIP_FAMILY_R420) || 476 (info->ChipFamily == CHIP_FAMILY_RS600) || 477 (info->ChipFamily == CHIP_FAMILY_RS690) || 478 (info->ChipFamily == CHIP_FAMILY_RS740) || 479 (info->ChipFamily == CHIP_FAMILY_RS400) || 480 
(info->ChipFamily == CHIP_FAMILY_RS480) || 481 IS_R500_3D) { 482 if (info->accel_state->num_gb_pipes == 0) { 483 uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT); 484 485 info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; 486 if (IS_R500_3D) 487 OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); 488 } 489 } else { 490 if (info->accel_state->num_gb_pipes == 0) { 491 if ((info->ChipFamily == CHIP_FAMILY_R300) || 492 (info->ChipFamily == CHIP_FAMILY_R350)) { 493 /* R3xx chips */ 494 info->accel_state->num_gb_pipes = 2; 495 } else { 496 /* RV3xx chips */ 497 info->accel_state->num_gb_pipes = 1; 498 } 499 } 500 } 501 502 /* RV410 SE cards only have 1 quadpipe */ 503 if ((info->Chipset == PCI_CHIP_RV410_5E4C) || 504 (info->Chipset == PCI_CHIP_RV410_5E4F)) 505 info->accel_state->num_gb_pipes = 1; 506 507 if (IS_R300_3D || IS_R500_3D) 508 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 509 "num quad-pipes is %d\n", info->accel_state->num_gb_pipes); 510 511 if (IS_R300_3D || IS_R500_3D) { 512 uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); 513 514 switch(info->accel_state->num_gb_pipes) { 515 case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; 516 case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; 517 case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; 518 default: 519 case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; 520 } 521 522 OUTREG(R300_GB_TILE_CONFIG, gb_tile_config); 523 OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); 524 if (info->ChipFamily >= CHIP_FAMILY_R420) 525 OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); 526 OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) | 527 R300_DC_AUTOFLUSH_ENABLE | 528 R300_DC_DC_DISABLE_IGNORE_PE)); 529 } else 530 OUTREG(RADEON_RB3D_CNTL, 0); 531 532 RADEONEngineReset(pScrn); 533 } 534 535 switch (info->CurrentLayout.pixel_code) { 536 case 8: datatype = 2; break; 537 case 15: 
datatype = 3; break; 538 case 16: datatype = 4; break; 539 case 24: datatype = 5; break; 540 case 32: datatype = 6; break; 541 default: 542 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 543 "Unknown depth/bpp = %d/%d (code = %d)\n", 544 info->CurrentLayout.depth, 545 info->CurrentLayout.bitsPerPixel, 546 info->CurrentLayout.pixel_code); 547 } 548 549 info->accel_state->dp_gui_master_cntl = 550 ((datatype << RADEON_GMC_DST_DATATYPE_SHIFT) 551 | RADEON_GMC_CLR_CMP_CNTL_DIS 552 | RADEON_GMC_DST_PITCH_OFFSET_CNTL); 553 554 RADEONEngineRestore(pScrn); 555} 556 557uint32_t radeonGetPixmapOffset(PixmapPtr pPix) 558{ 559 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 560 RADEONInfoPtr info = RADEONPTR(pScrn); 561 uint32_t offset = 0; 562 if (info->cs) 563 return 0; 564#ifdef USE_EXA 565 if (info->useEXA) { 566 offset = exaGetPixmapOffset(pPix); 567 } else 568#endif 569 { 570 offset = pPix->devPrivate.ptr - info->FB; 571 } 572 offset += info->fbLocation + pScrn->fbOffset; 573 return offset; 574} 575 576#define ACCEL_MMIO 577#define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO 578#define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) 579#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) 580#define FINISH_ACCEL() 581 582#include "radeon_commonfuncs.c" 583#if defined(RENDER) && defined(USE_XAA) 584#include "radeon_render.c" 585#endif 586#include "radeon_accelfuncs.c" 587 588#undef ACCEL_MMIO 589#undef ACCEL_PREAMBLE 590#undef BEGIN_ACCEL 591#undef OUT_ACCEL_REG 592#undef FINISH_ACCEL 593 594#ifdef XF86DRI 595 596#define ACCEL_CP 597#define ACCEL_PREAMBLE() \ 598 RING_LOCALS; \ 599 RADEONCP_REFRESH(pScrn, info) 600#define BEGIN_ACCEL(n) BEGIN_RING(2*(n)) 601#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) 602#define FINISH_ACCEL() ADVANCE_RING() 603 604 605#include "radeon_commonfuncs.c" 606#if defined(RENDER) && defined(USE_XAA) 607#include "radeon_render.c" 608#endif 609#include "radeon_accelfuncs.c" 610 611#undef ACCEL_CP 
612#undef ACCEL_PREAMBLE 613#undef BEGIN_ACCEL 614#undef OUT_ACCEL_REG 615#undef FINISH_ACCEL 616 617/* Stop the CP */ 618int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info) 619{ 620 drm_radeon_cp_stop_t stop; 621 int ret, i; 622 623 stop.flush = 1; 624 stop.idle = 1; 625 626 ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop, 627 sizeof(drm_radeon_cp_stop_t)); 628 629 if (ret == 0) { 630 return 0; 631 } else if (errno != EBUSY) { 632 return -errno; 633 } 634 635 stop.flush = 0; 636 637 i = 0; 638 do { 639 ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop, 640 sizeof(drm_radeon_cp_stop_t)); 641 } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY); 642 643 if (ret == 0) { 644 return 0; 645 } else if (errno != EBUSY) { 646 return -errno; 647 } 648 649 stop.idle = 0; 650 651 if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, 652 &stop, sizeof(drm_radeon_cp_stop_t))) { 653 return -errno; 654 } else { 655 return 0; 656 } 657} 658 659#define RADEON_IB_RESERVE (16 * sizeof(uint32_t)) 660 661/* Get an indirect buffer for the CP 2D acceleration commands */ 662drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) 663{ 664 RADEONInfoPtr info = RADEONPTR(pScrn); 665 drmDMAReq dma; 666 drmBufPtr buf = NULL; 667 int indx = 0; 668 int size = 0; 669 int i = 0; 670 int ret; 671 672#if 0 673 /* FIXME: pScrn->pScreen has not been initialized when this is first 674 * called from RADEONSelectBuffer via RADEONDRICPInit. We could use 675 * the screen index from pScrn, which is initialized, and then get 676 * the screen from screenInfo.screens[index], but that is a hack. 
677 */ 678 dma.context = DRIGetContext(pScrn->pScreen); 679#else 680 /* This is the X server's context */ 681 dma.context = 0x00000001; 682#endif 683 684 dma.send_count = 0; 685 dma.send_list = NULL; 686 dma.send_sizes = NULL; 687 dma.flags = 0; 688 dma.request_count = 1; 689 dma.request_size = RADEON_BUFFER_SIZE; 690 dma.request_list = &indx; 691 dma.request_sizes = &size; 692 dma.granted_count = 0; 693 694 while (1) { 695 do { 696 ret = drmDMA(info->dri->drmFD, &dma); 697 if (ret && ret != -EBUSY) { 698 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 699 "%s: CP GetBuffer %d\n", __FUNCTION__, ret); 700 } 701 } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT)); 702 703 if (ret == 0) { 704 buf = &info->dri->buffers->list[indx]; 705 buf->used = 0; 706 if (RADEON_VERBOSE) { 707 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 708 " GetBuffer returning %d %p\n", 709 buf->idx, buf->address); 710 } 711 return buf; 712 } 713 714 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 715 "GetBuffer timed out, resetting engine...\n"); 716 717 if (info->ChipFamily < CHIP_FAMILY_R600) { 718 RADEONEngineReset(pScrn); 719 RADEONEngineRestore(pScrn); 720 } else 721 R600EngineReset(pScrn); 722 723 /* Always restart the engine when doing CP 2D acceleration */ 724 RADEONCP_RESET(pScrn, info); 725 RADEONCP_START(pScrn, info); 726 } 727} 728 729/* Flush the indirect buffer to the kernel for submission to the card */ 730void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) 731{ 732 RADEONInfoPtr info = RADEONPTR(pScrn); 733 drmBufPtr buffer = info->cp->indirectBuffer; 734 int start = info->cp->indirectStart; 735 drm_radeon_indirect_t indirect; 736 737 assert(!info->cs); 738 if (!buffer) return; 739 if (start == buffer->used && !discard) return; 740 741 if (RADEON_VERBOSE) { 742 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", 743 buffer->idx); 744 } 745 746 if (info->ChipFamily >= CHIP_FAMILY_R600) { 747 if (buffer->used & 0x3c) { 748 RING_LOCALS; 749 750 while (buffer->used & 0x3c) { 751 BEGIN_RING(1); 
752 OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ 753 ADVANCE_RING(); 754 } 755 } 756 } 757 758 indirect.idx = buffer->idx; 759 indirect.start = start; 760 indirect.end = buffer->used; 761 indirect.discard = discard; 762 763 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 764 &indirect, sizeof(drm_radeon_indirect_t)); 765 766 if (discard) { 767 info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn); 768 info->cp->indirectStart = 0; 769 } else { 770 /* Start on a double word boundary */ 771 info->cp->indirectStart = buffer->used = RADEON_ALIGN(buffer->used, 8); 772 if (RADEON_VERBOSE) { 773 xf86DrvMsg(pScrn->scrnIndex, X_INFO, " Starting at %d\n", 774 info->cp->indirectStart); 775 } 776 } 777} 778 779/* Flush and release the indirect buffer */ 780void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) 781{ 782 RADEONInfoPtr info = RADEONPTR(pScrn); 783 drmBufPtr buffer = info->cp->indirectBuffer; 784 int start = info->cp->indirectStart; 785 drm_radeon_indirect_t indirect; 786 787 assert(!info->cs); 788 if (info->ChipFamily >= CHIP_FAMILY_R600) { 789 if (buffer && (buffer->used & 0x3c)) { 790 RING_LOCALS; 791 792 while (buffer->used & 0x3c) { 793 BEGIN_RING(1); 794 OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ 795 ADVANCE_RING(); 796 } 797 } 798 } 799 800 info->cp->indirectBuffer = NULL; 801 info->cp->indirectStart = 0; 802 803 if (!buffer) return; 804 805 if (RADEON_VERBOSE) { 806 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n", 807 buffer->idx); 808 } 809 810 indirect.idx = buffer->idx; 811 indirect.start = start; 812 indirect.end = buffer->used; 813 indirect.discard = 1; 814 815 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 816 &indirect, sizeof(drm_radeon_indirect_t)); 817} 818 819/** \brief Calculate HostDataBlit parameters from pointer and pitch 820 * 821 * This is a helper for the trivial HostDataBlit users that don't need to worry 822 * about tiling etc. 
823 */ 824void 825RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp, 826 uint32_t *dstPitchOff, int *x, int *y) 827{ 828 RADEONInfoPtr info = RADEONPTR( pScrn ); 829 uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation; 830 831 *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10; 832 *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch; 833 *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp; 834} 835 836/* Set up a hostdata blit to transfer data from system memory to the 837 * framebuffer. Returns the address where the data can be written to and sets 838 * the dstPitch and hpass variables as required. 839 */ 840uint8_t* 841RADEONHostDataBlit( 842 ScrnInfoPtr pScrn, 843 unsigned int cpp, 844 unsigned int w, 845 uint32_t dstPitchOff, 846 uint32_t *bufPitch, 847 int x, 848 int *y, 849 unsigned int *h, 850 unsigned int *hpass 851){ 852 RADEONInfoPtr info = RADEONPTR( pScrn ); 853 uint32_t format, dwords; 854 uint8_t *ret; 855 RING_LOCALS; 856 857 if ( *h == 0 ) 858 { 859 return NULL; 860 } 861 862 switch ( cpp ) 863 { 864 case 4: 865 format = RADEON_GMC_DST_32BPP; 866 *bufPitch = 4 * w; 867 break; 868 case 2: 869 format = RADEON_GMC_DST_16BPP; 870 *bufPitch = 2 * RADEON_ALIGN(w, 2); 871 break; 872 case 1: 873 format = RADEON_GMC_DST_8BPP_CI; 874 *bufPitch = RADEON_ALIGN(w, 4); 875 break; 876 default: 877 xf86DrvMsg( pScrn->scrnIndex, X_ERROR, 878 "%s: Unsupported cpp %d!\n", __func__, cpp ); 879 return NULL; 880 } 881 882#if X_BYTE_ORDER == X_BIG_ENDIAN 883 /* Swap doesn't work on R300 and later, it's handled during the 884 * copy to ind. 
buffer pass 885 */ 886 if (info->ChipFamily < CHIP_FAMILY_R300) { 887 BEGIN_RING(2); 888 if (cpp == 2) 889 OUT_RING_REG(RADEON_RBBM_GUICNTL, 890 RADEON_HOST_DATA_SWAP_HDW); 891 else if (cpp == 1) 892 OUT_RING_REG(RADEON_RBBM_GUICNTL, 893 RADEON_HOST_DATA_SWAP_32BIT); 894 else 895 OUT_RING_REG(RADEON_RBBM_GUICNTL, 896 RADEON_HOST_DATA_SWAP_NONE); 897 ADVANCE_RING(); 898 } 899#endif 900 901 /*RADEON_PURGE_CACHE(); 902 RADEON_WAIT_UNTIL_IDLE();*/ 903 904 *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) ); 905 dwords = *hpass * *bufPitch / 4; 906 907 BEGIN_RING( dwords + 10 ); 908 OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) ); 909 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL 910 | RADEON_GMC_DST_CLIPPING 911 | RADEON_GMC_BRUSH_NONE 912 | format 913 | RADEON_GMC_SRC_DATATYPE_COLOR 914 | RADEON_ROP3_S 915 | RADEON_DP_SRC_SOURCE_HOST_DATA 916 | RADEON_GMC_CLR_CMP_CNTL_DIS 917 | RADEON_GMC_WR_MSK_DIS ); 918 OUT_RING( dstPitchOff ); 919 OUT_RING( (*y << 16) | x ); 920 OUT_RING( ((*y + *hpass) << 16) | (x + w) ); 921 OUT_RING( 0xffffffff ); 922 OUT_RING( 0xffffffff ); 923 OUT_RING( *y << 16 | x ); 924 OUT_RING( *hpass << 16 | (*bufPitch / cpp) ); 925 OUT_RING( dwords ); 926 927 ret = ( uint8_t* )&__head[__count]; 928 929 __count += dwords; 930 ADVANCE_RING(); 931 932 *y += *hpass; 933 *h -= *hpass; 934 935 return ret; 936} 937 938void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap) 939{ 940 switch(swap) { 941 case RADEON_HOST_DATA_SWAP_HDW: 942 { 943 unsigned int *d = (unsigned int *)dst; 944 unsigned int *s = (unsigned int *)src; 945 unsigned int nwords = size >> 2; 946 947 for (; nwords > 0; --nwords, ++d, ++s) 948 *d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff); 949 return; 950 } 951 case RADEON_HOST_DATA_SWAP_32BIT: 952 { 953 unsigned int *d = (unsigned int *)dst; 954 unsigned int *s = (unsigned int *)src; 955 unsigned int nwords = size >> 2; 956 957 for (; nwords > 0; --nwords, ++d, ++s) 
958#ifdef __powerpc__ 959 asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d)); 960#else 961 *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00) 962 | ((*s & 0xff00) << 8) | ((*s & 0xff) << 24); 963#endif 964 return; 965 } 966 case RADEON_HOST_DATA_SWAP_16BIT: 967 { 968 unsigned short *d = (unsigned short *)dst; 969 unsigned short *s = (unsigned short *)src; 970 unsigned int nwords = size >> 1; 971 972 for (; nwords > 0; --nwords, ++d, ++s) 973#ifdef __powerpc__ 974 asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d)); 975#else 976 *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00) 977 | ((*s & 0xff00) << 8) | ((*s & 0xff) << 24); 978#endif 979 return; 980 } 981 } 982 if (src != dst) 983 memcpy(dst, src, size); 984} 985 986/* Copies a single pass worth of data for a hostdata blit set up by 987 * RADEONHostDataBlit(). 988 */ 989void 990RADEONHostDataBlitCopyPass( 991 ScrnInfoPtr pScrn, 992 unsigned int cpp, 993 uint8_t *dst, 994 uint8_t *src, 995 unsigned int hpass, 996 unsigned int dstPitch, 997 unsigned int srcPitch 998){ 999 1000#if X_BYTE_ORDER == X_BIG_ENDIAN 1001 RADEONInfoPtr info = RADEONPTR( pScrn ); 1002#endif 1003 1004 /* RADEONHostDataBlitCopy can return NULL ! 
*/ 1005 if( (dst==NULL) || (src==NULL)) return; 1006 1007 if ( dstPitch == srcPitch ) 1008 { 1009#if X_BYTE_ORDER == X_BIG_ENDIAN 1010 if (info->ChipFamily >= CHIP_FAMILY_R300) { 1011 switch(cpp) { 1012 case 1: 1013 RADEONCopySwap(dst, src, hpass * dstPitch, 1014 RADEON_HOST_DATA_SWAP_32BIT); 1015 return; 1016 case 2: 1017 RADEONCopySwap(dst, src, hpass * dstPitch, 1018 RADEON_HOST_DATA_SWAP_HDW); 1019 return; 1020 } 1021 } 1022#endif 1023 memcpy( dst, src, hpass * dstPitch ); 1024 } 1025 else 1026 { 1027 unsigned int minPitch = min( dstPitch, srcPitch ); 1028 while ( hpass-- ) 1029 { 1030#if X_BYTE_ORDER == X_BIG_ENDIAN 1031 if (info->ChipFamily >= CHIP_FAMILY_R300) { 1032 switch(cpp) { 1033 case 1: 1034 RADEONCopySwap(dst, src, minPitch, 1035 RADEON_HOST_DATA_SWAP_32BIT); 1036 goto next; 1037 case 2: 1038 RADEONCopySwap(dst, src, minPitch, 1039 RADEON_HOST_DATA_SWAP_HDW); 1040 goto next; 1041 } 1042 } 1043#endif 1044 memcpy( dst, src, minPitch ); 1045#if X_BYTE_ORDER == X_BIG_ENDIAN 1046 next: 1047#endif 1048 src += srcPitch; 1049 dst += dstPitch; 1050 } 1051 } 1052} 1053 1054#endif 1055 1056Bool RADEONAccelInit(ScreenPtr pScreen) 1057{ 1058 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1059 RADEONInfoPtr info = RADEONPTR(pScrn); 1060 1061#ifdef USE_EXA 1062 if (info->useEXA) { 1063# ifdef XF86DRI 1064 if (info->directRenderingEnabled) { 1065 if (info->ChipFamily >= CHIP_FAMILY_R600) { 1066 if (!R600DrawInit(pScreen)) 1067 return FALSE; 1068 } else { 1069 if (!RADEONDrawInitCP(pScreen)) 1070 return FALSE; 1071 } 1072 } else 1073# endif /* XF86DRI */ 1074 { 1075 if (info->ChipFamily >= CHIP_FAMILY_R600) 1076 return FALSE; 1077 else { 1078 if (!RADEONDrawInitMMIO(pScreen)) 1079 return FALSE; 1080 } 1081 } 1082 } 1083#endif /* USE_EXA */ 1084#ifdef USE_XAA 1085 if (!info->useEXA) { 1086 XAAInfoRecPtr a; 1087 1088 if (info->ChipFamily >= CHIP_FAMILY_R600) 1089 return FALSE; 1090 1091 if (!(a = info->accel_state->accel = XAACreateInfoRec())) { 1092 
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); 1093 return FALSE; 1094 } 1095 1096#ifdef XF86DRI 1097 if (info->directRenderingEnabled) 1098 RADEONAccelInitCP(pScreen, a); 1099 else 1100#endif /* XF86DRI */ 1101 RADEONAccelInitMMIO(pScreen, a); 1102 1103 RADEONEngineInit(pScrn); 1104 1105 if (!XAAInit(pScreen, a)) { 1106 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n"); 1107 return FALSE; 1108 } 1109 } 1110#endif /* USE_XAA */ 1111 return TRUE; 1112} 1113 1114void RADEONInit3DEngine(ScrnInfoPtr pScrn) 1115{ 1116 RADEONInfoPtr info = RADEONPTR (pScrn); 1117 1118#ifdef XF86DRI 1119 if (info->directRenderingEnabled) { 1120 drm_radeon_sarea_t *pSAREAPriv; 1121 1122 if (!info->kms_enabled) { 1123 pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); 1124 pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen); 1125 } 1126 RADEONInit3DEngineCP(pScrn); 1127 } else 1128#endif 1129 RADEONInit3DEngineMMIO(pScrn); 1130 1131 info->accel_state->XInited3D = TRUE; 1132} 1133 1134#ifdef USE_XAA 1135#ifdef XF86DRI 1136Bool 1137RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen) 1138{ 1139 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1140 RADEONInfoPtr info = RADEONPTR(pScrn); 1141 int cpp = info->CurrentLayout.pixel_bytes; 1142 int depthCpp = (info->dri->depthBits - 8) / 4; 1143 int width_bytes = pScrn->displayWidth * cpp; 1144 int bufferSize; 1145 int depthSize; 1146 int l; 1147 int scanlines; 1148 int texsizerequest; 1149 BoxRec MemBox; 1150 FBAreaPtr fbarea; 1151 1152 info->dri->frontOffset = 0; 1153 info->dri->frontPitch = pScrn->displayWidth; 1154 info->dri->backPitch = pScrn->displayWidth; 1155 1156 /* make sure we use 16 line alignment for tiling (8 might be enough). 1157 * Might need that for non-XF86DRI too? 
1158 */ 1159 if (info->allowColorTiling) { 1160 bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes, 1161 RADEON_GPU_PAGE_SIZE); 1162 } else { 1163 bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes, 1164 RADEON_GPU_PAGE_SIZE); 1165 } 1166 1167 /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels, 1168 * which is always the case if color tiling is used due to color pitch 1169 * but not necessarily otherwise, and its height a multiple of 16 lines. 1170 */ 1171 info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32); 1172 depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch 1173 * depthCpp, RADEON_GPU_PAGE_SIZE); 1174 1175 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1176 "Using %d MB GART aperture\n", info->dri->gartSize); 1177 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1178 "Using %d MB for the ring buffer\n", info->dri->ringSize); 1179 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1180 "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize); 1181 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1182 "Using %d MB for GART textures\n", info->dri->gartTexSize); 1183 1184 /* Try for front, back, depth, and three framebuffers worth of 1185 * pixmap cache. Should be enough for a fullscreen background 1186 * image plus some leftovers. 1187 * If the FBTexPercent option was used, try to achieve that percentage instead, 1188 * but still have at least one pixmap buffer (get problems with xvideo/render 1189 * otherwise probably), and never reserve more than 3 offscreen buffers as it's 1190 * probably useless for XAA. 1191 */ 1192 if (info->dri->textureSize >= 0) { 1193 texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize 1194 - 2 * width_bytes - 16384 - info->FbSecureSize) 1195 /* first divide, then multiply or we'll get an overflow (been there...) 
*/ 1196 / 100 * info->dri->textureSize; 1197 } 1198 else { 1199 texsizerequest = (int)info->FbMapSize / 2; 1200 } 1201 info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize; 1202 1203 /* If that gives us less than the requested memory, let's 1204 * be greedy and grab some more. Sorry, I care more about 3D 1205 * performance than playing nicely, and you'll get around a full 1206 * framebuffer's worth of pixmap cache anyway. 1207 */ 1208 if (info->dri->textureSize < texsizerequest) { 1209 info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize; 1210 } 1211 if (info->dri->textureSize < texsizerequest) { 1212 info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize; 1213 } 1214 1215 /* If there's still no space for textures, try without pixmap cache, but 1216 * never use the reserved space, the space hw cursor and PCIGART table might 1217 * use. 1218 */ 1219 if (info->dri->textureSize < 0) { 1220 info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize 1221 - 2 * width_bytes - 16384 - info->FbSecureSize; 1222 } 1223 1224 /* Check to see if there is more room available after the 8192nd 1225 * scanline for textures 1226 */ 1227 /* FIXME: what's this good for? condition is pretty much impossible to meet */ 1228 if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize 1229 > info->dri->textureSize) { 1230 info->dri->textureSize = 1231 info->FbMapSize - 8192*width_bytes - bufferSize - depthSize; 1232 } 1233 1234 /* If backbuffer is disabled, don't allocate memory for it */ 1235 if (info->dri->noBackBuffer) { 1236 info->dri->textureSize += bufferSize; 1237 } 1238 1239 /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer! 1240 At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned, 1241 otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee 1242 it's still 4kb aligned for tiled case). 
Need to round up offset (might get into cursor 1243 area otherwise). 1244 This might cause some space at the end of the video memory to be unused, since it 1245 can't be used (?) due to that log_tex_granularity thing??? 1246 Could use different copyscreentoscreen function for the pageflip copies 1247 (which would use different src and dst offsets) to avoid this. */ 1248 if (info->allowColorTiling && !info->dri->noBackBuffer) { 1249 info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize + 1250 width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16); 1251 } 1252 if (info->dri->textureSize > 0) { 1253 l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS); 1254 if (l < RADEON_LOG_TEX_GRANULARITY) 1255 l = RADEON_LOG_TEX_GRANULARITY; 1256 /* Round the texture size up to the nearest whole number of 1257 * texture regions. Again, be greedy about this, don't 1258 * round down. 1259 */ 1260 info->dri->log2TexGran = l; 1261 info->dri->textureSize = (info->dri->textureSize >> l) << l; 1262 } else { 1263 info->dri->textureSize = 0; 1264 } 1265 1266 /* Set a minimum usable local texture heap size. This will fit 1267 * two 256x256x32bpp textures. 1268 */ 1269 if (info->dri->textureSize < 512 * 1024) { 1270 info->dri->textureOffset = 0; 1271 info->dri->textureSize = 0; 1272 } 1273 1274 if (info->allowColorTiling && !info->dri->noBackBuffer) { 1275 info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) / 1276 (width_bytes * 16)) * (width_bytes * 16); 1277 } 1278 else { 1279 /* Reserve space for textures */ 1280 info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize, 1281 RADEON_GPU_PAGE_SIZE); 1282 } 1283 1284 /* Reserve space for the shared depth 1285 * buffer. 
1286 */ 1287 info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize, 1288 RADEON_GPU_PAGE_SIZE); 1289 1290 /* Reserve space for the shared back buffer */ 1291 if (info->dri->noBackBuffer) { 1292 info->dri->backOffset = info->dri->depthOffset; 1293 } else { 1294 info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize, 1295 RADEON_GPU_PAGE_SIZE); 1296 } 1297 1298 info->dri->backY = info->dri->backOffset / width_bytes; 1299 info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp; 1300 1301 scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes; 1302 if (scanlines > 8191) 1303 scanlines = 8191; 1304 1305 MemBox.x1 = 0; 1306 MemBox.y1 = 0; 1307 MemBox.x2 = pScrn->displayWidth; 1308 MemBox.y2 = scanlines; 1309 1310 if (!xf86InitFBManager(pScreen, &MemBox)) { 1311 xf86DrvMsg(scrnIndex, X_ERROR, 1312 "Memory manager initialization to " 1313 "(%d,%d) (%d,%d) failed\n", 1314 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1315 return FALSE; 1316 } else { 1317 int width, height; 1318 1319 xf86DrvMsg(scrnIndex, X_INFO, 1320 "Memory manager initialized to (%d,%d) (%d,%d)\n", 1321 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1322 /* why oh why can't we just request modes which are guaranteed to be 16 lines 1323 aligned... sigh */ 1324 if ((fbarea = xf86AllocateOffscreenArea(pScreen, 1325 pScrn->displayWidth, 1326 info->allowColorTiling ? 
1327 (RADEON_ALIGN(pScrn->virtualY, 16)) 1328 - pScrn->virtualY + 2 : 2, 1329 0, NULL, NULL, 1330 NULL))) { 1331 xf86DrvMsg(scrnIndex, X_INFO, 1332 "Reserved area from (%d,%d) to (%d,%d)\n", 1333 fbarea->box.x1, fbarea->box.y1, 1334 fbarea->box.x2, fbarea->box.y2); 1335 } else { 1336 xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); 1337 } 1338 1339 RADEONDRIAllocatePCIGARTTable(pScreen); 1340 1341 if (xf86QueryLargestOffscreenArea(pScreen, &width, 1342 &height, 0, 0, 0)) { 1343 xf86DrvMsg(scrnIndex, X_INFO, 1344 "Largest offscreen area available: %d x %d\n", 1345 width, height); 1346 1347 /* Lines in offscreen area needed for depth buffer and 1348 * textures 1349 */ 1350 info->dri->depthTexLines = (scanlines 1351 - info->dri->depthOffset / width_bytes); 1352 info->dri->backLines = (scanlines 1353 - info->dri->backOffset / width_bytes 1354 - info->dri->depthTexLines); 1355 info->dri->backArea = NULL; 1356 } else { 1357 xf86DrvMsg(scrnIndex, X_ERROR, 1358 "Unable to determine largest offscreen area " 1359 "available\n"); 1360 return FALSE; 1361 } 1362 } 1363 1364 xf86DrvMsg(scrnIndex, X_INFO, 1365 "Will use front buffer at offset 0x%x\n", 1366 info->dri->frontOffset); 1367 1368 xf86DrvMsg(scrnIndex, X_INFO, 1369 "Will use back buffer at offset 0x%x\n", 1370 info->dri->backOffset); 1371 xf86DrvMsg(scrnIndex, X_INFO, 1372 "Will use depth buffer at offset 0x%x\n", 1373 info->dri->depthOffset); 1374 if (info->cardType==CARD_PCIE) 1375 xf86DrvMsg(scrnIndex, X_INFO, 1376 "Will use %d kb for PCI GART table at offset 0x%x\n", 1377 info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset); 1378 xf86DrvMsg(scrnIndex, X_INFO, 1379 "Will use %d kb for textures at offset 0x%x\n", 1380 info->dri->textureSize/1024, info->dri->textureOffset); 1381 1382 info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) | 1383 ((info->dri->frontOffset + info->fbLocation) >> 10)); 1384 1385 info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) 
| 1386 ((info->dri->backOffset + info->fbLocation) >> 10)); 1387 1388 info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) | 1389 ((info->dri->depthOffset + info->fbLocation) >> 10)); 1390 return TRUE; 1391} 1392#endif /* XF86DRI */ 1393 1394Bool 1395RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen) 1396{ 1397 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1398 RADEONInfoPtr info = RADEONPTR(pScrn); 1399 BoxRec MemBox; 1400 int y2; 1401 1402 int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes; 1403 1404 MemBox.x1 = 0; 1405 MemBox.y1 = 0; 1406 MemBox.x2 = pScrn->displayWidth; 1407 y2 = info->FbMapSize / width_bytes; 1408 if (y2 >= 32768) 1409 y2 = 32767; /* because MemBox.y2 is signed short */ 1410 MemBox.y2 = y2; 1411 1412 /* The acceleration engine uses 14 bit 1413 * signed coordinates, so we can't have any 1414 * drawable caches beyond this region. 1415 */ 1416 if (MemBox.y2 > 8191) 1417 MemBox.y2 = 8191; 1418 1419 if (!xf86InitFBManager(pScreen, &MemBox)) { 1420 xf86DrvMsg(scrnIndex, X_ERROR, 1421 "Memory manager initialization to " 1422 "(%d,%d) (%d,%d) failed\n", 1423 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1424 return FALSE; 1425 } else { 1426 int width, height; 1427 FBAreaPtr fbarea; 1428 1429 xf86DrvMsg(scrnIndex, X_INFO, 1430 "Memory manager initialized to (%d,%d) (%d,%d)\n", 1431 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1432 if ((fbarea = xf86AllocateOffscreenArea(pScreen, 1433 pScrn->displayWidth, 1434 info->allowColorTiling ? 
1435 (RADEON_ALIGN(pScrn->virtualY, 16)) 1436 - pScrn->virtualY + 2 : 2, 1437 0, NULL, NULL, 1438 NULL))) { 1439 xf86DrvMsg(scrnIndex, X_INFO, 1440 "Reserved area from (%d,%d) to (%d,%d)\n", 1441 fbarea->box.x1, fbarea->box.y1, 1442 fbarea->box.x2, fbarea->box.y2); 1443 } else { 1444 xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); 1445 } 1446 if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 1447 0, 0, 0)) { 1448 xf86DrvMsg(scrnIndex, X_INFO, 1449 "Largest offscreen area available: %d x %d\n", 1450 width, height); 1451 } 1452 return TRUE; 1453 } 1454} 1455#endif /* USE_XAA */ 1456