radeon_accel.c revision 72f1971a
1/* 2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and 3 * VA Linux Systems Inc., Fremont, California. 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation on the rights to use, copy, modify, merge, 11 * publish, distribute, sublicense, and/or sell copies of the Software, 12 * and to permit persons to whom the Software is furnished to do so, 13 * subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR 23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 * DEALINGS IN THE SOFTWARE. 27 */ 28 29#ifdef HAVE_CONFIG_H 30#include "config.h" 31#endif 32 33/* 34 * Authors: 35 * Kevin E. Martin <martin@xfree86.org> 36 * Rickard E. Faith <faith@valinux.com> 37 * Alan Hourihane <alanh@fairlite.demon.co.uk> 38 * 39 * Credits: 40 * 41 * Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source 42 * code to his Radeon driver. Portions of this file are based on the 43 * initialization code for that driver. 44 * 45 * References: 46 * 47 * !!!! FIXME !!!! 48 * RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical 49 * Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April 50 * 1999. 
51 * 52 * RAGE 128 Software Development Manual (Technical Reference Manual P/N 53 * SDK-G04000 Rev. 0.01), ATI Technologies: June 1999. 54 * 55 * Notes on unimplemented XAA optimizations: 56 * 57 * SetClipping: This has been removed as XAA expects 16bit registers 58 * for full clipping. 59 * TwoPointLine: The Radeon supports this. Not Bresenham. 60 * DashedLine with non-power-of-two pattern length: Apparently, there is 61 * no way to set the length of the pattern -- it is always 62 * assumed to be 8 or 32 (or 1024?). 63 * ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference 64 * Manual where it states that monochrome expansion of frame 65 * buffer data is not supported. 66 * CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid 67 * direct/indirect method. If we had more data registers, 68 * then we could do better. If XAA supported a trigger write 69 * address, the code would be simpler. 70 * Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8 71 * pattern from frame buffer memory. 
 * ImageWrites: Same as CPUToScreenColorExpandFill
 *
 */

#include <errno.h>
#include <string.h>
#include <assert.h>
                                /* Driver data structures */
#include "radeon.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_probe.h"
#include "radeon_version.h"
#ifdef XF86DRI
#define _XF86DRI_SERVER_
#include "radeon_drm.h"
#endif

#include "ati_pciids_gen.h"

                                /* Line support */
#include "miline.h"

                                /* X and server generic header files */
#include "xf86.h"

/* Defined further down; declared here because the R600 FIFO wait and the
 * CP buffer allocator call it before its definition.
 */
static void R600EngineReset(ScrnInfoPtr pScrn);

#ifdef USE_XAA
/* Raster-op translation table with one entry per X11 GC function, listed
 * in GXclear..GXset order.  Each entry carries two ROP3 codes: `rop' uses
 * the source-based (S) macros and `pattern' the pattern-based (P) macros.
 */
static struct {
    int rop;
    int pattern;
} RADEON_ROP[] = {
    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
};
#endif

/* The FIFO has 64 slots.  This routine waits until at least `entries'
 * of these slots are empty.
127 */ 128void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries) 129{ 130 RADEONInfoPtr info = RADEONPTR(pScrn); 131 unsigned char *RADEONMMIO = info->MMIO; 132 int i; 133 134 for (;;) { 135 for (i = 0; i < RADEON_TIMEOUT; i++) { 136 info->accel_state->fifo_slots = 137 INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 138 if (info->accel_state->fifo_slots >= entries) return; 139 } 140 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 141 "FIFO timed out: %u entries, stat=0x%08x\n", 142 (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, 143 (unsigned int)INREG(RADEON_RBBM_STATUS)); 144 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 145 "FIFO timed out, resetting engine...\n"); 146 RADEONEngineReset(pScrn); 147 RADEONEngineRestore(pScrn); 148#ifdef XF86DRI 149 if (info->directRenderingEnabled) { 150 RADEONCP_RESET(pScrn, info); 151 RADEONCP_START(pScrn, info); 152 } 153#endif 154 } 155} 156 157void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries) 158{ 159 RADEONInfoPtr info = RADEONPTR(pScrn); 160 unsigned char *RADEONMMIO = info->MMIO; 161 int i; 162 163 for (;;) { 164 for (i = 0; i < RADEON_TIMEOUT; i++) { 165 if (info->ChipFamily >= CHIP_FAMILY_RV770) 166 info->accel_state->fifo_slots = 167 INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK; 168 else 169 info->accel_state->fifo_slots = 170 INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK; 171 if (info->accel_state->fifo_slots >= entries) return; 172 } 173 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 174 "FIFO timed out: stat=0x%08x\n", 175 (unsigned int)INREG(R600_GRBM_STATUS)); 176 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 177 "FIFO timed out, resetting engine...\n"); 178 R600EngineReset(pScrn); 179#ifdef XF86DRI 180 if (info->directRenderingEnabled) { 181 RADEONCP_RESET(pScrn, info); 182 RADEONCP_START(pScrn, info); 183 } 184#endif 185 } 186} 187 188/* Flush all dirty data in the Pixel Cache to memory */ 189void RADEONEngineFlush(ScrnInfoPtr pScrn) 190{ 191 
RADEONInfoPtr info = RADEONPTR(pScrn); 192 unsigned char *RADEONMMIO = info->MMIO; 193 int i; 194 195 if (info->ChipFamily <= CHIP_FAMILY_RV280) { 196 OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, 197 RADEON_RB3D_DC_FLUSH_ALL, 198 ~RADEON_RB3D_DC_FLUSH_ALL); 199 for (i = 0; i < RADEON_TIMEOUT; i++) { 200 if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY)) 201 break; 202 } 203 if (i == RADEON_TIMEOUT) { 204 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 205 "DC flush timeout: %x\n", 206 (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT)); 207 } 208 } else { 209 OUTREGP(R300_DSTCACHE_CTLSTAT, 210 R300_RB2D_DC_FLUSH_ALL, 211 ~R300_RB2D_DC_FLUSH_ALL); 212 for (i = 0; i < RADEON_TIMEOUT; i++) { 213 if (!(INREG(R300_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY)) 214 break; 215 } 216 if (i == RADEON_TIMEOUT) { 217 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 218 "DC flush timeout: %x\n", 219 (unsigned int)INREG(R300_DSTCACHE_CTLSTAT)); 220 } 221 } 222} 223 224/* Reset graphics card to known state */ 225void RADEONEngineReset(ScrnInfoPtr pScrn) 226{ 227 RADEONInfoPtr info = RADEONPTR(pScrn); 228 unsigned char *RADEONMMIO = info->MMIO; 229 uint32_t clock_cntl_index; 230 uint32_t mclk_cntl; 231 uint32_t rbbm_soft_reset; 232 uint32_t host_path_cntl; 233 234 /* The following RBBM_SOFT_RESET sequence can help un-wedge 235 * an R300 after the command processor got stuck. 
236 */ 237 rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET); 238 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 239 RADEON_SOFT_RESET_CP | 240 RADEON_SOFT_RESET_HI | 241 RADEON_SOFT_RESET_SE | 242 RADEON_SOFT_RESET_RE | 243 RADEON_SOFT_RESET_PP | 244 RADEON_SOFT_RESET_E2 | 245 RADEON_SOFT_RESET_RB)); 246 INREG(RADEON_RBBM_SOFT_RESET); 247 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t) 248 ~(RADEON_SOFT_RESET_CP | 249 RADEON_SOFT_RESET_HI | 250 RADEON_SOFT_RESET_SE | 251 RADEON_SOFT_RESET_RE | 252 RADEON_SOFT_RESET_PP | 253 RADEON_SOFT_RESET_E2 | 254 RADEON_SOFT_RESET_RB))); 255 INREG(RADEON_RBBM_SOFT_RESET); 256 OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); 257 INREG(RADEON_RBBM_SOFT_RESET); 258 259 RADEONEngineFlush(pScrn); 260 261 clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX); 262 RADEONPllErrataAfterIndex(info); 263 264#if 0 /* taken care of by new PM code */ 265 /* Some ASICs have bugs with dynamic-on feature, which are 266 * ASIC-version dependent, so we force all blocks on for now 267 */ 268 if (info->HasCRTC2) { 269 uint32_t tmp; 270 271 tmp = INPLL(pScrn, RADEON_SCLK_CNTL); 272 OUTPLL(RADEON_SCLK_CNTL, ((tmp & ~RADEON_DYN_STOP_LAT_MASK) | 273 RADEON_CP_MAX_DYN_STOP_LAT | 274 RADEON_SCLK_FORCEON_MASK)); 275 276 if (info->ChipFamily == CHIP_FAMILY_RV200) { 277 tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); 278 OUTPLL(RADEON_SCLK_MORE_CNTL, tmp | RADEON_SCLK_MORE_FORCEON); 279 } 280 } 281#endif /* new PM code */ 282 283 mclk_cntl = INPLL(pScrn, RADEON_MCLK_CNTL); 284 285#if 0 /* handled by new PM code */ 286 OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl | 287 RADEON_FORCEON_MCLKA | 288 RADEON_FORCEON_MCLKB | 289 RADEON_FORCEON_YCLKA | 290 RADEON_FORCEON_YCLKB | 291 RADEON_FORCEON_MC | 292 RADEON_FORCEON_AIC)); 293#endif /* new PM code */ 294 295 /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some 296 * unexpected behaviour on some machines. Here we use 297 * RADEON_HOST_PATH_CNTL to reset it. 
298 */ 299 host_path_cntl = INREG(RADEON_HOST_PATH_CNTL); 300 rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET); 301 302 if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { 303 uint32_t tmp; 304 305 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 306 RADEON_SOFT_RESET_CP | 307 RADEON_SOFT_RESET_HI | 308 RADEON_SOFT_RESET_E2)); 309 INREG(RADEON_RBBM_SOFT_RESET); 310 OUTREG(RADEON_RBBM_SOFT_RESET, 0); 311 tmp = INREG(RADEON_RB3D_DSTCACHE_MODE); 312 OUTREG(RADEON_RB3D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */ 313 } else { 314 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | 315 RADEON_SOFT_RESET_CP | 316 RADEON_SOFT_RESET_SE | 317 RADEON_SOFT_RESET_RE | 318 RADEON_SOFT_RESET_PP | 319 RADEON_SOFT_RESET_E2 | 320 RADEON_SOFT_RESET_RB)); 321 INREG(RADEON_RBBM_SOFT_RESET); 322 OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & (uint32_t) 323 ~(RADEON_SOFT_RESET_CP | 324 RADEON_SOFT_RESET_SE | 325 RADEON_SOFT_RESET_RE | 326 RADEON_SOFT_RESET_PP | 327 RADEON_SOFT_RESET_E2 | 328 RADEON_SOFT_RESET_RB))); 329 INREG(RADEON_RBBM_SOFT_RESET); 330 } 331 332 if (!IS_R300_VARIANT && !IS_AVIVO_VARIANT) 333 OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); 334 335 OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index); 336 RADEONPllErrataAfterIndex(info); 337 OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl); 338} 339 340/* Reset graphics card to known state */ 341static void R600EngineReset(ScrnInfoPtr pScrn) 342{ 343 RADEONInfoPtr info = RADEONPTR(pScrn); 344 unsigned char *RADEONMMIO = info->MMIO; 345 uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl; 346 347 cp_ptr = INREG(R600_CP_RB_WPTR); 348 349 cp_me_cntl = INREG(R600_CP_ME_CNTL); 350 OUTREG(R600_CP_ME_CNTL, 0x10000000); 351 352 OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); 353 INREG(R600_GRBM_SOFT_RESET); 354 usleep (50); 355 OUTREG(R600_GRBM_SOFT_RESET, 0); 356 INREG(R600_GRBM_SOFT_RESET); 357 358 OUTREG(R600_CP_RB_WPTR_DELAY, 0); 359 cp_rb_cntl = INREG(R600_CP_RB_CNTL); 360 OUTREG(R600_CP_RB_CNTL, 0x80000000); 361 362 OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); 363 
OUTREG(R600_CP_RB_WPTR, cp_ptr); 364 OUTREG(R600_CP_RB_CNTL, cp_rb_cntl); 365 OUTREG(R600_CP_ME_CNTL, cp_me_cntl); 366 367} 368 369/* Restore the acceleration hardware to its previous state */ 370void RADEONEngineRestore(ScrnInfoPtr pScrn) 371{ 372 RADEONInfoPtr info = RADEONPTR(pScrn); 373 unsigned char *RADEONMMIO = info->MMIO; 374 375 if (info->cs) 376 return; 377 378 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 379 "EngineRestore (%d/%d)\n", 380 info->CurrentLayout.pixel_code, 381 info->CurrentLayout.bitsPerPixel); 382 383 /* Setup engine location. This shouldn't be necessary since we 384 * set them appropriately before any accel ops, but let's avoid 385 * random bogus DMA in case we inadvertently trigger the engine 386 * in the wrong place (happened). 387 */ 388 RADEONWaitForFifo(pScrn, 2); 389 OUTREG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset); 390 OUTREG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset); 391 392 RADEONWaitForFifo(pScrn, 1); 393#if X_BYTE_ORDER == X_BIG_ENDIAN 394 OUTREGP(RADEON_DP_DATATYPE, 395 RADEON_HOST_BIG_ENDIAN_EN, 396 ~RADEON_HOST_BIG_ENDIAN_EN); 397#else 398 OUTREGP(RADEON_DP_DATATYPE, 0, ~RADEON_HOST_BIG_ENDIAN_EN); 399#endif 400 401 /* Restore SURFACE_CNTL */ 402 OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); 403 404 RADEONWaitForFifo(pScrn, 1); 405 OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX 406 | RADEON_DEFAULT_SC_BOTTOM_MAX)); 407 RADEONWaitForFifo(pScrn, 1); 408 OUTREG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl 409 | RADEON_GMC_BRUSH_SOLID_COLOR 410 | RADEON_GMC_SRC_DATATYPE_COLOR)); 411 412 RADEONWaitForFifo(pScrn, 5); 413 OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); 414 OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0x00000000); 415 OUTREG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); 416 OUTREG(RADEON_DP_SRC_BKGD_CLR, 0x00000000); 417 OUTREG(RADEON_DP_WRITE_MASK, 0xffffffff); 418 419 RADEONWaitForIdleMMIO(pScrn); 420 421 
info->accel_state->XInited3D = FALSE; 422} 423 424static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes) 425{ 426 RADEONInfoPtr info = RADEONPTR(pScrn); 427 if (info->dri->pKernelDRMVersion->version_major < 2) { 428 drm_radeon_getparam_t np; 429 430 memset(&np, 0, sizeof(np)); 431 np.param = RADEON_PARAM_NUM_GB_PIPES; 432 np.value = num_pipes; 433 return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np)); 434 } else { 435 struct drm_radeon_info np2; 436 np2.value = (unsigned long)num_pipes; 437 np2.request = RADEON_INFO_NUM_GB_PIPES; 438 return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2)); 439 } 440} 441 442/* Initialize the acceleration hardware */ 443void RADEONEngineInit(ScrnInfoPtr pScrn) 444{ 445 RADEONInfoPtr info = RADEONPTR(pScrn); 446 unsigned char *RADEONMMIO = info->MMIO; 447 int datatype = 0; 448 info->accel_state->num_gb_pipes = 0; 449 450 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 451 "EngineInit (%d/%d)\n", 452 info->CurrentLayout.pixel_code, 453 info->CurrentLayout.bitsPerPixel); 454 455#ifdef XF86DRI 456 if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) { 457 int num_pipes; 458 459 if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) { 460 xf86DrvMsg(pScrn->scrnIndex, X_WARNING, 461 "Failed to determine num pipes from DRM, falling back to " 462 "manual look-up!\n"); 463 info->accel_state->num_gb_pipes = 0; 464 } else { 465 info->accel_state->num_gb_pipes = num_pipes; 466 } 467 } 468#endif 469 470 if (!info->cs) { 471 if ((info->ChipFamily == CHIP_FAMILY_RV410) || 472 (info->ChipFamily == CHIP_FAMILY_R420) || 473 (info->ChipFamily == CHIP_FAMILY_RS600) || 474 (info->ChipFamily == CHIP_FAMILY_RS690) || 475 (info->ChipFamily == CHIP_FAMILY_RS740) || 476 (info->ChipFamily == CHIP_FAMILY_RS400) || 477 (info->ChipFamily == CHIP_FAMILY_RS480) || 478 IS_R500_3D) { 479 if (info->accel_state->num_gb_pipes == 0) { 480 uint32_t gb_pipe_sel = 
INREG(R400_GB_PIPE_SELECT); 481 482 info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; 483 if (IS_R500_3D) 484 OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); 485 } 486 } else { 487 if (info->accel_state->num_gb_pipes == 0) { 488 if ((info->ChipFamily == CHIP_FAMILY_R300) || 489 (info->ChipFamily == CHIP_FAMILY_R350)) { 490 /* R3xx chips */ 491 info->accel_state->num_gb_pipes = 2; 492 } else { 493 /* RV3xx chips */ 494 info->accel_state->num_gb_pipes = 1; 495 } 496 } 497 } 498 499 /* SE cards only have 1 quadpipe */ 500 if ((info->Chipset == PCI_CHIP_RV410_5E4C) || 501 (info->Chipset == PCI_CHIP_RV410_5E4F) || 502 (info->Chipset == PCI_CHIP_R300_AD) || 503 (info->Chipset == PCI_CHIP_R350_AH)) 504 info->accel_state->num_gb_pipes = 1; 505 506 if (IS_R300_3D || IS_R500_3D) 507 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 508 "num quad-pipes is %d\n", info->accel_state->num_gb_pipes); 509 510 if (IS_R300_3D || IS_R500_3D) { 511 uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); 512 513 switch(info->accel_state->num_gb_pipes) { 514 case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; 515 case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; 516 case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; 517 default: 518 case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; 519 } 520 521 OUTREG(R300_GB_TILE_CONFIG, gb_tile_config); 522 OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); 523 if (info->ChipFamily >= CHIP_FAMILY_R420) 524 OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); 525 OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) | 526 R300_DC_AUTOFLUSH_ENABLE | 527 R300_DC_DC_DISABLE_IGNORE_PE)); 528 } else 529 OUTREG(RADEON_RB3D_CNTL, 0); 530 531 RADEONEngineReset(pScrn); 532 } 533 534 switch (info->CurrentLayout.pixel_code) { 535 case 8: datatype = 2; break; 536 case 15: datatype = 3; break; 537 case 16: datatype = 4; break; 
538 case 24: datatype = 5; break; 539 case 32: datatype = 6; break; 540 default: 541 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, 542 "Unknown depth/bpp = %d/%d (code = %d)\n", 543 info->CurrentLayout.depth, 544 info->CurrentLayout.bitsPerPixel, 545 info->CurrentLayout.pixel_code); 546 } 547 548 info->accel_state->dp_gui_master_cntl = 549 ((datatype << RADEON_GMC_DST_DATATYPE_SHIFT) 550 | RADEON_GMC_CLR_CMP_CNTL_DIS 551 | RADEON_GMC_DST_PITCH_OFFSET_CNTL); 552 553 RADEONEngineRestore(pScrn); 554} 555 556uint32_t radeonGetPixmapOffset(PixmapPtr pPix) 557{ 558 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 559 RADEONInfoPtr info = RADEONPTR(pScrn); 560 uint32_t offset = 0; 561 if (info->cs) 562 return 0; 563#ifdef USE_EXA 564 if (info->useEXA) { 565 offset = exaGetPixmapOffset(pPix); 566 } else 567#endif 568 { 569 offset = pPix->devPrivate.ptr - info->FB; 570 } 571 offset += info->fbLocation + pScrn->fbOffset; 572 return offset; 573} 574 575int radeon_cs_space_remaining(ScrnInfoPtr pScrn) 576{ 577 RADEONInfoPtr info = RADEONPTR(pScrn); 578 579#ifdef XF86DRM_MODE 580 if (info->cs) 581 return (info->cs->ndw - info->cs->cdw); 582 else 583#endif 584 return (info->cp->indirectBuffer->total - info->cp->indirectBuffer->used) / (int)sizeof(uint32_t); 585} 586 587#define ACCEL_MMIO 588#define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO 589#define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) 590#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) 591#define FINISH_ACCEL() 592 593#include "radeon_commonfuncs.c" 594#if defined(RENDER) && defined(USE_XAA) 595#include "radeon_render.c" 596#endif 597#include "radeon_accelfuncs.c" 598 599#undef ACCEL_MMIO 600#undef ACCEL_PREAMBLE 601#undef BEGIN_ACCEL 602#undef OUT_ACCEL_REG 603#undef FINISH_ACCEL 604 605#ifdef XF86DRI 606 607#define ACCEL_CP 608#define ACCEL_PREAMBLE() \ 609 RING_LOCALS; \ 610 RADEONCP_REFRESH(pScrn, info) 611#define BEGIN_ACCEL(n) BEGIN_RING(2*(n)) 612#define 
OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) 613#define FINISH_ACCEL() ADVANCE_RING() 614 615 616#include "radeon_commonfuncs.c" 617#if defined(RENDER) && defined(USE_XAA) 618#include "radeon_render.c" 619#endif 620#include "radeon_accelfuncs.c" 621 622#undef ACCEL_CP 623#undef ACCEL_PREAMBLE 624#undef BEGIN_ACCEL 625#undef OUT_ACCEL_REG 626#undef FINISH_ACCEL 627 628/* Stop the CP */ 629int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info) 630{ 631 drm_radeon_cp_stop_t stop; 632 int ret, i; 633 634 stop.flush = 1; 635 stop.idle = 1; 636 637 ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop, 638 sizeof(drm_radeon_cp_stop_t)); 639 640 if (ret == 0) { 641 return 0; 642 } else if (errno != EBUSY) { 643 return -errno; 644 } 645 646 stop.flush = 0; 647 648 i = 0; 649 do { 650 ret = drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, &stop, 651 sizeof(drm_radeon_cp_stop_t)); 652 } while (ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY); 653 654 if (ret == 0) { 655 return 0; 656 } else if (errno != EBUSY) { 657 return -errno; 658 } 659 660 stop.idle = 0; 661 662 if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_STOP, 663 &stop, sizeof(drm_radeon_cp_stop_t))) { 664 return -errno; 665 } else { 666 return 0; 667 } 668} 669 670#define RADEON_IB_RESERVE (16 * sizeof(uint32_t)) 671 672/* Get an indirect buffer for the CP 2D acceleration commands */ 673drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) 674{ 675 RADEONInfoPtr info = RADEONPTR(pScrn); 676 drmDMAReq dma; 677 drmBufPtr buf = NULL; 678 int indx = 0; 679 int size = 0; 680 int i = 0; 681 int ret; 682 683#if 0 684 /* FIXME: pScrn->pScreen has not been initialized when this is first 685 * called from RADEONSelectBuffer via RADEONDRICPInit. We could use 686 * the screen index from pScrn, which is initialized, and then get 687 * the screen from screenInfo.screens[index], but that is a hack. 
688 */ 689 dma.context = DRIGetContext(pScrn->pScreen); 690#else 691 /* This is the X server's context */ 692 dma.context = 0x00000001; 693#endif 694 695 dma.send_count = 0; 696 dma.send_list = NULL; 697 dma.send_sizes = NULL; 698 dma.flags = 0; 699 dma.request_count = 1; 700 dma.request_size = RADEON_BUFFER_SIZE; 701 dma.request_list = &indx; 702 dma.request_sizes = &size; 703 dma.granted_count = 0; 704 705 while (1) { 706 do { 707 ret = drmDMA(info->dri->drmFD, &dma); 708 if (ret && ret != -EBUSY) { 709 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 710 "%s: CP GetBuffer %d\n", __FUNCTION__, ret); 711 } 712 } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT)); 713 714 if (ret == 0) { 715 buf = &info->dri->buffers->list[indx]; 716 buf->used = 0; 717 if (RADEON_VERBOSE) { 718 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 719 " GetBuffer returning %d %p\n", 720 buf->idx, buf->address); 721 } 722 return buf; 723 } 724 725 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 726 "GetBuffer timed out, resetting engine...\n"); 727 728 if (info->ChipFamily < CHIP_FAMILY_R600) { 729 RADEONEngineReset(pScrn); 730 RADEONEngineRestore(pScrn); 731 } else 732 R600EngineReset(pScrn); 733 734 /* Always restart the engine when doing CP 2D acceleration */ 735 RADEONCP_RESET(pScrn, info); 736 RADEONCP_START(pScrn, info); 737 } 738} 739 740/* Flush the indirect buffer to the kernel for submission to the card */ 741void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) 742{ 743 RADEONInfoPtr info = RADEONPTR(pScrn); 744 drmBufPtr buffer = info->cp->indirectBuffer; 745 int start = info->cp->indirectStart; 746 drm_radeon_indirect_t indirect; 747 748 assert(!info->cs); 749 if (!buffer) return; 750 if (start == buffer->used && !discard) return; 751 752 if (RADEON_VERBOSE) { 753 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", 754 buffer->idx); 755 } 756 757 if (info->ChipFamily >= CHIP_FAMILY_R600) { 758 if (buffer->used & 0x3c) { 759 RING_LOCALS; 760 761 while (buffer->used & 0x3c) { 762 BEGIN_RING(1); 
763 OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ 764 ADVANCE_RING(); 765 } 766 } 767 } 768 769 indirect.idx = buffer->idx; 770 indirect.start = start; 771 indirect.end = buffer->used; 772 indirect.discard = discard; 773 774 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 775 &indirect, sizeof(drm_radeon_indirect_t)); 776 777 if (discard) { 778 info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn); 779 info->cp->indirectStart = 0; 780 } else { 781 /* Start on a double word boundary */ 782 info->cp->indirectStart = buffer->used = RADEON_ALIGN(buffer->used, 8); 783 if (RADEON_VERBOSE) { 784 xf86DrvMsg(pScrn->scrnIndex, X_INFO, " Starting at %d\n", 785 info->cp->indirectStart); 786 } 787 } 788} 789 790/* Flush and release the indirect buffer */ 791void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) 792{ 793 RADEONInfoPtr info = RADEONPTR(pScrn); 794 drmBufPtr buffer = info->cp->indirectBuffer; 795 int start = info->cp->indirectStart; 796 drm_radeon_indirect_t indirect; 797 798 assert(!info->cs); 799 if (info->ChipFamily >= CHIP_FAMILY_R600) { 800 if (buffer && (buffer->used & 0x3c)) { 801 RING_LOCALS; 802 803 while (buffer->used & 0x3c) { 804 BEGIN_RING(1); 805 OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ 806 ADVANCE_RING(); 807 } 808 } 809 } 810 811 info->cp->indirectBuffer = NULL; 812 info->cp->indirectStart = 0; 813 814 if (!buffer) return; 815 816 if (RADEON_VERBOSE) { 817 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n", 818 buffer->idx); 819 } 820 821 indirect.idx = buffer->idx; 822 indirect.start = start; 823 indirect.end = buffer->used; 824 indirect.discard = 1; 825 826 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 827 &indirect, sizeof(drm_radeon_indirect_t)); 828} 829 830/** \brief Calculate HostDataBlit parameters from pointer and pitch 831 * 832 * This is a helper for the trivial HostDataBlit users that don't need to worry 833 * about tiling etc. 
834 */ 835void 836RADEONHostDataParams(ScrnInfoPtr pScrn, uint8_t *dst, uint32_t pitch, int cpp, 837 uint32_t *dstPitchOff, int *x, int *y) 838{ 839 RADEONInfoPtr info = RADEONPTR( pScrn ); 840 uint32_t dstOffs = dst - (uint8_t*)info->FB + info->fbLocation; 841 842 *dstPitchOff = pitch << 16 | (dstOffs & ~RADEON_BUFFER_ALIGN) >> 10; 843 *y = ( dstOffs & RADEON_BUFFER_ALIGN ) / pitch; 844 *x = ( ( dstOffs & RADEON_BUFFER_ALIGN ) - ( *y * pitch ) ) / cpp; 845} 846 847/* Set up a hostdata blit to transfer data from system memory to the 848 * framebuffer. Returns the address where the data can be written to and sets 849 * the dstPitch and hpass variables as required. 850 */ 851uint8_t* 852RADEONHostDataBlit( 853 ScrnInfoPtr pScrn, 854 unsigned int cpp, 855 unsigned int w, 856 uint32_t dstPitchOff, 857 uint32_t *bufPitch, 858 int x, 859 int *y, 860 unsigned int *h, 861 unsigned int *hpass 862){ 863 RADEONInfoPtr info = RADEONPTR( pScrn ); 864 uint32_t format, dwords; 865 uint8_t *ret; 866 RING_LOCALS; 867 868 if ( *h == 0 ) 869 { 870 return NULL; 871 } 872 873 switch ( cpp ) 874 { 875 case 4: 876 format = RADEON_GMC_DST_32BPP; 877 *bufPitch = 4 * w; 878 break; 879 case 2: 880 format = RADEON_GMC_DST_16BPP; 881 *bufPitch = 2 * RADEON_ALIGN(w, 2); 882 break; 883 case 1: 884 format = RADEON_GMC_DST_8BPP_CI; 885 *bufPitch = RADEON_ALIGN(w, 4); 886 break; 887 default: 888 xf86DrvMsg( pScrn->scrnIndex, X_ERROR, 889 "%s: Unsupported cpp %d!\n", __func__, cpp ); 890 return NULL; 891 } 892 893#if X_BYTE_ORDER == X_BIG_ENDIAN 894 /* Swap doesn't work on R300 and later, it's handled during the 895 * copy to ind. 
buffer pass 896 */ 897 if (info->ChipFamily < CHIP_FAMILY_R300) { 898 BEGIN_RING(2); 899 if (cpp == 2) 900 OUT_RING_REG(RADEON_RBBM_GUICNTL, 901 RADEON_HOST_DATA_SWAP_HDW); 902 else if (cpp == 1) 903 OUT_RING_REG(RADEON_RBBM_GUICNTL, 904 RADEON_HOST_DATA_SWAP_32BIT); 905 else 906 OUT_RING_REG(RADEON_RBBM_GUICNTL, 907 RADEON_HOST_DATA_SWAP_NONE); 908 ADVANCE_RING(); 909 } 910#endif 911 912 /*RADEON_PURGE_CACHE(); 913 RADEON_WAIT_UNTIL_IDLE();*/ 914 915 *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 10 * 4 ) / *bufPitch ) ); 916 dwords = *hpass * *bufPitch / 4; 917 918 BEGIN_RING( dwords + 10 ); 919 OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 10 - 2 ) ); 920 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL 921 | RADEON_GMC_DST_CLIPPING 922 | RADEON_GMC_BRUSH_NONE 923 | format 924 | RADEON_GMC_SRC_DATATYPE_COLOR 925 | RADEON_ROP3_S 926 | RADEON_DP_SRC_SOURCE_HOST_DATA 927 | RADEON_GMC_CLR_CMP_CNTL_DIS 928 | RADEON_GMC_WR_MSK_DIS ); 929 OUT_RING( dstPitchOff ); 930 OUT_RING( (*y << 16) | x ); 931 OUT_RING( ((*y + *hpass) << 16) | (x + w) ); 932 OUT_RING( 0xffffffff ); 933 OUT_RING( 0xffffffff ); 934 OUT_RING( *y << 16 | x ); 935 OUT_RING( *hpass << 16 | (*bufPitch / cpp) ); 936 OUT_RING( dwords ); 937 938 ret = ( uint8_t* )&__head[__count]; 939 940 __count += dwords; 941 ADVANCE_RING(); 942 943 *y += *hpass; 944 *h -= *hpass; 945 946 return ret; 947} 948 949void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap) 950{ 951 switch(swap) { 952 case RADEON_HOST_DATA_SWAP_HDW: 953 { 954 unsigned int *d = (unsigned int *)dst; 955 unsigned int *s = (unsigned int *)src; 956 unsigned int nwords = size >> 2; 957 958 for (; nwords > 0; --nwords, ++d, ++s) 959 *d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff); 960 return; 961 } 962 case RADEON_HOST_DATA_SWAP_32BIT: 963 if (((uintptr_t)dst & 1) || ((uintptr_t)src & 1)) { 964 uint8_t *d = (uint8_t *)dst; 965 uint8_t *s = (uint8_t *)src; 966 unsigned int nwords = size >> 2; 967 968 for (; nwords > 
0; --nwords, d+=4, s+=4) { 969 d[0] = s[3]; 970 d[1] = s[2]; 971 d[2] = s[1]; 972 d[3] = s[0]; 973 } 974 return; 975 } else if (((uintptr_t)dst & 3) || ((uintptr_t)src & 3)) { 976 /* copy 16bit wise */ 977 uint16_t *d = (uint16_t *)dst; 978 uint16_t *s = (uint16_t *)src; 979 unsigned int nwords = size >> 2; 980 981 for (; nwords > 0; --nwords, d+=2, s+=2) { 982 d[0] = ((s[1] >> 8) & 0xff) | ((s[1] & 0xff) << 8); 983 d[1] = ((s[0] >> 8) & 0xff) | ((s[0] & 0xff) << 8); 984 } 985 return; 986 } else { 987 unsigned int *d = (unsigned int *)dst; 988 unsigned int *s = (unsigned int *)src; 989 unsigned int nwords = size >> 2; 990 991 for (; nwords > 0; --nwords, ++d, ++s) 992#ifdef __powerpc__ 993 asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d)); 994#else 995 *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00) 996 | ((*s & 0xff00) << 8) | ((*s & 0xff) << 24); 997#endif 998 return; 999 } 1000 case RADEON_HOST_DATA_SWAP_16BIT: 1001 { 1002 unsigned short *d = (unsigned short *)dst; 1003 unsigned short *s = (unsigned short *)src; 1004 unsigned int nwords = size >> 1; 1005 1006 for (; nwords > 0; --nwords, ++d, ++s) 1007#ifdef __powerpc__ 1008 asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d)); 1009#else 1010 *d = (*s >> 8) | (*s << 8); 1011#endif 1012 return; 1013 } 1014 } 1015 if (src != dst) 1016 memcpy(dst, src, size); 1017} 1018 1019/* Copies a single pass worth of data for a hostdata blit set up by 1020 * RADEONHostDataBlit(). 1021 */ 1022void 1023RADEONHostDataBlitCopyPass( 1024 ScrnInfoPtr pScrn, 1025 unsigned int cpp, 1026 uint8_t *dst, 1027 uint8_t *src, 1028 unsigned int hpass, 1029 unsigned int dstPitch, 1030 unsigned int srcPitch 1031){ 1032 1033#if X_BYTE_ORDER == X_BIG_ENDIAN 1034 RADEONInfoPtr info = RADEONPTR( pScrn ); 1035#endif 1036 1037 /* RADEONHostDataBlitCopy can return NULL ! 
*/ 1038 if( (dst==NULL) || (src==NULL)) return; 1039 1040 if ( dstPitch == srcPitch ) 1041 { 1042#if X_BYTE_ORDER == X_BIG_ENDIAN 1043 if (info->ChipFamily >= CHIP_FAMILY_R300) { 1044 switch(cpp) { 1045 case 1: 1046 RADEONCopySwap(dst, src, hpass * dstPitch, 1047 RADEON_HOST_DATA_SWAP_32BIT); 1048 return; 1049 case 2: 1050 RADEONCopySwap(dst, src, hpass * dstPitch, 1051 RADEON_HOST_DATA_SWAP_HDW); 1052 return; 1053 } 1054 } 1055#endif 1056 memcpy( dst, src, hpass * dstPitch ); 1057 } 1058 else 1059 { 1060 unsigned int minPitch = min( dstPitch, srcPitch ); 1061 while ( hpass-- ) 1062 { 1063#if X_BYTE_ORDER == X_BIG_ENDIAN 1064 if (info->ChipFamily >= CHIP_FAMILY_R300) { 1065 switch(cpp) { 1066 case 1: 1067 RADEONCopySwap(dst, src, minPitch, 1068 RADEON_HOST_DATA_SWAP_32BIT); 1069 goto next; 1070 case 2: 1071 RADEONCopySwap(dst, src, minPitch, 1072 RADEON_HOST_DATA_SWAP_HDW); 1073 goto next; 1074 } 1075 } 1076#endif 1077 memcpy( dst, src, minPitch ); 1078#if X_BYTE_ORDER == X_BIG_ENDIAN 1079 next: 1080#endif 1081 src += srcPitch; 1082 dst += dstPitch; 1083 } 1084 } 1085} 1086 1087#endif 1088 1089Bool RADEONAccelInit(ScreenPtr pScreen) 1090{ 1091 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1092 RADEONInfoPtr info = RADEONPTR(pScrn); 1093 1094#ifdef USE_EXA 1095 if (info->useEXA) { 1096# ifdef XF86DRI 1097 if (info->directRenderingEnabled) { 1098#ifdef XF86DRM_MODE 1099 if (info->ChipFamily >= CHIP_FAMILY_CEDAR) { 1100 if (!EVERGREENDrawInit(pScreen)) 1101 return FALSE; 1102 } else 1103#endif 1104 if (info->ChipFamily >= CHIP_FAMILY_R600) { 1105 if (!R600DrawInit(pScreen)) 1106 return FALSE; 1107 } else { 1108 if (!RADEONDrawInitCP(pScreen)) 1109 return FALSE; 1110 } 1111 } else 1112# endif /* XF86DRI */ 1113 { 1114 if (info->ChipFamily >= CHIP_FAMILY_R600) 1115 return FALSE; 1116 else { 1117 if (!RADEONDrawInitMMIO(pScreen)) 1118 return FALSE; 1119 } 1120 } 1121 } 1122#endif /* USE_EXA */ 1123#ifdef USE_XAA 1124 if (!info->useEXA) { 1125 XAAInfoRecPtr a; 1126 1127 
if (info->ChipFamily >= CHIP_FAMILY_R600) 1128 return FALSE; 1129 1130 if (!(a = info->accel_state->accel = XAACreateInfoRec())) { 1131 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); 1132 return FALSE; 1133 } 1134 1135#ifdef XF86DRI 1136 if (info->directRenderingEnabled) 1137 RADEONAccelInitCP(pScreen, a); 1138 else 1139#endif /* XF86DRI */ 1140 RADEONAccelInitMMIO(pScreen, a); 1141 1142 RADEONEngineInit(pScrn); 1143 1144 if (!XAAInit(pScreen, a)) { 1145 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n"); 1146 return FALSE; 1147 } 1148 } 1149#endif /* USE_XAA */ 1150 return TRUE; 1151} 1152 1153void RADEONInit3DEngine(ScrnInfoPtr pScrn) 1154{ 1155 RADEONInfoPtr info = RADEONPTR (pScrn); 1156 1157#ifdef XF86DRI 1158 if (info->directRenderingEnabled) { 1159 drm_radeon_sarea_t *pSAREAPriv; 1160 1161 if (!info->kms_enabled) { 1162 pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); 1163 pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen); 1164 } 1165 RADEONInit3DEngineCP(pScrn); 1166 } else 1167#endif 1168 RADEONInit3DEngineMMIO(pScrn); 1169 1170 info->accel_state->XInited3D = TRUE; 1171} 1172 1173#ifdef USE_XAA 1174#ifdef XF86DRI 1175Bool 1176RADEONSetupMemXAA_DRI(ScreenPtr pScreen) 1177{ 1178 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1179 RADEONInfoPtr info = RADEONPTR(pScrn); 1180 int cpp = info->CurrentLayout.pixel_bytes; 1181 int depthCpp = (info->dri->depthBits - 8) / 4; 1182 int width_bytes = pScrn->displayWidth * cpp; 1183 int bufferSize; 1184 int depthSize; 1185 int l; 1186 int scanlines; 1187 int texsizerequest; 1188 BoxRec MemBox; 1189 FBAreaPtr fbarea; 1190 1191 info->dri->frontOffset = 0; 1192 info->dri->frontPitch = pScrn->displayWidth; 1193 info->dri->backPitch = pScrn->displayWidth; 1194 1195 /* make sure we use 16 line alignment for tiling (8 might be enough). 1196 * Might need that for non-XF86DRI too? 
1197 */ 1198 if (info->allowColorTiling) { 1199 bufferSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * width_bytes, 1200 RADEON_GPU_PAGE_SIZE); 1201 } else { 1202 bufferSize = RADEON_ALIGN(pScrn->virtualY * width_bytes, 1203 RADEON_GPU_PAGE_SIZE); 1204 } 1205 1206 /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels, 1207 * which is always the case if color tiling is used due to color pitch 1208 * but not necessarily otherwise, and its height a multiple of 16 lines. 1209 */ 1210 info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32); 1211 depthSize = RADEON_ALIGN((RADEON_ALIGN(pScrn->virtualY, 16)) * info->dri->depthPitch 1212 * depthCpp, RADEON_GPU_PAGE_SIZE); 1213 1214 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1215 "Using %d MB GART aperture\n", info->dri->gartSize); 1216 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1217 "Using %d MB for the ring buffer\n", info->dri->ringSize); 1218 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1219 "Using %d MB for vertex/indirect buffers\n", info->dri->bufSize); 1220 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1221 "Using %d MB for GART textures\n", info->dri->gartTexSize); 1222 1223 /* Try for front, back, depth, and three framebuffers worth of 1224 * pixmap cache. Should be enough for a fullscreen background 1225 * image plus some leftovers. 1226 * If the FBTexPercent option was used, try to achieve that percentage instead, 1227 * but still have at least one pixmap buffer (get problems with xvideo/render 1228 * otherwise probably), and never reserve more than 3 offscreen buffers as it's 1229 * probably useless for XAA. 1230 */ 1231 if (info->dri->textureSize >= 0) { 1232 texsizerequest = ((int)info->FbMapSize - 2 * bufferSize - depthSize 1233 - 2 * width_bytes - 16384 - info->FbSecureSize) 1234 /* first divide, then multiply or we'll get an overflow (been there...) 
*/ 1235 / 100 * info->dri->textureSize; 1236 } 1237 else { 1238 texsizerequest = (int)info->FbMapSize / 2; 1239 } 1240 info->dri->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize; 1241 1242 /* If that gives us less than the requested memory, let's 1243 * be greedy and grab some more. Sorry, I care more about 3D 1244 * performance than playing nicely, and you'll get around a full 1245 * framebuffer's worth of pixmap cache anyway. 1246 */ 1247 if (info->dri->textureSize < texsizerequest) { 1248 info->dri->textureSize = info->FbMapSize - 4 * bufferSize - depthSize; 1249 } 1250 if (info->dri->textureSize < texsizerequest) { 1251 info->dri->textureSize = info->FbMapSize - 3 * bufferSize - depthSize; 1252 } 1253 1254 /* If there's still no space for textures, try without pixmap cache, but 1255 * never use the reserved space, the space hw cursor and PCIGART table might 1256 * use. 1257 */ 1258 if (info->dri->textureSize < 0) { 1259 info->dri->textureSize = info->FbMapSize - 2 * bufferSize - depthSize 1260 - 2 * width_bytes - 16384 - info->FbSecureSize; 1261 } 1262 1263 /* Check to see if there is more room available after the 8192nd 1264 * scanline for textures 1265 */ 1266 /* FIXME: what's this good for? condition is pretty much impossible to meet */ 1267 if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize 1268 > info->dri->textureSize) { 1269 info->dri->textureSize = 1270 info->FbMapSize - 8192*width_bytes - bufferSize - depthSize; 1271 } 1272 1273 /* If backbuffer is disabled, don't allocate memory for it */ 1274 if (info->dri->noBackBuffer) { 1275 info->dri->textureSize += bufferSize; 1276 } 1277 1278 /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer! 1279 At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned, 1280 otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee 1281 it's still 4kb aligned for tiled case). 
Need to round up offset (might get into cursor 1282 area otherwise). 1283 This might cause some space at the end of the video memory to be unused, since it 1284 can't be used (?) due to that log_tex_granularity thing??? 1285 Could use different copyscreentoscreen function for the pageflip copies 1286 (which would use different src and dst offsets) to avoid this. */ 1287 if (info->allowColorTiling && !info->dri->noBackBuffer) { 1288 info->dri->textureSize = info->FbMapSize - ((info->FbMapSize - info->dri->textureSize + 1289 width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16); 1290 } 1291 if (info->dri->textureSize > 0) { 1292 l = RADEONMinBits((info->dri->textureSize-1) / RADEON_NR_TEX_REGIONS); 1293 if (l < RADEON_LOG_TEX_GRANULARITY) 1294 l = RADEON_LOG_TEX_GRANULARITY; 1295 /* Round the texture size up to the nearest whole number of 1296 * texture regions. Again, be greedy about this, don't 1297 * round down. 1298 */ 1299 info->dri->log2TexGran = l; 1300 info->dri->textureSize = (info->dri->textureSize >> l) << l; 1301 } else { 1302 info->dri->textureSize = 0; 1303 } 1304 1305 /* Set a minimum usable local texture heap size. This will fit 1306 * two 256x256x32bpp textures. 1307 */ 1308 if (info->dri->textureSize < 512 * 1024) { 1309 info->dri->textureOffset = 0; 1310 info->dri->textureSize = 0; 1311 } 1312 1313 if (info->allowColorTiling && !info->dri->noBackBuffer) { 1314 info->dri->textureOffset = ((info->FbMapSize - info->dri->textureSize) / 1315 (width_bytes * 16)) * (width_bytes * 16); 1316 } 1317 else { 1318 /* Reserve space for textures */ 1319 info->dri->textureOffset = RADEON_ALIGN(info->FbMapSize - info->dri->textureSize, 1320 RADEON_GPU_PAGE_SIZE); 1321 } 1322 1323 /* Reserve space for the shared depth 1324 * buffer. 
1325 */ 1326 info->dri->depthOffset = RADEON_ALIGN(info->dri->textureOffset - depthSize, 1327 RADEON_GPU_PAGE_SIZE); 1328 1329 /* Reserve space for the shared back buffer */ 1330 if (info->dri->noBackBuffer) { 1331 info->dri->backOffset = info->dri->depthOffset; 1332 } else { 1333 info->dri->backOffset = RADEON_ALIGN(info->dri->depthOffset - bufferSize, 1334 RADEON_GPU_PAGE_SIZE); 1335 } 1336 1337 info->dri->backY = info->dri->backOffset / width_bytes; 1338 info->dri->backX = (info->dri->backOffset - (info->dri->backY * width_bytes)) / cpp; 1339 1340 scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes; 1341 if (scanlines > 8191) 1342 scanlines = 8191; 1343 1344 MemBox.x1 = 0; 1345 MemBox.y1 = 0; 1346 MemBox.x2 = pScrn->displayWidth; 1347 MemBox.y2 = scanlines; 1348 1349 if (!xf86InitFBManager(pScreen, &MemBox)) { 1350 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 1351 "Memory manager initialization to " 1352 "(%d,%d) (%d,%d) failed\n", 1353 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1354 return FALSE; 1355 } else { 1356 int width, height; 1357 1358 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1359 "Memory manager initialized to (%d,%d) (%d,%d)\n", 1360 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1361 /* why oh why can't we just request modes which are guaranteed to be 16 lines 1362 aligned... sigh */ 1363 if ((fbarea = xf86AllocateOffscreenArea(pScreen, 1364 pScrn->displayWidth, 1365 info->allowColorTiling ? 
1366 (RADEON_ALIGN(pScrn->virtualY, 16)) 1367 - pScrn->virtualY + 2 : 2, 1368 0, NULL, NULL, 1369 NULL))) { 1370 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1371 "Reserved area from (%d,%d) to (%d,%d)\n", 1372 fbarea->box.x1, fbarea->box.y1, 1373 fbarea->box.x2, fbarea->box.y2); 1374 } else { 1375 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n"); 1376 } 1377 1378 RADEONDRIAllocatePCIGARTTable(pScreen); 1379 1380 if (xf86QueryLargestOffscreenArea(pScreen, &width, 1381 &height, 0, 0, 0)) { 1382 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1383 "Largest offscreen area available: %d x %d\n", 1384 width, height); 1385 1386 /* Lines in offscreen area needed for depth buffer and 1387 * textures 1388 */ 1389 info->dri->depthTexLines = (scanlines 1390 - info->dri->depthOffset / width_bytes); 1391 info->dri->backLines = (scanlines 1392 - info->dri->backOffset / width_bytes 1393 - info->dri->depthTexLines); 1394 info->dri->backArea = NULL; 1395 } else { 1396 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 1397 "Unable to determine largest offscreen area " 1398 "available\n"); 1399 return FALSE; 1400 } 1401 } 1402 1403 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1404 "Will use front buffer at offset 0x%x\n", 1405 info->dri->frontOffset); 1406 1407 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1408 "Will use back buffer at offset 0x%x\n", 1409 info->dri->backOffset); 1410 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1411 "Will use depth buffer at offset 0x%x\n", 1412 info->dri->depthOffset); 1413 if (info->cardType==CARD_PCIE) 1414 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1415 "Will use %d kb for PCI GART table at offset 0x%x\n", 1416 info->dri->pciGartSize/1024, (unsigned)info->dri->pciGartOffset); 1417 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1418 "Will use %d kb for textures at offset 0x%x\n", 1419 info->dri->textureSize/1024, info->dri->textureOffset); 1420 1421 info->dri->frontPitchOffset = (((info->dri->frontPitch * cpp / 64) << 22) | 1422 ((info->dri->frontOffset + info->fbLocation) >> 10)); 1423 1424 
info->dri->backPitchOffset = (((info->dri->backPitch * cpp / 64) << 22) | 1425 ((info->dri->backOffset + info->fbLocation) >> 10)); 1426 1427 info->dri->depthPitchOffset = (((info->dri->depthPitch * depthCpp / 64) << 22) | 1428 ((info->dri->depthOffset + info->fbLocation) >> 10)); 1429 return TRUE; 1430} 1431#endif /* XF86DRI */ 1432 1433Bool 1434RADEONSetupMemXAA(ScreenPtr pScreen) 1435{ 1436 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1437 RADEONInfoPtr info = RADEONPTR(pScrn); 1438 BoxRec MemBox; 1439 int y2; 1440 1441 int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes; 1442 1443 MemBox.x1 = 0; 1444 MemBox.y1 = 0; 1445 MemBox.x2 = pScrn->displayWidth; 1446 y2 = info->FbMapSize / width_bytes; 1447 if (y2 >= 32768) 1448 y2 = 32767; /* because MemBox.y2 is signed short */ 1449 MemBox.y2 = y2; 1450 1451 /* The acceleration engine uses 14 bit 1452 * signed coordinates, so we can't have any 1453 * drawable caches beyond this region. 1454 */ 1455 if (MemBox.y2 > 8191) 1456 MemBox.y2 = 8191; 1457 1458 if (!xf86InitFBManager(pScreen, &MemBox)) { 1459 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, 1460 "Memory manager initialization to " 1461 "(%d,%d) (%d,%d) failed\n", 1462 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1463 return FALSE; 1464 } else { 1465 int width, height; 1466 FBAreaPtr fbarea; 1467 1468 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1469 "Memory manager initialized to (%d,%d) (%d,%d)\n", 1470 MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); 1471 if ((fbarea = xf86AllocateOffscreenArea(pScreen, 1472 pScrn->displayWidth, 1473 info->allowColorTiling ? 
1474 (RADEON_ALIGN(pScrn->virtualY, 16)) 1475 - pScrn->virtualY + 2 : 2, 1476 0, NULL, NULL, 1477 NULL))) { 1478 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1479 "Reserved area from (%d,%d) to (%d,%d)\n", 1480 fbarea->box.x1, fbarea->box.y1, 1481 fbarea->box.x2, fbarea->box.y2); 1482 } else { 1483 xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n"); 1484 } 1485 if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 1486 0, 0, 0)) { 1487 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 1488 "Largest offscreen area available: %d x %d\n", 1489 width, height); 1490 } 1491 return TRUE; 1492 } 1493} 1494#endif /* USE_XAA */ 1495