radeon_exa_funcs.c revision 0f6d6f99
1/* 2 * Copyright 2005 Eric Anholt 3 * Copyright 2005 Benjamin Herrenschmidt 4 * Copyright 2006 Tungsten Graphics, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 * SOFTWARE. 25 * 26 * Authors: 27 * Eric Anholt <anholt@FreeBSD.org> 28 * Zack Rusin <zrusin@trolltech.com> 29 * Benjamin Herrenschmidt <benh@kernel.crashing.org> 30 * Michel Dänzer <michel@tungstengraphics.com> 31 * 32 */ 33 34#if defined(ACCEL_MMIO) && defined(ACCEL_CP) 35#error Cannot define both MMIO and CP acceleration! 36#endif 37 38#if !defined(UNIXCPP) || defined(ANSICPP) 39#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix 40#else 41#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix 42#endif 43 44#ifdef ACCEL_MMIO 45#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) 46#else 47#ifdef ACCEL_CP 48#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) 49#else 50#error No accel type defined! 51#endif 52#endif 53 54#include <errno.h> 55#include <string.h> 56 57#include "radeon.h" 58 59#include "exa.h" 60 61static int 62FUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen) 63{ 64 RINFO_FROM_SCREEN(pScreen); 65 66 TRACE; 67 68 return ++info->accel_state->exaSyncMarker; 69} 70 71static void 72FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) 73{ 74 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 75 RADEONInfoPtr info = RADEONPTR(pScrn); 76 77 if (info->cs) 78 return; 79 80 TRACE; 81 82 if (info->accel_state->exaMarkerSynced != marker) { 83 FUNC_NAME(RADEONWaitForIdle)(pScrn); 84 info->accel_state->exaMarkerSynced = marker; 85 } 86 87 RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; 88} 89 90static void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op) 91{ 92 RADEONInfoPtr info = RADEONPTR(pScrn); 93 int has_src; 94 ACCEL_PREAMBLE(); 95 96 /* don't emit if no operation in progress */ 97 if (info->state_2d.op == 0 && op == 0) 98 return; 99 100 has_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo); 101 102 if (has_src) { 103 BEGIN_ACCEL_RELOC(10, 2); 104 } else { 105 BEGIN_ACCEL_RELOC(9, 1); 106 } 107 OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right); 108 OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl); 109 OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr); 110 OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr); 111 OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR, info->state_2d.dp_src_frgd_clr); 112 OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR, info->state_2d.dp_src_bkgd_clr); 113 OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask); 114 OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl); 115 116 OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset); 117 if (info->cs) 118 OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); 119 120 if (has_src) { 121 OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset); 122 if (info->cs) 123 OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); 124 125 } 126 FINISH_ACCEL(); 127 128 if (op) 129 info->state_2d.op = op; 130 if (info->cs) 131 info->reemit_current2d = FUNC_NAME(Emit2DState); 132} 133 134static void 135FUNC_NAME(RADEONDone2D)(PixmapPtr pPix) 136{ 137 RINFO_FROM_SCREEN(pPix->drawable.pScreen); 138 ACCEL_PREAMBLE(); 139 140 TRACE; 141 142 info->state_2d.op = 0; 143 BEGIN_ACCEL(2); 144 OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); 145 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 146 RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); 147 FINISH_ACCEL(); 148} 149 150static Bool 151FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 152{ 153 RINFO_FROM_SCREEN(pPix->drawable.pScreen); 154 uint32_t datatype, dst_pitch_offset; 155 156 TRACE; 157 158 if (pPix->drawable.bitsPerPixel == 24) 159 RADEON_FALLBACK(("24bpp unsupported\n")); 160 if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype)) 161 RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); 162 if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset)) 163 RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n")); 164 165 RADEON_SWITCH_TO_2D(); 166 167#ifdef XF86DRM_MODE 168 if (info->cs) { 169 struct radeon_exa_pixmap_priv *driver_priv; 170 int ret; 171 172 radeon_cs_space_reset_bos(info->cs); 173 174 driver_priv = exaGetPixmapDriverPrivate(pPix); 175 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 176 177 ret = radeon_cs_space_check(info->cs); 178 if (ret) 179 RADEON_FALLBACK(("Not enough RAM to hw accel solid operation\n")); 180 181 driver_priv = exaGetPixmapDriverPrivate(pPix); 182 if (driver_priv) 183 info->state_2d.dst_bo = driver_priv->bo; 184 } 185#endif 186 187 info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX | 188 RADEON_DEFAULT_SC_BOTTOM_MAX); 189 info->state_2d.dp_brush_bkgd_clr = 0x00000000; 190 info->state_2d.dp_src_frgd_clr = 0xffffffff; 191 info->state_2d.dp_src_bkgd_clr = 0x00000000; 192 info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | 193 RADEON_GMC_BRUSH_SOLID_COLOR | 194 (datatype << 8) | 195 RADEON_GMC_SRC_DATATYPE_COLOR | 196 RADEON_ROP[alu].pattern | 197 RADEON_GMC_CLR_CMP_CNTL_DIS); 198 info->state_2d.dp_brush_frgd_clr = fg; 199 info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM); 200 info->state_2d.dp_write_mask = pm; 201 info->state_2d.dst_pitch_offset = dst_pitch_offset; 202 info->state_2d.src_pitch_offset = 0; 203 info->state_2d.src_bo = NULL; 204 205 info->accel_state->dst_pix = pPix; 206 207 FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID); 208 209 return TRUE; 210} 211 212 213static void 214FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) 215{ 216 RINFO_FROM_SCREEN(pPix->drawable.pScreen); 217 ACCEL_PREAMBLE(); 218 219 TRACE; 220 221#if defined(ACCEL_CP) && defined(XF86DRM_MODE) 222 if (info->cs && CS_FULL(info->cs)) { 223 FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix); 224 radeon_cs_flush_indirect(pScrn); 225 } 226#endif 227 228 if (info->accel_state->vsync) 229 FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, 230 radeon_pick_best_crtc(pScrn, x1, x2, y1, y2), 231 y1, y2); 232 233 BEGIN_ACCEL(2); 234 OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1); 235 OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1)); 236 FINISH_ACCEL(); 237} 238 239void 240FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset, 241 uint32_t dst_pitch_offset, uint32_t datatype, int rop, 242 Pixel planemask) 243{ 244 RADEONInfoPtr info = RADEONPTR(pScrn); 245 246 /* setup 2D state */ 247 info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | 248 RADEON_GMC_SRC_PITCH_OFFSET_CNTL | 249 RADEON_GMC_BRUSH_NONE | 250 (datatype << 8) | 251 RADEON_GMC_SRC_DATATYPE_COLOR | 252 RADEON_ROP[rop].rop | 253 RADEON_DP_SRC_SOURCE_MEMORY | 254 RADEON_GMC_CLR_CMP_CNTL_DIS); 255 info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | 256 (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)); 257 info->state_2d.dp_brush_frgd_clr = 0xffffffff; 258 info->state_2d.dp_brush_bkgd_clr = 0x00000000; 259 info->state_2d.dp_src_frgd_clr = 0xffffffff; 260 info->state_2d.dp_src_bkgd_clr = 0x00000000; 261 info->state_2d.dp_write_mask = planemask; 262 info->state_2d.dst_pitch_offset = dst_pitch_offset; 263 info->state_2d.src_pitch_offset = src_pitch_offset; 264 info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX 265 | RADEON_DEFAULT_SC_BOTTOM_MAX); 266 267 FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY); 268} 269 270static Bool 271FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, 272 int xdir, int ydir, 273 int rop, 274 Pixel planemask) 275{ 276 RINFO_FROM_SCREEN(pDst->drawable.pScreen); 277 uint32_t datatype, src_pitch_offset, dst_pitch_offset; 278 TRACE; 279 280 if (pDst->drawable.bitsPerPixel == 24) 281 RADEON_FALLBACK(("24bpp unsupported")); 282 if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype)) 283 RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); 284 if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset)) 285 RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n")); 286 if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset)) 287 RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n")); 288 289 RADEON_SWITCH_TO_2D(); 290 291#ifdef XF86DRM_MODE 292 if (info->cs) { 293 struct radeon_exa_pixmap_priv *driver_priv; 294 int ret; 295 296 radeon_cs_space_reset_bos(info->cs); 297 298 driver_priv = exaGetPixmapDriverPrivate(pSrc); 299 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 300 info->state_2d.src_bo = driver_priv->bo; 301 302 driver_priv = exaGetPixmapDriverPrivate(pDst); 303 radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); 304 info->state_2d.dst_bo = driver_priv->bo; 305 306 ret = radeon_cs_space_check(info->cs); 307 if (ret) 308 RADEON_FALLBACK(("Not enough RAM to hw accel copy operation\n")); 309 } 310#endif 311 312 info->accel_state->xdir = xdir; 313 info->accel_state->ydir = ydir; 314 info->accel_state->dst_pix = pDst; 315 316 FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset, 317 datatype, rop, planemask); 318 319 return TRUE; 320} 321 322void 323FUNC_NAME(RADEONCopy)(PixmapPtr pDst, 324 int srcX, int srcY, 325 int dstX, int dstY, 326 int w, int h) 327{ 328 RINFO_FROM_SCREEN(pDst->drawable.pScreen); 329 ACCEL_PREAMBLE(); 330 331 TRACE; 332 333#if defined(ACCEL_CP) && defined(XF86DRM_MODE) 334 if (info->cs && CS_FULL(info->cs)) { 335 FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix); 336 radeon_cs_flush_indirect(pScrn); 337 } 338#endif 339 340 if (info->accel_state->xdir < 0) { 341 srcX += w - 1; 342 dstX += w - 1; 343 } 344 if (info->accel_state->ydir < 0) { 345 srcY += h - 1; 346 dstY += h - 1; 347 } 348 349 if (info->accel_state->vsync) 350 FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, 351 radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h), 352 dstY, dstY + h); 353 354 BEGIN_ACCEL(3); 355 356 OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); 357 OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX); 358 OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); 359 360 FINISH_ACCEL(); 361} 362 363#ifdef ACCEL_CP 364 365static Bool 366RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, 367 char *src, int src_pitch) 368{ 369 RINFO_FROM_SCREEN(pDst->drawable.pScreen); 370 unsigned int bpp = pDst->drawable.bitsPerPixel; 371 unsigned int hpass; 372 uint32_t buf_pitch, dst_pitch_off; 373 374 TRACE; 375 376 if (bpp < 8) 377 return FALSE; 378 379 if (info->directRenderingEnabled && 380 RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) { 381 uint8_t *buf; 382 int cpp = bpp / 8; 383 ACCEL_PREAMBLE(); 384 385 RADEON_SWITCH_TO_2D(); 386 387 if (info->accel_state->vsync) 388 FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, 389 radeon_pick_best_crtc(pScrn, x, x + w, y, y + h), 390 y, y + h); 391 392 while ((buf = RADEONHostDataBlit(pScrn, 393 cpp, w, dst_pitch_off, &buf_pitch, 394 x, &y, (unsigned int*)&h, &hpass)) != 0) { 395 RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src, 396 hpass, buf_pitch, src_pitch); 397 src += hpass * src_pitch; 398 } 399 400 exaMarkSync(pDst->drawable.pScreen); 401 return TRUE; 402 } 403 404 return FALSE; 405} 406 407/* Emit blit with arbitrary source and destination offsets and pitches */ 408static void 409RADEONBlitChunk(ScrnInfoPtr pScrn, struct radeon_bo *src_bo, 410 struct radeon_bo *dst_bo, uint32_t datatype, 411 uint32_t src_pitch_offset, uint32_t dst_pitch_offset, 412 int srcX, int srcY, int dstX, int dstY, int w, int h, 413 uint32_t src_domain, uint32_t dst_domain) 414{ 415 RADEONInfoPtr info = RADEONPTR(pScrn); 416 ACCEL_PREAMBLE(); 417 418 if (src_bo && dst_bo) { 419 BEGIN_ACCEL_RELOC(6, 2); 420 } else if (src_bo && dst_bo == NULL) { 421 BEGIN_ACCEL_RELOC(6, 1); 422 } else { 423 BEGIN_ACCEL(6); 424 } 425 OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, 426 RADEON_GMC_DST_PITCH_OFFSET_CNTL | 427 RADEON_GMC_SRC_PITCH_OFFSET_CNTL | 428 RADEON_GMC_BRUSH_NONE | 429 (datatype << 8) | 430 RADEON_GMC_SRC_DATATYPE_COLOR | 431 RADEON_ROP3_S | 432 RADEON_DP_SRC_SOURCE_MEMORY | 433 RADEON_GMC_CLR_CMP_CNTL_DIS | 434 RADEON_GMC_WR_MSK_DIS); 435 OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset); 436 if (src_bo) { 437 OUT_RELOC(src_bo, src_domain, 0); 438 } 439 OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); 440 if (dst_bo) { 441 OUT_RELOC(dst_bo, 0, dst_domain); 442 } 443 OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); 444 OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX); 445 OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); 446 FINISH_ACCEL(); 447 BEGIN_ACCEL(2); 448 OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); 449 OUT_ACCEL_REG(RADEON_WAIT_UNTIL, 450 RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); 451 FINISH_ACCEL(); 452} 453 454#if defined(XF86DRM_MODE) 455static Bool 456RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, 457 char *src, int src_pitch) 458{ 459 ScreenPtr pScreen = pDst->drawable.pScreen; 460 RINFO_FROM_SCREEN(pScreen); 461 struct radeon_exa_pixmap_priv *driver_priv; 462 struct radeon_bo *scratch = NULL; 463 struct radeon_bo *copy_dst; 464 unsigned char *dst; 465 unsigned size; 466 uint32_t datatype = 0; 467 uint32_t dst_domain; 468 uint32_t dst_pitch_offset; 469 unsigned bpp = pDst->drawable.bitsPerPixel; 470 uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64); 471 uint32_t copy_pitch; 472 uint32_t swap = RADEON_HOST_DATA_SWAP_NONE; 473 int ret; 474 Bool flush = TRUE; 475 Bool r; 476 int i; 477 478 if (bpp < 8) 479 return FALSE; 480 481 driver_priv = exaGetPixmapDriverPrivate(pDst); 482 if (!driver_priv || !driver_priv->bo) 483 return FALSE; 484 485#if X_BYTE_ORDER == X_BIG_ENDIAN 486 switch (bpp) { 487 case 32: 488 swap = RADEON_HOST_DATA_SWAP_32BIT; 489 break; 490 case 16: 491 swap = RADEON_HOST_DATA_SWAP_16BIT; 492 break; 493 } 494#endif 495 496 /* If we know the BO won't be busy, don't bother with a scratch */ 497 copy_dst = driver_priv->bo; 498 copy_pitch = pDst->devKind; 499 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 500 if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { 501 flush = FALSE; 502 if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) 503 goto copy; 504 } 505 } 506 507 size = scratch_pitch * h; 508 scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); 509 if (scratch == NULL) { 510 goto copy; 511 } 512 radeon_cs_space_reset_bos(info->cs); 513 radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM); 514 radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0); 515 ret = radeon_cs_space_check(info->cs); 516 if (ret) { 517 goto copy; 518 } 519 copy_dst = scratch; 520 copy_pitch = scratch_pitch; 521 flush = FALSE; 522 523copy: 524 if (flush) 525 radeon_cs_flush_indirect(pScrn); 526 527 ret = radeon_bo_map(copy_dst, 0); 528 if (ret) { 529 r = FALSE; 530 goto out; 531 } 532 r = TRUE; 533 size = w * bpp / 8; 534 dst = copy_dst->ptr; 535 if (copy_dst == driver_priv->bo) 536 dst += y * copy_pitch + x * bpp / 8; 537 for (i = 0; i < h; i++) { 538 RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap); 539 src += src_pitch; 540 } 541 radeon_bo_unmap(copy_dst); 542 543 if (copy_dst == scratch) { 544 RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype); 545 RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset); 546 ACCEL_PREAMBLE(); 547 RADEON_SWITCH_TO_2D(); 548 RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16, 549 dst_pitch_offset, 0, 0, x, y, w, h, 550 RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM); 551 } 552 553out: 554 if (scratch) 555 radeon_bo_unref(scratch); 556 return r; 557} 558 559static Bool 560RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 561 int h, char *dst, int dst_pitch) 562{ 563 RINFO_FROM_SCREEN(pSrc->drawable.pScreen); 564 struct radeon_exa_pixmap_priv *driver_priv; 565 struct radeon_bo *scratch = NULL; 566 struct radeon_bo *copy_src; 567 unsigned size; 568 uint32_t datatype = 0; 569 uint32_t src_domain = 0; 570 uint32_t src_pitch_offset; 571 unsigned bpp = pSrc->drawable.bitsPerPixel; 572 uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64); 573 uint32_t copy_pitch; 574 uint32_t swap = RADEON_HOST_DATA_SWAP_NONE; 575 int ret; 576 Bool flush = FALSE; 577 Bool r; 578 579 if (bpp < 8) 580 return FALSE; 581 582 driver_priv = exaGetPixmapDriverPrivate(pSrc); 583 if (!driver_priv || !driver_priv->bo) 584 return FALSE; 585 586#if X_BYTE_ORDER == X_BIG_ENDIAN 587 switch (bpp) { 588 case 32: 589 swap = RADEON_HOST_DATA_SWAP_32BIT; 590 break; 591 case 16: 592 swap = RADEON_HOST_DATA_SWAP_16BIT; 593 break; 594 } 595#endif 596 597 /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */ 598 copy_src = driver_priv->bo; 599 copy_pitch = pSrc->devKind; 600 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 601 if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { 602 src_domain = radeon_bo_get_src_domain(driver_priv->bo); 603 if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 604 (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) 605 src_domain = 0; 606 else /* A write may be scheduled */ 607 flush = TRUE; 608 } 609 610 if (!src_domain) 611 radeon_bo_is_busy(driver_priv->bo, &src_domain); 612 613 if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) 614 goto copy; 615 } 616 size = scratch_pitch * h; 617 scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); 618 if (scratch == NULL) { 619 goto copy; 620 } 621 radeon_cs_space_reset_bos(info->cs); 622 radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); 623 radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT); 624 ret = radeon_cs_space_check(info->cs); 625 if (ret) { 626 goto copy; 627 } 628 RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype); 629 RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset); 630 ACCEL_PREAMBLE(); 631 RADEON_SWITCH_TO_2D(); 632 RADEONBlitChunk(pScrn, driver_priv->bo, scratch, datatype, src_pitch_offset, 633 scratch_pitch << 16, x, y, 0, 0, w, h, 634 RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 635 RADEON_GEM_DOMAIN_GTT); 636 copy_src = scratch; 637 copy_pitch = scratch_pitch; 638 flush = TRUE; 639 640copy: 641 if (flush) 642 FLUSH_RING(); 643 644 ret = radeon_bo_map(copy_src, 0); 645 if (ret) { 646 ErrorF("failed to map pixmap: %d\n", ret); 647 r = FALSE; 648 goto out; 649 } 650 r = TRUE; 651 w *= bpp / 8; 652 if (copy_src == driver_priv->bo) 653 size = y * copy_pitch + x * bpp / 8; 654 else 655 size = 0; 656 while (h--) { 657 RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap); 658 size += copy_pitch; 659 dst += dst_pitch; 660 } 661 radeon_bo_unmap(copy_src); 662out: 663 if (scratch) 664 radeon_bo_unref(scratch); 665 return r; 666} 667#endif 668 669static Bool 670RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h, 671 char *dst, int dst_pitch) 672{ 673 RINFO_FROM_SCREEN(pSrc->drawable.pScreen); 674 uint8_t *src = info->FB + exaGetPixmapOffset(pSrc); 675 int bpp = pSrc->drawable.bitsPerPixel; 676 uint32_t datatype, src_pitch_offset, scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64), scratch_off = 0; 677 drmBufPtr scratch; 678 679 TRACE; 680 681 /* 682 * Try to accelerate download. Use an indirect buffer as scratch space, 683 * blitting the bits to one half while copying them out of the other one and 684 * then swapping the halves. 685 */ 686 if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) && 687 RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) && 688 (scratch = RADEONCPGetBuffer(pScrn))) 689 { 690 int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8; 691 int hpass = min(h, scratch->total/2 / scratch_pitch); 692 uint32_t scratch_pitch_offset = scratch_pitch << 16 693 | (info->gartLocation + info->dri->bufStart 694 + scratch->idx * scratch->total) >> 10; 695 drm_radeon_indirect_t indirect; 696 ACCEL_PREAMBLE(); 697 698 RADEON_SWITCH_TO_2D(); 699 700 /* Kick the first blit as early as possible */ 701 RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset, 702 scratch_pitch_offset, x, y, 0, 0, w, hpass, 0, 0); 703 FLUSH_RING(); 704 705#if X_BYTE_ORDER == X_BIG_ENDIAN 706 switch (bpp) { 707 case 16: 708 swap = RADEON_HOST_DATA_SWAP_16BIT; 709 break; 710 case 32: 711 swap = RADEON_HOST_DATA_SWAP_32BIT; 712 break; 713 } 714#endif 715 716 while (h) { 717 int oldhpass = hpass, i = 0; 718 719 src = (uint8_t*)scratch->address + scratch_off; 720 721 y += oldhpass; 722 h -= oldhpass; 723 hpass = min(h, scratch->total/2 / scratch_pitch); 724 725 /* Prepare next blit if anything's left */ 726 if (hpass) { 727 scratch_off = scratch->total/2 - scratch_off; 728 RADEONBlitChunk(pScrn, NULL, NULL, datatype, src_pitch_offset, 729 scratch_pitch_offset + (scratch_off >> 10), 730 x, y, 0, 0, w, hpass, 0, 0); 731 } 732 733 /* 734 * Wait for previous blit to complete. 735 * 736 * XXX: Doing here essentially the same things this ioctl does in 737 * the DRM results in corruption with 'small' transfers, apparently 738 * because the data doesn't actually land in system RAM before the 739 * memcpy. I suspect the ioctl helps mostly due to its latency; what 740 * we'd really need is a way to reliably wait for the host interface 741 * to be done with pushing the data to the host. 742 */ 743 while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY) 744 && (i++ < RADEON_TIMEOUT)) 745 ; 746 747 /* Kick next blit */ 748 if (hpass) 749 FLUSH_RING(); 750 751 /* Copy out data from previous blit */ 752 if (wpass == scratch_pitch && wpass == dst_pitch) { 753 RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap); 754 dst += dst_pitch * oldhpass; 755 } else while (oldhpass--) { 756 RADEONCopySwap((uint8_t*)dst, src, wpass, swap); 757 src += scratch_pitch; 758 dst += dst_pitch; 759 } 760 } 761 762 indirect.idx = scratch->idx; 763 indirect.start = indirect.end = 0; 764 indirect.discard = 1; 765 766 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 767 &indirect, sizeof(drm_radeon_indirect_t)); 768 769 info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker; 770 771 return TRUE; 772 } 773 774 return FALSE; 775} 776 777#endif /* def ACCEL_CP */ 778 779 780Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) 781{ 782 RINFO_FROM_SCREEN(pScreen); 783 784 if (info->accel_state->exa == NULL) { 785 xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); 786 return FALSE; 787 } 788 789 info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; 790 info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; 791 792 info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid); 793 info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid); 794 info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDone2D); 795 796 info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy); 797 info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy); 798 info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDone2D); 799 800 info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync); 801 info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync); 802#ifdef ACCEL_CP 803 if (!info->kms_enabled) { 804 info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP; 805 if (info->accelDFS) 806 info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP; 807 } 808# if defined(XF86DRM_MODE) 809 else { 810 info->accel_state->exa->UploadToScreen = &RADEONUploadToScreenCS; 811 info->accel_state->exa->DownloadFromScreen = &RADEONDownloadFromScreenCS; 812 } 813# endif 814#endif 815 816#if X_BYTE_ORDER == X_BIG_ENDIAN 817 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE; 818 info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE; 819#endif 820 821 info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; 822#ifdef EXA_SUPPORTS_PREPARE_AUX 823 info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; 824#endif 825#ifdef EXA_SUPPORTS_OFFSCREEN_OVERLAPS 826 /* The 2D engine supports overlapping memory areas */ 827 info->accel_state->exa->flags |= EXA_SUPPORTS_OFFSCREEN_OVERLAPS; 828#endif 829 info->accel_state->exa->pixmapOffsetAlign = RADEON_GPU_PAGE_SIZE; 830 info->accel_state->exa->pixmapPitchAlign = 64; 831 832#ifdef EXA_HANDLES_PIXMAPS 833 if (info->cs) { 834 info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; 835#ifdef EXA_MIXED_PIXMAPS 836 info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; 837#endif 838 } 839#endif 840 841#ifdef RENDER 842 if (info->RenderAccel) { 843 if (IS_R300_3D || IS_R500_3D) { 844 if ((info->ChipFamily < CHIP_FAMILY_RS400) 845#ifdef XF86DRI 846 || (info->directRenderingEnabled) 847#endif 848 ) { 849 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " 850 "enabled for R300/R400/R500 type cards.\n"); 851 info->accel_state->exa->CheckComposite = R300CheckComposite; 852 info->accel_state->exa->PrepareComposite = 853 FUNC_NAME(R300PrepareComposite); 854 info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite); 855 info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); 856 } else 857 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n"); 858 } else if (IS_R200_3D) { 859 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " 860 "enabled for R200 type cards.\n"); 861 info->accel_state->exa->CheckComposite = R200CheckComposite; 862 info->accel_state->exa->PrepareComposite = 863 FUNC_NAME(R200PrepareComposite); 864 info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite); 865 info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); 866 } else { 867 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " 868 "enabled for R100 type cards.\n"); 869 info->accel_state->exa->CheckComposite = R100CheckComposite; 870 info->accel_state->exa->PrepareComposite = 871 FUNC_NAME(R100PrepareComposite); 872 info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite); 873 info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); 874 } 875 } 876#endif 877 878#ifdef XF86DRM_MODE 879#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 880 if (info->cs) { 881 info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; 882 info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; 883 info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; 884 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; 885 info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; 886#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5) 887 info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2; 888#endif 889 } 890#endif 891#endif 892 893 894#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) 895 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); 896 897 info->accel_state->exa->maxPitchBytes = 16320; 898 info->accel_state->exa->maxX = 8191; 899#else 900 info->accel_state->exa->maxX = 16320 / 4; 901#endif 902 info->accel_state->exa->maxY = 8191; 903 904 if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { 905 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); 906 info->accel_state->vsync = TRUE; 907 } else 908 info->accel_state->vsync = FALSE; 909 910 RADEONEngineInit(pScrn); 911 912 if (!exaDriverInit(pScreen, info->accel_state->exa)) { 913 free(info->accel_state->exa); 914 return FALSE; 915 } 916 exaMarkSync(pScreen); 917 918 return TRUE; 919} 920 921#undef FUNC_NAME 922