cg14_accel.c revision f7cb851f
1/* $NetBSD: cg14_accel.c,v 1.5 2013/07/03 15:29:34 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#include <sys/types.h> 33 34/* all driver need this */ 35#include "xf86.h" 36#include "xf86_OSproc.h" 37#include "compiler.h" 38 39#include "cg14.h" 40#include <sparc/sxreg.h> 41 42#define SX_SINGLE 43/*#define SX_DEBUG*/ 44/*#define SX_ADD_SOFTWARE*/ 45 46#ifdef SX_DEBUG 47#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48#define DPRINTF xf86Msg 49#else 50#define ENTER 51#define DPRINTF while (0) xf86Msg 52#endif 53 54#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 55 56/* 0xcc is SX's GXcopy equivalent */ 57uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 58 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 59 60int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 61 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 62int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 63 64static inline void 65CG14Wait(Cg14Ptr p) 66{ 67 /* we just wait until the instruction queue is empty */ 68 while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {}; 69} 70 71static void 72CG14WaitMarker(ScreenPtr pScreen, int Marker) 73{ 74 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 75 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 76 77 CG14Wait(p); 78} 79 80static Bool 81CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 82 int xdir, int ydir, int alu, Pixel planemask) 83{ 84 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 85 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 86 87 ENTER; 88 DPRINTF(X_ERROR, "bits per pixel: %d\n", 89 pSrcPixmap->drawable.bitsPerPixel); 90 91 if (planemask != p->last_mask) { 92 CG14Wait(p); 93 write_sx_reg(p, SX_PLANEMASK, planemask); 94 p->last_mask = planemask; 95 } 96 alu = sx_rop[alu]; 97 if (alu != p->last_rop) { 98 CG14Wait(p); 99 write_sx_reg(p, SX_ROP_CONTROL, alu); 100 p->last_rop = alu; 101 } 102 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 103 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 104 p->xdir = xdir; 105 p->ydir = ydir; 106 return TRUE; 107} 108 109static void 110CG14Copy(PixmapPtr pDstPixmap, 111 int srcX, int srcY, int dstX, int dstY, int w, int h) 112{ 113 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 114 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 115 int dstpitch, dstoff, srcpitch, srcoff; 116 int srcstart, dststart, xinc, srcinc, dstinc; 117 int line, count, s, d, num; 118 119 ENTER; 120 dstpitch = exaGetPixmapPitch(pDstPixmap); 121 dstoff = exaGetPixmapOffset(pDstPixmap); 122 srcpitch = p->srcpitch; 123 srcoff = p->srcoff; 124 /* 125 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 126 * actually wrote anything and only sync if it did 127 */ 128 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 129 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 130 131 /* 132 * we always copy up to 32 pixels at a time so direction doesn't 133 * matter if w<=32 134 */ 135 if (w > 32) { 136 if (p->xdir < 0) { 137 srcstart += (w - 32) << 2; 138 dststart += (w - 32) << 2; 139 xinc = -128; 140 } else 141 xinc = 128; 142 } else 143 xinc = 128; 144 if (p->ydir < 0) { 145 srcstart += (h - 1) * srcpitch; 146 dststart += (h - 1) * dstpitch; 147 srcinc = -srcpitch; 148 dstinc = -dstpitch; 149 } else { 150 srcinc = srcpitch; 151 dstinc = dstpitch; 152 } 153 if (p->last_rop == 0xcc) { 154 /* plain old copy */ 155 if ( xinc > 0) { 156 /* going left to right */ 157 for (line = 0; line < h; line++) { 158 count = 0; 159 s = srcstart; 160 d = dststart; 161 while ( count < w) { 162 num = min(32, w - count); 163 write_sx_io(p, s, 164 SX_LD(10, num - 1, s & 7)); 165 write_sx_io(p, d, 166 SX_STM(10, num - 1, d & 7)); 167 s += xinc; 168 d += xinc; 169 count += 32; 170 } 171 srcstart += srcinc; 172 dststart += dstinc; 173 } 174 } else { 175 /* going right to left */ 176 int i, chunks = (w >> 5); 177 for (line = 0; line < h; line++) { 178 s = srcstart; 179 d = dststart; 180 count = w; 181 for (i = 0; i < chunks; i++) { 182 write_sx_io(p, s, 183 SX_LD(10, 31, s & 7)); 184 write_sx_io(p, d, 185 SX_STM(10, 31, d & 7)); 186 s -= 128; 187 d -= 128; 188 count -= 32; 189 } 190 /* leftovers, if any */ 191 if (count > 0) { 192 s += (32 - count) << 2; 193 d += (32 - count) << 2; 194 write_sx_io(p, s, 195 SX_LD(10, count - 1, s & 7)); 196 write_sx_io(p, d, 197 SX_STM(10, count - 1, d & 7)); 198 } 199 srcstart += srcinc; 200 dststart += dstinc; 201 } 202 } 203 } else { 204 /* ROPs needed */ 205 if ( xinc > 0) { 206 /* going left to right */ 207 for (line = 0; line < h; line++) { 208 count = 0; 209 s = srcstart; 210 d = dststart; 211 while ( count < w) { 212 num = min(32, w - count); 213 write_sx_io(p, s, 214 SX_LD(10, num - 1, s & 7)); 215 write_sx_io(p, d, 216 SX_LD(42, num - 1, d & 7)); 217 if (num > 16) { 218 write_sx_reg(p, SX_INSTRUCTIONS, 219 SX_ROP(10, 42, 74, 15)); 220 write_sx_reg(p, SX_INSTRUCTIONS, 221 SX_ROP(26, 58, 90, num - 17)); 222 } else { 223 write_sx_reg(p, SX_INSTRUCTIONS, 224 SX_ROP(10, 42, 74, num - 1)); 225 } 226 write_sx_io(p, d, 227 SX_STM(74, num - 1, d & 7)); 228 s += xinc; 229 d += xinc; 230 count += 32; 231 } 232 srcstart += srcinc; 233 dststart += dstinc; 234 } 235 } else { 236 /* going right to left */ 237 int i, chunks = (w >> 5); 238 for (line = 0; line < h; line++) { 239 s = srcstart; 240 d = dststart; 241 count = w; 242 for (i = 0; i < chunks; i++) { 243 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 244 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 245 write_sx_reg(p, SX_INSTRUCTIONS, 246 SX_ROP(10, 42, 74, 15)); 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(26, 58, 90, 15)); 249 write_sx_io(p, d, 250 SX_STM(74, 31, d & 7)); 251 s -= 128; 252 d -= 128; 253 count -= 32; 254 } 255 /* leftovers, if any */ 256 if (count > 0) { 257 s += (32 - count) << 2; 258 d += (32 - count) << 2; 259 write_sx_io(p, s, 260 SX_LD(10, count - 1, s & 7)); 261 write_sx_io(p, d, 262 SX_LD(42, count - 1, d & 7)); 263 if (count > 16) { 264 write_sx_reg(p, SX_INSTRUCTIONS, 265 SX_ROP(10, 42, 74, 15)); 266 write_sx_reg(p, SX_INSTRUCTIONS, 267 SX_ROP(26, 58, 90, count - 17)); 268 } else { 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 SX_ROP(10, 42, 74, count - 1)); 271 } 272 273 write_sx_io(p, d, 274 SX_STM(74, count - 1, d & 7)); 275 } 276 srcstart += srcinc; 277 dststart += dstinc; 278 } 279 } 280 } 281 exaMarkSync(pDstPixmap->drawable.pScreen); 282} 283 284static void 285CG14DoneCopy(PixmapPtr pDstPixmap) 286{ 287} 288 289static Bool 290CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 291{ 292 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 293 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 294 295 ENTER; 296 DPRINTF(X_ERROR, "bits per pixel: %d\n", 297 pPixmap->drawable.bitsPerPixel); 298 write_sx_reg(p, SX_QUEUED(8), fg); 299 write_sx_reg(p, SX_QUEUED(9), fg); 300 if (planemask != p->last_mask) { 301 CG14Wait(p); 302 write_sx_reg(p, SX_PLANEMASK, planemask); 303 p->last_mask = planemask; 304 } 305 alu = sx_rop[alu]; 306 if (alu != p->last_rop) { 307 CG14Wait(p); 308 write_sx_reg(p, SX_ROP_CONTROL, alu); 309 p->last_rop = alu; 310 } 311 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 312 return TRUE; 313} 314 315static void 316CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 317{ 318 int line, x, num; 319 uint32_t ptr; 320 321 ENTER; 322 if (p->last_rop == 0xcc) { 323 /* simple fill */ 324 for (line = 0; line < h; line++) { 325 x = 0; 326 while (x < w) { 327 ptr = start + (x << 2); 328 num = min(32, w - x); 329 write_sx_io(p, ptr, 330 SX_STS(8, num - 1, ptr & 7)); 331 x += 32; 332 } 333 start += pitch; 334 } 335 } else if (p->last_rop == 0xaa) { 336 /* nothing to do here */ 337 return; 338 } else { 339 /* alright, let's do actual ROP stuff */ 340 341 /* first repeat the fill colour into 16 registers */ 342 write_sx_reg(p, SX_INSTRUCTIONS, 343 SX_SELECT_S(8, 8, 10, 15)); 344 345 for (line = 0; line < h; line++) { 346 x = 0; 347 while (x < w) { 348 ptr = start + (x << 2); 349 num = min(32, w - x); 350 /* now suck fb data into registers */ 351 write_sx_io(p, ptr, 352 SX_LD(42, num - 1, ptr & 7)); 353 /* 354 * ROP them with the fill data we left in 10 355 * non-memory ops can only have counts up to 16 356 */ 357 if (num <= 16) { 358 write_sx_reg(p, SX_INSTRUCTIONS, 359 SX_ROP(10, 42, 74, num - 1)); 360 } else { 361 write_sx_reg(p, SX_INSTRUCTIONS, 362 SX_ROP(10, 42, 74, 15)); 363 write_sx_reg(p, SX_INSTRUCTIONS, 364 SX_ROP(10, 58, 90, num - 17)); 365 } 366 /* and write the result back into memory */ 367 write_sx_io(p, ptr, 368 SX_ST(74, num - 1, ptr & 7)); 369 x += 32; 370 } 371 start += pitch; 372 } 373 } 374} 375 376static void 377CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 378{ 379 int line, x, num, off; 380 uint32_t ptr; 381 382 ENTER; 383 off = start & 7; 384 start &= ~7; 385 386 if (p->last_rop == 0xcc) { 387 /* simple fill */ 388 for (line = 0; line < h; line++) { 389 x = 0; 390 while (x < w) { 391 ptr = start + x; 392 num = min(32, w - x); 393 write_sx_io(p, ptr, 394 SX_STBS(8, num - 1, off)); 395 x += 32; 396 } 397 start += pitch; 398 } 399 } else if (p->last_rop == 0xaa) { 400 /* nothing to do here */ 401 return; 402 } else { 403 /* alright, let's do actual ROP stuff */ 404 405 /* first repeat the fill colour into 16 registers */ 406 write_sx_reg(p, SX_INSTRUCTIONS, 407 SX_SELECT_S(8, 8, 10, 15)); 408 409 for (line = 0; line < h; line++) { 410 x = 0; 411 while (x < w) { 412 ptr = start + x; 413 num = min(32, w - x); 414 /* now suck fb data into registers */ 415 write_sx_io(p, ptr, 416 SX_LDB(42, num - 1, off)); 417 /* 418 * ROP them with the fill data we left in 10 419 * non-memory ops can only have counts up to 16 420 */ 421 if (num <= 16) { 422 write_sx_reg(p, SX_INSTRUCTIONS, 423 SX_ROP(10, 42, 74, num - 1)); 424 } else { 425 write_sx_reg(p, SX_INSTRUCTIONS, 426 SX_ROP(10, 42, 74, 15)); 427 write_sx_reg(p, SX_INSTRUCTIONS, 428 SX_ROP(10, 58, 90, num - 17)); 429 } 430 /* and write the result back into memory */ 431 write_sx_io(p, ptr, 432 SX_STB(74, num - 1, off)); 433 x += 32; 434 } 435 start += pitch; 436 } 437 } 438} 439 440static void 441CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 442{ 443 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 444 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 445 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 446 int start, depth; 447 448 ENTER; 449 dstpitch = exaGetPixmapPitch(pPixmap); 450 dstoff = exaGetPixmapOffset(pPixmap); 451 452 depth = pPixmap->drawable.bitsPerPixel; 453 switch (depth) { 454 case 32: 455 start = dstoff + (y1 * dstpitch) + (x1 << 2); 456 CG14Solid32(p, start, dstpitch, w, h); 457 break; 458 case 8: 459 start = dstoff + (y1 * dstpitch) + x1; 460 CG14Solid8(p, start, dstpitch, w, h); 461 break; 462 } 463 464 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 465 dstpitch, dstoff, start); 466 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 467 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 468 exaMarkSync(pPixmap->drawable.pScreen); 469} 470 471/* 472 * Memcpy-based UTS. 473 */ 474static Bool 475CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 476 char *src, int src_pitch) 477{ 478 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 479 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 480 char *dst = p->fb + exaGetPixmapOffset(pDst); 481 int dst_pitch = exaGetPixmapPitch(pDst); 482 483 int bpp = pDst->drawable.bitsPerPixel; 484 int cpp = (bpp + 7) >> 3; 485 int wBytes = w * cpp; 486 487 ENTER; 488 dst += (x * cpp) + (y * dst_pitch); 489 490 CG14Wait(p); 491 492 while (h--) { 493 memcpy(dst, src, wBytes); 494 src += src_pitch; 495 dst += dst_pitch; 496 } 497 __asm("stbar;"); 498 return TRUE; 499} 500 501/* 502 * Memcpy-based DFS. 503 */ 504static Bool 505CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 506 char *dst, int dst_pitch) 507{ 508 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 509 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 510 char *src = p->fb + exaGetPixmapOffset(pSrc); 511 int src_pitch = exaGetPixmapPitch(pSrc); 512 513 ENTER; 514 int bpp = pSrc->drawable.bitsPerPixel; 515 int cpp = (bpp + 7) >> 3; 516 int wBytes = w * cpp; 517 518 src += (x * cpp) + (y * src_pitch); 519 520 CG14Wait(p); 521 522 while (h--) { 523 memcpy(dst, src, wBytes); 524 src += src_pitch; 525 dst += dst_pitch; 526 } 527 528 return TRUE; 529} 530 531Bool 532CG14CheckComposite(int op, PicturePtr pSrcPicture, 533 PicturePtr pMaskPicture, 534 PicturePtr pDstPicture) 535{ 536 int i, ok = FALSE; 537 538 ENTER; 539 540 /* 541 * SX is in theory capable of accelerating pretty much all Xrender ops, 542 * even coordinate transformation and gradients. Support will be added 543 * over time and likely have to spill over into its own source file. 544 */ 545 546 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 547 xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op); 548 return FALSE; 549 } 550 i = 0; 551 while ((i < arraysize(src_formats)) && (!ok)) { 552 ok = (pSrcPicture->format == src_formats[i]); 553 i++; 554 } 555 556 if (!ok) { 557 xf86Msg(X_ERROR, "%s: unsupported src format %x\n", 558 __func__, pSrcPicture->format); 559 return FALSE; 560 } 561 562 DPRINTF(X_ERROR, "src is %x, %d: %d %d\n", pSrcPicture->format, op, 563 pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height); 564 565 if (pMaskPicture != NULL) { 566 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 567 pMaskPicture->pDrawable->width, 568 pMaskPicture->pDrawable->height); 569 } 570 return TRUE; 571} 572 573Bool 574CG14PrepareComposite(int op, PicturePtr pSrcPicture, 575 PicturePtr pMaskPicture, 576 PicturePtr pDstPicture, 577 PixmapPtr pSrc, 578 PixmapPtr pMask, 579 PixmapPtr pDst) 580{ 581 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 582 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 583 584 ENTER; 585 586 p->no_source_pixmap = FALSE; 587 p->source_is_solid = FALSE; 588 589 if (pSrcPicture->format == PICT_a1) { 590 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 591 pDstPicture->format, op); 592 if (pMaskPicture != NULL) { 593 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 594 } 595 } 596 if (pSrcPicture->pSourcePict != NULL) { 597 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 598 p->fillcolour = 599 pSrcPicture->pSourcePict->solidFill.color; 600 DPRINTF(X_ERROR, "%s: solid src %08x\n", 601 __func__, p->fillcolour); 602 p->no_source_pixmap = TRUE; 603 p->source_is_solid = TRUE; 604 } 605 } 606 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 607 if (pMaskPicture->pSourcePict->type == 608 SourcePictTypeSolidFill) { 609 p->fillcolour = 610 pMaskPicture->pSourcePict->solidFill.color; 611 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 612 __func__, p->fillcolour); 613 } 614 } 615 if (pMaskPicture != NULL) { 616 p->mskoff = exaGetPixmapOffset(pMask); 617 p->mskpitch = exaGetPixmapPitch(pMask); 618 p->mskformat = pMaskPicture->format; 619 } else { 620 p->mskoff = 0; 621 p->mskpitch = 0; 622 p->mskformat = 0; 623 } 624 if (pSrc != NULL) { 625 p->source_is_solid = 626 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 627 p->srcoff = exaGetPixmapOffset(pSrc); 628 p->srcpitch = exaGetPixmapPitch(pSrc); 629 if (p->source_is_solid) { 630 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 631 } 632 } 633 p->srcformat = pSrcPicture->format; 634 p->dstformat = pDstPicture->format; 635 636 if (p->source_is_solid) { 637 uint32_t temp; 638 639 /* stuff source colour into SX registers, swap as needed */ 640 temp = p->fillcolour; 641 switch (p->srcformat) { 642 case PICT_a8r8g8b8: 643 case PICT_x8r8g8b8: 644 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 645 temp = temp >> 8; 646 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 647 temp = temp >> 8; 648 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 649 break; 650 case PICT_a8b8g8r8: 651 case PICT_x8b8g8r8: 652 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 653 temp = temp >> 8; 654 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 655 temp = temp >> 8; 656 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 657 break; 658 } 659 write_sx_reg(p, SX_QUEUED(8), 0xff); 660 } 661 p->op = op; 662 if (op == PictOpSrc) { 663 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 664 } 665#ifdef SX_DEBUG 666 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 667 *(uint32_t *)(p->fb + p->srcoff)); 668#endif 669 return TRUE; 670} 671 672void 673CG14Composite(PixmapPtr pDst, int srcX, int srcY, 674 int maskX, int maskY, 675 int dstX, int dstY, 676 int width, int height) 677{ 678 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 679 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 680 uint32_t dstoff, dstpitch; 681 uint32_t dst, msk, src; 682 683 ENTER; 684 dstoff = exaGetPixmapOffset(pDst); 685 dstpitch = exaGetPixmapPitch(pDst); 686 687 switch (p->op) { 688 case PictOpOver: 689 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 690 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 691 p->mskformat, p->dstformat, srcX, srcY); 692 if (p->source_is_solid) { 693 switch (p->mskformat) { 694 case PICT_a8: 695 msk = p->mskoff + 696 (maskY * p->mskpitch) + 697 maskX; 698 CG14Comp_Over8Solid(p, 699 msk, p->mskpitch, 700 dst, dstpitch, 701 width, height); 702 break; 703 case PICT_a8r8g8b8: 704 case PICT_a8b8g8r8: 705 msk = p->mskoff + 706 (maskY * p->mskpitch) + 707 (maskX << 2); 708 CG14Comp_Over32Solid(p, 709 msk, p->mskpitch, 710 dst, dstpitch, 711 width, height); 712 break; 713 default: 714 xf86Msg(X_ERROR, 715 "unsupported mask format\n"); 716 } 717 } else { 718 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 719 p->mskformat); 720 switch (p->srcformat) { 721 case PICT_a8r8g8b8: 722 case PICT_a8b8g8r8: 723 src = p->srcoff + 724 (srcY * p->srcpitch) + 725 (srcX << 2); 726 dst = dstoff + 727 (dstY * dstpitch) + 728 (dstX << 2); 729 if (p->mskformat == PICT_a8) { 730 msk = p->mskoff + 731 (maskY * p->mskpitch) + 732 maskX; 733 CG14Comp_Over32Mask(p, 734 src, p->srcpitch, 735 msk, p->mskpitch, 736 dst, dstpitch, 737 width, height); 738 } else { 739 CG14Comp_Over32(p, 740 src, p->srcpitch, 741 dst, dstpitch, 742 width, height); 743 } 744 break; 745 case PICT_x8r8g8b8: 746 case PICT_x8b8g8r8: 747 src = p->srcoff + 748 (srcY * p->srcpitch) + 749 (srcX << 2); 750 dst = dstoff + 751 (dstY * dstpitch) + 752 (dstX << 2); 753 if (p->mskformat == PICT_a8) { 754 msk = p->mskoff + 755 (maskY * p->mskpitch) + 756 maskX; 757 CG14Comp_Over32Mask_noalpha(p, 758 src, p->srcpitch, 759 msk, p->mskpitch, 760 dst, dstpitch, 761 width, height); 762 } else { 763 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 764 } 765 break; 766 default: 767 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 768 __func__, p->srcformat); 769 } 770 } 771 break; 772 case PictOpAdd: 773 DPRINTF(X_ERROR, "Add %08x %08x\n", 774 p->srcformat, p->dstformat); 775 switch (p->srcformat) { 776 case PICT_a8: 777 src = p->srcoff + 778 (srcY * p->srcpitch) + srcX; 779 dst = dstoff + (dstY * dstpitch) + dstX; 780 CG14Comp_Add8(p, src, p->srcpitch, 781 dst, dstpitch, width, height); 782 break; 783 case PICT_a8r8g8b8: 784 case PICT_x8r8g8b8: 785 src = p->srcoff + 786 (srcY * p->srcpitch) + (srcX << 2); 787 dst = dstoff + (dstY * dstpitch) + 788 (dstX << 2); 789 CG14Comp_Add32(p, src, p->srcpitch, 790 dst, dstpitch, width, height); 791 break; 792 default: 793 xf86Msg(X_ERROR, 794 "unsupported src format\n"); 795 } 796 break; 797 case PictOpSrc: 798 DPRINTF(X_ERROR, "Src %08x %08x\n", 799 p->srcformat, p->dstformat); 800 CG14Copy(pDst, srcX, srcY, dstX, dstY, width, height); 801 break; 802 default: 803 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 804 } 805 exaMarkSync(pDst->drawable.pScreen); 806} 807 808 809 810Bool 811CG14InitAccel(ScreenPtr pScreen) 812{ 813 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 814 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 815 ExaDriverPtr pExa; 816 817 pExa = exaDriverAlloc(); 818 if (!pExa) 819 return FALSE; 820 821 p->pExa = pExa; 822 823 pExa->exa_major = EXA_VERSION_MAJOR; 824 pExa->exa_minor = EXA_VERSION_MINOR; 825 826 pExa->memoryBase = p->fb; 827 pExa->memorySize = p->memsize; 828 pExa->offScreenBase = p->width * p->height * 4; 829 830 /* 831 * SX memory instructions are written to 64bit aligned addresses with 832 * a 3 bit displacement. Make sure the displacement remains constant 833 * within one column 834 */ 835 836 pExa->pixmapOffsetAlign = 8; 837 pExa->pixmapPitchAlign = 8; 838 839 pExa->flags = EXA_OFFSCREEN_PIXMAPS | 840 /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/ 841 EXA_MIXED_PIXMAPS; 842 843 /* 844 * these limits are bogus 845 * SX doesn't deal with coordinates at all, so there is no limit but 846 * we have to put something here 847 */ 848 pExa->maxX = 4096; 849 pExa->maxY = 4096; 850 851 pExa->WaitMarker = CG14WaitMarker; 852 853 pExa->PrepareSolid = CG14PrepareSolid; 854 pExa->Solid = CG14Solid; 855 pExa->DoneSolid = CG14DoneCopy; 856 pExa->PrepareCopy = CG14PrepareCopy; 857 pExa->Copy = CG14Copy; 858 pExa->DoneCopy = CG14DoneCopy; 859 if (p->use_xrender) { 860 pExa->CheckComposite = CG14CheckComposite; 861 pExa->PrepareComposite = CG14PrepareComposite; 862 pExa->Composite = CG14Composite; 863 pExa->DoneComposite = CG14DoneCopy; 864 } 865 866 /* EXA hits more optimized paths when it does not have to fallback 867 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 868 */ 869 pExa->UploadToScreen = CG14UploadToScreen; 870 pExa->DownloadFromScreen = CG14DownloadFromScreen; 871 872 /* do some hardware init */ 873 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 874 p->last_mask = 0xffffffff; 875 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 876 p->last_rop = 0xcc; 877 return exaDriverInit(pScreen, pExa); 878} 879