/* cg14_accel.c revision a3a2ba44 */
1/* $NetBSD: cg14_accel.c,v 1.2 2013/06/25 12:26:57 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
29 * 30 */ 31 32#include <sys/types.h> 33 34/* all driver need this */ 35#include "xf86.h" 36#include "xf86_OSproc.h" 37#include "compiler.h" 38 39#include "cg14.h" 40#include <sparc/sxreg.h> 41 42#define SX_SINGLE 43/*#define SX_DEBUG*/ 44/*#define SX_ADD_SOFTWARE*/ 45 46#ifdef SX_DEBUG 47#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48#define DPRINTF xf86Msg 49#else 50#define ENTER 51#define DPRINTF while (0) xf86Msg 52#endif 53 54#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 55 56/* 0xcc is SX's GXcopy equivalent */ 57uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 58 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 59 60int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 61 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 62int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 63 64static inline void 65CG14Wait(Cg14Ptr p) 66{ 67 /* we just wait until the instruction queue is empty */ 68 while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {}; 69} 70 71static void 72CG14WaitMarker(ScreenPtr pScreen, int Marker) 73{ 74 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 75 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 76 77 CG14Wait(p); 78} 79 80static Bool 81CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 82 int xdir, int ydir, int alu, Pixel planemask) 83{ 84 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 85 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 86 87 ENTER; 88 DPRINTF(X_ERROR, "bits per pixel: %d\n", 89 pSrcPixmap->drawable.bitsPerPixel); 90 91 if (planemask != p->last_mask) { 92 CG14Wait(p); 93 write_sx_reg(p, SX_PLANEMASK, planemask); 94 p->last_mask = planemask; 95 } 96 alu = sx_rop[alu]; 97 if (alu != p->last_rop) { 98 CG14Wait(p); 99 write_sx_reg(p, SX_ROP_CONTROL, alu); 100 p->last_rop = alu; 101 } 102 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 103 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 104 p->xdir = xdir; 105 p->ydir = ydir; 106 return TRUE; 107} 108 109static void 
110CG14Copy(PixmapPtr pDstPixmap, 111 int srcX, int srcY, int dstX, int dstY, int w, int h) 112{ 113 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 114 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 115 int dstpitch, dstoff, srcpitch, srcoff; 116 int srcstart, dststart, xinc, srcinc, dstinc; 117 int line, count, s, d, num; 118 119 ENTER; 120 dstpitch = exaGetPixmapPitch(pDstPixmap); 121 dstoff = exaGetPixmapOffset(pDstPixmap); 122 srcpitch = p->srcpitch; 123 srcoff = p->srcoff; 124 /* 125 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 126 * actually wrote anything and only sync if it did 127 */ 128 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 129 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 130 131 /* 132 * we always copy up to 32 pixels at a time so direction doesn't 133 * matter if w<=32 134 */ 135 if (w > 32) { 136 if (p->xdir < 0) { 137 srcstart += (w - 32) << 2; 138 dststart += (w - 32) << 2; 139 xinc = -128; 140 } else 141 xinc = 128; 142 } else 143 xinc = 128; 144 if (p->ydir < 0) { 145 srcstart += (h - 1) * srcpitch; 146 dststart += (h - 1) * dstpitch; 147 srcinc = -srcpitch; 148 dstinc = -dstpitch; 149 } else { 150 srcinc = srcpitch; 151 dstinc = dstpitch; 152 } 153 if (p->last_rop == 0xcc) { 154 /* plain old copy */ 155 if ( xinc > 0) { 156 /* going left to right */ 157 for (line = 0; line < h; line++) { 158 count = 0; 159 s = srcstart; 160 d = dststart; 161 while ( count < w) { 162 num = min(32, w - count); 163 write_sx_io(p, s, 164 SX_LD(10, num - 1, s & 7)); 165 write_sx_io(p, d, 166 SX_STM(10, num - 1, d & 7)); 167 s += xinc; 168 d += xinc; 169 count += 32; 170 } 171 srcstart += srcinc; 172 dststart += dstinc; 173 } 174 } else { 175 /* going right to left */ 176 int i, chunks = (w >> 5); 177 for (line = 0; line < h; line++) { 178 s = srcstart; 179 d = dststart; 180 count = w; 181 for (i = 0; i < chunks; i++) { 182 write_sx_io(p, s, 183 SX_LD(10, 31, s & 7)); 184 write_sx_io(p, d, 185 SX_STM(10, 31, d & 
7)); 186 s -= 128; 187 d -= 128; 188 count -= 32; 189 } 190 /* leftovers, if any */ 191 if (count > 0) { 192 s += (32 - count) << 2; 193 d += (32 - count) << 2; 194 write_sx_io(p, s, 195 SX_LD(10, count - 1, s & 7)); 196 write_sx_io(p, d, 197 SX_STM(10, count - 1, d & 7)); 198 } 199 srcstart += srcinc; 200 dststart += dstinc; 201 } 202 } 203 } else { 204 /* ROPs needed */ 205 if ( xinc > 0) { 206 /* going left to right */ 207 for (line = 0; line < h; line++) { 208 count = 0; 209 s = srcstart; 210 d = dststart; 211 while ( count < w) { 212 num = min(32, w - count); 213 write_sx_io(p, s, 214 SX_LD(10, num - 1, s & 7)); 215 write_sx_io(p, d, 216 SX_LD(42, num - 1, d & 7)); 217 if (num > 16) { 218 write_sx_reg(p, SX_INSTRUCTIONS, 219 SX_ROP(10, 42, 74, 15)); 220 write_sx_reg(p, SX_INSTRUCTIONS, 221 SX_ROP(26, 58, 90, num - 17)); 222 } else { 223 write_sx_reg(p, SX_INSTRUCTIONS, 224 SX_ROP(10, 42, 74, num - 1)); 225 } 226 write_sx_io(p, d, 227 SX_STM(74, num - 1, d & 7)); 228 s += xinc; 229 d += xinc; 230 count += 32; 231 } 232 srcstart += srcinc; 233 dststart += dstinc; 234 } 235 } else { 236 /* going right to left */ 237 int i, chunks = (w >> 5); 238 for (line = 0; line < h; line++) { 239 s = srcstart; 240 d = dststart; 241 count = w; 242 for (i = 0; i < chunks; i++) { 243 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 244 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 245 write_sx_reg(p, SX_INSTRUCTIONS, 246 SX_ROP(10, 42, 74, 15)); 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(26, 58, 90, 15)); 249 write_sx_io(p, d, 250 SX_STM(74, 31, d & 7)); 251 s -= 128; 252 d -= 128; 253 count -= 32; 254 } 255 /* leftovers, if any */ 256 if (count > 0) { 257 s += (32 - count) << 2; 258 d += (32 - count) << 2; 259 write_sx_io(p, s, 260 SX_LD(10, count - 1, s & 7)); 261 write_sx_io(p, d, 262 SX_LD(42, count - 1, d & 7)); 263 if (count > 16) { 264 write_sx_reg(p, SX_INSTRUCTIONS, 265 SX_ROP(10, 42, 74, 15)); 266 write_sx_reg(p, SX_INSTRUCTIONS, 267 SX_ROP(26, 58, 90, count - 17)); 268 } else 
{ 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 SX_ROP(10, 42, 74, count - 1)); 271 } 272 273 write_sx_io(p, d, 274 SX_STM(74, count - 1, d & 7)); 275 } 276 srcstart += srcinc; 277 dststart += dstinc; 278 } 279 } 280 } 281 exaMarkSync(pDstPixmap->drawable.pScreen); 282} 283 284static void 285CG14DoneCopy(PixmapPtr pDstPixmap) 286{ 287} 288 289static Bool 290CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 291{ 292 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 293 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 294 295 ENTER; 296 DPRINTF(X_ERROR, "bits per pixel: %d\n", pPixmap->drawable.bitsPerPixel); 297 write_sx_reg(p, SX_QUEUED(8), fg); 298 write_sx_reg(p, SX_QUEUED(9), fg); 299 if (planemask != p->last_mask) { 300 CG14Wait(p); 301 write_sx_reg(p, SX_PLANEMASK, planemask); 302 p->last_mask = planemask; 303 } 304 alu = sx_rop[alu]; 305 if (alu != p->last_rop) { 306 CG14Wait(p); 307 write_sx_reg(p, SX_ROP_CONTROL, alu); 308 p->last_rop = alu; 309 } 310 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 311 return TRUE; 312} 313 314static void 315CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 316{ 317 int line, x, num; 318 uint32_t ptr; 319 320 ENTER; 321 if (p->last_rop == 0xcc) { 322 /* simple fill */ 323 for (line = 0; line < h; line++) { 324 x = 0; 325 while (x < w) { 326 ptr = start + (x << 2); 327 num = min(32, w - x); 328 write_sx_io(p, ptr, 329 SX_STS(8, num - 1, ptr & 7)); 330 x += 32; 331 } 332 start += pitch; 333 } 334 } else if (p->last_rop == 0xaa) { 335 /* nothing to do here */ 336 return; 337 } else { 338 /* alright, let's do actual ROP stuff */ 339 340 /* first repeat the fill colour into 16 registers */ 341 write_sx_reg(p, SX_INSTRUCTIONS, 342 SX_SELECT_S(8, 8, 10, 15)); 343 344 for (line = 0; line < h; line++) { 345 x = 0; 346 while (x < w) { 347 ptr = start + (x << 2); 348 num = min(32, w - x); 349 /* now suck fb data into registers */ 350 write_sx_io(p, ptr, 351 SX_LD(42, num - 1, ptr & 7)); 352 /* 
353 * ROP them with the fill data we left in 10 354 * non-memory ops can only have counts up to 16 355 */ 356 if (num <= 16) { 357 write_sx_reg(p, SX_INSTRUCTIONS, 358 SX_ROP(10, 42, 74, num - 1)); 359 } else { 360 write_sx_reg(p, SX_INSTRUCTIONS, 361 SX_ROP(10, 42, 74, 15)); 362 write_sx_reg(p, SX_INSTRUCTIONS, 363 SX_ROP(10, 58, 90, num - 17)); 364 } 365 /* and write the result back into memory */ 366 write_sx_io(p, ptr, 367 SX_ST(74, num - 1, ptr & 7)); 368 x += 32; 369 } 370 start += pitch; 371 } 372 } 373} 374 375static void 376CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 377{ 378 int line, x, num, off; 379 uint32_t ptr; 380 381 ENTER; 382 off = start & 7; 383 start &= ~7; 384 385 if (p->last_rop == 0xcc) { 386 /* simple fill */ 387 for (line = 0; line < h; line++) { 388 x = 0; 389 while (x < w) { 390 ptr = start + x; 391 num = min(32, w - x); 392 write_sx_io(p, ptr, 393 SX_STBS(8, num - 1, off)); 394 x += 32; 395 } 396 start += pitch; 397 } 398 } else if (p->last_rop == 0xaa) { 399 /* nothing to do here */ 400 return; 401 } else { 402 /* alright, let's do actual ROP stuff */ 403 404 /* first repeat the fill colour into 16 registers */ 405 write_sx_reg(p, SX_INSTRUCTIONS, 406 SX_SELECT_S(8, 8, 10, 15)); 407 408 for (line = 0; line < h; line++) { 409 x = 0; 410 while (x < w) { 411 ptr = start + x; 412 num = min(32, w - x); 413 /* now suck fb data into registers */ 414 write_sx_io(p, ptr, 415 SX_LDB(42, num - 1, off)); 416 /* 417 * ROP them with the fill data we left in 10 418 * non-memory ops can only have counts up to 16 419 */ 420 if (num <= 16) { 421 write_sx_reg(p, SX_INSTRUCTIONS, 422 SX_ROP(10, 42, 74, num - 1)); 423 } else { 424 write_sx_reg(p, SX_INSTRUCTIONS, 425 SX_ROP(10, 42, 74, 15)); 426 write_sx_reg(p, SX_INSTRUCTIONS, 427 SX_ROP(10, 58, 90, num - 17)); 428 } 429 /* and write the result back into memory */ 430 write_sx_io(p, ptr, 431 SX_STB(74, num - 1, off)); 432 x += 32; 433 } 434 start += pitch; 435 } 436 } 437} 438 
439static void 440CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 441{ 442 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 443 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 444 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 445 int start, depth; 446 447 ENTER; 448 dstpitch = exaGetPixmapPitch(pPixmap); 449 dstoff = exaGetPixmapOffset(pPixmap); 450 451 depth = pPixmap->drawable.bitsPerPixel; 452 switch (depth) { 453 case 32: 454 start = dstoff + (y1 * dstpitch) + (x1 << 2); 455 CG14Solid32(p, start, dstpitch, w, h); 456 break; 457 case 8: 458 start = dstoff + (y1 * dstpitch) + x1; 459 CG14Solid8(p, start, dstpitch, w, h); 460 break; 461 } 462 463 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 464 dstpitch, dstoff, start); 465 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 466 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 467 exaMarkSync(pPixmap->drawable.pScreen); 468} 469 470/* 471 * Memcpy-based UTS. 472 */ 473static Bool 474CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 475 char *src, int src_pitch) 476{ 477 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 478 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 479 char *dst = p->fb + exaGetPixmapOffset(pDst); 480 int dst_pitch = exaGetPixmapPitch(pDst); 481 482 int bpp = pDst->drawable.bitsPerPixel; 483 int cpp = (bpp + 7) >> 3; 484 int wBytes = w * cpp; 485 486 ENTER; 487 dst += (x * cpp) + (y * dst_pitch); 488 489 CG14Wait(p); 490 491 while (h--) { 492 memcpy(dst, src, wBytes); 493 src += src_pitch; 494 dst += dst_pitch; 495 } 496 __asm("stbar;"); 497 return TRUE; 498} 499 500/* 501 * Memcpy-based DFS. 
502 */ 503static Bool 504CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 505 char *dst, int dst_pitch) 506{ 507 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 508 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 509 char *src = p->fb + exaGetPixmapOffset(pSrc); 510 int src_pitch = exaGetPixmapPitch(pSrc); 511 512 ENTER; 513 int bpp = pSrc->drawable.bitsPerPixel; 514 int cpp = (bpp + 7) >> 3; 515 int wBytes = w * cpp; 516 517 src += (x * cpp) + (y * src_pitch); 518 519 CG14Wait(p); 520 521 while (h--) { 522 memcpy(dst, src, wBytes); 523 src += src_pitch; 524 dst += dst_pitch; 525 } 526 527 return TRUE; 528} 529 530Bool 531CG14CheckComposite(int op, PicturePtr pSrcPicture, 532 PicturePtr pMaskPicture, 533 PicturePtr pDstPicture) 534{ 535 int i, ok = FALSE; 536 537 ENTER; 538 539 /* 540 * SX is in theory capable of accelerating pretty much all Xrender ops, 541 * even coordinate transformation and gradients. Support will be added 542 * over time and likely have to spill over into its own source file. 
543 */ 544 545 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 546 xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op); 547 return FALSE; 548 } 549 i = 0; 550 while ((i < arraysize(src_formats)) && (!ok)) { 551 ok = (pSrcPicture->format == src_formats[i]); 552 i++; 553 } 554 555 if (!ok) { 556 xf86Msg(X_ERROR, "%s: unsupported src format %x\n", 557 __func__, pSrcPicture->format); 558 return FALSE; 559 } 560 561 DPRINTF(X_ERROR, "src is %x, %d: %d %d\n", pSrcPicture->format, op, 562 pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height); 563 564 if (pMaskPicture != NULL) { 565 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 566 pMaskPicture->pDrawable->width, 567 pMaskPicture->pDrawable->height); 568 } 569 return TRUE; 570} 571 572Bool 573CG14PrepareComposite(int op, PicturePtr pSrcPicture, 574 PicturePtr pMaskPicture, 575 PicturePtr pDstPicture, 576 PixmapPtr pSrc, 577 PixmapPtr pMask, 578 PixmapPtr pDst) 579{ 580 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 581 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 582 583 ENTER; 584 585 if (pSrcPicture->format == PICT_a1) { 586 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", pDstPicture->format, op); 587 if (pMaskPicture != NULL) { 588 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 589 } 590 } 591 if (pSrcPicture->pSourcePict != NULL) { 592 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 593 p->fillcolour = 594 pSrcPicture->pSourcePict->solidFill.color; 595 xf86Msg(X_ERROR, "%s: solid src %08x\n", 596 __func__, p->fillcolour); 597 } 598 } 599 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 600 if (pMaskPicture->pSourcePict->type == 601 SourcePictTypeSolidFill) { 602 p->fillcolour = 603 pMaskPicture->pSourcePict->solidFill.color; 604 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 605 __func__, p->fillcolour); 606 } 607 } 608 if (pMaskPicture != NULL) { 609 p->mskoff = exaGetPixmapOffset(pMask); 610 p->mskpitch = 
exaGetPixmapPitch(pMask); 611 p->mskformat = pMaskPicture->format; 612 } else { 613 p->mskoff = 0; 614 p->mskpitch = 0; 615 p->mskformat = 0; 616 } 617 p->source_is_solid = 618 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 619 p->srcoff = exaGetPixmapOffset(pSrc); 620 p->srcpitch = exaGetPixmapPitch(pSrc); 621 p->srcformat = pSrcPicture->format; 622 p->dstformat = pDstPicture->format; 623 p->op = op; 624 if (op == PictOpSrc) { 625 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 626 } 627#ifdef SX_DEBUG 628 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 629 *(uint32_t *)(p->fb + p->srcoff)); 630#endif 631 return TRUE; 632} 633 634void CG14Comp_Over32Solid(Cg14Ptr p, 635 uint32_t src, uint32_t srcpitch, 636 uint32_t dst, uint32_t dstpitch, 637 int width, int height) 638{ 639 uint32_t msk = src, mskx, dstx, m; 640 int line, x, i; 641 642 ENTER; 643 /* first get the source colour */ 644 write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 645 write_sx_reg(p, SX_QUEUED(8), 0xff); 646 for (line = 0; line < height; line++) { 647 mskx = msk; 648 dstx = dst; 649#ifdef SX_SINGLE 650 651 for (x = 0; x < width; x++) { 652 m = *(volatile uint32_t *)(p->fb + mskx); 653 m = m >> 24; 654 if (m == 0) { 655 /* nothing to do - all transparent */ 656 } else if (m == 0xff) { 657 /* all opaque */ 658 write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 659 } else { 660 /* fetch alpha value, stick it into scam */ 661 /* mask is in R[12:15] */ 662 /*write_sx_io(p, mskx, 663 SX_LDUQ0(12, 0, mskx & 7));*/ 664 write_sx_reg(p, SX_QUEUED(12), m); 665 /* fetch dst pixel */ 666 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 667 write_sx_reg(p, SX_INSTRUCTIONS, 668 SX_ORV(12, 0, R_SCAM, 0)); 669 /* 670 * src * alpha + R0 671 * R[9:11] * SCAM + R0 -> R[17:19] 672 */ 673 write_sx_reg(p, SX_INSTRUCTIONS, 674 SX_SAXP16X16SR8(9, 0, 17, 2)); 675 676 /* invert SCAM */ 677 write_sx_reg(p, SX_INSTRUCTIONS, 678 SX_XORV(12, 8, R_SCAM, 0)); 679#ifdef SX_DEBUG 680 
write_sx_reg(p, SX_INSTRUCTIONS, 681 SX_XORV(12, 8, 13, 0)); 682#endif 683 /* dst * (1 - alpha) + R[13:15] */ 684 write_sx_reg(p, SX_INSTRUCTIONS, 685 SX_SAXP16X16SR8(21, 17, 25, 2)); 686 write_sx_io(p, dstx, 687 SX_STUQ0C(24, 0, dstx & 7)); 688 } 689 dstx += 4; 690 mskx += 4; 691 } 692#else 693 for (x = 0; x < width; x += 4) { 694 /* fetch 4 mask values */ 695 write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 696 /* fetch destination pixels */ 697 write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 698 /* duplicate them for all channels */ 699 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 700 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 701 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 702 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 703 /* generate inverted alpha */ 704 write_sx_reg(p, SX_INSTRUCTIONS, 705 SX_XORS(12, 8, 28, 15)); 706 /* multiply source */ 707 write_sx_reg(p, SX_INSTRUCTIONS, 708 SX_MUL16X16SR8(8, 12, 44, 3)); 709 write_sx_reg(p, SX_INSTRUCTIONS, 710 SX_MUL16X16SR8(8, 16, 48, 3)); 711 write_sx_reg(p, SX_INSTRUCTIONS, 712 SX_MUL16X16SR8(8, 20, 52, 3)); 713 write_sx_reg(p, SX_INSTRUCTIONS, 714 SX_MUL16X16SR8(8, 24, 56, 3)); 715 /* multiply dest */ 716 write_sx_reg(p, SX_INSTRUCTIONS, 717 SX_MUL16X16SR8(28, 60, 76, 15)); 718 /* add up */ 719 write_sx_reg(p, SX_INSTRUCTIONS, 720 SX_ADDV(44, 76, 92, 15)); 721 /* write back */ 722 write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 723 dstx += 16; 724 mskx += 16; 725 } 726#endif 727 dst += dstpitch; 728 msk += srcpitch; 729 } 730} 731 732void 733CG14Composite(PixmapPtr pDst, int srcX, int srcY, 734 int maskX, int maskY, 735 int dstX, int dstY, 736 int width, int height) 737{ 738 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 739 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 740 uint32_t dstoff, dstpitch; 741 uint32_t dst, msk, src; 742 743 ENTER; 744 dstoff = exaGetPixmapOffset(pDst); 745 dstpitch = exaGetPixmapPitch(pDst); 746 747 switch (p->op) { 748 case 
PictOpOver: 749 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 750 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 751 p->mskformat, p->dstformat, srcX, srcY); 752 if (p->source_is_solid) { 753 switch (p->mskformat) { 754 case PICT_a8: 755 msk = p->mskoff + 756 (maskY * p->mskpitch) + 757 maskX; 758 CG14Comp_Over8Solid(p, 759 msk, p->mskpitch, 760 dst, dstpitch, 761 width, height); 762 break; 763 case PICT_a8r8g8b8: 764 case PICT_a8b8g8r8: 765 msk = p->mskoff + 766 (maskY * p->mskpitch) + 767 (maskX << 2); 768 CG14Comp_Over32Solid(p, 769 msk, p->mskpitch, 770 dst, dstpitch, 771 width, height); 772 break; 773 default: 774 xf86Msg(X_ERROR, 775 "unsupported mask format\n"); 776 } 777 } else { 778 DPRINTF(X_ERROR, "non-solid over with msk %x\n", p->mskformat); 779 switch (p->srcformat) { 780 case PICT_a8r8g8b8: 781 case PICT_a8b8g8r8: 782 src = p->srcoff + 783 (srcY * p->srcpitch) + 784 (srcX << 2); 785 dst = dstoff + 786 (dstY * dstpitch) + 787 (dstX << 2); 788 if (p->mskformat == PICT_a8) { 789 msk = p->mskoff + 790 (maskY * p->mskpitch) + 791 maskX; 792 CG14Comp_Over32Mask(p, 793 src, p->srcpitch, 794 msk, p->mskpitch, 795 dst, dstpitch, 796 width, height); 797 } else { 798 CG14Comp_Over32(p, 799 src, p->srcpitch, 800 dst, dstpitch, 801 width, height); 802 } 803 break; 804 case PICT_x8r8g8b8: 805 case PICT_x8b8g8r8: 806 xf86Msg(X_ERROR, "alpha better be separate\n"); 807 break; 808 default: 809 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 810 __func__, p->srcformat); 811 } 812 } 813 break; 814 case PictOpAdd: 815 DPRINTF(X_ERROR, "Add %08x %08x\n", 816 p->srcformat, p->dstformat); 817 switch (p->srcformat) { 818 case PICT_a8: 819 src = p->srcoff + 820 (srcY * p->srcpitch) + srcX; 821 dst = dstoff + (dstY * dstpitch) + dstX; 822 CG14Comp_Add8(p, src, p->srcpitch, 823 dst, dstpitch, width, height); 824 break; 825 case PICT_a8r8g8b8: 826 case PICT_x8r8g8b8: 827 src = p->srcoff + 828 (srcY * p->srcpitch) + (srcX << 2); 829 dst = dstoff + (dstY * dstpitch) + 830 
(dstX << 2); 831 CG14Comp_Add32(p, src, p->srcpitch, 832 dst, dstpitch, width, height); 833 break; 834 default: 835 xf86Msg(X_ERROR, 836 "unsupported src format\n"); 837 } 838 break; 839 case PictOpSrc: 840 DPRINTF(X_ERROR, "Src %08x %08x\n", 841 p->srcformat, p->dstformat); 842 CG14Copy(pDst, srcX, srcY, dstX, dstY, width, height); 843 break; 844 default: 845 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 846 } 847 exaMarkSync(pDst->drawable.pScreen); 848} 849 850 851 852Bool 853CG14InitAccel(ScreenPtr pScreen) 854{ 855 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 856 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 857 ExaDriverPtr pExa; 858 859 pExa = exaDriverAlloc(); 860 if (!pExa) 861 return FALSE; 862 863 p->pExa = pExa; 864 865 pExa->exa_major = EXA_VERSION_MAJOR; 866 pExa->exa_minor = EXA_VERSION_MINOR; 867 868 pExa->memoryBase = p->fb; 869 pExa->memorySize = p->memsize; 870 pExa->offScreenBase = p->width * p->height * 4; 871 872 /* 873 * SX memory instructions are written to 64bit aligned addresses with 874 * a 3 bit displacement. 
Make sure the displacement remains constant 875 * within one column 876 */ 877 878 pExa->pixmapOffsetAlign = 8; 879 pExa->pixmapPitchAlign = 8; 880 881 pExa->flags = EXA_OFFSCREEN_PIXMAPS | 882 /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/ 883 EXA_MIXED_PIXMAPS; 884 885 /* 886 * these limits are bogus 887 * SX doesn't deal with coordinates at all, so there is no limit but 888 * we have to put something here 889 */ 890 pExa->maxX = 4096; 891 pExa->maxY = 4096; 892 893 pExa->WaitMarker = CG14WaitMarker; 894 895 pExa->PrepareSolid = CG14PrepareSolid; 896 pExa->Solid = CG14Solid; 897 pExa->DoneSolid = CG14DoneCopy; 898 pExa->PrepareCopy = CG14PrepareCopy; 899 pExa->Copy = CG14Copy; 900 pExa->DoneCopy = CG14DoneCopy; 901 if (p->use_xrender) { 902 pExa->CheckComposite = CG14CheckComposite; 903 pExa->PrepareComposite = CG14PrepareComposite; 904 pExa->Composite = CG14Composite; 905 pExa->DoneComposite = CG14DoneCopy; 906 } 907 908 /* EXA hits more optimized paths when it does not have to fallback 909 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 910 */ 911 pExa->UploadToScreen = CG14UploadToScreen; 912 pExa->DownloadFromScreen = CG14DownloadFromScreen; 913 914 /* do some hardware init */ 915 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 916 p->last_mask = 0xffffffff; 917 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 918 p->last_rop = 0xcc; 919 return exaDriverInit(pScreen, pExa); 920} 921