/* cg14_accel.c revision 4261fa58 */
1/* $NetBSD: cg14_accel.c,v 1.1 2013/06/19 13:26:01 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
29 * 30 */ 31 32#include <sys/types.h> 33 34/* all driver need this */ 35#include "xf86.h" 36#include "xf86_OSproc.h" 37#include "compiler.h" 38 39#include "cg14.h" 40#include <sparc/sxreg.h> 41 42#define SX_SINGLE 43/*#define SX_DEBUG*/ 44/*#define SX_ADD_SOFTWARE*/ 45 46#ifdef SX_DEBUG 47#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48#define DPRINTF xf86Msg 49#else 50#define ENTER 51#define DPRINTF while (0) xf86Msg 52#endif 53 54#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 55 56/* 0xcc is SX's GXcopy equivalent */ 57uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 58 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 59 60int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 61 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 62int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 63 64char c[8] = " .,:+*oX"; 65 66/* write an SX register */ 67static inline void 68write_sx_reg(Cg14Ptr p, int reg, uint32_t val) 69{ 70 *(volatile uint32_t *)(p->sxreg + reg) = val; 71} 72 73/* read an SX register */ 74static inline uint32_t 75read_sx_reg(Cg14Ptr p, int reg) 76{ 77 return *(volatile uint32_t *)(p->sxreg + reg); 78} 79 80/* write a memory referencing instruction */ 81static inline void 82write_sx_io(Cg14Ptr p, int reg, uint32_t val) 83{ 84 *(volatile uint32_t *)(p->sxio + reg) = val; 85} 86 87static inline void 88CG14Wait(Cg14Ptr p) 89{ 90 /* we just wait until the instruction queue is empty */ 91 while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {}; 92} 93 94static void 95CG14WaitMarker(ScreenPtr pScreen, int Marker) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 CG14Wait(p); 101} 102 103static Bool 104CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 105 int xdir, int ydir, int alu, Pixel planemask) 106{ 107 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 108 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 109 110 ENTER; 111 DPRINTF(X_ERROR, "bits per 
pixel: %d\n", 112 pSrcPixmap->drawable.bitsPerPixel); 113 114 if (planemask != p->last_mask) { 115 CG14Wait(p); 116 write_sx_reg(p, SX_PLANEMASK, planemask); 117 p->last_mask = planemask; 118 } 119 alu = sx_rop[alu]; 120 if (alu != p->last_rop) { 121 CG14Wait(p); 122 write_sx_reg(p, SX_ROP_CONTROL, alu); 123 p->last_rop = alu; 124 } 125 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 126 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 127 p->xdir = xdir; 128 p->ydir = ydir; 129 return TRUE; 130} 131 132static void 133CG14Copy(PixmapPtr pDstPixmap, 134 int srcX, int srcY, int dstX, int dstY, int w, int h) 135{ 136 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 137 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 138 int dstpitch, dstoff, srcpitch, srcoff; 139 int srcstart, dststart, xinc, srcinc, dstinc; 140 int line, count, s, d, num; 141 142 ENTER; 143 dstpitch = exaGetPixmapPitch(pDstPixmap); 144 dstoff = exaGetPixmapOffset(pDstPixmap); 145 srcpitch = p->srcpitch; 146 srcoff = p->srcoff; 147 /* 148 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 149 * actually wrote anything and only sync if it did 150 */ 151 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 152 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 153 154 /* 155 * we always copy up to 32 pixels at a time so direction doesn't 156 * matter if w<=32 157 */ 158 if (w > 32) { 159 if (p->xdir < 0) { 160 srcstart += (w - 32) << 2; 161 dststart += (w - 32) << 2; 162 xinc = -128; 163 } else 164 xinc = 128; 165 } else 166 xinc = 128; 167 if (p->ydir < 0) { 168 srcstart += (h - 1) * srcpitch; 169 dststart += (h - 1) * dstpitch; 170 srcinc = -srcpitch; 171 dstinc = -dstpitch; 172 } else { 173 srcinc = srcpitch; 174 dstinc = dstpitch; 175 } 176 if (p->last_rop == 0xcc) { 177 /* plain old copy */ 178 if ( xinc > 0) { 179 /* going left to right */ 180 for (line = 0; line < h; line++) { 181 count = 0; 182 s = srcstart; 183 d = dststart; 184 while ( count < w) { 185 num = min(32, w - 
count); 186 write_sx_io(p, s, 187 SX_LD(10, num - 1, s & 7)); 188 write_sx_io(p, d, 189 SX_STM(10, num - 1, d & 7)); 190 s += xinc; 191 d += xinc; 192 count += 32; 193 } 194 srcstart += srcinc; 195 dststart += dstinc; 196 } 197 } else { 198 /* going right to left */ 199 int i, chunks = (w >> 5); 200 for (line = 0; line < h; line++) { 201 s = srcstart; 202 d = dststart; 203 count = w; 204 for (i = 0; i < chunks; i++) { 205 write_sx_io(p, s, 206 SX_LD(10, 31, s & 7)); 207 write_sx_io(p, d, 208 SX_STM(10, 31, d & 7)); 209 s -= 128; 210 d -= 128; 211 count -= 32; 212 } 213 /* leftovers, if any */ 214 if (count > 0) { 215 s += (32 - count) << 2; 216 d += (32 - count) << 2; 217 write_sx_io(p, s, 218 SX_LD(10, count - 1, s & 7)); 219 write_sx_io(p, d, 220 SX_STM(10, count - 1, d & 7)); 221 } 222 srcstart += srcinc; 223 dststart += dstinc; 224 } 225 } 226 } else { 227 /* ROPs needed */ 228 if ( xinc > 0) { 229 /* going left to right */ 230 for (line = 0; line < h; line++) { 231 count = 0; 232 s = srcstart; 233 d = dststart; 234 while ( count < w) { 235 num = min(32, w - count); 236 write_sx_io(p, s, 237 SX_LD(10, num - 1, s & 7)); 238 write_sx_io(p, d, 239 SX_LD(42, num - 1, d & 7)); 240 if (num > 16) { 241 write_sx_reg(p, SX_INSTRUCTIONS, 242 SX_ROP(10, 42, 74, 15)); 243 write_sx_reg(p, SX_INSTRUCTIONS, 244 SX_ROP(26, 58, 90, num - 17)); 245 } else { 246 write_sx_reg(p, SX_INSTRUCTIONS, 247 SX_ROP(10, 42, 74, num - 1)); 248 } 249 write_sx_io(p, d, 250 SX_STM(74, num - 1, d & 7)); 251 s += xinc; 252 d += xinc; 253 count += 32; 254 } 255 srcstart += srcinc; 256 dststart += dstinc; 257 } 258 } else { 259 /* going right to left */ 260 int i, chunks = (w >> 5); 261 for (line = 0; line < h; line++) { 262 s = srcstart; 263 d = dststart; 264 count = w; 265 for (i = 0; i < chunks; i++) { 266 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 267 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 268 write_sx_reg(p, SX_INSTRUCTIONS, 269 SX_ROP(10, 42, 74, 15)); 270 write_sx_reg(p, SX_INSTRUCTIONS, 271 
SX_ROP(26, 58, 90, 15)); 272 write_sx_io(p, d, 273 SX_STM(74, 31, d & 7)); 274 s -= 128; 275 d -= 128; 276 count -= 32; 277 } 278 /* leftovers, if any */ 279 if (count > 0) { 280 s += (32 - count) << 2; 281 d += (32 - count) << 2; 282 write_sx_io(p, s, 283 SX_LD(10, count - 1, s & 7)); 284 write_sx_io(p, d, 285 SX_LD(42, count - 1, d & 7)); 286 if (count > 16) { 287 write_sx_reg(p, SX_INSTRUCTIONS, 288 SX_ROP(10, 42, 74, 15)); 289 write_sx_reg(p, SX_INSTRUCTIONS, 290 SX_ROP(26, 58, 90, count - 17)); 291 } else { 292 write_sx_reg(p, SX_INSTRUCTIONS, 293 SX_ROP(10, 42, 74, count - 1)); 294 } 295 296 write_sx_io(p, d, 297 SX_STM(74, count - 1, d & 7)); 298 } 299 srcstart += srcinc; 300 dststart += dstinc; 301 } 302 } 303 } 304 exaMarkSync(pDstPixmap->drawable.pScreen); 305} 306 307static void 308CG14DoneCopy(PixmapPtr pDstPixmap) 309{ 310} 311 312static Bool 313CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 314{ 315 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 316 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 317 318 ENTER; 319 DPRINTF(X_ERROR, "bits per pixel: %d\n", pPixmap->drawable.bitsPerPixel); 320 write_sx_reg(p, SX_QUEUED(8), fg); 321 write_sx_reg(p, SX_QUEUED(9), fg); 322 if (planemask != p->last_mask) { 323 CG14Wait(p); 324 write_sx_reg(p, SX_PLANEMASK, planemask); 325 p->last_mask = planemask; 326 } 327 alu = sx_rop[alu]; 328 if (alu != p->last_rop) { 329 CG14Wait(p); 330 write_sx_reg(p, SX_ROP_CONTROL, alu); 331 p->last_rop = alu; 332 } 333 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 334 return TRUE; 335} 336 337static void 338CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 339{ 340 int line, x, num; 341 uint32_t ptr; 342 343 ENTER; 344 if (p->last_rop == 0xcc) { 345 /* simple fill */ 346 for (line = 0; line < h; line++) { 347 x = 0; 348 while (x < w) { 349 ptr = start + (x << 2); 350 num = min(32, w - x); 351 write_sx_io(p, ptr, 352 SX_STS(8, num - 1, ptr & 7)); 353 x += 32; 354 } 355 start += 
pitch; 356 } 357 } else if (p->last_rop == 0xaa) { 358 /* nothing to do here */ 359 return; 360 } else { 361 /* alright, let's do actual ROP stuff */ 362 363 /* first repeat the fill colour into 16 registers */ 364 write_sx_reg(p, SX_INSTRUCTIONS, 365 SX_SELECT_S(8, 8, 10, 15)); 366 367 for (line = 0; line < h; line++) { 368 x = 0; 369 while (x < w) { 370 ptr = start + (x << 2); 371 num = min(32, w - x); 372 /* now suck fb data into registers */ 373 write_sx_io(p, ptr, 374 SX_LD(42, num - 1, ptr & 7)); 375 /* 376 * ROP them with the fill data we left in 10 377 * non-memory ops can only have counts up to 16 378 */ 379 if (num <= 16) { 380 write_sx_reg(p, SX_INSTRUCTIONS, 381 SX_ROP(10, 42, 74, num - 1)); 382 } else { 383 write_sx_reg(p, SX_INSTRUCTIONS, 384 SX_ROP(10, 42, 74, 15)); 385 write_sx_reg(p, SX_INSTRUCTIONS, 386 SX_ROP(10, 58, 90, num - 17)); 387 } 388 /* and write the result back into memory */ 389 write_sx_io(p, ptr, 390 SX_ST(74, num - 1, ptr & 7)); 391 x += 32; 392 } 393 start += pitch; 394 } 395 } 396} 397 398static void 399CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 400{ 401 int line, x, num, off; 402 uint32_t ptr; 403 404 ENTER; 405 off = start & 7; 406 start &= ~7; 407 408 if (p->last_rop == 0xcc) { 409 /* simple fill */ 410 for (line = 0; line < h; line++) { 411 x = 0; 412 while (x < w) { 413 ptr = start + x; 414 num = min(32, w - x); 415 write_sx_io(p, ptr, 416 SX_STBS(8, num - 1, off)); 417 x += 32; 418 } 419 start += pitch; 420 } 421 } else if (p->last_rop == 0xaa) { 422 /* nothing to do here */ 423 return; 424 } else { 425 /* alright, let's do actual ROP stuff */ 426 427 /* first repeat the fill colour into 16 registers */ 428 write_sx_reg(p, SX_INSTRUCTIONS, 429 SX_SELECT_S(8, 8, 10, 15)); 430 431 for (line = 0; line < h; line++) { 432 x = 0; 433 while (x < w) { 434 ptr = start + x; 435 num = min(32, w - x); 436 /* now suck fb data into registers */ 437 write_sx_io(p, ptr, 438 SX_LDB(42, num - 1, off)); 439 /* 440 * 
ROP them with the fill data we left in 10 441 * non-memory ops can only have counts up to 16 442 */ 443 if (num <= 16) { 444 write_sx_reg(p, SX_INSTRUCTIONS, 445 SX_ROP(10, 42, 74, num - 1)); 446 } else { 447 write_sx_reg(p, SX_INSTRUCTIONS, 448 SX_ROP(10, 42, 74, 15)); 449 write_sx_reg(p, SX_INSTRUCTIONS, 450 SX_ROP(10, 58, 90, num - 17)); 451 } 452 /* and write the result back into memory */ 453 write_sx_io(p, ptr, 454 SX_STB(74, num - 1, off)); 455 x += 32; 456 } 457 start += pitch; 458 } 459 } 460} 461 462static void 463CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 464{ 465 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 466 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 467 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 468 int start, depth; 469 470 ENTER; 471 dstpitch = exaGetPixmapPitch(pPixmap); 472 dstoff = exaGetPixmapOffset(pPixmap); 473 474 depth = pPixmap->drawable.bitsPerPixel; 475 switch (depth) { 476 case 32: 477 start = dstoff + (y1 * dstpitch) + (x1 << 2); 478 CG14Solid32(p, start, dstpitch, w, h); 479 break; 480 case 8: 481 start = dstoff + (y1 * dstpitch) + x1; 482 CG14Solid8(p, start, dstpitch, w, h); 483 break; 484 } 485 486 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 487 dstpitch, dstoff, start); 488 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 489 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 490 exaMarkSync(pPixmap->drawable.pScreen); 491} 492 493/* 494 * Memcpy-based UTS. 
495 */ 496static Bool 497CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 498 char *src, int src_pitch) 499{ 500 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 501 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 502 char *dst = p->fb + exaGetPixmapOffset(pDst); 503 int dst_pitch = exaGetPixmapPitch(pDst); 504 505 int bpp = pDst->drawable.bitsPerPixel; 506 int cpp = (bpp + 7) >> 3; 507 int wBytes = w * cpp; 508 509 ENTER; 510 dst += (x * cpp) + (y * dst_pitch); 511 512 CG14Wait(p); 513 514 while (h--) { 515 memcpy(dst, src, wBytes); 516 src += src_pitch; 517 dst += dst_pitch; 518 } 519 __asm("stbar;"); 520 return TRUE; 521} 522 523/* 524 * Memcpy-based DFS. 525 */ 526static Bool 527CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 528 char *dst, int dst_pitch) 529{ 530 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 531 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 532 char *src = p->fb + exaGetPixmapOffset(pSrc); 533 int src_pitch = exaGetPixmapPitch(pSrc); 534 535 ENTER; 536 int bpp = pSrc->drawable.bitsPerPixel; 537 int cpp = (bpp + 7) >> 3; 538 int wBytes = w * cpp; 539 540 src += (x * cpp) + (y * src_pitch); 541 542 CG14Wait(p); 543 544 while (h--) { 545 memcpy(dst, src, wBytes); 546 src += src_pitch; 547 dst += dst_pitch; 548 } 549 550 return TRUE; 551} 552 553Bool 554CG14CheckComposite(int op, PicturePtr pSrcPicture, 555 PicturePtr pMaskPicture, 556 PicturePtr pDstPicture) 557{ 558 int i, ok = FALSE; 559 560 ENTER; 561 562 /* 563 * SX is in theory capable of accelerating pretty much all Xrender ops, 564 * even coordinate transformation and gradients. Support will be added 565 * over time and likely have to spill over into its own source file. 
566 */ 567 568 if ((op != PictOpOver) && (op != PictOpAdd)) { 569 xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op); 570 return FALSE; 571 } 572 i = 0; 573 while ((i < arraysize(src_formats)) && (!ok)) { 574 ok = (pSrcPicture->format == src_formats[i]); 575 i++; 576 } 577 578 if (!ok) { 579 xf86Msg(X_ERROR, "%s: unsupported src format %x\n", 580 __func__, pSrcPicture->format); 581 return FALSE; 582 } 583 584 DPRINTF(X_ERROR, "src is %x %d %d\n", pSrcPicture->format, 585 pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height); 586 587 if (pMaskPicture != NULL) { 588 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 589 pMaskPicture->pDrawable->width, 590 pMaskPicture->pDrawable->height); 591 } 592 return TRUE; 593} 594 595Bool 596CG14PrepareComposite(int op, PicturePtr pSrcPicture, 597 PicturePtr pMaskPicture, 598 PicturePtr pDstPicture, 599 PixmapPtr pSrc, 600 PixmapPtr pMask, 601 PixmapPtr pDst) 602{ 603 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 604 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 605 606 ENTER; 607 608 if (pSrcPicture->pSourcePict != NULL) { 609 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 610 p->fillcolour = 611 pSrcPicture->pSourcePict->solidFill.color; 612 DPRINTF(X_ERROR, "%s: solid src %08x\n", 613 __func__, p->fillcolour); 614 } 615 } 616 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 617 if (pMaskPicture->pSourcePict->type == 618 SourcePictTypeSolidFill) { 619 p->fillcolour = 620 pMaskPicture->pSourcePict->solidFill.color; 621 DPRINTF(X_ERROR, "%s: solid mask %08x\n", 622 __func__, p->fillcolour); 623 } 624 } 625 if (pMaskPicture != NULL) { 626 p->mskoff = exaGetPixmapOffset(pMask); 627 p->mskpitch = exaGetPixmapPitch(pMask); 628 p->mskformat = pMaskPicture->format; 629 } 630 p->srcoff = exaGetPixmapOffset(pSrc); 631 p->srcpitch = exaGetPixmapPitch(pSrc); 632 p->srcformat = pSrcPicture->format; 633 p->dstformat = pDstPicture->format; 634 p->op = op; 635#ifdef 
SX_DEBUG 636 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 637 *(uint32_t *)(p->fb + p->srcoff)); 638#endif 639 return TRUE; 640} 641 642void CG14Comp_Over32(Cg14Ptr p, 643 uint32_t src, uint32_t srcpitch, 644 uint32_t dst, uint32_t dstpitch, 645 int width, int height) 646{ 647 uint32_t msk = src, mskx, dstx, m; 648 int line, x, i; 649 650 ENTER; 651 /* first get the source colour */ 652 write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 653 write_sx_reg(p, SX_QUEUED(8), 0xff); 654 for (line = 0; line < height; line++) { 655 mskx = msk; 656 dstx = dst; 657#ifdef SX_SINGLE 658 659 for (x = 0; x < width; x++) { 660 m = *(volatile uint32_t *)(p->fb + mskx); 661 m = m >> 24; 662 if (m == 0) { 663 /* nothing to do - all transparent */ 664 } else if (m == 0xff) { 665 /* all opaque */ 666 write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 667 } else { 668 /* fetch alpha value, stick it into scam */ 669 /* mask is in R[12:15] */ 670 /*write_sx_io(p, mskx, 671 SX_LDUQ0(12, 0, mskx & 7));*/ 672 write_sx_reg(p, SX_QUEUED(12), m); 673 /* fetch dst pixel */ 674 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 675 write_sx_reg(p, SX_INSTRUCTIONS, 676 SX_ORV(12, 0, R_SCAM, 0)); 677 /* 678 * src * alpha + R0 679 * R[9:11] * SCAM + R0 -> R[17:19] 680 */ 681 write_sx_reg(p, SX_INSTRUCTIONS, 682 SX_SAXP16X16SR8(9, 0, 17, 2)); 683 684 /* invert SCAM */ 685 write_sx_reg(p, SX_INSTRUCTIONS, 686 SX_XORV(12, 8, R_SCAM, 0)); 687#ifdef SX_DEBUG 688 write_sx_reg(p, SX_INSTRUCTIONS, 689 SX_XORV(12, 8, 13, 0)); 690#endif 691 /* dst * (1 - alpha) + R[13:15] */ 692 write_sx_reg(p, SX_INSTRUCTIONS, 693 SX_SAXP16X16SR8(21, 17, 25, 2)); 694 write_sx_io(p, dstx, 695 SX_STUQ0C(24, 0, dstx & 7)); 696 } 697 dstx += 4; 698 mskx += 4; 699 } 700#else 701 for (x = 0; x < width; x += 4) { 702 /* fetch 4 mask values */ 703 write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 704 /* fetch destination pixels */ 705 write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 706 /* duplicate them for all 
channels */ 707 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 708 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 709 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 710 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 711 /* generate inverted alpha */ 712 write_sx_reg(p, SX_INSTRUCTIONS, 713 SX_XORS(12, 8, 28, 15)); 714 /* multiply source */ 715 write_sx_reg(p, SX_INSTRUCTIONS, 716 SX_MUL16X16SR8(8, 12, 44, 3)); 717 write_sx_reg(p, SX_INSTRUCTIONS, 718 SX_MUL16X16SR8(8, 16, 48, 3)); 719 write_sx_reg(p, SX_INSTRUCTIONS, 720 SX_MUL16X16SR8(8, 20, 52, 3)); 721 write_sx_reg(p, SX_INSTRUCTIONS, 722 SX_MUL16X16SR8(8, 24, 56, 3)); 723 /* multiply dest */ 724 write_sx_reg(p, SX_INSTRUCTIONS, 725 SX_MUL16X16SR8(28, 60, 76, 15)); 726 /* add up */ 727 write_sx_reg(p, SX_INSTRUCTIONS, 728 SX_ADDV(44, 76, 92, 15)); 729 /* write back */ 730 write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 731 dstx += 16; 732 mskx += 16; 733 } 734#endif 735 dst += dstpitch; 736 msk += srcpitch; 737 } 738} 739 740void CG14Comp_Over8(Cg14Ptr p, 741 uint32_t src, uint32_t srcpitch, 742 uint32_t dst, uint32_t dstpitch, 743 int width, int height) 744{ 745 uint32_t msk = src, mskx, dstx, m; 746 int line, x, i; 747#ifdef SX_DEBUG 748 char buffer[256]; 749#endif 750 ENTER; 751 752 /* first get the source colour */ 753 write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 754 write_sx_reg(p, SX_QUEUED(8), 0xff); 755 DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 756 read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 757 *(uint32_t *)(p->fb + p->srcoff)); 758 for (line = 0; line < height; line++) { 759 mskx = msk; 760 dstx = dst; 761#ifdef SX_SINGLE 762 763 for (x = 0; x < width; x++) { 764 m = *(volatile uint8_t *)(p->fb + mskx); 765#ifdef SX_DEBUG 766 buffer[x] = c[m >> 5]; 767#endif 768 if (m == 0) { 769 /* nothing to do - all transparent */ 770 } else if (m == 0xff) { 771 /* all opaque */ 772 write_sx_io(p, dstx, SX_STUQ0(8, 0, 
dstx & 7)); 773 } else { 774 /* fetch alpha value, stick it into scam */ 775 /* mask is in R[12:15] */ 776 /*write_sx_io(p, mskx & ~7, 777 SX_LDB(12, 0, mskx & 7));*/ 778 write_sx_reg(p, SX_QUEUED(12), m); 779 /* fetch dst pixel */ 780 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 781 write_sx_reg(p, SX_INSTRUCTIONS, 782 SX_ORV(12, 0, R_SCAM, 0)); 783 /* 784 * src * alpha + R0 785 * R[9:11] * SCAM + R0 -> R[17:19] 786 */ 787 write_sx_reg(p, SX_INSTRUCTIONS, 788 SX_SAXP16X16SR8(9, 0, 17, 2)); 789 790 /* invert SCAM */ 791 write_sx_reg(p, SX_INSTRUCTIONS, 792 SX_XORV(12, 8, R_SCAM, 0)); 793#ifdef SX_DEBUG 794 write_sx_reg(p, SX_INSTRUCTIONS, 795 SX_XORV(12, 8, 13, 0)); 796#endif 797 /* dst * (1 - alpha) + R[13:15] */ 798 write_sx_reg(p, SX_INSTRUCTIONS, 799 SX_SAXP16X16SR8(21, 17, 25, 2)); 800 write_sx_io(p, dstx, 801 SX_STUQ0C(24, 0, dstx & 7)); 802 } 803 dstx += 4; 804 mskx += 1; 805 } 806#ifdef SX_DEBUG 807 buffer[x] = 0; 808 xf86Msg(X_ERROR, "%s\n", buffer); 809#endif 810#else 811 for (x = 0; x < width; x += 4) { 812 /* fetch 4 mask values */ 813 write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 814 /* fetch destination pixels */ 815 write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 816 /* duplicate them for all channels */ 817 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 818 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 819 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 820 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 821 /* generate inverted alpha */ 822 write_sx_reg(p, SX_INSTRUCTIONS, 823 SX_XORS(12, 8, 28, 15)); 824 /* multiply source */ 825 write_sx_reg(p, SX_INSTRUCTIONS, 826 SX_MUL16X16SR8(8, 12, 44, 3)); 827 write_sx_reg(p, SX_INSTRUCTIONS, 828 SX_MUL16X16SR8(8, 16, 48, 3)); 829 write_sx_reg(p, SX_INSTRUCTIONS, 830 SX_MUL16X16SR8(8, 20, 52, 3)); 831 write_sx_reg(p, SX_INSTRUCTIONS, 832 SX_MUL16X16SR8(8, 24, 56, 3)); 833 /* multiply dest */ 834 write_sx_reg(p, SX_INSTRUCTIONS, 835 SX_MUL16X16SR8(28, 60, 76, 15)); 
836 /* add up */ 837 write_sx_reg(p, SX_INSTRUCTIONS, 838 SX_ADDV(44, 76, 92, 15)); 839 /* write back */ 840 write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 841 dstx += 16; 842 mskx += 4; 843 } 844#endif 845 dst += dstpitch; 846 msk += srcpitch; 847 } 848} 849 850void CG14Comp_Add32(Cg14Ptr p, 851 uint32_t src, uint32_t srcpitch, 852 uint32_t dst, uint32_t dstpitch, 853 int width, int height) 854{ 855 int line; 856 uint32_t srcx, dstx; 857 int full, part, x; 858 859 ENTER; 860 full = width >> 3; /* chunks of 8 */ 861 part = width & 7; /* leftovers */ 862 /* we do this up to 8 pixels at a time */ 863 for (line = 0; line < height; line++) { 864 srcx = src; 865 dstx = dst; 866 for (x = 0; x < full; x++) { 867 write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 868 write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 869 write_sx_reg(p, SX_INSTRUCTIONS, 870 SX_ADDV(8, 40, 72, 15)); 871 write_sx_reg(p, SX_INSTRUCTIONS, 872 SX_ADDV(24, 56, 88, 15)); 873 write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 874 srcx += 128; 875 dstx += 128; 876 } 877 878 /* do leftovers */ 879 write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 880 write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 881 if (part & 16) { 882 write_sx_reg(p, SX_INSTRUCTIONS, 883 SX_ADDV(8, 40, 72, 15)); 884 write_sx_reg(p, SX_INSTRUCTIONS, 885 SX_ADDV(24, 56, 88, part - 17)); 886 } else { 887 write_sx_reg(p, SX_INSTRUCTIONS, 888 SX_ADDV(8, 40, 72, part - 1)); 889 } 890 write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 891 892 /* next line */ 893 src += srcpitch; 894 dst += dstpitch; 895 } 896} 897 898void CG14Comp_Add8(Cg14Ptr p, 899 uint32_t src, uint32_t srcpitch, 900 uint32_t dst, uint32_t dstpitch, 901 int width, int height) 902{ 903 int line; 904 uint32_t srcx, dstx, srcoff, dstoff; 905 int pre, full, part, x; 906 uint8_t *d; 907 char buffer[256]; 908 ENTER; 909 910 srcoff = src & 7; 911 src &= ~7; 912 dstoff = dst & 7; 913 dst &= ~7; 914 full = width >> 5; /* chunks of 32 */ 915 part = width & 
31; /* leftovers */ 916 917#ifdef SX_DEBUG 918 xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 919 width, height, full, part); 920#endif 921 /* we do this up to 32 pixels at a time */ 922 for (line = 0; line < height; line++) { 923 srcx = src; 924 dstx = dst; 925#ifdef SX_ADD_SOFTWARE 926 uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 927 d = (uint8_t *)(p->fb + dstx + dstoff); 928 for (x = 0; x < width; x++) { 929 d[x] = min(255, s[x] + d[x]); 930 } 931#else 932 for (x = 0; x < full; x++) { 933 write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 934 write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 935 write_sx_reg(p, SX_INSTRUCTIONS, 936 SX_ADDV(8, 40, 72, 15)); 937 write_sx_reg(p, SX_INSTRUCTIONS, 938 SX_ADDV(24, 56, 88, 15)); 939 write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 940 srcx += 32; 941 dstx += 32; 942 } 943 944 if (part > 0) { 945 /* do leftovers */ 946 write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 947 write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 948 if (part > 16) { 949 write_sx_reg(p, SX_INSTRUCTIONS, 950 SX_ADDV(8, 40, 72, 15)); 951 write_sx_reg(p, SX_INSTRUCTIONS, 952 SX_ADDV(24, 56, 88, part - 17)); 953 } else { 954 write_sx_reg(p, SX_INSTRUCTIONS, 955 SX_ADDV(8, 40, 72, part - 1)); 956 } 957 write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 958 } 959#endif 960#ifdef SX_DEBUG 961 d = (uint8_t *)(p->fb + src + srcoff); 962 for (x = 0; x < width; x++) { 963 buffer[x] = c[d[x]>>5]; 964 } 965 buffer[x] = 0; 966 xf86Msg(X_ERROR, "%s\n", buffer); 967#endif 968 /* next line */ 969 src += srcpitch; 970 dst += dstpitch; 971 } 972} 973 974void 975CG14Composite(PixmapPtr pDst, int srcX, int srcY, 976 int maskX, int maskY, 977 int dstX, int dstY, 978 int width, int height) 979{ 980 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 981 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 982 uint32_t dstoff, dstpitch; 983 uint32_t dst, msk, src; 984 985 ENTER; 986 dstoff = exaGetPixmapOffset(pDst); 987 dstpitch = exaGetPixmapPitch(pDst); 
988 989 switch (p->op) { 990 case PictOpOver: 991 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 992 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 993 p->mskformat, p->dstformat, srcX, srcY); 994 switch (p->mskformat) { 995 case PICT_a8: 996 msk = p->mskoff + 997 (maskY * p->mskpitch) + maskX; 998 CG14Comp_Over8(p, msk, p->mskpitch, 999 dst, dstpitch, width, height); 1000 break; 1001 case PICT_a8r8g8b8: 1002 case PICT_a8b8g8r8: 1003 msk = p->mskoff + 1004 (maskY * p->mskpitch) + 1005 (maskX << 2); 1006 CG14Comp_Over32(p, msk, p->mskpitch, 1007 dst, dstpitch, width, height); 1008 break; 1009 default: 1010 xf86Msg(X_ERROR, 1011 "unsupported mask format\n"); 1012 } 1013 break; 1014 case PictOpAdd: 1015 DPRINTF(X_ERROR, "Add %08x %08x\n", 1016 p->srcformat, p->dstformat); 1017 switch (p->srcformat) { 1018 case PICT_a8: 1019 src = p->srcoff + 1020 (srcY * p->srcpitch) + srcX; 1021 dst = dstoff + (dstY * dstpitch) + dstX; 1022 CG14Comp_Add8(p, src, p->srcpitch, 1023 dst, dstpitch, width, height); 1024 break; 1025 case PICT_a8r8g8b8: 1026 case PICT_x8r8g8b8: 1027 src = p->srcoff + 1028 (srcY * p->srcpitch) + (srcX << 2); 1029 dst = dstoff + (dstY * dstpitch) + 1030 (dstX << 2); 1031 CG14Comp_Add32(p, src, p->srcpitch, 1032 dst, dstpitch, width, height); 1033 break; 1034 default: 1035 xf86Msg(X_ERROR, 1036 "unsupported src format\n"); 1037 } 1038 break; 1039 default: 1040 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1041 } 1042 exaMarkSync(pDst->drawable.pScreen); 1043} 1044 1045 1046 1047Bool 1048CG14InitAccel(ScreenPtr pScreen) 1049{ 1050 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1051 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1052 ExaDriverPtr pExa; 1053 1054 pExa = exaDriverAlloc(); 1055 if (!pExa) 1056 return FALSE; 1057 1058 p->pExa = pExa; 1059 1060 pExa->exa_major = EXA_VERSION_MAJOR; 1061 pExa->exa_minor = EXA_VERSION_MINOR; 1062 1063 pExa->memoryBase = p->fb; 1064 pExa->memorySize = p->memsize; 1065 pExa->offScreenBase = p->width * p->height * 4; 1066 1067 
/* 1068 * SX memory instructions are written to 64bit aligned addresses with 1069 * a 3 bit displacement. Make sure the displacement remains constant 1070 * within one column 1071 */ 1072 1073 pExa->pixmapOffsetAlign = 8; 1074 pExa->pixmapPitchAlign = 8; 1075 1076 pExa->flags = EXA_OFFSCREEN_PIXMAPS | 1077 /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/ 1078 EXA_MIXED_PIXMAPS; 1079 1080 /* 1081 * these limits are bogus 1082 * SX doesn't deal with coordinates at all, so there is no limit but 1083 * we have to put something here 1084 */ 1085 pExa->maxX = 4096; 1086 pExa->maxY = 4096; 1087 1088 pExa->WaitMarker = CG14WaitMarker; 1089 1090 pExa->PrepareSolid = CG14PrepareSolid; 1091 pExa->Solid = CG14Solid; 1092 pExa->DoneSolid = CG14DoneCopy; 1093 pExa->PrepareCopy = CG14PrepareCopy; 1094 pExa->Copy = CG14Copy; 1095 pExa->DoneCopy = CG14DoneCopy; 1096 if (p->use_xrender) { 1097 pExa->CheckComposite = CG14CheckComposite; 1098 pExa->PrepareComposite = CG14PrepareComposite; 1099 pExa->Composite = CG14Composite; 1100 pExa->DoneComposite = CG14DoneCopy; 1101 } 1102 1103 /* EXA hits more optimized paths when it does not have to fallback 1104 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1105 */ 1106 pExa->UploadToScreen = CG14UploadToScreen; 1107 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1108 1109 /* do some hardware init */ 1110 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1111 p->last_mask = 0xffffffff; 1112 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1113 p->last_rop = 0xcc; 1114 return exaDriverInit(pScreen, pExa); 1115} 1116