cg14_accel.c revision c88c16f8
1/* $NetBSD: cg14_accel.c,v 1.8 2016/09/16 21:16:37 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44#include <sparc/sxreg.h> 45 46#define SX_SINGLE 47/*#define SX_DEBUG*/ 48/*#define SX_ADD_SOFTWARE*/ 49 50#ifdef SX_DEBUG 51#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 52#define DPRINTF xf86Msg 53#else 54#define ENTER 55#define DPRINTF while (0) xf86Msg 56#endif 57 58#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 59 60/* 0xcc is SX's GXcopy equivalent */ 61uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 62 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 63 64int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 65 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 66int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 /* we just wait until the instruction queue is empty */ 72 while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {}; 73} 74 75static void 76CG14WaitMarker(ScreenPtr pScreen, int Marker) 77{ 78 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 79 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 80 81 CG14Wait(p); 82} 83 84static Bool 85CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 86 int xdir, int ydir, int alu, Pixel planemask) 87{ 88 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 89 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 90 91 ENTER; 92 DPRINTF(X_ERROR, "bits per pixel: %d\n", 93 pSrcPixmap->drawable.bitsPerPixel); 94 95 if (planemask != p->last_mask) { 96 CG14Wait(p); 97 write_sx_reg(p, SX_PLANEMASK, planemask); 98 p->last_mask = planemask; 99 } 100 alu = sx_rop[alu]; 101 if (alu != p->last_rop) { 102 CG14Wait(p); 103 write_sx_reg(p, SX_ROP_CONTROL, alu); 104 p->last_rop = alu; 105 } 106 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 107 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 108 p->xdir = xdir; 109 p->ydir = ydir; 110 return TRUE; 111} 112 113static void 114CG14Copy(PixmapPtr pDstPixmap, 115 int srcX, int srcY, int dstX, int dstY, int w, int h) 116{ 117 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 118 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 119 int dstpitch, dstoff, srcpitch, srcoff; 120 int srcstart, dststart, xinc, srcinc, dstinc; 121 int line, count, s, d, num; 122 123 ENTER; 124 dstpitch = exaGetPixmapPitch(pDstPixmap); 125 dstoff = exaGetPixmapOffset(pDstPixmap); 126 srcpitch = p->srcpitch; 127 srcoff = p->srcoff; 128 /* 129 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 130 * actually wrote anything and only sync if it did 131 */ 132 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 133 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 134 135 /* 136 * we always copy up to 32 pixels at a time so direction doesn't 137 * matter if w<=32 138 */ 139 if (w > 32) { 140 if (p->xdir < 0) { 141 srcstart += (w - 32) << 2; 142 dststart += (w - 32) << 2; 143 xinc = -128; 144 } else 145 xinc = 128; 146 } else 147 xinc = 128; 148 if (p->ydir < 0) { 149 srcstart += (h - 1) * srcpitch; 150 dststart += (h - 1) * dstpitch; 151 srcinc = -srcpitch; 152 dstinc = -dstpitch; 153 } else { 154 srcinc = srcpitch; 155 dstinc = dstpitch; 156 } 157 if (p->last_rop == 0xcc) { 158 /* plain old copy */ 159 if ( xinc > 0) { 160 /* going left to right */ 161 for (line = 0; line < h; line++) { 162 count = 0; 163 s = srcstart; 164 d = dststart; 165 while ( count < w) { 166 num = min(32, w - count); 167 write_sx_io(p, s, 168 SX_LD(10, num - 1, s & 7)); 169 write_sx_io(p, d, 170 SX_STM(10, num - 1, d & 7)); 171 s += xinc; 172 d += xinc; 173 count += 32; 174 } 175 srcstart += srcinc; 176 dststart += dstinc; 177 } 178 } else { 179 /* going right to left */ 180 int i, chunks = (w >> 5); 181 for (line = 0; line < h; line++) { 182 s = srcstart; 183 d = dststart; 184 count = w; 185 for (i = 0; i < chunks; i++) { 186 write_sx_io(p, s, 187 SX_LD(10, 31, s & 7)); 188 write_sx_io(p, d, 189 SX_STM(10, 31, d & 7)); 190 s -= 128; 191 d -= 128; 192 count -= 32; 193 } 194 /* leftovers, if any */ 195 if (count > 0) { 196 s += (32 - count) << 2; 197 d += (32 - count) << 2; 198 write_sx_io(p, s, 199 SX_LD(10, count - 1, s & 7)); 200 write_sx_io(p, d, 201 SX_STM(10, count - 1, d & 7)); 202 } 203 srcstart += srcinc; 204 dststart += dstinc; 205 } 206 } 207 } else { 208 /* ROPs needed */ 209 if ( xinc > 0) { 210 /* going left to right */ 211 for (line = 0; line < h; line++) { 212 count = 0; 213 s = srcstart; 214 d = dststart; 215 while ( count < w) { 216 num = min(32, w - count); 217 write_sx_io(p, s, 218 SX_LD(10, num - 1, s & 7)); 219 write_sx_io(p, d, 220 SX_LD(42, num - 1, d & 7)); 221 if (num > 16) { 222 write_sx_reg(p, SX_INSTRUCTIONS, 223 SX_ROP(10, 42, 74, 15)); 224 write_sx_reg(p, SX_INSTRUCTIONS, 225 SX_ROP(26, 58, 90, num - 17)); 226 } else { 227 write_sx_reg(p, SX_INSTRUCTIONS, 228 SX_ROP(10, 42, 74, num - 1)); 229 } 230 write_sx_io(p, d, 231 SX_STM(74, num - 1, d & 7)); 232 s += xinc; 233 d += xinc; 234 count += 32; 235 } 236 srcstart += srcinc; 237 dststart += dstinc; 238 } 239 } else { 240 /* going right to left */ 241 int i, chunks = (w >> 5); 242 for (line = 0; line < h; line++) { 243 s = srcstart; 244 d = dststart; 245 count = w; 246 for (i = 0; i < chunks; i++) { 247 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 248 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 249 write_sx_reg(p, SX_INSTRUCTIONS, 250 SX_ROP(10, 42, 74, 15)); 251 write_sx_reg(p, SX_INSTRUCTIONS, 252 SX_ROP(26, 58, 90, 15)); 253 write_sx_io(p, d, 254 SX_STM(74, 31, d & 7)); 255 s -= 128; 256 d -= 128; 257 count -= 32; 258 } 259 /* leftovers, if any */ 260 if (count > 0) { 261 s += (32 - count) << 2; 262 d += (32 - count) << 2; 263 write_sx_io(p, s, 264 SX_LD(10, count - 1, s & 7)); 265 write_sx_io(p, d, 266 SX_LD(42, count - 1, d & 7)); 267 if (count > 16) { 268 write_sx_reg(p, SX_INSTRUCTIONS, 269 SX_ROP(10, 42, 74, 15)); 270 write_sx_reg(p, SX_INSTRUCTIONS, 271 SX_ROP(26, 58, 90, count - 17)); 272 } else { 273 write_sx_reg(p, SX_INSTRUCTIONS, 274 SX_ROP(10, 42, 74, count - 1)); 275 } 276 277 write_sx_io(p, d, 278 SX_STM(74, count - 1, d & 7)); 279 } 280 srcstart += srcinc; 281 dststart += dstinc; 282 } 283 } 284 } 285 exaMarkSync(pDstPixmap->drawable.pScreen); 286} 287 288static void 289CG14DoneCopy(PixmapPtr pDstPixmap) 290{ 291} 292 293static Bool 294CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 295{ 296 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 297 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 298 299 ENTER; 300 DPRINTF(X_ERROR, "bits per pixel: %d\n", 301 pPixmap->drawable.bitsPerPixel); 302 write_sx_reg(p, SX_QUEUED(8), fg); 303 write_sx_reg(p, SX_QUEUED(9), fg); 304 if (planemask != p->last_mask) { 305 CG14Wait(p); 306 write_sx_reg(p, SX_PLANEMASK, planemask); 307 p->last_mask = planemask; 308 } 309 alu = sx_rop[alu]; 310 if (alu != p->last_rop) { 311 CG14Wait(p); 312 write_sx_reg(p, SX_ROP_CONTROL, alu); 313 p->last_rop = alu; 314 } 315 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 316 return TRUE; 317} 318 319static void 320CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 321{ 322 int line, x, num; 323 uint32_t ptr; 324 325 ENTER; 326 if (p->last_rop == 0xcc) { 327 /* simple fill */ 328 for (line = 0; line < h; line++) { 329 x = 0; 330 while (x < w) { 331 ptr = start + (x << 2); 332 num = min(32, w - x); 333 write_sx_io(p, ptr, 334 SX_STS(8, num - 1, ptr & 7)); 335 x += 32; 336 } 337 start += pitch; 338 } 339 } else if (p->last_rop == 0xaa) { 340 /* nothing to do here */ 341 return; 342 } else { 343 /* alright, let's do actual ROP stuff */ 344 345 /* first repeat the fill colour into 16 registers */ 346 write_sx_reg(p, SX_INSTRUCTIONS, 347 SX_SELECT_S(8, 8, 10, 15)); 348 349 for (line = 0; line < h; line++) { 350 x = 0; 351 while (x < w) { 352 ptr = start + (x << 2); 353 num = min(32, w - x); 354 /* now suck fb data into registers */ 355 write_sx_io(p, ptr, 356 SX_LD(42, num - 1, ptr & 7)); 357 /* 358 * ROP them with the fill data we left in 10 359 * non-memory ops can only have counts up to 16 360 */ 361 if (num <= 16) { 362 write_sx_reg(p, SX_INSTRUCTIONS, 363 SX_ROP(10, 42, 74, num - 1)); 364 } else { 365 write_sx_reg(p, SX_INSTRUCTIONS, 366 SX_ROP(10, 42, 74, 15)); 367 write_sx_reg(p, SX_INSTRUCTIONS, 368 SX_ROP(10, 58, 90, num - 17)); 369 } 370 /* and write the result back into memory */ 371 write_sx_io(p, ptr, 372 SX_ST(74, num - 1, ptr & 7)); 373 x += 32; 374 } 375 start += pitch; 376 } 377 } 378} 379 380static void 381CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 382{ 383 int line, x, num, off; 384 uint32_t ptr; 385 386 ENTER; 387 off = start & 7; 388 start &= ~7; 389 390 if (p->last_rop == 0xcc) { 391 /* simple fill */ 392 for (line = 0; line < h; line++) { 393 x = 0; 394 while (x < w) { 395 ptr = start + x; 396 num = min(32, w - x); 397 write_sx_io(p, ptr, 398 SX_STBS(8, num - 1, off)); 399 x += 32; 400 } 401 start += pitch; 402 } 403 } else if (p->last_rop == 0xaa) { 404 /* nothing to do here */ 405 return; 406 } else { 407 /* alright, let's do actual ROP stuff */ 408 409 /* first repeat the fill colour into 16 registers */ 410 write_sx_reg(p, SX_INSTRUCTIONS, 411 SX_SELECT_S(8, 8, 10, 15)); 412 413 for (line = 0; line < h; line++) { 414 x = 0; 415 while (x < w) { 416 ptr = start + x; 417 num = min(32, w - x); 418 /* now suck fb data into registers */ 419 write_sx_io(p, ptr, 420 SX_LDB(42, num - 1, off)); 421 /* 422 * ROP them with the fill data we left in 10 423 * non-memory ops can only have counts up to 16 424 */ 425 if (num <= 16) { 426 write_sx_reg(p, SX_INSTRUCTIONS, 427 SX_ROP(10, 42, 74, num - 1)); 428 } else { 429 write_sx_reg(p, SX_INSTRUCTIONS, 430 SX_ROP(10, 42, 74, 15)); 431 write_sx_reg(p, SX_INSTRUCTIONS, 432 SX_ROP(10, 58, 90, num - 17)); 433 } 434 /* and write the result back into memory */ 435 write_sx_io(p, ptr, 436 SX_STB(74, num - 1, off)); 437 x += 32; 438 } 439 start += pitch; 440 } 441 } 442} 443 444static void 445CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 446{ 447 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 448 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 449 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 450 int start, depth; 451 452 ENTER; 453 dstpitch = exaGetPixmapPitch(pPixmap); 454 dstoff = exaGetPixmapOffset(pPixmap); 455 456 depth = pPixmap->drawable.bitsPerPixel; 457 switch (depth) { 458 case 32: 459 start = dstoff + (y1 * dstpitch) + (x1 << 2); 460 CG14Solid32(p, start, dstpitch, w, h); 461 break; 462 case 8: 463 start = dstoff + (y1 * dstpitch) + x1; 464 CG14Solid8(p, start, dstpitch, w, h); 465 break; 466 } 467 468 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 469 dstpitch, dstoff, start); 470 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 471 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 472 exaMarkSync(pPixmap->drawable.pScreen); 473} 474 475/* 476 * Memcpy-based UTS. 477 */ 478static Bool 479CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 480 char *src, int src_pitch) 481{ 482 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 483 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 484 char *dst = p->fb + exaGetPixmapOffset(pDst); 485 int dst_pitch = exaGetPixmapPitch(pDst); 486 487 int bpp = pDst->drawable.bitsPerPixel; 488 int cpp = (bpp + 7) >> 3; 489 int wBytes = w * cpp; 490 491 ENTER; 492 dst += (x * cpp) + (y * dst_pitch); 493 494 CG14Wait(p); 495 496 while (h--) { 497 memcpy(dst, src, wBytes); 498 src += src_pitch; 499 dst += dst_pitch; 500 } 501 __asm("stbar;"); 502 return TRUE; 503} 504 505/* 506 * Memcpy-based DFS. 507 */ 508static Bool 509CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 510 char *dst, int dst_pitch) 511{ 512 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 513 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 514 char *src = p->fb + exaGetPixmapOffset(pSrc); 515 int src_pitch = exaGetPixmapPitch(pSrc); 516 517 ENTER; 518 int bpp = pSrc->drawable.bitsPerPixel; 519 int cpp = (bpp + 7) >> 3; 520 int wBytes = w * cpp; 521 522 src += (x * cpp) + (y * src_pitch); 523 524 CG14Wait(p); 525 526 while (h--) { 527 memcpy(dst, src, wBytes); 528 src += src_pitch; 529 dst += dst_pitch; 530 } 531 532 return TRUE; 533} 534 535Bool 536CG14CheckComposite(int op, PicturePtr pSrcPicture, 537 PicturePtr pMaskPicture, 538 PicturePtr pDstPicture) 539{ 540 int i, ok = FALSE; 541 542 ENTER; 543 544 /* 545 * SX is in theory capable of accelerating pretty much all Xrender ops, 546 * even coordinate transformation and gradients. Support will be added 547 * over time and likely have to spill over into its own source file. 548 */ 549 550 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 551 xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op); 552 return FALSE; 553 } 554 i = 0; 555 while ((i < arraysize(src_formats)) && (!ok)) { 556 ok = (pSrcPicture->format == src_formats[i]); 557 i++; 558 } 559 560 if (!ok) { 561 xf86Msg(X_ERROR, "%s: unsupported src format %x\n", 562 __func__, pSrcPicture->format); 563 return FALSE; 564 } 565 566 DPRINTF(X_ERROR, "src is %x, %d: %d %d\n", pSrcPicture->format, op, 567 pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height); 568 569 if (pMaskPicture != NULL) { 570 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 571 pMaskPicture->pDrawable->width, 572 pMaskPicture->pDrawable->height); 573 } 574 return TRUE; 575} 576 577Bool 578CG14PrepareComposite(int op, PicturePtr pSrcPicture, 579 PicturePtr pMaskPicture, 580 PicturePtr pDstPicture, 581 PixmapPtr pSrc, 582 PixmapPtr pMask, 583 PixmapPtr pDst) 584{ 585 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 586 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 587 588 ENTER; 589 590 p->no_source_pixmap = FALSE; 591 p->source_is_solid = FALSE; 592 593 if (pSrcPicture->format == PICT_a1) { 594 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 595 pDstPicture->format, op); 596 if (pMaskPicture != NULL) { 597 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 598 } 599 } 600 if (pSrcPicture->pSourcePict != NULL) { 601 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 602 p->fillcolour = 603 pSrcPicture->pSourcePict->solidFill.color; 604 DPRINTF(X_ERROR, "%s: solid src %08x\n", 605 __func__, p->fillcolour); 606 p->no_source_pixmap = TRUE; 607 p->source_is_solid = TRUE; 608 } 609 } 610 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 611 if (pMaskPicture->pSourcePict->type == 612 SourcePictTypeSolidFill) { 613 p->fillcolour = 614 pMaskPicture->pSourcePict->solidFill.color; 615 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 616 __func__, p->fillcolour); 617 } 618 } 619 if (pMaskPicture != NULL) { 620 p->mskoff = exaGetPixmapOffset(pMask); 621 p->mskpitch = exaGetPixmapPitch(pMask); 622 p->mskformat = pMaskPicture->format; 623 } else { 624 p->mskoff = 0; 625 p->mskpitch = 0; 626 p->mskformat = 0; 627 } 628 if (pSrc != NULL) { 629 p->source_is_solid = 630 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 631 p->srcoff = exaGetPixmapOffset(pSrc); 632 p->srcpitch = exaGetPixmapPitch(pSrc); 633 if (p->source_is_solid) { 634 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 635 } 636 } 637 p->srcformat = pSrcPicture->format; 638 p->dstformat = pDstPicture->format; 639 640 if (p->source_is_solid) { 641 uint32_t temp; 642 643 /* stuff source colour into SX registers, swap as needed */ 644 temp = p->fillcolour; 645 switch (p->srcformat) { 646 case PICT_a8r8g8b8: 647 case PICT_x8r8g8b8: 648 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 649 temp = temp >> 8; 650 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 651 temp = temp >> 8; 652 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 653 break; 654 case PICT_a8b8g8r8: 655 case PICT_x8b8g8r8: 656 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 657 temp = temp >> 8; 658 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 659 temp = temp >> 8; 660 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 661 break; 662 } 663 write_sx_reg(p, SX_QUEUED(8), 0xff); 664 } 665 p->op = op; 666 if (op == PictOpSrc) { 667 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 668 } 669#ifdef SX_DEBUG 670 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 671 *(uint32_t *)(p->fb + p->srcoff)); 672#endif 673 return TRUE; 674} 675 676void 677CG14Composite(PixmapPtr pDst, int srcX, int srcY, 678 int maskX, int maskY, 679 int dstX, int dstY, 680 int width, int height) 681{ 682 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 683 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 684 uint32_t dstoff, dstpitch; 685 uint32_t dst, msk, src; 686 687 ENTER; 688 dstoff = exaGetPixmapOffset(pDst); 689 dstpitch = exaGetPixmapPitch(pDst); 690 691 switch (p->op) { 692 case PictOpOver: 693 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 694 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 695 p->mskformat, p->dstformat, srcX, srcY); 696 if (p->source_is_solid) { 697 switch (p->mskformat) { 698 case PICT_a8: 699 msk = p->mskoff + 700 (maskY * p->mskpitch) + 701 maskX; 702 CG14Comp_Over8Solid(p, 703 msk, p->mskpitch, 704 dst, dstpitch, 705 width, height); 706 break; 707 case PICT_a8r8g8b8: 708 case PICT_a8b8g8r8: 709 msk = p->mskoff + 710 (maskY * p->mskpitch) + 711 (maskX << 2); 712 CG14Comp_Over32Solid(p, 713 msk, p->mskpitch, 714 dst, dstpitch, 715 width, height); 716 break; 717 default: 718 xf86Msg(X_ERROR, 719 "unsupported mask format\n"); 720 } 721 } else { 722 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 723 p->mskformat); 724 switch (p->srcformat) { 725 case PICT_a8r8g8b8: 726 case PICT_a8b8g8r8: 727 src = p->srcoff + 728 (srcY * p->srcpitch) + 729 (srcX << 2); 730 dst = dstoff + 731 (dstY * dstpitch) + 732 (dstX << 2); 733 if (p->mskformat == PICT_a8) { 734 msk = p->mskoff + 735 (maskY * p->mskpitch) + 736 maskX; 737 CG14Comp_Over32Mask(p, 738 src, p->srcpitch, 739 msk, p->mskpitch, 740 dst, dstpitch, 741 width, height); 742 } else { 743 CG14Comp_Over32(p, 744 src, p->srcpitch, 745 dst, dstpitch, 746 width, height); 747 } 748 break; 749 case PICT_x8r8g8b8: 750 case PICT_x8b8g8r8: 751 src = p->srcoff + 752 (srcY * p->srcpitch) + 753 (srcX << 2); 754 dst = dstoff + 755 (dstY * dstpitch) + 756 (dstX << 2); 757 if (p->mskformat == PICT_a8) { 758 msk = p->mskoff + 759 (maskY * p->mskpitch) + 760 maskX; 761 CG14Comp_Over32Mask_noalpha(p, 762 src, p->srcpitch, 763 msk, p->mskpitch, 764 dst, dstpitch, 765 width, height); 766 } else if ((p->mskformat == PICT_a8r8g8b8) || 767 (p->mskformat == PICT_a8b8g8r8)) { 768 msk = p->mskoff + 769 (maskY * p->mskpitch) + 770 (maskX << 2); 771 CG14Comp_Over32Mask32_noalpha(p, 772 src, p->srcpitch, 773 msk, p->mskpitch, 774 dst, dstpitch, 775 width, height); 776 } else { 777 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 778 } 779 break; 780 default: 781 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 782 __func__, p->srcformat); 783 } 784 } 785 break; 786 case PictOpAdd: 787 DPRINTF(X_ERROR, "Add %08x %08x\n", 788 p->srcformat, p->dstformat); 789 switch (p->srcformat) { 790 case PICT_a8: 791 src = p->srcoff + 792 (srcY * p->srcpitch) + srcX; 793 dst = dstoff + (dstY * dstpitch) + dstX; 794 CG14Comp_Add8(p, src, p->srcpitch, 795 dst, dstpitch, width, height); 796 break; 797 case PICT_a8r8g8b8: 798 case PICT_x8r8g8b8: 799 src = p->srcoff + 800 (srcY * p->srcpitch) + (srcX << 2); 801 dst = dstoff + (dstY * dstpitch) + 802 (dstX << 2); 803 CG14Comp_Add32(p, src, p->srcpitch, 804 dst, dstpitch, width, height); 805 break; 806 default: 807 xf86Msg(X_ERROR, 808 "unsupported src format\n"); 809 } 810 break; 811 case PictOpSrc: 812 DPRINTF(X_ERROR, "Src %08x %08x\n", 813 p->srcformat, p->dstformat); 814 if (p->mskformat != 0) 815 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 816 CG14Copy(pDst, srcX, srcY, dstX, dstY, width, height); 817 break; 818 default: 819 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 820 } 821 exaMarkSync(pDst->drawable.pScreen); 822} 823 824 825 826Bool 827CG14InitAccel(ScreenPtr pScreen) 828{ 829 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 830 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 831 ExaDriverPtr pExa; 832 833 pExa = exaDriverAlloc(); 834 if (!pExa) 835 return FALSE; 836 837 p->pExa = pExa; 838 839 pExa->exa_major = EXA_VERSION_MAJOR; 840 pExa->exa_minor = EXA_VERSION_MINOR; 841 842 pExa->memoryBase = p->fb; 843 pExa->memorySize = p->memsize; 844 pExa->offScreenBase = p->width * p->height * 4; 845 846 /* 847 * SX memory instructions are written to 64bit aligned addresses with 848 * a 3 bit displacement. Make sure the displacement remains constant 849 * within one column 850 */ 851 852 pExa->pixmapOffsetAlign = 8; 853 pExa->pixmapPitchAlign = 8; 854 855 pExa->flags = EXA_OFFSCREEN_PIXMAPS | 856 /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/ 857 EXA_MIXED_PIXMAPS; 858 859 /* 860 * these limits are bogus 861 * SX doesn't deal with coordinates at all, so there is no limit but 862 * we have to put something here 863 */ 864 pExa->maxX = 4096; 865 pExa->maxY = 4096; 866 867 pExa->WaitMarker = CG14WaitMarker; 868 869 pExa->PrepareSolid = CG14PrepareSolid; 870 pExa->Solid = CG14Solid; 871 pExa->DoneSolid = CG14DoneCopy; 872 pExa->PrepareCopy = CG14PrepareCopy; 873 pExa->Copy = CG14Copy; 874 pExa->DoneCopy = CG14DoneCopy; 875 if (p->use_xrender) { 876 pExa->CheckComposite = CG14CheckComposite; 877 pExa->PrepareComposite = CG14PrepareComposite; 878 pExa->Composite = CG14Composite; 879 pExa->DoneComposite = CG14DoneCopy; 880 } 881 882 /* EXA hits more optimized paths when it does not have to fallback 883 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 884 */ 885 pExa->UploadToScreen = CG14UploadToScreen; 886 pExa->DownloadFromScreen = CG14DownloadFromScreen; 887 888 /* do some hardware init */ 889 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 890 p->last_mask = 0xffffffff; 891 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 892 p->last_rop = 0xcc; 893 return exaDriverInit(pScreen, pExa); 894} 895