/* cg14_accel.c revision c2193d98 */
1/* $NetBSD: cg14_accel.c,v 1.15 2019/07/24 16:07:59 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45/*#define SX_DEBUG*/ 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "bits per pixel: %d\n", 102 pSrcPixmap->drawable.bitsPerPixel); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = 
sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 
d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 write_sx_io(p, s, 188 SX_LD(10, num - 1, s & 7)); 189 write_sx_io(p, d, 190 SX_STM(10, num - 1, d & 7)); 191 s += xinc; 192 d += xinc; 193 count += 32; 194 } 195 srcstart += srcinc; 196 dststart += dstinc; 197 } 198 } else { 199 /* going right to left */ 200 int i, chunks = (w >> 5); 201 for (line = 0; line < h; line++) { 202 s = srcstart; 203 d = dststart; 204 count = w; 205 for (i = 0; i < chunks; i++) { 206 write_sx_io(p, s, 207 SX_LD(10, 31, s & 7)); 208 write_sx_io(p, d, 209 SX_STM(10, 31, d & 7)); 210 s -= 128; 211 d -= 128; 212 count -= 32; 213 } 214 /* leftovers, if any */ 215 if (count > 0) { 216 s += (32 - count) << 2; 217 d += (32 - count) << 2; 218 write_sx_io(p, s, 219 SX_LD(10, count - 1, s & 7)); 220 write_sx_io(p, d, 221 SX_STM(10, count - 1, d & 7)); 222 } 223 srcstart += srcinc; 224 dststart += dstinc; 225 } 226 } 227 } else { 228 /* ROPs needed */ 229 if ( xinc > 0) { 230 /* going left to right */ 231 for (line = 0; line < h; line++) { 232 count = 0; 233 s = srcstart; 234 d = dststart; 235 while ( count < w) { 236 num = min(32, w - count); 237 write_sx_io(p, s, 238 SX_LD(10, num - 1, s & 7)); 239 write_sx_io(p, d, 240 SX_LD(42, num - 1, d & 7)); 241 if (num > 16) { 242 write_sx_reg(p, SX_INSTRUCTIONS, 243 SX_ROP(10, 42, 74, 15)); 244 write_sx_reg(p, SX_INSTRUCTIONS, 245 SX_ROP(26, 58, 90, num - 17)); 246 } else { 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(10, 42, 74, num - 1)); 249 } 250 write_sx_io(p, d, 251 SX_STM(74, num - 1, d & 7)); 252 s += xinc; 253 d += xinc; 254 count += 32; 255 } 256 srcstart += srcinc; 257 dststart += dstinc; 258 } 259 } else { 260 /* going right to left */ 261 int i, chunks = (w >> 5); 262 for (line = 0; line < h; line++) { 263 s = srcstart; 264 d = dststart; 265 count = w; 266 for (i = 0; i < chunks; i++) { 267 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 268 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 
SX_ROP(10, 42, 74, 15)); 271 write_sx_reg(p, SX_INSTRUCTIONS, 272 SX_ROP(26, 58, 90, 15)); 273 write_sx_io(p, d, 274 SX_STM(74, 31, d & 7)); 275 s -= 128; 276 d -= 128; 277 count -= 32; 278 } 279 /* leftovers, if any */ 280 if (count > 0) { 281 s += (32 - count) << 2; 282 d += (32 - count) << 2; 283 write_sx_io(p, s, 284 SX_LD(10, count - 1, s & 7)); 285 write_sx_io(p, d, 286 SX_LD(42, count - 1, d & 7)); 287 if (count > 16) { 288 write_sx_reg(p, SX_INSTRUCTIONS, 289 SX_ROP(10, 42, 74, 15)); 290 write_sx_reg(p, SX_INSTRUCTIONS, 291 SX_ROP(26, 58, 90, count - 17)); 292 } else { 293 write_sx_reg(p, SX_INSTRUCTIONS, 294 SX_ROP(10, 42, 74, count - 1)); 295 } 296 297 write_sx_io(p, d, 298 SX_STM(74, count - 1, d & 7)); 299 } 300 srcstart += srcinc; 301 dststart += dstinc; 302 } 303 } 304 } 305 exaMarkSync(pDstPixmap->drawable.pScreen); 306} 307 308static void 309CG14Copy8(PixmapPtr pDstPixmap, 310 int srcX, int srcY, int dstX, int dstY, int w, int h) 311{ 312 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 313 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 314 int dstpitch, dstoff, srcpitch, srcoff; 315 int srcstart, dststart, xinc, srcinc, dstinc; 316 int line, count, s, d, num; 317 318 ENTER; 319 dstpitch = exaGetPixmapPitch(pDstPixmap); 320 dstoff = exaGetPixmapOffset(pDstPixmap); 321 srcpitch = p->srcpitch; 322 srcoff = p->srcoff; 323 /* 324 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 325 * actually wrote anything and only sync if it did 326 */ 327 srcstart = srcX + (srcpitch * srcY) + srcoff; 328 dststart = dstX + (dstpitch * dstY) + dstoff; 329 330 /* 331 * we always copy up to 32 pixels at a time so direction doesn't 332 * matter if w<=32 333 */ 334 if (w > 32) { 335 if (p->xdir < 0) { 336 srcstart += (w - 32); 337 dststart += (w - 32); 338 xinc = -32; 339 } else 340 xinc = 32; 341 } else 342 xinc = 32; 343 if (p->ydir < 0) { 344 srcstart += (h - 1) * srcpitch; 345 dststart += (h - 1) * dstpitch; 346 srcinc = -srcpitch; 347 
dstinc = -dstpitch; 348 } else { 349 srcinc = srcpitch; 350 dstinc = dstpitch; 351 } 352 if (p->last_rop == 0xcc) { 353 /* plain old copy */ 354 if ( xinc > 0) { 355 /* going left to right */ 356 for (line = 0; line < h; line++) { 357 count = 0; 358 s = srcstart; 359 d = dststart; 360 while ( count < w) { 361 num = min(32, w - count); 362 write_sx_io(p, s, 363 SX_LDB(10, num - 1, s & 7)); 364 write_sx_io(p, d, 365 SX_STBM(10, num - 1, d & 7)); 366 s += xinc; 367 d += xinc; 368 count += 32; 369 } 370 srcstart += srcinc; 371 dststart += dstinc; 372 } 373 } else { 374 /* going right to left */ 375 int i, chunks = (w >> 5); 376 for (line = 0; line < h; line++) { 377 s = srcstart; 378 d = dststart; 379 count = w; 380 for (i = 0; i < chunks; i++) { 381 write_sx_io(p, s, 382 SX_LDB(10, 31, s & 7)); 383 write_sx_io(p, d, 384 SX_STBM(10, 31, d & 7)); 385 s -= 32; 386 d -= 32; 387 count -= 32; 388 } 389 /* leftovers, if any */ 390 if (count > 0) { 391 s += (32 - count); 392 d += (32 - count); 393 write_sx_io(p, s, 394 SX_LDB(10, count - 1, s & 7)); 395 write_sx_io(p, d, 396 SX_STBM(10, count - 1, d & 7)); 397 } 398 srcstart += srcinc; 399 dststart += dstinc; 400 } 401 } 402 } else { 403 /* ROPs needed */ 404 if ( xinc > 0) { 405 /* going left to right */ 406 for (line = 0; line < h; line++) { 407 count = 0; 408 s = srcstart; 409 d = dststart; 410 while ( count < w) { 411 num = min(32, w - count); 412 write_sx_io(p, s, 413 SX_LDB(10, num - 1, s & 7)); 414 write_sx_io(p, d, 415 SX_LDB(42, num - 1, d & 7)); 416 if (num > 16) { 417 write_sx_reg(p, SX_INSTRUCTIONS, 418 SX_ROP(10, 42, 74, 15)); 419 write_sx_reg(p, SX_INSTRUCTIONS, 420 SX_ROP(26, 58, 90, num - 17)); 421 } else { 422 write_sx_reg(p, SX_INSTRUCTIONS, 423 SX_ROP(10, 42, 74, num - 1)); 424 } 425 write_sx_io(p, d, 426 SX_STBM(74, num - 1, d & 7)); 427 s += xinc; 428 d += xinc; 429 count += 32; 430 } 431 srcstart += srcinc; 432 dststart += dstinc; 433 } 434 } else { 435 /* going right to left */ 436 int i, chunks = (w >> 
5); 437 for (line = 0; line < h; line++) { 438 s = srcstart; 439 d = dststart; 440 count = w; 441 for (i = 0; i < chunks; i++) { 442 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 443 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 444 write_sx_reg(p, SX_INSTRUCTIONS, 445 SX_ROP(10, 42, 74, 15)); 446 write_sx_reg(p, SX_INSTRUCTIONS, 447 SX_ROP(26, 58, 90, 15)); 448 write_sx_io(p, d, 449 SX_STBM(74, 31, d & 7)); 450 s -= 128; 451 d -= 128; 452 count -= 32; 453 } 454 /* leftovers, if any */ 455 if (count > 0) { 456 s += (32 - count); 457 d += (32 - count); 458 write_sx_io(p, s, 459 SX_LDB(10, count - 1, s & 7)); 460 write_sx_io(p, d, 461 SX_LDB(42, count - 1, d & 7)); 462 if (count > 16) { 463 write_sx_reg(p, SX_INSTRUCTIONS, 464 SX_ROP(10, 42, 74, 15)); 465 write_sx_reg(p, SX_INSTRUCTIONS, 466 SX_ROP(26, 58, 90, count - 17)); 467 } else { 468 write_sx_reg(p, SX_INSTRUCTIONS, 469 SX_ROP(10, 42, 74, count - 1)); 470 } 471 472 write_sx_io(p, d, 473 SX_STBM(74, count - 1, d & 7)); 474 } 475 srcstart += srcinc; 476 dststart += dstinc; 477 } 478 } 479 } 480 exaMarkSync(pDstPixmap->drawable.pScreen); 481} 482 483static void 484CG14DoneCopy(PixmapPtr pDstPixmap) 485{ 486} 487 488static Bool 489CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 490{ 491 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 492 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 493 494 ENTER; 495 DPRINTF(X_ERROR, "bits per pixel: %d\n", 496 pPixmap->drawable.bitsPerPixel); 497 write_sx_reg(p, SX_QUEUED(8), fg); 498 write_sx_reg(p, SX_QUEUED(9), fg); 499 if (planemask != p->last_mask) { 500 CG14Wait(p); 501 write_sx_reg(p, SX_PLANEMASK, planemask); 502 p->last_mask = planemask; 503 } 504 alu = sx_rop[alu]; 505 if (alu != p->last_rop) { 506 CG14Wait(p); 507 write_sx_reg(p, SX_ROP_CONTROL, alu); 508 p->last_rop = alu; 509 } 510 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 511 return TRUE; 512} 513 514static void 515CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 
516{ 517 int line, x, num; 518 uint32_t ptr; 519 520 ENTER; 521 if (p->last_rop == 0xcc) { 522 /* simple fill */ 523 for (line = 0; line < h; line++) { 524 x = 0; 525 while (x < w) { 526 ptr = start + (x << 2); 527 num = min(32, w - x); 528 write_sx_io(p, ptr, 529 SX_STS(8, num - 1, ptr & 7)); 530 x += 32; 531 } 532 start += pitch; 533 } 534 } else if (p->last_rop == 0xaa) { 535 /* nothing to do here */ 536 return; 537 } else { 538 /* alright, let's do actual ROP stuff */ 539 540 /* first repeat the fill colour into 16 registers */ 541 write_sx_reg(p, SX_INSTRUCTIONS, 542 SX_SELECT_S(8, 8, 10, 15)); 543 544 for (line = 0; line < h; line++) { 545 x = 0; 546 while (x < w) { 547 ptr = start + (x << 2); 548 num = min(32, w - x); 549 /* now suck fb data into registers */ 550 write_sx_io(p, ptr, 551 SX_LD(42, num - 1, ptr & 7)); 552 /* 553 * ROP them with the fill data we left in 10 554 * non-memory ops can only have counts up to 16 555 */ 556 if (num <= 16) { 557 write_sx_reg(p, SX_INSTRUCTIONS, 558 SX_ROP(10, 42, 74, num - 1)); 559 } else { 560 write_sx_reg(p, SX_INSTRUCTIONS, 561 SX_ROP(10, 42, 74, 15)); 562 write_sx_reg(p, SX_INSTRUCTIONS, 563 SX_ROP(10, 58, 90, num - 17)); 564 } 565 /* and write the result back into memory */ 566 write_sx_io(p, ptr, 567 SX_ST(74, num - 1, ptr & 7)); 568 x += 32; 569 } 570 start += pitch; 571 } 572 } 573} 574 575static void 576CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 577{ 578 int line, x, num, off; 579 uint32_t ptr; 580 581 ENTER; 582 off = start & 7; 583 start &= ~7; 584 585 if (p->last_rop == 0xcc) { 586 /* simple fill */ 587 for (line = 0; line < h; line++) { 588 x = 0; 589 while (x < w) { 590 ptr = start + x; 591 num = min(32, w - x); 592 write_sx_io(p, ptr, 593 SX_STBS(8, num - 1, off)); 594 x += 32; 595 } 596 start += pitch; 597 } 598 } else if (p->last_rop == 0xaa) { 599 /* nothing to do here */ 600 return; 601 } else { 602 /* alright, let's do actual ROP stuff */ 603 604 /* first repeat the fill 
colour into 16 registers */ 605 write_sx_reg(p, SX_INSTRUCTIONS, 606 SX_SELECT_S(8, 8, 10, 15)); 607 608 for (line = 0; line < h; line++) { 609 x = 0; 610 while (x < w) { 611 ptr = start + x; 612 num = min(32, w - x); 613 /* now suck fb data into registers */ 614 write_sx_io(p, ptr, 615 SX_LDB(42, num - 1, off)); 616 /* 617 * ROP them with the fill data we left in 10 618 * non-memory ops can only have counts up to 16 619 */ 620 if (num <= 16) { 621 write_sx_reg(p, SX_INSTRUCTIONS, 622 SX_ROP(10, 42, 74, num - 1)); 623 } else { 624 write_sx_reg(p, SX_INSTRUCTIONS, 625 SX_ROP(10, 42, 74, 15)); 626 write_sx_reg(p, SX_INSTRUCTIONS, 627 SX_ROP(10, 58, 90, num - 17)); 628 } 629 /* and write the result back into memory */ 630 write_sx_io(p, ptr, 631 SX_STB(74, num - 1, off)); 632 x += 32; 633 } 634 start += pitch; 635 } 636 } 637} 638 639static void 640CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 641{ 642 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 643 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 644 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 645 int start, depth; 646 647 ENTER; 648 dstpitch = exaGetPixmapPitch(pPixmap); 649 dstoff = exaGetPixmapOffset(pPixmap); 650 651 depth = pPixmap->drawable.bitsPerPixel; 652 switch (depth) { 653 case 32: 654 start = dstoff + (y1 * dstpitch) + (x1 << 2); 655 CG14Solid32(p, start, dstpitch, w, h); 656 break; 657 case 8: 658 start = dstoff + (y1 * dstpitch) + x1; 659 CG14Solid8(p, start, dstpitch, w, h); 660 break; 661 } 662 663 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 664 dstpitch, dstoff, start); 665 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 666 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 667 exaMarkSync(pPixmap->drawable.pScreen); 668} 669 670/* 671 * Memcpy-based UTS. 
672 */ 673static Bool 674CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 675 char *src, int src_pitch) 676{ 677 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 678 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 679 char *dst = p->fb + exaGetPixmapOffset(pDst); 680 int dst_pitch = exaGetPixmapPitch(pDst); 681 682 int bpp = pDst->drawable.bitsPerPixel; 683 int cpp = (bpp + 7) >> 3; 684 int wBytes = w * cpp; 685 686 ENTER; 687 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 688 dst += (x * cpp) + (y * dst_pitch); 689 690 CG14Wait(p); 691 692 while (h--) { 693 memcpy(dst, src, wBytes); 694 src += src_pitch; 695 dst += dst_pitch; 696 } 697 __asm("stbar;"); 698 return TRUE; 699} 700 701/* 702 * Memcpy-based DFS. 703 */ 704static Bool 705CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 706 char *dst, int dst_pitch) 707{ 708 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 709 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 710 char *src = p->fb + exaGetPixmapOffset(pSrc); 711 int src_pitch = exaGetPixmapPitch(pSrc); 712 713 ENTER; 714 int bpp = pSrc->drawable.bitsPerPixel; 715 int cpp = (bpp + 7) >> 3; 716 int wBytes = w * cpp; 717 718 src += (x * cpp) + (y * src_pitch); 719 720 CG14Wait(p); 721 722 while (h--) { 723 memcpy(dst, src, wBytes); 724 src += src_pitch; 725 dst += dst_pitch; 726 } 727 728 return TRUE; 729} 730 731Bool 732CG14CheckComposite(int op, PicturePtr pSrcPicture, 733 PicturePtr pMaskPicture, 734 PicturePtr pDstPicture) 735{ 736 int i, ok = FALSE; 737 738 ENTER; 739 740 /* 741 * SX is in theory capable of accelerating pretty much all Xrender ops, 742 * even coordinate transformation and gradients. Support will be added 743 * over time and likely have to spill over into its own source file. 
744 */ 745 746 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 747 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 748 return FALSE; 749 } 750 751 if (pSrcPicture != NULL) { 752 i = 0; 753 while ((i < arraysize(src_formats)) && (!ok)) { 754 ok = (pSrcPicture->format == src_formats[i]); 755 i++; 756 } 757 758 if (!ok) { 759 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 760 __func__, pSrcPicture->format); 761 return FALSE; 762 } 763 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 764 } 765 766 if (pDstPicture != NULL) { 767 i = 0; 768 ok = FALSE; 769 while ((i < arraysize(src_formats)) && (!ok)) { 770 ok = (pDstPicture->format == src_formats[i]); 771 i++; 772 } 773 774 if (!ok) { 775 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 776 __func__, pDstPicture->format); 777 return FALSE; 778 } 779 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 780 } 781 782 if (pMaskPicture != NULL) { 783 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 784 pMaskPicture->pDrawable->width, 785 pMaskPicture->pDrawable->height); 786 } 787 return TRUE; 788} 789 790Bool 791CG14PrepareComposite(int op, PicturePtr pSrcPicture, 792 PicturePtr pMaskPicture, 793 PicturePtr pDstPicture, 794 PixmapPtr pSrc, 795 PixmapPtr pMask, 796 PixmapPtr pDst) 797{ 798 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 799 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 800 801 ENTER; 802 803 p->no_source_pixmap = FALSE; 804 p->source_is_solid = FALSE; 805 806 if (pSrcPicture->format == PICT_a1) { 807 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 808 pDstPicture->format, op); 809 if (pMaskPicture != NULL) { 810 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 811 } 812 } 813 if (pSrcPicture->pSourcePict != NULL) { 814 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 815 p->fillcolour = 816 pSrcPicture->pSourcePict->solidFill.color; 817 DPRINTF(X_ERROR, "%s: solid src %08x\n", 818 __func__, p->fillcolour); 819 
p->no_source_pixmap = TRUE; 820 p->source_is_solid = TRUE; 821 } 822 } 823 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 824 if (pMaskPicture->pSourcePict->type == 825 SourcePictTypeSolidFill) { 826 p->fillcolour = 827 pMaskPicture->pSourcePict->solidFill.color; 828 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 829 __func__, p->fillcolour); 830 } 831 } 832 if (pMaskPicture != NULL) { 833 p->mskoff = exaGetPixmapOffset(pMask); 834 p->mskpitch = exaGetPixmapPitch(pMask); 835 p->mskformat = pMaskPicture->format; 836 } else { 837 p->mskoff = 0; 838 p->mskpitch = 0; 839 p->mskformat = 0; 840 } 841 if (pSrc != NULL) { 842 p->source_is_solid = 843 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 844 p->srcoff = exaGetPixmapOffset(pSrc); 845 p->srcpitch = exaGetPixmapPitch(pSrc); 846 if (p->source_is_solid) { 847 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 848 } 849 } 850 p->srcformat = pSrcPicture->format; 851 p->dstformat = pDstPicture->format; 852 853 if (p->source_is_solid) { 854 uint32_t temp; 855 856 /* stuff source colour into SX registers, swap as needed */ 857 temp = p->fillcolour; 858 switch (p->srcformat) { 859 case PICT_a8r8g8b8: 860 case PICT_x8r8g8b8: 861 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 862 temp = temp >> 8; 863 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 864 temp = temp >> 8; 865 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 866 break; 867 case PICT_a8b8g8r8: 868 case PICT_x8b8g8r8: 869 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 870 temp = temp >> 8; 871 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 872 temp = temp >> 8; 873 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 874 break; 875 } 876 write_sx_reg(p, SX_QUEUED(8), 0xff); 877 } 878 p->op = op; 879 if (op == PictOpSrc) { 880 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 881 } 882#ifdef SX_DEBUG 883 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 884 *(uint32_t *)(p->fb + p->srcoff)); 885#endif 886 return TRUE; 887} 888 889void 
890CG14Composite(PixmapPtr pDst, int srcX, int srcY, 891 int maskX, int maskY, 892 int dstX, int dstY, 893 int width, int height) 894{ 895 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 896 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 897 uint32_t dstoff, dstpitch; 898 uint32_t dst, msk, src; 899 int flip = 0; 900 901 ENTER; 902 dstoff = exaGetPixmapOffset(pDst); 903 dstpitch = exaGetPixmapPitch(pDst); 904 905 flip = (PICT_FORMAT_TYPE(p->srcformat) != 906 PICT_FORMAT_TYPE(p->dstformat)); 907 908 switch (p->op) { 909 case PictOpOver: 910 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 911 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 912 p->mskformat, p->dstformat, srcX, srcY); 913 if (p->source_is_solid) { 914 switch (p->mskformat) { 915 case PICT_a8: 916 msk = p->mskoff + 917 (maskY * p->mskpitch) + 918 maskX; 919 CG14Comp_Over8Solid(p, 920 msk, p->mskpitch, 921 dst, dstpitch, 922 width, height); 923 break; 924 case PICT_a8r8g8b8: 925 case PICT_a8b8g8r8: 926 msk = p->mskoff + 927 (maskY * p->mskpitch) + 928 (maskX << 2); 929 CG14Comp_Over32Solid(p, 930 msk, p->mskpitch, 931 dst, dstpitch, 932 width, height); 933 break; 934 default: 935 xf86Msg(X_ERROR, 936 "unsupported mask format %08x\n", p->mskformat); 937 } 938 } else { 939 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 940 p->mskformat); 941 switch (p->srcformat) { 942 case PICT_a8r8g8b8: 943 case PICT_a8b8g8r8: 944 src = p->srcoff + 945 (srcY * p->srcpitch) + 946 (srcX << 2); 947 dst = dstoff + 948 (dstY * dstpitch) + 949 (dstX << 2); 950 if (p->mskformat == PICT_a8) { 951 msk = p->mskoff + 952 (maskY * p->mskpitch) + 953 maskX; 954 CG14Comp_Over32Mask(p, 955 src, p->srcpitch, 956 msk, p->mskpitch, 957 dst, dstpitch, 958 width, height, flip); 959 } else { 960 CG14Comp_Over32(p, 961 src, p->srcpitch, 962 dst, dstpitch, 963 width, height, flip); 964 } 965 break; 966 case PICT_x8r8g8b8: 967 case PICT_x8b8g8r8: 968 src = p->srcoff + 969 (srcY * p->srcpitch) + 970 (srcX << 2); 971 dst = dstoff + 972 
(dstY * dstpitch) + 973 (dstX << 2); 974 if (p->mskformat == PICT_a8) { 975 msk = p->mskoff + 976 (maskY * p->mskpitch) + 977 maskX; 978 CG14Comp_Over32Mask_noalpha(p, 979 src, p->srcpitch, 980 msk, p->mskpitch, 981 dst, dstpitch, 982 width, height, flip); 983 } else if ((p->mskformat == PICT_a8r8g8b8) || 984 (p->mskformat == PICT_a8b8g8r8)) { 985 msk = p->mskoff + 986 (maskY * p->mskpitch) + 987 (maskX << 2); 988 CG14Comp_Over32Mask32_noalpha(p, 989 src, p->srcpitch, 990 msk, p->mskpitch, 991 dst, dstpitch, 992 width, height, flip); 993 } else { 994 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 995 } 996 break; 997 default: 998 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 999 __func__, p->srcformat); 1000 } 1001 } 1002 break; 1003 case PictOpAdd: 1004 DPRINTF(X_ERROR, "Add %08x %08x\n", 1005 p->srcformat, p->dstformat); 1006 switch (p->srcformat) { 1007 case PICT_a8: 1008 src = p->srcoff + 1009 (srcY * p->srcpitch) + srcX; 1010 if (p->dstformat == PICT_a8) { 1011 dst = dstoff + 1012 (dstY * dstpitch) + dstX; 1013 CG14Comp_Add8(p, 1014 src, p->srcpitch, 1015 dst, dstpitch, 1016 width, height); 1017 } else { 1018 dst = dstoff + 1019 (dstY * dstpitch) + 1020 (dstX << 2); 1021 CG14Comp_Add8_32(p, 1022 src, p->srcpitch, 1023 dst, dstpitch, 1024 width, height); 1025 } 1026 break; 1027 case PICT_a8r8g8b8: 1028 case PICT_x8r8g8b8: 1029 src = p->srcoff + 1030 (srcY * p->srcpitch) + (srcX << 2); 1031 dst = dstoff + (dstY * dstpitch) + 1032 (dstX << 2); 1033 CG14Comp_Add32(p, src, p->srcpitch, 1034 dst, dstpitch, width, height); 1035 break; 1036 default: 1037 xf86Msg(X_ERROR, 1038 "unsupported src format\n"); 1039 } 1040 break; 1041 case PictOpSrc: 1042 DPRINTF(X_ERROR, "Src %08x %08x\n", 1043 p->srcformat, p->dstformat); 1044 if (p->mskformat != 0) 1045 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1046 if (p->srcformat == PICT_a8) { 1047 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1048 } else { 1049 /* convert between RGB and 
BGR? */ 1050 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1051 } 1052 break; 1053 default: 1054 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1055 } 1056 exaMarkSync(pDst->drawable.pScreen); 1057} 1058 1059 1060 1061Bool 1062CG14InitAccel(ScreenPtr pScreen) 1063{ 1064 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1065 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1066 ExaDriverPtr pExa; 1067 1068 pExa = exaDriverAlloc(); 1069 if (!pExa) 1070 return FALSE; 1071 1072 p->pExa = pExa; 1073 1074 pExa->exa_major = EXA_VERSION_MAJOR; 1075 pExa->exa_minor = EXA_VERSION_MINOR; 1076 1077 pExa->memoryBase = p->fb; 1078 pExa->memorySize = p->memsize; 1079 pExa->offScreenBase = p->width * p->height * 4; 1080 1081 /* 1082 * SX memory instructions are written to 64bit aligned addresses with 1083 * a 3 bit displacement. Make sure the displacement remains constant 1084 * within one column 1085 */ 1086 1087 pExa->pixmapOffsetAlign = 8; 1088 pExa->pixmapPitchAlign = 8; 1089 1090 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1091 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1092 /*| EXA_MIXED_PIXMAPS*/; 1093 1094 /* 1095 * these limits are bogus 1096 * SX doesn't deal with coordinates at all, so there is no limit but 1097 * we have to put something here 1098 */ 1099 pExa->maxX = 4096; 1100 pExa->maxY = 4096; 1101 1102 pExa->WaitMarker = CG14WaitMarker; 1103 1104 pExa->PrepareSolid = CG14PrepareSolid; 1105 pExa->Solid = CG14Solid; 1106 pExa->DoneSolid = CG14DoneCopy; 1107 pExa->PrepareCopy = CG14PrepareCopy; 1108 pExa->Copy = CG14Copy32; 1109 pExa->DoneCopy = CG14DoneCopy; 1110 if (p->use_xrender) { 1111 pExa->CheckComposite = CG14CheckComposite; 1112 pExa->PrepareComposite = CG14PrepareComposite; 1113 pExa->Composite = CG14Composite; 1114 pExa->DoneComposite = CG14DoneCopy; 1115 } 1116 1117 /* EXA hits more optimized paths when it does not have to fallback 1118 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 
1119 */ 1120 pExa->UploadToScreen = CG14UploadToScreen; 1121 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1122 1123 p->queuecount = 0; 1124 /* do some hardware init */ 1125 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1126 p->last_mask = 0xffffffff; 1127 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1128 p->last_rop = 0xcc; 1129 return exaDriverInit(pScreen, pExa); 1130} 1131