/* cg14_accel.c revision dbf8597c */
1/* $NetBSD: cg14_accel.c,v 1.18 2021/12/03 16:54:26 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45//#define SX_DEBUG 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "bits per pixel: %d\n", 102 pSrcPixmap->drawable.bitsPerPixel); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = 
sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 
d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 write_sx_io(p, s, 188 SX_LD(10, num - 1, s & 7)); 189 write_sx_io(p, d, 190 SX_STM(10, num - 1, d & 7)); 191 s += xinc; 192 d += xinc; 193 count += 32; 194 } 195 srcstart += srcinc; 196 dststart += dstinc; 197 } 198 } else { 199 /* going right to left */ 200 int i, chunks = (w >> 5); 201 for (line = 0; line < h; line++) { 202 s = srcstart; 203 d = dststart; 204 count = w; 205 for (i = 0; i < chunks; i++) { 206 write_sx_io(p, s, 207 SX_LD(10, 31, s & 7)); 208 write_sx_io(p, d, 209 SX_STM(10, 31, d & 7)); 210 s -= 128; 211 d -= 128; 212 count -= 32; 213 } 214 /* leftovers, if any */ 215 if (count > 0) { 216 s += (32 - count) << 2; 217 d += (32 - count) << 2; 218 write_sx_io(p, s, 219 SX_LD(10, count - 1, s & 7)); 220 write_sx_io(p, d, 221 SX_STM(10, count - 1, d & 7)); 222 } 223 srcstart += srcinc; 224 dststart += dstinc; 225 } 226 } 227 } else { 228 /* ROPs needed */ 229 if ( xinc > 0) { 230 /* going left to right */ 231 for (line = 0; line < h; line++) { 232 count = 0; 233 s = srcstart; 234 d = dststart; 235 while ( count < w) { 236 num = min(32, w - count); 237 write_sx_io(p, s, 238 SX_LD(10, num - 1, s & 7)); 239 write_sx_io(p, d, 240 SX_LD(42, num - 1, d & 7)); 241 if (num > 16) { 242 write_sx_reg(p, SX_INSTRUCTIONS, 243 SX_ROP(10, 42, 74, 15)); 244 write_sx_reg(p, SX_INSTRUCTIONS, 245 SX_ROP(26, 58, 90, num - 17)); 246 } else { 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(10, 42, 74, num - 1)); 249 } 250 write_sx_io(p, d, 251 SX_STM(74, num - 1, d & 7)); 252 s += xinc; 253 d += xinc; 254 count += 32; 255 } 256 srcstart += srcinc; 257 dststart += dstinc; 258 } 259 } else { 260 /* going right to left */ 261 int i, chunks = (w >> 5); 262 for (line = 0; line < h; line++) { 263 s = srcstart; 264 d = dststart; 265 count = w; 266 for (i = 0; i < chunks; i++) { 267 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 268 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 
SX_ROP(10, 42, 74, 15)); 271 write_sx_reg(p, SX_INSTRUCTIONS, 272 SX_ROP(26, 58, 90, 15)); 273 write_sx_io(p, d, 274 SX_STM(74, 31, d & 7)); 275 s -= 128; 276 d -= 128; 277 count -= 32; 278 } 279 /* leftovers, if any */ 280 if (count > 0) { 281 s += (32 - count) << 2; 282 d += (32 - count) << 2; 283 write_sx_io(p, s, 284 SX_LD(10, count - 1, s & 7)); 285 write_sx_io(p, d, 286 SX_LD(42, count - 1, d & 7)); 287 if (count > 16) { 288 write_sx_reg(p, SX_INSTRUCTIONS, 289 SX_ROP(10, 42, 74, 15)); 290 write_sx_reg(p, SX_INSTRUCTIONS, 291 SX_ROP(26, 58, 90, count - 17)); 292 } else { 293 write_sx_reg(p, SX_INSTRUCTIONS, 294 SX_ROP(10, 42, 74, count - 1)); 295 } 296 297 write_sx_io(p, d, 298 SX_STM(74, count - 1, d & 7)); 299 } 300 srcstart += srcinc; 301 dststart += dstinc; 302 } 303 } 304 } 305 exaMarkSync(pDstPixmap->drawable.pScreen); 306} 307 308static void 309CG14Copy8(PixmapPtr pDstPixmap, 310 int srcX, int srcY, int dstX, int dstY, int w, int h) 311{ 312 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 313 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 314 int dstpitch, dstoff, srcpitch, srcoff; 315 int srcstart, dststart, xinc, srcinc, dstinc; 316 int line, count, s, d, num; 317 318 ENTER; 319 dstpitch = exaGetPixmapPitch(pDstPixmap); 320 dstoff = exaGetPixmapOffset(pDstPixmap); 321 srcpitch = p->srcpitch; 322 srcoff = p->srcoff; 323 /* 324 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 325 * actually wrote anything and only sync if it did 326 */ 327 srcstart = srcX + (srcpitch * srcY) + srcoff; 328 dststart = dstX + (dstpitch * dstY) + dstoff; 329 330 /* 331 * we always copy up to 32 pixels at a time so direction doesn't 332 * matter if w<=32 333 */ 334 if (w > 32) { 335 if (p->xdir < 0) { 336 srcstart += (w - 32); 337 dststart += (w - 32); 338 xinc = -32; 339 } else 340 xinc = 32; 341 } else 342 xinc = 32; 343 if (p->ydir < 0) { 344 srcstart += (h - 1) * srcpitch; 345 dststart += (h - 1) * dstpitch; 346 srcinc = -srcpitch; 347 
dstinc = -dstpitch; 348 } else { 349 srcinc = srcpitch; 350 dstinc = dstpitch; 351 } 352 if (p->last_rop == 0xcc) { 353 /* plain old copy */ 354 if ( xinc > 0) { 355 /* going left to right */ 356 for (line = 0; line < h; line++) { 357 count = 0; 358 s = srcstart; 359 d = dststart; 360 while ( count < w) { 361 num = min(32, w - count); 362 write_sx_io(p, s, 363 SX_LDB(10, num - 1, s & 7)); 364 write_sx_io(p, d, 365 SX_STBM(10, num - 1, d & 7)); 366 s += xinc; 367 d += xinc; 368 count += 32; 369 } 370 srcstart += srcinc; 371 dststart += dstinc; 372 } 373 } else { 374 /* going right to left */ 375 int i, chunks = (w >> 5); 376 for (line = 0; line < h; line++) { 377 s = srcstart; 378 d = dststart; 379 count = w; 380 for (i = 0; i < chunks; i++) { 381 write_sx_io(p, s, 382 SX_LDB(10, 31, s & 7)); 383 write_sx_io(p, d, 384 SX_STBM(10, 31, d & 7)); 385 s -= 32; 386 d -= 32; 387 count -= 32; 388 } 389 /* leftovers, if any */ 390 if (count > 0) { 391 s += (32 - count); 392 d += (32 - count); 393 write_sx_io(p, s, 394 SX_LDB(10, count - 1, s & 7)); 395 write_sx_io(p, d, 396 SX_STBM(10, count - 1, d & 7)); 397 } 398 srcstart += srcinc; 399 dststart += dstinc; 400 } 401 } 402 } else { 403 /* ROPs needed */ 404 if ( xinc > 0) { 405 /* going left to right */ 406 for (line = 0; line < h; line++) { 407 count = 0; 408 s = srcstart; 409 d = dststart; 410 while ( count < w) { 411 num = min(32, w - count); 412 write_sx_io(p, s, 413 SX_LDB(10, num - 1, s & 7)); 414 write_sx_io(p, d, 415 SX_LDB(42, num - 1, d & 7)); 416 if (num > 16) { 417 write_sx_reg(p, SX_INSTRUCTIONS, 418 SX_ROP(10, 42, 74, 15)); 419 write_sx_reg(p, SX_INSTRUCTIONS, 420 SX_ROP(26, 58, 90, num - 17)); 421 } else { 422 write_sx_reg(p, SX_INSTRUCTIONS, 423 SX_ROP(10, 42, 74, num - 1)); 424 } 425 write_sx_io(p, d, 426 SX_STBM(74, num - 1, d & 7)); 427 s += xinc; 428 d += xinc; 429 count += 32; 430 } 431 srcstart += srcinc; 432 dststart += dstinc; 433 } 434 } else { 435 /* going right to left */ 436 int i, chunks = (w >> 
5); 437 for (line = 0; line < h; line++) { 438 s = srcstart; 439 d = dststart; 440 count = w; 441 for (i = 0; i < chunks; i++) { 442 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 443 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 444 write_sx_reg(p, SX_INSTRUCTIONS, 445 SX_ROP(10, 42, 74, 15)); 446 write_sx_reg(p, SX_INSTRUCTIONS, 447 SX_ROP(26, 58, 90, 15)); 448 write_sx_io(p, d, 449 SX_STBM(74, 31, d & 7)); 450 s -= 128; 451 d -= 128; 452 count -= 32; 453 } 454 /* leftovers, if any */ 455 if (count > 0) { 456 s += (32 - count); 457 d += (32 - count); 458 write_sx_io(p, s, 459 SX_LDB(10, count - 1, s & 7)); 460 write_sx_io(p, d, 461 SX_LDB(42, count - 1, d & 7)); 462 if (count > 16) { 463 write_sx_reg(p, SX_INSTRUCTIONS, 464 SX_ROP(10, 42, 74, 15)); 465 write_sx_reg(p, SX_INSTRUCTIONS, 466 SX_ROP(26, 58, 90, count - 17)); 467 } else { 468 write_sx_reg(p, SX_INSTRUCTIONS, 469 SX_ROP(10, 42, 74, count - 1)); 470 } 471 472 write_sx_io(p, d, 473 SX_STBM(74, count - 1, d & 7)); 474 } 475 srcstart += srcinc; 476 dststart += dstinc; 477 } 478 } 479 } 480 exaMarkSync(pDstPixmap->drawable.pScreen); 481} 482 483static void 484CG14DoneCopy(PixmapPtr pDstPixmap) 485{ 486} 487 488static Bool 489CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 490{ 491 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 492 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 493 494 ENTER; 495 DPRINTF(X_ERROR, "bits per pixel: %d %08x\n", 496 pPixmap->drawable.bitsPerPixel, fg); 497 498 /* 499 * GXset and GXclear are really just specual cases of GXcopy with 500 * fixed fill colour 501 */ 502 switch (alu) { 503 case GXclear: 504 alu = GXcopy; 505 fg = 0; 506 break; 507 case GXset: 508 alu = GXcopy; 509 fg = 0xffffffff; 510 break; 511 } 512 /* repeat the colour in every sub byte if we're in 8 bit */ 513 if (pPixmap->drawable.bitsPerPixel == 8) { 514 fg |= fg << 8; 515 fg |= fg << 16; 516 } 517 write_sx_reg(p, SX_QUEUED(8), fg); 518 write_sx_reg(p, SX_QUEUED(9), fg); 519 if 
(planemask != p->last_mask) { 520 CG14Wait(p); 521 write_sx_reg(p, SX_PLANEMASK, planemask); 522 p->last_mask = planemask; 523 } 524 alu = sx_rop[alu]; 525 if (alu != p->last_rop) { 526 CG14Wait(p); 527 write_sx_reg(p, SX_ROP_CONTROL, alu); 528 p->last_rop = alu; 529 } 530 531 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 532 return TRUE; 533} 534 535static void 536CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 537{ 538 int line, x, num; 539 uint32_t ptr; 540 541 ENTER; 542 if (p->last_rop == 0xcc) { 543 /* simple fill */ 544 for (line = 0; line < h; line++) { 545 x = 0; 546 while (x < w) { 547 ptr = start + (x << 2); 548 num = min(32, w - x); 549 write_sx_io(p, ptr, 550 SX_STS(8, num - 1, ptr & 7)); 551 x += 32; 552 } 553 start += pitch; 554 } 555 } else if (p->last_rop == 0xaa) { 556 /* nothing to do here */ 557 return; 558 } else { 559 /* alright, let's do actual ROP stuff */ 560 561 /* first repeat the fill colour into 16 registers */ 562 write_sx_reg(p, SX_INSTRUCTIONS, 563 SX_SELECT_S(8, 8, 10, 15)); 564 565 for (line = 0; line < h; line++) { 566 x = 0; 567 while (x < w) { 568 ptr = start + (x << 2); 569 num = min(32, w - x); 570 /* now suck fb data into registers */ 571 write_sx_io(p, ptr, 572 SX_LD(42, num - 1, ptr & 7)); 573 /* 574 * ROP them with the fill data we left in 10 575 * non-memory ops can only have counts up to 16 576 */ 577 if (num <= 16) { 578 write_sx_reg(p, SX_INSTRUCTIONS, 579 SX_ROP(10, 42, 74, num - 1)); 580 } else { 581 write_sx_reg(p, SX_INSTRUCTIONS, 582 SX_ROP(10, 42, 74, 15)); 583 write_sx_reg(p, SX_INSTRUCTIONS, 584 SX_ROP(10, 58, 90, num - 17)); 585 } 586 /* and write the result back into memory */ 587 write_sx_io(p, ptr, 588 SX_ST(74, num - 1, ptr & 7)); 589 x += 32; 590 } 591 start += pitch; 592 } 593 } 594} 595 596static void 597CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 598{ 599 int line, num, pre, cnt; 600 uint32_t ptr; 601 602 ENTER; 603 pre = start & 3; 604 if (pre != 0) pre = 4 
- pre; 605 606 if (p->last_rop == 0xcc) { 607 /* simple fill */ 608 for (line = 0; line < h; line++) { 609 ptr = start; 610 cnt = w; 611 pre = min(pre, cnt); 612 if (pre) { 613 write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7)); 614 ptr += pre; 615 cnt -= pre; 616 if (cnt == 0) goto next; 617 } 618 /* now do the aligned pixels in 32bit chunks */ 619 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 620 while(cnt > 3) { 621 num = min(32, cnt >> 2); 622 write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7)); 623 ptr += num << 2; 624 cnt -= num << 2; 625 } 626 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 627 if (cnt > 0) { 628 write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7)); 629 } 630 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 631next: 632 start += pitch; 633 } 634 } else if (p->last_rop == 0xaa) { 635 /* nothing to do here */ 636 return; 637 } else { 638 /* alright, let's do actual ROP stuff */ 639 640 /* first repeat the fill colour into 16 registers */ 641 write_sx_reg(p, SX_INSTRUCTIONS, 642 SX_SELECT_S(8, 8, 10, 15)); 643 644 for (line = 0; line < h; line++) { 645 ptr = start; 646 cnt = w; 647 pre = min(pre, cnt); 648 if (pre) { 649 write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7)); 650 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1)); 651 write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7)); 652 ptr += pre; 653 cnt -= pre; 654 if (cnt == 0) goto next2; 655 } 656 /* now do the aligned pixels in 32bit chunks */ 657 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 658 while(cnt > 3) { 659 num = min(32, cnt >> 2); 660 write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7)); 661 if (num <= 16) { 662 write_sx_reg(p, SX_INSTRUCTIONS, 663 SX_ROP(10, 26, 58, num - 1)); 664 } else { 665 write_sx_reg(p, SX_INSTRUCTIONS, 666 SX_ROP(10, 26, 58, 15)); 667 write_sx_reg(p, SX_INSTRUCTIONS, 668 SX_ROP(10, 42, 74, num - 17)); 669 } 670 write_sx_io(p, ptr & ~7, 
SX_ST(58, num - 1, ptr & 7)); 671 ptr += num << 2; 672 cnt -= num << 2; 673 } 674 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 675 if (cnt > 0) { 676 write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7)); 677 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1)); 678 write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7)); 679 } 680 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 681next2: 682 start += pitch; 683 } 684 } 685} 686 687static void 688CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 689{ 690 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 691 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 692 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 693 int start, depth; 694 695 ENTER; 696 dstpitch = exaGetPixmapPitch(pPixmap); 697 dstoff = exaGetPixmapOffset(pPixmap); 698 699 depth = pPixmap->drawable.bitsPerPixel; 700 switch (depth) { 701 case 32: 702 start = dstoff + (y1 * dstpitch) + (x1 << 2); 703 CG14Solid32(p, start, dstpitch, w, h); 704 break; 705 case 8: 706 start = dstoff + (y1 * dstpitch) + x1; 707 CG14Solid8(p, start, dstpitch, w, h); 708 break; 709 } 710 711 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 712 dstpitch, dstoff, start); 713 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 714 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 715 exaMarkSync(pPixmap->drawable.pScreen); 716} 717 718/* 719 * Memcpy-based UTS. 
720 */ 721static Bool 722CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 723 char *src, int src_pitch) 724{ 725 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 726 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 727 char *dst = p->fb + exaGetPixmapOffset(pDst); 728 int dst_pitch = exaGetPixmapPitch(pDst); 729 730 int bpp = pDst->drawable.bitsPerPixel; 731 int cpp = (bpp + 7) >> 3; 732 int wBytes = w * cpp; 733 734 ENTER; 735 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 736 dst += (x * cpp) + (y * dst_pitch); 737 738 CG14Wait(p); 739 740 while (h--) { 741 memcpy(dst, src, wBytes); 742 src += src_pitch; 743 dst += dst_pitch; 744 } 745 __asm("stbar;"); 746 return TRUE; 747} 748 749/* 750 * Memcpy-based DFS. 751 */ 752static Bool 753CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 754 char *dst, int dst_pitch) 755{ 756 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 757 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 758 char *src = p->fb + exaGetPixmapOffset(pSrc); 759 int src_pitch = exaGetPixmapPitch(pSrc); 760 761 ENTER; 762 int bpp = pSrc->drawable.bitsPerPixel; 763 int cpp = (bpp + 7) >> 3; 764 int wBytes = w * cpp; 765 766 src += (x * cpp) + (y * src_pitch); 767 768 CG14Wait(p); 769 770 while (h--) { 771 memcpy(dst, src, wBytes); 772 src += src_pitch; 773 dst += dst_pitch; 774 } 775 776 return TRUE; 777} 778 779Bool 780CG14CheckComposite(int op, PicturePtr pSrcPicture, 781 PicturePtr pMaskPicture, 782 PicturePtr pDstPicture) 783{ 784 int i, ok = FALSE; 785 786 ENTER; 787 788 /* 789 * SX is in theory capable of accelerating pretty much all Xrender ops, 790 * even coordinate transformation and gradients. Support will be added 791 * over time and likely have to spill over into its own source file. 
792 */ 793 794 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 795 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 796 return FALSE; 797 } 798 799 if (pSrcPicture != NULL) { 800 i = 0; 801 while ((i < arraysize(src_formats)) && (!ok)) { 802 ok = (pSrcPicture->format == src_formats[i]); 803 i++; 804 } 805 806 if (!ok) { 807 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 808 __func__, pSrcPicture->format); 809 return FALSE; 810 } 811 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 812 } 813 814 if (pDstPicture != NULL) { 815 i = 0; 816 ok = FALSE; 817 while ((i < arraysize(src_formats)) && (!ok)) { 818 ok = (pDstPicture->format == src_formats[i]); 819 i++; 820 } 821 822 if (!ok) { 823 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 824 __func__, pDstPicture->format); 825 return FALSE; 826 } 827 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 828 } 829 830 if (pMaskPicture != NULL) { 831 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 832 pMaskPicture->pDrawable->width, 833 pMaskPicture->pDrawable->height); 834 } 835 return TRUE; 836} 837 838Bool 839CG14PrepareComposite(int op, PicturePtr pSrcPicture, 840 PicturePtr pMaskPicture, 841 PicturePtr pDstPicture, 842 PixmapPtr pSrc, 843 PixmapPtr pMask, 844 PixmapPtr pDst) 845{ 846 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 847 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 848 849 ENTER; 850 851 p->no_source_pixmap = FALSE; 852 p->source_is_solid = FALSE; 853 854 if (pSrcPicture->format == PICT_a1) { 855 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 856 pDstPicture->format, op); 857 if (pMaskPicture != NULL) { 858 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 859 } 860 } 861 if (pSrcPicture->pSourcePict != NULL) { 862 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 863 p->fillcolour = 864 pSrcPicture->pSourcePict->solidFill.color; 865 DPRINTF(X_ERROR, "%s: solid src %08x\n", 866 __func__, p->fillcolour); 867 
p->no_source_pixmap = TRUE; 868 p->source_is_solid = TRUE; 869 } 870 } 871 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 872 if (pMaskPicture->pSourcePict->type == 873 SourcePictTypeSolidFill) { 874 p->fillcolour = 875 pMaskPicture->pSourcePict->solidFill.color; 876 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 877 __func__, p->fillcolour); 878 } 879 } 880 if (pMaskPicture != NULL) { 881 p->mskoff = exaGetPixmapOffset(pMask); 882 p->mskpitch = exaGetPixmapPitch(pMask); 883 p->mskformat = pMaskPicture->format; 884 } else { 885 p->mskoff = 0; 886 p->mskpitch = 0; 887 p->mskformat = 0; 888 } 889 if (pSrc != NULL) { 890 p->source_is_solid = 891 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 892 p->srcoff = exaGetPixmapOffset(pSrc); 893 p->srcpitch = exaGetPixmapPitch(pSrc); 894 if (p->source_is_solid) { 895 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 896 } 897 } 898 p->srcformat = pSrcPicture->format; 899 p->dstformat = pDstPicture->format; 900 901 if (p->source_is_solid) { 902 uint32_t temp; 903 904 /* stuff source colour into SX registers, swap as needed */ 905 temp = p->fillcolour; 906 switch (p->srcformat) { 907 case PICT_a8r8g8b8: 908 case PICT_x8r8g8b8: 909 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 910 temp = temp >> 8; 911 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 912 temp = temp >> 8; 913 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 914 break; 915 case PICT_a8b8g8r8: 916 case PICT_x8b8g8r8: 917 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 918 temp = temp >> 8; 919 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 920 temp = temp >> 8; 921 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 922 break; 923 } 924 write_sx_reg(p, SX_QUEUED(8), 0xff); 925 } 926 p->op = op; 927 if (op == PictOpSrc) { 928 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 929 } 930#ifdef SX_DEBUG 931 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 932 *(uint32_t *)(p->fb + p->srcoff)); 933#endif 934 return TRUE; 935} 936 937void 
938CG14Composite(PixmapPtr pDst, int srcX, int srcY, 939 int maskX, int maskY, 940 int dstX, int dstY, 941 int width, int height) 942{ 943 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 944 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 945 uint32_t dstoff, dstpitch; 946 uint32_t dst, msk, src; 947 int flip = 0; 948 949 ENTER; 950 dstoff = exaGetPixmapOffset(pDst); 951 dstpitch = exaGetPixmapPitch(pDst); 952 953 flip = (PICT_FORMAT_TYPE(p->srcformat) != 954 PICT_FORMAT_TYPE(p->dstformat)); 955 956 switch (p->op) { 957 case PictOpOver: 958 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 959 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 960 p->mskformat, p->dstformat, srcX, srcY); 961 if (p->source_is_solid) { 962 switch (p->mskformat) { 963 case PICT_a8: 964 msk = p->mskoff + 965 (maskY * p->mskpitch) + 966 maskX; 967 CG14Comp_Over8Solid(p, 968 msk, p->mskpitch, 969 dst, dstpitch, 970 width, height); 971 break; 972 case PICT_a8r8g8b8: 973 case PICT_a8b8g8r8: 974 msk = p->mskoff + 975 (maskY * p->mskpitch) + 976 (maskX << 2); 977 CG14Comp_Over32Solid(p, 978 msk, p->mskpitch, 979 dst, dstpitch, 980 width, height); 981 break; 982 default: 983 xf86Msg(X_ERROR, 984 "unsupported mask format %08x\n", p->mskformat); 985 } 986 } else { 987 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 988 p->mskformat); 989 switch (p->srcformat) { 990 case PICT_a8r8g8b8: 991 case PICT_a8b8g8r8: 992 src = p->srcoff + 993 (srcY * p->srcpitch) + 994 (srcX << 2); 995 dst = dstoff + 996 (dstY * dstpitch) + 997 (dstX << 2); 998 if (p->mskformat == PICT_a8) { 999 msk = p->mskoff + 1000 (maskY * p->mskpitch) + 1001 maskX; 1002 CG14Comp_Over32Mask(p, 1003 src, p->srcpitch, 1004 msk, p->mskpitch, 1005 dst, dstpitch, 1006 width, height, flip); 1007 } else { 1008 CG14Comp_Over32(p, 1009 src, p->srcpitch, 1010 dst, dstpitch, 1011 width, height, flip); 1012 } 1013 break; 1014 case PICT_x8r8g8b8: 1015 case PICT_x8b8g8r8: 1016 src = p->srcoff + 1017 (srcY * p->srcpitch) + 1018 (srcX << 2); 1019 
dst = dstoff + 1020 (dstY * dstpitch) + 1021 (dstX << 2); 1022 if (p->mskformat == PICT_a8) { 1023 msk = p->mskoff + 1024 (maskY * p->mskpitch) + 1025 maskX; 1026 CG14Comp_Over32Mask_noalpha(p, 1027 src, p->srcpitch, 1028 msk, p->mskpitch, 1029 dst, dstpitch, 1030 width, height, flip); 1031 } else if ((p->mskformat == PICT_a8r8g8b8) || 1032 (p->mskformat == PICT_a8b8g8r8)) { 1033 msk = p->mskoff + 1034 (maskY * p->mskpitch) + 1035 (maskX << 2); 1036 CG14Comp_Over32Mask32_noalpha(p, 1037 src, p->srcpitch, 1038 msk, p->mskpitch, 1039 dst, dstpitch, 1040 width, height, flip); 1041 } else { 1042 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1043 } 1044 break; 1045 default: 1046 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1047 __func__, p->srcformat); 1048 } 1049 } 1050 break; 1051 case PictOpAdd: 1052 DPRINTF(X_ERROR, "Add %08x %08x\n", 1053 p->srcformat, p->dstformat); 1054 switch (p->srcformat) { 1055 case PICT_a8: 1056 src = p->srcoff + 1057 (srcY * p->srcpitch) + srcX; 1058 if (p->dstformat == PICT_a8) { 1059 dst = dstoff + 1060 (dstY * dstpitch) + dstX; 1061 CG14Comp_Add8(p, 1062 src, p->srcpitch, 1063 dst, dstpitch, 1064 width, height); 1065 } else { 1066 dst = dstoff + 1067 (dstY * dstpitch) + 1068 (dstX << 2); 1069 CG14Comp_Add8_32(p, 1070 src, p->srcpitch, 1071 dst, dstpitch, 1072 width, height); 1073 } 1074 break; 1075 case PICT_a8r8g8b8: 1076 case PICT_x8r8g8b8: 1077 src = p->srcoff + 1078 (srcY * p->srcpitch) + (srcX << 2); 1079 dst = dstoff + (dstY * dstpitch) + 1080 (dstX << 2); 1081 CG14Comp_Add32(p, src, p->srcpitch, 1082 dst, dstpitch, width, height); 1083 break; 1084 default: 1085 xf86Msg(X_ERROR, 1086 "unsupported src format\n"); 1087 } 1088 break; 1089 case PictOpSrc: 1090 DPRINTF(X_ERROR, "Src %08x %08x\n", 1091 p->srcformat, p->dstformat); 1092 if (p->mskformat != 0) 1093 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1094 if (p->srcformat == PICT_a8) { 1095 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 
1096 } else { 1097 /* convert between RGB and BGR? */ 1098 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1099 } 1100 break; 1101 default: 1102 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1103 } 1104 exaMarkSync(pDst->drawable.pScreen); 1105} 1106 1107 1108 1109Bool 1110CG14InitAccel(ScreenPtr pScreen) 1111{ 1112 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1113 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1114 ExaDriverPtr pExa; 1115 1116 pExa = exaDriverAlloc(); 1117 if (!pExa) 1118 return FALSE; 1119 1120 p->pExa = pExa; 1121 1122 pExa->exa_major = EXA_VERSION_MAJOR; 1123 pExa->exa_minor = EXA_VERSION_MINOR; 1124 1125 pExa->memoryBase = p->fb; 1126 pExa->memorySize = p->memsize; 1127 pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 1128 1129 /* 1130 * SX memory instructions are written to 64bit aligned addresses with 1131 * a 3 bit displacement. Make sure the displacement remains constant 1132 * within one column 1133 */ 1134 1135 pExa->pixmapOffsetAlign = 8; 1136 pExa->pixmapPitchAlign = 8; 1137 1138 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1139 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1140 /*| EXA_MIXED_PIXMAPS*/; 1141 1142 /* 1143 * these limits are bogus 1144 * SX doesn't deal with coordinates at all, so there is no limit but 1145 * we have to put something here 1146 */ 1147 pExa->maxX = 4096; 1148 pExa->maxY = 4096; 1149 1150 pExa->WaitMarker = CG14WaitMarker; 1151 1152 pExa->PrepareSolid = CG14PrepareSolid; 1153 pExa->Solid = CG14Solid; 1154 pExa->DoneSolid = CG14DoneCopy; 1155 pExa->PrepareCopy = CG14PrepareCopy; 1156 pExa->Copy = CG14Copy32; 1157 pExa->DoneCopy = CG14DoneCopy; 1158 if (p->use_xrender) { 1159 pExa->CheckComposite = CG14CheckComposite; 1160 pExa->PrepareComposite = CG14PrepareComposite; 1161 pExa->Composite = CG14Composite; 1162 pExa->DoneComposite = CG14DoneCopy; 1163 } 1164 1165 /* EXA hits more optimized paths when it does not have to fallback 1166 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 
1167 */ 1168 pExa->UploadToScreen = CG14UploadToScreen; 1169 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1170 1171 p->queuecount = 0; 1172 /* do some hardware init */ 1173 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1174 p->last_mask = 0xffffffff; 1175 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1176 p->last_rop = 0xcc; 1177 return exaDriverInit(pScreen, pExa); 1178} 1179