cg14_accel.c revision b46cab2a
1/* $NetBSD: cg14_accel.c,v 1.17 2021/12/03 06:10:07 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45//#define SX_DEBUG 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "bits per pixel: %d\n", 102 pSrcPixmap->drawable.bitsPerPixel); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 write_sx_io(p, s, 188 SX_LD(10, num - 1, s & 7)); 189 write_sx_io(p, d, 190 SX_STM(10, num - 1, d & 7)); 191 s += xinc; 192 d += xinc; 193 count += 32; 194 } 195 srcstart += srcinc; 196 dststart += dstinc; 197 } 198 } else { 199 /* going right to left */ 200 int i, chunks = (w >> 5); 201 for (line = 0; line < h; line++) { 202 s = srcstart; 203 d = dststart; 204 count = w; 205 for (i = 0; i < chunks; i++) { 206 write_sx_io(p, s, 207 SX_LD(10, 31, s & 7)); 208 write_sx_io(p, d, 209 SX_STM(10, 31, d & 7)); 210 s -= 128; 211 d -= 128; 212 count -= 32; 213 } 214 /* leftovers, if any */ 215 if (count > 0) { 216 s += (32 - count) << 2; 217 d += (32 - count) << 2; 218 write_sx_io(p, s, 219 SX_LD(10, count - 1, s & 7)); 220 write_sx_io(p, d, 221 SX_STM(10, count - 1, d & 7)); 222 } 223 srcstart += srcinc; 224 dststart += dstinc; 225 } 226 } 227 } else { 228 /* ROPs needed */ 229 if ( xinc > 0) { 230 /* going left to right */ 231 for (line = 0; line < h; line++) { 232 count = 0; 233 s = srcstart; 234 d = dststart; 235 while ( count < w) { 236 num = min(32, w - count); 237 write_sx_io(p, s, 238 SX_LD(10, num - 1, s & 7)); 239 write_sx_io(p, d, 240 SX_LD(42, num - 1, d & 7)); 241 if (num > 16) { 242 write_sx_reg(p, SX_INSTRUCTIONS, 243 SX_ROP(10, 42, 74, 15)); 244 write_sx_reg(p, SX_INSTRUCTIONS, 245 SX_ROP(26, 58, 90, num - 17)); 246 } else { 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(10, 42, 74, num - 1)); 249 } 250 write_sx_io(p, d, 251 SX_STM(74, num - 1, d & 7)); 252 s += xinc; 253 d += xinc; 254 count += 32; 255 } 256 srcstart += srcinc; 257 dststart += dstinc; 258 } 259 } else { 260 /* going right to left */ 261 int i, chunks = (w >> 5); 262 for (line = 0; line < h; line++) { 263 s = srcstart; 264 d = dststart; 265 count = w; 266 for (i = 0; i < chunks; i++) { 267 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 268 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 SX_ROP(10, 42, 74, 15)); 271 write_sx_reg(p, SX_INSTRUCTIONS, 272 SX_ROP(26, 58, 90, 15)); 273 write_sx_io(p, d, 274 SX_STM(74, 31, d & 7)); 275 s -= 128; 276 d -= 128; 277 count -= 32; 278 } 279 /* leftovers, if any */ 280 if (count > 0) { 281 s += (32 - count) << 2; 282 d += (32 - count) << 2; 283 write_sx_io(p, s, 284 SX_LD(10, count - 1, s & 7)); 285 write_sx_io(p, d, 286 SX_LD(42, count - 1, d & 7)); 287 if (count > 16) { 288 write_sx_reg(p, SX_INSTRUCTIONS, 289 SX_ROP(10, 42, 74, 15)); 290 write_sx_reg(p, SX_INSTRUCTIONS, 291 SX_ROP(26, 58, 90, count - 17)); 292 } else { 293 write_sx_reg(p, SX_INSTRUCTIONS, 294 SX_ROP(10, 42, 74, count - 1)); 295 } 296 297 write_sx_io(p, d, 298 SX_STM(74, count - 1, d & 7)); 299 } 300 srcstart += srcinc; 301 dststart += dstinc; 302 } 303 } 304 } 305 exaMarkSync(pDstPixmap->drawable.pScreen); 306} 307 308static void 309CG14Copy8(PixmapPtr pDstPixmap, 310 int srcX, int srcY, int dstX, int dstY, int w, int h) 311{ 312 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 313 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 314 int dstpitch, dstoff, srcpitch, srcoff; 315 int srcstart, dststart, xinc, srcinc, dstinc; 316 int line, count, s, d, num; 317 318 ENTER; 319 dstpitch = exaGetPixmapPitch(pDstPixmap); 320 dstoff = exaGetPixmapOffset(pDstPixmap); 321 srcpitch = p->srcpitch; 322 srcoff = p->srcoff; 323 /* 324 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 325 * actually wrote anything and only sync if it did 326 */ 327 srcstart = srcX + (srcpitch * srcY) + srcoff; 328 dststart = dstX + (dstpitch * dstY) + dstoff; 329 330 /* 331 * we always copy up to 32 pixels at a time so direction doesn't 332 * matter if w<=32 333 */ 334 if (w > 32) { 335 if (p->xdir < 0) { 336 srcstart += (w - 32); 337 dststart += (w - 32); 338 xinc = -32; 339 } else 340 xinc = 32; 341 } else 342 xinc = 32; 343 if (p->ydir < 0) { 344 srcstart += (h - 1) * srcpitch; 345 dststart += (h - 1) * dstpitch; 346 srcinc = -srcpitch; 347 dstinc = -dstpitch; 348 } else { 349 srcinc = srcpitch; 350 dstinc = dstpitch; 351 } 352 if (p->last_rop == 0xcc) { 353 /* plain old copy */ 354 if ( xinc > 0) { 355 /* going left to right */ 356 for (line = 0; line < h; line++) { 357 count = 0; 358 s = srcstart; 359 d = dststart; 360 while ( count < w) { 361 num = min(32, w - count); 362 write_sx_io(p, s, 363 SX_LDB(10, num - 1, s & 7)); 364 write_sx_io(p, d, 365 SX_STBM(10, num - 1, d & 7)); 366 s += xinc; 367 d += xinc; 368 count += 32; 369 } 370 srcstart += srcinc; 371 dststart += dstinc; 372 } 373 } else { 374 /* going right to left */ 375 int i, chunks = (w >> 5); 376 for (line = 0; line < h; line++) { 377 s = srcstart; 378 d = dststart; 379 count = w; 380 for (i = 0; i < chunks; i++) { 381 write_sx_io(p, s, 382 SX_LDB(10, 31, s & 7)); 383 write_sx_io(p, d, 384 SX_STBM(10, 31, d & 7)); 385 s -= 32; 386 d -= 32; 387 count -= 32; 388 } 389 /* leftovers, if any */ 390 if (count > 0) { 391 s += (32 - count); 392 d += (32 - count); 393 write_sx_io(p, s, 394 SX_LDB(10, count - 1, s & 7)); 395 write_sx_io(p, d, 396 SX_STBM(10, count - 1, d & 7)); 397 } 398 srcstart += srcinc; 399 dststart += dstinc; 400 } 401 } 402 } else { 403 /* ROPs needed */ 404 if ( xinc > 0) { 405 /* going left to right */ 406 for (line = 0; line < h; line++) { 407 count = 0; 408 s = srcstart; 409 d = dststart; 410 while ( count < w) { 411 num = min(32, w - count); 412 write_sx_io(p, s, 413 SX_LDB(10, num - 1, s & 7)); 414 write_sx_io(p, d, 415 SX_LDB(42, num - 1, d & 7)); 416 if (num > 16) { 417 write_sx_reg(p, SX_INSTRUCTIONS, 418 SX_ROP(10, 42, 74, 15)); 419 write_sx_reg(p, SX_INSTRUCTIONS, 420 SX_ROP(26, 58, 90, num - 17)); 421 } else { 422 write_sx_reg(p, SX_INSTRUCTIONS, 423 SX_ROP(10, 42, 74, num - 1)); 424 } 425 write_sx_io(p, d, 426 SX_STBM(74, num - 1, d & 7)); 427 s += xinc; 428 d += xinc; 429 count += 32; 430 } 431 srcstart += srcinc; 432 dststart += dstinc; 433 } 434 } else { 435 /* going right to left */ 436 int i, chunks = (w >> 5); 437 for (line = 0; line < h; line++) { 438 s = srcstart; 439 d = dststart; 440 count = w; 441 for (i = 0; i < chunks; i++) { 442 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 443 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 444 write_sx_reg(p, SX_INSTRUCTIONS, 445 SX_ROP(10, 42, 74, 15)); 446 write_sx_reg(p, SX_INSTRUCTIONS, 447 SX_ROP(26, 58, 90, 15)); 448 write_sx_io(p, d, 449 SX_STBM(74, 31, d & 7)); 450 s -= 128; 451 d -= 128; 452 count -= 32; 453 } 454 /* leftovers, if any */ 455 if (count > 0) { 456 s += (32 - count); 457 d += (32 - count); 458 write_sx_io(p, s, 459 SX_LDB(10, count - 1, s & 7)); 460 write_sx_io(p, d, 461 SX_LDB(42, count - 1, d & 7)); 462 if (count > 16) { 463 write_sx_reg(p, SX_INSTRUCTIONS, 464 SX_ROP(10, 42, 74, 15)); 465 write_sx_reg(p, SX_INSTRUCTIONS, 466 SX_ROP(26, 58, 90, count - 17)); 467 } else { 468 write_sx_reg(p, SX_INSTRUCTIONS, 469 SX_ROP(10, 42, 74, count - 1)); 470 } 471 472 write_sx_io(p, d, 473 SX_STBM(74, count - 1, d & 7)); 474 } 475 srcstart += srcinc; 476 dststart += dstinc; 477 } 478 } 479 } 480 exaMarkSync(pDstPixmap->drawable.pScreen); 481} 482 483static void 484CG14DoneCopy(PixmapPtr pDstPixmap) 485{ 486} 487 488static Bool 489CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 490{ 491 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 492 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 493 494 ENTER; 495 DPRINTF(X_ERROR, "bits per pixel: %d %08x\n", 496 pPixmap->drawable.bitsPerPixel, fg); 497 498 /* repeat the colour in every sub byte if we're in 8 bit */ 499 if (pPixmap->drawable.bitsPerPixel == 8) { 500 fg |= fg << 8; 501 fg |= fg << 16; 502 } 503 write_sx_reg(p, SX_QUEUED(8), fg); 504 write_sx_reg(p, SX_QUEUED(9), fg); 505 if (planemask != p->last_mask) { 506 CG14Wait(p); 507 write_sx_reg(p, SX_PLANEMASK, planemask); 508 p->last_mask = planemask; 509 } 510 alu = sx_rop[alu]; 511 if (alu != p->last_rop) { 512 CG14Wait(p); 513 write_sx_reg(p, SX_ROP_CONTROL, alu); 514 p->last_rop = alu; 515 } 516 if (0) return FALSE; 517 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 518 return TRUE; 519} 520 521static void 522CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 523{ 524 int line, x, num; 525 uint32_t ptr; 526 527 ENTER; 528 if (p->last_rop == 0xcc) { 529 /* simple fill */ 530 for (line = 0; line < h; line++) { 531 x = 0; 532 while (x < w) { 533 ptr = start + (x << 2); 534 num = min(32, w - x); 535 write_sx_io(p, ptr, 536 SX_STS(8, num - 1, ptr & 7)); 537 x += 32; 538 } 539 start += pitch; 540 } 541 } else if (p->last_rop == 0xaa) { 542 /* nothing to do here */ 543 return; 544 } else { 545 /* alright, let's do actual ROP stuff */ 546 547 /* first repeat the fill colour into 16 registers */ 548 write_sx_reg(p, SX_INSTRUCTIONS, 549 SX_SELECT_S(8, 8, 10, 15)); 550 551 for (line = 0; line < h; line++) { 552 x = 0; 553 while (x < w) { 554 ptr = start + (x << 2); 555 num = min(32, w - x); 556 /* now suck fb data into registers */ 557 write_sx_io(p, ptr, 558 SX_LD(42, num - 1, ptr & 7)); 559 /* 560 * ROP them with the fill data we left in 10 561 * non-memory ops can only have counts up to 16 562 */ 563 if (num <= 16) { 564 write_sx_reg(p, SX_INSTRUCTIONS, 565 SX_ROP(10, 42, 74, num - 1)); 566 } else { 567 write_sx_reg(p, SX_INSTRUCTIONS, 568 SX_ROP(10, 42, 74, 15)); 569 write_sx_reg(p, SX_INSTRUCTIONS, 570 SX_ROP(10, 58, 90, num - 17)); 571 } 572 /* and write the result back into memory */ 573 write_sx_io(p, ptr, 574 SX_ST(74, num - 1, ptr & 7)); 575 x += 32; 576 } 577 start += pitch; 578 } 579 } 580} 581 582static void 583CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 584{ 585 int line, x, num, off, pre, cnt; 586 uint32_t ptr; 587 588 ENTER; 589 pre = start & 3; 590 if (pre != 0) pre = 4 - pre; 591 592 if (p->last_rop == 0xcc) { 593 /* simple fill */ 594 for (line = 0; line < h; line++) { 595 ptr = start; 596 cnt = w; 597 pre = min(pre, cnt); 598 if (pre) { 599 write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7)); 600 ptr += pre; 601 cnt -= pre; 602 if (cnt == 0) goto next; 603 } 604 /* now do the aligned pixels in 32bit chunks */ 605 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 606 while(cnt > 3) { 607 num = min(32, cnt >> 2); 608 write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7)); 609 ptr += num << 2; 610 cnt -= num << 2; 611 } 612 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 613 if (cnt > 0) { 614 write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7)); 615 } 616 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 617next: 618 start += pitch; 619 } 620 } else if (p->last_rop == 0xaa) { 621 /* nothing to do here */ 622 return; 623 } else { 624 /* alright, let's do actual ROP stuff */ 625 off = start & 7; 626 start &= ~7; 627 628 /* first repeat the fill colour into 16 registers */ 629 write_sx_reg(p, SX_INSTRUCTIONS, 630 SX_SELECT_S(8, 8, 10, 15)); 631 632 for (line = 0; line < h; line++) { 633 x = 0; 634 while (x < w) { 635 ptr = start + x; 636 num = min(32, w - x); 637 /* now suck fb data into registers */ 638 write_sx_io(p, ptr, 639 SX_LDB(42, num - 1, off)); 640 /* 641 * ROP them with the fill data we left in 10 642 * non-memory ops can only have counts up to 16 643 */ 644 if (num <= 16) { 645 write_sx_reg(p, SX_INSTRUCTIONS, 646 SX_ROP(10, 42, 74, num - 1)); 647 } else { 648 write_sx_reg(p, SX_INSTRUCTIONS, 649 SX_ROP(10, 42, 74, 15)); 650 write_sx_reg(p, SX_INSTRUCTIONS, 651 SX_ROP(10, 58, 90, num - 17)); 652 } 653 /* and write the result back into memory */ 654 write_sx_io(p, ptr, 655 SX_STB(74, num - 1, off)); 656 x += 32; 657 } 658 start += pitch; 659 } 660 } 661} 662 663static void 664CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 665{ 666 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 667 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 668 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 669 int start, depth; 670 671 ENTER; 672 dstpitch = exaGetPixmapPitch(pPixmap); 673 dstoff = exaGetPixmapOffset(pPixmap); 674 675 depth = pPixmap->drawable.bitsPerPixel; 676 switch (depth) { 677 case 32: 678 start = dstoff + (y1 * dstpitch) + (x1 << 2); 679 CG14Solid32(p, start, dstpitch, w, h); 680 break; 681 case 8: 682 start = dstoff + (y1 * dstpitch) + x1; 683 CG14Solid8(p, start, dstpitch, w, h); 684 break; 685 } 686 687 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 688 dstpitch, dstoff, start); 689 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 690 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 691 exaMarkSync(pPixmap->drawable.pScreen); 692} 693 694/* 695 * Memcpy-based UTS. 696 */ 697static Bool 698CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 699 char *src, int src_pitch) 700{ 701 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 702 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 703 char *dst = p->fb + exaGetPixmapOffset(pDst); 704 int dst_pitch = exaGetPixmapPitch(pDst); 705 706 int bpp = pDst->drawable.bitsPerPixel; 707 int cpp = (bpp + 7) >> 3; 708 int wBytes = w * cpp; 709 710 ENTER; 711 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 712 dst += (x * cpp) + (y * dst_pitch); 713 714 CG14Wait(p); 715 716 while (h--) { 717 memcpy(dst, src, wBytes); 718 src += src_pitch; 719 dst += dst_pitch; 720 } 721 __asm("stbar;"); 722 return TRUE; 723} 724 725/* 726 * Memcpy-based DFS. 727 */ 728static Bool 729CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 730 char *dst, int dst_pitch) 731{ 732 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 733 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 734 char *src = p->fb + exaGetPixmapOffset(pSrc); 735 int src_pitch = exaGetPixmapPitch(pSrc); 736 737 ENTER; 738 int bpp = pSrc->drawable.bitsPerPixel; 739 int cpp = (bpp + 7) >> 3; 740 int wBytes = w * cpp; 741 742 src += (x * cpp) + (y * src_pitch); 743 744 CG14Wait(p); 745 746 while (h--) { 747 memcpy(dst, src, wBytes); 748 src += src_pitch; 749 dst += dst_pitch; 750 } 751 752 return TRUE; 753} 754 755Bool 756CG14CheckComposite(int op, PicturePtr pSrcPicture, 757 PicturePtr pMaskPicture, 758 PicturePtr pDstPicture) 759{ 760 int i, ok = FALSE; 761 762 ENTER; 763 764 /* 765 * SX is in theory capable of accelerating pretty much all Xrender ops, 766 * even coordinate transformation and gradients. Support will be added 767 * over time and likely have to spill over into its own source file. 768 */ 769 770 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 771 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 772 return FALSE; 773 } 774 775 if (pSrcPicture != NULL) { 776 i = 0; 777 while ((i < arraysize(src_formats)) && (!ok)) { 778 ok = (pSrcPicture->format == src_formats[i]); 779 i++; 780 } 781 782 if (!ok) { 783 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 784 __func__, pSrcPicture->format); 785 return FALSE; 786 } 787 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 788 } 789 790 if (pDstPicture != NULL) { 791 i = 0; 792 ok = FALSE; 793 while ((i < arraysize(src_formats)) && (!ok)) { 794 ok = (pDstPicture->format == src_formats[i]); 795 i++; 796 } 797 798 if (!ok) { 799 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 800 __func__, pDstPicture->format); 801 return FALSE; 802 } 803 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 804 } 805 806 if (pMaskPicture != NULL) { 807 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 808 pMaskPicture->pDrawable->width, 809 pMaskPicture->pDrawable->height); 810 } 811 return TRUE; 812} 813 814Bool 815CG14PrepareComposite(int op, PicturePtr pSrcPicture, 816 PicturePtr pMaskPicture, 817 PicturePtr pDstPicture, 818 PixmapPtr pSrc, 819 PixmapPtr pMask, 820 PixmapPtr pDst) 821{ 822 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 823 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 824 825 ENTER; 826 827 p->no_source_pixmap = FALSE; 828 p->source_is_solid = FALSE; 829 830 if (pSrcPicture->format == PICT_a1) { 831 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 832 pDstPicture->format, op); 833 if (pMaskPicture != NULL) { 834 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 835 } 836 } 837 if (pSrcPicture->pSourcePict != NULL) { 838 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 839 p->fillcolour = 840 pSrcPicture->pSourcePict->solidFill.color; 841 DPRINTF(X_ERROR, "%s: solid src %08x\n", 842 __func__, p->fillcolour); 843 p->no_source_pixmap = TRUE; 844 p->source_is_solid = TRUE; 845 } 846 } 847 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 848 if (pMaskPicture->pSourcePict->type == 849 SourcePictTypeSolidFill) { 850 p->fillcolour = 851 pMaskPicture->pSourcePict->solidFill.color; 852 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 853 __func__, p->fillcolour); 854 } 855 } 856 if (pMaskPicture != NULL) { 857 p->mskoff = exaGetPixmapOffset(pMask); 858 p->mskpitch = exaGetPixmapPitch(pMask); 859 p->mskformat = pMaskPicture->format; 860 } else { 861 p->mskoff = 0; 862 p->mskpitch = 0; 863 p->mskformat = 0; 864 } 865 if (pSrc != NULL) { 866 p->source_is_solid = 867 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 868 p->srcoff = exaGetPixmapOffset(pSrc); 869 p->srcpitch = exaGetPixmapPitch(pSrc); 870 if (p->source_is_solid) { 871 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 872 } 873 } 874 p->srcformat = pSrcPicture->format; 875 p->dstformat = pDstPicture->format; 876 877 if (p->source_is_solid) { 878 uint32_t temp; 879 880 /* stuff source colour into SX registers, swap as needed */ 881 temp = p->fillcolour; 882 switch (p->srcformat) { 883 case PICT_a8r8g8b8: 884 case PICT_x8r8g8b8: 885 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 886 temp = temp >> 8; 887 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 888 temp = temp >> 8; 889 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 890 break; 891 case PICT_a8b8g8r8: 892 case PICT_x8b8g8r8: 893 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 894 temp = temp >> 8; 895 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 896 temp = temp >> 8; 897 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 898 break; 899 } 900 write_sx_reg(p, SX_QUEUED(8), 0xff); 901 } 902 p->op = op; 903 if (op == PictOpSrc) { 904 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 905 } 906#ifdef SX_DEBUG 907 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 908 *(uint32_t *)(p->fb + p->srcoff)); 909#endif 910 return TRUE; 911} 912 913void 914CG14Composite(PixmapPtr pDst, int srcX, int srcY, 915 int maskX, int maskY, 916 int dstX, int dstY, 917 int width, int height) 918{ 919 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 920 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 921 uint32_t dstoff, dstpitch; 922 uint32_t dst, msk, src; 923 int flip = 0; 924 925 ENTER; 926 dstoff = exaGetPixmapOffset(pDst); 927 dstpitch = exaGetPixmapPitch(pDst); 928 929 flip = (PICT_FORMAT_TYPE(p->srcformat) != 930 PICT_FORMAT_TYPE(p->dstformat)); 931 932 switch (p->op) { 933 case PictOpOver: 934 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 935 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 936 p->mskformat, p->dstformat, srcX, srcY); 937 if (p->source_is_solid) { 938 switch (p->mskformat) { 939 case PICT_a8: 940 msk = p->mskoff + 941 (maskY * p->mskpitch) + 942 maskX; 943 CG14Comp_Over8Solid(p, 944 msk, p->mskpitch, 945 dst, dstpitch, 946 width, height); 947 break; 948 case PICT_a8r8g8b8: 949 case PICT_a8b8g8r8: 950 msk = p->mskoff + 951 (maskY * p->mskpitch) + 952 (maskX << 2); 953 CG14Comp_Over32Solid(p, 954 msk, p->mskpitch, 955 dst, dstpitch, 956 width, height); 957 break; 958 default: 959 xf86Msg(X_ERROR, 960 "unsupported mask format %08x\n", p->mskformat); 961 } 962 } else { 963 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 964 p->mskformat); 965 switch (p->srcformat) { 966 case PICT_a8r8g8b8: 967 case PICT_a8b8g8r8: 968 src = p->srcoff + 969 (srcY * p->srcpitch) + 970 (srcX << 2); 971 dst = dstoff + 972 (dstY * dstpitch) + 973 (dstX << 2); 974 if (p->mskformat == PICT_a8) { 975 msk = p->mskoff + 976 (maskY * p->mskpitch) + 977 maskX; 978 CG14Comp_Over32Mask(p, 979 src, p->srcpitch, 980 msk, p->mskpitch, 981 dst, dstpitch, 982 width, height, flip); 983 } else { 984 CG14Comp_Over32(p, 985 src, p->srcpitch, 986 dst, dstpitch, 987 width, height, flip); 988 } 989 break; 990 case PICT_x8r8g8b8: 991 case PICT_x8b8g8r8: 992 src = p->srcoff + 993 (srcY * p->srcpitch) + 994 (srcX << 2); 995 dst = dstoff + 996 (dstY * dstpitch) + 997 (dstX << 2); 998 if (p->mskformat == PICT_a8) { 999 msk = p->mskoff + 1000 (maskY * p->mskpitch) + 1001 maskX; 1002 CG14Comp_Over32Mask_noalpha(p, 1003 src, p->srcpitch, 1004 msk, p->mskpitch, 1005 dst, dstpitch, 1006 width, height, flip); 1007 } else if ((p->mskformat == PICT_a8r8g8b8) || 1008 (p->mskformat == PICT_a8b8g8r8)) { 1009 msk = p->mskoff + 1010 (maskY * p->mskpitch) + 1011 (maskX << 2); 1012 CG14Comp_Over32Mask32_noalpha(p, 1013 src, p->srcpitch, 1014 msk, p->mskpitch, 1015 dst, dstpitch, 1016 width, height, flip); 1017 } else { 1018 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1019 } 1020 break; 1021 default: 1022 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1023 __func__, p->srcformat); 1024 } 1025 } 1026 break; 1027 case PictOpAdd: 1028 DPRINTF(X_ERROR, "Add %08x %08x\n", 1029 p->srcformat, p->dstformat); 1030 switch (p->srcformat) { 1031 case PICT_a8: 1032 src = p->srcoff + 1033 (srcY * p->srcpitch) + srcX; 1034 if (p->dstformat == PICT_a8) { 1035 dst = dstoff + 1036 (dstY * dstpitch) + dstX; 1037 CG14Comp_Add8(p, 1038 src, p->srcpitch, 1039 dst, dstpitch, 1040 width, height); 1041 } else { 1042 dst = dstoff + 1043 (dstY * dstpitch) + 1044 (dstX << 2); 1045 CG14Comp_Add8_32(p, 1046 src, p->srcpitch, 1047 dst, dstpitch, 1048 width, height); 1049 } 1050 break; 1051 case PICT_a8r8g8b8: 1052 case PICT_x8r8g8b8: 1053 src = p->srcoff + 1054 (srcY * p->srcpitch) + (srcX << 2); 1055 dst = dstoff + (dstY * dstpitch) + 1056 (dstX << 2); 1057 CG14Comp_Add32(p, src, p->srcpitch, 1058 dst, dstpitch, width, height); 1059 break; 1060 default: 1061 xf86Msg(X_ERROR, 1062 "unsupported src format\n"); 1063 } 1064 break; 1065 case PictOpSrc: 1066 DPRINTF(X_ERROR, "Src %08x %08x\n", 1067 p->srcformat, p->dstformat); 1068 if (p->mskformat != 0) 1069 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1070 if (p->srcformat == PICT_a8) { 1071 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1072 } else { 1073 /* convert between RGB and BGR? */ 1074 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1075 } 1076 break; 1077 default: 1078 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1079 } 1080 exaMarkSync(pDst->drawable.pScreen); 1081} 1082 1083 1084 1085Bool 1086CG14InitAccel(ScreenPtr pScreen) 1087{ 1088 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1089 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1090 ExaDriverPtr pExa; 1091 1092 pExa = exaDriverAlloc(); 1093 if (!pExa) 1094 return FALSE; 1095 1096 p->pExa = pExa; 1097 1098 pExa->exa_major = EXA_VERSION_MAJOR; 1099 pExa->exa_minor = EXA_VERSION_MINOR; 1100 1101 pExa->memoryBase = p->fb; 1102 pExa->memorySize = p->memsize; 1103 pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 1104 1105 /* 1106 * SX memory instructions are written to 64bit aligned addresses with 1107 * a 3 bit displacement. Make sure the displacement remains constant 1108 * within one column 1109 */ 1110 1111 pExa->pixmapOffsetAlign = 8; 1112 pExa->pixmapPitchAlign = 8; 1113 1114 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1115 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1116 /*| EXA_MIXED_PIXMAPS*/; 1117 1118 /* 1119 * these limits are bogus 1120 * SX doesn't deal with coordinates at all, so there is no limit but 1121 * we have to put something here 1122 */ 1123 pExa->maxX = 4096; 1124 pExa->maxY = 4096; 1125 1126 pExa->WaitMarker = CG14WaitMarker; 1127 1128 pExa->PrepareSolid = CG14PrepareSolid; 1129 pExa->Solid = CG14Solid; 1130 pExa->DoneSolid = CG14DoneCopy; 1131 pExa->PrepareCopy = CG14PrepareCopy; 1132 pExa->Copy = CG14Copy32; 1133 pExa->DoneCopy = CG14DoneCopy; 1134 if (p->use_xrender) { 1135 pExa->CheckComposite = CG14CheckComposite; 1136 pExa->PrepareComposite = CG14PrepareComposite; 1137 pExa->Composite = CG14Composite; 1138 pExa->DoneComposite = CG14DoneCopy; 1139 } 1140 1141 /* EXA hits more optimized paths when it does not have to fallback 1142 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1143 */ 1144 pExa->UploadToScreen = CG14UploadToScreen; 1145 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1146 1147 p->queuecount = 0; 1148 /* do some hardware init */ 1149 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1150 p->last_mask = 0xffffffff; 1151 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1152 p->last_rop = 0xcc; 1153 return exaDriverInit(pScreen, pExa); 1154} 1155