cg14_accel.c revision fc473876
1/* $NetBSD: cg14_accel.c,v 1.14 2019/03/01 02:22:27 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44#include <sparc/sxreg.h> 45 46/*#define SX_DEBUG*/ 47 48#ifdef SX_DEBUG 49#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 50#define DPRINTF xf86Msg 51#else 52#define ENTER 53#define DPRINTF while (0) xf86Msg 54#endif 55 56#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 57 58/* 0xcc is SX's GXcopy equivalent */ 59uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 60 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 61 62int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 63 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 64int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 65 66static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 67static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 68 69static inline void 70CG14Wait(Cg14Ptr p) 71{ 72 int bail = 10000000; 73 /* we wait for the busy bit to clear */ 74 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 75 (bail > 0)) { 76 bail--; 77 }; 78 if (bail == 0) { 79 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 80 read_sx_reg(p, SX_CONTROL_STATUS), 81 read_sx_reg(p, SX_ERROR)); 82 } 83} 84 85static void 86CG14WaitMarker(ScreenPtr pScreen, int Marker) 87{ 88 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 89 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 90 91 CG14Wait(p); 92} 93 94static Bool 95CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 96 int xdir, int ydir, int alu, Pixel planemask) 97{ 98 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 99 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 100 101 ENTER; 102 DPRINTF(X_ERROR, "bits per pixel: %d\n", 103 pSrcPixmap->drawable.bitsPerPixel); 104 105 if (planemask != p->last_mask) { 106 CG14Wait(p); 107 write_sx_reg(p, SX_PLANEMASK, planemask); 108 p->last_mask = planemask; 109 } 110 alu = sx_rop[alu]; 111 if (alu != p->last_rop) { 112 CG14Wait(p); 113 write_sx_reg(p, SX_ROP_CONTROL, alu); 114 p->last_rop = alu; 115 } 116 switch (pSrcPixmap->drawable.bitsPerPixel) { 117 case 8: 118 p->pExa->Copy = CG14Copy8; 119 break; 120 case 32: 121 p->pExa->Copy = CG14Copy32; 122 break; 123 default: 124 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 125 pSrcPixmap->drawable.bitsPerPixel); 126 } 127 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 128 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 129 p->xdir = xdir; 130 p->ydir = ydir; 131 return TRUE; 132} 133 134static void 135CG14Copy32(PixmapPtr pDstPixmap, 136 int srcX, int srcY, int dstX, int dstY, int w, int h) 137{ 138 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 139 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 140 int dstpitch, dstoff, srcpitch, srcoff; 141 int srcstart, dststart, xinc, srcinc, dstinc; 142 int line, count, s, d, num; 143 144 ENTER; 145 dstpitch = exaGetPixmapPitch(pDstPixmap); 146 dstoff = exaGetPixmapOffset(pDstPixmap); 147 srcpitch = p->srcpitch; 148 srcoff = p->srcoff; 149 /* 150 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 151 * actually wrote anything and only sync if it did 152 */ 153 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 154 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 155 156 /* 157 * we always copy up to 32 pixels at a time so direction doesn't 158 * matter if w<=32 159 */ 160 if (w > 32) { 161 if (p->xdir < 0) { 162 srcstart += (w - 32) << 2; 163 dststart += (w - 32) << 2; 164 xinc = -128; 165 } else 166 xinc = 128; 167 } else 168 xinc = 128; 169 if (p->ydir < 0) { 170 srcstart += (h - 1) * srcpitch; 171 dststart += (h - 1) * dstpitch; 172 srcinc = -srcpitch; 173 dstinc = -dstpitch; 174 } else { 175 srcinc = srcpitch; 176 dstinc = dstpitch; 177 } 178 if (p->last_rop == 0xcc) { 179 /* plain old copy */ 180 if ( xinc > 0) { 181 /* going left to right */ 182 for (line = 0; line < h; line++) { 183 count = 0; 184 s = srcstart; 185 d = dststart; 186 while ( count < w) { 187 num = min(32, w - count); 188 write_sx_io(p, s, 189 SX_LD(10, num - 1, s & 7)); 190 write_sx_io(p, d, 191 SX_STM(10, num - 1, d & 7)); 192 s += xinc; 193 d += xinc; 194 count += 32; 195 } 196 srcstart += srcinc; 197 dststart += dstinc; 198 } 199 } else { 200 /* going right to left */ 201 int i, chunks = (w >> 5); 202 for (line = 0; line < h; line++) { 203 s = srcstart; 204 d = dststart; 205 count = w; 206 for (i = 0; i < chunks; i++) { 207 write_sx_io(p, s, 208 SX_LD(10, 31, s & 7)); 209 write_sx_io(p, d, 210 SX_STM(10, 31, d & 7)); 211 s -= 128; 212 d -= 128; 213 count -= 32; 214 } 215 /* leftovers, if any */ 216 if (count > 0) { 217 s += (32 - count) << 2; 218 d += (32 - count) << 2; 219 write_sx_io(p, s, 220 SX_LD(10, count - 1, s & 7)); 221 write_sx_io(p, d, 222 SX_STM(10, count - 1, d & 7)); 223 } 224 srcstart += srcinc; 225 dststart += dstinc; 226 } 227 } 228 } else { 229 /* ROPs needed */ 230 if ( xinc > 0) { 231 /* going left to right */ 232 for (line = 0; line < h; line++) { 233 count = 0; 234 s = srcstart; 235 d = dststart; 236 while ( count < w) { 237 num = min(32, w - count); 238 write_sx_io(p, s, 239 SX_LD(10, num - 1, s & 7)); 240 write_sx_io(p, d, 241 SX_LD(42, num - 1, d & 7)); 242 if (num > 16) { 243 write_sx_reg(p, SX_INSTRUCTIONS, 244 SX_ROP(10, 42, 74, 15)); 245 write_sx_reg(p, SX_INSTRUCTIONS, 246 SX_ROP(26, 58, 90, num - 17)); 247 } else { 248 write_sx_reg(p, SX_INSTRUCTIONS, 249 SX_ROP(10, 42, 74, num - 1)); 250 } 251 write_sx_io(p, d, 252 SX_STM(74, num - 1, d & 7)); 253 s += xinc; 254 d += xinc; 255 count += 32; 256 } 257 srcstart += srcinc; 258 dststart += dstinc; 259 } 260 } else { 261 /* going right to left */ 262 int i, chunks = (w >> 5); 263 for (line = 0; line < h; line++) { 264 s = srcstart; 265 d = dststart; 266 count = w; 267 for (i = 0; i < chunks; i++) { 268 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 269 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 270 write_sx_reg(p, SX_INSTRUCTIONS, 271 SX_ROP(10, 42, 74, 15)); 272 write_sx_reg(p, SX_INSTRUCTIONS, 273 SX_ROP(26, 58, 90, 15)); 274 write_sx_io(p, d, 275 SX_STM(74, 31, d & 7)); 276 s -= 128; 277 d -= 128; 278 count -= 32; 279 } 280 /* leftovers, if any */ 281 if (count > 0) { 282 s += (32 - count) << 2; 283 d += (32 - count) << 2; 284 write_sx_io(p, s, 285 SX_LD(10, count - 1, s & 7)); 286 write_sx_io(p, d, 287 SX_LD(42, count - 1, d & 7)); 288 if (count > 16) { 289 write_sx_reg(p, SX_INSTRUCTIONS, 290 SX_ROP(10, 42, 74, 15)); 291 write_sx_reg(p, SX_INSTRUCTIONS, 292 SX_ROP(26, 58, 90, count - 17)); 293 } else { 294 write_sx_reg(p, SX_INSTRUCTIONS, 295 SX_ROP(10, 42, 74, count - 1)); 296 } 297 298 write_sx_io(p, d, 299 SX_STM(74, count - 1, d & 7)); 300 } 301 srcstart += srcinc; 302 dststart += dstinc; 303 } 304 } 305 } 306 exaMarkSync(pDstPixmap->drawable.pScreen); 307} 308 309static void 310CG14Copy8(PixmapPtr pDstPixmap, 311 int srcX, int srcY, int dstX, int dstY, int w, int h) 312{ 313 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 314 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 315 int dstpitch, dstoff, srcpitch, srcoff; 316 int srcstart, dststart, xinc, srcinc, dstinc; 317 int line, count, s, d, num; 318 319 ENTER; 320 dstpitch = exaGetPixmapPitch(pDstPixmap); 321 dstoff = exaGetPixmapOffset(pDstPixmap); 322 srcpitch = p->srcpitch; 323 srcoff = p->srcoff; 324 /* 325 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 326 * actually wrote anything and only sync if it did 327 */ 328 srcstart = srcX + (srcpitch * srcY) + srcoff; 329 dststart = dstX + (dstpitch * dstY) + dstoff; 330 331 /* 332 * we always copy up to 32 pixels at a time so direction doesn't 333 * matter if w<=32 334 */ 335 if (w > 32) { 336 if (p->xdir < 0) { 337 srcstart += (w - 32); 338 dststart += (w - 32); 339 xinc = -32; 340 } else 341 xinc = 32; 342 } else 343 xinc = 32; 344 if (p->ydir < 0) { 345 srcstart += (h - 1) * srcpitch; 346 dststart += (h - 1) * dstpitch; 347 srcinc = -srcpitch; 348 dstinc = -dstpitch; 349 } else { 350 srcinc = srcpitch; 351 dstinc = dstpitch; 352 } 353 if (p->last_rop == 0xcc) { 354 /* plain old copy */ 355 if ( xinc > 0) { 356 /* going left to right */ 357 for (line = 0; line < h; line++) { 358 count = 0; 359 s = srcstart; 360 d = dststart; 361 while ( count < w) { 362 num = min(32, w - count); 363 write_sx_io(p, s, 364 SX_LDB(10, num - 1, s & 7)); 365 write_sx_io(p, d, 366 SX_STBM(10, num - 1, d & 7)); 367 s += xinc; 368 d += xinc; 369 count += 32; 370 } 371 srcstart += srcinc; 372 dststart += dstinc; 373 } 374 } else { 375 /* going right to left */ 376 int i, chunks = (w >> 5); 377 for (line = 0; line < h; line++) { 378 s = srcstart; 379 d = dststart; 380 count = w; 381 for (i = 0; i < chunks; i++) { 382 write_sx_io(p, s, 383 SX_LDB(10, 31, s & 7)); 384 write_sx_io(p, d, 385 SX_STBM(10, 31, d & 7)); 386 s -= 32; 387 d -= 32; 388 count -= 32; 389 } 390 /* leftovers, if any */ 391 if (count > 0) { 392 s += (32 - count); 393 d += (32 - count); 394 write_sx_io(p, s, 395 SX_LDB(10, count - 1, s & 7)); 396 write_sx_io(p, d, 397 SX_STBM(10, count - 1, d & 7)); 398 } 399 srcstart += srcinc; 400 dststart += dstinc; 401 } 402 } 403 } else { 404 /* ROPs needed */ 405 if ( xinc > 0) { 406 /* going left to right */ 407 for (line = 0; line < h; line++) { 408 count = 0; 409 s = srcstart; 410 d = dststart; 411 while ( count < w) { 412 num = min(32, w - count); 413 write_sx_io(p, s, 414 SX_LDB(10, num - 1, s & 7)); 415 write_sx_io(p, d, 416 SX_LDB(42, num - 1, d & 7)); 417 if (num > 16) { 418 write_sx_reg(p, SX_INSTRUCTIONS, 419 SX_ROP(10, 42, 74, 15)); 420 write_sx_reg(p, SX_INSTRUCTIONS, 421 SX_ROP(26, 58, 90, num - 17)); 422 } else { 423 write_sx_reg(p, SX_INSTRUCTIONS, 424 SX_ROP(10, 42, 74, num - 1)); 425 } 426 write_sx_io(p, d, 427 SX_STBM(74, num - 1, d & 7)); 428 s += xinc; 429 d += xinc; 430 count += 32; 431 } 432 srcstart += srcinc; 433 dststart += dstinc; 434 } 435 } else { 436 /* going right to left */ 437 int i, chunks = (w >> 5); 438 for (line = 0; line < h; line++) { 439 s = srcstart; 440 d = dststart; 441 count = w; 442 for (i = 0; i < chunks; i++) { 443 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 444 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 445 write_sx_reg(p, SX_INSTRUCTIONS, 446 SX_ROP(10, 42, 74, 15)); 447 write_sx_reg(p, SX_INSTRUCTIONS, 448 SX_ROP(26, 58, 90, 15)); 449 write_sx_io(p, d, 450 SX_STBM(74, 31, d & 7)); 451 s -= 128; 452 d -= 128; 453 count -= 32; 454 } 455 /* leftovers, if any */ 456 if (count > 0) { 457 s += (32 - count); 458 d += (32 - count); 459 write_sx_io(p, s, 460 SX_LDB(10, count - 1, s & 7)); 461 write_sx_io(p, d, 462 SX_LDB(42, count - 1, d & 7)); 463 if (count > 16) { 464 write_sx_reg(p, SX_INSTRUCTIONS, 465 SX_ROP(10, 42, 74, 15)); 466 write_sx_reg(p, SX_INSTRUCTIONS, 467 SX_ROP(26, 58, 90, count - 17)); 468 } else { 469 write_sx_reg(p, SX_INSTRUCTIONS, 470 SX_ROP(10, 42, 74, count - 1)); 471 } 472 473 write_sx_io(p, d, 474 SX_STBM(74, count - 1, d & 7)); 475 } 476 srcstart += srcinc; 477 dststart += dstinc; 478 } 479 } 480 } 481 exaMarkSync(pDstPixmap->drawable.pScreen); 482} 483 484static void 485CG14DoneCopy(PixmapPtr pDstPixmap) 486{ 487} 488 489static Bool 490CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 491{ 492 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 493 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 494 495 ENTER; 496 DPRINTF(X_ERROR, "bits per pixel: %d\n", 497 pPixmap->drawable.bitsPerPixel); 498 write_sx_reg(p, SX_QUEUED(8), fg); 499 write_sx_reg(p, SX_QUEUED(9), fg); 500 if (planemask != p->last_mask) { 501 CG14Wait(p); 502 write_sx_reg(p, SX_PLANEMASK, planemask); 503 p->last_mask = planemask; 504 } 505 alu = sx_rop[alu]; 506 if (alu != p->last_rop) { 507 CG14Wait(p); 508 write_sx_reg(p, SX_ROP_CONTROL, alu); 509 p->last_rop = alu; 510 } 511 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 512 return TRUE; 513} 514 515static void 516CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 517{ 518 int line, x, num; 519 uint32_t ptr; 520 521 ENTER; 522 if (p->last_rop == 0xcc) { 523 /* simple fill */ 524 for (line = 0; line < h; line++) { 525 x = 0; 526 while (x < w) { 527 ptr = start + (x << 2); 528 num = min(32, w - x); 529 write_sx_io(p, ptr, 530 SX_STS(8, num - 1, ptr & 7)); 531 x += 32; 532 } 533 start += pitch; 534 } 535 } else if (p->last_rop == 0xaa) { 536 /* nothing to do here */ 537 return; 538 } else { 539 /* alright, let's do actual ROP stuff */ 540 541 /* first repeat the fill colour into 16 registers */ 542 write_sx_reg(p, SX_INSTRUCTIONS, 543 SX_SELECT_S(8, 8, 10, 15)); 544 545 for (line = 0; line < h; line++) { 546 x = 0; 547 while (x < w) { 548 ptr = start + (x << 2); 549 num = min(32, w - x); 550 /* now suck fb data into registers */ 551 write_sx_io(p, ptr, 552 SX_LD(42, num - 1, ptr & 7)); 553 /* 554 * ROP them with the fill data we left in 10 555 * non-memory ops can only have counts up to 16 556 */ 557 if (num <= 16) { 558 write_sx_reg(p, SX_INSTRUCTIONS, 559 SX_ROP(10, 42, 74, num - 1)); 560 } else { 561 write_sx_reg(p, SX_INSTRUCTIONS, 562 SX_ROP(10, 42, 74, 15)); 563 write_sx_reg(p, SX_INSTRUCTIONS, 564 SX_ROP(10, 58, 90, num - 17)); 565 } 566 /* and write the result back into memory */ 567 write_sx_io(p, ptr, 568 SX_ST(74, num - 1, ptr & 7)); 569 x += 32; 570 } 571 start += pitch; 572 } 573 } 574} 575 576static void 577CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 578{ 579 int line, x, num, off; 580 uint32_t ptr; 581 582 ENTER; 583 off = start & 7; 584 start &= ~7; 585 586 if (p->last_rop == 0xcc) { 587 /* simple fill */ 588 for (line = 0; line < h; line++) { 589 x = 0; 590 while (x < w) { 591 ptr = start + x; 592 num = min(32, w - x); 593 write_sx_io(p, ptr, 594 SX_STBS(8, num - 1, off)); 595 x += 32; 596 } 597 start += pitch; 598 } 599 } else if (p->last_rop == 0xaa) { 600 /* nothing to do here */ 601 return; 602 } else { 603 /* alright, let's do actual ROP stuff */ 604 605 /* first repeat the fill colour into 16 registers */ 606 write_sx_reg(p, SX_INSTRUCTIONS, 607 SX_SELECT_S(8, 8, 10, 15)); 608 609 for (line = 0; line < h; line++) { 610 x = 0; 611 while (x < w) { 612 ptr = start + x; 613 num = min(32, w - x); 614 /* now suck fb data into registers */ 615 write_sx_io(p, ptr, 616 SX_LDB(42, num - 1, off)); 617 /* 618 * ROP them with the fill data we left in 10 619 * non-memory ops can only have counts up to 16 620 */ 621 if (num <= 16) { 622 write_sx_reg(p, SX_INSTRUCTIONS, 623 SX_ROP(10, 42, 74, num - 1)); 624 } else { 625 write_sx_reg(p, SX_INSTRUCTIONS, 626 SX_ROP(10, 42, 74, 15)); 627 write_sx_reg(p, SX_INSTRUCTIONS, 628 SX_ROP(10, 58, 90, num - 17)); 629 } 630 /* and write the result back into memory */ 631 write_sx_io(p, ptr, 632 SX_STB(74, num - 1, off)); 633 x += 32; 634 } 635 start += pitch; 636 } 637 } 638} 639 640static void 641CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 642{ 643 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 644 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 645 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 646 int start, depth; 647 648 ENTER; 649 dstpitch = exaGetPixmapPitch(pPixmap); 650 dstoff = exaGetPixmapOffset(pPixmap); 651 652 depth = pPixmap->drawable.bitsPerPixel; 653 switch (depth) { 654 case 32: 655 start = dstoff + (y1 * dstpitch) + (x1 << 2); 656 CG14Solid32(p, start, dstpitch, w, h); 657 break; 658 case 8: 659 start = dstoff + (y1 * dstpitch) + x1; 660 CG14Solid8(p, start, dstpitch, w, h); 661 break; 662 } 663 664 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 665 dstpitch, dstoff, start); 666 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 667 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 668 exaMarkSync(pPixmap->drawable.pScreen); 669} 670 671/* 672 * Memcpy-based UTS. 673 */ 674static Bool 675CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 676 char *src, int src_pitch) 677{ 678 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 679 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 680 char *dst = p->fb + exaGetPixmapOffset(pDst); 681 int dst_pitch = exaGetPixmapPitch(pDst); 682 683 int bpp = pDst->drawable.bitsPerPixel; 684 int cpp = (bpp + 7) >> 3; 685 int wBytes = w * cpp; 686 687 ENTER; 688 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 689 dst += (x * cpp) + (y * dst_pitch); 690 691 CG14Wait(p); 692 693 while (h--) { 694 memcpy(dst, src, wBytes); 695 src += src_pitch; 696 dst += dst_pitch; 697 } 698 __asm("stbar;"); 699 return TRUE; 700} 701 702/* 703 * Memcpy-based DFS. 704 */ 705static Bool 706CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 707 char *dst, int dst_pitch) 708{ 709 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 710 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 711 char *src = p->fb + exaGetPixmapOffset(pSrc); 712 int src_pitch = exaGetPixmapPitch(pSrc); 713 714 ENTER; 715 int bpp = pSrc->drawable.bitsPerPixel; 716 int cpp = (bpp + 7) >> 3; 717 int wBytes = w * cpp; 718 719 src += (x * cpp) + (y * src_pitch); 720 721 CG14Wait(p); 722 723 while (h--) { 724 memcpy(dst, src, wBytes); 725 src += src_pitch; 726 dst += dst_pitch; 727 } 728 729 return TRUE; 730} 731 732Bool 733CG14CheckComposite(int op, PicturePtr pSrcPicture, 734 PicturePtr pMaskPicture, 735 PicturePtr pDstPicture) 736{ 737 int i, ok = FALSE; 738 739 ENTER; 740 741 /* 742 * SX is in theory capable of accelerating pretty much all Xrender ops, 743 * even coordinate transformation and gradients. Support will be added 744 * over time and likely have to spill over into its own source file. 745 */ 746 747 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 748 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 749 return FALSE; 750 } 751 752 if (pSrcPicture != NULL) { 753 i = 0; 754 while ((i < arraysize(src_formats)) && (!ok)) { 755 ok = (pSrcPicture->format == src_formats[i]); 756 i++; 757 } 758 759 if (!ok) { 760 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 761 __func__, pSrcPicture->format); 762 return FALSE; 763 } 764 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 765 } 766 767 if (pDstPicture != NULL) { 768 i = 0; 769 ok = FALSE; 770 while ((i < arraysize(src_formats)) && (!ok)) { 771 ok = (pDstPicture->format == src_formats[i]); 772 i++; 773 } 774 775 if (!ok) { 776 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 777 __func__, pDstPicture->format); 778 return FALSE; 779 } 780 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 781 } 782 783 if (pMaskPicture != NULL) { 784 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 785 pMaskPicture->pDrawable->width, 786 pMaskPicture->pDrawable->height); 787 } 788 return TRUE; 789} 790 791Bool 792CG14PrepareComposite(int op, PicturePtr pSrcPicture, 793 PicturePtr pMaskPicture, 794 PicturePtr pDstPicture, 795 PixmapPtr pSrc, 796 PixmapPtr pMask, 797 PixmapPtr pDst) 798{ 799 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 800 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 801 802 ENTER; 803 804 p->no_source_pixmap = FALSE; 805 p->source_is_solid = FALSE; 806 807 if (pSrcPicture->format == PICT_a1) { 808 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 809 pDstPicture->format, op); 810 if (pMaskPicture != NULL) { 811 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 812 } 813 } 814 if (pSrcPicture->pSourcePict != NULL) { 815 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 816 p->fillcolour = 817 pSrcPicture->pSourcePict->solidFill.color; 818 DPRINTF(X_ERROR, "%s: solid src %08x\n", 819 __func__, p->fillcolour); 820 p->no_source_pixmap = TRUE; 821 p->source_is_solid = TRUE; 822 } 823 } 824 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 825 if (pMaskPicture->pSourcePict->type == 826 SourcePictTypeSolidFill) { 827 p->fillcolour = 828 pMaskPicture->pSourcePict->solidFill.color; 829 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 830 __func__, p->fillcolour); 831 } 832 } 833 if (pMaskPicture != NULL) { 834 p->mskoff = exaGetPixmapOffset(pMask); 835 p->mskpitch = exaGetPixmapPitch(pMask); 836 p->mskformat = pMaskPicture->format; 837 } else { 838 p->mskoff = 0; 839 p->mskpitch = 0; 840 p->mskformat = 0; 841 } 842 if (pSrc != NULL) { 843 p->source_is_solid = 844 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 845 p->srcoff = exaGetPixmapOffset(pSrc); 846 p->srcpitch = exaGetPixmapPitch(pSrc); 847 if (p->source_is_solid) { 848 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 849 } 850 } 851 p->srcformat = pSrcPicture->format; 852 p->dstformat = pDstPicture->format; 853 854 if (p->source_is_solid) { 855 uint32_t temp; 856 857 /* stuff source colour into SX registers, swap as needed */ 858 temp = p->fillcolour; 859 switch (p->srcformat) { 860 case PICT_a8r8g8b8: 861 case PICT_x8r8g8b8: 862 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 863 temp = temp >> 8; 864 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 865 temp = temp >> 8; 866 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 867 break; 868 case PICT_a8b8g8r8: 869 case PICT_x8b8g8r8: 870 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 871 temp = temp >> 8; 872 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 873 temp = temp >> 8; 874 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 875 break; 876 } 877 write_sx_reg(p, SX_QUEUED(8), 0xff); 878 } 879 p->op = op; 880 if (op == PictOpSrc) { 881 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 882 } 883#ifdef SX_DEBUG 884 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 885 *(uint32_t *)(p->fb + p->srcoff)); 886#endif 887 return TRUE; 888} 889 890void 891CG14Composite(PixmapPtr pDst, int srcX, int srcY, 892 int maskX, int maskY, 893 int dstX, int dstY, 894 int width, int height) 895{ 896 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 897 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 898 uint32_t dstoff, dstpitch; 899 uint32_t dst, msk, src; 900 int flip = 0; 901 902 ENTER; 903 dstoff = exaGetPixmapOffset(pDst); 904 dstpitch = exaGetPixmapPitch(pDst); 905 906 flip = (PICT_FORMAT_TYPE(p->srcformat) != 907 PICT_FORMAT_TYPE(p->dstformat)); 908 909 switch (p->op) { 910 case PictOpOver: 911 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 912 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 913 p->mskformat, p->dstformat, srcX, srcY); 914 if (p->source_is_solid) { 915 switch (p->mskformat) { 916 case PICT_a8: 917 msk = p->mskoff + 918 (maskY * p->mskpitch) + 919 maskX; 920 CG14Comp_Over8Solid(p, 921 msk, p->mskpitch, 922 dst, dstpitch, 923 width, height); 924 break; 925 case PICT_a8r8g8b8: 926 case PICT_a8b8g8r8: 927 msk = p->mskoff + 928 (maskY * p->mskpitch) + 929 (maskX << 2); 930 CG14Comp_Over32Solid(p, 931 msk, p->mskpitch, 932 dst, dstpitch, 933 width, height); 934 break; 935 default: 936 xf86Msg(X_ERROR, 937 "unsupported mask format %08x\n", p->mskformat); 938 } 939 } else { 940 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 941 p->mskformat); 942 switch (p->srcformat) { 943 case PICT_a8r8g8b8: 944 case PICT_a8b8g8r8: 945 src = p->srcoff + 946 (srcY * p->srcpitch) + 947 (srcX << 2); 948 dst = dstoff + 949 (dstY * dstpitch) + 950 (dstX << 2); 951 if (p->mskformat == PICT_a8) { 952 msk = p->mskoff + 953 (maskY * p->mskpitch) + 954 maskX; 955 CG14Comp_Over32Mask(p, 956 src, p->srcpitch, 957 msk, p->mskpitch, 958 dst, dstpitch, 959 width, height, flip); 960 } else { 961 CG14Comp_Over32(p, 962 src, p->srcpitch, 963 dst, dstpitch, 964 width, height, flip); 965 } 966 break; 967 case PICT_x8r8g8b8: 968 case PICT_x8b8g8r8: 969 src = p->srcoff + 970 (srcY * p->srcpitch) + 971 (srcX << 2); 972 dst = dstoff + 973 (dstY * dstpitch) + 974 (dstX << 2); 975 if (p->mskformat == PICT_a8) { 976 msk = p->mskoff + 977 (maskY * p->mskpitch) + 978 maskX; 979 CG14Comp_Over32Mask_noalpha(p, 980 src, p->srcpitch, 981 msk, p->mskpitch, 982 dst, dstpitch, 983 width, height, flip); 984 } else if ((p->mskformat == PICT_a8r8g8b8) || 985 (p->mskformat == PICT_a8b8g8r8)) { 986 msk = p->mskoff + 987 (maskY * p->mskpitch) + 988 (maskX << 2); 989 CG14Comp_Over32Mask32_noalpha(p, 990 src, p->srcpitch, 991 msk, p->mskpitch, 992 dst, dstpitch, 993 width, height, flip); 994 } else { 995 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 996 } 997 break; 998 default: 999 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1000 __func__, p->srcformat); 1001 } 1002 } 1003 break; 1004 case PictOpAdd: 1005 DPRINTF(X_ERROR, "Add %08x %08x\n", 1006 p->srcformat, p->dstformat); 1007 switch (p->srcformat) { 1008 case PICT_a8: 1009 src = p->srcoff + 1010 (srcY * p->srcpitch) + srcX; 1011 if (p->dstformat == PICT_a8) { 1012 dst = dstoff + 1013 (dstY * dstpitch) + dstX; 1014 CG14Comp_Add8(p, 1015 src, p->srcpitch, 1016 dst, dstpitch, 1017 width, height); 1018 } else { 1019 dst = dstoff + 1020 (dstY * dstpitch) + 1021 (dstX << 2); 1022 CG14Comp_Add8_32(p, 1023 src, p->srcpitch, 1024 dst, dstpitch, 1025 width, height); 1026 } 1027 break; 1028 case PICT_a8r8g8b8: 1029 case PICT_x8r8g8b8: 1030 src = p->srcoff + 1031 (srcY * p->srcpitch) + (srcX << 2); 1032 dst = dstoff + (dstY * dstpitch) + 1033 (dstX << 2); 1034 CG14Comp_Add32(p, src, p->srcpitch, 1035 dst, dstpitch, width, height); 1036 break; 1037 default: 1038 xf86Msg(X_ERROR, 1039 "unsupported src format\n"); 1040 } 1041 break; 1042 case PictOpSrc: 1043 DPRINTF(X_ERROR, "Src %08x %08x\n", 1044 p->srcformat, p->dstformat); 1045 if (p->mskformat != 0) 1046 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1047 if (p->srcformat == PICT_a8) { 1048 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1049 } else { 1050 /* convert between RGB and BGR? */ 1051 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1052 } 1053 break; 1054 default: 1055 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1056 } 1057 exaMarkSync(pDst->drawable.pScreen); 1058} 1059 1060 1061 1062Bool 1063CG14InitAccel(ScreenPtr pScreen) 1064{ 1065 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1066 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1067 ExaDriverPtr pExa; 1068 1069 pExa = exaDriverAlloc(); 1070 if (!pExa) 1071 return FALSE; 1072 1073 p->pExa = pExa; 1074 1075 pExa->exa_major = EXA_VERSION_MAJOR; 1076 pExa->exa_minor = EXA_VERSION_MINOR; 1077 1078 pExa->memoryBase = p->fb; 1079 pExa->memorySize = p->memsize; 1080 pExa->offScreenBase = p->width * p->height * 4; 1081 1082 /* 1083 * SX memory instructions are written to 64bit aligned addresses with 1084 * a 3 bit displacement. Make sure the displacement remains constant 1085 * within one column 1086 */ 1087 1088 pExa->pixmapOffsetAlign = 8; 1089 pExa->pixmapPitchAlign = 8; 1090 1091 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1092 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1093 /*| EXA_MIXED_PIXMAPS*/; 1094 1095 /* 1096 * these limits are bogus 1097 * SX doesn't deal with coordinates at all, so there is no limit but 1098 * we have to put something here 1099 */ 1100 pExa->maxX = 4096; 1101 pExa->maxY = 4096; 1102 1103 pExa->WaitMarker = CG14WaitMarker; 1104 1105 pExa->PrepareSolid = CG14PrepareSolid; 1106 pExa->Solid = CG14Solid; 1107 pExa->DoneSolid = CG14DoneCopy; 1108 pExa->PrepareCopy = CG14PrepareCopy; 1109 pExa->Copy = CG14Copy32; 1110 pExa->DoneCopy = CG14DoneCopy; 1111 if (p->use_xrender) { 1112 pExa->CheckComposite = CG14CheckComposite; 1113 pExa->PrepareComposite = CG14PrepareComposite; 1114 pExa->Composite = CG14Composite; 1115 pExa->DoneComposite = CG14DoneCopy; 1116 } 1117 1118 /* EXA hits more optimized paths when it does not have to fallback 1119 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1120 */ 1121 pExa->UploadToScreen = CG14UploadToScreen; 1122 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1123 1124 /* do some hardware init */ 1125 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1126 p->last_mask = 0xffffffff; 1127 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1128 p->last_rop = 0xcc; 1129 return exaDriverInit(pScreen, pExa); 1130} 1131