cg14_accel.c revision b0f02aef
1/* $NetBSD: cg14_accel.c,v 1.31 2022/05/11 21:10:37 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45/*#define SX_DEBUG*/ 46/*#define SX_TRACE*/ 47 48#ifdef SX_TRACE 49#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 50#else 51#define ENTER 52#endif 53 54#ifdef SX_DEBUG 55#define DPRINTF xf86Msg 56#else 57#define DPRINTF while (0) xf86Msg 58#endif 59 60#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 61 62/* 0xcc is SX's GXcopy equivalent */ 63uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 64 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 65 66int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 67 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 68int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 69 70static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 71static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 72 73static inline void 74CG14Wait(Cg14Ptr p) 75{ 76 int bail = 10000000; 77 /* we wait for the busy bit to clear */ 78 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 79 (bail > 0)) { 80 bail--; 81 }; 82 if (bail == 0) { 83 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 84 read_sx_reg(p, SX_CONTROL_STATUS), 85 read_sx_reg(p, SX_ERROR)); 86 } 87} 88 89static void 90CG14WaitMarker(ScreenPtr pScreen, int Marker) 91{ 92 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 93 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 94 95 CG14Wait(p); 96} 97 98static Bool 99CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 100 int xdir, int ydir, int alu, Pixel planemask) 101{ 102 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 103 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 104 105 ENTER; 106 DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__, 107 pSrcPixmap->drawable.bitsPerPixel, alu); 108 109 if (planemask != p->last_mask) { 110 CG14Wait(p); 111 write_sx_reg(p, SX_PLANEMASK, planemask); 112 p->last_mask = planemask; 113 } 114 alu = sx_rop[alu]; 115 if (alu != p->last_rop) { 116 CG14Wait(p); 117 write_sx_reg(p, SX_ROP_CONTROL, alu); 118 p->last_rop = alu; 119 } 120 switch (pSrcPixmap->drawable.bitsPerPixel) { 121 case 8: 122 p->pExa->Copy = CG14Copy8; 123 break; 124 case 32: 125 p->pExa->Copy = CG14Copy32; 126 break; 127 default: 128 DPRINTF(X_ERROR, "%s depth %d\n", __func__, 129 pSrcPixmap->drawable.bitsPerPixel); 130 } 131 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 132 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 133 p->xdir = xdir; 134 p->ydir = ydir; 135 return TRUE; 136} 137 138static void 139CG14Copy32(PixmapPtr pDstPixmap, 140 int srcX, int srcY, int dstX, int dstY, int w, int h) 141{ 142 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 143 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 144 int dstpitch, dstoff, srcpitch, srcoff; 145 int srcstart, dststart, xinc, srcinc, dstinc; 146 int line, count, s, d, num; 147 148 ENTER; 149 dstpitch = exaGetPixmapPitch(pDstPixmap); 150 dstoff = exaGetPixmapOffset(pDstPixmap); 151 srcpitch = p->srcpitch; 152 srcoff = p->srcoff; 153 /* 154 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 155 * actually wrote anything and only sync if it did 156 */ 157 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 158 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 159 160 /* 161 * we always copy up to 32 pixels at a time so direction doesn't 162 * matter if w<=32 163 */ 164 if (w > 32) { 165 if (p->xdir < 0) { 166 srcstart += (w - 32) << 2; 167 dststart += (w - 32) << 2; 168 xinc = -128; 169 } else 170 xinc = 128; 171 } else 172 xinc = 128; 173 if (p->ydir < 0) { 174 srcstart += (h - 1) * srcpitch; 175 dststart += (h - 1) * dstpitch; 176 srcinc = -srcpitch; 177 dstinc = -dstpitch; 178 } else { 179 srcinc = srcpitch; 180 dstinc = dstpitch; 181 } 182 if (p->last_rop == 0xcc) { 183 /* plain old copy */ 184 if ( xinc > 0) { 185 /* going left to right */ 186 for (line = 0; line < h; line++) { 187 count = 0; 188 s = srcstart; 189 d = dststart; 190 while ( count < w) { 191 num = min(32, w - count); 192 sxm(SX_LD, s, 10, num - 1); 193 sxm(SX_STM, d, 10, num - 1); 194 s += xinc; 195 d += xinc; 196 count += 32; 197 } 198 srcstart += srcinc; 199 dststart += dstinc; 200 } 201 } else { 202 /* going right to left */ 203 int i, chunks = (w >> 5); 204 for (line = 0; line < h; line++) { 205 s = srcstart; 206 d = dststart; 207 count = w; 208 for (i = 0; i < chunks; i++) { 209 sxm(SX_LD, s, 10, 31); 210 sxm(SX_STM, d, 10, 31); 211 s -= 128; 212 d -= 128; 213 count -= 32; 214 } 215 /* leftovers, if any */ 216 if (count > 0) { 217 s += (32 - count) << 2; 218 d += (32 - count) << 2; 219 sxm(SX_LD, s, 10, count - 1); 220 sxm(SX_STM, d, 10, count - 1); 221 } 222 srcstart += srcinc; 223 dststart += dstinc; 224 } 225 } 226 } else { 227 /* ROPs needed */ 228 if ( xinc > 0) { 229 /* going left to right */ 230 for (line = 0; line < h; line++) { 231 count = 0; 232 s = srcstart; 233 d = dststart; 234 while ( count < w) { 235 num = min(32, w - count); 236 sxm(SX_LD, s, 10, num - 1); 237 sxm(SX_LD, d, 42, num - 1); 238 if (num > 16) { 239 sxi(SX_ROP, 10, 42, 74, 15); 240 sxi(SX_ROP, 26, 58, 90, num - 17); 241 } else { 242 sxi(SX_ROP, 10, 42, 74, num - 1); 243 } 244 sxm(SX_STM, d, 74, num - 1); 245 s += xinc; 246 d += xinc; 247 count += 32; 248 } 249 srcstart += srcinc; 250 dststart += dstinc; 251 } 252 } else { 253 /* going right to left */ 254 int i, chunks = (w >> 5); 255 for (line = 0; line < h; line++) { 256 s = srcstart; 257 d = dststart; 258 count = w; 259 for (i = 0; i < chunks; i++) { 260 sxm(SX_LD, s, 10, 31); 261 sxm(SX_LD, d, 42, 31); 262 sxi(SX_ROP, 10, 42, 74, 15); 263 sxi(SX_ROP, 26, 58, 90, 15); 264 sxm(SX_STM, d, 74, 31); 265 s -= 128; 266 d -= 128; 267 count -= 32; 268 } 269 /* leftovers, if any */ 270 if (count > 0) { 271 s += (32 - count) << 2; 272 d += (32 - count) << 2; 273 sxm(SX_LD, s, 10, count - 1); 274 sxm(SX_LD, d, 42, count - 1); 275 if (count > 16) { 276 sxi(SX_ROP, 10, 42, 74, 15); 277 sxi(SX_ROP, 26, 58, 90, count - 17); 278 } else { 279 sxi(SX_ROP, 10, 42, 74, count - 1); 280 } 281 sxm(SX_STM, d, 74, count - 1); 282 } 283 srcstart += srcinc; 284 dststart += dstinc; 285 } 286 } 287 } 288 exaMarkSync(pDstPixmap->drawable.pScreen); 289} 290 291/* 292 * copy with same alignment, left to right, no ROP 293 */ 294static void 295CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 296 int srcpitch, int dstpitch) 297{ 298 int saddr, daddr, pre, cnt, wrds; 299 300 ENTER; 301 302 pre = srcstart & 3; 303 if (pre != 0) pre = 4 - pre; 304 pre = min(pre, w); 305 306 while (h > 0) { 307 saddr = srcstart; 308 daddr = dststart; 309 cnt = w; 310 if (pre > 0) { 311 sxm(SX_LDB, saddr, 8, pre - 1); 312 sxm(SX_STB, daddr, 8, pre - 1); 313 saddr += pre; 314 daddr += pre; 315 cnt -= pre; 316 if (cnt == 0) goto next; 317 } 318 while (cnt > 3) { 319 wrds = min(32, cnt >> 2); 320 sxm(SX_LD, saddr, 8, wrds - 1); 321 sxm(SX_ST, daddr, 8, wrds - 1); 322 saddr += wrds << 2; 323 daddr += wrds << 2; 324 cnt -= wrds << 2; 325 } 326 if (cnt > 0) { 327 sxm(SX_LDB, saddr, 8, cnt - 1); 328 sxm(SX_STB, daddr, 8, cnt - 1); 329 } 330next: 331 srcstart += srcpitch; 332 dststart += dstpitch; 333 h--; 334 } 335} 336 337/* 338 * copy with same alignment, left to right, ROP 339 */ 340static void 341CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 342 int srcpitch, int dstpitch) 343{ 344 int saddr, daddr, pre, cnt, wrds; 345 346 ENTER; 347 348 pre = srcstart & 3; 349 if (pre != 0) pre = 4 - pre; 350 pre = min(pre, w); 351 352 while (h > 0) { 353 saddr = srcstart; 354 daddr = dststart; 355 cnt = w; 356 if (pre > 0) { 357 sxm(SX_LDB, saddr, 8, pre - 1); 358 sxm(SX_LDB, daddr, 40, pre - 1); 359 sxi(SX_ROP, 8, 40, 72, pre - 1); 360 sxm(SX_STB, daddr, 72, pre - 1); 361 saddr += pre; 362 daddr += pre; 363 cnt -= pre; 364 if (cnt == 0) goto next; 365 } 366 while (cnt > 3) { 367 wrds = min(32, cnt >> 2); 368 sxm(SX_LD, saddr, 8, wrds - 1); 369 sxm(SX_LD, daddr, 40, wrds - 1); 370 if (cnt > 16) { 371 sxi(SX_ROP, 8, 40, 72, 15); 372 sxi(SX_ROP, 8, 56, 88, wrds - 17); 373 } else 374 sxi(SX_ROP, 8, 40, 72, wrds - 1); 375 sxm(SX_ST, daddr, 72, wrds - 1); 376 saddr += wrds << 2; 377 daddr += wrds << 2; 378 cnt -= wrds << 2; 379 } 380 if (cnt > 0) { 381 sxm(SX_LDB, saddr, 8, cnt - 1); 382 sxm(SX_LDB, daddr, 40, cnt - 1); 383 sxi(SX_ROP, 8, 40, 72, cnt - 1); 384 sxm(SX_STB, daddr, 72, cnt - 1); 385 } 386next: 387 srcstart += srcpitch; 388 dststart += dstpitch; 389 h--; 390 } 391} 392 393/* up to 124 pixels so direction doesn't matter, unaligned, ROP */ 394static void 395CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 396{ 397 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 398 int ssreg; 399#ifdef DEBUG 400 int taddr = 4 + dstpitch * 50; 401#endif 402 uint32_t lmask, rmask; 403 ENTER; 404 405 pre = dststart & 3; 406 lmask = 0xffffffff >> pre; 407 spre = srcstart & 3; 408 /* 409 * make sure we count all the words needed to cover the destination 410 * line, covering potential partials on both ends 411 */ 412 wrds = (w + pre + 3) >> 2; 413 swrds = (w + spre + 3) >> 2; 414 415 if (spre < pre) { 416 dist = 32 - (pre - spre) * 8; 417 sreg = 9; 418 } else { 419 dist = (spre - pre) * 8; 420 sreg = 8; 421 } 422 423 /* 424 * mask out trailing pixels to avoid partial writes 425 */ 426 post = (dststart + w) & 3; 427 if (post != 0) { 428 rmask = ~(0xffffffff >> (post * 8)); 429 write_sx_reg(p, SX_QUEUED(7), rmask); 430 write_sx_reg(p, SX_QUEUED(6), ~rmask); 431 } 432 433 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 434 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 435 436 /* mask out the leading pixels in dst by using a mask and ROP */ 437 if (pre != 0) { 438 CG14Wait(p); 439 write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); 440 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 441 } 442 443 saddr = srcstart & ~3; 444 daddr = dststart & ~3; 445 446 while (h > 0) { 447 sxm(SX_LD, daddr, 80, wrds - 1); 448 sxm(SX_LD, saddr, sreg, swrds - 1); 449 if (wrds > 15) { 450 if (dist != 0) { 451 sxi(SX_FUNNEL_I, 8, dist, 40, 15); 452 sxi(SX_FUNNEL_I, 24, dist, 56, wrds - 16); 453 /* shifted source pixels are now at register 40+ */ 454 ssreg = 40; 455 } else ssreg = 8; 456 if (pre != 0) { 457 /* mask out leading junk */ 458 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 459 sxi(SX_ROPB, ssreg, 80, 8, 0); 460 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 461 sxi(SX_ROPB, ssreg + 1, 81, 9, 14); 462 } else { 463 sxi(SX_ROPB, ssreg, 80, 8, 15); 464 } 465 sxi(SX_ROPB, ssreg + 16, 96, 24, wrds - 16); 466 } else { 467 if (dist != 0) { 468 sxi(SX_FUNNEL_I, 8, dist, 40, wrds); 469 ssreg = 40; 470 } else ssreg = 8; 471 if (pre != 0) { 472 /* mask out leading junk */ 473 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 474 sxi(SX_ROPB, ssreg, 80, 8, 0); 475 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 476 sxi(SX_ROPB, ssreg + 1, 81, 9, wrds); 477 } else { 478 sxi(SX_ROPB, ssreg, 80, 8, wrds); 479 } 480 } 481 if (post != 0) { 482 /* 483 * if the last word to be written out is a partial we 484 * mask out the leftovers and replace them with 485 * background pixels 486 * we could pull the same ROP * mask trick as we do on 487 * the left end but it's less annoying this way and 488 * the instruction count is the same 489 */ 490 sxi(SX_ANDS, 7 + wrds, 7, 5, 0); 491 sxi(SX_ANDS, 79 + wrds, 6, 4, 0); 492 sxi(SX_ORS, 5, 4, 7 + wrds, 0); 493 } 494#ifdef DEBUG 495 sxm(SX_ST, taddr, 40, wrds - 1); 496 taddr += dstpitch; 497#endif 498 sxm(SX_ST, daddr, 8, wrds - 1); 499 saddr += srcpitch; 500 daddr += dstpitch; 501 h--; 502 } 503} 504 505/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 506static void 507CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 508 int srcpitch, int dstpitch) 509{ 510 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 511 int ssreg; 512#ifdef DEBUG 513 int taddr = 4 + dstpitch * 50; 514#endif 515 uint32_t lmask, rmask; 516 ENTER; 517 518 pre = dststart & 3; 519 lmask = 0xffffffff >> pre; 520 spre = srcstart & 3; 521 /* 522 * make sure we count all the words needed to cover the destination 523 * line, covering potential partials on both ends 524 */ 525 wrds = (w + pre + 3) >> 2; 526 swrds = (w + spre + 3) >> 2; 527 528 if (spre < pre) { 529 dist = 32 - (pre - spre) * 8; 530 sreg = 9; 531 } else { 532 dist = (spre - pre) * 8; 533 sreg = 8; 534 } 535 536 /* 537 * mask out trailing pixels to avoid partial writes 538 */ 539 post = (dststart + w) & 3; 540 if (post != 0) { 541 rmask = ~(0xffffffff >> (post * 8)); 542 write_sx_reg(p, SX_QUEUED(7), rmask); 543 write_sx_reg(p, SX_QUEUED(6), ~rmask); 544 } 545 546 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 547 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 548 549 /* mask out the leading pixels in dst by using a mask and ROP */ 550 if (pre != 0) { 551 CG14Wait(p); 552 write_sx_reg(p, SX_ROP_CONTROL, 0xca); 553 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 554 } 555 556 saddr = srcstart & ~3; 557 daddr = dststart & ~3; 558 559 while (h > 0) { 560 sxm(SX_LD, saddr, sreg, swrds - 1); 561 if (wrds > 15) { 562 if (dist != 0) { 563 sxi(SX_FUNNEL_I, 8, dist, 40, 15); 564 sxi(SX_FUNNEL_I, 24, dist, 56, wrds - 16); 565 /* shifted source pixels are now at reg 40+ */ 566 ssreg = 40; 567 } else ssreg = 8; 568 if (pre != 0) { 569 /* read only the first word */ 570 sxm(SX_LD, daddr, 80, 0); 571 /* mask out leading junk */ 572 sxi(SX_ROPB, ssreg, 80, ssreg, 0); 573 } 574 } else { 575 if (dist != 0) { 576 sxi(SX_FUNNEL_I, 8, dist, 40, wrds); 577 ssreg = 40; 578 } else ssreg = 8; 579 if (pre != 0) { 580 /* read only the first word */ 581 sxm(SX_LD, daddr, 80, 0); 582 /* mask out leading junk */ 583 sxi(SX_ROPB, ssreg, 80, ssreg, 0); 584 } 585 } 586 if (post != 0) { 587 int laddr = daddr + ((wrds - 1) << 2); 588 /* 589 * if the last word to be written out is a partial we 590 * mask out the leftovers and replace them with 591 * background pixels 592 * we could pull the same ROP * mask trick as we do on 593 * the left end but it's less annoying this way and 594 * the instruction count is the same 595 */ 596 sxm(SX_LD, laddr, 81, 0); 597 sxi(SX_ANDS, ssreg + wrds - 1, 7, 5, 0); 598 sxi(SX_ANDS, 81, 6, 4, 0); 599 sxi(SX_ORS, 5, 4, ssreg + wrds - 1, 0); 600 } 601#ifdef DEBUG 602 sxm(SX_ST, taddr, 40, wrds - 1); 603 taddr += dstpitch; 604#endif 605 sxm(SX_ST, daddr, ssreg, wrds - 1); 606 saddr += srcpitch; 607 daddr += dstpitch; 608 h--; 609 } 610} 611 612static void 613CG14Copy8(PixmapPtr pDstPixmap, 614 int srcX, int srcY, int dstX, int dstY, int w, int h) 615{ 616 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 617 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 618 int dstpitch, dstoff, srcpitch, srcoff; 619 int srcstart, dststart, xinc, srcinc, dstinc; 620 int line, count, s, d, num; 621 622 ENTER; 623 dstpitch = exaGetPixmapPitch(pDstPixmap); 624 dstoff = exaGetPixmapOffset(pDstPixmap); 625 srcpitch = p->srcpitch; 626 srcoff = p->srcoff; 627 /* 628 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 629 * actually wrote anything and only sync if it did 630 */ 631 srcstart = srcX + (srcpitch * srcY) + srcoff; 632 dststart = dstX + (dstpitch * dstY) + dstoff; 633 634 if (p->ydir < 0) { 635 srcstart += (h - 1) * srcpitch; 636 dststart += (h - 1) * dstpitch; 637 srcinc = -srcpitch; 638 dstinc = -dstpitch; 639 } else { 640 srcinc = srcpitch; 641 dstinc = dstpitch; 642 } 643 644 /* 645 * this copies up to 124 pixels wide in one go, so horizontal 646 * direction / overlap don't matter 647 * uses all 32bit accesses and funnel shifter for unaligned copies 648 */ 649 if ((w < 125) && (w > 8)) { 650 switch (p->last_rop) { 651 case 0xcc: 652 CG14Copy8_short_norop(p, 653 srcstart, dststart, w, h, srcinc, dstinc); 654 break; 655 default: 656 CG14Copy8_short_rop(p, 657 srcstart, dststart, w, h, srcinc, dstinc); 658 } 659 return; 660 } 661 662 /* 663 * only invert x direction if absolutely necessary, it's a pain to 664 * go backwards on SX so avoid as much as possible 665 */ 666 if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) { 667 xinc = -32; 668 } else 669 xinc = 32; 670 671 /* 672 * for aligned copies we can go all 32bit and avoid VRAM reads in the 673 * most common case 674 */ 675 if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) { 676 switch (p->last_rop) { 677 case 0xcc: 678 CG14Copy8_aligned_norop(p, 679 srcstart, dststart, w, h, srcinc, dstinc); 680 break; 681 default: 682 CG14Copy8_aligned_rop(p, 683 srcstart, dststart, w, h, srcinc, dstinc); 684 } 685 return; 686 } 687 688 /* 689 * if we make it here we either have something large and unaligned, 690 * something we need to do right to left, or something tiny. 691 * we handle the non-tiny cases by breaking them down into chunks that 692 * Copy8_short_*() can handle, making sure the destinations are 32bit 693 * aligned whenever possible 694 * since we copy by block, not by line we need to go backwards even if 695 * we don't copy within the same line 696 */ 697 if (w > 8) { 698 int next, wi, end = dststart + w; 699 DPRINTF(X_ERROR, "%s %08x %08x %d\n", 700 __func__, srcstart, dststart, w); 701 if ((p->xdir < 0) && (srcoff == dstoff)) { 702 srcstart += w; 703 next = max((end - 120) & ~3, dststart); 704 wi = end - next; 705 srcstart -= wi; 706 while (wi > 0) { 707 DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", 708 __func__, srcstart, next, wi); 709 if (p->last_rop == 0xcc) { 710 CG14Copy8_short_norop(p, srcstart, 711 next, wi, h, srcinc, dstinc); 712 } else 713 CG14Copy8_short_rop(p, srcstart, 714 next, wi, h, srcinc, dstinc); 715 end = next; 716 /* 717 * avoid extremely narrow copies so I don't 718 * have to deal with dangling start and end 719 * pixels in the same word 720 */ 721 if ((end - dststart) < 140) { 722 next = max((end - 80) & ~3, dststart); 723 } else { 724 next = max((end - 120) & ~3, dststart); 725 } 726 wi = end - next; 727 srcstart -= wi; 728 } 729 } else { 730 next = min(end, (dststart + 124) & ~3); 731 wi = next - dststart; 732 while (wi > 0) { 733 DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", 734 __func__, srcstart, next, wi); 735 if (p->last_rop == 0xcc) { 736 CG14Copy8_short_norop(p, 737 srcstart, dststart, wi, h, 738 srcinc, dstinc); 739 } else 740 CG14Copy8_short_rop(p, 741 srcstart, dststart, wi, h, 742 srcinc, dstinc); 743 srcstart += wi; 744 dststart = next; 745 if ((end - dststart) < 140) { 746 next = min(end, (dststart + 84) & ~3); 747 } else { 748 next = min(end, (dststart + 124) & ~3); 749 } 750 wi = next - dststart; 751 } 752 } 753 return; 754 } 755 if (xinc < 0) { 756 srcstart += (w - 32); 757 dststart += (w - 32); 758 } 759 760 DPRINTF(X_ERROR, "%s fallback to byte-wise %d %d\n", __func__, w, h); 761 if (p->last_rop == 0xcc) { 762 /* plain old copy */ 763 if ( xinc > 0) { 764 /* going left to right */ 765 for (line = 0; line < h; line++) { 766 count = 0; 767 s = srcstart; 768 d = dststart; 769 while ( count < w) { 770 num = min(32, w - count); 771 sxm(SX_LDB, s, 10, num - 1); 772 sxm(SX_STBM, d, 10, num - 1); 773 s += xinc; 774 d += xinc; 775 count += 32; 776 } 777 srcstart += srcinc; 778 dststart += dstinc; 779 } 780 } else { 781 /* going right to left */ 782 int i, chunks = (w >> 5); 783 for (line = 0; line < h; line++) { 784 s = srcstart; 785 d = dststart; 786 count = w; 787 for (i = 0; i < chunks; i++) { 788 sxm(SX_LDB, s, 10, 31); 789 sxm(SX_STBM, d, 10, 31); 790 s -= 32; 791 d -= 32; 792 count -= 32; 793 } 794 /* leftovers, if any */ 795 if (count > 0) { 796 s += (32 - count); 797 d += (32 - count); 798 sxm(SX_LDB, s, 10, count - 1); 799 sxm(SX_STBM, d, 10, count - 1); 800 } 801 srcstart += srcinc; 802 dststart += dstinc; 803 } 804 } 805 } else { 806 /* ROPs needed */ 807 if ( xinc > 0) { 808 /* going left to right */ 809 for (line = 0; line < h; line++) { 810 count = 0; 811 s = srcstart; 812 d = dststart; 813 while ( count < w) { 814 num = min(32, w - count); 815 sxm(SX_LDB, s, 10, num - 1); 816 sxm(SX_LDB, d, 42, num - 1); 817 if (num > 16) { 818 sxi(SX_ROP, 10, 42, 74, 15); 819 sxi(SX_ROP, 26, 58, 90, num - 17); 820 } else { 821 sxi(SX_ROP, 10, 42, 74, num - 1); 822 } 823 sxm(SX_STBM, d, 74, num - 1); 824 s += xinc; 825 d += xinc; 826 count += 32; 827 } 828 srcstart += srcinc; 829 dststart += dstinc; 830 } 831 } else { 832 /* going right to left */ 833 int i, chunks = (w >> 5); 834 for (line = 0; line < h; line++) { 835 s = srcstart; 836 d = dststart; 837 count = w; 838 for (i = 0; i < chunks; i++) { 839 sxm(SX_LDB, s, 10, 31); 840 sxm(SX_LDB, d, 42, 31); 841 sxi(SX_ROP, 10, 42, 74, 15); 842 sxi(SX_ROP, 26, 58, 90, 15); 843 sxm(SX_STBM, d, 74, 31); 844 s -= 128; 845 d -= 128; 846 count -= 32; 847 } 848 /* leftovers, if any */ 849 if (count > 0) { 850 s += (32 - count); 851 d += (32 - count); 852 sxm(SX_LDB, s, 10, count - 1); 853 sxm(SX_LDB, d, 42, count - 1); 854 if (count > 16) { 855 sxi(SX_ROP, 10, 42, 74, 15); 856 sxi(SX_ROP, 26, 58, 90, count - 17); 857 } else { 858 sxi(SX_ROP, 10, 42, 74, count - 1); 859 } 860 sxm(SX_STBM, d, 74, count - 1); 861 } 862 srcstart += srcinc; 863 dststart += dstinc; 864 } 865 } 866 } 867 exaMarkSync(pDstPixmap->drawable.pScreen); 868} 869 870static void 871CG14DoneCopy(PixmapPtr pDstPixmap) 872{ 873} 874 875static Bool 876CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 877{ 878 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 879 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 880 881 ENTER; 882 DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n", 883 pPixmap->drawable.bitsPerPixel, fg); 884 885 /* 886 * GXset and GXclear are really just specual cases of GXcopy with 887 * fixed fill colour 888 */ 889 switch (alu) { 890 case GXclear: 891 alu = GXcopy; 892 fg = 0; 893 break; 894 case GXset: 895 alu = GXcopy; 896 fg = 0xffffffff; 897 break; 898 } 899 /* repeat the colour in every sub byte if we're in 8 bit */ 900 if (pPixmap->drawable.bitsPerPixel == 8) { 901 fg |= fg << 8; 902 fg |= fg << 16; 903 } 904 write_sx_reg(p, SX_QUEUED(8), fg); 905 write_sx_reg(p, SX_QUEUED(9), fg); 906 if (planemask != p->last_mask) { 907 CG14Wait(p); 908 write_sx_reg(p, SX_PLANEMASK, planemask); 909 p->last_mask = planemask; 910 } 911 alu = sx_rop[alu]; 912 if (alu != p->last_rop) { 913 CG14Wait(p); 914 write_sx_reg(p, SX_ROP_CONTROL, alu); 915 p->last_rop = alu; 916 } 917 918 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 919 return TRUE; 920} 921 922static void 923CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 924{ 925 int line, x, num; 926 uint32_t ptr; 927 928 ENTER; 929 if (p->last_rop == 0xcc) { 930 /* simple fill */ 931 for (line = 0; line < h; line++) { 932 x = 0; 933 while (x < w) { 934 ptr = start + (x << 2); 935 num = min(32, w - x); 936 sxm(SX_STS, ptr, 8, num - 1); 937 x += 32; 938 } 939 start += pitch; 940 } 941 } else if (p->last_rop == 0xaa) { 942 /* nothing to do here */ 943 return; 944 } else { 945 /* alright, let's do actual ROP stuff */ 946 947 /* first repeat the fill colour into 16 registers */ 948 sxi(SX_SELECT_S, 8, 8, 10, 15); 949 950 for (line = 0; line < h; line++) { 951 x = 0; 952 while (x < w) { 953 ptr = start + (x << 2); 954 num = min(32, w - x); 955 /* now suck fb data into registers */ 956 sxm(SX_LD, ptr, 42, num - 1); 957 /* 958 * ROP them with the fill data we left in 10 959 * non-memory ops can only have counts up to 16 960 */ 961 if (num <= 16) { 962 sxi(SX_ROP, 10, 42, 74, num - 1); 963 } else { 964 sxi(SX_ROP, 10, 42, 74, 15); 965 sxi(SX_ROP, 10, 58, 90, num - 17); 966 } 967 /* and write the result back into memory */ 968 sxm(SX_ST, ptr, 74, num - 1); 969 x += 32; 970 } 971 start += pitch; 972 } 973 } 974} 975 976static void 977CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 978{ 979 int line, num, pre, cnt; 980 uint32_t ptr; 981 982 ENTER; 983 pre = start & 3; 984 if (pre != 0) pre = 4 - pre; 985 986 if (p->last_rop == 0xcc) { 987 /* simple fill */ 988 for (line = 0; line < h; line++) { 989 ptr = start; 990 cnt = w; 991 pre = min(pre, cnt); 992 if (pre) { 993 sxm(SX_STBS, ptr, 8, pre - 1); 994 ptr += pre; 995 cnt -= pre; 996 if (cnt == 0) goto next; 997 } 998 /* now do the aligned pixels in 32bit chunks */ 999 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1000 while(cnt > 3) { 1001 num = min(32, cnt >> 2); 1002 sxm(SX_STS, ptr, 8, num - 1); 1003 ptr += num << 2; 1004 cnt -= num << 2; 1005 } 1006 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1007 if (cnt > 0) { 1008 sxm(SX_STBS, ptr, 8, cnt - 1); 1009 } 1010 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1011next: 1012 start += pitch; 1013 } 1014 } else if (p->last_rop == 0xaa) { 1015 /* nothing to do here */ 1016 return; 1017 } else { 1018 /* alright, let's do actual ROP stuff */ 1019 1020 /* first repeat the fill colour into 16 registers */ 1021 sxi(SX_SELECT_S, 8, 8, 10, 15); 1022 1023 for (line = 0; line < h; line++) { 1024 ptr = start; 1025 cnt = w; 1026 pre = min(pre, cnt); 1027 if (pre) { 1028 sxm(SX_LDB, ptr, 26, pre - 1); 1029 sxi(SX_ROP, 10, 26, 42, pre - 1); 1030 sxm(SX_STB, ptr, 42, pre - 1); 1031 ptr += pre; 1032 cnt -= pre; 1033 if (cnt == 0) goto next2; 1034 } 1035 /* now do the aligned pixels in 32bit chunks */ 1036 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1037 while(cnt > 3) { 1038 num = min(32, cnt >> 2); 1039 sxm(SX_LD, ptr, 26, num - 1); 1040 if (num <= 16) { 1041 sxi(SX_ROP, 10, 26, 58, num - 1); 1042 } else { 1043 sxi(SX_ROP, 10, 26, 58, 15); 1044 sxi(SX_ROP, 10, 42, 74, num - 17); 1045 } 1046 sxm(SX_ST, ptr, 58, num - 1); 1047 ptr += num << 2; 1048 cnt -= num << 2; 1049 } 1050 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1051 if (cnt > 0) { 1052 sxm(SX_LDB, ptr, 26, cnt - 1); 1053 sxi(SX_ROP, 10, 26, 42, cnt - 1); 1054 sxm(SX_STB, ptr, 42, cnt - 1); 1055 } 1056 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1057next2: 1058 start += pitch; 1059 } 1060 } 1061} 1062 1063static void 1064CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 1065{ 1066 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1067 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1068 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 1069 int start, depth; 1070 1071 ENTER; 1072 dstpitch = exaGetPixmapPitch(pPixmap); 1073 dstoff = exaGetPixmapOffset(pPixmap); 1074 1075 depth = pPixmap->drawable.bitsPerPixel; 1076 switch (depth) { 1077 case 32: 1078 start = dstoff + (y1 * dstpitch) + (x1 << 2); 1079 CG14Solid32(p, start, dstpitch, w, h); 1080 break; 1081 case 8: 1082 start = dstoff + (y1 * dstpitch) + x1; 1083 CG14Solid8(p, start, dstpitch, w, h); 1084 break; 1085 } 1086 1087 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 1088 dstpitch, dstoff, start); 1089 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 1090 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 1091 exaMarkSync(pPixmap->drawable.pScreen); 1092} 1093 1094/* 1095 * Memcpy-based UTS. 1096 */ 1097static Bool 1098CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1099 char *src, int src_pitch) 1100{ 1101 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1102 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1103 char *dst = p->fb + exaGetPixmapOffset(pDst); 1104 int dst_pitch = exaGetPixmapPitch(pDst); 1105 1106 int bpp = pDst->drawable.bitsPerPixel; 1107 int cpp = (bpp + 7) >> 3; 1108 int wBytes = w * cpp; 1109 1110 ENTER; 1111 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 1112 dst += (x * cpp) + (y * dst_pitch); 1113 1114 CG14Wait(p); 1115 1116 while (h--) { 1117 memcpy(dst, src, wBytes); 1118 src += src_pitch; 1119 dst += dst_pitch; 1120 } 1121 __asm("stbar;"); 1122 return TRUE; 1123} 1124 1125/* 1126 * Memcpy-based DFS. 1127 */ 1128static Bool 1129CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1130 char *dst, int dst_pitch) 1131{ 1132 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1133 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1134 char *src = p->fb + exaGetPixmapOffset(pSrc); 1135 int src_pitch = exaGetPixmapPitch(pSrc); 1136 1137 ENTER; 1138 int bpp = pSrc->drawable.bitsPerPixel; 1139 int cpp = (bpp + 7) >> 3; 1140 int wBytes = w * cpp; 1141 1142 src += (x * cpp) + (y * src_pitch); 1143 1144 CG14Wait(p); 1145 1146 while (h--) { 1147 memcpy(dst, src, wBytes); 1148 src += src_pitch; 1149 dst += dst_pitch; 1150 } 1151 1152 return TRUE; 1153} 1154 1155Bool 1156CG14CheckComposite(int op, PicturePtr pSrcPicture, 1157 PicturePtr pMaskPicture, 1158 PicturePtr pDstPicture) 1159{ 1160 int i, ok = FALSE; 1161 1162 ENTER; 1163 1164 /* 1165 * SX is in theory capable of accelerating pretty much all Xrender ops, 1166 * even coordinate transformation and gradients. Support will be added 1167 * over time and likely have to spill over into its own source file. 1168 */ 1169 1170 if ((op != PictOpOver) && (op != PictOpAdd)/* && (op != PictOpSrc)*/) { 1171 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 1172 return FALSE; 1173 } 1174 1175 if (pSrcPicture != NULL) { 1176 i = 0; 1177 while ((i < arraysize(src_formats)) && (!ok)) { 1178 ok = (pSrcPicture->format == src_formats[i]); 1179 i++; 1180 } 1181 1182 if (!ok) { 1183 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 1184 __func__, pSrcPicture->format); 1185 return FALSE; 1186 } 1187 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 1188 } 1189 1190 if (pDstPicture != NULL) { 1191 i = 0; 1192 ok = FALSE; 1193 while ((i < arraysize(src_formats)) && (!ok)) { 1194 ok = (pDstPicture->format == src_formats[i]); 1195 i++; 1196 } 1197 1198 if (!ok) { 1199 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 1200 __func__, pDstPicture->format); 1201 return FALSE; 1202 } 1203 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 1204 } 1205 1206 if (pMaskPicture != NULL) { 1207 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 1208 pMaskPicture->pDrawable->width, 1209 pMaskPicture->pDrawable->height); 1210 } 1211 return TRUE; 1212} 1213 1214Bool 1215CG14PrepareComposite(int op, PicturePtr pSrcPicture, 1216 PicturePtr pMaskPicture, 1217 PicturePtr pDstPicture, 1218 PixmapPtr pSrc, 1219 PixmapPtr pMask, 1220 PixmapPtr pDst) 1221{ 1222 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1223 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1224 1225 ENTER; 1226 1227 p->no_source_pixmap = FALSE; 1228 p->source_is_solid = FALSE; 1229 1230 if (pSrcPicture->format == PICT_a1) { 1231 DPRINTF(X_ERROR, "src mono, dst %x, op %d\n", 1232 pDstPicture->format, op); 1233 if (pMaskPicture != NULL) { 1234 DPRINTF(X_ERROR, "msk %x\n", pMaskPicture->format); 1235 } 1236 } 1237 if (pSrcPicture->pSourcePict != NULL) { 1238 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 1239 p->fillcolour = 1240 pSrcPicture->pSourcePict->solidFill.color; 1241 DPRINTF(X_ERROR, "%s: solid src %08x\n", 1242 __func__, p->fillcolour); 1243 p->no_source_pixmap = TRUE; 1244 p->source_is_solid = TRUE; 1245 } 1246 } 1247 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 1248 if (pMaskPicture->pSourcePict->type == 1249 SourcePictTypeSolidFill) { 1250 p->fillcolour = 1251 pMaskPicture->pSourcePict->solidFill.color; 1252 DPRINTF(X_ERROR, "%s: solid mask %08x\n", 1253 __func__, p->fillcolour); 1254 } 1255 } 1256 if (pMaskPicture != NULL) { 1257 p->mskoff = exaGetPixmapOffset(pMask); 1258 p->mskpitch = exaGetPixmapPitch(pMask); 1259 p->mskformat = pMaskPicture->format; 1260 } else { 1261 p->mskoff = 0; 1262 p->mskpitch = 0; 1263 p->mskformat = 0; 1264 } 1265 if (pSrc != NULL) { 1266 p->source_is_solid = 1267 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 1268 p->srcoff = exaGetPixmapOffset(pSrc); 1269 p->srcpitch = exaGetPixmapPitch(pSrc); 1270 if (p->source_is_solid) { 1271 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 1272 } 1273 } 1274 p->srcformat = pSrcPicture->format; 1275 p->dstformat = pDstPicture->format; 1276 1277 if (p->source_is_solid) { 1278 uint32_t temp; 1279 1280 /* stuff source colour into SX registers, swap as needed */ 1281 temp = p->fillcolour; 1282 DPRINTF(X_ERROR, "solid %08x\n", temp); 1283 switch (p->srcformat) { 1284 case PICT_a8r8g8b8: 1285 case PICT_x8r8g8b8: 1286 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1287 temp = temp >> 8; 1288 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1289 temp = temp >> 8; 1290 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1291 break; 1292 case PICT_a8b8g8r8: 1293 case PICT_x8b8g8r8: 1294 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1295 temp = temp >> 8; 1296 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1297 temp = temp >> 8; 1298 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1299 break; 1300 } 1301 write_sx_reg(p, SX_QUEUED(8), 0xff); 1302 } 1303 p->op = op; 1304 if (op == PictOpSrc) { 1305 if (pSrc == NULL) { 1306 DPRINTF(X_ERROR, "src type %d\n", pSrcPicture->pSourcePict->type); 1307 return FALSE; 1308 } 1309 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 1310 } 1311#ifdef SX_DEBUG 1312 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 1313 *(uint32_t *)(p->fb + p->srcoff)); 1314#endif 1315 return TRUE; 1316} 1317 1318void 1319CG14Composite(PixmapPtr pDst, int srcX, int srcY, 1320 int maskX, int maskY, 1321 int dstX, int dstY, 1322 int width, int height) 1323{ 1324 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1325 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1326 uint32_t dstoff, dstpitch; 1327 uint32_t dst, msk, src; 1328 int flip = 0; 1329 1330 ENTER; 1331 dstoff = exaGetPixmapOffset(pDst); 1332 dstpitch = exaGetPixmapPitch(pDst); 1333 1334 flip = (PICT_FORMAT_TYPE(p->srcformat) != 1335 PICT_FORMAT_TYPE(p->dstformat)); 1336 1337 switch (p->op) { 1338 case PictOpOver: 1339 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 1340 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 1341 p->mskformat, p->dstformat, srcX, srcY); 1342 if (p->source_is_solid) { 1343 switch (p->mskformat) { 1344 case PICT_a8: 1345 msk = p->mskoff + 1346 (maskY * p->mskpitch) + 1347 maskX; 1348 CG14Comp_Over8Solid(p, 1349 msk, p->mskpitch, 1350 dst, dstpitch, 1351 width, height); 1352 break; 1353 case PICT_a8r8g8b8: 1354 case PICT_a8b8g8r8: 1355 msk = p->mskoff + 1356 (maskY * p->mskpitch) + 1357 (maskX << 2); 1358 CG14Comp_Over32Solid(p, 1359 msk, p->mskpitch, 1360 dst, dstpitch, 1361 width, height); 1362 break; 1363 default: 1364 xf86Msg(X_ERROR, 1365 "unsupported mask format %08x\n", p->mskformat); 1366 } 1367 } else { 1368 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 1369 p->mskformat); 1370 switch (p->srcformat) { 1371 case PICT_a8r8g8b8: 1372 case PICT_a8b8g8r8: 1373 src = p->srcoff + 1374 (srcY * p->srcpitch) + 1375 (srcX << 2); 1376 dst = dstoff + 1377 (dstY * dstpitch) + 1378 (dstX << 2); 1379 if (p->mskformat == PICT_a8) { 1380 msk = p->mskoff + 1381 (maskY * p->mskpitch) + 1382 maskX; 1383 CG14Comp_Over32Mask(p, 1384 src, p->srcpitch, 1385 msk, p->mskpitch, 1386 dst, dstpitch, 1387 width, height, flip); 1388 } else { 1389 CG14Comp_Over32(p, 1390 src, p->srcpitch, 1391 dst, dstpitch, 1392 width, height, flip); 1393 } 1394 break; 1395 case PICT_x8r8g8b8: 1396 case PICT_x8b8g8r8: 1397 src = p->srcoff + 1398 (srcY * p->srcpitch) + 1399 (srcX << 2); 1400 dst = dstoff + 1401 (dstY * dstpitch) + 1402 (dstX << 2); 1403 if (p->mskformat == PICT_a8) { 1404 msk = p->mskoff + 1405 (maskY * p->mskpitch) + 1406 maskX; 1407 CG14Comp_Over32Mask_noalpha(p, 1408 src, p->srcpitch, 1409 msk, p->mskpitch, 1410 dst, dstpitch, 1411 width, height, flip); 1412 } else if ((p->mskformat == PICT_a8r8g8b8) || 1413 (p->mskformat == PICT_a8b8g8r8)) { 1414 msk = p->mskoff + 1415 (maskY * p->mskpitch) + 1416 (maskX << 2); 1417 CG14Comp_Over32Mask32_noalpha(p, 1418 src, p->srcpitch, 1419 msk, p->mskpitch, 1420 dst, dstpitch, 1421 width, height, flip); 1422 } else { 1423 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1424 } 1425 break; 1426 default: 1427 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1428 __func__, p->srcformat); 1429 } 1430 } 1431 break; 1432 case PictOpAdd: 1433 DPRINTF(X_ERROR, "Add %08x %08x\n", 1434 p->srcformat, p->dstformat); 1435 switch (p->srcformat) { 1436 case PICT_a8: 1437 src = p->srcoff + 1438 (srcY * p->srcpitch) + srcX; 1439 if (p->dstformat == PICT_a8) { 1440 dst = dstoff + 1441 (dstY * dstpitch) + dstX; 1442 CG14Comp_Add8(p, 1443 src, p->srcpitch, 1444 dst, dstpitch, 1445 width, height); 1446 } else { 1447 dst = dstoff + 1448 (dstY * dstpitch) + 1449 (dstX << 2); 1450 CG14Comp_Add8_32(p, 1451 src, p->srcpitch, 1452 dst, dstpitch, 1453 width, height); 1454 } 1455 break; 1456 case PICT_a8r8g8b8: 1457 case PICT_x8r8g8b8: 1458 src = p->srcoff + 1459 (srcY * p->srcpitch) + (srcX << 2); 1460 dst = dstoff + (dstY * dstpitch) + 1461 (dstX << 2); 1462 CG14Comp_Add32(p, src, p->srcpitch, 1463 dst, dstpitch, width, height); 1464 break; 1465 default: 1466 xf86Msg(X_ERROR, 1467 "unsupported src format\n"); 1468 } 1469 break; 1470 case PictOpSrc: 1471 DPRINTF(X_ERROR, "Src %08x %08x\n", 1472 p->srcformat, p->dstformat); 1473 if (p->mskformat != 0) 1474 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1475 if (p->srcformat == PICT_a8) { 1476 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1477 } else { 1478 /* convert between RGB and BGR? */ 1479 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1480 } 1481 break; 1482 default: 1483 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1484 } 1485 exaMarkSync(pDst->drawable.pScreen); 1486} 1487 1488 1489 1490Bool 1491CG14InitAccel(ScreenPtr pScreen) 1492{ 1493 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1494 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1495 ExaDriverPtr pExa; 1496 1497 pExa = exaDriverAlloc(); 1498 if (!pExa) 1499 return FALSE; 1500 1501 p->pExa = pExa; 1502 1503 pExa->exa_major = EXA_VERSION_MAJOR; 1504 pExa->exa_minor = EXA_VERSION_MINOR; 1505 1506 pExa->memoryBase = p->fb; 1507 pExa->memorySize = p->memsize; 1508 pExa->offScreenBase = p->width * p->height * (pScrn->bitsPerPixel >> 3); 1509 1510 /* 1511 * SX memory instructions are written to 64bit aligned addresses with 1512 * a 3 bit displacement. Make sure the displacement remains constant 1513 * within one column 1514 */ 1515 1516 pExa->pixmapOffsetAlign = 8; 1517 pExa->pixmapPitchAlign = 8; 1518 1519 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1520 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1521 /*| EXA_MIXED_PIXMAPS*/; 1522 1523 /* 1524 * these limits are bogus 1525 * SX doesn't deal with coordinates at all, so there is no limit but 1526 * we have to put something here 1527 */ 1528 pExa->maxX = 4096; 1529 pExa->maxY = 4096; 1530 1531 pExa->WaitMarker = CG14WaitMarker; 1532 1533 pExa->PrepareSolid = CG14PrepareSolid; 1534 pExa->Solid = CG14Solid; 1535 pExa->DoneSolid = CG14DoneCopy; 1536 pExa->PrepareCopy = CG14PrepareCopy; 1537 pExa->Copy = CG14Copy32; 1538 pExa->DoneCopy = CG14DoneCopy; 1539 if (p->use_xrender) { 1540 pExa->CheckComposite = CG14CheckComposite; 1541 pExa->PrepareComposite = CG14PrepareComposite; 1542 pExa->Composite = CG14Composite; 1543 pExa->DoneComposite = CG14DoneCopy; 1544 } 1545 1546 /* EXA hits more optimized paths when it does not have to fallback 1547 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1548 */ 1549 pExa->UploadToScreen = CG14UploadToScreen; 1550 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1551 1552 p->queuecount = 0; 1553 /* do some hardware init */ 1554 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1555 p->last_mask = 0xffffffff; 1556 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1557 p->last_rop = 0xcc; 1558 return exaDriverInit(pScreen, pExa); 1559} 1560