cg14_accel.c revision 72fd264f
1/* $NetBSD: cg14_accel.c,v 1.27 2021/12/24 04:41:40 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45//#define SX_DEBUG 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__, 102 pSrcPixmap->drawable.bitsPerPixel, alu); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 sxm(SX_LD, s, 10, num - 1); 188 sxm(SX_STM, d, 10, num - 1); 189 s += xinc; 190 d += xinc; 191 count += 32; 192 } 193 srcstart += srcinc; 194 dststart += dstinc; 195 } 196 } else { 197 /* going right to left */ 198 int i, chunks = (w >> 5); 199 for (line = 0; line < h; line++) { 200 s = srcstart; 201 d = dststart; 202 count = w; 203 for (i = 0; i < chunks; i++) { 204 sxm(SX_LD, s, 10, 31); 205 sxm(SX_STM, d, 10, 31); 206 s -= 128; 207 d -= 128; 208 count -= 32; 209 } 210 /* leftovers, if any */ 211 if (count > 0) { 212 s += (32 - count) << 2; 213 d += (32 - count) << 2; 214 sxm(SX_LD, s, 10, count - 1); 215 sxm(SX_STM, d, 10, count - 1); 216 } 217 srcstart += srcinc; 218 dststart += dstinc; 219 } 220 } 221 } else { 222 /* ROPs needed */ 223 if ( xinc > 0) { 224 /* going left to right */ 225 for (line = 0; line < h; line++) { 226 count = 0; 227 s = srcstart; 228 d = dststart; 229 while ( count < w) { 230 num = min(32, w - count); 231 sxm(SX_LD, s, 10, num - 1); 232 sxm(SX_LD, d, 42, num - 1); 233 if (num > 16) { 234 sxi(SX_ROP(10, 42, 74, 15)); 235 sxi(SX_ROP(26, 58, 90, num - 17)); 236 } else { 237 sxi(SX_ROP(10, 42, 74, num - 1)); 238 } 239 sxm(SX_STM, d, 74, num - 1); 240 s += xinc; 241 d += xinc; 242 count += 32; 243 } 244 srcstart += srcinc; 245 dststart += dstinc; 246 } 247 } else { 248 /* going right to left */ 249 int i, chunks = (w >> 5); 250 for (line = 0; line < h; line++) { 251 s = srcstart; 252 d = dststart; 253 count = w; 254 for (i = 0; i < chunks; i++) { 255 sxm(SX_LD, s, 10, 31); 256 sxm(SX_LD, d, 42, 31); 257 sxi(SX_ROP(10, 42, 74, 15)); 258 sxi(SX_ROP(26, 58, 90, 15)); 259 sxm(SX_STM, d, 74, 31); 260 s -= 128; 261 d -= 128; 262 count -= 32; 263 } 264 /* leftovers, if any */ 265 if (count > 0) { 266 s += (32 - count) << 2; 267 d += (32 - count) << 2; 268 sxm(SX_LD, s, 10, count - 1); 269 sxm(SX_LD, d, 42, count - 1); 270 if (count > 16) { 271 sxi(SX_ROP(10, 42, 74, 15)); 272 sxi(SX_ROP(26, 58, 90, count - 17)); 273 } else { 274 sxi(SX_ROP(10, 42, 74, count - 1)); 275 } 276 sxm(SX_STM, d, 74, count - 1); 277 } 278 srcstart += srcinc; 279 dststart += dstinc; 280 } 281 } 282 } 283 exaMarkSync(pDstPixmap->drawable.pScreen); 284} 285 286/* 287 * copy with same alignment, left to right, no ROP 288 */ 289static void 290CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 291 int srcpitch, int dstpitch) 292{ 293 int saddr, daddr, pre, cnt, wrds; 294 295 ENTER; 296 297 pre = srcstart & 3; 298 if (pre != 0) pre = 4 - pre; 299 pre = min(pre, w); 300 301 while (h > 0) { 302 saddr = srcstart; 303 daddr = dststart; 304 cnt = w; 305 if (pre > 0) { 306 sxm(SX_LDB, saddr, 8, pre - 1); 307 sxm(SX_STB, daddr, 8, pre - 1); 308 saddr += pre; 309 daddr += pre; 310 cnt -= pre; 311 if (cnt == 0) goto next; 312 } 313 while (cnt > 3) { 314 wrds = min(32, cnt >> 2); 315 sxm(SX_LD, saddr, 8, wrds - 1); 316 sxm(SX_ST, daddr, 8, wrds - 1); 317 saddr += wrds << 2; 318 daddr += wrds << 2; 319 cnt -= wrds << 2; 320 } 321 if (cnt > 0) { 322 sxm(SX_LDB, saddr, 8, cnt - 1); 323 sxm(SX_STB, daddr, 8, cnt - 1); 324 } 325next: 326 srcstart += srcpitch; 327 dststart += dstpitch; 328 h--; 329 } 330} 331 332/* 333 * copy with same alignment, left to right, ROP 334 */ 335static void 336CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 337 int srcpitch, int dstpitch) 338{ 339 int saddr, daddr, pre, cnt, wrds; 340 341 ENTER; 342 343 pre = srcstart & 3; 344 if (pre != 0) pre = 4 - pre; 345 pre = min(pre, w); 346 347 while (h > 0) { 348 saddr = srcstart; 349 daddr = dststart; 350 cnt = w; 351 if (pre > 0) { 352 sxm(SX_LDB, saddr, 8, pre - 1); 353 sxm(SX_LDB, daddr, 40, pre - 1); 354 sxi(SX_ROP(8, 40, 72, pre - 1)); 355 sxm(SX_STB, daddr, 72, pre - 1); 356 saddr += pre; 357 daddr += pre; 358 cnt -= pre; 359 if (cnt == 0) goto next; 360 } 361 while (cnt > 3) { 362 wrds = min(32, cnt >> 2); 363 sxm(SX_LD, saddr, 8, wrds - 1); 364 sxm(SX_LD, daddr, 40, wrds - 1); 365 if (cnt > 16) { 366 sxi(SX_ROP(8, 40, 72, 15)); 367 sxi(SX_ROP(8, 56, 88, wrds - 17)); 368 } else 369 sxi(SX_ROP(8, 40, 72, wrds - 1)); 370 sxm(SX_ST, daddr, 72, wrds - 1); 371 saddr += wrds << 2; 372 daddr += wrds << 2; 373 cnt -= wrds << 2; 374 } 375 if (cnt > 0) { 376 sxm(SX_LDB, saddr, 8, cnt - 1); 377 sxm(SX_LDB, daddr, 40, cnt - 1); 378 sxi(SX_ROP(8, 40, 72, cnt - 1)); 379 sxm(SX_STB, daddr, 72, cnt - 1); 380 } 381next: 382 srcstart += srcpitch; 383 dststart += dstpitch; 384 h--; 385 } 386} 387 388/* up to 124 pixels so direction doesn't matter, unaligned, ROP */ 389static void 390CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 391{ 392 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 393 int ssreg; 394#ifdef DEBUG 395 int taddr = 4 + dstpitch * 50; 396#endif 397 uint32_t lmask, rmask; 398 ENTER; 399 400 pre = dststart & 3; 401 lmask = 0xffffffff >> pre; 402 spre = srcstart & 3; 403 /* 404 * make sure we count all the words needed to cover the destination 405 * line, covering potential partials on both ends 406 */ 407 wrds = (w + pre + 3) >> 2; 408 swrds = (w + spre + 3) >> 2; 409 410 if (spre < pre) { 411 dist = 32 - (pre - spre) * 8; 412 sreg = 9; 413 } else { 414 dist = (spre - pre) * 8; 415 sreg = 8; 416 } 417 418 /* 419 * mask out trailing pixels to avoid partial writes 420 */ 421 post = (dststart + w) & 3; 422 if (post != 0) { 423 rmask = ~(0xffffffff >> (post * 8)); 424 write_sx_reg(p, SX_QUEUED(7), rmask); 425 write_sx_reg(p, SX_QUEUED(6), ~rmask); 426 } 427 428 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 429 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 430 431 /* mask out the leading pixels in dst by using a mask and ROP */ 432 if (pre != 0) { 433 CG14Wait(p); 434 write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); 435 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 436 } 437 438 saddr = srcstart & ~3; 439 daddr = dststart & ~3; 440 441 while (h > 0) { 442 sxm(SX_LD, daddr, 80, wrds - 1); 443 sxm(SX_LD, saddr, sreg, swrds - 1); 444 if (wrds > 15) { 445 if (dist != 0) { 446 sxi(SX_FUNNEL_I(8, dist, 40, 15)); 447 sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16)); 448 /* shifted source pixels are now at register 40+ */ 449 ssreg = 40; 450 } else ssreg = 8; 451 if (pre != 0) { 452 /* mask out leading junk */ 453 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 454 sxi(SX_ROPB(ssreg, 80, 8, 0)); 455 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 456 sxi(SX_ROPB(ssreg + 1, 81, 9, 14)); 457 } else { 458 sxi(SX_ROPB(ssreg, 80, 8, 15)); 459 } 460 sxi(SX_ROPB(ssreg + 16, 96, 24, wrds - 16)); 461 } else { 462 if (dist != 0) { 463 sxi(SX_FUNNEL_I(8, dist, 40, wrds)); 464 ssreg = 40; 465 } else ssreg = 8; 466 if (pre != 0) { 467 /* mask out leading junk */ 468 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 469 sxi(SX_ROPB(ssreg, 80, 8, 0)); 470 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 471 sxi(SX_ROPB(ssreg + 1, 81, 9, wrds)); 472 } else { 473 sxi(SX_ROPB(ssreg, 80, 8, wrds)); 474 } 475 } 476 if (post != 0) { 477 /* 478 * if the last word to be written out is a partial we 479 * mask out the leftovers and replace them with 480 * background pixels 481 * we could pull the same ROP * mask trick as we do on 482 * the left end but it's less annoying this way and 483 * the instruction count is the same 484 */ 485 sxi(SX_ANDS(7 + wrds, 7, 5, 0)); 486 sxi(SX_ANDS(79 + wrds, 6, 4, 0)); 487 sxi(SX_ORS(5, 4, 7 + wrds, 0)); 488 } 489#ifdef DEBUG 490 sxm(SX_ST, taddr, 40, wrds - 1); 491 taddr += dstpitch; 492#endif 493 sxm(SX_ST, daddr, 8, wrds - 1); 494 saddr += srcpitch; 495 daddr += dstpitch; 496 h--; 497 } 498} 499 500/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 501static void 502CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, 503 int srcpitch, int dstpitch) 504{ 505 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 506 int ssreg; 507#ifdef DEBUG 508 int taddr = 4 + dstpitch * 50; 509#endif 510 uint32_t lmask, rmask; 511 ENTER; 512 513 pre = dststart & 3; 514 lmask = 0xffffffff >> pre; 515 spre = srcstart & 3; 516 /* 517 * make sure we count all the words needed to cover the destination 518 * line, covering potential partials on both ends 519 */ 520 wrds = (w + pre + 3) >> 2; 521 swrds = (w + spre + 3) >> 2; 522 523 if (spre < pre) { 524 dist = 32 - (pre - spre) * 8; 525 sreg = 9; 526 } else { 527 dist = (spre - pre) * 8; 528 sreg = 8; 529 } 530 531 /* 532 * mask out trailing pixels to avoid partial writes 533 */ 534 post = (dststart + w) & 3; 535 if (post != 0) { 536 rmask = ~(0xffffffff >> (post * 8)); 537 write_sx_reg(p, SX_QUEUED(7), rmask); 538 write_sx_reg(p, SX_QUEUED(6), ~rmask); 539 } 540 541 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 542 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 543 544 /* mask out the leading pixels in dst by using a mask and ROP */ 545 if (pre != 0) { 546 CG14Wait(p); 547 write_sx_reg(p, SX_ROP_CONTROL, 0xca); 548 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 549 } 550 551 saddr = srcstart & ~3; 552 daddr = dststart & ~3; 553 554 while (h > 0) { 555 sxm(SX_LD, saddr, sreg, swrds - 1); 556 if (wrds > 15) { 557 if (dist != 0) { 558 sxi(SX_FUNNEL_I(8, dist, 40, 15)); 559 sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16)); 560 /* shifted source pixels are now at reg 40+ */ 561 ssreg = 40; 562 } else ssreg = 8; 563 if (pre != 0) { 564 /* read only the first word */ 565 sxm(SX_LD, daddr, 80, 0); 566 /* mask out leading junk */ 567 sxi(SX_ROPB(ssreg, 80, ssreg, 0)); 568 } 569 } else { 570 if (dist != 0) { 571 sxi(SX_FUNNEL_I(8, dist, 40, wrds)); 572 ssreg = 40; 573 } else ssreg = 8; 574 if (pre != 0) { 575 /* read only the first word */ 576 sxm(SX_LD, daddr, 80, 0); 577 /* mask out leading junk */ 578 sxi(SX_ROPB(ssreg, 80, ssreg, 0)); 579 } 580 } 581 if (post != 0) { 582 int laddr = daddr + ((wrds - 1) << 2); 583 /* 584 * if the last word to be written out is a partial we 585 * mask out the leftovers and replace them with 586 * background pixels 587 * we could pull the same ROP * mask trick as we do on 588 * the left end but it's less annoying this way and 589 * the instruction count is the same 590 */ 591 sxm(SX_LD, laddr, 81, 0); 592 sxi(SX_ANDS(ssreg + wrds - 1, 7, 5, 0)); 593 sxi(SX_ANDS(81, 6, 4, 0)); 594 sxi(SX_ORS(5, 4, ssreg + wrds - 1, 0)); 595 } 596#ifdef DEBUG 597 sxm(SX_ST, taddr, 40, wrds - 1); 598 taddr += dstpitch; 599#endif 600 sxm(SX_ST, daddr, ssreg, wrds - 1); 601 saddr += srcpitch; 602 daddr += dstpitch; 603 h--; 604 } 605} 606 607static void 608CG14Copy8(PixmapPtr pDstPixmap, 609 int srcX, int srcY, int dstX, int dstY, int w, int h) 610{ 611 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 612 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 613 int dstpitch, dstoff, srcpitch, srcoff; 614 int srcstart, dststart, xinc, srcinc, dstinc; 615 int line, count, s, d, num; 616 617 ENTER; 618 dstpitch = exaGetPixmapPitch(pDstPixmap); 619 dstoff = exaGetPixmapOffset(pDstPixmap); 620 srcpitch = p->srcpitch; 621 srcoff = p->srcoff; 622 /* 623 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 624 * actually wrote anything and only sync if it did 625 */ 626 srcstart = srcX + (srcpitch * srcY) + srcoff; 627 dststart = dstX + (dstpitch * dstY) + dstoff; 628 629 if (p->ydir < 0) { 630 srcstart += (h - 1) * srcpitch; 631 dststart += (h - 1) * dstpitch; 632 srcinc = -srcpitch; 633 dstinc = -dstpitch; 634 } else { 635 srcinc = srcpitch; 636 dstinc = dstpitch; 637 } 638 639 /* 640 * this copies up to 124 pixels wide in one go, so horizontal 641 * direction / overlap don't matter 642 * uses all 32bit accesses and funnel shifter for unaligned copies 643 */ 644 if ((w < 125) && (w > 8)) { 645 switch (p->last_rop) { 646 case 0xcc: 647 CG14Copy8_short_norop(p, 648 srcstart, dststart, w, h, srcinc, dstinc); 649 break; 650 default: 651 CG14Copy8_short_rop(p, 652 srcstart, dststart, w, h, srcinc, dstinc); 653 } 654 return; 655 } 656 657 /* 658 * only invert x direction if absolutely necessary, it's a pain to 659 * go backwards on SX so avoid as much as possible 660 */ 661 if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) { 662 xinc = -32; 663 } else 664 xinc = 32; 665 666 /* 667 * for aligned copies we can go all 32bit and avoid VRAM reads in the 668 * most common case 669 */ 670 if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) { 671 switch (p->last_rop) { 672 case 0xcc: 673 CG14Copy8_aligned_norop(p, 674 srcstart, dststart, w, h, srcinc, dstinc); 675 break; 676 default: 677 CG14Copy8_aligned_rop(p, 678 srcstart, dststart, w, h, srcinc, dstinc); 679 } 680 return; 681 } 682 683 /* 684 * if we make it here we either have something large and unaligned, 685 * something we need to do right to left, or something tiny. 686 * we handle the non-tiny cases by breaking them down into chunks that 687 * Copy8_short_*() can handle, making sure the destinations are 32bit 688 * aligned whenever possible 689 * since we copy by block, not by line we need to go backwards even if 690 * we don't copy within the same line 691 */ 692 if (w > 8) { 693 int next, wi, end = dststart + w; 694 DPRINTF(X_ERROR, "%s %08x %08x %d\n", 695 __func__, srcstart, dststart, w); 696 if ((p->xdir < 0) && (srcoff == dstoff)) { 697 srcstart += w; 698 next = max((end - 120) & ~3, dststart); 699 wi = end - next; 700 srcstart -= wi; 701 while (wi > 0) { 702 DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", 703 __func__, srcstart, next, wi); 704 if (p->last_rop == 0xcc) { 705 CG14Copy8_short_norop(p, srcstart, 706 next, wi, h, srcinc, dstinc); 707 } else 708 CG14Copy8_short_rop(p, srcstart, 709 next, wi, h, srcinc, dstinc); 710 end = next; 711 /* 712 * avoid extremely narrow copies so I don't 713 * have to deal with dangling start and end 714 * pixels in the same word 715 */ 716 if ((end - dststart) < 140) { 717 next = max((end - 80) & ~3, dststart); 718 } else { 719 next = max((end - 120) & ~3, dststart); 720 } 721 wi = end - next; 722 srcstart -= wi; 723 } 724 } else { 725 next = min(end, (dststart + 124) & ~3); 726 wi = next - dststart; 727 while (wi > 0) { 728 DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", 729 __func__, srcstart, next, wi); 730 if (p->last_rop == 0xcc) { 731 CG14Copy8_short_norop(p, 732 srcstart, dststart, wi, h, 733 srcinc, dstinc); 734 } else 735 CG14Copy8_short_rop(p, 736 srcstart, dststart, wi, h, 737 srcinc, dstinc); 738 srcstart += wi; 739 dststart = next; 740 if ((end - dststart) < 140) { 741 next = min(end, (dststart + 84) & ~3); 742 } else { 743 next = min(end, (dststart + 124) & ~3); 744 } 745 wi = next - dststart; 746 } 747 } 748 return; 749 } 750 if (xinc < 0) { 751 srcstart += (w - 32); 752 dststart += (w - 32); 753 } 754 755 DPRINTF(X_ERROR, "%s fallback to byte-wise %d %d\n", __func__, w, h); 756 if (p->last_rop == 0xcc) { 757 /* plain old copy */ 758 if ( xinc > 0) { 759 /* going left to right */ 760 for (line = 0; line < h; line++) { 761 count = 0; 762 s = srcstart; 763 d = dststart; 764 while ( count < w) { 765 num = min(32, w - count); 766 sxm(SX_LDB, s, 10, num - 1); 767 sxm(SX_STBM, d, 10, num - 1); 768 s += xinc; 769 d += xinc; 770 count += 32; 771 } 772 srcstart += srcinc; 773 dststart += dstinc; 774 } 775 } else { 776 /* going right to left */ 777 int i, chunks = (w >> 5); 778 for (line = 0; line < h; line++) { 779 s = srcstart; 780 d = dststart; 781 count = w; 782 for (i = 0; i < chunks; i++) { 783 sxm(SX_LDB, s, 10, 31); 784 sxm(SX_STBM, d, 10, 31); 785 s -= 32; 786 d -= 32; 787 count -= 32; 788 } 789 /* leftovers, if any */ 790 if (count > 0) { 791 s += (32 - count); 792 d += (32 - count); 793 sxm(SX_LDB, s, 10, count - 1); 794 sxm(SX_STBM, d, 10, count - 1); 795 } 796 srcstart += srcinc; 797 dststart += dstinc; 798 } 799 } 800 } else { 801 /* ROPs needed */ 802 if ( xinc > 0) { 803 /* going left to right */ 804 for (line = 0; line < h; line++) { 805 count = 0; 806 s = srcstart; 807 d = dststart; 808 while ( count < w) { 809 num = min(32, w - count); 810 sxm(SX_LDB, s, 10, num - 1); 811 sxm(SX_LDB, d, 42, num - 1); 812 if (num > 16) { 813 sxi(SX_ROP(10, 42, 74, 15)); 814 sxi(SX_ROP(26, 58, 90, num - 17)); 815 } else { 816 sxi(SX_ROP(10, 42, 74, num - 1)); 817 } 818 sxm(SX_STBM, d, 74, num - 1); 819 s += xinc; 820 d += xinc; 821 count += 32; 822 } 823 srcstart += srcinc; 824 dststart += dstinc; 825 } 826 } else { 827 /* going right to left */ 828 int i, chunks = (w >> 5); 829 for (line = 0; line < h; line++) { 830 s = srcstart; 831 d = dststart; 832 count = w; 833 for (i = 0; i < chunks; i++) { 834 sxm(SX_LDB, s, 10, 31); 835 sxm(SX_LDB, d, 42, 31); 836 sxi(SX_ROP(10, 42, 74, 15)); 837 sxi(SX_ROP(26, 58, 90, 15)); 838 sxm(SX_STBM, d, 74, 31); 839 s -= 128; 840 d -= 128; 841 count -= 32; 842 } 843 /* leftovers, if any */ 844 if (count > 0) { 845 s += (32 - count); 846 d += (32 - count); 847 sxm(SX_LDB, s, 10, count - 1); 848 sxm(SX_LDB, d, 42, count - 1); 849 if (count > 16) { 850 sxi(SX_ROP(10, 42, 74, 15)); 851 sxi(SX_ROP(26, 58, 90, count - 17)); 852 } else { 853 sxi(SX_ROP(10, 42, 74, count - 1)); 854 } 855 sxm(SX_STBM, d, 74, count - 1); 856 } 857 srcstart += srcinc; 858 dststart += dstinc; 859 } 860 } 861 } 862 exaMarkSync(pDstPixmap->drawable.pScreen); 863} 864 865static void 866CG14DoneCopy(PixmapPtr pDstPixmap) 867{ 868} 869 870static Bool 871CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 872{ 873 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 874 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 875 876 ENTER; 877 DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n", 878 pPixmap->drawable.bitsPerPixel, fg); 879 880 /* 881 * GXset and GXclear are really just specual cases of GXcopy with 882 * fixed fill colour 883 */ 884 switch (alu) { 885 case GXclear: 886 alu = GXcopy; 887 fg = 0; 888 break; 889 case GXset: 890 alu = GXcopy; 891 fg = 0xffffffff; 892 break; 893 } 894 /* repeat the colour in every sub byte if we're in 8 bit */ 895 if (pPixmap->drawable.bitsPerPixel == 8) { 896 fg |= fg << 8; 897 fg |= fg << 16; 898 } 899 write_sx_reg(p, SX_QUEUED(8), fg); 900 write_sx_reg(p, SX_QUEUED(9), fg); 901 if (planemask != p->last_mask) { 902 CG14Wait(p); 903 write_sx_reg(p, SX_PLANEMASK, planemask); 904 p->last_mask = planemask; 905 } 906 alu = sx_rop[alu]; 907 if (alu != p->last_rop) { 908 CG14Wait(p); 909 write_sx_reg(p, SX_ROP_CONTROL, alu); 910 p->last_rop = alu; 911 } 912 913 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 914 return TRUE; 915} 916 917static void 918CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 919{ 920 int line, x, num; 921 uint32_t ptr; 922 923 ENTER; 924 if (p->last_rop == 0xcc) { 925 /* simple fill */ 926 for (line = 0; line < h; line++) { 927 x = 0; 928 while (x < w) { 929 ptr = start + (x << 2); 930 num = min(32, w - x); 931 sxm(SX_STS, ptr, 8, num - 1); 932 x += 32; 933 } 934 start += pitch; 935 } 936 } else if (p->last_rop == 0xaa) { 937 /* nothing to do here */ 938 return; 939 } else { 940 /* alright, let's do actual ROP stuff */ 941 942 /* first repeat the fill colour into 16 registers */ 943 sxi(SX_SELECT_S(8, 8, 10, 15)); 944 945 for (line = 0; line < h; line++) { 946 x = 0; 947 while (x < w) { 948 ptr = start + (x << 2); 949 num = min(32, w - x); 950 /* now suck fb data into registers */ 951 sxm(SX_LD, ptr, 42, num - 1); 952 /* 953 * ROP them with the fill data we left in 10 954 * non-memory ops can only have counts up to 16 955 */ 956 if (num <= 16) { 957 sxi(SX_ROP(10, 42, 74, num - 1)); 958 } else { 959 sxi(SX_ROP(10, 42, 74, 15)); 960 sxi(SX_ROP(10, 58, 90, num - 17)); 961 } 962 /* and write the result back into memory */ 963 sxm(SX_ST, ptr, 74, num - 1); 964 x += 32; 965 } 966 start += pitch; 967 } 968 } 969} 970 971static void 972CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 973{ 974 int line, num, pre, cnt; 975 uint32_t ptr; 976 977 ENTER; 978 pre = start & 3; 979 if (pre != 0) pre = 4 - pre; 980 981 if (p->last_rop == 0xcc) { 982 /* simple fill */ 983 for (line = 0; line < h; line++) { 984 ptr = start; 985 cnt = w; 986 pre = min(pre, cnt); 987 if (pre) { 988 sxm(SX_STBS, ptr, 8, pre - 1); 989 ptr += pre; 990 cnt -= pre; 991 if (cnt == 0) goto next; 992 } 993 /* now do the aligned pixels in 32bit chunks */ 994 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 995 while(cnt > 3) { 996 num = min(32, cnt >> 2); 997 sxm(SX_STS, ptr, 8, num - 1); 998 ptr += num << 2; 999 cnt -= num << 2; 1000 } 1001 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1002 if (cnt > 0) { 1003 sxm(SX_STBS, ptr, 8, cnt - 1); 1004 } 1005 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1006next: 1007 start += pitch; 1008 } 1009 } else if (p->last_rop == 0xaa) { 1010 /* nothing to do here */ 1011 return; 1012 } else { 1013 /* alright, let's do actual ROP stuff */ 1014 1015 /* first repeat the fill colour into 16 registers */ 1016 sxi(SX_SELECT_S(8, 8, 10, 15)); 1017 1018 for (line = 0; line < h; line++) { 1019 ptr = start; 1020 cnt = w; 1021 pre = min(pre, cnt); 1022 if (pre) { 1023 sxm(SX_LDB, ptr, 26, pre - 1); 1024 sxi(SX_ROP(10, 26, 42, pre - 1)); 1025 sxm(SX_STB, ptr, 42, pre - 1); 1026 ptr += pre; 1027 cnt -= pre; 1028 if (cnt == 0) goto next2; 1029 } 1030 /* now do the aligned pixels in 32bit chunks */ 1031 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1032 while(cnt > 3) { 1033 num = min(32, cnt >> 2); 1034 sxm(SX_LD, ptr, 26, num - 1); 1035 if (num <= 16) { 1036 sxi(SX_ROP(10, 26, 58, num - 1)); 1037 } else { 1038 sxi(SX_ROP(10, 26, 58, 15)); 1039 sxi(SX_ROP(10, 42, 74, num - 17)); 1040 } 1041 sxm(SX_ST, ptr, 58, num - 1); 1042 ptr += num << 2; 1043 cnt -= num << 2; 1044 } 1045 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1046 if (cnt > 0) { 1047 sxm(SX_LDB, ptr, 26, cnt - 1); 1048 sxi(SX_ROP(10, 26, 42, cnt - 1)); 1049 sxm(SX_STB, ptr, 42, cnt - 1); 1050 } 1051 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1052next2: 1053 start += pitch; 1054 } 1055 } 1056} 1057 1058static void 1059CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 1060{ 1061 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1062 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1063 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 1064 int start, depth; 1065 1066 ENTER; 1067 dstpitch = exaGetPixmapPitch(pPixmap); 1068 dstoff = exaGetPixmapOffset(pPixmap); 1069 1070 depth = pPixmap->drawable.bitsPerPixel; 1071 switch (depth) { 1072 case 32: 1073 start = dstoff + (y1 * dstpitch) + (x1 << 2); 1074 CG14Solid32(p, start, dstpitch, w, h); 1075 break; 1076 case 8: 1077 start = dstoff + (y1 * dstpitch) + x1; 1078 CG14Solid8(p, start, dstpitch, w, h); 1079 break; 1080 } 1081 1082 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 1083 dstpitch, dstoff, start); 1084 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 1085 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 1086 exaMarkSync(pPixmap->drawable.pScreen); 1087} 1088 1089/* 1090 * Memcpy-based UTS. 1091 */ 1092static Bool 1093CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1094 char *src, int src_pitch) 1095{ 1096 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1097 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1098 char *dst = p->fb + exaGetPixmapOffset(pDst); 1099 int dst_pitch = exaGetPixmapPitch(pDst); 1100 1101 int bpp = pDst->drawable.bitsPerPixel; 1102 int cpp = (bpp + 7) >> 3; 1103 int wBytes = w * cpp; 1104 1105 ENTER; 1106 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 1107 dst += (x * cpp) + (y * dst_pitch); 1108 1109 CG14Wait(p); 1110 1111 while (h--) { 1112 memcpy(dst, src, wBytes); 1113 src += src_pitch; 1114 dst += dst_pitch; 1115 } 1116 __asm("stbar;"); 1117 return TRUE; 1118} 1119 1120/* 1121 * Memcpy-based DFS. 1122 */ 1123static Bool 1124CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1125 char *dst, int dst_pitch) 1126{ 1127 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1128 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1129 char *src = p->fb + exaGetPixmapOffset(pSrc); 1130 int src_pitch = exaGetPixmapPitch(pSrc); 1131 1132 ENTER; 1133 int bpp = pSrc->drawable.bitsPerPixel; 1134 int cpp = (bpp + 7) >> 3; 1135 int wBytes = w * cpp; 1136 1137 src += (x * cpp) + (y * src_pitch); 1138 1139 CG14Wait(p); 1140 1141 while (h--) { 1142 memcpy(dst, src, wBytes); 1143 src += src_pitch; 1144 dst += dst_pitch; 1145 } 1146 1147 return TRUE; 1148} 1149 1150Bool 1151CG14CheckComposite(int op, PicturePtr pSrcPicture, 1152 PicturePtr pMaskPicture, 1153 PicturePtr pDstPicture) 1154{ 1155 int i, ok = FALSE; 1156 1157 ENTER; 1158 1159 /* 1160 * SX is in theory capable of accelerating pretty much all Xrender ops, 1161 * even coordinate transformation and gradients. Support will be added 1162 * over time and likely have to spill over into its own source file. 1163 */ 1164 1165 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 1166 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 1167 return FALSE; 1168 } 1169 1170 if (pSrcPicture != NULL) { 1171 i = 0; 1172 while ((i < arraysize(src_formats)) && (!ok)) { 1173 ok = (pSrcPicture->format == src_formats[i]); 1174 i++; 1175 } 1176 1177 if (!ok) { 1178 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 1179 __func__, pSrcPicture->format); 1180 return FALSE; 1181 } 1182 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 1183 } 1184 1185 if (pDstPicture != NULL) { 1186 i = 0; 1187 ok = FALSE; 1188 while ((i < arraysize(src_formats)) && (!ok)) { 1189 ok = (pDstPicture->format == src_formats[i]); 1190 i++; 1191 } 1192 1193 if (!ok) { 1194 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 1195 __func__, pDstPicture->format); 1196 return FALSE; 1197 } 1198 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 1199 } 1200 1201 if (pMaskPicture != NULL) { 1202 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 1203 pMaskPicture->pDrawable->width, 1204 pMaskPicture->pDrawable->height); 1205 } 1206 return TRUE; 1207} 1208 1209Bool 1210CG14PrepareComposite(int op, PicturePtr pSrcPicture, 1211 PicturePtr pMaskPicture, 1212 PicturePtr pDstPicture, 1213 PixmapPtr pSrc, 1214 PixmapPtr pMask, 1215 PixmapPtr pDst) 1216{ 1217 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1218 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1219 1220 ENTER; 1221 1222 p->no_source_pixmap = FALSE; 1223 p->source_is_solid = FALSE; 1224 1225 if (pSrcPicture->format == PICT_a1) { 1226 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 1227 pDstPicture->format, op); 1228 if (pMaskPicture != NULL) { 1229 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 1230 } 1231 } 1232 if (pSrcPicture->pSourcePict != NULL) { 1233 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 1234 p->fillcolour = 1235 pSrcPicture->pSourcePict->solidFill.color; 1236 DPRINTF(X_ERROR, "%s: solid src %08x\n", 1237 __func__, p->fillcolour); 1238 p->no_source_pixmap = TRUE; 1239 p->source_is_solid = TRUE; 1240 } 1241 } 1242 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 1243 if (pMaskPicture->pSourcePict->type == 1244 SourcePictTypeSolidFill) { 1245 p->fillcolour = 1246 pMaskPicture->pSourcePict->solidFill.color; 1247 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 1248 __func__, p->fillcolour); 1249 } 1250 } 1251 if (pMaskPicture != NULL) { 1252 p->mskoff = exaGetPixmapOffset(pMask); 1253 p->mskpitch = exaGetPixmapPitch(pMask); 1254 p->mskformat = pMaskPicture->format; 1255 } else { 1256 p->mskoff = 0; 1257 p->mskpitch = 0; 1258 p->mskformat = 0; 1259 } 1260 if (pSrc != NULL) { 1261 p->source_is_solid = 1262 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 1263 p->srcoff = exaGetPixmapOffset(pSrc); 1264 p->srcpitch = exaGetPixmapPitch(pSrc); 1265 if (p->source_is_solid) { 1266 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 1267 } 1268 } 1269 p->srcformat = pSrcPicture->format; 1270 p->dstformat = pDstPicture->format; 1271 1272 if (p->source_is_solid) { 1273 uint32_t temp; 1274 1275 /* stuff source colour into SX registers, swap as needed */ 1276 temp = p->fillcolour; 1277 switch (p->srcformat) { 1278 case PICT_a8r8g8b8: 1279 case PICT_x8r8g8b8: 1280 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1281 temp = temp >> 8; 1282 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1283 temp = temp >> 8; 1284 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1285 break; 1286 case PICT_a8b8g8r8: 1287 case PICT_x8b8g8r8: 1288 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1289 temp = temp >> 8; 1290 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1291 temp = temp >> 8; 1292 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1293 break; 1294 } 1295 write_sx_reg(p, SX_QUEUED(8), 0xff); 1296 } 1297 p->op = op; 1298 if (op == PictOpSrc) { 1299 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 1300 } 1301#ifdef SX_DEBUG 1302 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 1303 *(uint32_t *)(p->fb + p->srcoff)); 1304#endif 1305 return TRUE; 1306} 1307 1308void 1309CG14Composite(PixmapPtr pDst, int srcX, int srcY, 1310 int maskX, int maskY, 1311 int dstX, int dstY, 1312 int width, int height) 1313{ 1314 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1315 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1316 uint32_t dstoff, dstpitch; 1317 uint32_t dst, msk, src; 1318 int flip = 0; 1319 1320 ENTER; 1321 dstoff = exaGetPixmapOffset(pDst); 1322 dstpitch = exaGetPixmapPitch(pDst); 1323 1324 flip = (PICT_FORMAT_TYPE(p->srcformat) != 1325 PICT_FORMAT_TYPE(p->dstformat)); 1326 1327 switch (p->op) { 1328 case PictOpOver: 1329 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 1330 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 1331 p->mskformat, p->dstformat, srcX, srcY); 1332 if (p->source_is_solid) { 1333 switch (p->mskformat) { 1334 case PICT_a8: 1335 msk = p->mskoff + 1336 (maskY * p->mskpitch) + 1337 maskX; 1338 CG14Comp_Over8Solid(p, 1339 msk, p->mskpitch, 1340 dst, dstpitch, 1341 width, height); 1342 break; 1343 case PICT_a8r8g8b8: 1344 case PICT_a8b8g8r8: 1345 msk = p->mskoff + 1346 (maskY * p->mskpitch) + 1347 (maskX << 2); 1348 CG14Comp_Over32Solid(p, 1349 msk, p->mskpitch, 1350 dst, dstpitch, 1351 width, height); 1352 break; 1353 default: 1354 xf86Msg(X_ERROR, 1355 "unsupported mask format %08x\n", p->mskformat); 1356 } 1357 } else { 1358 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 1359 p->mskformat); 1360 switch (p->srcformat) { 1361 case PICT_a8r8g8b8: 1362 case PICT_a8b8g8r8: 1363 src = p->srcoff + 1364 (srcY * p->srcpitch) + 1365 (srcX << 2); 1366 dst = dstoff + 1367 (dstY * dstpitch) + 1368 (dstX << 2); 1369 if (p->mskformat == PICT_a8) { 1370 msk = p->mskoff + 1371 (maskY * p->mskpitch) + 1372 maskX; 1373 CG14Comp_Over32Mask(p, 1374 src, p->srcpitch, 1375 msk, p->mskpitch, 1376 dst, dstpitch, 1377 width, height, flip); 1378 } else { 1379 CG14Comp_Over32(p, 1380 src, p->srcpitch, 1381 dst, dstpitch, 1382 width, height, flip); 1383 } 1384 break; 1385 case PICT_x8r8g8b8: 1386 case PICT_x8b8g8r8: 1387 src = p->srcoff + 1388 (srcY * p->srcpitch) + 1389 (srcX << 2); 1390 dst = dstoff + 1391 (dstY * dstpitch) + 1392 (dstX << 2); 1393 if (p->mskformat == PICT_a8) { 1394 msk = p->mskoff + 1395 (maskY * p->mskpitch) + 1396 maskX; 1397 CG14Comp_Over32Mask_noalpha(p, 1398 src, p->srcpitch, 1399 msk, p->mskpitch, 1400 dst, dstpitch, 1401 width, height, flip); 1402 } else if ((p->mskformat == PICT_a8r8g8b8) || 1403 (p->mskformat == PICT_a8b8g8r8)) { 1404 msk = p->mskoff + 1405 (maskY * p->mskpitch) + 1406 (maskX << 2); 1407 CG14Comp_Over32Mask32_noalpha(p, 1408 src, p->srcpitch, 1409 msk, p->mskpitch, 1410 dst, dstpitch, 1411 width, height, flip); 1412 } else { 1413 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1414 } 1415 break; 1416 default: 1417 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1418 __func__, p->srcformat); 1419 } 1420 } 1421 break; 1422 case PictOpAdd: 1423 DPRINTF(X_ERROR, "Add %08x %08x\n", 1424 p->srcformat, p->dstformat); 1425 switch (p->srcformat) { 1426 case PICT_a8: 1427 src = p->srcoff + 1428 (srcY * p->srcpitch) + srcX; 1429 if (p->dstformat == PICT_a8) { 1430 dst = dstoff + 1431 (dstY * dstpitch) + dstX; 1432 CG14Comp_Add8(p, 1433 src, p->srcpitch, 1434 dst, dstpitch, 1435 width, height); 1436 } else { 1437 dst = dstoff + 1438 (dstY * dstpitch) + 1439 (dstX << 2); 1440 CG14Comp_Add8_32(p, 1441 src, p->srcpitch, 1442 dst, dstpitch, 1443 width, height); 1444 } 1445 break; 1446 case PICT_a8r8g8b8: 1447 case PICT_x8r8g8b8: 1448 src = p->srcoff + 1449 (srcY * p->srcpitch) + (srcX << 2); 1450 dst = dstoff + (dstY * dstpitch) + 1451 (dstX << 2); 1452 CG14Comp_Add32(p, src, p->srcpitch, 1453 dst, dstpitch, width, height); 1454 break; 1455 default: 1456 xf86Msg(X_ERROR, 1457 "unsupported src format\n"); 1458 } 1459 break; 1460 case PictOpSrc: 1461 DPRINTF(X_ERROR, "Src %08x %08x\n", 1462 p->srcformat, p->dstformat); 1463 if (p->mskformat != 0) 1464 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1465 if (p->srcformat == PICT_a8) { 1466 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1467 } else { 1468 /* convert between RGB and BGR? */ 1469 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1470 } 1471 break; 1472 default: 1473 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1474 } 1475 exaMarkSync(pDst->drawable.pScreen); 1476} 1477 1478 1479 1480Bool 1481CG14InitAccel(ScreenPtr pScreen) 1482{ 1483 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1484 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1485 ExaDriverPtr pExa; 1486 1487 pExa = exaDriverAlloc(); 1488 if (!pExa) 1489 return FALSE; 1490 1491 p->pExa = pExa; 1492 1493 pExa->exa_major = EXA_VERSION_MAJOR; 1494 pExa->exa_minor = EXA_VERSION_MINOR; 1495 1496 pExa->memoryBase = p->fb; 1497 pExa->memorySize = p->memsize; 1498 pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 1499 1500 /* 1501 * SX memory instructions are written to 64bit aligned addresses with 1502 * a 3 bit displacement. Make sure the displacement remains constant 1503 * within one column 1504 */ 1505 1506 pExa->pixmapOffsetAlign = 8; 1507 pExa->pixmapPitchAlign = 8; 1508 1509 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1510 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1511 /*| EXA_MIXED_PIXMAPS*/; 1512 1513 /* 1514 * these limits are bogus 1515 * SX doesn't deal with coordinates at all, so there is no limit but 1516 * we have to put something here 1517 */ 1518 pExa->maxX = 4096; 1519 pExa->maxY = 4096; 1520 1521 pExa->WaitMarker = CG14WaitMarker; 1522 1523 pExa->PrepareSolid = CG14PrepareSolid; 1524 pExa->Solid = CG14Solid; 1525 pExa->DoneSolid = CG14DoneCopy; 1526 pExa->PrepareCopy = CG14PrepareCopy; 1527 pExa->Copy = CG14Copy32; 1528 pExa->DoneCopy = CG14DoneCopy; 1529 if (p->use_xrender) { 1530 pExa->CheckComposite = CG14CheckComposite; 1531 pExa->PrepareComposite = CG14PrepareComposite; 1532 pExa->Composite = CG14Composite; 1533 pExa->DoneComposite = CG14DoneCopy; 1534 } 1535 1536 /* EXA hits more optimized paths when it does not have to fallback 1537 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1538 */ 1539 pExa->UploadToScreen = CG14UploadToScreen; 1540 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1541 1542 p->queuecount = 0; 1543 /* do some hardware init */ 1544 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1545 p->last_mask = 0xffffffff; 1546 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1547 p->last_rop = 0xcc; 1548 return exaDriverInit(pScreen, pExa); 1549} 1550