cg14_accel.c revision 76a85281
1/* $NetBSD: cg14_accel.c,v 1.24 2021/12/10 19:42:07 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45//#define SX_DEBUG 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__, 102 pSrcPixmap->drawable.bitsPerPixel, alu); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 write_sx_io(p, s, 188 SX_LD(10, num - 1, s & 7)); 189 write_sx_io(p, d, 190 SX_STM(10, num - 1, d & 7)); 191 s += xinc; 192 d += xinc; 193 count += 32; 194 } 195 srcstart += srcinc; 196 dststart += dstinc; 197 } 198 } else { 199 /* going right to left */ 200 int i, chunks = (w >> 5); 201 for (line = 0; line < h; line++) { 202 s = srcstart; 203 d = dststart; 204 count = w; 205 for (i = 0; i < chunks; i++) { 206 write_sx_io(p, s, 207 SX_LD(10, 31, s & 7)); 208 write_sx_io(p, d, 209 SX_STM(10, 31, d & 7)); 210 s -= 128; 211 d -= 128; 212 count -= 32; 213 } 214 /* leftovers, if any */ 215 if (count > 0) { 216 s += (32 - count) << 2; 217 d += (32 - count) << 2; 218 write_sx_io(p, s, 219 SX_LD(10, count - 1, s & 7)); 220 write_sx_io(p, d, 221 SX_STM(10, count - 1, d & 7)); 222 } 223 srcstart += srcinc; 224 dststart += dstinc; 225 } 226 } 227 } else { 228 /* ROPs needed */ 229 if ( xinc > 0) { 230 /* going left to right */ 231 for (line = 0; line < h; line++) { 232 count = 0; 233 s = srcstart; 234 d = dststart; 235 while ( count < w) { 236 num = min(32, w - count); 237 write_sx_io(p, s, 238 SX_LD(10, num - 1, s & 7)); 239 write_sx_io(p, d, 240 SX_LD(42, num - 1, d & 7)); 241 if (num > 16) { 242 write_sx_reg(p, SX_INSTRUCTIONS, 243 SX_ROP(10, 42, 74, 15)); 244 write_sx_reg(p, SX_INSTRUCTIONS, 245 SX_ROP(26, 58, 90, num - 17)); 246 } else { 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(10, 42, 74, num - 1)); 249 } 250 write_sx_io(p, d, 251 SX_STM(74, num - 1, d & 7)); 252 s += xinc; 253 d += xinc; 254 count += 32; 255 } 256 srcstart += srcinc; 257 dststart += dstinc; 258 } 259 } else { 260 /* going right to left */ 261 int i, chunks = (w >> 5); 262 for (line = 0; line < h; line++) { 263 s = srcstart; 264 d = dststart; 265 count = w; 266 for (i = 0; i < chunks; i++) { 267 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 268 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 SX_ROP(10, 42, 74, 15)); 271 write_sx_reg(p, SX_INSTRUCTIONS, 272 SX_ROP(26, 58, 90, 15)); 273 write_sx_io(p, d, 274 SX_STM(74, 31, d & 7)); 275 s -= 128; 276 d -= 128; 277 count -= 32; 278 } 279 /* leftovers, if any */ 280 if (count > 0) { 281 s += (32 - count) << 2; 282 d += (32 - count) << 2; 283 write_sx_io(p, s, 284 SX_LD(10, count - 1, s & 7)); 285 write_sx_io(p, d, 286 SX_LD(42, count - 1, d & 7)); 287 if (count > 16) { 288 write_sx_reg(p, SX_INSTRUCTIONS, 289 SX_ROP(10, 42, 74, 15)); 290 write_sx_reg(p, SX_INSTRUCTIONS, 291 SX_ROP(26, 58, 90, count - 17)); 292 } else { 293 write_sx_reg(p, SX_INSTRUCTIONS, 294 SX_ROP(10, 42, 74, count - 1)); 295 } 296 297 write_sx_io(p, d, 298 SX_STM(74, count - 1, d & 7)); 299 } 300 srcstart += srcinc; 301 dststart += dstinc; 302 } 303 } 304 } 305 exaMarkSync(pDstPixmap->drawable.pScreen); 306} 307 308/* 309 * copy with same alignment, left to right, no ROP 310 */ 311static void 312CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 313{ 314 int saddr, daddr, pre, cnt, wrds; 315 316 ENTER; 317 318 pre = srcstart & 3; 319 if (pre != 0) pre = 4 - pre; 320 pre = min(pre, w); 321 322 while (h > 0) { 323 saddr = srcstart; 324 daddr = dststart; 325 cnt = w; 326 if (pre > 0) { 327 write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 328 write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7)); 329 saddr += pre; 330 daddr += pre; 331 cnt -= pre; 332 if (cnt == 0) goto next; 333 } 334 while (cnt > 3) { 335 wrds = min(32, cnt >> 2); 336 write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 337 write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 338 saddr += wrds << 2; 339 daddr += wrds << 2; 340 cnt -= wrds << 2; 341 } 342 if (cnt > 0) { 343 write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 344 write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7)); 345 } 346next: 347 srcstart += srcpitch; 348 dststart += dstpitch; 349 h--; 350 } 351} 352 353/* 354 * copy with same alignment, left to right, ROP 355 */ 356static void 357CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 358{ 359 int saddr, daddr, pre, cnt, wrds; 360 361 ENTER; 362 363 pre = srcstart & 3; 364 if (pre != 0) pre = 4 - pre; 365 pre = min(pre, w); 366 367 while (h > 0) { 368 saddr = srcstart; 369 daddr = dststart; 370 cnt = w; 371 if (pre > 0) { 372 write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 373 write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7)); 374 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1)); 375 write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7)); 376 saddr += pre; 377 daddr += pre; 378 cnt -= pre; 379 if (cnt == 0) goto next; 380 } 381 while (cnt > 3) { 382 wrds = min(32, cnt >> 2); 383 write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 384 write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7)); 385 if (cnt > 16) { 386 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15)); 387 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17)); 388 } else 389 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1)); 390 write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7)); 391 saddr += wrds << 2; 392 daddr += wrds << 2; 393 cnt -= wrds << 2; 394 } 395 if (cnt > 0) { 396 write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 397 write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7)); 398 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1)); 399 write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7)); 400 } 401next: 402 srcstart += srcpitch; 403 dststart += dstpitch; 404 h--; 405 } 406} 407 408/* up to 124 pixels so direction doesn't matter, unaligned, ROP */ 409static void 410CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 411{ 412 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 413 int ssreg; 414#ifdef DEBUG 415 int taddr = 4 + dstpitch * 50; 416#endif 417 uint32_t lmask, rmask; 418 ENTER; 419 420 pre = dststart & 3; 421 lmask = 0xffffffff >> pre; 422 spre = srcstart & 3; 423 /* 424 * make sure we count all the words needed to cover the destination 425 * line, covering potential partials on both ends 426 */ 427 wrds = (w + pre + 3) >> 2; 428 swrds = (w + spre + 3) >> 2; 429 430 if (spre < pre) { 431 dist = 32 - (pre - spre) * 8; 432 sreg = 9; 433 } else { 434 dist = (spre - pre) * 8; 435 sreg = 8; 436 } 437 438 /* 439 * mask out trailing pixels to avoid partial writes 440 */ 441 post = (dststart + w) & 3; 442 if (post != 0) { 443 rmask = ~(0xffffffff >> (post * 8)); 444 write_sx_reg(p, SX_QUEUED(7), rmask); 445 write_sx_reg(p, SX_QUEUED(6), ~rmask); 446 } 447 448 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 449 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 450 451 /* mask out the leading pixels in dst by using a mask and ROP */ 452 if (pre != 0) { 453 write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); 454 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 455 } 456 457 saddr = srcstart & ~3; 458 daddr = dststart & ~3; 459 460 while (h > 0) { 461 write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7)); 462 write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 463 if (wrds > 15) { 464 if (dist != 0) { 465 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 466 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 467 /* shifted source pixels are now at register 40+ */ 468 ssreg = 40; 469 } else ssreg = 8; 470 if (pre != 0) { 471 /* mask out leading junk */ 472 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 473 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 474 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 475 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14)); 476 } else { 477 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15)); 478 } 479 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16)); 480 } else { 481 if (dist != 0) { 482 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 483 ssreg = 40; 484 } else ssreg = 8; 485 if (pre != 0) { 486 /* mask out leading junk */ 487 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 488 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 489 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 490 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds)); 491 } else { 492 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds)); 493 } 494 } 495 if (post != 0) { 496 /* 497 * if the last word to be written out is a partial we 498 * mask out the leftovers and replace them with 499 * background pixels 500 * we could pull the same ROP * mask trick as we do on 501 * the left end but it's less annoying this way and 502 * the instruction count is the same 503 */ 504 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0)); 505 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0)); 506 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0)); 507 } 508#ifdef DEBUG 509 write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 510 taddr += dstpitch; 511#endif 512 write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 513 saddr += srcpitch; 514 daddr += dstpitch; 515 h--; 516 } 517} 518 519/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 520static void 521CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 522{ 523 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 524 int ssreg; 525#ifdef DEBUG 526 int taddr = 4 + dstpitch * 50; 527#endif 528 uint32_t lmask, rmask; 529 ENTER; 530 531 pre = dststart & 3; 532 lmask = 0xffffffff >> pre; 533 spre = srcstart & 3; 534 /* 535 * make sure we count all the words needed to cover the destination 536 * line, covering potential partials on both ends 537 */ 538 wrds = (w + pre + 3) >> 2; 539 swrds = (w + spre + 3) >> 2; 540 541 if (spre < pre) { 542 dist = 32 - (pre - spre) * 8; 543 sreg = 9; 544 } else { 545 dist = (spre - pre) * 8; 546 sreg = 8; 547 } 548 549 /* 550 * mask out trailing pixels to avoid partial writes 551 */ 552 post = (dststart + w) & 3; 553 if (post != 0) { 554 rmask = ~(0xffffffff >> (post * 8)); 555 write_sx_reg(p, SX_QUEUED(7), rmask); 556 write_sx_reg(p, SX_QUEUED(6), ~rmask); 557 } 558 559 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 560 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 561 562 /* mask out the leading pixels in dst by using a mask and ROP */ 563 if (pre != 0) { 564 write_sx_reg(p, SX_ROP_CONTROL, 0xca); 565 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 566 } 567 568 saddr = srcstart & ~3; 569 daddr = dststart & ~3; 570 571 while (h > 0) { 572 write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 573 if (wrds > 15) { 574 if (dist != 0) { 575 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 576 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 577 /* shifted source pixels are now at register 40+ */ 578 ssreg = 40; 579 } else ssreg = 8; 580 if (pre != 0) { 581 /* read only the first word */ 582 write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 583 /* mask out leading junk */ 584 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 585 } 586 } else { 587 if (dist != 0) { 588 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 589 ssreg = 40; 590 } else ssreg = 8; 591 if (pre != 0) { 592 /* read only the first word */ 593 write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 594 /* mask out leading junk */ 595 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 596 } 597 } 598 if (post != 0) { 599 int laddr = daddr + ((wrds - 1) << 2); 600 /* 601 * if the last word to be written out is a partial we 602 * mask out the leftovers and replace them with 603 * background pixels 604 * we could pull the same ROP * mask trick as we do on 605 * the left end but it's less annoying this way and 606 * the instruction count is the same 607 */ 608 write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7)); 609 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0)); 610 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0)); 611 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0)); 612 } 613#ifdef DEBUG 614 write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 615 taddr += dstpitch; 616#endif 617 write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7)); 618 saddr += srcpitch; 619 daddr += dstpitch; 620 h--; 621 } 622} 623 624static void 625CG14Copy8(PixmapPtr pDstPixmap, 626 int srcX, int srcY, int dstX, int dstY, int w, int h) 627{ 628 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 629 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 630 int dstpitch, dstoff, srcpitch, srcoff; 631 int srcstart, dststart, xinc, srcinc, dstinc; 632 int line, count, s, d, num; 633 634 ENTER; 635 dstpitch = exaGetPixmapPitch(pDstPixmap); 636 dstoff = exaGetPixmapOffset(pDstPixmap); 637 srcpitch = p->srcpitch; 638 srcoff = p->srcoff; 639 /* 640 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 641 * actually wrote anything and only sync if it did 642 */ 643 srcstart = srcX + (srcpitch * srcY) + srcoff; 644 dststart = dstX + (dstpitch * dstY) + dstoff; 645 646 if (p->ydir < 0) { 647 srcstart += (h - 1) * srcpitch; 648 dststart += (h - 1) * dstpitch; 649 srcinc = -srcpitch; 650 dstinc = -dstpitch; 651 } else { 652 srcinc = srcpitch; 653 dstinc = dstpitch; 654 } 655 656 /* 657 * this copies up to 124 pixels wide in one go, so horizontal 658 * direction / overlap don't matter 659 * uses all 32bit accesses and funnel shifter for unaligned copies 660 */ 661 if ((w < 125) && (w > 8)) { 662 switch (p->last_rop) { 663 case 0xcc: 664 CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 665 break; 666 default: 667 CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc); 668 } 669 return; 670 } 671 672 /* 673 * only invert x direction if absolutely necessary, it's a pain to 674 * go backwards on SX so avoid as much as possible 675 */ 676 if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) { 677 srcstart += (w - 32); 678 dststart += (w - 32); 679 xinc = -32; 680 } else 681 xinc = 32; 682 683 /* 684 * for aligned copies we can go all 32bit and avoid VRAM reads in the 685 * most common case 686 */ 687 if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) { 688 switch (p->last_rop) { 689 case 0xcc: 690 CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 691 break; 692 default: 693 CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc); 694 } 695 return; 696 } 697 698 if (p->last_rop == 0xcc) { 699 /* plain old copy */ 700 if ( xinc > 0) { 701 /* going left to right */ 702 for (line = 0; line < h; line++) { 703 count = 0; 704 s = srcstart; 705 d = dststart; 706 while ( count < w) { 707 num = min(32, w - count); 708 write_sx_io(p, s, 709 SX_LDB(10, num - 1, s & 7)); 710 write_sx_io(p, d, 711 SX_STBM(10, num - 1, d & 7)); 712 s += xinc; 713 d += xinc; 714 count += 32; 715 } 716 srcstart += srcinc; 717 dststart += dstinc; 718 } 719 } else { 720 /* going right to left */ 721 int i, chunks = (w >> 5); 722 for (line = 0; line < h; line++) { 723 s = srcstart; 724 d = dststart; 725 count = w; 726 for (i = 0; i < chunks; i++) { 727 write_sx_io(p, s, 728 SX_LDB(10, 31, s & 7)); 729 write_sx_io(p, d, 730 SX_STBM(10, 31, d & 7)); 731 s -= 32; 732 d -= 32; 733 count -= 32; 734 } 735 /* leftovers, if any */ 736 if (count > 0) { 737 s += (32 - count); 738 d += (32 - count); 739 write_sx_io(p, s, 740 SX_LDB(10, count - 1, s & 7)); 741 write_sx_io(p, d, 742 SX_STBM(10, count - 1, d & 7)); 743 } 744 srcstart += srcinc; 745 dststart += dstinc; 746 } 747 } 748 } else { 749 /* ROPs needed */ 750 if ( xinc > 0) { 751 /* going left to right */ 752 for (line = 0; line < h; line++) { 753 count = 0; 754 s = srcstart; 755 d = dststart; 756 while ( count < w) { 757 num = min(32, w - count); 758 write_sx_io(p, s, 759 SX_LDB(10, num - 1, s & 7)); 760 write_sx_io(p, d, 761 SX_LDB(42, num - 1, d & 7)); 762 if (num > 16) { 763 write_sx_reg(p, SX_INSTRUCTIONS, 764 SX_ROP(10, 42, 74, 15)); 765 write_sx_reg(p, SX_INSTRUCTIONS, 766 SX_ROP(26, 58, 90, num - 17)); 767 } else { 768 write_sx_reg(p, SX_INSTRUCTIONS, 769 SX_ROP(10, 42, 74, num - 1)); 770 } 771 write_sx_io(p, d, 772 SX_STBM(74, num - 1, d & 7)); 773 s += xinc; 774 d += xinc; 775 count += 32; 776 } 777 srcstart += srcinc; 778 dststart += dstinc; 779 } 780 } else { 781 /* going right to left */ 782 int i, chunks = (w >> 5); 783 for (line = 0; line < h; line++) { 784 s = srcstart; 785 d = dststart; 786 count = w; 787 for (i = 0; i < chunks; i++) { 788 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 789 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 790 write_sx_reg(p, SX_INSTRUCTIONS, 791 SX_ROP(10, 42, 74, 15)); 792 write_sx_reg(p, SX_INSTRUCTIONS, 793 SX_ROP(26, 58, 90, 15)); 794 write_sx_io(p, d, 795 SX_STBM(74, 31, d & 7)); 796 s -= 128; 797 d -= 128; 798 count -= 32; 799 } 800 /* leftovers, if any */ 801 if (count > 0) { 802 s += (32 - count); 803 d += (32 - count); 804 write_sx_io(p, s, 805 SX_LDB(10, count - 1, s & 7)); 806 write_sx_io(p, d, 807 SX_LDB(42, count - 1, d & 7)); 808 if (count > 16) { 809 write_sx_reg(p, SX_INSTRUCTIONS, 810 SX_ROP(10, 42, 74, 15)); 811 write_sx_reg(p, SX_INSTRUCTIONS, 812 SX_ROP(26, 58, 90, count - 17)); 813 } else { 814 write_sx_reg(p, SX_INSTRUCTIONS, 815 SX_ROP(10, 42, 74, count - 1)); 816 } 817 818 write_sx_io(p, d, 819 SX_STBM(74, count - 1, d & 7)); 820 } 821 srcstart += srcinc; 822 dststart += dstinc; 823 } 824 } 825 } 826 exaMarkSync(pDstPixmap->drawable.pScreen); 827} 828 829static void 830CG14DoneCopy(PixmapPtr pDstPixmap) 831{ 832} 833 834static Bool 835CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 836{ 837 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 838 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 839 840 ENTER; 841 DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n", 842 pPixmap->drawable.bitsPerPixel, fg); 843 844 /* 845 * GXset and GXclear are really just specual cases of GXcopy with 846 * fixed fill colour 847 */ 848 switch (alu) { 849 case GXclear: 850 alu = GXcopy; 851 fg = 0; 852 break; 853 case GXset: 854 alu = GXcopy; 855 fg = 0xffffffff; 856 break; 857 } 858 /* repeat the colour in every sub byte if we're in 8 bit */ 859 if (pPixmap->drawable.bitsPerPixel == 8) { 860 fg |= fg << 8; 861 fg |= fg << 16; 862 } 863 write_sx_reg(p, SX_QUEUED(8), fg); 864 write_sx_reg(p, SX_QUEUED(9), fg); 865 if (planemask != p->last_mask) { 866 CG14Wait(p); 867 write_sx_reg(p, SX_PLANEMASK, planemask); 868 p->last_mask = planemask; 869 } 870 alu = sx_rop[alu]; 871 if (alu != p->last_rop) { 872 CG14Wait(p); 873 write_sx_reg(p, SX_ROP_CONTROL, alu); 874 p->last_rop = alu; 875 } 876 877 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 878 return TRUE; 879} 880 881static void 882CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 883{ 884 int line, x, num; 885 uint32_t ptr; 886 887 ENTER; 888 if (p->last_rop == 0xcc) { 889 /* simple fill */ 890 for (line = 0; line < h; line++) { 891 x = 0; 892 while (x < w) { 893 ptr = start + (x << 2); 894 num = min(32, w - x); 895 write_sx_io(p, ptr, 896 SX_STS(8, num - 1, ptr & 7)); 897 x += 32; 898 } 899 start += pitch; 900 } 901 } else if (p->last_rop == 0xaa) { 902 /* nothing to do here */ 903 return; 904 } else { 905 /* alright, let's do actual ROP stuff */ 906 907 /* first repeat the fill colour into 16 registers */ 908 write_sx_reg(p, SX_INSTRUCTIONS, 909 SX_SELECT_S(8, 8, 10, 15)); 910 911 for (line = 0; line < h; line++) { 912 x = 0; 913 while (x < w) { 914 ptr = start + (x << 2); 915 num = min(32, w - x); 916 /* now suck fb data into registers */ 917 write_sx_io(p, ptr, 918 SX_LD(42, num - 1, ptr & 7)); 919 /* 920 * ROP them with the fill data we left in 10 921 * non-memory ops can only have counts up to 16 922 */ 923 if (num <= 16) { 924 write_sx_reg(p, SX_INSTRUCTIONS, 925 SX_ROP(10, 42, 74, num - 1)); 926 } else { 927 write_sx_reg(p, SX_INSTRUCTIONS, 928 SX_ROP(10, 42, 74, 15)); 929 write_sx_reg(p, SX_INSTRUCTIONS, 930 SX_ROP(10, 58, 90, num - 17)); 931 } 932 /* and write the result back into memory */ 933 write_sx_io(p, ptr, 934 SX_ST(74, num - 1, ptr & 7)); 935 x += 32; 936 } 937 start += pitch; 938 } 939 } 940} 941 942static void 943CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 944{ 945 int line, num, pre, cnt; 946 uint32_t ptr; 947 948 ENTER; 949 pre = start & 3; 950 if (pre != 0) pre = 4 - pre; 951 952 if (p->last_rop == 0xcc) { 953 /* simple fill */ 954 for (line = 0; line < h; line++) { 955 ptr = start; 956 cnt = w; 957 pre = min(pre, cnt); 958 if (pre) { 959 write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7)); 960 ptr += pre; 961 cnt -= pre; 962 if (cnt == 0) goto next; 963 } 964 /* now do the aligned pixels in 32bit chunks */ 965 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 966 while(cnt > 3) { 967 num = min(32, cnt >> 2); 968 write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7)); 969 ptr += num << 2; 970 cnt -= num << 2; 971 } 972 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 973 if (cnt > 0) { 974 write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7)); 975 } 976 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 977next: 978 start += pitch; 979 } 980 } else if (p->last_rop == 0xaa) { 981 /* nothing to do here */ 982 return; 983 } else { 984 /* alright, let's do actual ROP stuff */ 985 986 /* first repeat the fill colour into 16 registers */ 987 write_sx_reg(p, SX_INSTRUCTIONS, 988 SX_SELECT_S(8, 8, 10, 15)); 989 990 for (line = 0; line < h; line++) { 991 ptr = start; 992 cnt = w; 993 pre = min(pre, cnt); 994 if (pre) { 995 write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7)); 996 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1)); 997 write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7)); 998 ptr += pre; 999 cnt -= pre; 1000 if (cnt == 0) goto next2; 1001 } 1002 /* now do the aligned pixels in 32bit chunks */ 1003 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1004 while(cnt > 3) { 1005 num = min(32, cnt >> 2); 1006 write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7)); 1007 if (num <= 16) { 1008 write_sx_reg(p, SX_INSTRUCTIONS, 1009 SX_ROP(10, 26, 58, num - 1)); 1010 } else { 1011 write_sx_reg(p, SX_INSTRUCTIONS, 1012 SX_ROP(10, 26, 58, 15)); 1013 write_sx_reg(p, SX_INSTRUCTIONS, 1014 SX_ROP(10, 42, 74, num - 17)); 1015 } 1016 write_sx_io(p, ptr & ~7, SX_ST(58, num - 1, ptr & 7)); 1017 ptr += num << 2; 1018 cnt -= num << 2; 1019 } 1020 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1021 if (cnt > 0) { 1022 write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7)); 1023 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1)); 1024 write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7)); 1025 } 1026 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1027next2: 1028 start += pitch; 1029 } 1030 } 1031} 1032 1033static void 1034CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 1035{ 1036 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1037 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1038 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 1039 int start, depth; 1040 1041 ENTER; 1042 dstpitch = exaGetPixmapPitch(pPixmap); 1043 dstoff = exaGetPixmapOffset(pPixmap); 1044 1045 depth = pPixmap->drawable.bitsPerPixel; 1046 switch (depth) { 1047 case 32: 1048 start = dstoff + (y1 * dstpitch) + (x1 << 2); 1049 CG14Solid32(p, start, dstpitch, w, h); 1050 break; 1051 case 8: 1052 start = dstoff + (y1 * dstpitch) + x1; 1053 CG14Solid8(p, start, dstpitch, w, h); 1054 break; 1055 } 1056 1057 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 1058 dstpitch, dstoff, start); 1059 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 1060 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 1061 exaMarkSync(pPixmap->drawable.pScreen); 1062} 1063 1064/* 1065 * Memcpy-based UTS. 1066 */ 1067static Bool 1068CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1069 char *src, int src_pitch) 1070{ 1071 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1072 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1073 char *dst = p->fb + exaGetPixmapOffset(pDst); 1074 int dst_pitch = exaGetPixmapPitch(pDst); 1075 1076 int bpp = pDst->drawable.bitsPerPixel; 1077 int cpp = (bpp + 7) >> 3; 1078 int wBytes = w * cpp; 1079 1080 ENTER; 1081 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 1082 dst += (x * cpp) + (y * dst_pitch); 1083 1084 CG14Wait(p); 1085 1086 while (h--) { 1087 memcpy(dst, src, wBytes); 1088 src += src_pitch; 1089 dst += dst_pitch; 1090 } 1091 __asm("stbar;"); 1092 return TRUE; 1093} 1094 1095/* 1096 * Memcpy-based DFS. 1097 */ 1098static Bool 1099CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1100 char *dst, int dst_pitch) 1101{ 1102 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1103 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1104 char *src = p->fb + exaGetPixmapOffset(pSrc); 1105 int src_pitch = exaGetPixmapPitch(pSrc); 1106 1107 ENTER; 1108 int bpp = pSrc->drawable.bitsPerPixel; 1109 int cpp = (bpp + 7) >> 3; 1110 int wBytes = w * cpp; 1111 1112 src += (x * cpp) + (y * src_pitch); 1113 1114 CG14Wait(p); 1115 1116 while (h--) { 1117 memcpy(dst, src, wBytes); 1118 src += src_pitch; 1119 dst += dst_pitch; 1120 } 1121 1122 return TRUE; 1123} 1124 1125Bool 1126CG14CheckComposite(int op, PicturePtr pSrcPicture, 1127 PicturePtr pMaskPicture, 1128 PicturePtr pDstPicture) 1129{ 1130 int i, ok = FALSE; 1131 1132 ENTER; 1133 1134 /* 1135 * SX is in theory capable of accelerating pretty much all Xrender ops, 1136 * even coordinate transformation and gradients. Support will be added 1137 * over time and likely have to spill over into its own source file. 1138 */ 1139 1140 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 1141 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 1142 return FALSE; 1143 } 1144 1145 if (pSrcPicture != NULL) { 1146 i = 0; 1147 while ((i < arraysize(src_formats)) && (!ok)) { 1148 ok = (pSrcPicture->format == src_formats[i]); 1149 i++; 1150 } 1151 1152 if (!ok) { 1153 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 1154 __func__, pSrcPicture->format); 1155 return FALSE; 1156 } 1157 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 1158 } 1159 1160 if (pDstPicture != NULL) { 1161 i = 0; 1162 ok = FALSE; 1163 while ((i < arraysize(src_formats)) && (!ok)) { 1164 ok = (pDstPicture->format == src_formats[i]); 1165 i++; 1166 } 1167 1168 if (!ok) { 1169 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 1170 __func__, pDstPicture->format); 1171 return FALSE; 1172 } 1173 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 1174 } 1175 1176 if (pMaskPicture != NULL) { 1177 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 1178 pMaskPicture->pDrawable->width, 1179 pMaskPicture->pDrawable->height); 1180 } 1181 return TRUE; 1182} 1183 1184Bool 1185CG14PrepareComposite(int op, PicturePtr pSrcPicture, 1186 PicturePtr pMaskPicture, 1187 PicturePtr pDstPicture, 1188 PixmapPtr pSrc, 1189 PixmapPtr pMask, 1190 PixmapPtr pDst) 1191{ 1192 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1193 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1194 1195 ENTER; 1196 1197 p->no_source_pixmap = FALSE; 1198 p->source_is_solid = FALSE; 1199 1200 if (pSrcPicture->format == PICT_a1) { 1201 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 1202 pDstPicture->format, op); 1203 if (pMaskPicture != NULL) { 1204 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 1205 } 1206 } 1207 if (pSrcPicture->pSourcePict != NULL) { 1208 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 1209 p->fillcolour = 1210 pSrcPicture->pSourcePict->solidFill.color; 1211 DPRINTF(X_ERROR, "%s: solid src %08x\n", 1212 __func__, p->fillcolour); 1213 p->no_source_pixmap = TRUE; 1214 p->source_is_solid = TRUE; 1215 } 1216 } 1217 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 1218 if (pMaskPicture->pSourcePict->type == 1219 SourcePictTypeSolidFill) { 1220 p->fillcolour = 1221 pMaskPicture->pSourcePict->solidFill.color; 1222 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 1223 __func__, p->fillcolour); 1224 } 1225 } 1226 if (pMaskPicture != NULL) { 1227 p->mskoff = exaGetPixmapOffset(pMask); 1228 p->mskpitch = exaGetPixmapPitch(pMask); 1229 p->mskformat = pMaskPicture->format; 1230 } else { 1231 p->mskoff = 0; 1232 p->mskpitch = 0; 1233 p->mskformat = 0; 1234 } 1235 if (pSrc != NULL) { 1236 p->source_is_solid = 1237 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 1238 p->srcoff = exaGetPixmapOffset(pSrc); 1239 p->srcpitch = exaGetPixmapPitch(pSrc); 1240 if (p->source_is_solid) { 1241 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 1242 } 1243 } 1244 p->srcformat = pSrcPicture->format; 1245 p->dstformat = pDstPicture->format; 1246 1247 if (p->source_is_solid) { 1248 uint32_t temp; 1249 1250 /* stuff source colour into SX registers, swap as needed */ 1251 temp = p->fillcolour; 1252 switch (p->srcformat) { 1253 case PICT_a8r8g8b8: 1254 case PICT_x8r8g8b8: 1255 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1256 temp = temp >> 8; 1257 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1258 temp = temp >> 8; 1259 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1260 break; 1261 case PICT_a8b8g8r8: 1262 case PICT_x8b8g8r8: 1263 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1264 temp = temp >> 8; 1265 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1266 temp = temp >> 8; 1267 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1268 break; 1269 } 1270 write_sx_reg(p, SX_QUEUED(8), 0xff); 1271 } 1272 p->op = op; 1273 if (op == PictOpSrc) { 1274 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 1275 } 1276#ifdef SX_DEBUG 1277 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 1278 *(uint32_t *)(p->fb + p->srcoff)); 1279#endif 1280 return TRUE; 1281} 1282 1283void 1284CG14Composite(PixmapPtr pDst, int srcX, int srcY, 1285 int maskX, int maskY, 1286 int dstX, int dstY, 1287 int width, int height) 1288{ 1289 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1290 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1291 uint32_t dstoff, dstpitch; 1292 uint32_t dst, msk, src; 1293 int flip = 0; 1294 1295 ENTER; 1296 dstoff = exaGetPixmapOffset(pDst); 1297 dstpitch = exaGetPixmapPitch(pDst); 1298 1299 flip = (PICT_FORMAT_TYPE(p->srcformat) != 1300 PICT_FORMAT_TYPE(p->dstformat)); 1301 1302 switch (p->op) { 1303 case PictOpOver: 1304 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 1305 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 1306 p->mskformat, p->dstformat, srcX, srcY); 1307 if (p->source_is_solid) { 1308 switch (p->mskformat) { 1309 case PICT_a8: 1310 msk = p->mskoff + 1311 (maskY * p->mskpitch) + 1312 maskX; 1313 CG14Comp_Over8Solid(p, 1314 msk, p->mskpitch, 1315 dst, dstpitch, 1316 width, height); 1317 break; 1318 case PICT_a8r8g8b8: 1319 case PICT_a8b8g8r8: 1320 msk = p->mskoff + 1321 (maskY * p->mskpitch) + 1322 (maskX << 2); 1323 CG14Comp_Over32Solid(p, 1324 msk, p->mskpitch, 1325 dst, dstpitch, 1326 width, height); 1327 break; 1328 default: 1329 xf86Msg(X_ERROR, 1330 "unsupported mask format %08x\n", p->mskformat); 1331 } 1332 } else { 1333 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 1334 p->mskformat); 1335 switch (p->srcformat) { 1336 case PICT_a8r8g8b8: 1337 case PICT_a8b8g8r8: 1338 src = p->srcoff + 1339 (srcY * p->srcpitch) + 1340 (srcX << 2); 1341 dst = dstoff + 1342 (dstY * dstpitch) + 1343 (dstX << 2); 1344 if (p->mskformat == PICT_a8) { 1345 msk = p->mskoff + 1346 (maskY * p->mskpitch) + 1347 maskX; 1348 CG14Comp_Over32Mask(p, 1349 src, p->srcpitch, 1350 msk, p->mskpitch, 1351 dst, dstpitch, 1352 width, height, flip); 1353 } else { 1354 CG14Comp_Over32(p, 1355 src, p->srcpitch, 1356 dst, dstpitch, 1357 width, height, flip); 1358 } 1359 break; 1360 case PICT_x8r8g8b8: 1361 case PICT_x8b8g8r8: 1362 src = p->srcoff + 1363 (srcY * p->srcpitch) + 1364 (srcX << 2); 1365 dst = dstoff + 1366 (dstY * dstpitch) + 1367 (dstX << 2); 1368 if (p->mskformat == PICT_a8) { 1369 msk = p->mskoff + 1370 (maskY * p->mskpitch) + 1371 maskX; 1372 CG14Comp_Over32Mask_noalpha(p, 1373 src, p->srcpitch, 1374 msk, p->mskpitch, 1375 dst, dstpitch, 1376 width, height, flip); 1377 } else if ((p->mskformat == PICT_a8r8g8b8) || 1378 (p->mskformat == PICT_a8b8g8r8)) { 1379 msk = p->mskoff + 1380 (maskY * p->mskpitch) + 1381 (maskX << 2); 1382 CG14Comp_Over32Mask32_noalpha(p, 1383 src, p->srcpitch, 1384 msk, p->mskpitch, 1385 dst, dstpitch, 1386 width, height, flip); 1387 } else { 1388 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1389 } 1390 break; 1391 default: 1392 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1393 __func__, p->srcformat); 1394 } 1395 } 1396 break; 1397 case PictOpAdd: 1398 DPRINTF(X_ERROR, "Add %08x %08x\n", 1399 p->srcformat, p->dstformat); 1400 switch (p->srcformat) { 1401 case PICT_a8: 1402 src = p->srcoff + 1403 (srcY * p->srcpitch) + srcX; 1404 if (p->dstformat == PICT_a8) { 1405 dst = dstoff + 1406 (dstY * dstpitch) + dstX; 1407 CG14Comp_Add8(p, 1408 src, p->srcpitch, 1409 dst, dstpitch, 1410 width, height); 1411 } else { 1412 dst = dstoff + 1413 (dstY * dstpitch) + 1414 (dstX << 2); 1415 CG14Comp_Add8_32(p, 1416 src, p->srcpitch, 1417 dst, dstpitch, 1418 width, height); 1419 } 1420 break; 1421 case PICT_a8r8g8b8: 1422 case PICT_x8r8g8b8: 1423 src = p->srcoff + 1424 (srcY * p->srcpitch) + (srcX << 2); 1425 dst = dstoff + (dstY * dstpitch) + 1426 (dstX << 2); 1427 CG14Comp_Add32(p, src, p->srcpitch, 1428 dst, dstpitch, width, height); 1429 break; 1430 default: 1431 xf86Msg(X_ERROR, 1432 "unsupported src format\n"); 1433 } 1434 break; 1435 case PictOpSrc: 1436 DPRINTF(X_ERROR, "Src %08x %08x\n", 1437 p->srcformat, p->dstformat); 1438 if (p->mskformat != 0) 1439 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1440 if (p->srcformat == PICT_a8) { 1441 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1442 } else { 1443 /* convert between RGB and BGR? */ 1444 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1445 } 1446 break; 1447 default: 1448 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1449 } 1450 exaMarkSync(pDst->drawable.pScreen); 1451} 1452 1453 1454 1455Bool 1456CG14InitAccel(ScreenPtr pScreen) 1457{ 1458 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1459 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1460 ExaDriverPtr pExa; 1461 1462 pExa = exaDriverAlloc(); 1463 if (!pExa) 1464 return FALSE; 1465 1466 p->pExa = pExa; 1467 1468 pExa->exa_major = EXA_VERSION_MAJOR; 1469 pExa->exa_minor = EXA_VERSION_MINOR; 1470 1471 pExa->memoryBase = p->fb; 1472 pExa->memorySize = p->memsize; 1473 pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 1474 1475 /* 1476 * SX memory instructions are written to 64bit aligned addresses with 1477 * a 3 bit displacement. Make sure the displacement remains constant 1478 * within one column 1479 */ 1480 1481 pExa->pixmapOffsetAlign = 8; 1482 pExa->pixmapPitchAlign = 8; 1483 1484 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1485 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1486 /*| EXA_MIXED_PIXMAPS*/; 1487 1488 /* 1489 * these limits are bogus 1490 * SX doesn't deal with coordinates at all, so there is no limit but 1491 * we have to put something here 1492 */ 1493 pExa->maxX = 4096; 1494 pExa->maxY = 4096; 1495 1496 pExa->WaitMarker = CG14WaitMarker; 1497 1498 pExa->PrepareSolid = CG14PrepareSolid; 1499 pExa->Solid = CG14Solid; 1500 pExa->DoneSolid = CG14DoneCopy; 1501 pExa->PrepareCopy = CG14PrepareCopy; 1502 pExa->Copy = CG14Copy32; 1503 pExa->DoneCopy = CG14DoneCopy; 1504 if (p->use_xrender) { 1505 pExa->CheckComposite = CG14CheckComposite; 1506 pExa->PrepareComposite = CG14PrepareComposite; 1507 pExa->Composite = CG14Composite; 1508 pExa->DoneComposite = CG14DoneCopy; 1509 } 1510 1511 /* EXA hits more optimized paths when it does not have to fallback 1512 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1513 */ 1514 pExa->UploadToScreen = CG14UploadToScreen; 1515 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1516 1517 p->queuecount = 0; 1518 /* do some hardware init */ 1519 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1520 p->last_mask = 0xffffffff; 1521 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1522 p->last_rop = 0xcc; 1523 return exaDriverInit(pScreen, pExa); 1524} 1525