cg14_accel.c revision c1537409
1/* $NetBSD: cg14_accel.c,v 1.26 2021/12/19 04:50:27 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <sys/types.h> 37 38/* all driver need this */ 39#include "xf86.h" 40#include "xf86_OSproc.h" 41#include "compiler.h" 42 43#include "cg14.h" 44 45//#define SX_DEBUG 46 47#ifdef SX_DEBUG 48#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 49#define DPRINTF xf86Msg 50#else 51#define ENTER 52#define DPRINTF while (0) xf86Msg 53#endif 54 55#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 56 57/* 0xcc is SX's GXcopy equivalent */ 58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 59 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 60 61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 62 PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 64 65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67 68static inline void 69CG14Wait(Cg14Ptr p) 70{ 71 int bail = 10000000; 72 /* we wait for the busy bit to clear */ 73 while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74 (bail > 0)) { 75 bail--; 76 }; 77 if (bail == 0) { 78 xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79 read_sx_reg(p, SX_CONTROL_STATUS), 80 read_sx_reg(p, SX_ERROR)); 81 } 82} 83 84static void 85CG14WaitMarker(ScreenPtr pScreen, int Marker) 86{ 87 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 88 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 89 90 CG14Wait(p); 91} 92 93static Bool 94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 95 int xdir, int ydir, int alu, Pixel planemask) 96{ 97 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 98 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 99 100 ENTER; 101 DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__, 102 pSrcPixmap->drawable.bitsPerPixel, alu); 103 104 if (planemask != p->last_mask) { 105 CG14Wait(p); 106 write_sx_reg(p, SX_PLANEMASK, planemask); 107 p->last_mask = planemask; 108 } 109 alu = sx_rop[alu]; 110 if (alu != p->last_rop) { 111 CG14Wait(p); 112 write_sx_reg(p, SX_ROP_CONTROL, alu); 113 p->last_rop = alu; 114 } 115 switch (pSrcPixmap->drawable.bitsPerPixel) { 116 case 8: 117 p->pExa->Copy = CG14Copy8; 118 break; 119 case 32: 120 p->pExa->Copy = CG14Copy32; 121 break; 122 default: 123 xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124 pSrcPixmap->drawable.bitsPerPixel); 125 } 126 p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 127 p->srcoff = exaGetPixmapOffset(pSrcPixmap); 128 p->xdir = xdir; 129 p->ydir = ydir; 130 return TRUE; 131} 132 133static void 134CG14Copy32(PixmapPtr pDstPixmap, 135 int srcX, int srcY, int dstX, int dstY, int w, int h) 136{ 137 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 138 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 139 int dstpitch, dstoff, srcpitch, srcoff; 140 int srcstart, dststart, xinc, srcinc, dstinc; 141 int line, count, s, d, num; 142 143 ENTER; 144 dstpitch = exaGetPixmapPitch(pDstPixmap); 145 dstoff = exaGetPixmapOffset(pDstPixmap); 146 srcpitch = p->srcpitch; 147 srcoff = p->srcoff; 148 /* 149 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 150 * actually wrote anything and only sync if it did 151 */ 152 srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 153 dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 154 155 /* 156 * we always copy up to 32 pixels at a time so direction doesn't 157 * matter if w<=32 158 */ 159 if (w > 32) { 160 if (p->xdir < 0) { 161 srcstart += (w - 32) << 2; 162 dststart += (w - 32) << 2; 163 xinc = -128; 164 } else 165 xinc = 128; 166 } else 167 xinc = 128; 168 if (p->ydir < 0) { 169 srcstart += (h - 1) * srcpitch; 170 dststart += (h - 1) * dstpitch; 171 srcinc = -srcpitch; 172 dstinc = -dstpitch; 173 } else { 174 srcinc = srcpitch; 175 dstinc = dstpitch; 176 } 177 if (p->last_rop == 0xcc) { 178 /* plain old copy */ 179 if ( xinc > 0) { 180 /* going left to right */ 181 for (line = 0; line < h; line++) { 182 count = 0; 183 s = srcstart; 184 d = dststart; 185 while ( count < w) { 186 num = min(32, w - count); 187 write_sx_io(p, s, 188 SX_LD(10, num - 1, s & 7)); 189 write_sx_io(p, d, 190 SX_STM(10, num - 1, d & 7)); 191 s += xinc; 192 d += xinc; 193 count += 32; 194 } 195 srcstart += srcinc; 196 dststart += dstinc; 197 } 198 } else { 199 /* going right to left */ 200 int i, chunks = (w >> 5); 201 for (line = 0; line < h; line++) { 202 s = srcstart; 203 d = dststart; 204 count = w; 205 for (i = 0; i < chunks; i++) { 206 write_sx_io(p, s, 207 SX_LD(10, 31, s & 7)); 208 write_sx_io(p, d, 209 SX_STM(10, 31, d & 7)); 210 s -= 128; 211 d -= 128; 212 count -= 32; 213 } 214 /* leftovers, if any */ 215 if (count > 0) { 216 s += (32 - count) << 2; 217 d += (32 - count) << 2; 218 write_sx_io(p, s, 219 SX_LD(10, count - 1, s & 7)); 220 write_sx_io(p, d, 221 SX_STM(10, count - 1, d & 7)); 222 } 223 srcstart += srcinc; 224 dststart += dstinc; 225 } 226 } 227 } else { 228 /* ROPs needed */ 229 if ( xinc > 0) { 230 /* going left to right */ 231 for (line = 0; line < h; line++) { 232 count = 0; 233 s = srcstart; 234 d = dststart; 235 while ( count < w) { 236 num = min(32, w - count); 237 write_sx_io(p, s, 238 SX_LD(10, num - 1, s & 7)); 239 write_sx_io(p, d, 240 SX_LD(42, num - 1, d & 7)); 241 if (num > 16) { 242 write_sx_reg(p, SX_INSTRUCTIONS, 243 SX_ROP(10, 42, 74, 15)); 244 write_sx_reg(p, SX_INSTRUCTIONS, 245 SX_ROP(26, 58, 90, num - 17)); 246 } else { 247 write_sx_reg(p, SX_INSTRUCTIONS, 248 SX_ROP(10, 42, 74, num - 1)); 249 } 250 write_sx_io(p, d, 251 SX_STM(74, num - 1, d & 7)); 252 s += xinc; 253 d += xinc; 254 count += 32; 255 } 256 srcstart += srcinc; 257 dststart += dstinc; 258 } 259 } else { 260 /* going right to left */ 261 int i, chunks = (w >> 5); 262 for (line = 0; line < h; line++) { 263 s = srcstart; 264 d = dststart; 265 count = w; 266 for (i = 0; i < chunks; i++) { 267 write_sx_io(p, s, SX_LD(10, 31, s & 7)); 268 write_sx_io(p, d, SX_LD(42, 31, d & 7)); 269 write_sx_reg(p, SX_INSTRUCTIONS, 270 SX_ROP(10, 42, 74, 15)); 271 write_sx_reg(p, SX_INSTRUCTIONS, 272 SX_ROP(26, 58, 90, 15)); 273 write_sx_io(p, d, 274 SX_STM(74, 31, d & 7)); 275 s -= 128; 276 d -= 128; 277 count -= 32; 278 } 279 /* leftovers, if any */ 280 if (count > 0) { 281 s += (32 - count) << 2; 282 d += (32 - count) << 2; 283 write_sx_io(p, s, 284 SX_LD(10, count - 1, s & 7)); 285 write_sx_io(p, d, 286 SX_LD(42, count - 1, d & 7)); 287 if (count > 16) { 288 write_sx_reg(p, SX_INSTRUCTIONS, 289 SX_ROP(10, 42, 74, 15)); 290 write_sx_reg(p, SX_INSTRUCTIONS, 291 SX_ROP(26, 58, 90, count - 17)); 292 } else { 293 write_sx_reg(p, SX_INSTRUCTIONS, 294 SX_ROP(10, 42, 74, count - 1)); 295 } 296 297 write_sx_io(p, d, 298 SX_STM(74, count - 1, d & 7)); 299 } 300 srcstart += srcinc; 301 dststart += dstinc; 302 } 303 } 304 } 305 exaMarkSync(pDstPixmap->drawable.pScreen); 306} 307 308/* 309 * copy with same alignment, left to right, no ROP 310 */ 311static void 312CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 313{ 314 int saddr, daddr, pre, cnt, wrds; 315 316 ENTER; 317 318 pre = srcstart & 3; 319 if (pre != 0) pre = 4 - pre; 320 pre = min(pre, w); 321 322 while (h > 0) { 323 saddr = srcstart; 324 daddr = dststart; 325 cnt = w; 326 if (pre > 0) { 327 write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 328 write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7)); 329 saddr += pre; 330 daddr += pre; 331 cnt -= pre; 332 if (cnt == 0) goto next; 333 } 334 while (cnt > 3) { 335 wrds = min(32, cnt >> 2); 336 write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 337 write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 338 saddr += wrds << 2; 339 daddr += wrds << 2; 340 cnt -= wrds << 2; 341 } 342 if (cnt > 0) { 343 write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 344 write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7)); 345 } 346next: 347 srcstart += srcpitch; 348 dststart += dstpitch; 349 h--; 350 } 351} 352 353/* 354 * copy with same alignment, left to right, ROP 355 */ 356static void 357CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 358{ 359 int saddr, daddr, pre, cnt, wrds; 360 361 ENTER; 362 363 pre = srcstart & 3; 364 if (pre != 0) pre = 4 - pre; 365 pre = min(pre, w); 366 367 while (h > 0) { 368 saddr = srcstart; 369 daddr = dststart; 370 cnt = w; 371 if (pre > 0) { 372 write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 373 write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7)); 374 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1)); 375 write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7)); 376 saddr += pre; 377 daddr += pre; 378 cnt -= pre; 379 if (cnt == 0) goto next; 380 } 381 while (cnt > 3) { 382 wrds = min(32, cnt >> 2); 383 write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 384 write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7)); 385 if (cnt > 16) { 386 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15)); 387 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17)); 388 } else 389 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1)); 390 write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7)); 391 saddr += wrds << 2; 392 daddr += wrds << 2; 393 cnt -= wrds << 2; 394 } 395 if (cnt > 0) { 396 write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 397 write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7)); 398 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1)); 399 write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7)); 400 } 401next: 402 srcstart += srcpitch; 403 dststart += dstpitch; 404 h--; 405 } 406} 407 408/* up to 124 pixels so direction doesn't matter, unaligned, ROP */ 409static void 410CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 411{ 412 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 413 int ssreg; 414#ifdef DEBUG 415 int taddr = 4 + dstpitch * 50; 416#endif 417 uint32_t lmask, rmask; 418 ENTER; 419 420 pre = dststart & 3; 421 lmask = 0xffffffff >> pre; 422 spre = srcstart & 3; 423 /* 424 * make sure we count all the words needed to cover the destination 425 * line, covering potential partials on both ends 426 */ 427 wrds = (w + pre + 3) >> 2; 428 swrds = (w + spre + 3) >> 2; 429 430 if (spre < pre) { 431 dist = 32 - (pre - spre) * 8; 432 sreg = 9; 433 } else { 434 dist = (spre - pre) * 8; 435 sreg = 8; 436 } 437 438 /* 439 * mask out trailing pixels to avoid partial writes 440 */ 441 post = (dststart + w) & 3; 442 if (post != 0) { 443 rmask = ~(0xffffffff >> (post * 8)); 444 write_sx_reg(p, SX_QUEUED(7), rmask); 445 write_sx_reg(p, SX_QUEUED(6), ~rmask); 446 } 447 448 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 449 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 450 451 /* mask out the leading pixels in dst by using a mask and ROP */ 452 if (pre != 0) { 453 CG14Wait(p); 454 write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); 455 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 456 } 457 458 saddr = srcstart & ~3; 459 daddr = dststart & ~3; 460 461 while (h > 0) { 462 write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7)); 463 write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 464 if (wrds > 15) { 465 if (dist != 0) { 466 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 467 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 468 /* shifted source pixels are now at register 40+ */ 469 ssreg = 40; 470 } else ssreg = 8; 471 if (pre != 0) { 472 /* mask out leading junk */ 473 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 474 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 475 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 476 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14)); 477 } else { 478 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15)); 479 } 480 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16)); 481 } else { 482 if (dist != 0) { 483 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 484 ssreg = 40; 485 } else ssreg = 8; 486 if (pre != 0) { 487 /* mask out leading junk */ 488 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 489 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 490 write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 491 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds)); 492 } else { 493 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds)); 494 } 495 } 496 if (post != 0) { 497 /* 498 * if the last word to be written out is a partial we 499 * mask out the leftovers and replace them with 500 * background pixels 501 * we could pull the same ROP * mask trick as we do on 502 * the left end but it's less annoying this way and 503 * the instruction count is the same 504 */ 505 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0)); 506 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0)); 507 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0)); 508 } 509#ifdef DEBUG 510 write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 511 taddr += dstpitch; 512#endif 513 write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 514 saddr += srcpitch; 515 daddr += dstpitch; 516 h--; 517 } 518} 519 520/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 521static void 522CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 523{ 524 int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 525 int ssreg; 526#ifdef DEBUG 527 int taddr = 4 + dstpitch * 50; 528#endif 529 uint32_t lmask, rmask; 530 ENTER; 531 532 pre = dststart & 3; 533 lmask = 0xffffffff >> pre; 534 spre = srcstart & 3; 535 /* 536 * make sure we count all the words needed to cover the destination 537 * line, covering potential partials on both ends 538 */ 539 wrds = (w + pre + 3) >> 2; 540 swrds = (w + spre + 3) >> 2; 541 542 if (spre < pre) { 543 dist = 32 - (pre - spre) * 8; 544 sreg = 9; 545 } else { 546 dist = (spre - pre) * 8; 547 sreg = 8; 548 } 549 550 /* 551 * mask out trailing pixels to avoid partial writes 552 */ 553 post = (dststart + w) & 3; 554 if (post != 0) { 555 rmask = ~(0xffffffff >> (post * 8)); 556 write_sx_reg(p, SX_QUEUED(7), rmask); 557 write_sx_reg(p, SX_QUEUED(6), ~rmask); 558 } 559 560 DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 561 w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 562 563 /* mask out the leading pixels in dst by using a mask and ROP */ 564 if (pre != 0) { 565 CG14Wait(p); 566 write_sx_reg(p, SX_ROP_CONTROL, 0xca); 567 write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 568 } 569 570 saddr = srcstart & ~3; 571 daddr = dststart & ~3; 572 573 while (h > 0) { 574 write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 575 if (wrds > 15) { 576 if (dist != 0) { 577 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 578 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 579 /* shifted source pixels are now at register 40+ */ 580 ssreg = 40; 581 } else ssreg = 8; 582 if (pre != 0) { 583 /* read only the first word */ 584 write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 585 /* mask out leading junk */ 586 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 587 } 588 } else { 589 if (dist != 0) { 590 write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 591 ssreg = 40; 592 } else ssreg = 8; 593 if (pre != 0) { 594 /* read only the first word */ 595 write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 596 /* mask out leading junk */ 597 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 598 } 599 } 600 if (post != 0) { 601 int laddr = daddr + ((wrds - 1) << 2); 602 /* 603 * if the last word to be written out is a partial we 604 * mask out the leftovers and replace them with 605 * background pixels 606 * we could pull the same ROP * mask trick as we do on 607 * the left end but it's less annoying this way and 608 * the instruction count is the same 609 */ 610 write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7)); 611 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0)); 612 write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0)); 613 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0)); 614 } 615#ifdef DEBUG 616 write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 617 taddr += dstpitch; 618#endif 619 write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7)); 620 saddr += srcpitch; 621 daddr += dstpitch; 622 h--; 623 } 624} 625 626static void 627CG14Copy8(PixmapPtr pDstPixmap, 628 int srcX, int srcY, int dstX, int dstY, int w, int h) 629{ 630 ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 631 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 632 int dstpitch, dstoff, srcpitch, srcoff; 633 int srcstart, dststart, xinc, srcinc, dstinc; 634 int line, count, s, d, num; 635 636 ENTER; 637 dstpitch = exaGetPixmapPitch(pDstPixmap); 638 dstoff = exaGetPixmapOffset(pDstPixmap); 639 srcpitch = p->srcpitch; 640 srcoff = p->srcoff; 641 /* 642 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 643 * actually wrote anything and only sync if it did 644 */ 645 srcstart = srcX + (srcpitch * srcY) + srcoff; 646 dststart = dstX + (dstpitch * dstY) + dstoff; 647 648 if (p->ydir < 0) { 649 srcstart += (h - 1) * srcpitch; 650 dststart += (h - 1) * dstpitch; 651 srcinc = -srcpitch; 652 dstinc = -dstpitch; 653 } else { 654 srcinc = srcpitch; 655 dstinc = dstpitch; 656 } 657 658 /* 659 * this copies up to 124 pixels wide in one go, so horizontal 660 * direction / overlap don't matter 661 * uses all 32bit accesses and funnel shifter for unaligned copies 662 */ 663 if ((w < 125) && (w > 8)) { 664 switch (p->last_rop) { 665 case 0xcc: 666 CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 667 break; 668 default: 669 CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc); 670 } 671 return; 672 } 673 674 /* 675 * only invert x direction if absolutely necessary, it's a pain to 676 * go backwards on SX so avoid as much as possible 677 */ 678 if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) { 679 xinc = -32; 680 } else 681 xinc = 32; 682 683 /* 684 * for aligned copies we can go all 32bit and avoid VRAM reads in the 685 * most common case 686 */ 687 if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) { 688 switch (p->last_rop) { 689 case 0xcc: 690 CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 691 break; 692 default: 693 CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc); 694 } 695 return; 696 } 697 698 /* 699 * if we make it here we either have something large and unaligned, 700 * something we need to do right to left, or something tiny. 701 * we handle the non-tiny cases by breaking them down into chunks that 702 * Copy8_short_*() can handle, making sure the destinations are 32bit 703 * aligned whenever possible 704 * since we copy by block, not by line we need to go backwards even if 705 * we don't copy within the same line 706 */ 707 if (w > 8) { 708 int next, wi, end = dststart + w; 709 DPRINTF(X_ERROR, "%s %08x %08x %d\n", __func__, srcstart, dststart, w); 710 if ((p->xdir < 0) && (srcoff == dstoff)) { 711 srcstart += w; 712 next = max((end - 120) & ~3, dststart); 713 wi = end - next; 714 srcstart -= wi; 715 while (wi > 0) { 716 DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", __func__, srcstart, next, wi); 717 if (p->last_rop == 0xcc) { 718 CG14Copy8_short_norop(p, srcstart, next, wi, h, srcinc, dstinc); 719 } else 720 CG14Copy8_short_rop(p, srcstart, next, wi, h, srcinc, dstinc); 721 end = next; 722 /* 723 * avoid extremely narrow copies so I don't 724 * have to deal with dangling start and end 725 * pixels in the same word 726 */ 727 if ((end - dststart) < 140) { 728 next = max((end - 80) & ~3, dststart); 729 } else { 730 next = max((end - 120) & ~3, dststart); 731 } 732 wi = end - next; 733 srcstart -= wi; 734 } 735 } else { 736 next = min(end, (dststart + 124) & ~3); 737 wi = next - dststart; 738 while (wi > 0) { 739 DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", __func__, srcstart, next, wi); 740 if (p->last_rop == 0xcc) { 741 CG14Copy8_short_norop(p, srcstart, dststart, wi, h, srcinc, dstinc); 742 } else 743 CG14Copy8_short_rop(p, srcstart, dststart, wi, h, srcinc, dstinc); 744 srcstart += wi; 745 dststart = next; 746 if ((end - dststart) < 140) { 747 next = min(end, (dststart + 84) & ~3); 748 } else { 749 next = min(end, (dststart + 124) & ~3); 750 } 751 wi = next - dststart; 752 } 753 } 754 return; 755 } 756 if (xinc < 0) { 757 srcstart += (w - 32); 758 dststart += (w - 32); 759 } 760 761 DPRINTF(X_ERROR, "%s fallback to byte-wise %d %d\n", __func__, w, h); 762 if (p->last_rop == 0xcc) { 763 /* plain old copy */ 764 if ( xinc > 0) { 765 /* going left to right */ 766 for (line = 0; line < h; line++) { 767 count = 0; 768 s = srcstart; 769 d = dststart; 770 while ( count < w) { 771 num = min(32, w - count); 772 write_sx_io(p, s, 773 SX_LDB(10, num - 1, s & 7)); 774 write_sx_io(p, d, 775 SX_STBM(10, num - 1, d & 7)); 776 s += xinc; 777 d += xinc; 778 count += 32; 779 } 780 srcstart += srcinc; 781 dststart += dstinc; 782 } 783 } else { 784 /* going right to left */ 785 int i, chunks = (w >> 5); 786 for (line = 0; line < h; line++) { 787 s = srcstart; 788 d = dststart; 789 count = w; 790 for (i = 0; i < chunks; i++) { 791 write_sx_io(p, s, 792 SX_LDB(10, 31, s & 7)); 793 write_sx_io(p, d, 794 SX_STBM(10, 31, d & 7)); 795 s -= 32; 796 d -= 32; 797 count -= 32; 798 } 799 /* leftovers, if any */ 800 if (count > 0) { 801 s += (32 - count); 802 d += (32 - count); 803 write_sx_io(p, s, 804 SX_LDB(10, count - 1, s & 7)); 805 write_sx_io(p, d, 806 SX_STBM(10, count - 1, d & 7)); 807 } 808 srcstart += srcinc; 809 dststart += dstinc; 810 } 811 } 812 } else { 813 /* ROPs needed */ 814 if ( xinc > 0) { 815 /* going left to right */ 816 for (line = 0; line < h; line++) { 817 count = 0; 818 s = srcstart; 819 d = dststart; 820 while ( count < w) { 821 num = min(32, w - count); 822 write_sx_io(p, s, 823 SX_LDB(10, num - 1, s & 7)); 824 write_sx_io(p, d, 825 SX_LDB(42, num - 1, d & 7)); 826 if (num > 16) { 827 write_sx_reg(p, SX_INSTRUCTIONS, 828 SX_ROP(10, 42, 74, 15)); 829 write_sx_reg(p, SX_INSTRUCTIONS, 830 SX_ROP(26, 58, 90, num - 17)); 831 } else { 832 write_sx_reg(p, SX_INSTRUCTIONS, 833 SX_ROP(10, 42, 74, num - 1)); 834 } 835 write_sx_io(p, d, 836 SX_STBM(74, num - 1, d & 7)); 837 s += xinc; 838 d += xinc; 839 count += 32; 840 } 841 srcstart += srcinc; 842 dststart += dstinc; 843 } 844 } else { 845 /* going right to left */ 846 int i, chunks = (w >> 5); 847 for (line = 0; line < h; line++) { 848 s = srcstart; 849 d = dststart; 850 count = w; 851 for (i = 0; i < chunks; i++) { 852 write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 853 write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 854 write_sx_reg(p, SX_INSTRUCTIONS, 855 SX_ROP(10, 42, 74, 15)); 856 write_sx_reg(p, SX_INSTRUCTIONS, 857 SX_ROP(26, 58, 90, 15)); 858 write_sx_io(p, d, 859 SX_STBM(74, 31, d & 7)); 860 s -= 128; 861 d -= 128; 862 count -= 32; 863 } 864 /* leftovers, if any */ 865 if (count > 0) { 866 s += (32 - count); 867 d += (32 - count); 868 write_sx_io(p, s, 869 SX_LDB(10, count - 1, s & 7)); 870 write_sx_io(p, d, 871 SX_LDB(42, count - 1, d & 7)); 872 if (count > 16) { 873 write_sx_reg(p, SX_INSTRUCTIONS, 874 SX_ROP(10, 42, 74, 15)); 875 write_sx_reg(p, SX_INSTRUCTIONS, 876 SX_ROP(26, 58, 90, count - 17)); 877 } else { 878 write_sx_reg(p, SX_INSTRUCTIONS, 879 SX_ROP(10, 42, 74, count - 1)); 880 } 881 882 write_sx_io(p, d, 883 SX_STBM(74, count - 1, d & 7)); 884 } 885 srcstart += srcinc; 886 dststart += dstinc; 887 } 888 } 889 } 890 exaMarkSync(pDstPixmap->drawable.pScreen); 891} 892 893static void 894CG14DoneCopy(PixmapPtr pDstPixmap) 895{ 896} 897 898static Bool 899CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 900{ 901 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 902 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 903 904 ENTER; 905 DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n", 906 pPixmap->drawable.bitsPerPixel, fg); 907 908 /* 909 * GXset and GXclear are really just specual cases of GXcopy with 910 * fixed fill colour 911 */ 912 switch (alu) { 913 case GXclear: 914 alu = GXcopy; 915 fg = 0; 916 break; 917 case GXset: 918 alu = GXcopy; 919 fg = 0xffffffff; 920 break; 921 } 922 /* repeat the colour in every sub byte if we're in 8 bit */ 923 if (pPixmap->drawable.bitsPerPixel == 8) { 924 fg |= fg << 8; 925 fg |= fg << 16; 926 } 927 write_sx_reg(p, SX_QUEUED(8), fg); 928 write_sx_reg(p, SX_QUEUED(9), fg); 929 if (planemask != p->last_mask) { 930 CG14Wait(p); 931 write_sx_reg(p, SX_PLANEMASK, planemask); 932 p->last_mask = planemask; 933 } 934 alu = sx_rop[alu]; 935 if (alu != p->last_rop) { 936 CG14Wait(p); 937 write_sx_reg(p, SX_ROP_CONTROL, alu); 938 p->last_rop = alu; 939 } 940 941 DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 942 return TRUE; 943} 944 945static void 946CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 947{ 948 int line, x, num; 949 uint32_t ptr; 950 951 ENTER; 952 if (p->last_rop == 0xcc) { 953 /* simple fill */ 954 for (line = 0; line < h; line++) { 955 x = 0; 956 while (x < w) { 957 ptr = start + (x << 2); 958 num = min(32, w - x); 959 write_sx_io(p, ptr, 960 SX_STS(8, num - 1, ptr & 7)); 961 x += 32; 962 } 963 start += pitch; 964 } 965 } else if (p->last_rop == 0xaa) { 966 /* nothing to do here */ 967 return; 968 } else { 969 /* alright, let's do actual ROP stuff */ 970 971 /* first repeat the fill colour into 16 registers */ 972 write_sx_reg(p, SX_INSTRUCTIONS, 973 SX_SELECT_S(8, 8, 10, 15)); 974 975 for (line = 0; line < h; line++) { 976 x = 0; 977 while (x < w) { 978 ptr = start + (x << 2); 979 num = min(32, w - x); 980 /* now suck fb data into registers */ 981 write_sx_io(p, ptr, 982 SX_LD(42, num - 1, ptr & 7)); 983 /* 984 * ROP them with the fill data we left in 10 985 * non-memory ops can only have counts up to 16 986 */ 987 if (num <= 16) { 988 write_sx_reg(p, SX_INSTRUCTIONS, 989 SX_ROP(10, 42, 74, num - 1)); 990 } else { 991 write_sx_reg(p, SX_INSTRUCTIONS, 992 SX_ROP(10, 42, 74, 15)); 993 write_sx_reg(p, SX_INSTRUCTIONS, 994 SX_ROP(10, 58, 90, num - 17)); 995 } 996 /* and write the result back into memory */ 997 write_sx_io(p, ptr, 998 SX_ST(74, num - 1, ptr & 7)); 999 x += 32; 1000 } 1001 start += pitch; 1002 } 1003 } 1004} 1005 1006static void 1007CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 1008{ 1009 int line, num, pre, cnt; 1010 uint32_t ptr; 1011 1012 ENTER; 1013 pre = start & 3; 1014 if (pre != 0) pre = 4 - pre; 1015 1016 if (p->last_rop == 0xcc) { 1017 /* simple fill */ 1018 for (line = 0; line < h; line++) { 1019 ptr = start; 1020 cnt = w; 1021 pre = min(pre, cnt); 1022 if (pre) { 1023 write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7)); 1024 ptr += pre; 1025 cnt -= pre; 1026 if (cnt == 0) goto next; 1027 } 1028 /* now do the aligned pixels in 32bit chunks */ 1029 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1030 while(cnt > 3) { 1031 num = min(32, cnt >> 2); 1032 write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7)); 1033 ptr += num << 2; 1034 cnt -= num << 2; 1035 } 1036 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1037 if (cnt > 0) { 1038 write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7)); 1039 } 1040 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1041next: 1042 start += pitch; 1043 } 1044 } else if (p->last_rop == 0xaa) { 1045 /* nothing to do here */ 1046 return; 1047 } else { 1048 /* alright, let's do actual ROP stuff */ 1049 1050 /* first repeat the fill colour into 16 registers */ 1051 write_sx_reg(p, SX_INSTRUCTIONS, 1052 SX_SELECT_S(8, 8, 10, 15)); 1053 1054 for (line = 0; line < h; line++) { 1055 ptr = start; 1056 cnt = w; 1057 pre = min(pre, cnt); 1058 if (pre) { 1059 write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7)); 1060 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1)); 1061 write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7)); 1062 ptr += pre; 1063 cnt -= pre; 1064 if (cnt == 0) goto next2; 1065 } 1066 /* now do the aligned pixels in 32bit chunks */ 1067 if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr); 1068 while(cnt > 3) { 1069 num = min(32, cnt >> 2); 1070 write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7)); 1071 if (num <= 16) { 1072 write_sx_reg(p, SX_INSTRUCTIONS, 1073 SX_ROP(10, 26, 58, num - 1)); 1074 } else { 1075 write_sx_reg(p, SX_INSTRUCTIONS, 1076 SX_ROP(10, 26, 58, 15)); 1077 write_sx_reg(p, SX_INSTRUCTIONS, 1078 SX_ROP(10, 42, 74, num - 17)); 1079 } 1080 write_sx_io(p, ptr & ~7, SX_ST(58, num - 1, ptr & 7)); 1081 ptr += num << 2; 1082 cnt -= num << 2; 1083 } 1084 if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt); 1085 if (cnt > 0) { 1086 write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7)); 1087 write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1)); 1088 write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7)); 1089 } 1090 if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w); 1091next2: 1092 start += pitch; 1093 } 1094 } 1095} 1096 1097static void 1098CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 1099{ 1100 ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1101 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1102 int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 1103 int start, depth; 1104 1105 ENTER; 1106 dstpitch = exaGetPixmapPitch(pPixmap); 1107 dstoff = exaGetPixmapOffset(pPixmap); 1108 1109 depth = pPixmap->drawable.bitsPerPixel; 1110 switch (depth) { 1111 case 32: 1112 start = dstoff + (y1 * dstpitch) + (x1 << 2); 1113 CG14Solid32(p, start, dstpitch, w, h); 1114 break; 1115 case 8: 1116 start = dstoff + (y1 * dstpitch) + x1; 1117 CG14Solid8(p, start, dstpitch, w, h); 1118 break; 1119 } 1120 1121 DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 1122 dstpitch, dstoff, start); 1123 DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 1124 read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 1125 exaMarkSync(pPixmap->drawable.pScreen); 1126} 1127 1128/* 1129 * Memcpy-based UTS. 1130 */ 1131static Bool 1132CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1133 char *src, int src_pitch) 1134{ 1135 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1136 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1137 char *dst = p->fb + exaGetPixmapOffset(pDst); 1138 int dst_pitch = exaGetPixmapPitch(pDst); 1139 1140 int bpp = pDst->drawable.bitsPerPixel; 1141 int cpp = (bpp + 7) >> 3; 1142 int wBytes = w * cpp; 1143 1144 ENTER; 1145 DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 1146 dst += (x * cpp) + (y * dst_pitch); 1147 1148 CG14Wait(p); 1149 1150 while (h--) { 1151 memcpy(dst, src, wBytes); 1152 src += src_pitch; 1153 dst += dst_pitch; 1154 } 1155 __asm("stbar;"); 1156 return TRUE; 1157} 1158 1159/* 1160 * Memcpy-based DFS. 1161 */ 1162static Bool 1163CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1164 char *dst, int dst_pitch) 1165{ 1166 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1167 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1168 char *src = p->fb + exaGetPixmapOffset(pSrc); 1169 int src_pitch = exaGetPixmapPitch(pSrc); 1170 1171 ENTER; 1172 int bpp = pSrc->drawable.bitsPerPixel; 1173 int cpp = (bpp + 7) >> 3; 1174 int wBytes = w * cpp; 1175 1176 src += (x * cpp) + (y * src_pitch); 1177 1178 CG14Wait(p); 1179 1180 while (h--) { 1181 memcpy(dst, src, wBytes); 1182 src += src_pitch; 1183 dst += dst_pitch; 1184 } 1185 1186 return TRUE; 1187} 1188 1189Bool 1190CG14CheckComposite(int op, PicturePtr pSrcPicture, 1191 PicturePtr pMaskPicture, 1192 PicturePtr pDstPicture) 1193{ 1194 int i, ok = FALSE; 1195 1196 ENTER; 1197 1198 /* 1199 * SX is in theory capable of accelerating pretty much all Xrender ops, 1200 * even coordinate transformation and gradients. Support will be added 1201 * over time and likely have to spill over into its own source file. 1202 */ 1203 1204 if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 1205 DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 1206 return FALSE; 1207 } 1208 1209 if (pSrcPicture != NULL) { 1210 i = 0; 1211 while ((i < arraysize(src_formats)) && (!ok)) { 1212 ok = (pSrcPicture->format == src_formats[i]); 1213 i++; 1214 } 1215 1216 if (!ok) { 1217 DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 1218 __func__, pSrcPicture->format); 1219 return FALSE; 1220 } 1221 DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 1222 } 1223 1224 if (pDstPicture != NULL) { 1225 i = 0; 1226 ok = FALSE; 1227 while ((i < arraysize(src_formats)) && (!ok)) { 1228 ok = (pDstPicture->format == src_formats[i]); 1229 i++; 1230 } 1231 1232 if (!ok) { 1233 DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 1234 __func__, pDstPicture->format); 1235 return FALSE; 1236 } 1237 DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 1238 } 1239 1240 if (pMaskPicture != NULL) { 1241 DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 1242 pMaskPicture->pDrawable->width, 1243 pMaskPicture->pDrawable->height); 1244 } 1245 return TRUE; 1246} 1247 1248Bool 1249CG14PrepareComposite(int op, PicturePtr pSrcPicture, 1250 PicturePtr pMaskPicture, 1251 PicturePtr pDstPicture, 1252 PixmapPtr pSrc, 1253 PixmapPtr pMask, 1254 PixmapPtr pDst) 1255{ 1256 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1257 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1258 1259 ENTER; 1260 1261 p->no_source_pixmap = FALSE; 1262 p->source_is_solid = FALSE; 1263 1264 if (pSrcPicture->format == PICT_a1) { 1265 xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 1266 pDstPicture->format, op); 1267 if (pMaskPicture != NULL) { 1268 xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 1269 } 1270 } 1271 if (pSrcPicture->pSourcePict != NULL) { 1272 if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 1273 p->fillcolour = 1274 pSrcPicture->pSourcePict->solidFill.color; 1275 DPRINTF(X_ERROR, "%s: solid src %08x\n", 1276 __func__, p->fillcolour); 1277 p->no_source_pixmap = TRUE; 1278 p->source_is_solid = TRUE; 1279 } 1280 } 1281 if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 1282 if (pMaskPicture->pSourcePict->type == 1283 SourcePictTypeSolidFill) { 1284 p->fillcolour = 1285 pMaskPicture->pSourcePict->solidFill.color; 1286 xf86Msg(X_ERROR, "%s: solid mask %08x\n", 1287 __func__, p->fillcolour); 1288 } 1289 } 1290 if (pMaskPicture != NULL) { 1291 p->mskoff = exaGetPixmapOffset(pMask); 1292 p->mskpitch = exaGetPixmapPitch(pMask); 1293 p->mskformat = pMaskPicture->format; 1294 } else { 1295 p->mskoff = 0; 1296 p->mskpitch = 0; 1297 p->mskformat = 0; 1298 } 1299 if (pSrc != NULL) { 1300 p->source_is_solid = 1301 ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 1302 p->srcoff = exaGetPixmapOffset(pSrc); 1303 p->srcpitch = exaGetPixmapPitch(pSrc); 1304 if (p->source_is_solid) { 1305 p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 1306 } 1307 } 1308 p->srcformat = pSrcPicture->format; 1309 p->dstformat = pDstPicture->format; 1310 1311 if (p->source_is_solid) { 1312 uint32_t temp; 1313 1314 /* stuff source colour into SX registers, swap as needed */ 1315 temp = p->fillcolour; 1316 switch (p->srcformat) { 1317 case PICT_a8r8g8b8: 1318 case PICT_x8r8g8b8: 1319 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1320 temp = temp >> 8; 1321 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1322 temp = temp >> 8; 1323 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1324 break; 1325 case PICT_a8b8g8r8: 1326 case PICT_x8b8g8r8: 1327 write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 1328 temp = temp >> 8; 1329 write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 1330 temp = temp >> 8; 1331 write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 1332 break; 1333 } 1334 write_sx_reg(p, SX_QUEUED(8), 0xff); 1335 } 1336 p->op = op; 1337 if (op == PictOpSrc) { 1338 CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 1339 } 1340#ifdef SX_DEBUG 1341 DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 1342 *(uint32_t *)(p->fb + p->srcoff)); 1343#endif 1344 return TRUE; 1345} 1346 1347void 1348CG14Composite(PixmapPtr pDst, int srcX, int srcY, 1349 int maskX, int maskY, 1350 int dstX, int dstY, 1351 int width, int height) 1352{ 1353 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1354 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1355 uint32_t dstoff, dstpitch; 1356 uint32_t dst, msk, src; 1357 int flip = 0; 1358 1359 ENTER; 1360 dstoff = exaGetPixmapOffset(pDst); 1361 dstpitch = exaGetPixmapPitch(pDst); 1362 1363 flip = (PICT_FORMAT_TYPE(p->srcformat) != 1364 PICT_FORMAT_TYPE(p->dstformat)); 1365 1366 switch (p->op) { 1367 case PictOpOver: 1368 dst = dstoff + (dstY * dstpitch) + (dstX << 2); 1369 DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 1370 p->mskformat, p->dstformat, srcX, srcY); 1371 if (p->source_is_solid) { 1372 switch (p->mskformat) { 1373 case PICT_a8: 1374 msk = p->mskoff + 1375 (maskY * p->mskpitch) + 1376 maskX; 1377 CG14Comp_Over8Solid(p, 1378 msk, p->mskpitch, 1379 dst, dstpitch, 1380 width, height); 1381 break; 1382 case PICT_a8r8g8b8: 1383 case PICT_a8b8g8r8: 1384 msk = p->mskoff + 1385 (maskY * p->mskpitch) + 1386 (maskX << 2); 1387 CG14Comp_Over32Solid(p, 1388 msk, p->mskpitch, 1389 dst, dstpitch, 1390 width, height); 1391 break; 1392 default: 1393 xf86Msg(X_ERROR, 1394 "unsupported mask format %08x\n", p->mskformat); 1395 } 1396 } else { 1397 DPRINTF(X_ERROR, "non-solid over with msk %x\n", 1398 p->mskformat); 1399 switch (p->srcformat) { 1400 case PICT_a8r8g8b8: 1401 case PICT_a8b8g8r8: 1402 src = p->srcoff + 1403 (srcY * p->srcpitch) + 1404 (srcX << 2); 1405 dst = dstoff + 1406 (dstY * dstpitch) + 1407 (dstX << 2); 1408 if (p->mskformat == PICT_a8) { 1409 msk = p->mskoff + 1410 (maskY * p->mskpitch) + 1411 maskX; 1412 CG14Comp_Over32Mask(p, 1413 src, p->srcpitch, 1414 msk, p->mskpitch, 1415 dst, dstpitch, 1416 width, height, flip); 1417 } else { 1418 CG14Comp_Over32(p, 1419 src, p->srcpitch, 1420 dst, dstpitch, 1421 width, height, flip); 1422 } 1423 break; 1424 case PICT_x8r8g8b8: 1425 case PICT_x8b8g8r8: 1426 src = p->srcoff + 1427 (srcY * p->srcpitch) + 1428 (srcX << 2); 1429 dst = dstoff + 1430 (dstY * dstpitch) + 1431 (dstX << 2); 1432 if (p->mskformat == PICT_a8) { 1433 msk = p->mskoff + 1434 (maskY * p->mskpitch) + 1435 maskX; 1436 CG14Comp_Over32Mask_noalpha(p, 1437 src, p->srcpitch, 1438 msk, p->mskpitch, 1439 dst, dstpitch, 1440 width, height, flip); 1441 } else if ((p->mskformat == PICT_a8r8g8b8) || 1442 (p->mskformat == PICT_a8b8g8r8)) { 1443 msk = p->mskoff + 1444 (maskY * p->mskpitch) + 1445 (maskX << 2); 1446 CG14Comp_Over32Mask32_noalpha(p, 1447 src, p->srcpitch, 1448 msk, p->mskpitch, 1449 dst, dstpitch, 1450 width, height, flip); 1451 } else { 1452 xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 1453 } 1454 break; 1455 default: 1456 xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1457 __func__, p->srcformat); 1458 } 1459 } 1460 break; 1461 case PictOpAdd: 1462 DPRINTF(X_ERROR, "Add %08x %08x\n", 1463 p->srcformat, p->dstformat); 1464 switch (p->srcformat) { 1465 case PICT_a8: 1466 src = p->srcoff + 1467 (srcY * p->srcpitch) + srcX; 1468 if (p->dstformat == PICT_a8) { 1469 dst = dstoff + 1470 (dstY * dstpitch) + dstX; 1471 CG14Comp_Add8(p, 1472 src, p->srcpitch, 1473 dst, dstpitch, 1474 width, height); 1475 } else { 1476 dst = dstoff + 1477 (dstY * dstpitch) + 1478 (dstX << 2); 1479 CG14Comp_Add8_32(p, 1480 src, p->srcpitch, 1481 dst, dstpitch, 1482 width, height); 1483 } 1484 break; 1485 case PICT_a8r8g8b8: 1486 case PICT_x8r8g8b8: 1487 src = p->srcoff + 1488 (srcY * p->srcpitch) + (srcX << 2); 1489 dst = dstoff + (dstY * dstpitch) + 1490 (dstX << 2); 1491 CG14Comp_Add32(p, src, p->srcpitch, 1492 dst, dstpitch, width, height); 1493 break; 1494 default: 1495 xf86Msg(X_ERROR, 1496 "unsupported src format\n"); 1497 } 1498 break; 1499 case PictOpSrc: 1500 DPRINTF(X_ERROR, "Src %08x %08x\n", 1501 p->srcformat, p->dstformat); 1502 if (p->mskformat != 0) 1503 xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1504 if (p->srcformat == PICT_a8) { 1505 CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1506 } else { 1507 /* convert between RGB and BGR? */ 1508 CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1509 } 1510 break; 1511 default: 1512 xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 1513 } 1514 exaMarkSync(pDst->drawable.pScreen); 1515} 1516 1517 1518 1519Bool 1520CG14InitAccel(ScreenPtr pScreen) 1521{ 1522 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1523 Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1524 ExaDriverPtr pExa; 1525 1526 pExa = exaDriverAlloc(); 1527 if (!pExa) 1528 return FALSE; 1529 1530 p->pExa = pExa; 1531 1532 pExa->exa_major = EXA_VERSION_MAJOR; 1533 pExa->exa_minor = EXA_VERSION_MINOR; 1534 1535 pExa->memoryBase = p->fb; 1536 pExa->memorySize = p->memsize; 1537 pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 1538 1539 /* 1540 * SX memory instructions are written to 64bit aligned addresses with 1541 * a 3 bit displacement. Make sure the displacement remains constant 1542 * within one column 1543 */ 1544 1545 pExa->pixmapOffsetAlign = 8; 1546 pExa->pixmapPitchAlign = 8; 1547 1548 pExa->flags = EXA_OFFSCREEN_PIXMAPS 1549 | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1550 /*| EXA_MIXED_PIXMAPS*/; 1551 1552 /* 1553 * these limits are bogus 1554 * SX doesn't deal with coordinates at all, so there is no limit but 1555 * we have to put something here 1556 */ 1557 pExa->maxX = 4096; 1558 pExa->maxY = 4096; 1559 1560 pExa->WaitMarker = CG14WaitMarker; 1561 1562 pExa->PrepareSolid = CG14PrepareSolid; 1563 pExa->Solid = CG14Solid; 1564 pExa->DoneSolid = CG14DoneCopy; 1565 pExa->PrepareCopy = CG14PrepareCopy; 1566 pExa->Copy = CG14Copy32; 1567 pExa->DoneCopy = CG14DoneCopy; 1568 if (p->use_xrender) { 1569 pExa->CheckComposite = CG14CheckComposite; 1570 pExa->PrepareComposite = CG14PrepareComposite; 1571 pExa->Composite = CG14Composite; 1572 pExa->DoneComposite = CG14DoneCopy; 1573 } 1574 1575 /* EXA hits more optimized paths when it does not have to fallback 1576 * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 1577 */ 1578 pExa->UploadToScreen = CG14UploadToScreen; 1579 pExa->DownloadFromScreen = CG14DownloadFromScreen; 1580 1581 p->queuecount = 0; 1582 /* do some hardware init */ 1583 write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 1584 p->last_mask = 0xffffffff; 1585 write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 1586 p->last_rop = 0xcc; 1587 return exaDriverInit(pScreen, pExa); 1588} 1589