/* $NetBSD: cg14_render.c,v 1.19 2023/01/11 09:23:57 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>

/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"

/*#define SX_SINGLE*/
/*#define SX_RENDER_DEBUG*/
/*#define SX_RENDER_VERBOSE*/
/*#define SX_ADD_SOFTWARE*/
/*#define SX_RENDER_TRACE*/

#ifdef SX_RENDER_TRACE
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DONE xf86Msg(X_ERROR, "<%s\n", __func__);
#else
#define ENTER
#define DONE
#endif

#ifdef SX_RENDER_DEBUG
#define DPRINTF xf86Msg
#else
#define DPRINTF while (0) xf86Msg
#endif

#ifdef SX_RENDER_VERBOSE
char c[8] = " .,:+*oX";
#endif

void CG14Comp_Over32Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;

	ENTER;

	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			sxm(SX_LDUQ0, mskx, 12, 3);
			/* fetch destination pixels */
			sxm(SX_LDUQ0, dstx, 60, 3);
			/* duplicate them for all channels */
			sxi(SX_ORS, 0, 12, 13, 2);
			sxi(SX_ORS, 0, 16, 17, 2);
			sxi(SX_ORS, 0, 20, 21, 2);
			sxi(SX_ORS, 0, 24, 25, 2);
			/* generate inverted alpha */
			sxi(SX_XORS, 12, 8, 28, 15);
			/* multiply source */
			sxi(SX_MUL16X16SR8, 8, 12, 44, 3);
			sxi(SX_MUL16X16SR8, 8, 16, 48, 3);
			sxi(SX_MUL16X16SR8, 8, 20, 52, 3);
			sxi(SX_MUL16X16SR8, 8, 24, 56, 3);
			/* multiply dest */
			sxi(SX_MUL16X16SR8, 28, 60, 76, 15);
			/* add up */
			sxi(SX_ADDV, 44, 76, 92, 15);
			/* write back */
			if (rest < 4) {
				sxm(SX_STUQ0C, dstx, 92, rest - 1);
			} else {
				sxm(SX_STUQ0C, dstx, 92, 3);
			}
			dstx += 16;
			mskx += 16;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				sxm(SX_STUQ0, dstx, 8, 0);
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				sxm(SX_LDUQ0, dstx, 20, 0);
				sxi(SX_ORV, 12, 0, R_SCAM, 0);
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				sxi(SX_SAXP16X16SR8, 9, 0, 17, 2);

				/* invert SCAM */
				sxi(SX_XORV, 12, 8, R_SCAM, 0);
#ifdef SX_RENDER_DEBUG
				sxi(SX_XORV, 12, 8, 13, 0);
#endif
				/* dst * (1 - alpha) + R[13:15] */
				sxi(SX_SAXP16X16SR8, 21, 17, 25, 2);
				sxm(SX_STUQ0C, dstx, 24, 0);
			}
			dstx += 4;
			mskx += 4;
		}
#endif /* SX_SINGLE */
		dst += dstpitch;
		msk += srcpitch;
	}
}

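/*
 * CG14Comp_Over8Solid: like CG14Comp_Over32Solid above, but the mask is an
 * 8 bit alpha map rather than a 32 bit picture. The solid source pixel is
 * apparently set up by the caller in SX registers 8..11 (the DPRINTF below
 * only reads registers 9..11 back for debugging); src/srcpitch describe the
 * mask, dst/dstpitch the 32 bit destination.
 */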
void CG14Comp_Over8Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;
#ifdef SX_RENDER_VERBOSE
	char buffer[256];
#endif
	ENTER;

	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			sxm(SX_LDB, mskx, 12, 3);
			/* fetch destination pixels */
			sxm(SX_LDUQ0, dstx, 60, 3);
			/* duplicate them for all channels */
			sxi(SX_ORS, 0, 13, 16, 3);
			sxi(SX_ORS, 0, 14, 20, 3);
			sxi(SX_ORS, 0, 15, 24, 3);
			sxi(SX_ORS, 0, 12, 13, 2);
			/* generate inverted alpha */
			sxi(SX_XORS, 12, 8, 28, 15);
			/* multiply source */
			sxi(SX_MUL16X16SR8, 8, 12, 44, 3);
			sxi(SX_MUL16X16SR8, 8, 16, 48, 3);
			sxi(SX_MUL16X16SR8, 8, 20, 52, 3);
			sxi(SX_MUL16X16SR8, 8, 24, 56, 3);
			/* multiply dest */
			sxi(SX_MUL16X16SR8, 28, 60, 76, 15);
			/* add up */
			sxi(SX_ADDV, 44, 76, 92, 15);
			/* write back */
			if (rest < 4) {
				sxm(SX_STUQ0C, dstx, 92, rest - 1);
			} else {
				sxm(SX_STUQ0C, dstx, 92, 3);
			}
			dstx += 16;
			mskx += 4;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_RENDER_VERBOSE
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				sxm(SX_STUQ0, dstx, 8, 0);
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				sxm(SX_LDUQ0, dstx, 20, 0);
				sxi(SX_ORV, 12, 0, R_SCAM, 0);
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				sxi(SX_SAXP16X16SR8, 9, 0, 17, 2);

				/* invert SCAM */
				sxi(SX_XORV, 12, 8, R_SCAM, 0);
#ifdef SX_RENDER_DEBUG
				sxi(SX_XORV, 12, 8, 13, 0);
#endif
				/* dst * (1 - alpha) + R[13:15] */
				sxi(SX_SAXP16X16SR8, 21, 17, 25, 2);
				sxm(SX_STUQ0C, dstx, 24, 0);
			}
			dstx += 4;
			mskx += 1;
		}
#endif /* SX_SINGLE */
#ifdef SX_RENDER_VERBOSE
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
	DONE;
}

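/*
 * CG14Comp_Add32: PictOpAdd on 32 bit pictures - sum source and destination
 * pixels, working on up to 8 pixels (32 SX registers) per inner iteration,
 * with a separate pass for the leftover pixels at the end of each line.
 */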
void CG14Comp_Add32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			sxm(SX_LDUQ0, srcx, 8, 31);
			sxm(SX_LDUQ0, dstx, 40, 31);
			sxi(SX_ADDV, 8, 40, 72, 15);
			sxi(SX_ADDV, 24, 56, 88, 15);
			sxm(SX_STUQ0, dstx, 72, 31);
			srcx += 128;
			dstx += 128;
		}

		/* do leftovers */
		sxm(SX_LDUQ0, srcx, 8, part - 1);
		sxm(SX_LDUQ0, dstx, 40, part - 1);
		if (part & 16) {
			sxi(SX_ADDV, 8, 40, 72, 15);
			sxi(SX_ADDV, 24, 56, 88, part - 17);
		} else {
			sxi(SX_ADDV, 8, 40, 72, part - 1);
		}
		sxm(SX_STUQ0, dstx, 72, part - 1);

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
#ifdef SX_RENDER_VERBOSE
	char buffer[256];
#endif
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_RENDER_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			sxi(SX_ADDV, 8, 40, 72, 15);
			sxi(SX_ADDV, 24, 56, 88, 15);
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			if (part > 16) {
				sxi(SX_ADDV, 8, 40, 72, 15);
				sxi(SX_ADDV, 24, 56, 88, part - 17);
			} else {
				sxi(SX_ADDV, 8, 40, 72, part - 1);
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_RENDER_VERBOSE
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

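/*
 * CG14Comp_Add8_32: like CG14Comp_Add8 above, except that the destination
 * is the alpha channel of a 32 bit picture - source bytes are fetched with
 * SX_LDB, destination alpha with SX_LDUC0, and the clamped sums go back
 * into the alpha channel only, hence dstx advancing 128 bytes per 32 pixels.
 */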
void CG14Comp_Add8_32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
#ifdef SX_RENDER_VERBOSE
	char buffer[256];
#endif
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_RENDER_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			/* load source bytes */
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			/* load alpha from destination */
			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
			sxi(SX_ADDV, 8, 40, 72, 15);
			sxi(SX_ADDV, 24, 56, 88, 15);
			/* write clamped values back into dest alpha */
			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
			srcx += 32;
			dstx += 128;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
			if (part > 16) {
				sxi(SX_ADDV, 8, 40, 72, 15);
				sxi(SX_ADDV, 24, 56, 88, part - 17);
			} else {
				sxi(SX_ADDV, 8, 40, 72, part - 1);
			}
			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
		}
#ifdef SX_RENDER_VERBOSE
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			sxm(SX_LDUQ0, srcx, 12, num - 1);
			if (flip) {
				sxi(SX_GATHER, 13, 4, 40, num - 1);
				sxi(SX_GATHER, 15, 4, 44, num - 1);
				sxi(SX_SCATTER, 40, 4, 15, num - 1);
				sxi(SX_SCATTER, 44, 4, 13, num - 1);
			}
			/* fetch dst pixels */
			sxm(SX_LDUQ0, dstx, 44, num - 1);
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* write inverted alpha into SCAM */
				sxi(SX_XORS, 12 + ii, 8, R_SCAM, 0);
				/* dst * (1 - alpha) + src */
				sxi(SX_SAXP16X16SR8, 44 + ii, 12 + ii, 76 + ii, 3);
			}
			sxm(SX_STUQ0C, dstx, 76, num - 1);
			srcx += 16;
			dstx += 16;
		}
		src += srcpitch;
		dst += dstpitch;
	}
}

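/*
 * CG14Comp_Over32Mask: like CG14Comp_Over32 above, but with an additional
 * 8 bit mask - each source pixel is scaled by its mask alpha, the
 * destination by the inverted mask alpha, and the two are added up. As in
 * the other Over routines, the flip parameter appears to swap the R and B
 * channels of the source via the SX_GATHER/SX_SCATTER pairs.
 */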
void CG14Comp_Over32Mask(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			sxm(SX_LDUQ0, srcx, 12, num - 1);
			if (flip) {
				sxi(SX_GATHER, 13, 4, 40, num - 1);
				sxi(SX_GATHER, 15, 4, 44, num - 1);
				sxi(SX_SCATTER, 40, 4, 15, num - 1);
				sxi(SX_SCATTER, 44, 4, 13, num - 1);
			}
			/* fetch mask */
			sxm(SX_LDB, mskx, 28, num - 1);
			/* fetch dst pixels */
			sxm(SX_LDUQ0, dstx, 44, num - 1);
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				sxi(SX_ORS, 28 + i, 0, R_SCAM, 0);
				/* src * alpha */
				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
				/* write inverted alpha into SCAM */
				sxi(SX_XORS, 28 + i, 8, R_SCAM, 0);
				/* dst * (1 - alpha) + R[60:] */
				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
			}
			sxm(SX_STUQ0C, dstx, 76, num - 1);
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	sxi(SX_ORS, 8, 0, 10, 1);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			sxm(SX_LDUQ0, srcx, 12, num - 1);
			if (flip) {
				sxi(SX_GATHER, 13, 4, 40, num - 1);
				sxi(SX_GATHER, 15, 4, 44, num - 1);
				sxi(SX_SCATTER, 40, 4, 15, num - 1);
				sxi(SX_SCATTER, 44, 4, 13, num - 1);
			}
			/* fetch mask */
			sxm(SX_LDB, mskx, 28, num - 1);
			/* fetch dst pixels */
			sxm(SX_LDUQ0, dstx, 44, num - 1);
			/* set src alpha to 0xff */
			sxi(SX_SCATTER, 8, 4, 12, num - 1);
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				sxi(SX_ORS, 28 + i, 0, R_SCAM, 0);
				/* src * alpha */
				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
				/* write inverted alpha into SCAM */
				sxi(SX_XORS, 28 + i, 8, R_SCAM, 0);
				/* dst * (1 - alpha) + R[60:] */
				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
			}
			sxm(SX_STUQ0C, dstx, 76, num - 1);
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

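/*
 * CG14Comp_Over32Mask32_noalpha: same as CG14Comp_Over32Mask_noalpha, but
 * the mask is a 32 bit picture - it is fetched with SX_LDUQ0 and only its
 * alpha channel (every fourth register, hence the 28 + ii indexing) is used.
 */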
void CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	sxi(SX_ORS, 8, 0, 10, 1);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			sxm(SX_LDUQ0, srcx, 12, num - 1);
			if (flip) {
				sxi(SX_GATHER, 13, 4, 40, num - 1);
				sxi(SX_GATHER, 15, 4, 44, num - 1);
				sxi(SX_SCATTER, 40, 4, 15, num - 1);
				sxi(SX_SCATTER, 44, 4, 13, num - 1);
			}
			/* fetch mask */
			sxm(SX_LDUQ0, mskx, 28, num - 1);
			/* fetch dst pixels */
			sxm(SX_LDUQ0, dstx, 44, num - 1);
			/* set src alpha to 0xff */
			sxi(SX_SCATTER, 8, 4, 12, num - 1);
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				sxi(SX_ORS, 28 + ii, 0, R_SCAM, 0);
				/* src * alpha */
				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
				/* write inverted alpha into SCAM */
				sxi(SX_XORS, 28 + ii, 8, R_SCAM, 0);
				/* dst * (1 - alpha) + R[60:] */
				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
			}
			sxm(SX_STUQ0C, dstx, 76, num - 1);
			srcx += 16;
			mskx += 16;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}
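
#if 0
/*
 * Reference sketch only, kept out of the build: a plain C version of the
 * per-pixel blend the SX sequences above implement for a premultiplied
 * 8:8:8:8 source, the way CG14Comp_Over32() uses it. The helper name is
 * made up for illustration; the >> 8 matches the SR8 multiply/accumulate
 * variants and the final clamp matches the clamping SX_STUQ0C store.
 */
static uint32_t
cg14_over_one_pixel(uint32_t srcpix, uint32_t dstpix)
{
	uint32_t ia = 0xff ^ (srcpix >> 24);	/* inverted source alpha */
	uint32_t res = 0;
	int shift;

	for (shift = 0; shift < 32; shift += 8) {
		uint32_t s = (srcpix >> shift) & 0xff;
		uint32_t d = (dstpix >> shift) & 0xff;
		uint32_t v = s + ((d * ia) >> 8);	/* src + dst * (1 - alpha) */

		if (v > 0xff)
			v = 0xff;
		res |= v << shift;
	}
	return res;
}
#endif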