cg14_render.c revision d71cb32d
/* $NetBSD: cg14_render.c,v 1.10 2017/10/30 22:09:54 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>

/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"
#include <sparc/sxreg.h>

/*#define SX_SINGLE*/
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

char c[8] = " .,:+*oX";


void CG14Comp_Over32Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;

	ENTER;

	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
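				/* full group of four pixels, store all four quads */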
				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 16;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[13:15] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 4;
		}
#endif /* SX_SINGLE */
		dst += dstpitch;
		msk += srcpitch;
	}
}

void CG14Comp_Over8Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 4;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_DEBUG
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
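				/* (R[8:11] presumably hold the solid source pixel, preloaded by the caller) */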
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[13:15] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 1;
		}
#endif /* SX_SINGLE */
#ifdef SX_DEBUG
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

void CG14Comp_Add32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
			srcx += 128;
			dstx += 128;
		}

		/* do leftovers */
		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
		if (part & 16) {
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, part - 17));
		} else {
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, part - 1));
		}
		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
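			/* each ADDV covers 16 of the 32 bytes; clamped store of all 32 sums */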
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8_32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			/* load source bytes */
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			/* load alpha from destination */
			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			/* write clamped values back into dest alpha */
			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
			srcx += 32;
			dstx += 128;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
		}
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t srcx, dstx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* src is premultiplied with alpha */
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(12, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[13:15] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 12, 24, 3));
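			/* store the blended pixel */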
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			dstx += 4;
			srcx += 4;
		}
		dst += dstpitch;
		src += srcpitch;
	}
}

void CG14Comp_Over32Mask(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch mask */
			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* stick mask alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(9, 0, R_SCAM, 0));
			/* apply mask */
			/* src is premultiplied with alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 16, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(16, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[13:15] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 16, 24, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			srcx += 4;
			mskx += 1;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(8, 0, 12, 0));
			/* fetch mask */
			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* write alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(9, 0, R_SCAM, 0));
			/* src * alpha + R0 */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 16, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(9, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[13:15] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 16, 24, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			srcx += 4;
			mskx += 1;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(8, 0, 12, 0));
			/* mask alpha to SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(16, 0, R_SCAM, 0));
			/* src * alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 24, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(16, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[24:31] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 24, 28, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(28, 0, dstx & 7));
			srcx += 4;
			mskx += 4;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}
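
/*
 * Reference sketch added for illustration only; it is not called anywhere
 * in the driver and the function name is made up. It shows roughly the
 * per-channel arithmetic that CG14Comp_Over8Solid/CG14Comp_Over32Solid
 * hand to the SX unit for a partially transparent mask value m: the >> 8
 * mirrors the SR8 multiply variants used above, the XOR with 0xff mirrors
 * how the code inverts alpha, and the final clamp mirrors what the "C"
 * store variants appear to do. The premultiplied-source routines
 * (CG14Comp_Over32 and friends) skip the source multiply. The real
 * hardware path may round or clamp slightly differently.
 */
static uint32_t
cg14_over_ref(uint32_t srcpix, uint8_t m, uint32_t dstpix)
{
	uint32_t res = 0;
	int i;

	for (i = 0; i < 4; i++) {
		uint32_t s = (srcpix >> (i * 8)) & 0xff;
		uint32_t d = (dstpix >> (i * 8)) & 0xff;
		/* src * alpha + dst * (1 - alpha), in 8.8 fixed point */
		uint32_t r = ((s * m) >> 8) + ((d * (m ^ 0xff)) >> 8);

		if (r > 255)
			r = 255;
		res |= r << (i * 8);
	}
	return res;
}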