cg14_render.c revision 78d1a11b
/*	$NetBSD: cg14_render.c,v 1.12 2017/12/08 22:49:37 macallan Exp $	*/
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>

/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"
#include <sparc/sxreg.h>

/*#define SX_SINGLE*/
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/* intensity ramp for the SX_DEBUG ASCII dumps below */
char c[8] = " .,:+*oX";

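/*
 * Render acceleration using the SX vector unit.  SX_SINGLE selects the
 * one-pixel-at-a-time code paths (mainly useful for debugging), and
 * SX_ADD_SOFTWARE replaces the SX path in CG14Comp_Add8 with plain CPU
 * code.
 *
 * The two Over*Solid functions below blend a solid source colour into
 * the destination through an a8r8g8b8 or a8 mask; per channel, with
 * 8 bit precision:
 *
 *	dst' = (src * m + dst * (255 - m)) >> 8
 *
 * They assume (not set up here, presumably done by the caller in
 * cg14_accel.c) that R8 holds 0xff, so that XORing an alpha value with
 * R8 yields (255 - alpha), and that R[9:11] hold the solid source
 * colour.
 */
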
void CG14Comp_Over32Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;

	ENTER;

	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 16;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 4;
		}
#endif /* SX_SINGLE */
		dst += dstpitch;
		msk += srcpitch;
	}
}

void CG14Comp_Over8Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 4;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_DEBUG
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 1;
		}
#endif /* SX_SINGLE */
#ifdef SX_DEBUG
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

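/*
 * PictOpAdd: per-channel saturating add of source onto destination,
 *
 *	dst' = min(255, dst + src)
 *
 * CG14Comp_Add32 handles a8r8g8b8, CG14Comp_Add8 handles a8 (typically
 * glyph masks), and CG14Comp_Add8_32 adds an a8 source into the alpha
 * channel of an a8r8g8b8 destination.  The adds themselves are plain
 * SX_ADDV; the clamping to 255 happens in the clamped stores (the
 * trailing C in SX_STUQ0C/SX_STBC/SX_STUC0C), which is what the
 * SX_ADD_SOFTWARE fallback in CG14Comp_Add8 mimics with min().
 * SX_ADDV covers at most 16 registers per instruction, hence the
 * pairs of adds per batch.
 */
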
void CG14Comp_Add32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			/* 8 pixels unpack into 32 registers on each side */
			write_sx_io(p, srcx, SX_LDUQ0(8, 7, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, 7, dstx & 7));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			/* clamped store, like the other Add variants */
			write_sx_io(p, dstx, SX_STUQ0C(72, 7, dstx & 7));
			srcx += 32;
			dstx += 32;
		}

		/* do leftovers */
		if (part > 0) {
			/* part pixels -> (part << 2) registers */
			int rest = (part << 2) - 1;
			write_sx_io(p, srcx,
			    SX_LDUQ0(8, part - 1, srcx & 7));
			write_sx_io(p, dstx,
			    SX_LDUQ0(40, part - 1, dstx & 7));
			if (rest > 15) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, rest - 16));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, rest));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(72, part - 1, dstx & 7));
		}

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

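/*
 * The a8 variants work on byte-addressed data, while SX memory
 * references want 64-bit aligned addresses; src and dst are therefore
 * rounded down to a doubleword boundary and the remainder is carried
 * in srcoff/dstoff, which goes into the offset field of the SX
 * load/store instructions.  Up to 32 pixels are processed per batch.
 */
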
void CG14Comp_Add8(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8_32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			/* load source bytes */
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			/* load alpha from destination */
			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			/* write clamped values back into dest alpha */
			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
			srcx += 32;
			dstx += 128;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
		}
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

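/*
 * OVER with a full (premultiplied) a8r8g8b8 source:
 *
 *	dst' = src + ((dst * (255 - alpha)) >> 8)
 *
 * where alpha comes from the source pixel itself (CG14Comp_Over32) or
 * from an a8 mask (CG14Comp_Over32Mask, which also scales the source
 * by the mask first).  The inverted alpha is placed in SCAM and
 * applied with one SX_SAXP16X16SR8 multiply-accumulate per pixel.
 * When flip is set, a GATHER/SCATTER pair with stride 4 swaps the
 * R and B channels of the source pixels, apparently to convert
 * between RGB and BGR pixel layouts.
 */
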
void CG14Comp_Over32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(12 + ii, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + src */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			dstx += 16;
		}
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + i, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + i, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

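/*
 * Same as CG14Comp_Over32Mask, but for source pictures without an
 * alpha channel (x8r8g8b8): R[8:11] are preloaded with 0xff and
 * SX_SCATTERed with stride 4 over the alpha slots of the fetched
 * source pixels, which amounts to treating the source alpha as 255
 * and leaving the blend entirely to the mask.  The second variant
 * takes an a8r8g8b8 mask instead of an a8 one, hence the LDUQ0 mask
 * fetch and the 28 + ii register indexing.
 */
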
void CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SCATTER(8, 4, 12, num - 1));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + i, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + i, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SCATTER(8, 4, 12, num - 1));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + ii, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + ii, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 16;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

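/*
 * For reference and testing only: the per-pixel arithmetic of the
 * masked OVER paths above, in plain C.  Neither SX_SOFTWARE_REFERENCE
 * nor cg14_over_sw() exist in the driver proper; this is a sketch of
 * the intended math (matching CG14Comp_Over32Mask, where the
 * destination is attenuated by the inverted mask rather than by the
 * source alpha), in the spirit of the SX_ADD_SOFTWARE fallback above.
 */
#ifdef SX_SOFTWARE_REFERENCE
static uint32_t
cg14_over_sw(uint32_t spix, uint32_t dpix, uint8_t m)
{
	uint32_t res = 0;
	int shift;

	/* process the four 8 bit channels of an a8r8g8b8 pixel */
	for (shift = 0; shift < 32; shift += 8) {
		uint32_t s = (spix >> shift) & 0xff;
		uint32_t d = (dpix >> shift) & 0xff;
		/* (src * m + dst * (255 - m)) >> 8, as MUL16X16SR8/SAXP do */
		uint32_t v = ((s * m) >> 8) + ((d * (255 - m)) >> 8);
		/* the SX 'C' stores clamp to 255 on the way out */
		if (v > 255)
			v = 255;
		res |= v << shift;
	}
	return res;
}
#endif /* SX_SOFTWARE_REFERENCE */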