cg14_render.c revision c2193d98
/* $NetBSD: cg14_render.c,v 1.13 2019/07/24 16:07:59 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>

/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"

/*#define SX_SINGLE*/
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

char c[8] = " .,:+*oX";


void CG14Comp_Over32Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;

	ENTER;

	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 16;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[13:15] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 4;
		}
#endif /* SX_SINGLE */
		dst += dstpitch;
		msk += srcpitch;
	}
}

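/*
 * PictOpOver with a solid source and an A8 mask.  Structured like
 * CG14Comp_Over32Solid() above; the solid colour is presumably queued into
 * R[8:11] by the caller (the DPRINTF below reads R9..R11 back).  The main
 * difference is that the mask is fetched one byte per pixel with SX_LDB,
 * so the mask pointer advances by 4 per batch of 4 pixels, or by 1 in the
 * SX_SINGLE path.
 */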
void CG14Comp_Over8Solid(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifndef SX_SINGLE
		int rest;
		for (x = 0; x < width; x += 4) {
			rest = width - x;
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			if (rest < 4) {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, rest - 1, dstx & 7));
			} else {
				write_sx_io(p, dstx,
				    SX_STUQ0C(92, 3, dstx & 7));
			}
			dstx += 16;
			mskx += 4;
		}
#else /* SX_SINGLE */
		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_DEBUG
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[13:15] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 1;
		}
#endif /* SX_SINGLE */
#ifdef SX_DEBUG
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

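/*
 * PictOpAdd on 32 bit pixels: load a chunk of source and destination,
 * add the unpacked components with SX_ADDV and store the result back to
 * the destination; a shorter load/add/store sequence handles whatever is
 * left over at the end of each row.
 */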
void CG14Comp_Add32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
			srcx += 128;
			dstx += 128;
		}

		/* do leftovers */
		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
		if (part & 16) {
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, part - 17));
		} else {
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, part - 1));
		}
		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

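/*
 * PictOpAdd on A8 pixels, up to 32 bytes per pass.  Source and destination
 * are aligned down to 8 bytes and the low bits are carried separately as
 * srcoff/dstoff, since the SX load/store ops appear to take the offset
 * within an 8-byte block as a separate parameter.  SX_ADDV seems to cover
 * at most 16 registers per instruction, hence the two adds per full chunk
 * and the part > 16 split in the leftover path.  With SX_ADD_SOFTWARE
 * defined the same operation is done by the CPU instead, presumably for
 * debugging the SX path.
 */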
void CG14Comp_Add8(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8_32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			/* load source bytes */
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			/* load alpha from destination */
			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			/* write clamped values back into dest alpha */
			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
			srcx += 32;
			dstx += 128;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
		}
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

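/*
 * PictOpOver for an ARGB source over a 32 bit destination, no mask.
 * R8 is preloaded with 0xff so SX_XORS can turn the per-pixel source alpha
 * into (255 - alpha) in SCAM, and SX_SAXP16X16SR8 then computes
 * dst * (1 - alpha) + src per channel.  With flip set, the gather/scatter
 * pairs swap bytes 1 and 3 of each fetched source pixel, presumably to
 * convert between ARGB- and ABGR-style channel orders.
 */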
void CG14Comp_Over32(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(12 + ii, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + src */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			dstx += 16;
		}
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + i, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + i, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

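/*
 * The two _noalpha variants below are for source pictures without an alpha
 * channel: R8..R11 are set to 0xff and scattered into what appears to be
 * the alpha slot (the first of the four unpacked bytes) of every fetched
 * source pixel, so the source is treated as fully opaque and only the mask
 * steers the blend.  CG14Comp_Over32Mask_noalpha() takes an A8 mask;
 * CG14Comp_Over32Mask32_noalpha() takes an ARGB mask, of which apparently
 * only the first byte of each unpacked pixel is fed into SCAM.
 */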
void CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SCATTER(8, 4, 12, num - 1));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + i, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + i, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 4;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
    uint32_t src, uint32_t srcpitch,
    uint32_t msk, uint32_t mskpitch,
    uint32_t dst, uint32_t dstpitch,
    int width, int height, int flip)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i, num;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	write_sx_reg(p, SX_QUEUED(9), 0xff);
	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x += 4) {
			/* we do up to 4 pixels at a time */
			num = min(4, width - x);
			if (num <= 0) {
				xf86Msg(X_ERROR, "wtf?!\n");
				continue;
			}
			/* fetch source pixels */
			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
			if (flip) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(13, 4, 40, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_GATHER(15, 4, 44, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(40, 4, 15, num - 1));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SCATTER(44, 4, 13, num - 1));
			}
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7));
			/* fetch dst pixels */
			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SCATTER(8, 4, 12, num - 1));
			/* now process up to 4 pixels */
			for (i = 0; i < num; i++) {
				int ii = i << 2;
				/* mask alpha to SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORS(28 + ii, 0, R_SCAM, 0));
				/* src * alpha */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
				/* write inverted alpha into SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORS(28 + ii, 8, R_SCAM, 0));
				/* dst * (1 - alpha) + R[60:] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0C(76, num - 1, dstx & 7));
			srcx += 16;
			mskx += 16;
			dstx += 16;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}