/* $NetBSD: cg14_render.c,v 1.2 2013/06/25 12:31:29 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/types.h>

/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"
#include <sparc/sxreg.h>

#define SX_SINGLE
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/* ASCII brightness ramp used by the SX_DEBUG code to dump alpha masks */
char c[8] = " .,:+*oX";
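
/*
 * Render OVER with a solid source colour and a 32-bit mask: for each
 * pixel the mask's top byte m scales the blend, roughly
 *
 *	dst = (src * m + dst * (255 - m)) >> 8
 *
 * per channel (a sketch of the intent only; the SAXP16X16SR8 mnemonic
 * suggests a multiply-accumulate with a final right shift by 8 standing
 * in for the exact /255).  The solid colour sits in R[8:11] for the
 * whole run; fully transparent pixels are skipped and fully opaque ones
 * become plain stores, so only partial coverage pays for the blend
 * arithmetic.  With SX_SINGLE defined we blend one pixel at a time,
 * otherwise four pixels per batch of SX instructions.
 */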
void CG14Comp_Over32Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;

	ENTER;
	/* first get the source colour */
	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifdef SX_SINGLE

		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 4;
		}
#else
		for (x = 0; x < width; x += 4) {
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate the alphas for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			dstx += 16;
			mskx += 16;
		}
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}
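
/*
 * Same blend as CG14Comp_Over32Solid above, but driven by an 8-bit
 * alpha mask: one mask byte per pixel, so mskx advances by 1 while
 * dstx advances by 4.  With SX_DEBUG defined each line's coverage is
 * also dumped as ASCII art using the c[] ramp above.
 */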
void CG14Comp_Over8Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x, i;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	/* first get the source colour */
	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
	write_sx_reg(p, SX_QUEUED(8), 0xff);
	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifdef SX_SINGLE

		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_DEBUG
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 1;
		}
#ifdef SX_DEBUG
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
#else
		for (x = 0; x < width; x += 4) {
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate the alphas for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			dstx += 16;
			mskx += 4;
		}
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

/*
 * PictOpAdd on a 32-bit destination; each pixel unpacks into four
 * registers, so a pass of 8 pixels occupies 32 registers per operand
 * and an SX instruction covers at most 16 of them.
 */
void CG14Comp_Add32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDUQ0(8, 7, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, 7, dstx & 7));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STUQ0(72, 7, dstx & 7));
			srcx += 32;
			dstx += 32;
		}

		/* do leftovers */
		if (part > 0) {
			write_sx_io(p, srcx,
			    SX_LDUQ0(8, part - 1, srcx & 7));
			write_sx_io(p, dstx,
			    SX_LDUQ0(40, part - 1, dstx & 7));
			/* more than 4 pixels need a second SX_ADDV */
			if (part > 4) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, (part << 2) - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, (part << 2) - 1));
			}
			write_sx_io(p, dstx,
			    SX_STUQ0(72, part - 1, dstx & 7));
		}

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}
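
/*
 * PictOpAdd on an 8-bit destination, up to 32 pixels per pass.  The
 * SX_ADD_SOFTWARE variant inside spells out the intended per-byte
 * result, d[x] = min(255, s[x] + d[x]); on the SX path the final
 * clamping is presumably up to the SX_STBC store, since SX_ADDV is a
 * plain vector add.  SX loads want an 8-byte aligned base, so the low
 * address bits are split off into srcoff/dstoff up front.
 */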
void CG14Comp_Add8(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int pre, full, part, x;
	uint8_t *d;
	char buffer[256];
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			/* more than 16 pixels need a second SX_ADDV */
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x] >> 5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

/*
 * OVER for a premultiplied A8R8G8B8 source: the source channels
 * already contain src * alpha, so only dst * (1 - alpha) + src is
 * left to compute per pixel.
 */
void CG14Comp_Over32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* src is premultiplied with alpha */
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(12, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[13:15] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(21, 13, 25, 2));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			dstx += 4;
			srcx += 4;
		}
		dst += dstpitch;
		src += srcpitch;
	}
}

/*
 * Like CG14Comp_Over32, but with an additional 8-bit mask that is
 * ANDed into the source pixel before blending.
 */
void CG14Comp_Over32Mask(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx, mskx, m;
	int line, x, i;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch mask */
			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* apply mask to the source pixel */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ANDS(12, 9, 16, 3));
			/* src is premultiplied with alpha */
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(16, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[17:19] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(21, 17, 25, 2));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			srcx += 4;
			mskx += 1;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}