/* cg14_render.c revision a3a2ba44 */
1/* $NetBSD: cg14_render.c,v 1.1 2013/06/25 12:26:57 macallan Exp $ */ 2/* 3 * Copyright (c) 2013 Michael Lorenz 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * - Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following 14 * disclaimer in the documentation and/or other materials provided 15 * with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 
29 * 30 */ 31 32#include <sys/types.h> 33 34/* all driver need this */ 35#include "xf86.h" 36#include "xf86_OSproc.h" 37#include "compiler.h" 38 39#include "cg14.h" 40#include <sparc/sxreg.h> 41 42#define SX_SINGLE 43/*#define SX_RENDER_DEBUG*/ 44/*#define SX_ADD_SOFTWARE*/ 45 46#ifdef SX__RENDER_DEBUG 47#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48#define DPRINTF xf86Msg 49#else 50#define ENTER 51#define DPRINTF while (0) xf86Msg 52#endif 53 54char c[8] = " .,:+*oX"; 55 56void CG14Comp_Over8Solid(Cg14Ptr p, 57 uint32_t src, uint32_t srcpitch, 58 uint32_t dst, uint32_t dstpitch, 59 int width, int height) 60{ 61 uint32_t msk = src, mskx, dstx, m; 62 int line, x, i; 63#ifdef SX_DEBUG 64 char buffer[256]; 65#endif 66 ENTER; 67 68 /* first get the source colour */ 69 write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 70 write_sx_reg(p, SX_QUEUED(8), 0xff); 71 DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 72 read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 73 *(uint32_t *)(p->fb + p->srcoff)); 74 for (line = 0; line < height; line++) { 75 mskx = msk; 76 dstx = dst; 77#ifdef SX_SINGLE 78 79 for (x = 0; x < width; x++) { 80 m = *(volatile uint8_t *)(p->fb + mskx); 81#ifdef SX_DEBUG 82 buffer[x] = c[m >> 5]; 83#endif 84 if (m == 0) { 85 /* nothing to do - all transparent */ 86 } else if (m == 0xff) { 87 /* all opaque */ 88 write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 89 } else { 90 /* fetch alpha value, stick it into scam */ 91 /* mask is in R[12:15] */ 92 /*write_sx_io(p, mskx & ~7, 93 SX_LDB(12, 0, mskx & 7));*/ 94 write_sx_reg(p, SX_QUEUED(12), m); 95 /* fetch dst pixel */ 96 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 97 write_sx_reg(p, SX_INSTRUCTIONS, 98 SX_ORV(12, 0, R_SCAM, 0)); 99 /* 100 * src * alpha + R0 101 * R[9:11] * SCAM + R0 -> R[17:19] 102 */ 103 write_sx_reg(p, SX_INSTRUCTIONS, 104 SX_SAXP16X16SR8(9, 0, 17, 2)); 105 106 /* invert SCAM */ 107 write_sx_reg(p, SX_INSTRUCTIONS, 108 SX_XORV(12, 8, 
R_SCAM, 0)); 109#ifdef SX_DEBUG 110 write_sx_reg(p, SX_INSTRUCTIONS, 111 SX_XORV(12, 8, 13, 0)); 112#endif 113 /* dst * (1 - alpha) + R[13:15] */ 114 write_sx_reg(p, SX_INSTRUCTIONS, 115 SX_SAXP16X16SR8(21, 17, 25, 2)); 116 write_sx_io(p, dstx, 117 SX_STUQ0C(24, 0, dstx & 7)); 118 } 119 dstx += 4; 120 mskx += 1; 121 } 122#ifdef SX_DEBUG 123 buffer[x] = 0; 124 xf86Msg(X_ERROR, "%s\n", buffer); 125#endif 126#else 127 for (x = 0; x < width; x += 4) { 128 /* fetch 4 mask values */ 129 write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 130 /* fetch destination pixels */ 131 write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 132 /* duplicate them for all channels */ 133 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 134 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 135 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 136 write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 137 /* generate inverted alpha */ 138 write_sx_reg(p, SX_INSTRUCTIONS, 139 SX_XORS(12, 8, 28, 15)); 140 /* multiply source */ 141 write_sx_reg(p, SX_INSTRUCTIONS, 142 SX_MUL16X16SR8(8, 12, 44, 3)); 143 write_sx_reg(p, SX_INSTRUCTIONS, 144 SX_MUL16X16SR8(8, 16, 48, 3)); 145 write_sx_reg(p, SX_INSTRUCTIONS, 146 SX_MUL16X16SR8(8, 20, 52, 3)); 147 write_sx_reg(p, SX_INSTRUCTIONS, 148 SX_MUL16X16SR8(8, 24, 56, 3)); 149 /* multiply dest */ 150 write_sx_reg(p, SX_INSTRUCTIONS, 151 SX_MUL16X16SR8(28, 60, 76, 15)); 152 /* add up */ 153 write_sx_reg(p, SX_INSTRUCTIONS, 154 SX_ADDV(44, 76, 92, 15)); 155 /* write back */ 156 write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 157 dstx += 16; 158 mskx += 4; 159 } 160#endif 161 dst += dstpitch; 162 msk += srcpitch; 163 } 164} 165 166void CG14Comp_Add32(Cg14Ptr p, 167 uint32_t src, uint32_t srcpitch, 168 uint32_t dst, uint32_t dstpitch, 169 int width, int height) 170{ 171 int line; 172 uint32_t srcx, dstx; 173 int full, part, x; 174 175 ENTER; 176 full = width >> 3; /* chunks of 8 */ 177 part = width & 7; /* leftovers */ 178 /* we do this up to 8 
pixels at a time */ 179 for (line = 0; line < height; line++) { 180 srcx = src; 181 dstx = dst; 182 for (x = 0; x < full; x++) { 183 write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 184 write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 185 write_sx_reg(p, SX_INSTRUCTIONS, 186 SX_ADDV(8, 40, 72, 15)); 187 write_sx_reg(p, SX_INSTRUCTIONS, 188 SX_ADDV(24, 56, 88, 15)); 189 write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 190 srcx += 128; 191 dstx += 128; 192 } 193 194 /* do leftovers */ 195 write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 196 write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 197 if (part & 16) { 198 write_sx_reg(p, SX_INSTRUCTIONS, 199 SX_ADDV(8, 40, 72, 15)); 200 write_sx_reg(p, SX_INSTRUCTIONS, 201 SX_ADDV(24, 56, 88, part - 17)); 202 } else { 203 write_sx_reg(p, SX_INSTRUCTIONS, 204 SX_ADDV(8, 40, 72, part - 1)); 205 } 206 write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 207 208 /* next line */ 209 src += srcpitch; 210 dst += dstpitch; 211 } 212} 213 214void CG14Comp_Add8(Cg14Ptr p, 215 uint32_t src, uint32_t srcpitch, 216 uint32_t dst, uint32_t dstpitch, 217 int width, int height) 218{ 219 int line; 220 uint32_t srcx, dstx, srcoff, dstoff; 221 int pre, full, part, x; 222 uint8_t *d; 223 char buffer[256]; 224 ENTER; 225 226 srcoff = src & 7; 227 src &= ~7; 228 dstoff = dst & 7; 229 dst &= ~7; 230 full = width >> 5; /* chunks of 32 */ 231 part = width & 31; /* leftovers */ 232 233#ifdef SX_DEBUG 234 xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 235 width, height, full, part); 236#endif 237 /* we do this up to 32 pixels at a time */ 238 for (line = 0; line < height; line++) { 239 srcx = src; 240 dstx = dst; 241#ifdef SX_ADD_SOFTWARE 242 uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 243 d = (uint8_t *)(p->fb + dstx + dstoff); 244 for (x = 0; x < width; x++) { 245 d[x] = min(255, s[x] + d[x]); 246 } 247#else 248 for (x = 0; x < full; x++) { 249 write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 250 write_sx_io(p, 
dstx, SX_LDB(40, 31, dstoff)); 251 write_sx_reg(p, SX_INSTRUCTIONS, 252 SX_ADDV(8, 40, 72, 15)); 253 write_sx_reg(p, SX_INSTRUCTIONS, 254 SX_ADDV(24, 56, 88, 15)); 255 write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 256 srcx += 32; 257 dstx += 32; 258 } 259 260 if (part > 0) { 261 /* do leftovers */ 262 write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 263 write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 264 if (part > 16) { 265 write_sx_reg(p, SX_INSTRUCTIONS, 266 SX_ADDV(8, 40, 72, 15)); 267 write_sx_reg(p, SX_INSTRUCTIONS, 268 SX_ADDV(24, 56, 88, part - 17)); 269 } else { 270 write_sx_reg(p, SX_INSTRUCTIONS, 271 SX_ADDV(8, 40, 72, part - 1)); 272 } 273 write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 274 } 275#endif 276#ifdef SX_DEBUG 277 d = (uint8_t *)(p->fb + src + srcoff); 278 for (x = 0; x < width; x++) { 279 buffer[x] = c[d[x]>>5]; 280 } 281 buffer[x] = 0; 282 xf86Msg(X_ERROR, "%s\n", buffer); 283#endif 284 /* next line */ 285 src += srcpitch; 286 dst += dstpitch; 287 } 288} 289 290void CG14Comp_Over32(Cg14Ptr p, 291 uint32_t src, uint32_t srcpitch, 292 uint32_t dst, uint32_t dstpitch, 293 int width, int height) 294{ 295 uint32_t srcx, dstx, m; 296 int line, x, i; 297 298 ENTER; 299 300 write_sx_reg(p, SX_QUEUED(8), 0xff); 301 for (line = 0; line < height; line++) { 302 srcx = src; 303 dstx = dst; 304 305 for (x = 0; x < width; x++) { 306 /* fetch source pixel */ 307 write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 308 /* fetch dst pixel */ 309 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 310 /* src is premultiplied with alpha */ 311 /* write inverted alpha into SCAM */ 312 write_sx_reg(p, SX_INSTRUCTIONS, 313 SX_XORV(12, 8, R_SCAM, 0)); 314 /* dst * (1 - alpha) + R[13:15] */ 315 write_sx_reg(p, SX_INSTRUCTIONS, 316 SX_SAXP16X16SR8(21, 13, 25, 2)); 317 write_sx_io(p, dstx, 318 SX_STUQ0C(24, 0, dstx & 7)); 319 dstx += 4; 320 srcx += 4; 321 } 322 dst += dstpitch; 323 src += srcpitch; 324 } 325} 326 327void CG14Comp_Over32Mask(Cg14Ptr p, 328 
uint32_t src, uint32_t srcpitch, 329 uint32_t msk, uint32_t mskpitch, 330 uint32_t dst, uint32_t dstpitch, 331 int width, int height) 332{ 333 uint32_t srcx, dstx, mskx, m; 334 int line, x, i; 335 336 ENTER; 337 338 write_sx_reg(p, SX_QUEUED(8), 0xff); 339 for (line = 0; line < height; line++) { 340 srcx = src; 341 mskx = msk; 342 dstx = dst; 343 344 for (x = 0; x < width; x++) { 345 /* fetch source pixel */ 346 write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 347 /* fetch mask */ 348 write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 349 /* fetch dst pixel */ 350 write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 351 /* apply mask */ 352 write_sx_reg(p, SX_INSTRUCTIONS, 353 SX_ANDS(12, 9, 16, 3)); 354 /* src is premultiplied with alpha */ 355 /* write inverted alpha into SCAM */ 356 write_sx_reg(p, SX_INSTRUCTIONS, 357 SX_XORV(16, 8, R_SCAM, 0)); 358 /* dst * (1 - alpha) + R[13:15] */ 359 write_sx_reg(p, SX_INSTRUCTIONS, 360 SX_SAXP16X16SR8(21, 17, 25, 2)); 361 write_sx_io(p, dstx, 362 SX_STUQ0C(24, 0, dstx & 7)); 363 srcx += 4; 364 mskx += 1; 365 dstx += 4; 366 } 367 src += srcpitch; 368 msk += mskpitch; 369 dst += dstpitch; 370 } 371} 372