cg14_render.c revision c88c16f8
1c88c16f8Smacallan/* $NetBSD: cg14_render.c,v 1.8 2016/09/16 21:16:37 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 36a3a2ba44Smacallan#include <sys/types.h> 37a3a2ba44Smacallan 38a3a2ba44Smacallan/* all driver need this */ 39a3a2ba44Smacallan#include "xf86.h" 40a3a2ba44Smacallan#include "xf86_OSproc.h" 41a3a2ba44Smacallan#include "compiler.h" 42a3a2ba44Smacallan 43a3a2ba44Smacallan#include "cg14.h" 44a3a2ba44Smacallan#include <sparc/sxreg.h> 45a3a2ba44Smacallan 46a3a2ba44Smacallan#define SX_SINGLE 47a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 48a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 49a3a2ba44Smacallan 50a3a2ba44Smacallan#ifdef SX__RENDER_DEBUG 51a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 52a3a2ba44Smacallan#define DPRINTF xf86Msg 53a3a2ba44Smacallan#else 54a3a2ba44Smacallan#define ENTER 55a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 56a3a2ba44Smacallan#endif 57a3a2ba44Smacallan 58a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 59a3a2ba44Smacallan 6078cb1511Smacallan 6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 6278cb1511Smacallan uint32_t src, uint32_t srcpitch, 6378cb1511Smacallan uint32_t dst, uint32_t dstpitch, 6478cb1511Smacallan int width, int height) 6578cb1511Smacallan{ 6678cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 6778cb1511Smacallan int line, x, i; 6878cb1511Smacallan 6978cb1511Smacallan ENTER; 70f7cb851fSmacallan 7178cb1511Smacallan for (line = 0; line < height; line++) { 7278cb1511Smacallan mskx = msk; 7378cb1511Smacallan dstx = dst; 7478cb1511Smacallan#ifdef SX_SINGLE 7578cb1511Smacallan 7678cb1511Smacallan for (x = 0; x < width; x++) { 7778cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 7878cb1511Smacallan m = m >> 24; 7978cb1511Smacallan if (m == 0) { 8078cb1511Smacallan /* nothing to do - all transparent */ 8178cb1511Smacallan } else if (m == 0xff) { 8278cb1511Smacallan /* all opaque */ 8378cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 8478cb1511Smacallan } else { 8578cb1511Smacallan /* fetch alpha value, stick it into scam */ 8678cb1511Smacallan /* mask is in R[12:15] */ 8778cb1511Smacallan /*write_sx_io(p, mskx, 8878cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 8978cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 9078cb1511Smacallan /* fetch dst pixel */ 9178cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 9278cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9378cb1511Smacallan SX_ORV(12, 0, R_SCAM, 0)); 9478cb1511Smacallan /* 9578cb1511Smacallan * src * alpha + R0 9678cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 9778cb1511Smacallan */ 9878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9978cb1511Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 10078cb1511Smacallan 10178cb1511Smacallan /* invert SCAM */ 10278cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 10378cb1511Smacallan SX_XORV(12, 8, R_SCAM, 0)); 10478cb1511Smacallan#ifdef SX_DEBUG 10578cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 10678cb1511Smacallan SX_XORV(12, 8, 13, 0)); 10778cb1511Smacallan#endif 10878cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 10978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 11078cb1511Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 11178cb1511Smacallan write_sx_io(p, dstx, 11278cb1511Smacallan SX_STUQ0C(24, 0, dstx & 7)); 11378cb1511Smacallan } 11478cb1511Smacallan dstx += 4; 11578cb1511Smacallan mskx += 4; 11678cb1511Smacallan } 11778cb1511Smacallan#else 11878cb1511Smacallan for (x = 0; x < width; x += 4) { 11978cb1511Smacallan /* fetch 4 mask values */ 12078cb1511Smacallan write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 12178cb1511Smacallan /* fetch destination pixels */ 12278cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 12378cb1511Smacallan /* duplicate them for all channels */ 1246bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 1256bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 1266bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 12778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 12878cb1511Smacallan /* generate inverted alpha */ 12978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13078cb1511Smacallan SX_XORS(12, 8, 28, 15)); 13178cb1511Smacallan /* multiply source */ 13278cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13378cb1511Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 13478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13578cb1511Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 13678cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13778cb1511Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 13878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13978cb1511Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 14078cb1511Smacallan /* multiply dest */ 14178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14278cb1511Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 14378cb1511Smacallan /* add up */ 14478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14578cb1511Smacallan SX_ADDV(44, 76, 92, 15)); 14678cb1511Smacallan /* write back */ 14778cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 14878cb1511Smacallan dstx += 16; 14978cb1511Smacallan mskx += 16; 15078cb1511Smacallan } 15178cb1511Smacallan#endif 15278cb1511Smacallan dst += dstpitch; 15378cb1511Smacallan msk += srcpitch; 15478cb1511Smacallan } 15578cb1511Smacallan} 15678cb1511Smacallan 157a3a2ba44Smacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 158a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 159a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 160a3a2ba44Smacallan int width, int height) 161a3a2ba44Smacallan{ 162a3a2ba44Smacallan uint32_t msk = src, mskx, dstx, m; 163a3a2ba44Smacallan int line, x, i; 164a3a2ba44Smacallan#ifdef SX_DEBUG 165a3a2ba44Smacallan char buffer[256]; 166a3a2ba44Smacallan#endif 167a3a2ba44Smacallan ENTER; 168a3a2ba44Smacallan 169a3a2ba44Smacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 170a3a2ba44Smacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 171a3a2ba44Smacallan *(uint32_t *)(p->fb + p->srcoff)); 172a3a2ba44Smacallan for (line = 0; line < height; line++) { 173a3a2ba44Smacallan mskx = msk; 174a3a2ba44Smacallan dstx = dst; 175a3a2ba44Smacallan#ifdef SX_SINGLE 176a3a2ba44Smacallan 177a3a2ba44Smacallan for (x = 0; x < width; x++) { 178a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 179a3a2ba44Smacallan#ifdef SX_DEBUG 180a3a2ba44Smacallan buffer[x] = c[m >> 5]; 181a3a2ba44Smacallan#endif 182a3a2ba44Smacallan if (m == 0) { 183a3a2ba44Smacallan /* nothing to do - all transparent */ 184a3a2ba44Smacallan } else if (m == 0xff) { 185a3a2ba44Smacallan /* all opaque */ 186a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 187a3a2ba44Smacallan } else { 188a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 189a3a2ba44Smacallan /* mask is in R[12:15] */ 190a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 191a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 192a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 193a3a2ba44Smacallan /* fetch dst pixel */ 194a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 195a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 196a3a2ba44Smacallan SX_ORV(12, 0, R_SCAM, 0)); 197a3a2ba44Smacallan /* 198a3a2ba44Smacallan * src * alpha + R0 199a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 200a3a2ba44Smacallan */ 201a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 202a3a2ba44Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 203a3a2ba44Smacallan 204a3a2ba44Smacallan /* invert SCAM */ 205a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 206a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 207a3a2ba44Smacallan#ifdef SX_DEBUG 208a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 209a3a2ba44Smacallan SX_XORV(12, 8, 13, 0)); 210a3a2ba44Smacallan#endif 211a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 212a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 213a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 214a3a2ba44Smacallan write_sx_io(p, dstx, 215a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 216a3a2ba44Smacallan } 217a3a2ba44Smacallan dstx += 4; 218a3a2ba44Smacallan mskx += 1; 219a3a2ba44Smacallan } 220a3a2ba44Smacallan#ifdef SX_DEBUG 221a3a2ba44Smacallan buffer[x] = 0; 222a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 223a3a2ba44Smacallan#endif 224a3a2ba44Smacallan#else 225a3a2ba44Smacallan for (x = 0; x < width; x += 4) { 226a3a2ba44Smacallan /* fetch 4 mask values */ 227a3a2ba44Smacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 228a3a2ba44Smacallan /* fetch destination pixels */ 229a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 230a3a2ba44Smacallan /* duplicate them for all channels */ 231c88c16f8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3)); 232c88c16f8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3)); 233c88c16f8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3)); 2346bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 235a3a2ba44Smacallan /* generate inverted alpha */ 236a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 237a3a2ba44Smacallan SX_XORS(12, 8, 28, 15)); 238a3a2ba44Smacallan /* multiply source */ 239a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 240a3a2ba44Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 241a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 242a3a2ba44Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 243a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 244a3a2ba44Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 245a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 246a3a2ba44Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 247a3a2ba44Smacallan /* multiply dest */ 248a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 249a3a2ba44Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 250a3a2ba44Smacallan /* add up */ 251a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 252a3a2ba44Smacallan SX_ADDV(44, 76, 92, 15)); 253a3a2ba44Smacallan /* write back */ 254a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 255a3a2ba44Smacallan dstx += 16; 256a3a2ba44Smacallan mskx += 4; 257a3a2ba44Smacallan } 258a3a2ba44Smacallan#endif 259a3a2ba44Smacallan dst += dstpitch; 260a3a2ba44Smacallan msk += srcpitch; 261a3a2ba44Smacallan } 262a3a2ba44Smacallan} 263a3a2ba44Smacallan 264a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 265a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 266a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 267a3a2ba44Smacallan int width, int height) 268a3a2ba44Smacallan{ 269a3a2ba44Smacallan int line; 270a3a2ba44Smacallan uint32_t srcx, dstx; 271a3a2ba44Smacallan int full, part, x; 272a3a2ba44Smacallan 273a3a2ba44Smacallan ENTER; 274a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 275a3a2ba44Smacallan part = width & 7; /* leftovers */ 276a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 277a3a2ba44Smacallan for (line = 0; line < height; line++) { 278a3a2ba44Smacallan srcx = src; 279a3a2ba44Smacallan dstx = dst; 280a3a2ba44Smacallan for (x = 0; x < full; x++) { 281a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 282a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 283a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 284a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 285a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 286a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 287a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 288a3a2ba44Smacallan srcx += 128; 289a3a2ba44Smacallan dstx += 128; 290a3a2ba44Smacallan } 291a3a2ba44Smacallan 292a3a2ba44Smacallan /* do leftovers */ 293a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 294a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 295a3a2ba44Smacallan if (part & 16) { 296a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 297a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 298a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 299a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 300a3a2ba44Smacallan } else { 301a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 302a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 303a3a2ba44Smacallan } 304a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 305a3a2ba44Smacallan 306a3a2ba44Smacallan /* next line */ 307a3a2ba44Smacallan src += srcpitch; 308a3a2ba44Smacallan dst += dstpitch; 309a3a2ba44Smacallan } 310a3a2ba44Smacallan} 311a3a2ba44Smacallan 312a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 313a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 314a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 315a3a2ba44Smacallan int width, int height) 316a3a2ba44Smacallan{ 317a3a2ba44Smacallan int line; 318a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 319a3a2ba44Smacallan int pre, full, part, x; 320a3a2ba44Smacallan uint8_t *d; 321a3a2ba44Smacallan char buffer[256]; 322a3a2ba44Smacallan ENTER; 323a3a2ba44Smacallan 324a3a2ba44Smacallan srcoff = src & 7; 325a3a2ba44Smacallan src &= ~7; 326a3a2ba44Smacallan dstoff = dst & 7; 327a3a2ba44Smacallan dst &= ~7; 328a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 329a3a2ba44Smacallan part = width & 31; /* leftovers */ 330a3a2ba44Smacallan 331a3a2ba44Smacallan#ifdef SX_DEBUG 332a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 333a3a2ba44Smacallan width, height, full, part); 334a3a2ba44Smacallan#endif 335a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 336a3a2ba44Smacallan for (line = 0; line < height; line++) { 337a3a2ba44Smacallan srcx = src; 338a3a2ba44Smacallan dstx = dst; 339a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 340a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 341a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 342a3a2ba44Smacallan for (x = 0; x < width; x++) { 343a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 344a3a2ba44Smacallan } 345a3a2ba44Smacallan#else 346a3a2ba44Smacallan for (x = 0; x < full; x++) { 347a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 348a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 349a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 350a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 351a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 352a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 353a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 354a3a2ba44Smacallan srcx += 32; 355a3a2ba44Smacallan dstx += 32; 356a3a2ba44Smacallan } 357a3a2ba44Smacallan 358a3a2ba44Smacallan if (part > 0) { 359a3a2ba44Smacallan /* do leftovers */ 360a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 361a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 362a3a2ba44Smacallan if (part > 16) { 363a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 364a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 365a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 366a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 367a3a2ba44Smacallan } else { 368a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 369a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 370a3a2ba44Smacallan } 371a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 372a3a2ba44Smacallan } 373a3a2ba44Smacallan#endif 374a3a2ba44Smacallan#ifdef SX_DEBUG 375a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 376a3a2ba44Smacallan for (x = 0; x < width; x++) { 377a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 378a3a2ba44Smacallan } 379a3a2ba44Smacallan buffer[x] = 0; 380a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 381a3a2ba44Smacallan#endif 382a3a2ba44Smacallan /* next line */ 383a3a2ba44Smacallan src += srcpitch; 384a3a2ba44Smacallan dst += dstpitch; 385a3a2ba44Smacallan } 386a3a2ba44Smacallan} 387a3a2ba44Smacallan 388a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 389a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 390a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 391a3a2ba44Smacallan int width, int height) 392a3a2ba44Smacallan{ 393a3a2ba44Smacallan uint32_t srcx, dstx, m; 394a3a2ba44Smacallan int line, x, i; 395a3a2ba44Smacallan 396a3a2ba44Smacallan ENTER; 397a3a2ba44Smacallan 398a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 399a3a2ba44Smacallan for (line = 0; line < height; line++) { 400a3a2ba44Smacallan srcx = src; 401a3a2ba44Smacallan dstx = dst; 402a3a2ba44Smacallan 403a3a2ba44Smacallan for (x = 0; x < width; x++) { 404a3a2ba44Smacallan /* fetch source pixel */ 405a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 406a3a2ba44Smacallan /* fetch dst pixel */ 407a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 408a3a2ba44Smacallan /* src is premultiplied with alpha */ 409a3a2ba44Smacallan /* write inverted alpha into SCAM */ 410a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 411a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 412a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 413a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 414239808baSmacallan SX_SAXP16X16SR8(20, 12, 24, 3)); 415a3a2ba44Smacallan write_sx_io(p, dstx, 416a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 417a3a2ba44Smacallan dstx += 4; 418a3a2ba44Smacallan srcx += 4; 419a3a2ba44Smacallan } 420a3a2ba44Smacallan dst += dstpitch; 421a3a2ba44Smacallan src += srcpitch; 422a3a2ba44Smacallan } 423a3a2ba44Smacallan} 424a3a2ba44Smacallan 425a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 426a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 427a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 428a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 429a3a2ba44Smacallan int width, int height) 430a3a2ba44Smacallan{ 431a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 432a3a2ba44Smacallan int line, x, i; 433a3a2ba44Smacallan 434a3a2ba44Smacallan ENTER; 435a3a2ba44Smacallan 436a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 437a3a2ba44Smacallan for (line = 0; line < height; line++) { 438a3a2ba44Smacallan srcx = src; 439a3a2ba44Smacallan mskx = msk; 440a3a2ba44Smacallan dstx = dst; 441a3a2ba44Smacallan 442a3a2ba44Smacallan for (x = 0; x < width; x++) { 443a3a2ba44Smacallan /* fetch source pixel */ 444a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 445a3a2ba44Smacallan /* fetch mask */ 446a3a2ba44Smacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 447a3a2ba44Smacallan /* fetch dst pixel */ 448a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 449f7cb851fSmacallan /* stick mask alpha into SCAM */ 450a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 451f7cb851fSmacallan SX_ORS(9, 0, R_SCAM, 0)); 452f7cb851fSmacallan /* apply mask */ 453a3a2ba44Smacallan /* src is premultiplied with alpha */ 454f7cb851fSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 455f7cb851fSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 456a3a2ba44Smacallan /* write inverted alpha into SCAM */ 457a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 458a3a2ba44Smacallan SX_XORV(16, 8, R_SCAM, 0)); 459a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 460a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 461239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 462a3a2ba44Smacallan write_sx_io(p, dstx, 463a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 464a3a2ba44Smacallan srcx += 4; 465a3a2ba44Smacallan mskx += 1; 466a3a2ba44Smacallan dstx += 4; 467a3a2ba44Smacallan } 468a3a2ba44Smacallan src += srcpitch; 469a3a2ba44Smacallan msk += mskpitch; 470a3a2ba44Smacallan dst += dstpitch; 471a3a2ba44Smacallan } 472a3a2ba44Smacallan} 4736bdc2ffdSmacallan 4746bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 4756bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 4766bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 4776bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 4786bdc2ffdSmacallan int width, int height) 4796bdc2ffdSmacallan{ 4806bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 4816bdc2ffdSmacallan int line, x, i; 4826bdc2ffdSmacallan 4836bdc2ffdSmacallan ENTER; 4846bdc2ffdSmacallan 4856bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 4866bdc2ffdSmacallan for (line = 0; line < height; line++) { 4876bdc2ffdSmacallan srcx = src; 4886bdc2ffdSmacallan mskx = msk; 4896bdc2ffdSmacallan dstx = dst; 4906bdc2ffdSmacallan 4916bdc2ffdSmacallan for (x = 0; x < width; x++) { 4926bdc2ffdSmacallan /* fetch source pixel */ 4936bdc2ffdSmacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 494239808baSmacallan /* set src alpha to 0xff */ 495239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 496239808baSmacallan SX_ORS(8, 0, 12, 0)); 4976bdc2ffdSmacallan /* fetch mask */ 4986bdc2ffdSmacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 4996bdc2ffdSmacallan /* fetch dst pixel */ 5006bdc2ffdSmacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 5016bdc2ffdSmacallan /* write alpha into SCAM */ 5026bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5036bdc2ffdSmacallan SX_ORS(9, 0, R_SCAM, 0)); 5046bdc2ffdSmacallan /* src * alpha + R0 */ 5056bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 506239808baSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 5076bdc2ffdSmacallan /* write inverted alpha into SCAM */ 5086bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5096bdc2ffdSmacallan SX_XORV(9, 8, R_SCAM, 0)); 5106bdc2ffdSmacallan /* dst * (1 - alpha) + R[13:15] */ 5116bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 512239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 5136bdc2ffdSmacallan write_sx_io(p, dstx, 5146bdc2ffdSmacallan SX_STUQ0C(24, 0, dstx & 7)); 5156bdc2ffdSmacallan srcx += 4; 5166bdc2ffdSmacallan mskx += 1; 5176bdc2ffdSmacallan dstx += 4; 5186bdc2ffdSmacallan } 5196bdc2ffdSmacallan src += srcpitch; 5206bdc2ffdSmacallan msk += mskpitch; 5216bdc2ffdSmacallan dst += dstpitch; 5226bdc2ffdSmacallan } 5236bdc2ffdSmacallan} 524fa158432Smacallan 525fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 526fa158432Smacallan uint32_t src, uint32_t srcpitch, 527fa158432Smacallan uint32_t msk, uint32_t mskpitch, 528fa158432Smacallan uint32_t dst, uint32_t dstpitch, 529fa158432Smacallan int width, int height) 530fa158432Smacallan{ 531fa158432Smacallan uint32_t srcx, dstx, mskx, m; 532fa158432Smacallan int line, x, i; 533fa158432Smacallan 534fa158432Smacallan ENTER; 535fa158432Smacallan 536fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 537fa158432Smacallan for (line = 0; line < height; line++) { 538fa158432Smacallan srcx = src; 539fa158432Smacallan mskx = msk; 540fa158432Smacallan dstx = dst; 541fa158432Smacallan 542fa158432Smacallan for (x = 0; x < width; x++) { 543fa158432Smacallan /* fetch source pixel */ 544fa158432Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 545fa158432Smacallan /* fetch mask */ 546239808baSmacallan write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7)); 547fa158432Smacallan /* fetch dst pixel */ 548fa158432Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 54981a370e6Smacallan /* set src alpha to 0xff */ 550fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 55181a370e6Smacallan SX_ORS(8, 0, 12, 0)); 552239808baSmacallan /* mask alpha to SCAM */ 553239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 554239808baSmacallan SX_ORS(16, 0, R_SCAM, 0)); 555239808baSmacallan /* src * alpha */ 556fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 557239808baSmacallan SX_SAXP16X16SR8(12, 0, 24, 3)); 558fa158432Smacallan /* write inverted alpha into SCAM */ 559fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 560239808baSmacallan SX_XORS(16, 8, R_SCAM, 0)); 561239808baSmacallan /* dst * (1 - alpha) + R[24:31] */ 562fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 563239808baSmacallan SX_SAXP16X16SR8(20, 24, 28, 3)); 564fa158432Smacallan write_sx_io(p, dstx, 56581a370e6Smacallan SX_STUQ0C(28, 0, dstx & 7)); 566fa158432Smacallan srcx += 4; 567fa158432Smacallan mskx += 4; 568fa158432Smacallan dstx += 4; 569fa158432Smacallan } 570fa158432Smacallan src += srcpitch; 571fa158432Smacallan msk += mskpitch; 572fa158432Smacallan dst += dstpitch; 573fa158432Smacallan } 574fa158432Smacallan} 575