cg14_render.c revision 81a370e6
181a370e6Smacallan/* $NetBSD: cg14_render.c,v 1.6 2013/07/30 19:28:46 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32a3a2ba44Smacallan#include <sys/types.h> 33a3a2ba44Smacallan 34a3a2ba44Smacallan/* all driver need this */ 35a3a2ba44Smacallan#include "xf86.h" 36a3a2ba44Smacallan#include "xf86_OSproc.h" 37a3a2ba44Smacallan#include "compiler.h" 38a3a2ba44Smacallan 39a3a2ba44Smacallan#include "cg14.h" 40a3a2ba44Smacallan#include <sparc/sxreg.h> 41a3a2ba44Smacallan 42a3a2ba44Smacallan#define SX_SINGLE 43a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 44a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 45a3a2ba44Smacallan 46a3a2ba44Smacallan#ifdef SX__RENDER_DEBUG 47a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48a3a2ba44Smacallan#define DPRINTF xf86Msg 49a3a2ba44Smacallan#else 50a3a2ba44Smacallan#define ENTER 51a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 52a3a2ba44Smacallan#endif 53a3a2ba44Smacallan 54a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 55a3a2ba44Smacallan 5678cb1511Smacallan 5778cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 5878cb1511Smacallan uint32_t src, uint32_t srcpitch, 5978cb1511Smacallan uint32_t dst, uint32_t dstpitch, 6078cb1511Smacallan int width, int height) 6178cb1511Smacallan{ 6278cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 6378cb1511Smacallan int line, x, i; 6478cb1511Smacallan 6578cb1511Smacallan ENTER; 66f7cb851fSmacallan 6778cb1511Smacallan for (line = 0; line < height; line++) { 6878cb1511Smacallan mskx = msk; 6978cb1511Smacallan dstx = dst; 7078cb1511Smacallan#ifdef SX_SINGLE 7178cb1511Smacallan 7278cb1511Smacallan for (x = 0; x < width; x++) { 7378cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 7478cb1511Smacallan m = m >> 24; 7578cb1511Smacallan if (m == 0) { 7678cb1511Smacallan /* nothing to do - all transparent */ 7778cb1511Smacallan } else if (m == 0xff) { 7878cb1511Smacallan /* all opaque */ 7978cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 8078cb1511Smacallan } else { 8178cb1511Smacallan /* fetch alpha value, stick it into scam */ 8278cb1511Smacallan /* mask is in R[12:15] */ 8378cb1511Smacallan /*write_sx_io(p, mskx, 8478cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 8578cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 8678cb1511Smacallan /* fetch dst pixel */ 8778cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 8878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8978cb1511Smacallan SX_ORV(12, 0, R_SCAM, 0)); 9078cb1511Smacallan /* 9178cb1511Smacallan * src * alpha + R0 9278cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 9378cb1511Smacallan */ 9478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9578cb1511Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 9678cb1511Smacallan 9778cb1511Smacallan /* invert SCAM */ 9878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9978cb1511Smacallan SX_XORV(12, 8, R_SCAM, 0)); 10078cb1511Smacallan#ifdef SX_DEBUG 10178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 10278cb1511Smacallan SX_XORV(12, 8, 13, 0)); 10378cb1511Smacallan#endif 10478cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 10578cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 10678cb1511Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 10778cb1511Smacallan write_sx_io(p, dstx, 10878cb1511Smacallan SX_STUQ0C(24, 0, dstx & 7)); 10978cb1511Smacallan } 11078cb1511Smacallan dstx += 4; 11178cb1511Smacallan mskx += 4; 11278cb1511Smacallan } 11378cb1511Smacallan#else 11478cb1511Smacallan for (x = 0; x < width; x += 4) { 11578cb1511Smacallan /* fetch 4 mask values */ 11678cb1511Smacallan write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 11778cb1511Smacallan /* fetch destination pixels */ 11878cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 11978cb1511Smacallan /* duplicate them for all channels */ 1206bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 1216bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 1226bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 12378cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 12478cb1511Smacallan /* generate inverted alpha */ 12578cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 12678cb1511Smacallan SX_XORS(12, 8, 28, 15)); 12778cb1511Smacallan /* multiply source */ 12878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 12978cb1511Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 13078cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13178cb1511Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 13278cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13378cb1511Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 13478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13578cb1511Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 13678cb1511Smacallan /* multiply dest */ 13778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13878cb1511Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 13978cb1511Smacallan /* add up */ 14078cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14178cb1511Smacallan SX_ADDV(44, 76, 92, 15)); 14278cb1511Smacallan /* write back */ 14378cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 14478cb1511Smacallan dstx += 16; 14578cb1511Smacallan mskx += 16; 14678cb1511Smacallan } 14778cb1511Smacallan#endif 14878cb1511Smacallan dst += dstpitch; 14978cb1511Smacallan msk += srcpitch; 15078cb1511Smacallan } 15178cb1511Smacallan} 15278cb1511Smacallan 153a3a2ba44Smacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 154a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 155a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 156a3a2ba44Smacallan int width, int height) 157a3a2ba44Smacallan{ 158a3a2ba44Smacallan uint32_t msk = src, mskx, dstx, m; 159a3a2ba44Smacallan int line, x, i; 160a3a2ba44Smacallan#ifdef SX_DEBUG 161a3a2ba44Smacallan char buffer[256]; 162a3a2ba44Smacallan#endif 163a3a2ba44Smacallan ENTER; 164a3a2ba44Smacallan 165a3a2ba44Smacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 166a3a2ba44Smacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 167a3a2ba44Smacallan *(uint32_t *)(p->fb + p->srcoff)); 168a3a2ba44Smacallan for (line = 0; line < height; line++) { 169a3a2ba44Smacallan mskx = msk; 170a3a2ba44Smacallan dstx = dst; 171a3a2ba44Smacallan#ifdef SX_SINGLE 172a3a2ba44Smacallan 173a3a2ba44Smacallan for (x = 0; x < width; x++) { 174a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 175a3a2ba44Smacallan#ifdef SX_DEBUG 176a3a2ba44Smacallan buffer[x] = c[m >> 5]; 177a3a2ba44Smacallan#endif 178a3a2ba44Smacallan if (m == 0) { 179a3a2ba44Smacallan /* nothing to do - all transparent */ 180a3a2ba44Smacallan } else if (m == 0xff) { 181a3a2ba44Smacallan /* all opaque */ 182a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 183a3a2ba44Smacallan } else { 184a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 185a3a2ba44Smacallan /* mask is in R[12:15] */ 186a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 187a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 188a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 189a3a2ba44Smacallan /* fetch dst pixel */ 190a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 191a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 192a3a2ba44Smacallan SX_ORV(12, 0, R_SCAM, 0)); 193a3a2ba44Smacallan /* 194a3a2ba44Smacallan * src * alpha + R0 195a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 196a3a2ba44Smacallan */ 197a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 198a3a2ba44Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 199a3a2ba44Smacallan 200a3a2ba44Smacallan /* invert SCAM */ 201a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 202a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 203a3a2ba44Smacallan#ifdef SX_DEBUG 204a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 205a3a2ba44Smacallan SX_XORV(12, 8, 13, 0)); 206a3a2ba44Smacallan#endif 207a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 208a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 209a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 210a3a2ba44Smacallan write_sx_io(p, dstx, 211a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 212a3a2ba44Smacallan } 213a3a2ba44Smacallan dstx += 4; 214a3a2ba44Smacallan mskx += 1; 215a3a2ba44Smacallan } 216a3a2ba44Smacallan#ifdef SX_DEBUG 217a3a2ba44Smacallan buffer[x] = 0; 218a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 219a3a2ba44Smacallan#endif 220a3a2ba44Smacallan#else 221a3a2ba44Smacallan for (x = 0; x < width; x += 4) { 222a3a2ba44Smacallan /* fetch 4 mask values */ 223a3a2ba44Smacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 224a3a2ba44Smacallan /* fetch destination pixels */ 225a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 226a3a2ba44Smacallan /* duplicate them for all channels */ 2276bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 2286bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 2296bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 230a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 231a3a2ba44Smacallan /* generate inverted alpha */ 232a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 233a3a2ba44Smacallan SX_XORS(12, 8, 28, 15)); 234a3a2ba44Smacallan /* multiply source */ 235a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 236a3a2ba44Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 237a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 238a3a2ba44Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 239a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 240a3a2ba44Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 241a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 242a3a2ba44Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 243a3a2ba44Smacallan /* multiply dest */ 244a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 245a3a2ba44Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 246a3a2ba44Smacallan /* add up */ 247a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 248a3a2ba44Smacallan SX_ADDV(44, 76, 92, 15)); 249a3a2ba44Smacallan /* write back */ 250a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 251a3a2ba44Smacallan dstx += 16; 252a3a2ba44Smacallan mskx += 4; 253a3a2ba44Smacallan } 254a3a2ba44Smacallan#endif 255a3a2ba44Smacallan dst += dstpitch; 256a3a2ba44Smacallan msk += srcpitch; 257a3a2ba44Smacallan } 258a3a2ba44Smacallan} 259a3a2ba44Smacallan 260a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 261a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 262a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 263a3a2ba44Smacallan int width, int height) 264a3a2ba44Smacallan{ 265a3a2ba44Smacallan int line; 266a3a2ba44Smacallan uint32_t srcx, dstx; 267a3a2ba44Smacallan int full, part, x; 268a3a2ba44Smacallan 269a3a2ba44Smacallan ENTER; 270a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 271a3a2ba44Smacallan part = width & 7; /* leftovers */ 272a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 273a3a2ba44Smacallan for (line = 0; line < height; line++) { 274a3a2ba44Smacallan srcx = src; 275a3a2ba44Smacallan dstx = dst; 276a3a2ba44Smacallan for (x = 0; x < full; x++) { 277a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 278a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 279a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 280a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 281a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 282a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 283a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 284a3a2ba44Smacallan srcx += 128; 285a3a2ba44Smacallan dstx += 128; 286a3a2ba44Smacallan } 287a3a2ba44Smacallan 288a3a2ba44Smacallan /* do leftovers */ 289a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 290a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 291a3a2ba44Smacallan if (part & 16) { 292a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 293a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 294a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 295a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 296a3a2ba44Smacallan } else { 297a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 298a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 299a3a2ba44Smacallan } 300a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 301a3a2ba44Smacallan 302a3a2ba44Smacallan /* next line */ 303a3a2ba44Smacallan src += srcpitch; 304a3a2ba44Smacallan dst += dstpitch; 305a3a2ba44Smacallan } 306a3a2ba44Smacallan} 307a3a2ba44Smacallan 308a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 309a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 310a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 311a3a2ba44Smacallan int width, int height) 312a3a2ba44Smacallan{ 313a3a2ba44Smacallan int line; 314a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 315a3a2ba44Smacallan int pre, full, part, x; 316a3a2ba44Smacallan uint8_t *d; 317a3a2ba44Smacallan char buffer[256]; 318a3a2ba44Smacallan ENTER; 319a3a2ba44Smacallan 320a3a2ba44Smacallan srcoff = src & 7; 321a3a2ba44Smacallan src &= ~7; 322a3a2ba44Smacallan dstoff = dst & 7; 323a3a2ba44Smacallan dst &= ~7; 324a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 325a3a2ba44Smacallan part = width & 31; /* leftovers */ 326a3a2ba44Smacallan 327a3a2ba44Smacallan#ifdef SX_DEBUG 328a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 329a3a2ba44Smacallan width, height, full, part); 330a3a2ba44Smacallan#endif 331a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 332a3a2ba44Smacallan for (line = 0; line < height; line++) { 333a3a2ba44Smacallan srcx = src; 334a3a2ba44Smacallan dstx = dst; 335a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 336a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 337a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 338a3a2ba44Smacallan for (x = 0; x < width; x++) { 339a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 340a3a2ba44Smacallan } 341a3a2ba44Smacallan#else 342a3a2ba44Smacallan for (x = 0; x < full; x++) { 343a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 344a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 345a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 346a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 347a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 348a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 349a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 350a3a2ba44Smacallan srcx += 32; 351a3a2ba44Smacallan dstx += 32; 352a3a2ba44Smacallan } 353a3a2ba44Smacallan 354a3a2ba44Smacallan if (part > 0) { 355a3a2ba44Smacallan /* do leftovers */ 356a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 357a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 358a3a2ba44Smacallan if (part > 16) { 359a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 360a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 361a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 362a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 363a3a2ba44Smacallan } else { 364a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 365a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 366a3a2ba44Smacallan } 367a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 368a3a2ba44Smacallan } 369a3a2ba44Smacallan#endif 370a3a2ba44Smacallan#ifdef SX_DEBUG 371a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 372a3a2ba44Smacallan for (x = 0; x < width; x++) { 373a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 374a3a2ba44Smacallan } 375a3a2ba44Smacallan buffer[x] = 0; 376a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 377a3a2ba44Smacallan#endif 378a3a2ba44Smacallan /* next line */ 379a3a2ba44Smacallan src += srcpitch; 380a3a2ba44Smacallan dst += dstpitch; 381a3a2ba44Smacallan } 382a3a2ba44Smacallan} 383a3a2ba44Smacallan 384a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 385a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 386a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 387a3a2ba44Smacallan int width, int height) 388a3a2ba44Smacallan{ 389a3a2ba44Smacallan uint32_t srcx, dstx, m; 390a3a2ba44Smacallan int line, x, i; 391a3a2ba44Smacallan 392a3a2ba44Smacallan ENTER; 393a3a2ba44Smacallan 394a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 395a3a2ba44Smacallan for (line = 0; line < height; line++) { 396a3a2ba44Smacallan srcx = src; 397a3a2ba44Smacallan dstx = dst; 398a3a2ba44Smacallan 399a3a2ba44Smacallan for (x = 0; x < width; x++) { 400a3a2ba44Smacallan /* fetch source pixel */ 401a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 402a3a2ba44Smacallan /* fetch dst pixel */ 403a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 404a3a2ba44Smacallan /* src is premultiplied with alpha */ 405a3a2ba44Smacallan /* write inverted alpha into SCAM */ 406a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 407a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 408a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 409a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 410a3a2ba44Smacallan SX_SAXP16X16SR8(21, 13, 25, 2)); 411a3a2ba44Smacallan write_sx_io(p, dstx, 412a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 413a3a2ba44Smacallan dstx += 4; 414a3a2ba44Smacallan srcx += 4; 415a3a2ba44Smacallan } 416a3a2ba44Smacallan dst += dstpitch; 417a3a2ba44Smacallan src += srcpitch; 418a3a2ba44Smacallan } 419a3a2ba44Smacallan} 420a3a2ba44Smacallan 421a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 422a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 423a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 424a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 425a3a2ba44Smacallan int width, int height) 426a3a2ba44Smacallan{ 427a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 428a3a2ba44Smacallan int line, x, i; 429a3a2ba44Smacallan 430a3a2ba44Smacallan ENTER; 431a3a2ba44Smacallan 432a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 433a3a2ba44Smacallan for (line = 0; line < height; line++) { 434a3a2ba44Smacallan srcx = src; 435a3a2ba44Smacallan mskx = msk; 436a3a2ba44Smacallan dstx = dst; 437a3a2ba44Smacallan 438a3a2ba44Smacallan for (x = 0; x < width; x++) { 439a3a2ba44Smacallan /* fetch source pixel */ 440a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 441a3a2ba44Smacallan /* fetch mask */ 442a3a2ba44Smacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 443a3a2ba44Smacallan /* fetch dst pixel */ 444a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 445f7cb851fSmacallan /* stick mask alpha into SCAM */ 446a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 447f7cb851fSmacallan SX_ORS(9, 0, R_SCAM, 0)); 448f7cb851fSmacallan /* apply mask */ 449a3a2ba44Smacallan /* src is premultiplied with alpha */ 450f7cb851fSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 451f7cb851fSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 452a3a2ba44Smacallan /* write inverted alpha into SCAM */ 453a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 454a3a2ba44Smacallan SX_XORV(16, 8, R_SCAM, 0)); 455a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 456a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 457a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 458a3a2ba44Smacallan write_sx_io(p, dstx, 459a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 460a3a2ba44Smacallan srcx += 4; 461a3a2ba44Smacallan mskx += 1; 462a3a2ba44Smacallan dstx += 4; 463a3a2ba44Smacallan } 464a3a2ba44Smacallan src += srcpitch; 465a3a2ba44Smacallan msk += mskpitch; 466a3a2ba44Smacallan dst += dstpitch; 467a3a2ba44Smacallan } 468a3a2ba44Smacallan} 4696bdc2ffdSmacallan 4706bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 4716bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 4726bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 4736bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 4746bdc2ffdSmacallan int width, int height) 4756bdc2ffdSmacallan{ 4766bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 4776bdc2ffdSmacallan int line, x, i; 4786bdc2ffdSmacallan 4796bdc2ffdSmacallan ENTER; 4806bdc2ffdSmacallan 4816bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 4826bdc2ffdSmacallan for (line = 0; line < height; line++) { 4836bdc2ffdSmacallan srcx = src; 4846bdc2ffdSmacallan mskx = msk; 4856bdc2ffdSmacallan dstx = dst; 4866bdc2ffdSmacallan 4876bdc2ffdSmacallan for (x = 0; x < width; x++) { 4886bdc2ffdSmacallan /* fetch source pixel */ 4896bdc2ffdSmacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 4906bdc2ffdSmacallan /* fetch mask */ 4916bdc2ffdSmacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 4926bdc2ffdSmacallan /* fetch dst pixel */ 4936bdc2ffdSmacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 4946bdc2ffdSmacallan /* write alpha into SCAM */ 4956bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 4966bdc2ffdSmacallan SX_ORS(9, 0, R_SCAM, 0)); 4976bdc2ffdSmacallan /* src * alpha + R0 */ 4986bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 4996bdc2ffdSmacallan SX_SAXP16X16SR8(13, 0, 17, 2)); 5006bdc2ffdSmacallan /* write inverted alpha into SCAM */ 5016bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5026bdc2ffdSmacallan SX_XORV(9, 8, R_SCAM, 0)); 5036bdc2ffdSmacallan /* dst * (1 - alpha) + R[13:15] */ 5046bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5056bdc2ffdSmacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 5066bdc2ffdSmacallan write_sx_io(p, dstx, 5076bdc2ffdSmacallan SX_STUQ0C(24, 0, dstx & 7)); 5086bdc2ffdSmacallan srcx += 4; 5096bdc2ffdSmacallan mskx += 1; 5106bdc2ffdSmacallan dstx += 4; 5116bdc2ffdSmacallan } 5126bdc2ffdSmacallan src += srcpitch; 5136bdc2ffdSmacallan msk += mskpitch; 5146bdc2ffdSmacallan dst += dstpitch; 5156bdc2ffdSmacallan } 5166bdc2ffdSmacallan} 517fa158432Smacallan 518fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 519fa158432Smacallan uint32_t src, uint32_t srcpitch, 520fa158432Smacallan uint32_t msk, uint32_t mskpitch, 521fa158432Smacallan uint32_t dst, uint32_t dstpitch, 522fa158432Smacallan int width, int height) 523fa158432Smacallan{ 524fa158432Smacallan uint32_t srcx, dstx, mskx, m; 525fa158432Smacallan int line, x, i; 526fa158432Smacallan 527fa158432Smacallan ENTER; 528fa158432Smacallan 529fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 530fa158432Smacallan for (line = 0; line < height; line++) { 531fa158432Smacallan srcx = src; 532fa158432Smacallan mskx = msk; 533fa158432Smacallan dstx = dst; 534fa158432Smacallan 535fa158432Smacallan for (x = 0; x < width; x++) { 536fa158432Smacallan /* fetch source pixel */ 537fa158432Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 538fa158432Smacallan /* fetch mask */ 53981a370e6Smacallan write_sx_io(p, mskx & (~7), SX_LDUQ0(16, 0, mskx & 7)); 540fa158432Smacallan /* fetch dst pixel */ 541fa158432Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 54281a370e6Smacallan /* set src alpha to 0xff */ 543fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 54481a370e6Smacallan SX_ORS(8, 0, 12, 0)); 54581a370e6Smacallan /* apply mask */ 546fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 54781a370e6Smacallan SX_MUL16X16SR8R(12, 16, 24, 3)); 548fa158432Smacallan /* write inverted alpha into SCAM */ 549fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 55081a370e6Smacallan SX_XORV(12, 8, R_SCAM, 0)); 55181a370e6Smacallan /* dst * (1 - alpha) + R[25:31] */ 552fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 55381a370e6Smacallan SX_SAXP16X16SR8(21, 25, 29, 2)); 554fa158432Smacallan write_sx_io(p, dstx, 55581a370e6Smacallan SX_STUQ0C(28, 0, dstx & 7)); 556fa158432Smacallan srcx += 4; 557fa158432Smacallan mskx += 4; 558fa158432Smacallan dstx += 4; 559fa158432Smacallan } 560fa158432Smacallan src += srcpitch; 561fa158432Smacallan msk += mskpitch; 562fa158432Smacallan dst += dstpitch; 563fa158432Smacallan } 564fa158432Smacallan} 565