cg14_render.c revision d71cb32d
1d71cb32dSmacallan/* $NetBSD: cg14_render.c,v 1.10 2017/10/30 22:09:54 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 36a3a2ba44Smacallan#include <sys/types.h> 37a3a2ba44Smacallan 38a3a2ba44Smacallan/* all driver need this */ 39a3a2ba44Smacallan#include "xf86.h" 40a3a2ba44Smacallan#include "xf86_OSproc.h" 41a3a2ba44Smacallan#include "compiler.h" 42a3a2ba44Smacallan 43a3a2ba44Smacallan#include "cg14.h" 44a3a2ba44Smacallan#include <sparc/sxreg.h> 45a3a2ba44Smacallan 46f221549cSmacallan/*#define SX_SINGLE*/ 47a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 48a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 49a3a2ba44Smacallan 50f221549cSmacallan#ifdef SX_RENDER_DEBUG 51a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 52a3a2ba44Smacallan#define DPRINTF xf86Msg 53a3a2ba44Smacallan#else 54a3a2ba44Smacallan#define ENTER 55a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 56a3a2ba44Smacallan#endif 57a3a2ba44Smacallan 58a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 59a3a2ba44Smacallan 6078cb1511Smacallan 6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 6278cb1511Smacallan uint32_t src, uint32_t srcpitch, 6378cb1511Smacallan uint32_t dst, uint32_t dstpitch, 6478cb1511Smacallan int width, int height) 6578cb1511Smacallan{ 6678cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 6778cb1511Smacallan int line, x, i; 6878cb1511Smacallan 6978cb1511Smacallan ENTER; 70f7cb851fSmacallan 7178cb1511Smacallan for (line = 0; line < height; line++) { 7278cb1511Smacallan mskx = msk; 7378cb1511Smacallan dstx = dst; 74f221549cSmacallan#ifndef SX_SINGLE 75f221549cSmacallan int rest; 76f221549cSmacallan for (x = 0; x < width; x += 4) { 77f221549cSmacallan rest = width - x; 78f221549cSmacallan /* fetch 4 mask values */ 79f221549cSmacallan write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 80f221549cSmacallan /* fetch destination pixels */ 81f221549cSmacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 82f221549cSmacallan /* duplicate them for all channels */ 83f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 84f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 85f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 86f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 87f221549cSmacallan /* generate inverted alpha */ 88f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 89f221549cSmacallan SX_XORS(12, 8, 28, 15)); 90f221549cSmacallan /* multiply source */ 91f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 92f221549cSmacallan SX_MUL16X16SR8(8, 12, 44, 3)); 93f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 94f221549cSmacallan SX_MUL16X16SR8(8, 16, 48, 3)); 95f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 96f221549cSmacallan SX_MUL16X16SR8(8, 20, 52, 3)); 97f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 98f221549cSmacallan SX_MUL16X16SR8(8, 24, 56, 3)); 99f221549cSmacallan /* multiply dest */ 100f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 101f221549cSmacallan SX_MUL16X16SR8(28, 60, 76, 15)); 102f221549cSmacallan /* add up */ 103f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 104f221549cSmacallan SX_ADDV(44, 76, 92, 15)); 105f221549cSmacallan /* write back */ 106f221549cSmacallan if (rest < 4) { 107f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7)); 108f221549cSmacallan } else { 109f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 110f221549cSmacallan } 111f221549cSmacallan dstx += 16; 112f221549cSmacallan mskx += 16; 113f221549cSmacallan } 114f221549cSmacallan#else /* SX_SINGLE */ 11578cb1511Smacallan for (x = 0; x < width; x++) { 11678cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 11778cb1511Smacallan m = m >> 24; 11878cb1511Smacallan if (m == 0) { 11978cb1511Smacallan /* nothing to do - all transparent */ 12078cb1511Smacallan } else if (m == 0xff) { 12178cb1511Smacallan /* all opaque */ 12278cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 12378cb1511Smacallan } else { 12478cb1511Smacallan /* fetch alpha value, stick it into scam */ 12578cb1511Smacallan /* mask is in R[12:15] */ 12678cb1511Smacallan /*write_sx_io(p, mskx, 12778cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 12878cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 12978cb1511Smacallan /* fetch dst pixel */ 13078cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 13178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13278cb1511Smacallan SX_ORV(12, 0, R_SCAM, 0)); 13378cb1511Smacallan /* 13478cb1511Smacallan * src * alpha + R0 13578cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 13678cb1511Smacallan */ 13778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13878cb1511Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 13978cb1511Smacallan 14078cb1511Smacallan /* invert SCAM */ 14178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14278cb1511Smacallan SX_XORV(12, 8, R_SCAM, 0)); 14378cb1511Smacallan#ifdef SX_DEBUG 14478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14578cb1511Smacallan SX_XORV(12, 8, 13, 0)); 14678cb1511Smacallan#endif 14778cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 14878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14978cb1511Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 15078cb1511Smacallan write_sx_io(p, dstx, 15178cb1511Smacallan SX_STUQ0C(24, 0, dstx & 7)); 15278cb1511Smacallan } 15378cb1511Smacallan dstx += 4; 15478cb1511Smacallan mskx += 4; 15578cb1511Smacallan } 156f221549cSmacallan#endif /* SX_SINGLE */ 157f221549cSmacallan dst += dstpitch; 158f221549cSmacallan msk += srcpitch; 159f221549cSmacallan } 160f221549cSmacallan} 161f221549cSmacallan 162f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 163f221549cSmacallan uint32_t src, uint32_t srcpitch, 164f221549cSmacallan uint32_t dst, uint32_t dstpitch, 165f221549cSmacallan int width, int height) 166f221549cSmacallan{ 167f221549cSmacallan uint32_t msk = src, mskx, dstx, m; 168f221549cSmacallan int line, x, i; 169f221549cSmacallan#ifdef SX_DEBUG 170f221549cSmacallan char buffer[256]; 171f221549cSmacallan#endif 172f221549cSmacallan ENTER; 173f221549cSmacallan 174f221549cSmacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 175f221549cSmacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 176f221549cSmacallan *(uint32_t *)(p->fb + p->srcoff)); 177f221549cSmacallan for (line = 0; line < height; line++) { 178f221549cSmacallan mskx = msk; 179f221549cSmacallan dstx = dst; 180f221549cSmacallan#ifndef SX_SINGLE 181f221549cSmacallan int rest; 18278cb1511Smacallan for (x = 0; x < width; x += 4) { 183f221549cSmacallan rest = width - x; 18478cb1511Smacallan /* fetch 4 mask values */ 185f221549cSmacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 18678cb1511Smacallan /* fetch destination pixels */ 18778cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 18878cb1511Smacallan /* duplicate them for all channels */ 189f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3)); 190f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3)); 191f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3)); 1926bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 19378cb1511Smacallan /* generate inverted alpha */ 19478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 19578cb1511Smacallan SX_XORS(12, 8, 28, 15)); 19678cb1511Smacallan /* multiply source */ 19778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 19878cb1511Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 19978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20078cb1511Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 20178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20278cb1511Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 20378cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20478cb1511Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 20578cb1511Smacallan /* multiply dest */ 20678cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20778cb1511Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 20878cb1511Smacallan /* add up */ 20978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 21078cb1511Smacallan SX_ADDV(44, 76, 92, 15)); 21178cb1511Smacallan /* write back */ 212f221549cSmacallan if (rest < 4) { 213f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7)); 214f221549cSmacallan } else { 215f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 216f221549cSmacallan } 21778cb1511Smacallan dstx += 16; 218f221549cSmacallan mskx += 4; 21978cb1511Smacallan } 220f221549cSmacallan#else /* SX_SINGLE */ 221a3a2ba44Smacallan for (x = 0; x < width; x++) { 222a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 223a3a2ba44Smacallan#ifdef SX_DEBUG 224a3a2ba44Smacallan buffer[x] = c[m >> 5]; 225a3a2ba44Smacallan#endif 226a3a2ba44Smacallan if (m == 0) { 227a3a2ba44Smacallan /* nothing to do - all transparent */ 228a3a2ba44Smacallan } else if (m == 0xff) { 229a3a2ba44Smacallan /* all opaque */ 230a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 231a3a2ba44Smacallan } else { 232a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 233a3a2ba44Smacallan /* mask is in R[12:15] */ 234a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 235a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 236a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 237a3a2ba44Smacallan /* fetch dst pixel */ 238a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 239a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 240a3a2ba44Smacallan SX_ORV(12, 0, R_SCAM, 0)); 241a3a2ba44Smacallan /* 242a3a2ba44Smacallan * src * alpha + R0 243a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 244a3a2ba44Smacallan */ 245a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 246a3a2ba44Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 247a3a2ba44Smacallan 248a3a2ba44Smacallan /* invert SCAM */ 249a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 250a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 251a3a2ba44Smacallan#ifdef SX_DEBUG 252a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 253a3a2ba44Smacallan SX_XORV(12, 8, 13, 0)); 254a3a2ba44Smacallan#endif 255a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 256a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 257a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 258a3a2ba44Smacallan write_sx_io(p, dstx, 259a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 260a3a2ba44Smacallan } 261a3a2ba44Smacallan dstx += 4; 262a3a2ba44Smacallan mskx += 1; 263a3a2ba44Smacallan } 264f221549cSmacallan#endif /* SX_SINGLE */ 265a3a2ba44Smacallan#ifdef SX_DEBUG 266a3a2ba44Smacallan buffer[x] = 0; 267a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 268a3a2ba44Smacallan#endif 269a3a2ba44Smacallan dst += dstpitch; 270a3a2ba44Smacallan msk += srcpitch; 271a3a2ba44Smacallan } 272a3a2ba44Smacallan} 273a3a2ba44Smacallan 274a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 275a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 276a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 277a3a2ba44Smacallan int width, int height) 278a3a2ba44Smacallan{ 279a3a2ba44Smacallan int line; 280a3a2ba44Smacallan uint32_t srcx, dstx; 281a3a2ba44Smacallan int full, part, x; 282a3a2ba44Smacallan 283a3a2ba44Smacallan ENTER; 284a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 285a3a2ba44Smacallan part = width & 7; /* leftovers */ 286a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 287a3a2ba44Smacallan for (line = 0; line < height; line++) { 288a3a2ba44Smacallan srcx = src; 289a3a2ba44Smacallan dstx = dst; 290a3a2ba44Smacallan for (x = 0; x < full; x++) { 291a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 292a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 293a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 294a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 295a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 296a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 297a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 298a3a2ba44Smacallan srcx += 128; 299a3a2ba44Smacallan dstx += 128; 300a3a2ba44Smacallan } 301a3a2ba44Smacallan 302a3a2ba44Smacallan /* do leftovers */ 303a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 304a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 305a3a2ba44Smacallan if (part & 16) { 306a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 307a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 308a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 309a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 310a3a2ba44Smacallan } else { 311a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 312a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 313a3a2ba44Smacallan } 314a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 315a3a2ba44Smacallan 316a3a2ba44Smacallan /* next line */ 317a3a2ba44Smacallan src += srcpitch; 318a3a2ba44Smacallan dst += dstpitch; 319a3a2ba44Smacallan } 320a3a2ba44Smacallan} 321a3a2ba44Smacallan 322a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 323a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 324a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 325a3a2ba44Smacallan int width, int height) 326a3a2ba44Smacallan{ 327a3a2ba44Smacallan int line; 328a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 329a3a2ba44Smacallan int pre, full, part, x; 330a3a2ba44Smacallan uint8_t *d; 331a3a2ba44Smacallan char buffer[256]; 332a3a2ba44Smacallan ENTER; 333a3a2ba44Smacallan 334a3a2ba44Smacallan srcoff = src & 7; 335a3a2ba44Smacallan src &= ~7; 336a3a2ba44Smacallan dstoff = dst & 7; 337a3a2ba44Smacallan dst &= ~7; 338a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 339a3a2ba44Smacallan part = width & 31; /* leftovers */ 340a3a2ba44Smacallan 341a3a2ba44Smacallan#ifdef SX_DEBUG 342a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 343a3a2ba44Smacallan width, height, full, part); 344a3a2ba44Smacallan#endif 345a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 346a3a2ba44Smacallan for (line = 0; line < height; line++) { 347a3a2ba44Smacallan srcx = src; 348a3a2ba44Smacallan dstx = dst; 349a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 350a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 351a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 352a3a2ba44Smacallan for (x = 0; x < width; x++) { 353a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 354a3a2ba44Smacallan } 355a3a2ba44Smacallan#else 356a3a2ba44Smacallan for (x = 0; x < full; x++) { 357a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 358a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 359a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 360a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 361a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 362a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 363a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 364a3a2ba44Smacallan srcx += 32; 365a3a2ba44Smacallan dstx += 32; 366a3a2ba44Smacallan } 367a3a2ba44Smacallan 368a3a2ba44Smacallan if (part > 0) { 369a3a2ba44Smacallan /* do leftovers */ 370a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 371a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 372a3a2ba44Smacallan if (part > 16) { 373a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 374a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 375a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 376a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 377a3a2ba44Smacallan } else { 378a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 379a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 380a3a2ba44Smacallan } 381a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 382a3a2ba44Smacallan } 383a3a2ba44Smacallan#endif 384d71cb32dSmacallan#ifdef SX_DEBUG 385d71cb32dSmacallan d = (uint8_t *)(p->fb + src + srcoff); 386d71cb32dSmacallan for (x = 0; x < width; x++) { 387d71cb32dSmacallan buffer[x] = c[d[x]>>5]; 388d71cb32dSmacallan } 389d71cb32dSmacallan buffer[x] = 0; 390d71cb32dSmacallan xf86Msg(X_ERROR, "%s\n", buffer); 391d71cb32dSmacallan#endif 392d71cb32dSmacallan /* next line */ 393d71cb32dSmacallan src += srcpitch; 394d71cb32dSmacallan dst += dstpitch; 395d71cb32dSmacallan } 396d71cb32dSmacallan} 397d71cb32dSmacallan 398d71cb32dSmacallanvoid CG14Comp_Add8_32(Cg14Ptr p, 399d71cb32dSmacallan uint32_t src, uint32_t srcpitch, 400d71cb32dSmacallan uint32_t dst, uint32_t dstpitch, 401d71cb32dSmacallan int width, int height) 402d71cb32dSmacallan{ 403d71cb32dSmacallan int line; 404d71cb32dSmacallan uint32_t srcx, dstx, srcoff, dstoff; 405d71cb32dSmacallan int pre, full, part, x; 406d71cb32dSmacallan uint8_t *d; 407d71cb32dSmacallan char buffer[256]; 408d71cb32dSmacallan ENTER; 409d71cb32dSmacallan 410d71cb32dSmacallan srcoff = src & 7; 411d71cb32dSmacallan src &= ~7; 412d71cb32dSmacallan dstoff = dst & 7; 413d71cb32dSmacallan dst &= ~7; 414d71cb32dSmacallan full = width >> 5; /* chunks of 32 */ 415d71cb32dSmacallan part = width & 31; /* leftovers */ 416d71cb32dSmacallan 417d71cb32dSmacallan#ifdef SX_DEBUG 418d71cb32dSmacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 419d71cb32dSmacallan width, height, full, part); 420d71cb32dSmacallan#endif 421d71cb32dSmacallan /* we do this up to 32 pixels at a time */ 422d71cb32dSmacallan for (line = 0; line < height; line++) { 423d71cb32dSmacallan srcx = src; 424d71cb32dSmacallan dstx = dst; 425d71cb32dSmacallan for (x = 0; x < full; x++) { 426d71cb32dSmacallan /* load source bytes */ 427d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 428d71cb32dSmacallan /* load alpha from destination */ 429d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff)); 430d71cb32dSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 431d71cb32dSmacallan SX_ADDV(8, 40, 72, 15)); 432d71cb32dSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 433d71cb32dSmacallan SX_ADDV(24, 56, 88, 15)); 434d71cb32dSmacallan /* write clamped values back into dest alpha */ 435d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff)); 436d71cb32dSmacallan srcx += 32; 437d71cb32dSmacallan dstx += 128; 438d71cb32dSmacallan } 439d71cb32dSmacallan 440d71cb32dSmacallan if (part > 0) { 441d71cb32dSmacallan /* do leftovers */ 442d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 443d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff)); 444d71cb32dSmacallan if (part > 16) { 445d71cb32dSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 446d71cb32dSmacallan SX_ADDV(8, 40, 72, 15)); 447d71cb32dSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 448d71cb32dSmacallan SX_ADDV(24, 56, 88, part - 17)); 449d71cb32dSmacallan } else { 450d71cb32dSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 451d71cb32dSmacallan SX_ADDV(8, 40, 72, part - 1)); 452d71cb32dSmacallan } 453d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff)); 454d71cb32dSmacallan } 455a3a2ba44Smacallan#ifdef SX_DEBUG 456a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 457a3a2ba44Smacallan for (x = 0; x < width; x++) { 458a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 459a3a2ba44Smacallan } 460a3a2ba44Smacallan buffer[x] = 0; 461a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 462a3a2ba44Smacallan#endif 463a3a2ba44Smacallan /* next line */ 464a3a2ba44Smacallan src += srcpitch; 465a3a2ba44Smacallan dst += dstpitch; 466a3a2ba44Smacallan } 467a3a2ba44Smacallan} 468a3a2ba44Smacallan 469a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 470a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 471a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 472a3a2ba44Smacallan int width, int height) 473a3a2ba44Smacallan{ 474a3a2ba44Smacallan uint32_t srcx, dstx, m; 475a3a2ba44Smacallan int line, x, i; 476a3a2ba44Smacallan 477a3a2ba44Smacallan ENTER; 478a3a2ba44Smacallan 479a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 480a3a2ba44Smacallan for (line = 0; line < height; line++) { 481a3a2ba44Smacallan srcx = src; 482a3a2ba44Smacallan dstx = dst; 483a3a2ba44Smacallan 484a3a2ba44Smacallan for (x = 0; x < width; x++) { 485a3a2ba44Smacallan /* fetch source pixel */ 486a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 487a3a2ba44Smacallan /* fetch dst pixel */ 488a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 489a3a2ba44Smacallan /* src is premultiplied with alpha */ 490a3a2ba44Smacallan /* write inverted alpha into SCAM */ 491a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 492a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 493a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 494a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 495239808baSmacallan SX_SAXP16X16SR8(20, 12, 24, 3)); 496a3a2ba44Smacallan write_sx_io(p, dstx, 497a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 498a3a2ba44Smacallan dstx += 4; 499a3a2ba44Smacallan srcx += 4; 500a3a2ba44Smacallan } 501a3a2ba44Smacallan dst += dstpitch; 502a3a2ba44Smacallan src += srcpitch; 503a3a2ba44Smacallan } 504a3a2ba44Smacallan} 505a3a2ba44Smacallan 506a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 507a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 508a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 509a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 510a3a2ba44Smacallan int width, int height) 511a3a2ba44Smacallan{ 512a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 513a3a2ba44Smacallan int line, x, i; 514a3a2ba44Smacallan 515a3a2ba44Smacallan ENTER; 516a3a2ba44Smacallan 517a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 518a3a2ba44Smacallan for (line = 0; line < height; line++) { 519a3a2ba44Smacallan srcx = src; 520a3a2ba44Smacallan mskx = msk; 521a3a2ba44Smacallan dstx = dst; 522a3a2ba44Smacallan 523a3a2ba44Smacallan for (x = 0; x < width; x++) { 524a3a2ba44Smacallan /* fetch source pixel */ 525a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 526a3a2ba44Smacallan /* fetch mask */ 527a3a2ba44Smacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 528a3a2ba44Smacallan /* fetch dst pixel */ 529a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 530f7cb851fSmacallan /* stick mask alpha into SCAM */ 531a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 532f7cb851fSmacallan SX_ORS(9, 0, R_SCAM, 0)); 533f7cb851fSmacallan /* apply mask */ 534a3a2ba44Smacallan /* src is premultiplied with alpha */ 535f7cb851fSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 536f7cb851fSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 537a3a2ba44Smacallan /* write inverted alpha into SCAM */ 538a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 539a3a2ba44Smacallan SX_XORV(16, 8, R_SCAM, 0)); 540a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 541a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 542239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 543a3a2ba44Smacallan write_sx_io(p, dstx, 544a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 545a3a2ba44Smacallan srcx += 4; 546a3a2ba44Smacallan mskx += 1; 547a3a2ba44Smacallan dstx += 4; 548a3a2ba44Smacallan } 549a3a2ba44Smacallan src += srcpitch; 550a3a2ba44Smacallan msk += mskpitch; 551a3a2ba44Smacallan dst += dstpitch; 552a3a2ba44Smacallan } 553a3a2ba44Smacallan} 5546bdc2ffdSmacallan 5556bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 5566bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 5576bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 5586bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 5596bdc2ffdSmacallan int width, int height) 5606bdc2ffdSmacallan{ 5616bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 5626bdc2ffdSmacallan int line, x, i; 5636bdc2ffdSmacallan 5646bdc2ffdSmacallan ENTER; 5656bdc2ffdSmacallan 5666bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 5676bdc2ffdSmacallan for (line = 0; line < height; line++) { 5686bdc2ffdSmacallan srcx = src; 5696bdc2ffdSmacallan mskx = msk; 5706bdc2ffdSmacallan dstx = dst; 5716bdc2ffdSmacallan 5726bdc2ffdSmacallan for (x = 0; x < width; x++) { 5736bdc2ffdSmacallan /* fetch source pixel */ 5746bdc2ffdSmacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 575239808baSmacallan /* set src alpha to 0xff */ 576239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 577239808baSmacallan SX_ORS(8, 0, 12, 0)); 5786bdc2ffdSmacallan /* fetch mask */ 5796bdc2ffdSmacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 5806bdc2ffdSmacallan /* fetch dst pixel */ 5816bdc2ffdSmacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 5826bdc2ffdSmacallan /* write alpha into SCAM */ 5836bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5846bdc2ffdSmacallan SX_ORS(9, 0, R_SCAM, 0)); 5856bdc2ffdSmacallan /* src * alpha + R0 */ 5866bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 587239808baSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 5886bdc2ffdSmacallan /* write inverted alpha into SCAM */ 5896bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5906bdc2ffdSmacallan SX_XORV(9, 8, R_SCAM, 0)); 5916bdc2ffdSmacallan /* dst * (1 - alpha) + R[13:15] */ 5926bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 593239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 5946bdc2ffdSmacallan write_sx_io(p, dstx, 5956bdc2ffdSmacallan SX_STUQ0C(24, 0, dstx & 7)); 5966bdc2ffdSmacallan srcx += 4; 5976bdc2ffdSmacallan mskx += 1; 5986bdc2ffdSmacallan dstx += 4; 5996bdc2ffdSmacallan } 6006bdc2ffdSmacallan src += srcpitch; 6016bdc2ffdSmacallan msk += mskpitch; 6026bdc2ffdSmacallan dst += dstpitch; 6036bdc2ffdSmacallan } 6046bdc2ffdSmacallan} 605fa158432Smacallan 606fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 607fa158432Smacallan uint32_t src, uint32_t srcpitch, 608fa158432Smacallan uint32_t msk, uint32_t mskpitch, 609fa158432Smacallan uint32_t dst, uint32_t dstpitch, 610fa158432Smacallan int width, int height) 611fa158432Smacallan{ 612fa158432Smacallan uint32_t srcx, dstx, mskx, m; 613fa158432Smacallan int line, x, i; 614fa158432Smacallan 615fa158432Smacallan ENTER; 616fa158432Smacallan 617fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 618fa158432Smacallan for (line = 0; line < height; line++) { 619fa158432Smacallan srcx = src; 620fa158432Smacallan mskx = msk; 621fa158432Smacallan dstx = dst; 622fa158432Smacallan 623fa158432Smacallan for (x = 0; x < width; x++) { 624fa158432Smacallan /* fetch source pixel */ 625fa158432Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 626fa158432Smacallan /* fetch mask */ 627239808baSmacallan write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7)); 628fa158432Smacallan /* fetch dst pixel */ 629fa158432Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 63081a370e6Smacallan /* set src alpha to 0xff */ 631fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 63281a370e6Smacallan SX_ORS(8, 0, 12, 0)); 633239808baSmacallan /* mask alpha to SCAM */ 634239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 635239808baSmacallan SX_ORS(16, 0, R_SCAM, 0)); 636239808baSmacallan /* src * alpha */ 637fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 638239808baSmacallan SX_SAXP16X16SR8(12, 0, 24, 3)); 639fa158432Smacallan /* write inverted alpha into SCAM */ 640fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 641239808baSmacallan SX_XORS(16, 8, R_SCAM, 0)); 642239808baSmacallan /* dst * (1 - alpha) + R[24:31] */ 643fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 644239808baSmacallan SX_SAXP16X16SR8(20, 24, 28, 3)); 645fa158432Smacallan write_sx_io(p, dstx, 64681a370e6Smacallan SX_STUQ0C(28, 0, dstx & 7)); 647fa158432Smacallan srcx += 4; 648fa158432Smacallan mskx += 4; 649fa158432Smacallan dstx += 4; 650fa158432Smacallan } 651fa158432Smacallan src += srcpitch; 652fa158432Smacallan msk += mskpitch; 653fa158432Smacallan dst += dstpitch; 654fa158432Smacallan } 655fa158432Smacallan} 656