cg14_render.c revision f221549c
1f221549cSmacallan/* $NetBSD: cg14_render.c,v 1.9 2016/09/16 22:07:25 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 36a3a2ba44Smacallan#include <sys/types.h> 37a3a2ba44Smacallan 38a3a2ba44Smacallan/* all driver need this */ 39a3a2ba44Smacallan#include "xf86.h" 40a3a2ba44Smacallan#include "xf86_OSproc.h" 41a3a2ba44Smacallan#include "compiler.h" 42a3a2ba44Smacallan 43a3a2ba44Smacallan#include "cg14.h" 44a3a2ba44Smacallan#include <sparc/sxreg.h> 45a3a2ba44Smacallan 46f221549cSmacallan/*#define SX_SINGLE*/ 47a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 48a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 49a3a2ba44Smacallan 50f221549cSmacallan#ifdef SX_RENDER_DEBUG 51a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 52a3a2ba44Smacallan#define DPRINTF xf86Msg 53a3a2ba44Smacallan#else 54a3a2ba44Smacallan#define ENTER 55a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 56a3a2ba44Smacallan#endif 57a3a2ba44Smacallan 58a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 59a3a2ba44Smacallan 6078cb1511Smacallan 6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 6278cb1511Smacallan uint32_t src, uint32_t srcpitch, 6378cb1511Smacallan uint32_t dst, uint32_t dstpitch, 6478cb1511Smacallan int width, int height) 6578cb1511Smacallan{ 6678cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 6778cb1511Smacallan int line, x, i; 6878cb1511Smacallan 6978cb1511Smacallan ENTER; 70f7cb851fSmacallan 7178cb1511Smacallan for (line = 0; line < height; line++) { 7278cb1511Smacallan mskx = msk; 7378cb1511Smacallan dstx = dst; 74f221549cSmacallan#ifndef SX_SINGLE 75f221549cSmacallan int rest; 76f221549cSmacallan for (x = 0; x < width; x += 4) { 77f221549cSmacallan rest = width - x; 78f221549cSmacallan /* fetch 4 mask values */ 79f221549cSmacallan write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 80f221549cSmacallan /* fetch destination pixels */ 81f221549cSmacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 82f221549cSmacallan /* duplicate them for all channels */ 83f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 84f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 85f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 86f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 87f221549cSmacallan /* generate inverted alpha */ 88f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 89f221549cSmacallan SX_XORS(12, 8, 28, 15)); 90f221549cSmacallan /* multiply source */ 91f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 92f221549cSmacallan SX_MUL16X16SR8(8, 12, 44, 3)); 93f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 94f221549cSmacallan SX_MUL16X16SR8(8, 16, 48, 3)); 95f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 96f221549cSmacallan SX_MUL16X16SR8(8, 20, 52, 3)); 97f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 98f221549cSmacallan SX_MUL16X16SR8(8, 24, 56, 3)); 99f221549cSmacallan /* multiply dest */ 100f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 101f221549cSmacallan SX_MUL16X16SR8(28, 60, 76, 15)); 102f221549cSmacallan /* add up */ 103f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 104f221549cSmacallan SX_ADDV(44, 76, 92, 15)); 105f221549cSmacallan /* write back */ 106f221549cSmacallan if (rest < 4) { 107f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7)); 108f221549cSmacallan } else { 109f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 110f221549cSmacallan } 111f221549cSmacallan dstx += 16; 112f221549cSmacallan mskx += 16; 113f221549cSmacallan } 114f221549cSmacallan#else /* SX_SINGLE */ 11578cb1511Smacallan for (x = 0; x < width; x++) { 11678cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 11778cb1511Smacallan m = m >> 24; 11878cb1511Smacallan if (m == 0) { 11978cb1511Smacallan /* nothing to do - all transparent */ 12078cb1511Smacallan } else if (m == 0xff) { 12178cb1511Smacallan /* all opaque */ 12278cb1511Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 12378cb1511Smacallan } else { 12478cb1511Smacallan /* fetch alpha value, stick it into scam */ 12578cb1511Smacallan /* mask is in R[12:15] */ 12678cb1511Smacallan /*write_sx_io(p, mskx, 12778cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 12878cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 12978cb1511Smacallan /* fetch dst pixel */ 13078cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 13178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13278cb1511Smacallan SX_ORV(12, 0, R_SCAM, 0)); 13378cb1511Smacallan /* 13478cb1511Smacallan * src * alpha + R0 13578cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 13678cb1511Smacallan */ 13778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 13878cb1511Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 13978cb1511Smacallan 14078cb1511Smacallan /* invert SCAM */ 14178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14278cb1511Smacallan SX_XORV(12, 8, R_SCAM, 0)); 14378cb1511Smacallan#ifdef SX_DEBUG 14478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14578cb1511Smacallan SX_XORV(12, 8, 13, 0)); 14678cb1511Smacallan#endif 14778cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 14878cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 14978cb1511Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 15078cb1511Smacallan write_sx_io(p, dstx, 15178cb1511Smacallan SX_STUQ0C(24, 0, dstx & 7)); 15278cb1511Smacallan } 15378cb1511Smacallan dstx += 4; 15478cb1511Smacallan mskx += 4; 15578cb1511Smacallan } 156f221549cSmacallan#endif /* SX_SINGLE */ 157f221549cSmacallan dst += dstpitch; 158f221549cSmacallan msk += srcpitch; 159f221549cSmacallan } 160f221549cSmacallan} 161f221549cSmacallan 162f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 163f221549cSmacallan uint32_t src, uint32_t srcpitch, 164f221549cSmacallan uint32_t dst, uint32_t dstpitch, 165f221549cSmacallan int width, int height) 166f221549cSmacallan{ 167f221549cSmacallan uint32_t msk = src, mskx, dstx, m; 168f221549cSmacallan int line, x, i; 169f221549cSmacallan#ifdef SX_DEBUG 170f221549cSmacallan char buffer[256]; 171f221549cSmacallan#endif 172f221549cSmacallan ENTER; 173f221549cSmacallan 174f221549cSmacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 175f221549cSmacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 176f221549cSmacallan *(uint32_t *)(p->fb + p->srcoff)); 177f221549cSmacallan for (line = 0; line < height; line++) { 178f221549cSmacallan mskx = msk; 179f221549cSmacallan dstx = dst; 180f221549cSmacallan#ifndef SX_SINGLE 181f221549cSmacallan int rest; 18278cb1511Smacallan for (x = 0; x < width; x += 4) { 183f221549cSmacallan rest = width - x; 18478cb1511Smacallan /* fetch 4 mask values */ 185f221549cSmacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 18678cb1511Smacallan /* fetch destination pixels */ 18778cb1511Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 18878cb1511Smacallan /* duplicate them for all channels */ 189f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3)); 190f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3)); 191f221549cSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3)); 1926bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 19378cb1511Smacallan /* generate inverted alpha */ 19478cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 19578cb1511Smacallan SX_XORS(12, 8, 28, 15)); 19678cb1511Smacallan /* multiply source */ 19778cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 19878cb1511Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 19978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20078cb1511Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 20178cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20278cb1511Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 20378cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20478cb1511Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 20578cb1511Smacallan /* multiply dest */ 20678cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 20778cb1511Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 20878cb1511Smacallan /* add up */ 20978cb1511Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 21078cb1511Smacallan SX_ADDV(44, 76, 92, 15)); 21178cb1511Smacallan /* write back */ 212f221549cSmacallan if (rest < 4) { 213f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7)); 214f221549cSmacallan } else { 215f221549cSmacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 216f221549cSmacallan } 21778cb1511Smacallan dstx += 16; 218f221549cSmacallan mskx += 4; 21978cb1511Smacallan } 220f221549cSmacallan#else /* SX_SINGLE */ 221a3a2ba44Smacallan for (x = 0; x < width; x++) { 222a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 223a3a2ba44Smacallan#ifdef SX_DEBUG 224a3a2ba44Smacallan buffer[x] = c[m >> 5]; 225a3a2ba44Smacallan#endif 226a3a2ba44Smacallan if (m == 0) { 227a3a2ba44Smacallan /* nothing to do - all transparent */ 228a3a2ba44Smacallan } else if (m == 0xff) { 229a3a2ba44Smacallan /* all opaque */ 230a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 231a3a2ba44Smacallan } else { 232a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 233a3a2ba44Smacallan /* mask is in R[12:15] */ 234a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 235a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 236a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 237a3a2ba44Smacallan /* fetch dst pixel */ 238a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 239a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 240a3a2ba44Smacallan SX_ORV(12, 0, R_SCAM, 0)); 241a3a2ba44Smacallan /* 242a3a2ba44Smacallan * src * alpha + R0 243a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 244a3a2ba44Smacallan */ 245a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 246a3a2ba44Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 247a3a2ba44Smacallan 248a3a2ba44Smacallan /* invert SCAM */ 249a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 250a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 251a3a2ba44Smacallan#ifdef SX_DEBUG 252a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 253a3a2ba44Smacallan SX_XORV(12, 8, 13, 0)); 254a3a2ba44Smacallan#endif 255a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 256a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 257a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 258a3a2ba44Smacallan write_sx_io(p, dstx, 259a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 260a3a2ba44Smacallan } 261a3a2ba44Smacallan dstx += 4; 262a3a2ba44Smacallan mskx += 1; 263a3a2ba44Smacallan } 264f221549cSmacallan#endif /* SX_SINGLE */ 265a3a2ba44Smacallan#ifdef SX_DEBUG 266a3a2ba44Smacallan buffer[x] = 0; 267a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 268a3a2ba44Smacallan#endif 269a3a2ba44Smacallan dst += dstpitch; 270a3a2ba44Smacallan msk += srcpitch; 271a3a2ba44Smacallan } 272a3a2ba44Smacallan} 273a3a2ba44Smacallan 274a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 275a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 276a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 277a3a2ba44Smacallan int width, int height) 278a3a2ba44Smacallan{ 279a3a2ba44Smacallan int line; 280a3a2ba44Smacallan uint32_t srcx, dstx; 281a3a2ba44Smacallan int full, part, x; 282a3a2ba44Smacallan 283a3a2ba44Smacallan ENTER; 284a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 285a3a2ba44Smacallan part = width & 7; /* leftovers */ 286a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 287a3a2ba44Smacallan for (line = 0; line < height; line++) { 288a3a2ba44Smacallan srcx = src; 289a3a2ba44Smacallan dstx = dst; 290a3a2ba44Smacallan for (x = 0; x < full; x++) { 291a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 292a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 293a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 294a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 295a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 296a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 297a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 298a3a2ba44Smacallan srcx += 128; 299a3a2ba44Smacallan dstx += 128; 300a3a2ba44Smacallan } 301a3a2ba44Smacallan 302a3a2ba44Smacallan /* do leftovers */ 303a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 304a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 305a3a2ba44Smacallan if (part & 16) { 306a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 307a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 308a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 309a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 310a3a2ba44Smacallan } else { 311a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 312a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 313a3a2ba44Smacallan } 314a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 315a3a2ba44Smacallan 316a3a2ba44Smacallan /* next line */ 317a3a2ba44Smacallan src += srcpitch; 318a3a2ba44Smacallan dst += dstpitch; 319a3a2ba44Smacallan } 320a3a2ba44Smacallan} 321a3a2ba44Smacallan 322a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 323a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 324a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 325a3a2ba44Smacallan int width, int height) 326a3a2ba44Smacallan{ 327a3a2ba44Smacallan int line; 328a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 329a3a2ba44Smacallan int pre, full, part, x; 330a3a2ba44Smacallan uint8_t *d; 331a3a2ba44Smacallan char buffer[256]; 332a3a2ba44Smacallan ENTER; 333a3a2ba44Smacallan 334a3a2ba44Smacallan srcoff = src & 7; 335a3a2ba44Smacallan src &= ~7; 336a3a2ba44Smacallan dstoff = dst & 7; 337a3a2ba44Smacallan dst &= ~7; 338a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 339a3a2ba44Smacallan part = width & 31; /* leftovers */ 340a3a2ba44Smacallan 341a3a2ba44Smacallan#ifdef SX_DEBUG 342a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 343a3a2ba44Smacallan width, height, full, part); 344a3a2ba44Smacallan#endif 345a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 346a3a2ba44Smacallan for (line = 0; line < height; line++) { 347a3a2ba44Smacallan srcx = src; 348a3a2ba44Smacallan dstx = dst; 349a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 350a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 351a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 352a3a2ba44Smacallan for (x = 0; x < width; x++) { 353a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 354a3a2ba44Smacallan } 355a3a2ba44Smacallan#else 356a3a2ba44Smacallan for (x = 0; x < full; x++) { 357a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 358a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 359a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 360a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 361a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 362a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 363a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 364a3a2ba44Smacallan srcx += 32; 365a3a2ba44Smacallan dstx += 32; 366a3a2ba44Smacallan } 367a3a2ba44Smacallan 368a3a2ba44Smacallan if (part > 0) { 369a3a2ba44Smacallan /* do leftovers */ 370a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 371a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 372a3a2ba44Smacallan if (part > 16) { 373a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 374a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 375a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 376a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 377a3a2ba44Smacallan } else { 378a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 379a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 380a3a2ba44Smacallan } 381a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 382a3a2ba44Smacallan } 383a3a2ba44Smacallan#endif 384a3a2ba44Smacallan#ifdef SX_DEBUG 385a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 386a3a2ba44Smacallan for (x = 0; x < width; x++) { 387a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 388a3a2ba44Smacallan } 389a3a2ba44Smacallan buffer[x] = 0; 390a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 391a3a2ba44Smacallan#endif 392a3a2ba44Smacallan /* next line */ 393a3a2ba44Smacallan src += srcpitch; 394a3a2ba44Smacallan dst += dstpitch; 395a3a2ba44Smacallan } 396a3a2ba44Smacallan} 397a3a2ba44Smacallan 398a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 399a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 400a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 401a3a2ba44Smacallan int width, int height) 402a3a2ba44Smacallan{ 403a3a2ba44Smacallan uint32_t srcx, dstx, m; 404a3a2ba44Smacallan int line, x, i; 405a3a2ba44Smacallan 406a3a2ba44Smacallan ENTER; 407a3a2ba44Smacallan 408a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 409a3a2ba44Smacallan for (line = 0; line < height; line++) { 410a3a2ba44Smacallan srcx = src; 411a3a2ba44Smacallan dstx = dst; 412a3a2ba44Smacallan 413a3a2ba44Smacallan for (x = 0; x < width; x++) { 414a3a2ba44Smacallan /* fetch source pixel */ 415a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 416a3a2ba44Smacallan /* fetch dst pixel */ 417a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 418a3a2ba44Smacallan /* src is premultiplied with alpha */ 419a3a2ba44Smacallan /* write inverted alpha into SCAM */ 420a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 421a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 422a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 423a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 424239808baSmacallan SX_SAXP16X16SR8(20, 12, 24, 3)); 425a3a2ba44Smacallan write_sx_io(p, dstx, 426a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 427a3a2ba44Smacallan dstx += 4; 428a3a2ba44Smacallan srcx += 4; 429a3a2ba44Smacallan } 430a3a2ba44Smacallan dst += dstpitch; 431a3a2ba44Smacallan src += srcpitch; 432a3a2ba44Smacallan } 433a3a2ba44Smacallan} 434a3a2ba44Smacallan 435a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 436a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 437a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 438a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 439a3a2ba44Smacallan int width, int height) 440a3a2ba44Smacallan{ 441a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 442a3a2ba44Smacallan int line, x, i; 443a3a2ba44Smacallan 444a3a2ba44Smacallan ENTER; 445a3a2ba44Smacallan 446a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 447a3a2ba44Smacallan for (line = 0; line < height; line++) { 448a3a2ba44Smacallan srcx = src; 449a3a2ba44Smacallan mskx = msk; 450a3a2ba44Smacallan dstx = dst; 451a3a2ba44Smacallan 452a3a2ba44Smacallan for (x = 0; x < width; x++) { 453a3a2ba44Smacallan /* fetch source pixel */ 454a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 455a3a2ba44Smacallan /* fetch mask */ 456a3a2ba44Smacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 457a3a2ba44Smacallan /* fetch dst pixel */ 458a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 459f7cb851fSmacallan /* stick mask alpha into SCAM */ 460a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 461f7cb851fSmacallan SX_ORS(9, 0, R_SCAM, 0)); 462f7cb851fSmacallan /* apply mask */ 463a3a2ba44Smacallan /* src is premultiplied with alpha */ 464f7cb851fSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 465f7cb851fSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 466a3a2ba44Smacallan /* write inverted alpha into SCAM */ 467a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 468a3a2ba44Smacallan SX_XORV(16, 8, R_SCAM, 0)); 469a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 470a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 471239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 472a3a2ba44Smacallan write_sx_io(p, dstx, 473a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 474a3a2ba44Smacallan srcx += 4; 475a3a2ba44Smacallan mskx += 1; 476a3a2ba44Smacallan dstx += 4; 477a3a2ba44Smacallan } 478a3a2ba44Smacallan src += srcpitch; 479a3a2ba44Smacallan msk += mskpitch; 480a3a2ba44Smacallan dst += dstpitch; 481a3a2ba44Smacallan } 482a3a2ba44Smacallan} 4836bdc2ffdSmacallan 4846bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 4856bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 4866bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 4876bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 4886bdc2ffdSmacallan int width, int height) 4896bdc2ffdSmacallan{ 4906bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 4916bdc2ffdSmacallan int line, x, i; 4926bdc2ffdSmacallan 4936bdc2ffdSmacallan ENTER; 4946bdc2ffdSmacallan 4956bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 4966bdc2ffdSmacallan for (line = 0; line < height; line++) { 4976bdc2ffdSmacallan srcx = src; 4986bdc2ffdSmacallan mskx = msk; 4996bdc2ffdSmacallan dstx = dst; 5006bdc2ffdSmacallan 5016bdc2ffdSmacallan for (x = 0; x < width; x++) { 5026bdc2ffdSmacallan /* fetch source pixel */ 5036bdc2ffdSmacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 504239808baSmacallan /* set src alpha to 0xff */ 505239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 506239808baSmacallan SX_ORS(8, 0, 12, 0)); 5076bdc2ffdSmacallan /* fetch mask */ 5086bdc2ffdSmacallan write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); 5096bdc2ffdSmacallan /* fetch dst pixel */ 5106bdc2ffdSmacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 5116bdc2ffdSmacallan /* write alpha into SCAM */ 5126bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5136bdc2ffdSmacallan SX_ORS(9, 0, R_SCAM, 0)); 5146bdc2ffdSmacallan /* src * alpha + R0 */ 5156bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 516239808baSmacallan SX_SAXP16X16SR8(12, 0, 16, 3)); 5176bdc2ffdSmacallan /* write inverted alpha into SCAM */ 5186bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 5196bdc2ffdSmacallan SX_XORV(9, 8, R_SCAM, 0)); 5206bdc2ffdSmacallan /* dst * (1 - alpha) + R[13:15] */ 5216bdc2ffdSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 522239808baSmacallan SX_SAXP16X16SR8(20, 16, 24, 3)); 5236bdc2ffdSmacallan write_sx_io(p, dstx, 5246bdc2ffdSmacallan SX_STUQ0C(24, 0, dstx & 7)); 5256bdc2ffdSmacallan srcx += 4; 5266bdc2ffdSmacallan mskx += 1; 5276bdc2ffdSmacallan dstx += 4; 5286bdc2ffdSmacallan } 5296bdc2ffdSmacallan src += srcpitch; 5306bdc2ffdSmacallan msk += mskpitch; 5316bdc2ffdSmacallan dst += dstpitch; 5326bdc2ffdSmacallan } 5336bdc2ffdSmacallan} 534fa158432Smacallan 535fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 536fa158432Smacallan uint32_t src, uint32_t srcpitch, 537fa158432Smacallan uint32_t msk, uint32_t mskpitch, 538fa158432Smacallan uint32_t dst, uint32_t dstpitch, 539fa158432Smacallan int width, int height) 540fa158432Smacallan{ 541fa158432Smacallan uint32_t srcx, dstx, mskx, m; 542fa158432Smacallan int line, x, i; 543fa158432Smacallan 544fa158432Smacallan ENTER; 545fa158432Smacallan 546fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 547fa158432Smacallan for (line = 0; line < height; line++) { 548fa158432Smacallan srcx = src; 549fa158432Smacallan mskx = msk; 550fa158432Smacallan dstx = dst; 551fa158432Smacallan 552fa158432Smacallan for (x = 0; x < width; x++) { 553fa158432Smacallan /* fetch source pixel */ 554fa158432Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 555fa158432Smacallan /* fetch mask */ 556239808baSmacallan write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7)); 557fa158432Smacallan /* fetch dst pixel */ 558fa158432Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 55981a370e6Smacallan /* set src alpha to 0xff */ 560fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 56181a370e6Smacallan SX_ORS(8, 0, 12, 0)); 562239808baSmacallan /* mask alpha to SCAM */ 563239808baSmacallan write_sx_reg(p, SX_INSTRUCTIONS, 564239808baSmacallan SX_ORS(16, 0, R_SCAM, 0)); 565239808baSmacallan /* src * alpha */ 566fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 567239808baSmacallan SX_SAXP16X16SR8(12, 0, 24, 3)); 568fa158432Smacallan /* write inverted alpha into SCAM */ 569fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 570239808baSmacallan SX_XORS(16, 8, R_SCAM, 0)); 571239808baSmacallan /* dst * (1 - alpha) + R[24:31] */ 572fa158432Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 573239808baSmacallan SX_SAXP16X16SR8(20, 24, 28, 3)); 574fa158432Smacallan write_sx_io(p, dstx, 57581a370e6Smacallan SX_STUQ0C(28, 0, dstx & 7)); 576fa158432Smacallan srcx += 4; 577fa158432Smacallan mskx += 4; 578fa158432Smacallan dstx += 4; 579fa158432Smacallan } 580fa158432Smacallan src += srcpitch; 581fa158432Smacallan msk += mskpitch; 582fa158432Smacallan dst += dstpitch; 583fa158432Smacallan } 584fa158432Smacallan} 585