/* cg14_render.c revision 665b72dd */
1665b72ddSmacallan/* $NetBSD: cg14_render.c,v 1.19 2023/01/11 09:23:57 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 
29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 36a3a2ba44Smacallan#include <sys/types.h> 37a3a2ba44Smacallan 38a3a2ba44Smacallan/* all driver need this */ 39a3a2ba44Smacallan#include "xf86.h" 40a3a2ba44Smacallan#include "xf86_OSproc.h" 41a3a2ba44Smacallan#include "compiler.h" 42a3a2ba44Smacallan 43a3a2ba44Smacallan#include "cg14.h" 44a3a2ba44Smacallan 45f221549cSmacallan/*#define SX_SINGLE*/ 46a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 472066ab30Smacallan/*#define SX_RENDER_VERBOSE*/ 48a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 492066ab30Smacallan/*#define SX_RENDER_TRACE*/ 50a3a2ba44Smacallan 512066ab30Smacallan#ifdef SX_RENDER_TRACE 52a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 532066ab30Smacallan#define DONE xf86Msg(X_ERROR, "<%s\n", __func__); 54a3a2ba44Smacallan#else 55a3a2ba44Smacallan#define ENTER 562066ab30Smacallan#define DONE 572066ab30Smacallan#endif 582066ab30Smacallan 592066ab30Smacallan#ifdef SX_RENDER_DEBUG 602066ab30Smacallan#define DPRINTF xf86Msg 612066ab30Smacallan#else 62a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 63a3a2ba44Smacallan#endif 64a3a2ba44Smacallan 65665b72ddSmacallan#ifdef SX_RENDER_VERBOSE 66a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 67ad6af7a7Smacallan#endif 6878cb1511Smacallan 6978cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 7078cb1511Smacallan uint32_t src, uint32_t srcpitch, 7178cb1511Smacallan uint32_t dst, uint32_t dstpitch, 7278cb1511Smacallan int width, int height) 7378cb1511Smacallan{ 7478cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 7578cb1511Smacallan int line, x, i; 7678cb1511Smacallan 7778cb1511Smacallan ENTER; 78f7cb851fSmacallan 7978cb1511Smacallan for (line = 0; line < height; line++) { 8078cb1511Smacallan mskx = msk; 8178cb1511Smacallan dstx = dst; 82f221549cSmacallan#ifndef SX_SINGLE 83f221549cSmacallan int rest; 
84f221549cSmacallan for (x = 0; x < width; x += 4) { 85f221549cSmacallan rest = width - x; 86f221549cSmacallan /* fetch 4 mask values */ 8772fd264fSmacallan sxm(SX_LDUQ0, mskx, 12, 3); 88f221549cSmacallan /* fetch destination pixels */ 8972fd264fSmacallan sxm(SX_LDUQ0, dstx, 60, 3); 90f221549cSmacallan /* duplicate them for all channels */ 91230e26c7Smacallan sxi(SX_ORS, 0, 12, 13, 2); 92230e26c7Smacallan sxi(SX_ORS, 0, 16, 17, 2); 93230e26c7Smacallan sxi(SX_ORS, 0, 20, 21, 2); 94230e26c7Smacallan sxi(SX_ORS, 0, 24, 25, 2); 95f221549cSmacallan /* generate inverted alpha */ 96230e26c7Smacallan sxi(SX_XORS, 12, 8, 28, 15); 97f221549cSmacallan /* multiply source */ 98230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 12, 44, 3); 99230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 16, 48, 3); 100230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 20, 52, 3); 101230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 24, 56, 3); 102f221549cSmacallan /* multiply dest */ 103230e26c7Smacallan sxi(SX_MUL16X16SR8, 28, 60, 76, 15); 104f221549cSmacallan /* add up */ 105230e26c7Smacallan sxi(SX_ADDV, 44, 76, 92, 15); 106f221549cSmacallan /* write back */ 107f221549cSmacallan if (rest < 4) { 10872fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, rest - 1); 109f221549cSmacallan } else { 11072fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, 3); 111f221549cSmacallan } 112f221549cSmacallan dstx += 16; 113f221549cSmacallan mskx += 16; 114f221549cSmacallan } 115f221549cSmacallan#else /* SX_SINGLE */ 11678cb1511Smacallan for (x = 0; x < width; x++) { 11778cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 11878cb1511Smacallan m = m >> 24; 11978cb1511Smacallan if (m == 0) { 12078cb1511Smacallan /* nothing to do - all transparent */ 12178cb1511Smacallan } else if (m == 0xff) { 12278cb1511Smacallan /* all opaque */ 12372fd264fSmacallan sxm(SX_STUQ0, dstx, 8, 0); 12478cb1511Smacallan } else { 12578cb1511Smacallan /* fetch alpha value, stick it into scam */ 12678cb1511Smacallan /* mask is in R[12:15] */ 12778cb1511Smacallan /*write_sx_io(p, 
mskx, 12878cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 12978cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 13078cb1511Smacallan /* fetch dst pixel */ 13172fd264fSmacallan sxm(SX_LDUQ0, dstx, 20, 0); 132230e26c7Smacallan sxi(SX_ORV, 12, 0, R_SCAM, 0); 13378cb1511Smacallan /* 13478cb1511Smacallan * src * alpha + R0 13578cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 13678cb1511Smacallan */ 137230e26c7Smacallan sxi(SX_SAXP16X16SR8, 9, 0, 17, 2); 13878cb1511Smacallan 13978cb1511Smacallan /* invert SCAM */ 140230e26c7Smacallan sxi(SX_XORV, 12, 8, R_SCAM, 0); 141ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 142230e26c7Smacallan sxi(SX_XORV, 12, 8, 13, 0); 14378cb1511Smacallan#endif 14478cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 145230e26c7Smacallan sxi(SX_SAXP16X16SR8, 21, 17, 25, 2); 14672fd264fSmacallan sxm(SX_STUQ0C, dstx, 24, 0); 14778cb1511Smacallan } 14878cb1511Smacallan dstx += 4; 14978cb1511Smacallan mskx += 4; 15078cb1511Smacallan } 151f221549cSmacallan#endif /* SX_SINGLE */ 152f221549cSmacallan dst += dstpitch; 153f221549cSmacallan msk += srcpitch; 154f221549cSmacallan } 155f221549cSmacallan} 156f221549cSmacallan 157f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 158f221549cSmacallan uint32_t src, uint32_t srcpitch, 159f221549cSmacallan uint32_t dst, uint32_t dstpitch, 160f221549cSmacallan int width, int height) 161f221549cSmacallan{ 162f221549cSmacallan uint32_t msk = src, mskx, dstx, m; 163f221549cSmacallan int line, x, i; 164665b72ddSmacallan#ifdef SX_RENDER_VERBOSE 165f221549cSmacallan char buffer[256]; 166f221549cSmacallan#endif 167f221549cSmacallan ENTER; 168f221549cSmacallan 169f221549cSmacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 170f221549cSmacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 171f221549cSmacallan *(uint32_t *)(p->fb + p->srcoff)); 172f221549cSmacallan for (line = 0; line < height; line++) { 173f221549cSmacallan mskx = msk; 174f221549cSmacallan dstx = dst; 
175f221549cSmacallan#ifndef SX_SINGLE 176f221549cSmacallan int rest; 17778cb1511Smacallan for (x = 0; x < width; x += 4) { 178f221549cSmacallan rest = width - x; 17978cb1511Smacallan /* fetch 4 mask values */ 18072fd264fSmacallan sxm(SX_LDB, mskx, 12, 3); 18178cb1511Smacallan /* fetch destination pixels */ 18272fd264fSmacallan sxm(SX_LDUQ0, dstx, 60, 3); 18378cb1511Smacallan /* duplicate them for all channels */ 184230e26c7Smacallan sxi(SX_ORS, 0, 13, 16, 3); 185230e26c7Smacallan sxi(SX_ORS, 0, 14, 20, 3); 186230e26c7Smacallan sxi(SX_ORS, 0, 15, 24, 3); 187230e26c7Smacallan sxi(SX_ORS, 0, 12, 13, 2); 18878cb1511Smacallan /* generate inverted alpha */ 189230e26c7Smacallan sxi(SX_XORS, 12, 8, 28, 15); 19078cb1511Smacallan /* multiply source */ 191230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 12, 44, 3); 192230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 16, 48, 3); 193230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 20, 52, 3); 194230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 24, 56, 3); 19578cb1511Smacallan /* multiply dest */ 196230e26c7Smacallan sxi(SX_MUL16X16SR8, 28, 60, 76, 15); 19778cb1511Smacallan /* add up */ 198230e26c7Smacallan sxi(SX_ADDV, 44, 76, 92, 15); 19978cb1511Smacallan /* write back */ 200f221549cSmacallan if (rest < 4) { 20172fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, rest - 1); 202f221549cSmacallan } else { 20372fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, 3); 204f221549cSmacallan } 20578cb1511Smacallan dstx += 16; 206f221549cSmacallan mskx += 4; 20778cb1511Smacallan } 208f221549cSmacallan#else /* SX_SINGLE */ 209a3a2ba44Smacallan for (x = 0; x < width; x++) { 210a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 2112066ab30Smacallan#ifdef SX_RENDER_VERBOSE 212a3a2ba44Smacallan buffer[x] = c[m >> 5]; 213a3a2ba44Smacallan#endif 214a3a2ba44Smacallan if (m == 0) { 215a3a2ba44Smacallan /* nothing to do - all transparent */ 216a3a2ba44Smacallan } else if (m == 0xff) { 217a3a2ba44Smacallan /* all opaque */ 21872fd264fSmacallan sxm(SX_STUQ0, dstx, 8, 0); 219a3a2ba44Smacallan 
} else { 220a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 221a3a2ba44Smacallan /* mask is in R[12:15] */ 222a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 223a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 224a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 225a3a2ba44Smacallan /* fetch dst pixel */ 22672fd264fSmacallan sxm(SX_LDUQ0, dstx, 20, 0); 227230e26c7Smacallan sxi(SX_ORV, 12, 0, R_SCAM, 0); 228a3a2ba44Smacallan /* 229a3a2ba44Smacallan * src * alpha + R0 230a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 231a3a2ba44Smacallan */ 232230e26c7Smacallan sxi(SX_SAXP16X16SR8, 9, 0, 17, 2); 233a3a2ba44Smacallan 234a3a2ba44Smacallan /* invert SCAM */ 235230e26c7Smacallan sxi(SX_XORV, 12, 8, R_SCAM, 0); 236ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 237230e26c7Smacallan sxi(SX_XORV, 12, 8, 13, 0); 238a3a2ba44Smacallan#endif 239a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 240230e26c7Smacallan sxi(SX_SAXP16X16SR8, 21, 17, 25, 2); 24172fd264fSmacallan sxm(SX_STUQ0C, dstx, 24, 0); 242a3a2ba44Smacallan } 243a3a2ba44Smacallan dstx += 4; 244a3a2ba44Smacallan mskx += 1; 245a3a2ba44Smacallan } 246f221549cSmacallan#endif /* SX_SINGLE */ 2472066ab30Smacallan#ifdef SX_RENDER_VERBOSE 248a3a2ba44Smacallan buffer[x] = 0; 249a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 250a3a2ba44Smacallan#endif 251a3a2ba44Smacallan dst += dstpitch; 252a3a2ba44Smacallan msk += srcpitch; 253a3a2ba44Smacallan } 2542066ab30Smacallan DONE; 255a3a2ba44Smacallan} 256a3a2ba44Smacallan 257a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 258a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 259a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 260a3a2ba44Smacallan int width, int height) 261a3a2ba44Smacallan{ 262a3a2ba44Smacallan int line; 263a3a2ba44Smacallan uint32_t srcx, dstx; 264a3a2ba44Smacallan int full, part, x; 265a3a2ba44Smacallan 266a3a2ba44Smacallan ENTER; 267a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 268a3a2ba44Smacallan part = width & 7; /* leftovers */ 
269a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 270a3a2ba44Smacallan for (line = 0; line < height; line++) { 271a3a2ba44Smacallan srcx = src; 272a3a2ba44Smacallan dstx = dst; 273a3a2ba44Smacallan for (x = 0; x < full; x++) { 27472fd264fSmacallan sxm(SX_LDUQ0, srcx, 8, 31); 27572fd264fSmacallan sxm(SX_LDUQ0, dstx, 40, 31); 276230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 277230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, 15); 27872fd264fSmacallan sxm(SX_STUQ0, dstx, 72, 31); 279a3a2ba44Smacallan srcx += 128; 280a3a2ba44Smacallan dstx += 128; 281a3a2ba44Smacallan } 282a3a2ba44Smacallan 283a3a2ba44Smacallan /* do leftovers */ 28472fd264fSmacallan sxm(SX_LDUQ0, srcx, 8, part - 1); 28572fd264fSmacallan sxm(SX_LDUQ0, dstx, 40, part - 1); 286a3a2ba44Smacallan if (part & 16) { 287230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 288230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 289a3a2ba44Smacallan } else { 290230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 291a3a2ba44Smacallan } 29272fd264fSmacallan sxm(SX_STUQ0, dstx, 72, part - 1); 293a3a2ba44Smacallan 294a3a2ba44Smacallan /* next line */ 295a3a2ba44Smacallan src += srcpitch; 296a3a2ba44Smacallan dst += dstpitch; 297a3a2ba44Smacallan } 298a3a2ba44Smacallan} 299a3a2ba44Smacallan 300a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 301a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 302a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 303a3a2ba44Smacallan int width, int height) 304a3a2ba44Smacallan{ 305a3a2ba44Smacallan int line; 306a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 307a3a2ba44Smacallan int pre, full, part, x; 308a3a2ba44Smacallan uint8_t *d; 3092066ab30Smacallan#ifdef SX_RENDER_VERBOSE 310a3a2ba44Smacallan char buffer[256]; 3112066ab30Smacallan#endif 312a3a2ba44Smacallan ENTER; 313a3a2ba44Smacallan 314a3a2ba44Smacallan srcoff = src & 7; 315a3a2ba44Smacallan src &= ~7; 316a3a2ba44Smacallan dstoff = dst & 7; 317a3a2ba44Smacallan dst &= ~7; 318a3a2ba44Smacallan full = width >> 5; /* 
chunks of 32 */ 319a3a2ba44Smacallan part = width & 31; /* leftovers */ 320a3a2ba44Smacallan 321ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 322a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 323a3a2ba44Smacallan width, height, full, part); 324a3a2ba44Smacallan#endif 325a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 326a3a2ba44Smacallan for (line = 0; line < height; line++) { 327a3a2ba44Smacallan srcx = src; 328a3a2ba44Smacallan dstx = dst; 329a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 330a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 331a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 332a3a2ba44Smacallan for (x = 0; x < width; x++) { 333a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 334a3a2ba44Smacallan } 335a3a2ba44Smacallan#else 336a3a2ba44Smacallan for (x = 0; x < full; x++) { 337a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 338a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 339230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 340230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, 15); 341a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 342a3a2ba44Smacallan srcx += 32; 343a3a2ba44Smacallan dstx += 32; 344a3a2ba44Smacallan } 345a3a2ba44Smacallan 346a3a2ba44Smacallan if (part > 0) { 347a3a2ba44Smacallan /* do leftovers */ 348a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 349a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 350a3a2ba44Smacallan if (part > 16) { 351230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 352230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 353a3a2ba44Smacallan } else { 354230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 355a3a2ba44Smacallan } 356a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 357a3a2ba44Smacallan } 358a3a2ba44Smacallan#endif 3592066ab30Smacallan#ifdef SX_RENDER_VERBOSE 360d71cb32dSmacallan d = (uint8_t *)(p->fb + src + srcoff); 
361d71cb32dSmacallan for (x = 0; x < width; x++) { 362d71cb32dSmacallan buffer[x] = c[d[x]>>5]; 363d71cb32dSmacallan } 364d71cb32dSmacallan buffer[x] = 0; 365d71cb32dSmacallan xf86Msg(X_ERROR, "%s\n", buffer); 366d71cb32dSmacallan#endif 367d71cb32dSmacallan /* next line */ 368d71cb32dSmacallan src += srcpitch; 369d71cb32dSmacallan dst += dstpitch; 370d71cb32dSmacallan } 371d71cb32dSmacallan} 372d71cb32dSmacallan 373d71cb32dSmacallanvoid CG14Comp_Add8_32(Cg14Ptr p, 374d71cb32dSmacallan uint32_t src, uint32_t srcpitch, 375d71cb32dSmacallan uint32_t dst, uint32_t dstpitch, 376d71cb32dSmacallan int width, int height) 377d71cb32dSmacallan{ 378d71cb32dSmacallan int line; 379d71cb32dSmacallan uint32_t srcx, dstx, srcoff, dstoff; 380d71cb32dSmacallan int pre, full, part, x; 381d71cb32dSmacallan uint8_t *d; 3822066ab30Smacallan#ifdef SX_RENDER_VERBOSE 383d71cb32dSmacallan char buffer[256]; 3842066ab30Smacallan#endif 385d71cb32dSmacallan ENTER; 386d71cb32dSmacallan 387d71cb32dSmacallan srcoff = src & 7; 388d71cb32dSmacallan src &= ~7; 389d71cb32dSmacallan dstoff = dst & 7; 390d71cb32dSmacallan dst &= ~7; 391d71cb32dSmacallan full = width >> 5; /* chunks of 32 */ 392d71cb32dSmacallan part = width & 31; /* leftovers */ 393d71cb32dSmacallan 3943a2c2bcaSmacallan#ifdef SX_RENDER_DEBUG 395d71cb32dSmacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 396d71cb32dSmacallan width, height, full, part); 397d71cb32dSmacallan#endif 398d71cb32dSmacallan /* we do this up to 32 pixels at a time */ 399d71cb32dSmacallan for (line = 0; line < height; line++) { 400d71cb32dSmacallan srcx = src; 401d71cb32dSmacallan dstx = dst; 402d71cb32dSmacallan for (x = 0; x < full; x++) { 403d71cb32dSmacallan /* load source bytes */ 404d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 405d71cb32dSmacallan /* load alpha from destination */ 406d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff)); 407230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 408230e26c7Smacallan 
sxi(SX_ADDV, 24, 56, 88, 15); 409d71cb32dSmacallan /* write clamped values back into dest alpha */ 410d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff)); 411d71cb32dSmacallan srcx += 32; 412d71cb32dSmacallan dstx += 128; 413d71cb32dSmacallan } 414d71cb32dSmacallan 415d71cb32dSmacallan if (part > 0) { 416d71cb32dSmacallan /* do leftovers */ 417d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 418d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff)); 419d71cb32dSmacallan if (part > 16) { 420230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 421230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 422d71cb32dSmacallan } else { 423230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 424d71cb32dSmacallan } 425d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff)); 426d71cb32dSmacallan } 4272066ab30Smacallan#ifdef SX_RENDER_VERBOSE 428a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 429a3a2ba44Smacallan for (x = 0; x < width; x++) { 430a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 431a3a2ba44Smacallan } 432a3a2ba44Smacallan buffer[x] = 0; 433a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 434a3a2ba44Smacallan#endif 435a3a2ba44Smacallan /* next line */ 436a3a2ba44Smacallan src += srcpitch; 437a3a2ba44Smacallan dst += dstpitch; 438a3a2ba44Smacallan } 439a3a2ba44Smacallan} 440a3a2ba44Smacallan 441a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 442a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 443a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 444e311bbeeSmacallan int width, int height, int flip) 445a3a2ba44Smacallan{ 44678d1a11bSmacallan uint32_t srcx, dstx, mskx, m; 44778d1a11bSmacallan int line, x, i, num; 448a3a2ba44Smacallan 449a3a2ba44Smacallan ENTER; 450a3a2ba44Smacallan 451a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 452a3a2ba44Smacallan for (line = 0; line < height; line++) { 453a3a2ba44Smacallan srcx = src; 454a3a2ba44Smacallan dstx = dst; 455a3a2ba44Smacallan 
45678d1a11bSmacallan for (x = 0; x < width; x += 4) { 45778d1a11bSmacallan /* we do up to 4 pixels at a time */ 45878d1a11bSmacallan num = min(4, width - x); 45978d1a11bSmacallan if (num <= 0) { 46078d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 46178d1a11bSmacallan continue; 46278d1a11bSmacallan } 46378d1a11bSmacallan /* fetch source pixels */ 46472fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 465e311bbeeSmacallan if (flip) { 466230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 467230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 468230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 469230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 47078d1a11bSmacallan } 47178d1a11bSmacallan /* fetch dst pixels */ 47272fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 47378d1a11bSmacallan /* now process up to 4 pixels */ 47478d1a11bSmacallan for (i = 0; i < num; i++) { 47578d1a11bSmacallan int ii = i << 2; 47678d1a11bSmacallan /* write inverted alpha into SCAM */ 477230e26c7Smacallan sxi(SX_XORS, 12 + ii, 8, R_SCAM, 0); 47878d1a11bSmacallan /* dst * (1 - alpha) + src */ 479230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 12 + ii, 76 + ii, 3); 480e311bbeeSmacallan } 48172fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 48278d1a11bSmacallan srcx += 16; 48378d1a11bSmacallan dstx += 16; 484a3a2ba44Smacallan } 485a3a2ba44Smacallan src += srcpitch; 48678d1a11bSmacallan dst += dstpitch; 487a3a2ba44Smacallan } 488a3a2ba44Smacallan} 489a3a2ba44Smacallan 490a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 491a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 492a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 493a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 494e311bbeeSmacallan int width, int height, int flip) 495a3a2ba44Smacallan{ 496a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 49778d1a11bSmacallan int line, x, i, num; 498a3a2ba44Smacallan 499a3a2ba44Smacallan ENTER; 500a3a2ba44Smacallan 501a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 
502a3a2ba44Smacallan for (line = 0; line < height; line++) { 503a3a2ba44Smacallan srcx = src; 504a3a2ba44Smacallan mskx = msk; 505a3a2ba44Smacallan dstx = dst; 506a3a2ba44Smacallan 50778d1a11bSmacallan for (x = 0; x < width; x += 4) { 50878d1a11bSmacallan /* we do up to 4 pixels at a time */ 50978d1a11bSmacallan num = min(4, width - x); 51078d1a11bSmacallan if (num <= 0) { 51178d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 51278d1a11bSmacallan continue; 51378d1a11bSmacallan } 51478d1a11bSmacallan /* fetch source pixels */ 51572fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 516e311bbeeSmacallan if (flip) { 517230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 518230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 519230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 520230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 521e311bbeeSmacallan } 522a3a2ba44Smacallan /* fetch mask */ 52372fd264fSmacallan sxm(SX_LDB, mskx, 28, num - 1); 52478d1a11bSmacallan /* fetch dst pixels */ 52572fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 52678d1a11bSmacallan /* now process up to 4 pixels */ 52778d1a11bSmacallan for (i = 0; i < num; i++) { 52878d1a11bSmacallan int ii = i << 2; 52978d1a11bSmacallan /* mask alpha to SCAM */ 530230e26c7Smacallan sxi(SX_ORS, 28 + i, 0, R_SCAM, 0); 53178d1a11bSmacallan /* src * alpha */ 532230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 53378d1a11bSmacallan /* write inverted alpha into SCAM */ 534230e26c7Smacallan sxi(SX_XORS, 28 + i, 8, R_SCAM, 0); 53578d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 536230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 53778d1a11bSmacallan } 53872fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 53978d1a11bSmacallan srcx += 16; 54078d1a11bSmacallan mskx += 4; 54178d1a11bSmacallan dstx += 16; 542a3a2ba44Smacallan } 543a3a2ba44Smacallan src += srcpitch; 544a3a2ba44Smacallan msk += mskpitch; 545a3a2ba44Smacallan dst += dstpitch; 546a3a2ba44Smacallan } 
547a3a2ba44Smacallan} 5486bdc2ffdSmacallan 5496bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 5506bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 5516bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 5526bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 553e311bbeeSmacallan int width, int height, int flip) 5546bdc2ffdSmacallan{ 5556bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 55678d1a11bSmacallan int line, x, i, num; 5576bdc2ffdSmacallan 5586bdc2ffdSmacallan ENTER; 5596bdc2ffdSmacallan 5606bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 56178d1a11bSmacallan write_sx_reg(p, SX_QUEUED(9), 0xff); 562230e26c7Smacallan sxi(SX_ORS, 8, 0, 10, 1); 5636bdc2ffdSmacallan for (line = 0; line < height; line++) { 5646bdc2ffdSmacallan srcx = src; 5656bdc2ffdSmacallan mskx = msk; 5666bdc2ffdSmacallan dstx = dst; 5676bdc2ffdSmacallan 56878d1a11bSmacallan for (x = 0; x < width; x += 4) { 56978d1a11bSmacallan /* we do up to 4 pixels at a time */ 57078d1a11bSmacallan num = min(4, width - x); 57178d1a11bSmacallan if (num <= 0) { 57278d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 57378d1a11bSmacallan continue; 57478d1a11bSmacallan } 57578d1a11bSmacallan /* fetch source pixels */ 57672fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 577e311bbeeSmacallan if (flip) { 578230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 579230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 580230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 581230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 582e311bbeeSmacallan } 5836bdc2ffdSmacallan /* fetch mask */ 58472fd264fSmacallan sxm(SX_LDB, mskx, 28, num - 1); 58578d1a11bSmacallan /* fetch dst pixels */ 58672fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 58778d1a11bSmacallan /* set src alpha to 0xff */ 588230e26c7Smacallan sxi(SX_SCATTER, 8, 4, 12, num - 1); 58978d1a11bSmacallan /* now process up to 4 pixels */ 59078d1a11bSmacallan for (i = 0; i < num; i++) { 59178d1a11bSmacallan int ii = i << 2; 59278d1a11bSmacallan /* 
mask alpha to SCAM */ 593230e26c7Smacallan sxi(SX_ORS, 28 + i, 0, R_SCAM, 0); 59478d1a11bSmacallan /* src * alpha */ 595230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 59678d1a11bSmacallan /* write inverted alpha into SCAM */ 597230e26c7Smacallan sxi(SX_XORS, 28 + i, 8, R_SCAM, 0); 59878d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 599230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 60078d1a11bSmacallan } 60172fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 60278d1a11bSmacallan srcx += 16; 60378d1a11bSmacallan mskx += 4; 60478d1a11bSmacallan dstx += 16; 6056bdc2ffdSmacallan } 6066bdc2ffdSmacallan src += srcpitch; 6076bdc2ffdSmacallan msk += mskpitch; 6086bdc2ffdSmacallan dst += dstpitch; 6096bdc2ffdSmacallan } 6106bdc2ffdSmacallan} 611fa158432Smacallan 612fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 613fa158432Smacallan uint32_t src, uint32_t srcpitch, 614fa158432Smacallan uint32_t msk, uint32_t mskpitch, 615fa158432Smacallan uint32_t dst, uint32_t dstpitch, 616e311bbeeSmacallan int width, int height, int flip) 617fa158432Smacallan{ 618fa158432Smacallan uint32_t srcx, dstx, mskx, m; 61978d1a11bSmacallan int line, x, i, num; 620fa158432Smacallan 621fa158432Smacallan ENTER; 622fa158432Smacallan 623fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 62478d1a11bSmacallan write_sx_reg(p, SX_QUEUED(9), 0xff); 625230e26c7Smacallan sxi(SX_ORS, 8, 0, 10, 1); 626fa158432Smacallan for (line = 0; line < height; line++) { 627fa158432Smacallan srcx = src; 628fa158432Smacallan mskx = msk; 629fa158432Smacallan dstx = dst; 630fa158432Smacallan 63178d1a11bSmacallan for (x = 0; x < width; x += 4) { 63278d1a11bSmacallan /* we do up to 4 pixels at a time */ 63378d1a11bSmacallan num = min(4, width - x); 63478d1a11bSmacallan if (num <= 0) { 63578d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 63678d1a11bSmacallan continue; 63778d1a11bSmacallan } 63878d1a11bSmacallan /* fetch source pixels */ 63972fd264fSmacallan sxm(SX_LDUQ0, 
srcx, 12, num - 1); 640e311bbeeSmacallan if (flip) { 641230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 642230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 643230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 644230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 645e311bbeeSmacallan } 646fa158432Smacallan /* fetch mask */ 64772fd264fSmacallan sxm(SX_LDUQ0, mskx, 28, num - 1); 64878d1a11bSmacallan /* fetch dst pixels */ 64972fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 65078d1a11bSmacallan /* set src alpha to 0xff */ 651230e26c7Smacallan sxi(SX_SCATTER, 8, 4, 12, num - 1); 65278d1a11bSmacallan /* now process up to 4 pixels */ 65378d1a11bSmacallan for (i = 0; i < num; i++) { 65478d1a11bSmacallan int ii = i << 2; 65578d1a11bSmacallan /* mask alpha to SCAM */ 656230e26c7Smacallan sxi(SX_ORS, 28 + ii, 0, R_SCAM, 0); 65778d1a11bSmacallan /* src * alpha */ 658230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 65978d1a11bSmacallan /* write inverted alpha into SCAM */ 660230e26c7Smacallan sxi(SX_XORS, 28 + ii, 8, R_SCAM, 0); 66178d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 662230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 66378d1a11bSmacallan } 66472fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 66578d1a11bSmacallan srcx += 16; 66678d1a11bSmacallan mskx += 16; 66778d1a11bSmacallan dstx += 16; 668fa158432Smacallan } 669fa158432Smacallan src += srcpitch; 670fa158432Smacallan msk += mskpitch; 671fa158432Smacallan dst += dstpitch; 672fa158432Smacallan } 673fa158432Smacallan} 674