cg14_render.c revision ad6af7a7
1ad6af7a7Smacallan/* $NetBSD: cg14_render.c,v 1.16 2022/05/11 17:13:04 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 36a3a2ba44Smacallan#include <sys/types.h> 37a3a2ba44Smacallan 38a3a2ba44Smacallan/* all driver need this */ 39a3a2ba44Smacallan#include "xf86.h" 40a3a2ba44Smacallan#include "xf86_OSproc.h" 41a3a2ba44Smacallan#include "compiler.h" 42a3a2ba44Smacallan 43a3a2ba44Smacallan#include "cg14.h" 44a3a2ba44Smacallan 45f221549cSmacallan/*#define SX_SINGLE*/ 46a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 47a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 48a3a2ba44Smacallan 49f221549cSmacallan#ifdef SX_RENDER_DEBUG 50a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 51a3a2ba44Smacallan#define DPRINTF xf86Msg 52a3a2ba44Smacallan#else 53a3a2ba44Smacallan#define ENTER 54a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 55a3a2ba44Smacallan#endif 56a3a2ba44Smacallan 57ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 58a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 59ad6af7a7Smacallan#endif 6078cb1511Smacallan 6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p, 6278cb1511Smacallan uint32_t src, uint32_t srcpitch, 6378cb1511Smacallan uint32_t dst, uint32_t dstpitch, 6478cb1511Smacallan int width, int height) 6578cb1511Smacallan{ 6678cb1511Smacallan uint32_t msk = src, mskx, dstx, m; 6778cb1511Smacallan int line, x, i; 6878cb1511Smacallan 6978cb1511Smacallan ENTER; 70f7cb851fSmacallan 7178cb1511Smacallan for (line = 0; line < height; line++) { 7278cb1511Smacallan mskx = msk; 7378cb1511Smacallan dstx = dst; 74f221549cSmacallan#ifndef SX_SINGLE 75f221549cSmacallan int rest; 76f221549cSmacallan for (x = 0; x < width; x += 4) { 77f221549cSmacallan rest = width - x; 78f221549cSmacallan /* fetch 4 mask values */ 7972fd264fSmacallan sxm(SX_LDUQ0, mskx, 12, 3); 80f221549cSmacallan /* fetch destination pixels */ 8172fd264fSmacallan sxm(SX_LDUQ0, dstx, 60, 3); 82f221549cSmacallan /* duplicate them for all channels */ 83230e26c7Smacallan sxi(SX_ORS, 0, 12, 13, 2); 84230e26c7Smacallan sxi(SX_ORS, 0, 16, 17, 2); 85230e26c7Smacallan sxi(SX_ORS, 0, 20, 21, 2); 86230e26c7Smacallan sxi(SX_ORS, 0, 24, 25, 2); 87f221549cSmacallan /* generate inverted alpha */ 88230e26c7Smacallan sxi(SX_XORS, 12, 8, 28, 15); 89f221549cSmacallan /* multiply source */ 90230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 12, 44, 3); 91230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 16, 48, 3); 92230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 20, 52, 3); 93230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 24, 56, 3); 94f221549cSmacallan /* multiply dest */ 95230e26c7Smacallan sxi(SX_MUL16X16SR8, 28, 60, 76, 15); 96f221549cSmacallan /* add up */ 97230e26c7Smacallan sxi(SX_ADDV, 44, 76, 92, 15); 98f221549cSmacallan /* write back */ 99f221549cSmacallan if (rest < 4) { 10072fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, rest - 1); 101f221549cSmacallan } else { 10272fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, 3); 103f221549cSmacallan } 104f221549cSmacallan dstx += 16; 105f221549cSmacallan mskx += 16; 106f221549cSmacallan } 107f221549cSmacallan#else /* SX_SINGLE */ 10878cb1511Smacallan for (x = 0; x < width; x++) { 10978cb1511Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 11078cb1511Smacallan m = m >> 24; 11178cb1511Smacallan if (m == 0) { 11278cb1511Smacallan /* nothing to do - all transparent */ 11378cb1511Smacallan } else if (m == 0xff) { 11478cb1511Smacallan /* all opaque */ 11572fd264fSmacallan sxm(SX_STUQ0, dstx, 8, 0); 11678cb1511Smacallan } else { 11778cb1511Smacallan /* fetch alpha value, stick it into scam */ 11878cb1511Smacallan /* mask is in R[12:15] */ 11978cb1511Smacallan /*write_sx_io(p, mskx, 12078cb1511Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 12178cb1511Smacallan write_sx_reg(p, SX_QUEUED(12), m); 12278cb1511Smacallan /* fetch dst pixel */ 12372fd264fSmacallan sxm(SX_LDUQ0, dstx, 20, 0); 124230e26c7Smacallan sxi(SX_ORV, 12, 0, R_SCAM, 0); 12578cb1511Smacallan /* 12678cb1511Smacallan * src * alpha + R0 12778cb1511Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 12878cb1511Smacallan */ 129230e26c7Smacallan sxi(SX_SAXP16X16SR8, 9, 0, 17, 2); 13078cb1511Smacallan 13178cb1511Smacallan /* invert SCAM */ 132230e26c7Smacallan sxi(SX_XORV, 12, 8, R_SCAM, 0); 133ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 134230e26c7Smacallan sxi(SX_XORV, 12, 8, 13, 0); 13578cb1511Smacallan#endif 13678cb1511Smacallan /* dst * (1 - alpha) + R[13:15] */ 137230e26c7Smacallan sxi(SX_SAXP16X16SR8, 21, 17, 25, 2); 13872fd264fSmacallan sxm(SX_STUQ0C, dstx, 24, 0); 13978cb1511Smacallan } 14078cb1511Smacallan dstx += 4; 14178cb1511Smacallan mskx += 4; 14278cb1511Smacallan } 143f221549cSmacallan#endif /* SX_SINGLE */ 144f221549cSmacallan dst += dstpitch; 145f221549cSmacallan msk += srcpitch; 146f221549cSmacallan } 147f221549cSmacallan} 148f221549cSmacallan 149f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 150f221549cSmacallan uint32_t src, uint32_t srcpitch, 151f221549cSmacallan uint32_t dst, uint32_t dstpitch, 152f221549cSmacallan int width, int height) 153f221549cSmacallan{ 154f221549cSmacallan uint32_t msk = src, mskx, dstx, m; 155f221549cSmacallan int line, x, i; 156ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 157f221549cSmacallan char buffer[256]; 158f221549cSmacallan#endif 159f221549cSmacallan ENTER; 160f221549cSmacallan 161f221549cSmacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 162f221549cSmacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 163f221549cSmacallan *(uint32_t *)(p->fb + p->srcoff)); 164f221549cSmacallan for (line = 0; line < height; line++) { 165f221549cSmacallan mskx = msk; 166f221549cSmacallan dstx = dst; 167f221549cSmacallan#ifndef SX_SINGLE 168f221549cSmacallan int rest; 16978cb1511Smacallan for (x = 0; x < width; x += 4) { 170f221549cSmacallan rest = width - x; 17178cb1511Smacallan /* fetch 4 mask values */ 17272fd264fSmacallan sxm(SX_LDB, mskx, 12, 3); 17378cb1511Smacallan /* fetch destination pixels */ 17472fd264fSmacallan sxm(SX_LDUQ0, dstx, 60, 3); 17578cb1511Smacallan /* duplicate them for all channels */ 176230e26c7Smacallan sxi(SX_ORS, 0, 13, 16, 3); 177230e26c7Smacallan sxi(SX_ORS, 0, 14, 20, 3); 178230e26c7Smacallan sxi(SX_ORS, 0, 15, 24, 3); 179230e26c7Smacallan sxi(SX_ORS, 0, 12, 13, 2); 18078cb1511Smacallan /* generate inverted alpha */ 181230e26c7Smacallan sxi(SX_XORS, 12, 8, 28, 15); 18278cb1511Smacallan /* multiply source */ 183230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 12, 44, 3); 184230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 16, 48, 3); 185230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 20, 52, 3); 186230e26c7Smacallan sxi(SX_MUL16X16SR8, 8, 24, 56, 3); 18778cb1511Smacallan /* multiply dest */ 188230e26c7Smacallan sxi(SX_MUL16X16SR8, 28, 60, 76, 15); 18978cb1511Smacallan /* add up */ 190230e26c7Smacallan sxi(SX_ADDV, 44, 76, 92, 15); 19178cb1511Smacallan /* write back */ 192f221549cSmacallan if (rest < 4) { 19372fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, rest - 1); 194f221549cSmacallan } else { 19572fd264fSmacallan sxm(SX_STUQ0C, dstx, 92, 3); 196f221549cSmacallan } 19778cb1511Smacallan dstx += 16; 198f221549cSmacallan mskx += 4; 19978cb1511Smacallan } 200f221549cSmacallan#else /* SX_SINGLE */ 201a3a2ba44Smacallan for (x = 0; x < width; x++) { 202a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 203ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 204a3a2ba44Smacallan buffer[x] = c[m >> 5]; 205a3a2ba44Smacallan#endif 206a3a2ba44Smacallan if (m == 0) { 207a3a2ba44Smacallan /* nothing to do - all transparent */ 208a3a2ba44Smacallan } else if (m == 0xff) { 209a3a2ba44Smacallan /* all opaque */ 21072fd264fSmacallan sxm(SX_STUQ0, dstx, 8, 0); 211a3a2ba44Smacallan } else { 212a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 213a3a2ba44Smacallan /* mask is in R[12:15] */ 214a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 215a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 216a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 217a3a2ba44Smacallan /* fetch dst pixel */ 21872fd264fSmacallan sxm(SX_LDUQ0, dstx, 20, 0); 219230e26c7Smacallan sxi(SX_ORV, 12, 0, R_SCAM, 0); 220a3a2ba44Smacallan /* 221a3a2ba44Smacallan * src * alpha + R0 222a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 223a3a2ba44Smacallan */ 224230e26c7Smacallan sxi(SX_SAXP16X16SR8, 9, 0, 17, 2); 225a3a2ba44Smacallan 226a3a2ba44Smacallan /* invert SCAM */ 227230e26c7Smacallan sxi(SX_XORV, 12, 8, R_SCAM, 0); 228ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 229230e26c7Smacallan sxi(SX_XORV, 12, 8, 13, 0); 230a3a2ba44Smacallan#endif 231a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 232230e26c7Smacallan sxi(SX_SAXP16X16SR8, 21, 17, 25, 2); 23372fd264fSmacallan sxm(SX_STUQ0C, dstx, 24, 0); 234a3a2ba44Smacallan } 235a3a2ba44Smacallan dstx += 4; 236a3a2ba44Smacallan mskx += 1; 237a3a2ba44Smacallan } 238f221549cSmacallan#endif /* SX_SINGLE */ 239ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 240a3a2ba44Smacallan buffer[x] = 0; 241a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 242a3a2ba44Smacallan#endif 243a3a2ba44Smacallan dst += dstpitch; 244a3a2ba44Smacallan msk += srcpitch; 245a3a2ba44Smacallan } 246a3a2ba44Smacallan} 247a3a2ba44Smacallan 248a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 249a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 250a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 251a3a2ba44Smacallan int width, int height) 252a3a2ba44Smacallan{ 253a3a2ba44Smacallan int line; 254a3a2ba44Smacallan uint32_t srcx, dstx; 255a3a2ba44Smacallan int full, part, x; 256a3a2ba44Smacallan 257a3a2ba44Smacallan ENTER; 258a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 259a3a2ba44Smacallan part = width & 7; /* leftovers */ 260a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 261a3a2ba44Smacallan for (line = 0; line < height; line++) { 262a3a2ba44Smacallan srcx = src; 263a3a2ba44Smacallan dstx = dst; 264a3a2ba44Smacallan for (x = 0; x < full; x++) { 26572fd264fSmacallan sxm(SX_LDUQ0, srcx, 8, 31); 26672fd264fSmacallan sxm(SX_LDUQ0, dstx, 40, 31); 267230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 268230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, 15); 26972fd264fSmacallan sxm(SX_STUQ0, dstx, 72, 31); 270a3a2ba44Smacallan srcx += 128; 271a3a2ba44Smacallan dstx += 128; 272a3a2ba44Smacallan } 273a3a2ba44Smacallan 274a3a2ba44Smacallan /* do leftovers */ 27572fd264fSmacallan sxm(SX_LDUQ0, srcx, 8, part - 1); 27672fd264fSmacallan sxm(SX_LDUQ0, dstx, 40, part - 1); 277a3a2ba44Smacallan if (part & 16) { 278230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 279230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 280a3a2ba44Smacallan } else { 281230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 282a3a2ba44Smacallan } 28372fd264fSmacallan sxm(SX_STUQ0, dstx, 72, part - 1); 284a3a2ba44Smacallan 285a3a2ba44Smacallan /* next line */ 286a3a2ba44Smacallan src += srcpitch; 287a3a2ba44Smacallan dst += dstpitch; 288a3a2ba44Smacallan } 289a3a2ba44Smacallan} 290a3a2ba44Smacallan 291a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 292a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 293a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 294a3a2ba44Smacallan int width, int height) 295a3a2ba44Smacallan{ 296a3a2ba44Smacallan int line; 297a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 298a3a2ba44Smacallan int pre, full, part, x; 299a3a2ba44Smacallan uint8_t *d; 300a3a2ba44Smacallan char buffer[256]; 301a3a2ba44Smacallan ENTER; 302a3a2ba44Smacallan 303a3a2ba44Smacallan srcoff = src & 7; 304a3a2ba44Smacallan src &= ~7; 305a3a2ba44Smacallan dstoff = dst & 7; 306a3a2ba44Smacallan dst &= ~7; 307a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 308a3a2ba44Smacallan part = width & 31; /* leftovers */ 309a3a2ba44Smacallan 310ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 311a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 312a3a2ba44Smacallan width, height, full, part); 313a3a2ba44Smacallan#endif 314a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 315a3a2ba44Smacallan for (line = 0; line < height; line++) { 316a3a2ba44Smacallan srcx = src; 317a3a2ba44Smacallan dstx = dst; 318a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 319a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 320a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 321a3a2ba44Smacallan for (x = 0; x < width; x++) { 322a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 323a3a2ba44Smacallan } 324a3a2ba44Smacallan#else 325a3a2ba44Smacallan for (x = 0; x < full; x++) { 326a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 327a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 328230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 329230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, 15); 330a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 331a3a2ba44Smacallan srcx += 32; 332a3a2ba44Smacallan dstx += 32; 333a3a2ba44Smacallan } 334a3a2ba44Smacallan 335a3a2ba44Smacallan if (part > 0) { 336a3a2ba44Smacallan /* do leftovers */ 337a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 338a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 339a3a2ba44Smacallan if (part > 16) { 340230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 341230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 342a3a2ba44Smacallan } else { 343230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 344a3a2ba44Smacallan } 345a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 346a3a2ba44Smacallan } 347a3a2ba44Smacallan#endif 348ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 349d71cb32dSmacallan d = (uint8_t *)(p->fb + src + srcoff); 350d71cb32dSmacallan for (x = 0; x < width; x++) { 351d71cb32dSmacallan buffer[x] = c[d[x]>>5]; 352d71cb32dSmacallan } 353d71cb32dSmacallan buffer[x] = 0; 354d71cb32dSmacallan xf86Msg(X_ERROR, "%s\n", buffer); 355d71cb32dSmacallan#endif 356d71cb32dSmacallan /* next line */ 357d71cb32dSmacallan src += srcpitch; 358d71cb32dSmacallan dst += dstpitch; 359d71cb32dSmacallan } 360d71cb32dSmacallan} 361d71cb32dSmacallan 362d71cb32dSmacallanvoid CG14Comp_Add8_32(Cg14Ptr p, 363d71cb32dSmacallan uint32_t src, uint32_t srcpitch, 364d71cb32dSmacallan uint32_t dst, uint32_t dstpitch, 365d71cb32dSmacallan int width, int height) 366d71cb32dSmacallan{ 367d71cb32dSmacallan int line; 368d71cb32dSmacallan uint32_t srcx, dstx, srcoff, dstoff; 369d71cb32dSmacallan int pre, full, part, x; 370d71cb32dSmacallan uint8_t *d; 371d71cb32dSmacallan char buffer[256]; 372d71cb32dSmacallan ENTER; 373d71cb32dSmacallan 374d71cb32dSmacallan srcoff = src & 7; 375d71cb32dSmacallan src &= ~7; 376d71cb32dSmacallan dstoff = dst & 7; 377d71cb32dSmacallan dst &= ~7; 378d71cb32dSmacallan full = width >> 5; /* chunks of 32 */ 379d71cb32dSmacallan part = width & 31; /* leftovers */ 380d71cb32dSmacallan 381ad6af7a7Smacallan#ifdef SX__RENDER_DEBUG 382d71cb32dSmacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 383d71cb32dSmacallan width, height, full, part); 384d71cb32dSmacallan#endif 385d71cb32dSmacallan /* we do this up to 32 pixels at a time */ 386d71cb32dSmacallan for (line = 0; line < height; line++) { 387d71cb32dSmacallan srcx = src; 388d71cb32dSmacallan dstx = dst; 389d71cb32dSmacallan for (x = 0; x < full; x++) { 390d71cb32dSmacallan /* load source bytes */ 391d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 392d71cb32dSmacallan /* load alpha from destination */ 393d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff)); 394230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 395230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, 15); 396d71cb32dSmacallan /* write clamped values back into dest alpha */ 397d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff)); 398d71cb32dSmacallan srcx += 32; 399d71cb32dSmacallan dstx += 128; 400d71cb32dSmacallan } 401d71cb32dSmacallan 402d71cb32dSmacallan if (part > 0) { 403d71cb32dSmacallan /* do leftovers */ 404d71cb32dSmacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 405d71cb32dSmacallan write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff)); 406d71cb32dSmacallan if (part > 16) { 407230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, 15); 408230e26c7Smacallan sxi(SX_ADDV, 24, 56, 88, part - 17); 409d71cb32dSmacallan } else { 410230e26c7Smacallan sxi(SX_ADDV, 8, 40, 72, part - 1); 411d71cb32dSmacallan } 412d71cb32dSmacallan write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff)); 413d71cb32dSmacallan } 414ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG 415a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 416a3a2ba44Smacallan for (x = 0; x < width; x++) { 417a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 418a3a2ba44Smacallan } 419a3a2ba44Smacallan buffer[x] = 0; 420a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 421a3a2ba44Smacallan#endif 422a3a2ba44Smacallan /* next line */ 423a3a2ba44Smacallan src += srcpitch; 424a3a2ba44Smacallan dst += dstpitch; 425a3a2ba44Smacallan } 426a3a2ba44Smacallan} 427a3a2ba44Smacallan 428a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 429a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 430a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 431e311bbeeSmacallan int width, int height, int flip) 432a3a2ba44Smacallan{ 43378d1a11bSmacallan uint32_t srcx, dstx, mskx, m; 43478d1a11bSmacallan int line, x, i, num; 435a3a2ba44Smacallan 436a3a2ba44Smacallan ENTER; 437a3a2ba44Smacallan 438a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 439a3a2ba44Smacallan for (line = 0; line < height; line++) { 440a3a2ba44Smacallan srcx = src; 441a3a2ba44Smacallan dstx = dst; 442a3a2ba44Smacallan 44378d1a11bSmacallan for (x = 0; x < width; x += 4) { 44478d1a11bSmacallan /* we do up to 4 pixels at a time */ 44578d1a11bSmacallan num = min(4, width - x); 44678d1a11bSmacallan if (num <= 0) { 44778d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 44878d1a11bSmacallan continue; 44978d1a11bSmacallan } 45078d1a11bSmacallan /* fetch source pixels */ 45172fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 452e311bbeeSmacallan if (flip) { 453230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 454230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 455230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 456230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 45778d1a11bSmacallan } 45878d1a11bSmacallan /* fetch dst pixels */ 45972fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 46078d1a11bSmacallan /* now process up to 4 pixels */ 46178d1a11bSmacallan for (i = 0; i < num; i++) { 46278d1a11bSmacallan int ii = i << 2; 46378d1a11bSmacallan /* write inverted alpha into SCAM */ 464230e26c7Smacallan sxi(SX_XORS, 12 + ii, 8, R_SCAM, 0); 46578d1a11bSmacallan /* dst * (1 - alpha) + src */ 466230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 12 + ii, 76 + ii, 3); 467e311bbeeSmacallan } 46872fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 46978d1a11bSmacallan srcx += 16; 47078d1a11bSmacallan dstx += 16; 471a3a2ba44Smacallan } 472a3a2ba44Smacallan src += srcpitch; 47378d1a11bSmacallan dst += dstpitch; 474a3a2ba44Smacallan } 475a3a2ba44Smacallan} 476a3a2ba44Smacallan 477a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 478a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 479a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 480a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 481e311bbeeSmacallan int width, int height, int flip) 482a3a2ba44Smacallan{ 483a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 48478d1a11bSmacallan int line, x, i, num; 485a3a2ba44Smacallan 486a3a2ba44Smacallan ENTER; 487a3a2ba44Smacallan 488a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 489a3a2ba44Smacallan for (line = 0; line < height; line++) { 490a3a2ba44Smacallan srcx = src; 491a3a2ba44Smacallan mskx = msk; 492a3a2ba44Smacallan dstx = dst; 493a3a2ba44Smacallan 49478d1a11bSmacallan for (x = 0; x < width; x += 4) { 49578d1a11bSmacallan /* we do up to 4 pixels at a time */ 49678d1a11bSmacallan num = min(4, width - x); 49778d1a11bSmacallan if (num <= 0) { 49878d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 49978d1a11bSmacallan continue; 50078d1a11bSmacallan } 50178d1a11bSmacallan /* fetch source pixels */ 50272fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 503e311bbeeSmacallan if (flip) { 504230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 505230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 506230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 507230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 508e311bbeeSmacallan } 509a3a2ba44Smacallan /* fetch mask */ 51072fd264fSmacallan sxm(SX_LDB, mskx, 28, num - 1); 51178d1a11bSmacallan /* fetch dst pixels */ 51272fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 51378d1a11bSmacallan /* now process up to 4 pixels */ 51478d1a11bSmacallan for (i = 0; i < num; i++) { 51578d1a11bSmacallan int ii = i << 2; 51678d1a11bSmacallan /* mask alpha to SCAM */ 517230e26c7Smacallan sxi(SX_ORS, 28 + i, 0, R_SCAM, 0); 51878d1a11bSmacallan /* src * alpha */ 519230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 52078d1a11bSmacallan /* write inverted alpha into SCAM */ 521230e26c7Smacallan sxi(SX_XORS, 28 + i, 8, R_SCAM, 0); 52278d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 523230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 52478d1a11bSmacallan } 52572fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 52678d1a11bSmacallan srcx += 16; 52778d1a11bSmacallan mskx += 4; 52878d1a11bSmacallan dstx += 16; 529a3a2ba44Smacallan } 530a3a2ba44Smacallan src += srcpitch; 531a3a2ba44Smacallan msk += mskpitch; 532a3a2ba44Smacallan dst += dstpitch; 533a3a2ba44Smacallan } 534a3a2ba44Smacallan} 5356bdc2ffdSmacallan 5366bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p, 5376bdc2ffdSmacallan uint32_t src, uint32_t srcpitch, 5386bdc2ffdSmacallan uint32_t msk, uint32_t mskpitch, 5396bdc2ffdSmacallan uint32_t dst, uint32_t dstpitch, 540e311bbeeSmacallan int width, int height, int flip) 5416bdc2ffdSmacallan{ 5426bdc2ffdSmacallan uint32_t srcx, dstx, mskx, m; 54378d1a11bSmacallan int line, x, i, num; 5446bdc2ffdSmacallan 5456bdc2ffdSmacallan ENTER; 5466bdc2ffdSmacallan 5476bdc2ffdSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 54878d1a11bSmacallan write_sx_reg(p, SX_QUEUED(9), 0xff); 549230e26c7Smacallan sxi(SX_ORS, 8, 0, 10, 1); 5506bdc2ffdSmacallan for (line = 0; line < height; line++) { 5516bdc2ffdSmacallan srcx = src; 5526bdc2ffdSmacallan mskx = msk; 5536bdc2ffdSmacallan dstx = dst; 5546bdc2ffdSmacallan 55578d1a11bSmacallan for (x = 0; x < width; x += 4) { 55678d1a11bSmacallan /* we do up to 4 pixels at a time */ 55778d1a11bSmacallan num = min(4, width - x); 55878d1a11bSmacallan if (num <= 0) { 55978d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 56078d1a11bSmacallan continue; 56178d1a11bSmacallan } 56278d1a11bSmacallan /* fetch source pixels */ 56372fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 564e311bbeeSmacallan if (flip) { 565230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 566230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 567230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 568230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 569e311bbeeSmacallan } 5706bdc2ffdSmacallan /* fetch mask */ 57172fd264fSmacallan sxm(SX_LDB, mskx, 28, num - 1); 57278d1a11bSmacallan /* fetch dst pixels */ 57372fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 57478d1a11bSmacallan /* set src alpha to 0xff */ 575230e26c7Smacallan sxi(SX_SCATTER, 8, 4, 12, num - 1); 57678d1a11bSmacallan /* now process up to 4 pixels */ 57778d1a11bSmacallan for (i = 0; i < num; i++) { 57878d1a11bSmacallan int ii = i << 2; 57978d1a11bSmacallan /* mask alpha to SCAM */ 580230e26c7Smacallan sxi(SX_ORS, 28 + i, 0, R_SCAM, 0); 58178d1a11bSmacallan /* src * alpha */ 582230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 58378d1a11bSmacallan /* write inverted alpha into SCAM */ 584230e26c7Smacallan sxi(SX_XORS, 28 + i, 8, R_SCAM, 0); 58578d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 586230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 58778d1a11bSmacallan } 58872fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 58978d1a11bSmacallan srcx += 16; 59078d1a11bSmacallan mskx += 4; 59178d1a11bSmacallan dstx += 16; 5926bdc2ffdSmacallan } 5936bdc2ffdSmacallan src += srcpitch; 5946bdc2ffdSmacallan msk += mskpitch; 5956bdc2ffdSmacallan dst += dstpitch; 5966bdc2ffdSmacallan } 5976bdc2ffdSmacallan} 598fa158432Smacallan 599fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p, 600fa158432Smacallan uint32_t src, uint32_t srcpitch, 601fa158432Smacallan uint32_t msk, uint32_t mskpitch, 602fa158432Smacallan uint32_t dst, uint32_t dstpitch, 603e311bbeeSmacallan int width, int height, int flip) 604fa158432Smacallan{ 605fa158432Smacallan uint32_t srcx, dstx, mskx, m; 60678d1a11bSmacallan int line, x, i, num; 607fa158432Smacallan 608fa158432Smacallan ENTER; 609fa158432Smacallan 610fa158432Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 61178d1a11bSmacallan write_sx_reg(p, SX_QUEUED(9), 0xff); 612230e26c7Smacallan sxi(SX_ORS, 8, 0, 10, 1); 613fa158432Smacallan for (line = 0; line < height; line++) { 614fa158432Smacallan srcx = src; 615fa158432Smacallan mskx = msk; 616fa158432Smacallan dstx = dst; 617fa158432Smacallan 61878d1a11bSmacallan for (x = 0; x < width; x += 4) { 61978d1a11bSmacallan /* we do up to 4 pixels at a time */ 62078d1a11bSmacallan num = min(4, width - x); 62178d1a11bSmacallan if (num <= 0) { 62278d1a11bSmacallan xf86Msg(X_ERROR, "wtf?!\n"); 62378d1a11bSmacallan continue; 62478d1a11bSmacallan } 62578d1a11bSmacallan /* fetch source pixels */ 62672fd264fSmacallan sxm(SX_LDUQ0, srcx, 12, num - 1); 627e311bbeeSmacallan if (flip) { 628230e26c7Smacallan sxi(SX_GATHER, 13, 4, 40, num - 1); 629230e26c7Smacallan sxi(SX_GATHER, 15, 4, 44, num - 1); 630230e26c7Smacallan sxi(SX_SCATTER, 40, 4, 15, num - 1); 631230e26c7Smacallan sxi(SX_SCATTER, 44, 4, 13, num - 1); 632e311bbeeSmacallan } 633fa158432Smacallan /* fetch mask */ 63472fd264fSmacallan sxm(SX_LDUQ0, mskx, 28, num - 1); 63578d1a11bSmacallan /* fetch dst pixels */ 63672fd264fSmacallan sxm(SX_LDUQ0, dstx, 44, num - 1); 63778d1a11bSmacallan /* set src alpha to 0xff */ 638230e26c7Smacallan sxi(SX_SCATTER, 8, 4, 12, num - 1); 63978d1a11bSmacallan /* now process up to 4 pixels */ 64078d1a11bSmacallan for (i = 0; i < num; i++) { 64178d1a11bSmacallan int ii = i << 2; 64278d1a11bSmacallan /* mask alpha to SCAM */ 643230e26c7Smacallan sxi(SX_ORS, 28 + ii, 0, R_SCAM, 0); 64478d1a11bSmacallan /* src * alpha */ 645230e26c7Smacallan sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3); 64678d1a11bSmacallan /* write inverted alpha into SCAM */ 647230e26c7Smacallan sxi(SX_XORS, 28 + ii, 8, R_SCAM, 0); 64878d1a11bSmacallan /* dst * (1 - alpha) + R[60:] */ 649230e26c7Smacallan sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3); 65078d1a11bSmacallan } 65172fd264fSmacallan sxm(SX_STUQ0C, dstx, 76, num - 1); 65278d1a11bSmacallan srcx += 16; 65378d1a11bSmacallan mskx += 16; 65478d1a11bSmacallan dstx += 16; 655fa158432Smacallan } 656fa158432Smacallan src += srcpitch; 657fa158432Smacallan msk += mskpitch; 658fa158432Smacallan dst += dstpitch; 659fa158432Smacallan } 660fa158432Smacallan} 661