/* cg14_render.c revision a3a2ba44 */
1a3a2ba44Smacallan/* $NetBSD: cg14_render.c,v 1.1 2013/06/25 12:26:57 macallan Exp $ */ 2a3a2ba44Smacallan/* 3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz 4a3a2ba44Smacallan * All rights reserved. 5a3a2ba44Smacallan * 6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without 7a3a2ba44Smacallan * modification, are permitted provided that the following conditions 8a3a2ba44Smacallan * are met: 9a3a2ba44Smacallan * 10a3a2ba44Smacallan * - Redistributions of source code must retain the above copyright 11a3a2ba44Smacallan * notice, this list of conditions and the following disclaimer. 12a3a2ba44Smacallan * - Redistributions in binary form must reproduce the above 13a3a2ba44Smacallan * copyright notice, this list of conditions and the following 14a3a2ba44Smacallan * disclaimer in the documentation and/or other materials provided 15a3a2ba44Smacallan * with the distribution. 16a3a2ba44Smacallan * 17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE. 
29a3a2ba44Smacallan * 30a3a2ba44Smacallan */ 31a3a2ba44Smacallan 32a3a2ba44Smacallan#include <sys/types.h> 33a3a2ba44Smacallan 34a3a2ba44Smacallan/* all driver need this */ 35a3a2ba44Smacallan#include "xf86.h" 36a3a2ba44Smacallan#include "xf86_OSproc.h" 37a3a2ba44Smacallan#include "compiler.h" 38a3a2ba44Smacallan 39a3a2ba44Smacallan#include "cg14.h" 40a3a2ba44Smacallan#include <sparc/sxreg.h> 41a3a2ba44Smacallan 42a3a2ba44Smacallan#define SX_SINGLE 43a3a2ba44Smacallan/*#define SX_RENDER_DEBUG*/ 44a3a2ba44Smacallan/*#define SX_ADD_SOFTWARE*/ 45a3a2ba44Smacallan 46a3a2ba44Smacallan#ifdef SX__RENDER_DEBUG 47a3a2ba44Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 48a3a2ba44Smacallan#define DPRINTF xf86Msg 49a3a2ba44Smacallan#else 50a3a2ba44Smacallan#define ENTER 51a3a2ba44Smacallan#define DPRINTF while (0) xf86Msg 52a3a2ba44Smacallan#endif 53a3a2ba44Smacallan 54a3a2ba44Smacallanchar c[8] = " .,:+*oX"; 55a3a2ba44Smacallan 56a3a2ba44Smacallanvoid CG14Comp_Over8Solid(Cg14Ptr p, 57a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 58a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 59a3a2ba44Smacallan int width, int height) 60a3a2ba44Smacallan{ 61a3a2ba44Smacallan uint32_t msk = src, mskx, dstx, m; 62a3a2ba44Smacallan int line, x, i; 63a3a2ba44Smacallan#ifdef SX_DEBUG 64a3a2ba44Smacallan char buffer[256]; 65a3a2ba44Smacallan#endif 66a3a2ba44Smacallan ENTER; 67a3a2ba44Smacallan 68a3a2ba44Smacallan /* first get the source colour */ 69a3a2ba44Smacallan write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 70a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 71a3a2ba44Smacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 72a3a2ba44Smacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 73a3a2ba44Smacallan *(uint32_t *)(p->fb + p->srcoff)); 74a3a2ba44Smacallan for (line = 0; line < height; line++) { 75a3a2ba44Smacallan mskx = msk; 76a3a2ba44Smacallan dstx = dst; 77a3a2ba44Smacallan#ifdef SX_SINGLE 
78a3a2ba44Smacallan 79a3a2ba44Smacallan for (x = 0; x < width; x++) { 80a3a2ba44Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 81a3a2ba44Smacallan#ifdef SX_DEBUG 82a3a2ba44Smacallan buffer[x] = c[m >> 5]; 83a3a2ba44Smacallan#endif 84a3a2ba44Smacallan if (m == 0) { 85a3a2ba44Smacallan /* nothing to do - all transparent */ 86a3a2ba44Smacallan } else if (m == 0xff) { 87a3a2ba44Smacallan /* all opaque */ 88a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 89a3a2ba44Smacallan } else { 90a3a2ba44Smacallan /* fetch alpha value, stick it into scam */ 91a3a2ba44Smacallan /* mask is in R[12:15] */ 92a3a2ba44Smacallan /*write_sx_io(p, mskx & ~7, 93a3a2ba44Smacallan SX_LDB(12, 0, mskx & 7));*/ 94a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(12), m); 95a3a2ba44Smacallan /* fetch dst pixel */ 96a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 97a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 98a3a2ba44Smacallan SX_ORV(12, 0, R_SCAM, 0)); 99a3a2ba44Smacallan /* 100a3a2ba44Smacallan * src * alpha + R0 101a3a2ba44Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 102a3a2ba44Smacallan */ 103a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 104a3a2ba44Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 105a3a2ba44Smacallan 106a3a2ba44Smacallan /* invert SCAM */ 107a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 108a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 109a3a2ba44Smacallan#ifdef SX_DEBUG 110a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 111a3a2ba44Smacallan SX_XORV(12, 8, 13, 0)); 112a3a2ba44Smacallan#endif 113a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 114a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 115a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 116a3a2ba44Smacallan write_sx_io(p, dstx, 117a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 118a3a2ba44Smacallan } 119a3a2ba44Smacallan dstx += 4; 120a3a2ba44Smacallan mskx += 1; 121a3a2ba44Smacallan } 122a3a2ba44Smacallan#ifdef SX_DEBUG 123a3a2ba44Smacallan buffer[x] = 0; 
124a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 125a3a2ba44Smacallan#endif 126a3a2ba44Smacallan#else 127a3a2ba44Smacallan for (x = 0; x < width; x += 4) { 128a3a2ba44Smacallan /* fetch 4 mask values */ 129a3a2ba44Smacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 130a3a2ba44Smacallan /* fetch destination pixels */ 131a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 132a3a2ba44Smacallan /* duplicate them for all channels */ 133a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 134a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 135a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 136a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 137a3a2ba44Smacallan /* generate inverted alpha */ 138a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 139a3a2ba44Smacallan SX_XORS(12, 8, 28, 15)); 140a3a2ba44Smacallan /* multiply source */ 141a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 142a3a2ba44Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 143a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 144a3a2ba44Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 145a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 146a3a2ba44Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 147a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 148a3a2ba44Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 149a3a2ba44Smacallan /* multiply dest */ 150a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 151a3a2ba44Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 152a3a2ba44Smacallan /* add up */ 153a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 154a3a2ba44Smacallan SX_ADDV(44, 76, 92, 15)); 155a3a2ba44Smacallan /* write back */ 156a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 157a3a2ba44Smacallan dstx += 16; 158a3a2ba44Smacallan mskx += 4; 159a3a2ba44Smacallan } 160a3a2ba44Smacallan#endif 161a3a2ba44Smacallan dst += dstpitch; 162a3a2ba44Smacallan msk += srcpitch; 163a3a2ba44Smacallan } 
164a3a2ba44Smacallan} 165a3a2ba44Smacallan 166a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 167a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 168a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 169a3a2ba44Smacallan int width, int height) 170a3a2ba44Smacallan{ 171a3a2ba44Smacallan int line; 172a3a2ba44Smacallan uint32_t srcx, dstx; 173a3a2ba44Smacallan int full, part, x; 174a3a2ba44Smacallan 175a3a2ba44Smacallan ENTER; 176a3a2ba44Smacallan full = width >> 3; /* chunks of 8 */ 177a3a2ba44Smacallan part = width & 7; /* leftovers */ 178a3a2ba44Smacallan /* we do this up to 8 pixels at a time */ 179a3a2ba44Smacallan for (line = 0; line < height; line++) { 180a3a2ba44Smacallan srcx = src; 181a3a2ba44Smacallan dstx = dst; 182a3a2ba44Smacallan for (x = 0; x < full; x++) { 183a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 184a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 185a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 186a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 187a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 188a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 189a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 190a3a2ba44Smacallan srcx += 128; 191a3a2ba44Smacallan dstx += 128; 192a3a2ba44Smacallan } 193a3a2ba44Smacallan 194a3a2ba44Smacallan /* do leftovers */ 195a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 196a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 197a3a2ba44Smacallan if (part & 16) { 198a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 199a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 200a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 201a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 202a3a2ba44Smacallan } else { 203a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 204a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 205a3a2ba44Smacallan } 206a3a2ba44Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 
207a3a2ba44Smacallan 208a3a2ba44Smacallan /* next line */ 209a3a2ba44Smacallan src += srcpitch; 210a3a2ba44Smacallan dst += dstpitch; 211a3a2ba44Smacallan } 212a3a2ba44Smacallan} 213a3a2ba44Smacallan 214a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 215a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 216a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 217a3a2ba44Smacallan int width, int height) 218a3a2ba44Smacallan{ 219a3a2ba44Smacallan int line; 220a3a2ba44Smacallan uint32_t srcx, dstx, srcoff, dstoff; 221a3a2ba44Smacallan int pre, full, part, x; 222a3a2ba44Smacallan uint8_t *d; 223a3a2ba44Smacallan char buffer[256]; 224a3a2ba44Smacallan ENTER; 225a3a2ba44Smacallan 226a3a2ba44Smacallan srcoff = src & 7; 227a3a2ba44Smacallan src &= ~7; 228a3a2ba44Smacallan dstoff = dst & 7; 229a3a2ba44Smacallan dst &= ~7; 230a3a2ba44Smacallan full = width >> 5; /* chunks of 32 */ 231a3a2ba44Smacallan part = width & 31; /* leftovers */ 232a3a2ba44Smacallan 233a3a2ba44Smacallan#ifdef SX_DEBUG 234a3a2ba44Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 235a3a2ba44Smacallan width, height, full, part); 236a3a2ba44Smacallan#endif 237a3a2ba44Smacallan /* we do this up to 32 pixels at a time */ 238a3a2ba44Smacallan for (line = 0; line < height; line++) { 239a3a2ba44Smacallan srcx = src; 240a3a2ba44Smacallan dstx = dst; 241a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE 242a3a2ba44Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 243a3a2ba44Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 244a3a2ba44Smacallan for (x = 0; x < width; x++) { 245a3a2ba44Smacallan d[x] = min(255, s[x] + d[x]); 246a3a2ba44Smacallan } 247a3a2ba44Smacallan#else 248a3a2ba44Smacallan for (x = 0; x < full; x++) { 249a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 250a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 251a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 252a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 253a3a2ba44Smacallan write_sx_reg(p, 
SX_INSTRUCTIONS, 254a3a2ba44Smacallan SX_ADDV(24, 56, 88, 15)); 255a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 256a3a2ba44Smacallan srcx += 32; 257a3a2ba44Smacallan dstx += 32; 258a3a2ba44Smacallan } 259a3a2ba44Smacallan 260a3a2ba44Smacallan if (part > 0) { 261a3a2ba44Smacallan /* do leftovers */ 262a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 263a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 264a3a2ba44Smacallan if (part > 16) { 265a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 266a3a2ba44Smacallan SX_ADDV(8, 40, 72, 15)); 267a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 268a3a2ba44Smacallan SX_ADDV(24, 56, 88, part - 17)); 269a3a2ba44Smacallan } else { 270a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 271a3a2ba44Smacallan SX_ADDV(8, 40, 72, part - 1)); 272a3a2ba44Smacallan } 273a3a2ba44Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 274a3a2ba44Smacallan } 275a3a2ba44Smacallan#endif 276a3a2ba44Smacallan#ifdef SX_DEBUG 277a3a2ba44Smacallan d = (uint8_t *)(p->fb + src + srcoff); 278a3a2ba44Smacallan for (x = 0; x < width; x++) { 279a3a2ba44Smacallan buffer[x] = c[d[x]>>5]; 280a3a2ba44Smacallan } 281a3a2ba44Smacallan buffer[x] = 0; 282a3a2ba44Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 283a3a2ba44Smacallan#endif 284a3a2ba44Smacallan /* next line */ 285a3a2ba44Smacallan src += srcpitch; 286a3a2ba44Smacallan dst += dstpitch; 287a3a2ba44Smacallan } 288a3a2ba44Smacallan} 289a3a2ba44Smacallan 290a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 291a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 292a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 293a3a2ba44Smacallan int width, int height) 294a3a2ba44Smacallan{ 295a3a2ba44Smacallan uint32_t srcx, dstx, m; 296a3a2ba44Smacallan int line, x, i; 297a3a2ba44Smacallan 298a3a2ba44Smacallan ENTER; 299a3a2ba44Smacallan 300a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 301a3a2ba44Smacallan for (line = 0; line < height; line++) 
{ 302a3a2ba44Smacallan srcx = src; 303a3a2ba44Smacallan dstx = dst; 304a3a2ba44Smacallan 305a3a2ba44Smacallan for (x = 0; x < width; x++) { 306a3a2ba44Smacallan /* fetch source pixel */ 307a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 308a3a2ba44Smacallan /* fetch dst pixel */ 309a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 310a3a2ba44Smacallan /* src is premultiplied with alpha */ 311a3a2ba44Smacallan /* write inverted alpha into SCAM */ 312a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 313a3a2ba44Smacallan SX_XORV(12, 8, R_SCAM, 0)); 314a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 315a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 316a3a2ba44Smacallan SX_SAXP16X16SR8(21, 13, 25, 2)); 317a3a2ba44Smacallan write_sx_io(p, dstx, 318a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 319a3a2ba44Smacallan dstx += 4; 320a3a2ba44Smacallan srcx += 4; 321a3a2ba44Smacallan } 322a3a2ba44Smacallan dst += dstpitch; 323a3a2ba44Smacallan src += srcpitch; 324a3a2ba44Smacallan } 325a3a2ba44Smacallan} 326a3a2ba44Smacallan 327a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p, 328a3a2ba44Smacallan uint32_t src, uint32_t srcpitch, 329a3a2ba44Smacallan uint32_t msk, uint32_t mskpitch, 330a3a2ba44Smacallan uint32_t dst, uint32_t dstpitch, 331a3a2ba44Smacallan int width, int height) 332a3a2ba44Smacallan{ 333a3a2ba44Smacallan uint32_t srcx, dstx, mskx, m; 334a3a2ba44Smacallan int line, x, i; 335a3a2ba44Smacallan 336a3a2ba44Smacallan ENTER; 337a3a2ba44Smacallan 338a3a2ba44Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 339a3a2ba44Smacallan for (line = 0; line < height; line++) { 340a3a2ba44Smacallan srcx = src; 341a3a2ba44Smacallan mskx = msk; 342a3a2ba44Smacallan dstx = dst; 343a3a2ba44Smacallan 344a3a2ba44Smacallan for (x = 0; x < width; x++) { 345a3a2ba44Smacallan /* fetch source pixel */ 346a3a2ba44Smacallan write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); 347a3a2ba44Smacallan /* fetch mask */ 348a3a2ba44Smacallan write_sx_io(p, 
mskx & (~7), SX_LDB(9, 0, mskx & 7)); 349a3a2ba44Smacallan /* fetch dst pixel */ 350a3a2ba44Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 351a3a2ba44Smacallan /* apply mask */ 352a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 353a3a2ba44Smacallan SX_ANDS(12, 9, 16, 3)); 354a3a2ba44Smacallan /* src is premultiplied with alpha */ 355a3a2ba44Smacallan /* write inverted alpha into SCAM */ 356a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 357a3a2ba44Smacallan SX_XORV(16, 8, R_SCAM, 0)); 358a3a2ba44Smacallan /* dst * (1 - alpha) + R[13:15] */ 359a3a2ba44Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 360a3a2ba44Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 361a3a2ba44Smacallan write_sx_io(p, dstx, 362a3a2ba44Smacallan SX_STUQ0C(24, 0, dstx & 7)); 363a3a2ba44Smacallan srcx += 4; 364a3a2ba44Smacallan mskx += 1; 365a3a2ba44Smacallan dstx += 4; 366a3a2ba44Smacallan } 367a3a2ba44Smacallan src += srcpitch; 368a3a2ba44Smacallan msk += mskpitch; 369a3a2ba44Smacallan dst += dstpitch; 370a3a2ba44Smacallan } 371a3a2ba44Smacallan} 372