cg14_render.c revision a3a2ba44
/* $NetBSD: cg14_render.c,v 1.1 2013/06/25 12:26:57 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
31a3a2ba44Smacallan
32a3a2ba44Smacallan#include <sys/types.h>
33a3a2ba44Smacallan
34a3a2ba44Smacallan/* all driver need this */
35a3a2ba44Smacallan#include "xf86.h"
36a3a2ba44Smacallan#include "xf86_OSproc.h"
37a3a2ba44Smacallan#include "compiler.h"
38a3a2ba44Smacallan
39a3a2ba44Smacallan#include "cg14.h"
40a3a2ba44Smacallan#include <sparc/sxreg.h>
41a3a2ba44Smacallan
/*
 * Build-time switches for this file:
 *   SX_SINGLE        CG14Comp_Over8Solid() composites one pixel per pass
 *                    instead of using its four-pixel code path.
 *   SX_RENDER_DEBUG  enables the ENTER / DPRINTF tracing macros below.
 *   SX_ADD_SOFTWARE  CG14Comp_Add8() adds with a CPU loop instead of
 *                    issuing SX instructions.
 * The functions additionally test SX_DEBUG, which is not defined here.
 */
#define SX_SINGLE
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

/*
 * ENTER logs the name of the current function, DPRINTF is a printf-style
 * logger taking xf86Msg() arguments.  Without SX_RENDER_DEBUG, ENTER
 * expands to nothing and DPRINTF turns its call into the body of a
 * "while (0)" loop, so the call is still compiled but never executed.
 */
#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Glyphs used by the SX_DEBUG dumps: an 8-bit value is shifted right by
 * five and used as an index here.  The array holds exactly the eight
 * glyphs and no terminating NUL.
 */
char c[8] = " .,:+*oX";
55a3a2ba44Smacallan
56a3a2ba44Smacallanvoid CG14Comp_Over8Solid(Cg14Ptr p,
57a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
58a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
59a3a2ba44Smacallan                   int width, int height)
60a3a2ba44Smacallan{
61a3a2ba44Smacallan	uint32_t msk = src, mskx, dstx, m;
62a3a2ba44Smacallan	int line, x, i;
63a3a2ba44Smacallan#ifdef SX_DEBUG
64a3a2ba44Smacallan	char buffer[256];
65a3a2ba44Smacallan#endif
66a3a2ba44Smacallan	ENTER;
67a3a2ba44Smacallan
68a3a2ba44Smacallan	/* first get the source colour */
69a3a2ba44Smacallan	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
70a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
71a3a2ba44Smacallan	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
72a3a2ba44Smacallan	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
73a3a2ba44Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
74a3a2ba44Smacallan	for (line = 0; line < height; line++) {
75a3a2ba44Smacallan		mskx = msk;
76a3a2ba44Smacallan		dstx = dst;
77a3a2ba44Smacallan#ifdef SX_SINGLE
78a3a2ba44Smacallan
79a3a2ba44Smacallan		for (x = 0; x < width; x++) {
80a3a2ba44Smacallan			m = *(volatile uint8_t *)(p->fb + mskx);
81a3a2ba44Smacallan#ifdef SX_DEBUG
82a3a2ba44Smacallan			buffer[x] = c[m >> 5];
83a3a2ba44Smacallan#endif
84a3a2ba44Smacallan			if (m == 0) {
85a3a2ba44Smacallan				/* nothing to do - all transparent */
86a3a2ba44Smacallan			} else if (m == 0xff) {
87a3a2ba44Smacallan				/* all opaque */
88a3a2ba44Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
89a3a2ba44Smacallan			} else {
90a3a2ba44Smacallan				/* fetch alpha value, stick it into scam */
91a3a2ba44Smacallan				/* mask is in R[12:15] */
92a3a2ba44Smacallan				/*write_sx_io(p, mskx & ~7,
93a3a2ba44Smacallan				    SX_LDB(12, 0, mskx & 7));*/
94a3a2ba44Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
95a3a2ba44Smacallan				/* fetch dst pixel */
96a3a2ba44Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
97a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
98a3a2ba44Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
99a3a2ba44Smacallan				/*
100a3a2ba44Smacallan				 * src * alpha + R0
101a3a2ba44Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
102a3a2ba44Smacallan				 */
103a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
104a3a2ba44Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
105a3a2ba44Smacallan
106a3a2ba44Smacallan				/* invert SCAM */
107a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
108a3a2ba44Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
109a3a2ba44Smacallan#ifdef SX_DEBUG
110a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
111a3a2ba44Smacallan				    SX_XORV(12, 8, 13, 0));
112a3a2ba44Smacallan#endif
113a3a2ba44Smacallan				/* dst * (1 - alpha) + R[13:15] */
114a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
115a3a2ba44Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
116a3a2ba44Smacallan				write_sx_io(p, dstx,
117a3a2ba44Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
118a3a2ba44Smacallan			}
119a3a2ba44Smacallan			dstx += 4;
120a3a2ba44Smacallan			mskx += 1;
121a3a2ba44Smacallan		}
122a3a2ba44Smacallan#ifdef SX_DEBUG
123a3a2ba44Smacallan		buffer[x] = 0;
124a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
125a3a2ba44Smacallan#endif
126a3a2ba44Smacallan#else
127a3a2ba44Smacallan		for (x = 0; x < width; x += 4) {
128a3a2ba44Smacallan			/* fetch 4 mask values */
129a3a2ba44Smacallan			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
130a3a2ba44Smacallan			/* fetch destination pixels */
131a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
132a3a2ba44Smacallan			/* duplicate them for all channels */
133a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
134a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
135a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
136a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
137a3a2ba44Smacallan			/* generate inverted alpha */
138a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
139a3a2ba44Smacallan			    SX_XORS(12, 8, 28, 15));
140a3a2ba44Smacallan			/* multiply source */
141a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
142a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
143a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
144a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
145a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
146a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
147a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
148a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
149a3a2ba44Smacallan			/* multiply dest */
150a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
151a3a2ba44Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
152a3a2ba44Smacallan			/* add up */
153a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
154a3a2ba44Smacallan			    SX_ADDV(44, 76, 92, 15));
155a3a2ba44Smacallan			/* write back */
156a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
157a3a2ba44Smacallan			dstx += 16;
158a3a2ba44Smacallan			mskx += 4;
159a3a2ba44Smacallan		}
160a3a2ba44Smacallan#endif
161a3a2ba44Smacallan		dst += dstpitch;
162a3a2ba44Smacallan		msk += srcpitch;
163a3a2ba44Smacallan	}
164a3a2ba44Smacallan}
165a3a2ba44Smacallan
166a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p,
167a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
168a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
169a3a2ba44Smacallan                   int width, int height)
170a3a2ba44Smacallan{
171a3a2ba44Smacallan	int line;
172a3a2ba44Smacallan	uint32_t srcx, dstx;
173a3a2ba44Smacallan	int full, part, x;
174a3a2ba44Smacallan
175a3a2ba44Smacallan	ENTER;
176a3a2ba44Smacallan	full = width >> 3;	/* chunks of 8 */
177a3a2ba44Smacallan	part = width & 7;	/* leftovers */
178a3a2ba44Smacallan	/* we do this up to 8 pixels at a time */
179a3a2ba44Smacallan	for (line = 0; line < height; line++) {
180a3a2ba44Smacallan		srcx = src;
181a3a2ba44Smacallan		dstx = dst;
182a3a2ba44Smacallan		for (x = 0; x < full; x++) {
183a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
184a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
185a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
186a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
187a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
188a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
189a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
190a3a2ba44Smacallan			srcx += 128;
191a3a2ba44Smacallan			dstx += 128;
192a3a2ba44Smacallan		}
193a3a2ba44Smacallan
194a3a2ba44Smacallan		/* do leftovers */
195a3a2ba44Smacallan		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
196a3a2ba44Smacallan		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
197a3a2ba44Smacallan		if (part & 16) {
198a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
199a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
200a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
201a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, part - 17));
202a3a2ba44Smacallan		} else {
203a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
204a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, part - 1));
205a3a2ba44Smacallan		}
206a3a2ba44Smacallan		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
207a3a2ba44Smacallan
208a3a2ba44Smacallan		/* next line */
209a3a2ba44Smacallan		src += srcpitch;
210a3a2ba44Smacallan		dst += dstpitch;
211a3a2ba44Smacallan	}
212a3a2ba44Smacallan}
213a3a2ba44Smacallan
214a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p,
215a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
216a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
217a3a2ba44Smacallan                   int width, int height)
218a3a2ba44Smacallan{
219a3a2ba44Smacallan	int line;
220a3a2ba44Smacallan	uint32_t srcx, dstx, srcoff, dstoff;
221a3a2ba44Smacallan	int pre, full, part, x;
222a3a2ba44Smacallan	uint8_t *d;
223a3a2ba44Smacallan	char buffer[256];
224a3a2ba44Smacallan	ENTER;
225a3a2ba44Smacallan
226a3a2ba44Smacallan	srcoff = src & 7;
227a3a2ba44Smacallan	src &= ~7;
228a3a2ba44Smacallan	dstoff = dst & 7;
229a3a2ba44Smacallan	dst &= ~7;
230a3a2ba44Smacallan	full = width >> 5;	/* chunks of 32 */
231a3a2ba44Smacallan	part = width & 31;	/* leftovers */
232a3a2ba44Smacallan
233a3a2ba44Smacallan#ifdef SX_DEBUG
234a3a2ba44Smacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
235a3a2ba44Smacallan	    width, height, full, part);
236a3a2ba44Smacallan#endif
237a3a2ba44Smacallan	/* we do this up to 32 pixels at a time */
238a3a2ba44Smacallan	for (line = 0; line < height; line++) {
239a3a2ba44Smacallan		srcx = src;
240a3a2ba44Smacallan		dstx = dst;
241a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE
242a3a2ba44Smacallan		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
243a3a2ba44Smacallan		d = (uint8_t *)(p->fb + dstx + dstoff);
244a3a2ba44Smacallan		for (x = 0; x < width; x++) {
245a3a2ba44Smacallan			d[x] = min(255, s[x] + d[x]);
246a3a2ba44Smacallan		}
247a3a2ba44Smacallan#else
248a3a2ba44Smacallan		for (x = 0; x < full; x++) {
249a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
250a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
251a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
252a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
253a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
254a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
255a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
256a3a2ba44Smacallan			srcx += 32;
257a3a2ba44Smacallan			dstx += 32;
258a3a2ba44Smacallan		}
259a3a2ba44Smacallan
260a3a2ba44Smacallan		if (part > 0) {
261a3a2ba44Smacallan			/* do leftovers */
262a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
263a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
264a3a2ba44Smacallan			if (part > 16) {
265a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
266a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, 15));
267a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
268a3a2ba44Smacallan				    SX_ADDV(24, 56, 88, part - 17));
269a3a2ba44Smacallan			} else {
270a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
271a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, part - 1));
272a3a2ba44Smacallan			}
273a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
274a3a2ba44Smacallan		}
275a3a2ba44Smacallan#endif
276a3a2ba44Smacallan#ifdef SX_DEBUG
277a3a2ba44Smacallan		d = (uint8_t *)(p->fb + src + srcoff);
278a3a2ba44Smacallan		for (x = 0; x < width; x++) {
279a3a2ba44Smacallan			buffer[x] = c[d[x]>>5];
280a3a2ba44Smacallan		}
281a3a2ba44Smacallan		buffer[x] = 0;
282a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
283a3a2ba44Smacallan#endif
284a3a2ba44Smacallan		/* next line */
285a3a2ba44Smacallan		src += srcpitch;
286a3a2ba44Smacallan		dst += dstpitch;
287a3a2ba44Smacallan	}
288a3a2ba44Smacallan}
289a3a2ba44Smacallan
290a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p,
291a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
292a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
293a3a2ba44Smacallan                   int width, int height)
294a3a2ba44Smacallan{
295a3a2ba44Smacallan	uint32_t srcx, dstx, m;
296a3a2ba44Smacallan	int line, x, i;
297a3a2ba44Smacallan
298a3a2ba44Smacallan	ENTER;
299a3a2ba44Smacallan
300a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
301a3a2ba44Smacallan	for (line = 0; line < height; line++) {
302a3a2ba44Smacallan		srcx = src;
303a3a2ba44Smacallan		dstx = dst;
304a3a2ba44Smacallan
305a3a2ba44Smacallan		for (x = 0; x < width; x++) {
306a3a2ba44Smacallan			/* fetch source pixel */
307a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
308a3a2ba44Smacallan			/* fetch dst pixel */
309a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
310a3a2ba44Smacallan			/* src is premultiplied with alpha */
311a3a2ba44Smacallan			/* write inverted alpha into SCAM */
312a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
313a3a2ba44Smacallan			    SX_XORV(12, 8, R_SCAM, 0));
314a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
315a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
316a3a2ba44Smacallan			    SX_SAXP16X16SR8(21, 13, 25, 2));
317a3a2ba44Smacallan			write_sx_io(p, dstx,
318a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
319a3a2ba44Smacallan			dstx += 4;
320a3a2ba44Smacallan			srcx += 4;
321a3a2ba44Smacallan		}
322a3a2ba44Smacallan		dst += dstpitch;
323a3a2ba44Smacallan		src += srcpitch;
324a3a2ba44Smacallan	}
325a3a2ba44Smacallan}
326a3a2ba44Smacallan
327a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p,
328a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
329a3a2ba44Smacallan                   uint32_t msk, uint32_t mskpitch,
330a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
331a3a2ba44Smacallan                   int width, int height)
332a3a2ba44Smacallan{
333a3a2ba44Smacallan	uint32_t srcx, dstx, mskx, m;
334a3a2ba44Smacallan	int line, x, i;
335a3a2ba44Smacallan
336a3a2ba44Smacallan	ENTER;
337a3a2ba44Smacallan
338a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
339a3a2ba44Smacallan	for (line = 0; line < height; line++) {
340a3a2ba44Smacallan		srcx = src;
341a3a2ba44Smacallan		mskx = msk;
342a3a2ba44Smacallan		dstx = dst;
343a3a2ba44Smacallan
344a3a2ba44Smacallan		for (x = 0; x < width; x++) {
345a3a2ba44Smacallan			/* fetch source pixel */
346a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
347a3a2ba44Smacallan			/* fetch mask */
348a3a2ba44Smacallan			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
349a3a2ba44Smacallan			/* fetch dst pixel */
350a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
351a3a2ba44Smacallan			/* apply mask */
352a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
353a3a2ba44Smacallan			    SX_ANDS(12, 9, 16, 3));
354a3a2ba44Smacallan			/* src is premultiplied with alpha */
355a3a2ba44Smacallan			/* write inverted alpha into SCAM */
356a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
357a3a2ba44Smacallan			    SX_XORV(16, 8, R_SCAM, 0));
358a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
359a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
360a3a2ba44Smacallan			    SX_SAXP16X16SR8(21, 17, 25, 2));
361a3a2ba44Smacallan			write_sx_io(p, dstx,
362a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
363a3a2ba44Smacallan			srcx += 4;
364a3a2ba44Smacallan			mskx += 1;
365a3a2ba44Smacallan			dstx += 4;
366a3a2ba44Smacallan		}
367a3a2ba44Smacallan		src += srcpitch;
368a3a2ba44Smacallan		msk += mskpitch;
369a3a2ba44Smacallan		dst += dstpitch;
370a3a2ba44Smacallan	}
371a3a2ba44Smacallan}
372