/* cg14_render.c revision 665b72dd */
/* $NetBSD: cg14_render.c,v 1.19 2023/01/11 09:23:57 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
31a3a2ba44Smacallan
32c88c16f8Smacallan#ifdef HAVE_CONFIG_H
33c88c16f8Smacallan#include "config.h"
34c88c16f8Smacallan#endif
35c88c16f8Smacallan
36a3a2ba44Smacallan#include <sys/types.h>
37a3a2ba44Smacallan
/* all drivers need this */
39a3a2ba44Smacallan#include "xf86.h"
40a3a2ba44Smacallan#include "xf86_OSproc.h"
41a3a2ba44Smacallan#include "compiler.h"
42a3a2ba44Smacallan
43a3a2ba44Smacallan#include "cg14.h"
44a3a2ba44Smacallan
/*
 * Build-time switches.  All of them are disabled by default; remove the
 * comment markers around a line to enable it.
 */
/*#define SX_SINGLE*/		/* solid Over: one pixel per iteration, not four */
/*#define SX_RENDER_DEBUG*/	/* enable DPRINTF and extra diagnostics */
/*#define SX_RENDER_VERBOSE*/	/* dump 8-bit data to the log as ASCII art */
/*#define SX_ADD_SOFTWARE*/	/* CG14Comp_Add8: add on the CPU instead of SX */
/*#define SX_RENDER_TRACE*/	/* log function entry and exit */

/* ENTER / DONE log function entry and exit when tracing is enabled. */
#ifdef SX_RENDER_TRACE
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DONE xf86Msg(X_ERROR, "<%s\n", __func__);
#else
#define ENTER
#define DONE
#endif

/*
 * DPRINTF is xf86Msg when debugging is enabled; otherwise the call is
 * still compiled but sits behind while (0) and never runs.
 */
#ifdef SX_RENDER_DEBUG
#define DPRINTF xf86Msg
#else
#define DPRINTF while (0) xf86Msg
#endif

#ifdef SX_RENDER_VERBOSE
/*
 * Eight glyphs, lowest value first, indexed by an 8-bit value shifted
 * right by 5.  The initializer is exactly 8 characters long, so the array
 * has no terminating NUL and must not be used as a string.
 * File-local and read-only: nothing outside this file should see a global
 * called 'c'.
 */
static const char c[8] = " .,:+*oX";
#endif
6878cb1511Smacallan
6978cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p,
7078cb1511Smacallan                   uint32_t src, uint32_t srcpitch,
7178cb1511Smacallan                   uint32_t dst, uint32_t dstpitch,
7278cb1511Smacallan                   int width, int height)
7378cb1511Smacallan{
7478cb1511Smacallan	uint32_t msk = src, mskx, dstx, m;
7578cb1511Smacallan	int line, x, i;
7678cb1511Smacallan
7778cb1511Smacallan	ENTER;
78f7cb851fSmacallan
7978cb1511Smacallan	for (line = 0; line < height; line++) {
8078cb1511Smacallan		mskx = msk;
8178cb1511Smacallan		dstx = dst;
82f221549cSmacallan#ifndef SX_SINGLE
83f221549cSmacallan		int rest;
84f221549cSmacallan		for (x = 0; x < width; x += 4) {
85f221549cSmacallan			rest = width - x;
86f221549cSmacallan			/* fetch 4 mask values */
8772fd264fSmacallan			sxm(SX_LDUQ0, mskx, 12, 3);
88f221549cSmacallan			/* fetch destination pixels */
8972fd264fSmacallan			sxm(SX_LDUQ0, dstx, 60, 3);
90f221549cSmacallan			/* duplicate them for all channels */
91230e26c7Smacallan			sxi(SX_ORS, 0, 12, 13, 2);
92230e26c7Smacallan			sxi(SX_ORS, 0, 16, 17, 2);
93230e26c7Smacallan			sxi(SX_ORS, 0, 20, 21, 2);
94230e26c7Smacallan			sxi(SX_ORS, 0, 24, 25, 2);
95f221549cSmacallan			/* generate inverted alpha */
96230e26c7Smacallan			sxi(SX_XORS, 12, 8, 28, 15);
97f221549cSmacallan			/* multiply source */
98230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 12, 44, 3);
99230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 16, 48, 3);
100230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 20, 52, 3);
101230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 24, 56, 3);
102f221549cSmacallan			/* multiply dest */
103230e26c7Smacallan			sxi(SX_MUL16X16SR8, 28, 60, 76, 15);
104f221549cSmacallan			/* add up */
105230e26c7Smacallan			sxi(SX_ADDV, 44, 76, 92, 15);
106f221549cSmacallan			/* write back */
107f221549cSmacallan			if (rest < 4) {
10872fd264fSmacallan				sxm(SX_STUQ0C, dstx, 92, rest - 1);
109f221549cSmacallan			} else {
11072fd264fSmacallan				sxm(SX_STUQ0C, dstx, 92, 3);
111f221549cSmacallan			}
112f221549cSmacallan			dstx += 16;
113f221549cSmacallan			mskx += 16;
114f221549cSmacallan		}
115f221549cSmacallan#else /* SX_SINGLE */
11678cb1511Smacallan		for (x = 0; x < width; x++) {
11778cb1511Smacallan			m = *(volatile uint32_t *)(p->fb + mskx);
11878cb1511Smacallan			m = m >> 24;
11978cb1511Smacallan			if (m == 0) {
12078cb1511Smacallan				/* nothing to do - all transparent */
12178cb1511Smacallan			} else if (m == 0xff) {
12278cb1511Smacallan				/* all opaque */
12372fd264fSmacallan				sxm(SX_STUQ0, dstx, 8, 0);
12478cb1511Smacallan			} else {
12578cb1511Smacallan				/* fetch alpha value, stick it into scam */
12678cb1511Smacallan				/* mask is in R[12:15] */
12778cb1511Smacallan				/*write_sx_io(p, mskx,
12878cb1511Smacallan				    SX_LDUQ0(12, 0, mskx & 7));*/
12978cb1511Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
13078cb1511Smacallan				/* fetch dst pixel */
13172fd264fSmacallan				sxm(SX_LDUQ0, dstx, 20, 0);
132230e26c7Smacallan				sxi(SX_ORV, 12, 0, R_SCAM, 0);
13378cb1511Smacallan				/*
13478cb1511Smacallan				 * src * alpha + R0
13578cb1511Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
13678cb1511Smacallan				 */
137230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 9, 0, 17, 2);
13878cb1511Smacallan
13978cb1511Smacallan				/* invert SCAM */
140230e26c7Smacallan				sxi(SX_XORV, 12, 8, R_SCAM, 0);
141ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG
142230e26c7Smacallan				sxi(SX_XORV, 12, 8, 13, 0);
14378cb1511Smacallan#endif
14478cb1511Smacallan				/* dst * (1 - alpha) + R[13:15] */
145230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 21, 17, 25, 2);
14672fd264fSmacallan				sxm(SX_STUQ0C, dstx, 24, 0);
14778cb1511Smacallan			}
14878cb1511Smacallan			dstx += 4;
14978cb1511Smacallan			mskx += 4;
15078cb1511Smacallan		}
151f221549cSmacallan#endif /* SX_SINGLE */
152f221549cSmacallan		dst += dstpitch;
153f221549cSmacallan		msk += srcpitch;
154f221549cSmacallan	}
155f221549cSmacallan}
156f221549cSmacallan
157f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p,
158f221549cSmacallan                   uint32_t src, uint32_t srcpitch,
159f221549cSmacallan                   uint32_t dst, uint32_t dstpitch,
160f221549cSmacallan                   int width, int height)
161f221549cSmacallan{
162f221549cSmacallan	uint32_t msk = src, mskx, dstx, m;
163f221549cSmacallan	int line, x, i;
164665b72ddSmacallan#ifdef SX_RENDER_VERBOSE
165f221549cSmacallan	char buffer[256];
166f221549cSmacallan#endif
167f221549cSmacallan	ENTER;
168f221549cSmacallan
169f221549cSmacallan	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
170f221549cSmacallan	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
171f221549cSmacallan	    *(uint32_t *)(p->fb + p->srcoff));
172f221549cSmacallan	for (line = 0; line < height; line++) {
173f221549cSmacallan		mskx = msk;
174f221549cSmacallan		dstx = dst;
175f221549cSmacallan#ifndef SX_SINGLE
176f221549cSmacallan		int rest;
17778cb1511Smacallan		for (x = 0; x < width; x += 4) {
178f221549cSmacallan			rest = width - x;
17978cb1511Smacallan			/* fetch 4 mask values */
18072fd264fSmacallan			sxm(SX_LDB, mskx, 12, 3);
18178cb1511Smacallan			/* fetch destination pixels */
18272fd264fSmacallan			sxm(SX_LDUQ0, dstx, 60, 3);
18378cb1511Smacallan			/* duplicate them for all channels */
184230e26c7Smacallan			sxi(SX_ORS, 0, 13, 16, 3);
185230e26c7Smacallan			sxi(SX_ORS, 0, 14, 20, 3);
186230e26c7Smacallan			sxi(SX_ORS, 0, 15, 24, 3);
187230e26c7Smacallan			sxi(SX_ORS, 0, 12, 13, 2);
18878cb1511Smacallan			/* generate inverted alpha */
189230e26c7Smacallan			sxi(SX_XORS, 12, 8, 28, 15);
19078cb1511Smacallan			/* multiply source */
191230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 12, 44, 3);
192230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 16, 48, 3);
193230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 20, 52, 3);
194230e26c7Smacallan			sxi(SX_MUL16X16SR8, 8, 24, 56, 3);
19578cb1511Smacallan			/* multiply dest */
196230e26c7Smacallan			sxi(SX_MUL16X16SR8, 28, 60, 76, 15);
19778cb1511Smacallan			/* add up */
198230e26c7Smacallan			sxi(SX_ADDV, 44, 76, 92, 15);
19978cb1511Smacallan			/* write back */
200f221549cSmacallan			if (rest < 4) {
20172fd264fSmacallan				sxm(SX_STUQ0C, dstx, 92, rest - 1);
202f221549cSmacallan			} else {
20372fd264fSmacallan				sxm(SX_STUQ0C, dstx, 92, 3);
204f221549cSmacallan			}
20578cb1511Smacallan			dstx += 16;
206f221549cSmacallan			mskx += 4;
20778cb1511Smacallan		}
208f221549cSmacallan#else /* SX_SINGLE */
209a3a2ba44Smacallan		for (x = 0; x < width; x++) {
210a3a2ba44Smacallan			m = *(volatile uint8_t *)(p->fb + mskx);
2112066ab30Smacallan#ifdef SX_RENDER_VERBOSE
212a3a2ba44Smacallan			buffer[x] = c[m >> 5];
213a3a2ba44Smacallan#endif
214a3a2ba44Smacallan			if (m == 0) {
215a3a2ba44Smacallan				/* nothing to do - all transparent */
216a3a2ba44Smacallan			} else if (m == 0xff) {
217a3a2ba44Smacallan				/* all opaque */
21872fd264fSmacallan				sxm(SX_STUQ0, dstx, 8, 0);
219a3a2ba44Smacallan			} else {
220a3a2ba44Smacallan				/* fetch alpha value, stick it into scam */
221a3a2ba44Smacallan				/* mask is in R[12:15] */
222a3a2ba44Smacallan				/*write_sx_io(p, mskx & ~7,
223a3a2ba44Smacallan				    SX_LDB(12, 0, mskx & 7));*/
224a3a2ba44Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
225a3a2ba44Smacallan				/* fetch dst pixel */
22672fd264fSmacallan				sxm(SX_LDUQ0, dstx, 20, 0);
227230e26c7Smacallan				sxi(SX_ORV, 12, 0, R_SCAM, 0);
228a3a2ba44Smacallan				/*
229a3a2ba44Smacallan				 * src * alpha + R0
230a3a2ba44Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
231a3a2ba44Smacallan				 */
232230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 9, 0, 17, 2);
233a3a2ba44Smacallan
234a3a2ba44Smacallan				/* invert SCAM */
235230e26c7Smacallan				sxi(SX_XORV, 12, 8, R_SCAM, 0);
236ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG
237230e26c7Smacallan				sxi(SX_XORV, 12, 8, 13, 0);
238a3a2ba44Smacallan#endif
239a3a2ba44Smacallan				/* dst * (1 - alpha) + R[13:15] */
240230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 21, 17, 25, 2);
24172fd264fSmacallan				sxm(SX_STUQ0C, dstx, 24, 0);
242a3a2ba44Smacallan			}
243a3a2ba44Smacallan			dstx += 4;
244a3a2ba44Smacallan			mskx += 1;
245a3a2ba44Smacallan		}
246f221549cSmacallan#endif /* SX_SINGLE */
2472066ab30Smacallan#ifdef SX_RENDER_VERBOSE
248a3a2ba44Smacallan		buffer[x] = 0;
249a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
250a3a2ba44Smacallan#endif
251a3a2ba44Smacallan		dst += dstpitch;
252a3a2ba44Smacallan		msk += srcpitch;
253a3a2ba44Smacallan	}
2542066ab30Smacallan	DONE;
255a3a2ba44Smacallan}
256a3a2ba44Smacallan
257a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p,
258a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
259a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
260a3a2ba44Smacallan                   int width, int height)
261a3a2ba44Smacallan{
262a3a2ba44Smacallan	int line;
263a3a2ba44Smacallan	uint32_t srcx, dstx;
264a3a2ba44Smacallan	int full, part, x;
265a3a2ba44Smacallan
266a3a2ba44Smacallan	ENTER;
267a3a2ba44Smacallan	full = width >> 3;	/* chunks of 8 */
268a3a2ba44Smacallan	part = width & 7;	/* leftovers */
269a3a2ba44Smacallan	/* we do this up to 8 pixels at a time */
270a3a2ba44Smacallan	for (line = 0; line < height; line++) {
271a3a2ba44Smacallan		srcx = src;
272a3a2ba44Smacallan		dstx = dst;
273a3a2ba44Smacallan		for (x = 0; x < full; x++) {
27472fd264fSmacallan			sxm(SX_LDUQ0, srcx, 8, 31);
27572fd264fSmacallan			sxm(SX_LDUQ0, dstx, 40, 31);
276230e26c7Smacallan			sxi(SX_ADDV, 8, 40, 72, 15);
277230e26c7Smacallan			sxi(SX_ADDV, 24, 56, 88, 15);
27872fd264fSmacallan			sxm(SX_STUQ0, dstx, 72, 31);
279a3a2ba44Smacallan			srcx += 128;
280a3a2ba44Smacallan			dstx += 128;
281a3a2ba44Smacallan		}
282a3a2ba44Smacallan
283a3a2ba44Smacallan		/* do leftovers */
28472fd264fSmacallan		sxm(SX_LDUQ0, srcx, 8, part - 1);
28572fd264fSmacallan		sxm(SX_LDUQ0, dstx, 40, part - 1);
286a3a2ba44Smacallan		if (part & 16) {
287230e26c7Smacallan			sxi(SX_ADDV, 8, 40, 72, 15);
288230e26c7Smacallan			sxi(SX_ADDV, 24, 56, 88, part - 17);
289a3a2ba44Smacallan		} else {
290230e26c7Smacallan			sxi(SX_ADDV, 8, 40, 72, part - 1);
291a3a2ba44Smacallan		}
29272fd264fSmacallan		sxm(SX_STUQ0, dstx, 72, part - 1);
293a3a2ba44Smacallan
294a3a2ba44Smacallan		/* next line */
295a3a2ba44Smacallan		src += srcpitch;
296a3a2ba44Smacallan		dst += dstpitch;
297a3a2ba44Smacallan	}
298a3a2ba44Smacallan}
299a3a2ba44Smacallan
300a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p,
301a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
302a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
303a3a2ba44Smacallan                   int width, int height)
304a3a2ba44Smacallan{
305a3a2ba44Smacallan	int line;
306a3a2ba44Smacallan	uint32_t srcx, dstx, srcoff, dstoff;
307a3a2ba44Smacallan	int pre, full, part, x;
308a3a2ba44Smacallan	uint8_t *d;
3092066ab30Smacallan#ifdef SX_RENDER_VERBOSE
310a3a2ba44Smacallan	char buffer[256];
3112066ab30Smacallan#endif
312a3a2ba44Smacallan	ENTER;
313a3a2ba44Smacallan
314a3a2ba44Smacallan	srcoff = src & 7;
315a3a2ba44Smacallan	src &= ~7;
316a3a2ba44Smacallan	dstoff = dst & 7;
317a3a2ba44Smacallan	dst &= ~7;
318a3a2ba44Smacallan	full = width >> 5;	/* chunks of 32 */
319a3a2ba44Smacallan	part = width & 31;	/* leftovers */
320a3a2ba44Smacallan
321ad6af7a7Smacallan#ifdef SX_RENDER_DEBUG
322a3a2ba44Smacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
323a3a2ba44Smacallan	    width, height, full, part);
324a3a2ba44Smacallan#endif
325a3a2ba44Smacallan	/* we do this up to 32 pixels at a time */
326a3a2ba44Smacallan	for (line = 0; line < height; line++) {
327a3a2ba44Smacallan		srcx = src;
328a3a2ba44Smacallan		dstx = dst;
329a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE
330a3a2ba44Smacallan		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
331a3a2ba44Smacallan		d = (uint8_t *)(p->fb + dstx + dstoff);
332a3a2ba44Smacallan		for (x = 0; x < width; x++) {
333a3a2ba44Smacallan			d[x] = min(255, s[x] + d[x]);
334a3a2ba44Smacallan		}
335a3a2ba44Smacallan#else
336a3a2ba44Smacallan		for (x = 0; x < full; x++) {
337a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
338a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
339230e26c7Smacallan			sxi(SX_ADDV, 8, 40, 72, 15);
340230e26c7Smacallan			sxi(SX_ADDV, 24, 56, 88, 15);
341a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
342a3a2ba44Smacallan			srcx += 32;
343a3a2ba44Smacallan			dstx += 32;
344a3a2ba44Smacallan		}
345a3a2ba44Smacallan
346a3a2ba44Smacallan		if (part > 0) {
347a3a2ba44Smacallan			/* do leftovers */
348a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
349a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
350a3a2ba44Smacallan			if (part > 16) {
351230e26c7Smacallan				sxi(SX_ADDV, 8, 40, 72, 15);
352230e26c7Smacallan				sxi(SX_ADDV, 24, 56, 88, part - 17);
353a3a2ba44Smacallan			} else {
354230e26c7Smacallan				sxi(SX_ADDV, 8, 40, 72, part - 1);
355a3a2ba44Smacallan			}
356a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
357a3a2ba44Smacallan		}
358a3a2ba44Smacallan#endif
3592066ab30Smacallan#ifdef SX_RENDER_VERBOSE
360d71cb32dSmacallan		d = (uint8_t *)(p->fb + src + srcoff);
361d71cb32dSmacallan		for (x = 0; x < width; x++) {
362d71cb32dSmacallan			buffer[x] = c[d[x]>>5];
363d71cb32dSmacallan		}
364d71cb32dSmacallan		buffer[x] = 0;
365d71cb32dSmacallan		xf86Msg(X_ERROR, "%s\n", buffer);
366d71cb32dSmacallan#endif
367d71cb32dSmacallan		/* next line */
368d71cb32dSmacallan		src += srcpitch;
369d71cb32dSmacallan		dst += dstpitch;
370d71cb32dSmacallan	}
371d71cb32dSmacallan}
372d71cb32dSmacallan
373d71cb32dSmacallanvoid CG14Comp_Add8_32(Cg14Ptr p,
374d71cb32dSmacallan                   uint32_t src, uint32_t srcpitch,
375d71cb32dSmacallan                   uint32_t dst, uint32_t dstpitch,
376d71cb32dSmacallan                   int width, int height)
377d71cb32dSmacallan{
378d71cb32dSmacallan	int line;
379d71cb32dSmacallan	uint32_t srcx, dstx, srcoff, dstoff;
380d71cb32dSmacallan	int pre, full, part, x;
381d71cb32dSmacallan	uint8_t *d;
3822066ab30Smacallan#ifdef SX_RENDER_VERBOSE
383d71cb32dSmacallan	char buffer[256];
3842066ab30Smacallan#endif
385d71cb32dSmacallan	ENTER;
386d71cb32dSmacallan
387d71cb32dSmacallan	srcoff = src & 7;
388d71cb32dSmacallan	src &= ~7;
389d71cb32dSmacallan	dstoff = dst & 7;
390d71cb32dSmacallan	dst &= ~7;
391d71cb32dSmacallan	full = width >> 5;	/* chunks of 32 */
392d71cb32dSmacallan	part = width & 31;	/* leftovers */
393d71cb32dSmacallan
3943a2c2bcaSmacallan#ifdef SX_RENDER_DEBUG
395d71cb32dSmacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
396d71cb32dSmacallan	    width, height, full, part);
397d71cb32dSmacallan#endif
398d71cb32dSmacallan	/* we do this up to 32 pixels at a time */
399d71cb32dSmacallan	for (line = 0; line < height; line++) {
400d71cb32dSmacallan		srcx = src;
401d71cb32dSmacallan		dstx = dst;
402d71cb32dSmacallan		for (x = 0; x < full; x++) {
403d71cb32dSmacallan			/* load source bytes */
404d71cb32dSmacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
405d71cb32dSmacallan			/* load alpha from destination */
406d71cb32dSmacallan			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
407230e26c7Smacallan			sxi(SX_ADDV, 8, 40, 72, 15);
408230e26c7Smacallan			sxi(SX_ADDV, 24, 56, 88, 15);
409d71cb32dSmacallan			/* write clamped values back into dest alpha */
410d71cb32dSmacallan			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
411d71cb32dSmacallan			srcx += 32;
412d71cb32dSmacallan			dstx += 128;
413d71cb32dSmacallan		}
414d71cb32dSmacallan
415d71cb32dSmacallan		if (part > 0) {
416d71cb32dSmacallan			/* do leftovers */
417d71cb32dSmacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
418d71cb32dSmacallan			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
419d71cb32dSmacallan			if (part > 16) {
420230e26c7Smacallan				sxi(SX_ADDV, 8, 40, 72, 15);
421230e26c7Smacallan				sxi(SX_ADDV, 24, 56, 88, part - 17);
422d71cb32dSmacallan			} else {
423230e26c7Smacallan				sxi(SX_ADDV, 8, 40, 72, part - 1);
424d71cb32dSmacallan			}
425d71cb32dSmacallan			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
426d71cb32dSmacallan		}
4272066ab30Smacallan#ifdef SX_RENDER_VERBOSE
428a3a2ba44Smacallan		d = (uint8_t *)(p->fb + src + srcoff);
429a3a2ba44Smacallan		for (x = 0; x < width; x++) {
430a3a2ba44Smacallan			buffer[x] = c[d[x]>>5];
431a3a2ba44Smacallan		}
432a3a2ba44Smacallan		buffer[x] = 0;
433a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
434a3a2ba44Smacallan#endif
435a3a2ba44Smacallan		/* next line */
436a3a2ba44Smacallan		src += srcpitch;
437a3a2ba44Smacallan		dst += dstpitch;
438a3a2ba44Smacallan	}
439a3a2ba44Smacallan}
440a3a2ba44Smacallan
441a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p,
442a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
443a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
444e311bbeeSmacallan                   int width, int height, int flip)
445a3a2ba44Smacallan{
44678d1a11bSmacallan	uint32_t srcx, dstx, mskx, m;
44778d1a11bSmacallan	int line, x, i, num;
448a3a2ba44Smacallan
449a3a2ba44Smacallan	ENTER;
450a3a2ba44Smacallan
451a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
452a3a2ba44Smacallan	for (line = 0; line < height; line++) {
453a3a2ba44Smacallan		srcx = src;
454a3a2ba44Smacallan		dstx = dst;
455a3a2ba44Smacallan
45678d1a11bSmacallan		for (x = 0; x < width; x += 4) {
45778d1a11bSmacallan			/* we do up to 4 pixels at a time */
45878d1a11bSmacallan			num = min(4, width - x);
45978d1a11bSmacallan			if (num <= 0) {
46078d1a11bSmacallan				xf86Msg(X_ERROR, "wtf?!\n");
46178d1a11bSmacallan				continue;
46278d1a11bSmacallan			}
46378d1a11bSmacallan			/* fetch source pixels */
46472fd264fSmacallan			sxm(SX_LDUQ0, srcx, 12, num - 1);
465e311bbeeSmacallan			if (flip) {
466230e26c7Smacallan				sxi(SX_GATHER, 13, 4, 40, num - 1);
467230e26c7Smacallan				sxi(SX_GATHER, 15, 4, 44, num - 1);
468230e26c7Smacallan				sxi(SX_SCATTER, 40, 4, 15, num - 1);
469230e26c7Smacallan				sxi(SX_SCATTER, 44, 4, 13, num - 1);
47078d1a11bSmacallan			}
47178d1a11bSmacallan			/* fetch dst pixels */
47272fd264fSmacallan			sxm(SX_LDUQ0, dstx, 44, num - 1);
47378d1a11bSmacallan			/* now process up to 4 pixels */
47478d1a11bSmacallan			for (i = 0; i < num; i++) {
47578d1a11bSmacallan				int ii = i << 2;
47678d1a11bSmacallan				/* write inverted alpha into SCAM */
477230e26c7Smacallan				sxi(SX_XORS, 12 + ii, 8, R_SCAM, 0);
47878d1a11bSmacallan				/* dst * (1 - alpha) + src */
479230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 44 + ii, 12 + ii, 76 + ii, 3);
480e311bbeeSmacallan			}
48172fd264fSmacallan			sxm(SX_STUQ0C, dstx, 76, num - 1);
48278d1a11bSmacallan			srcx += 16;
48378d1a11bSmacallan			dstx += 16;
484a3a2ba44Smacallan		}
485a3a2ba44Smacallan		src += srcpitch;
48678d1a11bSmacallan		dst += dstpitch;
487a3a2ba44Smacallan	}
488a3a2ba44Smacallan}
489a3a2ba44Smacallan
490a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p,
491a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
492a3a2ba44Smacallan                   uint32_t msk, uint32_t mskpitch,
493a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
494e311bbeeSmacallan                   int width, int height, int flip)
495a3a2ba44Smacallan{
496a3a2ba44Smacallan	uint32_t srcx, dstx, mskx, m;
49778d1a11bSmacallan	int line, x, i, num;
498a3a2ba44Smacallan
499a3a2ba44Smacallan	ENTER;
500a3a2ba44Smacallan
501a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
502a3a2ba44Smacallan	for (line = 0; line < height; line++) {
503a3a2ba44Smacallan		srcx = src;
504a3a2ba44Smacallan		mskx = msk;
505a3a2ba44Smacallan		dstx = dst;
506a3a2ba44Smacallan
50778d1a11bSmacallan		for (x = 0; x < width; x += 4) {
50878d1a11bSmacallan			/* we do up to 4 pixels at a time */
50978d1a11bSmacallan			num = min(4, width - x);
51078d1a11bSmacallan			if (num <= 0) {
51178d1a11bSmacallan				xf86Msg(X_ERROR, "wtf?!\n");
51278d1a11bSmacallan				continue;
51378d1a11bSmacallan			}
51478d1a11bSmacallan			/* fetch source pixels */
51572fd264fSmacallan			sxm(SX_LDUQ0, srcx, 12, num - 1);
516e311bbeeSmacallan			if (flip) {
517230e26c7Smacallan				sxi(SX_GATHER, 13, 4, 40, num - 1);
518230e26c7Smacallan				sxi(SX_GATHER, 15, 4, 44, num - 1);
519230e26c7Smacallan				sxi(SX_SCATTER, 40, 4, 15, num - 1);
520230e26c7Smacallan				sxi(SX_SCATTER, 44, 4, 13, num - 1);
521e311bbeeSmacallan			}
522a3a2ba44Smacallan			/* fetch mask */
52372fd264fSmacallan			sxm(SX_LDB, mskx, 28, num - 1);
52478d1a11bSmacallan			/* fetch dst pixels */
52572fd264fSmacallan			sxm(SX_LDUQ0, dstx, 44, num - 1);
52678d1a11bSmacallan			/* now process up to 4 pixels */
52778d1a11bSmacallan			for (i = 0; i < num; i++) {
52878d1a11bSmacallan				int ii = i << 2;
52978d1a11bSmacallan				/* mask alpha to SCAM */
530230e26c7Smacallan				sxi(SX_ORS, 28 + i, 0, R_SCAM, 0);
53178d1a11bSmacallan				/* src * alpha */
532230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
53378d1a11bSmacallan				/* write inverted alpha into SCAM */
534230e26c7Smacallan				sxi(SX_XORS, 28 + i, 8, R_SCAM, 0);
53578d1a11bSmacallan				/* dst * (1 - alpha) + R[60:] */
536230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
53778d1a11bSmacallan			}
53872fd264fSmacallan			sxm(SX_STUQ0C, dstx, 76, num - 1);
53978d1a11bSmacallan			srcx += 16;
54078d1a11bSmacallan			mskx += 4;
54178d1a11bSmacallan			dstx += 16;
542a3a2ba44Smacallan		}
543a3a2ba44Smacallan		src += srcpitch;
544a3a2ba44Smacallan		msk += mskpitch;
545a3a2ba44Smacallan		dst += dstpitch;
546a3a2ba44Smacallan	}
547a3a2ba44Smacallan}
5486bdc2ffdSmacallan
5496bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
5506bdc2ffdSmacallan                   uint32_t src, uint32_t srcpitch,
5516bdc2ffdSmacallan                   uint32_t msk, uint32_t mskpitch,
5526bdc2ffdSmacallan                   uint32_t dst, uint32_t dstpitch,
553e311bbeeSmacallan                   int width, int height, int flip)
5546bdc2ffdSmacallan{
5556bdc2ffdSmacallan	uint32_t srcx, dstx, mskx, m;
55678d1a11bSmacallan	int line, x, i, num;
5576bdc2ffdSmacallan
5586bdc2ffdSmacallan	ENTER;
5596bdc2ffdSmacallan
5606bdc2ffdSmacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
56178d1a11bSmacallan	write_sx_reg(p, SX_QUEUED(9), 0xff);
562230e26c7Smacallan	sxi(SX_ORS, 8, 0, 10, 1);
5636bdc2ffdSmacallan	for (line = 0; line < height; line++) {
5646bdc2ffdSmacallan		srcx = src;
5656bdc2ffdSmacallan		mskx = msk;
5666bdc2ffdSmacallan		dstx = dst;
5676bdc2ffdSmacallan
56878d1a11bSmacallan		for (x = 0; x < width; x += 4) {
56978d1a11bSmacallan			/* we do up to 4 pixels at a time */
57078d1a11bSmacallan			num = min(4, width - x);
57178d1a11bSmacallan			if (num <= 0) {
57278d1a11bSmacallan				xf86Msg(X_ERROR, "wtf?!\n");
57378d1a11bSmacallan				continue;
57478d1a11bSmacallan			}
57578d1a11bSmacallan			/* fetch source pixels */
57672fd264fSmacallan			sxm(SX_LDUQ0, srcx, 12, num - 1);
577e311bbeeSmacallan			if (flip) {
578230e26c7Smacallan				sxi(SX_GATHER, 13, 4, 40, num - 1);
579230e26c7Smacallan				sxi(SX_GATHER, 15, 4, 44, num - 1);
580230e26c7Smacallan				sxi(SX_SCATTER, 40, 4, 15, num - 1);
581230e26c7Smacallan				sxi(SX_SCATTER, 44, 4, 13, num - 1);
582e311bbeeSmacallan			}
5836bdc2ffdSmacallan			/* fetch mask */
58472fd264fSmacallan			sxm(SX_LDB, mskx, 28, num - 1);
58578d1a11bSmacallan			/* fetch dst pixels */
58672fd264fSmacallan			sxm(SX_LDUQ0, dstx, 44, num - 1);
58778d1a11bSmacallan			/* set src alpha to 0xff */
588230e26c7Smacallan			sxi(SX_SCATTER, 8, 4, 12, num - 1);
58978d1a11bSmacallan			/* now process up to 4 pixels */
59078d1a11bSmacallan			for (i = 0; i < num; i++) {
59178d1a11bSmacallan				int ii = i << 2;
59278d1a11bSmacallan				/* mask alpha to SCAM */
593230e26c7Smacallan				sxi(SX_ORS, 28 + i, 0, R_SCAM, 0);
59478d1a11bSmacallan				/* src * alpha */
595230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
59678d1a11bSmacallan				/* write inverted alpha into SCAM */
597230e26c7Smacallan				sxi(SX_XORS, 28 + i, 8, R_SCAM, 0);
59878d1a11bSmacallan				/* dst * (1 - alpha) + R[60:] */
599230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
60078d1a11bSmacallan			}
60172fd264fSmacallan			sxm(SX_STUQ0C, dstx, 76, num - 1);
60278d1a11bSmacallan			srcx += 16;
60378d1a11bSmacallan			mskx += 4;
60478d1a11bSmacallan			dstx += 16;
6056bdc2ffdSmacallan		}
6066bdc2ffdSmacallan		src += srcpitch;
6076bdc2ffdSmacallan		msk += mskpitch;
6086bdc2ffdSmacallan		dst += dstpitch;
6096bdc2ffdSmacallan	}
6106bdc2ffdSmacallan}
611fa158432Smacallan
612fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
613fa158432Smacallan                   uint32_t src, uint32_t srcpitch,
614fa158432Smacallan                   uint32_t msk, uint32_t mskpitch,
615fa158432Smacallan                   uint32_t dst, uint32_t dstpitch,
616e311bbeeSmacallan                   int width, int height, int flip)
617fa158432Smacallan{
618fa158432Smacallan	uint32_t srcx, dstx, mskx, m;
61978d1a11bSmacallan	int line, x, i, num;
620fa158432Smacallan
621fa158432Smacallan	ENTER;
622fa158432Smacallan
623fa158432Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
62478d1a11bSmacallan	write_sx_reg(p, SX_QUEUED(9), 0xff);
625230e26c7Smacallan	sxi(SX_ORS, 8, 0, 10, 1);
626fa158432Smacallan	for (line = 0; line < height; line++) {
627fa158432Smacallan		srcx = src;
628fa158432Smacallan		mskx = msk;
629fa158432Smacallan		dstx = dst;
630fa158432Smacallan
63178d1a11bSmacallan		for (x = 0; x < width; x += 4) {
63278d1a11bSmacallan			/* we do up to 4 pixels at a time */
63378d1a11bSmacallan			num = min(4, width - x);
63478d1a11bSmacallan			if (num <= 0) {
63578d1a11bSmacallan				xf86Msg(X_ERROR, "wtf?!\n");
63678d1a11bSmacallan				continue;
63778d1a11bSmacallan			}
63878d1a11bSmacallan			/* fetch source pixels */
63972fd264fSmacallan			sxm(SX_LDUQ0, srcx, 12, num - 1);
640e311bbeeSmacallan			if (flip) {
641230e26c7Smacallan				sxi(SX_GATHER, 13, 4, 40, num - 1);
642230e26c7Smacallan				sxi(SX_GATHER, 15, 4, 44, num - 1);
643230e26c7Smacallan				sxi(SX_SCATTER, 40, 4, 15, num - 1);
644230e26c7Smacallan				sxi(SX_SCATTER, 44, 4, 13, num - 1);
645e311bbeeSmacallan			}
646fa158432Smacallan			/* fetch mask */
64772fd264fSmacallan			sxm(SX_LDUQ0, mskx, 28, num - 1);
64878d1a11bSmacallan			/* fetch dst pixels */
64972fd264fSmacallan			sxm(SX_LDUQ0, dstx, 44, num - 1);
65078d1a11bSmacallan			/* set src alpha to 0xff */
651230e26c7Smacallan			sxi(SX_SCATTER, 8, 4, 12, num - 1);
65278d1a11bSmacallan			/* now process up to 4 pixels */
65378d1a11bSmacallan			for (i = 0; i < num; i++) {
65478d1a11bSmacallan				int ii = i << 2;
65578d1a11bSmacallan				/* mask alpha to SCAM */
656230e26c7Smacallan				sxi(SX_ORS, 28 + ii, 0, R_SCAM, 0);
65778d1a11bSmacallan				/* src * alpha */
658230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 12 + ii, 0, 60 + ii, 3);
65978d1a11bSmacallan				/* write inverted alpha into SCAM */
660230e26c7Smacallan				sxi(SX_XORS, 28 + ii, 8, R_SCAM, 0);
66178d1a11bSmacallan				/* dst * (1 - alpha) + R[60:] */
662230e26c7Smacallan				sxi(SX_SAXP16X16SR8, 44 + ii, 60 + ii, 76 + ii, 3);
66378d1a11bSmacallan			}
66472fd264fSmacallan			sxm(SX_STUQ0C, dstx, 76, num - 1);
66578d1a11bSmacallan			srcx += 16;
66678d1a11bSmacallan			mskx += 16;
66778d1a11bSmacallan			dstx += 16;
668fa158432Smacallan		}
669fa158432Smacallan		src += srcpitch;
670fa158432Smacallan		msk += mskpitch;
671fa158432Smacallan		dst += dstpitch;
672fa158432Smacallan	}
673fa158432Smacallan}
674