cg14_render.c revision d71cb32d
1d71cb32dSmacallan/* $NetBSD: cg14_render.c,v 1.10 2017/10/30 22:09:54 macallan Exp $ */
2a3a2ba44Smacallan/*
3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz
4a3a2ba44Smacallan * All rights reserved.
5a3a2ba44Smacallan *
6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without
7a3a2ba44Smacallan * modification, are permitted provided that the following conditions
8a3a2ba44Smacallan * are met:
9a3a2ba44Smacallan *
10a3a2ba44Smacallan *    - Redistributions of source code must retain the above copyright
11a3a2ba44Smacallan *      notice, this list of conditions and the following disclaimer.
12a3a2ba44Smacallan *    - Redistributions in binary form must reproduce the above
13a3a2ba44Smacallan *      copyright notice, this list of conditions and the following
14a3a2ba44Smacallan *      disclaimer in the documentation and/or other materials provided
15a3a2ba44Smacallan *      with the distribution.
16a3a2ba44Smacallan *
17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE.
29a3a2ba44Smacallan *
30a3a2ba44Smacallan */
31a3a2ba44Smacallan
32c88c16f8Smacallan#ifdef HAVE_CONFIG_H
33c88c16f8Smacallan#include "config.h"
34c88c16f8Smacallan#endif
35c88c16f8Smacallan
36a3a2ba44Smacallan#include <sys/types.h>
37a3a2ba44Smacallan
/* all drivers need this */
39a3a2ba44Smacallan#include "xf86.h"
40a3a2ba44Smacallan#include "xf86_OSproc.h"
41a3a2ba44Smacallan#include "compiler.h"
42a3a2ba44Smacallan
43a3a2ba44Smacallan#include "cg14.h"
44a3a2ba44Smacallan#include <sparc/sxreg.h>
45a3a2ba44Smacallan
/*
 * Build-time switches, all disabled by default (uncomment to enable):
 *  SX_SINGLE       - the *Solid compositors handle one pixel per iteration
 *                    instead of four
 *  SX_RENDER_DEBUG - ENTER and DPRINTF emit messages through xf86Msg
 *  SX_ADD_SOFTWARE - CG14Comp_Add8() adds the bytes on the CPU
 */
/*#define SX_SINGLE*/
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
/* log the name of the function being entered */
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
/* debugging off: ENTER expands to nothing, DPRINTF calls are never executed */
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Characters used by the SX_DEBUG dumps below, indexed by the top three bits
 * of an 8 bit value. The initializer is exactly 8 characters long, so the
 * array carries no terminating NUL.
 */
char c[8] = " .,:+*oX";
59a3a2ba44Smacallan
6078cb1511Smacallan
6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p,
6278cb1511Smacallan                   uint32_t src, uint32_t srcpitch,
6378cb1511Smacallan                   uint32_t dst, uint32_t dstpitch,
6478cb1511Smacallan                   int width, int height)
6578cb1511Smacallan{
6678cb1511Smacallan	uint32_t msk = src, mskx, dstx, m;
6778cb1511Smacallan	int line, x, i;
6878cb1511Smacallan
6978cb1511Smacallan	ENTER;
70f7cb851fSmacallan
7178cb1511Smacallan	for (line = 0; line < height; line++) {
7278cb1511Smacallan		mskx = msk;
7378cb1511Smacallan		dstx = dst;
74f221549cSmacallan#ifndef SX_SINGLE
75f221549cSmacallan		int rest;
76f221549cSmacallan		for (x = 0; x < width; x += 4) {
77f221549cSmacallan			rest = width - x;
78f221549cSmacallan			/* fetch 4 mask values */
79f221549cSmacallan			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
80f221549cSmacallan			/* fetch destination pixels */
81f221549cSmacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
82f221549cSmacallan			/* duplicate them for all channels */
83f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
84f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
85f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
86f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
87f221549cSmacallan			/* generate inverted alpha */
88f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
89f221549cSmacallan			    SX_XORS(12, 8, 28, 15));
90f221549cSmacallan			/* multiply source */
91f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
92f221549cSmacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
93f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
94f221549cSmacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
95f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
96f221549cSmacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
97f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
98f221549cSmacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
99f221549cSmacallan			/* multiply dest */
100f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
101f221549cSmacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
102f221549cSmacallan			/* add up */
103f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
104f221549cSmacallan			    SX_ADDV(44, 76, 92, 15));
105f221549cSmacallan			/* write back */
106f221549cSmacallan			if (rest < 4) {
107f221549cSmacallan				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
108f221549cSmacallan			} else {
109f221549cSmacallan				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
110f221549cSmacallan			}
111f221549cSmacallan			dstx += 16;
112f221549cSmacallan			mskx += 16;
113f221549cSmacallan		}
114f221549cSmacallan#else /* SX_SINGLE */
11578cb1511Smacallan		for (x = 0; x < width; x++) {
11678cb1511Smacallan			m = *(volatile uint32_t *)(p->fb + mskx);
11778cb1511Smacallan			m = m >> 24;
11878cb1511Smacallan			if (m == 0) {
11978cb1511Smacallan				/* nothing to do - all transparent */
12078cb1511Smacallan			} else if (m == 0xff) {
12178cb1511Smacallan				/* all opaque */
12278cb1511Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
12378cb1511Smacallan			} else {
12478cb1511Smacallan				/* fetch alpha value, stick it into scam */
12578cb1511Smacallan				/* mask is in R[12:15] */
12678cb1511Smacallan				/*write_sx_io(p, mskx,
12778cb1511Smacallan				    SX_LDUQ0(12, 0, mskx & 7));*/
12878cb1511Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
12978cb1511Smacallan				/* fetch dst pixel */
13078cb1511Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
13178cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
13278cb1511Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
13378cb1511Smacallan				/*
13478cb1511Smacallan				 * src * alpha + R0
13578cb1511Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
13678cb1511Smacallan				 */
13778cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
13878cb1511Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
13978cb1511Smacallan
14078cb1511Smacallan				/* invert SCAM */
14178cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
14278cb1511Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
14378cb1511Smacallan#ifdef SX_DEBUG
14478cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
14578cb1511Smacallan				    SX_XORV(12, 8, 13, 0));
14678cb1511Smacallan#endif
14778cb1511Smacallan				/* dst * (1 - alpha) + R[13:15] */
14878cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
14978cb1511Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
15078cb1511Smacallan				write_sx_io(p, dstx,
15178cb1511Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
15278cb1511Smacallan			}
15378cb1511Smacallan			dstx += 4;
15478cb1511Smacallan			mskx += 4;
15578cb1511Smacallan		}
156f221549cSmacallan#endif /* SX_SINGLE */
157f221549cSmacallan		dst += dstpitch;
158f221549cSmacallan		msk += srcpitch;
159f221549cSmacallan	}
160f221549cSmacallan}
161f221549cSmacallan
162f221549cSmacallanvoid CG14Comp_Over8Solid(Cg14Ptr p,
163f221549cSmacallan                   uint32_t src, uint32_t srcpitch,
164f221549cSmacallan                   uint32_t dst, uint32_t dstpitch,
165f221549cSmacallan                   int width, int height)
166f221549cSmacallan{
167f221549cSmacallan	uint32_t msk = src, mskx, dstx, m;
168f221549cSmacallan	int line, x, i;
169f221549cSmacallan#ifdef SX_DEBUG
170f221549cSmacallan	char buffer[256];
171f221549cSmacallan#endif
172f221549cSmacallan	ENTER;
173f221549cSmacallan
174f221549cSmacallan	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
175f221549cSmacallan	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
176f221549cSmacallan	    *(uint32_t *)(p->fb + p->srcoff));
177f221549cSmacallan	for (line = 0; line < height; line++) {
178f221549cSmacallan		mskx = msk;
179f221549cSmacallan		dstx = dst;
180f221549cSmacallan#ifndef SX_SINGLE
181f221549cSmacallan		int rest;
18278cb1511Smacallan		for (x = 0; x < width; x += 4) {
183f221549cSmacallan			rest = width - x;
18478cb1511Smacallan			/* fetch 4 mask values */
185f221549cSmacallan			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
18678cb1511Smacallan			/* fetch destination pixels */
18778cb1511Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
18878cb1511Smacallan			/* duplicate them for all channels */
189f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
190f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
191f221549cSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
1926bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
19378cb1511Smacallan			/* generate inverted alpha */
19478cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
19578cb1511Smacallan			    SX_XORS(12, 8, 28, 15));
19678cb1511Smacallan			/* multiply source */
19778cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
19878cb1511Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
19978cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
20078cb1511Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
20178cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
20278cb1511Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
20378cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
20478cb1511Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
20578cb1511Smacallan			/* multiply dest */
20678cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
20778cb1511Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
20878cb1511Smacallan			/* add up */
20978cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
21078cb1511Smacallan			    SX_ADDV(44, 76, 92, 15));
21178cb1511Smacallan			/* write back */
212f221549cSmacallan			if (rest < 4) {
213f221549cSmacallan				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
214f221549cSmacallan			} else {
215f221549cSmacallan				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
216f221549cSmacallan			}
21778cb1511Smacallan			dstx += 16;
218f221549cSmacallan			mskx += 4;
21978cb1511Smacallan		}
220f221549cSmacallan#else /* SX_SINGLE */
221a3a2ba44Smacallan		for (x = 0; x < width; x++) {
222a3a2ba44Smacallan			m = *(volatile uint8_t *)(p->fb + mskx);
223a3a2ba44Smacallan#ifdef SX_DEBUG
224a3a2ba44Smacallan			buffer[x] = c[m >> 5];
225a3a2ba44Smacallan#endif
226a3a2ba44Smacallan			if (m == 0) {
227a3a2ba44Smacallan				/* nothing to do - all transparent */
228a3a2ba44Smacallan			} else if (m == 0xff) {
229a3a2ba44Smacallan				/* all opaque */
230a3a2ba44Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
231a3a2ba44Smacallan			} else {
232a3a2ba44Smacallan				/* fetch alpha value, stick it into scam */
233a3a2ba44Smacallan				/* mask is in R[12:15] */
234a3a2ba44Smacallan				/*write_sx_io(p, mskx & ~7,
235a3a2ba44Smacallan				    SX_LDB(12, 0, mskx & 7));*/
236a3a2ba44Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
237a3a2ba44Smacallan				/* fetch dst pixel */
238a3a2ba44Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
239a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
240a3a2ba44Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
241a3a2ba44Smacallan				/*
242a3a2ba44Smacallan				 * src * alpha + R0
243a3a2ba44Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
244a3a2ba44Smacallan				 */
245a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
246a3a2ba44Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
247a3a2ba44Smacallan
248a3a2ba44Smacallan				/* invert SCAM */
249a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
250a3a2ba44Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
251a3a2ba44Smacallan#ifdef SX_DEBUG
252a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
253a3a2ba44Smacallan				    SX_XORV(12, 8, 13, 0));
254a3a2ba44Smacallan#endif
255a3a2ba44Smacallan				/* dst * (1 - alpha) + R[13:15] */
256a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
257a3a2ba44Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
258a3a2ba44Smacallan				write_sx_io(p, dstx,
259a3a2ba44Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
260a3a2ba44Smacallan			}
261a3a2ba44Smacallan			dstx += 4;
262a3a2ba44Smacallan			mskx += 1;
263a3a2ba44Smacallan		}
264f221549cSmacallan#endif /* SX_SINGLE */
265a3a2ba44Smacallan#ifdef SX_DEBUG
266a3a2ba44Smacallan		buffer[x] = 0;
267a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
268a3a2ba44Smacallan#endif
269a3a2ba44Smacallan		dst += dstpitch;
270a3a2ba44Smacallan		msk += srcpitch;
271a3a2ba44Smacallan	}
272a3a2ba44Smacallan}
273a3a2ba44Smacallan
274a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p,
275a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
276a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
277a3a2ba44Smacallan                   int width, int height)
278a3a2ba44Smacallan{
279a3a2ba44Smacallan	int line;
280a3a2ba44Smacallan	uint32_t srcx, dstx;
281a3a2ba44Smacallan	int full, part, x;
282a3a2ba44Smacallan
283a3a2ba44Smacallan	ENTER;
284a3a2ba44Smacallan	full = width >> 3;	/* chunks of 8 */
285a3a2ba44Smacallan	part = width & 7;	/* leftovers */
286a3a2ba44Smacallan	/* we do this up to 8 pixels at a time */
287a3a2ba44Smacallan	for (line = 0; line < height; line++) {
288a3a2ba44Smacallan		srcx = src;
289a3a2ba44Smacallan		dstx = dst;
290a3a2ba44Smacallan		for (x = 0; x < full; x++) {
291a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
292a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
293a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
294a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
295a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
296a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
297a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
298a3a2ba44Smacallan			srcx += 128;
299a3a2ba44Smacallan			dstx += 128;
300a3a2ba44Smacallan		}
301a3a2ba44Smacallan
302a3a2ba44Smacallan		/* do leftovers */
303a3a2ba44Smacallan		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
304a3a2ba44Smacallan		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
305a3a2ba44Smacallan		if (part & 16) {
306a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
307a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
308a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
309a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, part - 17));
310a3a2ba44Smacallan		} else {
311a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
312a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, part - 1));
313a3a2ba44Smacallan		}
314a3a2ba44Smacallan		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
315a3a2ba44Smacallan
316a3a2ba44Smacallan		/* next line */
317a3a2ba44Smacallan		src += srcpitch;
318a3a2ba44Smacallan		dst += dstpitch;
319a3a2ba44Smacallan	}
320a3a2ba44Smacallan}
321a3a2ba44Smacallan
322a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p,
323a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
324a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
325a3a2ba44Smacallan                   int width, int height)
326a3a2ba44Smacallan{
327a3a2ba44Smacallan	int line;
328a3a2ba44Smacallan	uint32_t srcx, dstx, srcoff, dstoff;
329a3a2ba44Smacallan	int pre, full, part, x;
330a3a2ba44Smacallan	uint8_t *d;
331a3a2ba44Smacallan	char buffer[256];
332a3a2ba44Smacallan	ENTER;
333a3a2ba44Smacallan
334a3a2ba44Smacallan	srcoff = src & 7;
335a3a2ba44Smacallan	src &= ~7;
336a3a2ba44Smacallan	dstoff = dst & 7;
337a3a2ba44Smacallan	dst &= ~7;
338a3a2ba44Smacallan	full = width >> 5;	/* chunks of 32 */
339a3a2ba44Smacallan	part = width & 31;	/* leftovers */
340a3a2ba44Smacallan
341a3a2ba44Smacallan#ifdef SX_DEBUG
342a3a2ba44Smacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
343a3a2ba44Smacallan	    width, height, full, part);
344a3a2ba44Smacallan#endif
345a3a2ba44Smacallan	/* we do this up to 32 pixels at a time */
346a3a2ba44Smacallan	for (line = 0; line < height; line++) {
347a3a2ba44Smacallan		srcx = src;
348a3a2ba44Smacallan		dstx = dst;
349a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE
350a3a2ba44Smacallan		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
351a3a2ba44Smacallan		d = (uint8_t *)(p->fb + dstx + dstoff);
352a3a2ba44Smacallan		for (x = 0; x < width; x++) {
353a3a2ba44Smacallan			d[x] = min(255, s[x] + d[x]);
354a3a2ba44Smacallan		}
355a3a2ba44Smacallan#else
356a3a2ba44Smacallan		for (x = 0; x < full; x++) {
357a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
358a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
359a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
360a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
361a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
362a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
363a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
364a3a2ba44Smacallan			srcx += 32;
365a3a2ba44Smacallan			dstx += 32;
366a3a2ba44Smacallan		}
367a3a2ba44Smacallan
368a3a2ba44Smacallan		if (part > 0) {
369a3a2ba44Smacallan			/* do leftovers */
370a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
371a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
372a3a2ba44Smacallan			if (part > 16) {
373a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
374a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, 15));
375a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
376a3a2ba44Smacallan				    SX_ADDV(24, 56, 88, part - 17));
377a3a2ba44Smacallan			} else {
378a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
379a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, part - 1));
380a3a2ba44Smacallan			}
381a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
382a3a2ba44Smacallan		}
383a3a2ba44Smacallan#endif
384d71cb32dSmacallan#ifdef SX_DEBUG
385d71cb32dSmacallan		d = (uint8_t *)(p->fb + src + srcoff);
386d71cb32dSmacallan		for (x = 0; x < width; x++) {
387d71cb32dSmacallan			buffer[x] = c[d[x]>>5];
388d71cb32dSmacallan		}
389d71cb32dSmacallan		buffer[x] = 0;
390d71cb32dSmacallan		xf86Msg(X_ERROR, "%s\n", buffer);
391d71cb32dSmacallan#endif
392d71cb32dSmacallan		/* next line */
393d71cb32dSmacallan		src += srcpitch;
394d71cb32dSmacallan		dst += dstpitch;
395d71cb32dSmacallan	}
396d71cb32dSmacallan}
397d71cb32dSmacallan
398d71cb32dSmacallanvoid CG14Comp_Add8_32(Cg14Ptr p,
399d71cb32dSmacallan                   uint32_t src, uint32_t srcpitch,
400d71cb32dSmacallan                   uint32_t dst, uint32_t dstpitch,
401d71cb32dSmacallan                   int width, int height)
402d71cb32dSmacallan{
403d71cb32dSmacallan	int line;
404d71cb32dSmacallan	uint32_t srcx, dstx, srcoff, dstoff;
405d71cb32dSmacallan	int pre, full, part, x;
406d71cb32dSmacallan	uint8_t *d;
407d71cb32dSmacallan	char buffer[256];
408d71cb32dSmacallan	ENTER;
409d71cb32dSmacallan
410d71cb32dSmacallan	srcoff = src & 7;
411d71cb32dSmacallan	src &= ~7;
412d71cb32dSmacallan	dstoff = dst & 7;
413d71cb32dSmacallan	dst &= ~7;
414d71cb32dSmacallan	full = width >> 5;	/* chunks of 32 */
415d71cb32dSmacallan	part = width & 31;	/* leftovers */
416d71cb32dSmacallan
417d71cb32dSmacallan#ifdef SX_DEBUG
418d71cb32dSmacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
419d71cb32dSmacallan	    width, height, full, part);
420d71cb32dSmacallan#endif
421d71cb32dSmacallan	/* we do this up to 32 pixels at a time */
422d71cb32dSmacallan	for (line = 0; line < height; line++) {
423d71cb32dSmacallan		srcx = src;
424d71cb32dSmacallan		dstx = dst;
425d71cb32dSmacallan		for (x = 0; x < full; x++) {
426d71cb32dSmacallan			/* load source bytes */
427d71cb32dSmacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
428d71cb32dSmacallan			/* load alpha from destination */
429d71cb32dSmacallan			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
430d71cb32dSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
431d71cb32dSmacallan			    SX_ADDV(8, 40, 72, 15));
432d71cb32dSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
433d71cb32dSmacallan			    SX_ADDV(24, 56, 88, 15));
434d71cb32dSmacallan			/* write clamped values back into dest alpha */
435d71cb32dSmacallan			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
436d71cb32dSmacallan			srcx += 32;
437d71cb32dSmacallan			dstx += 128;
438d71cb32dSmacallan		}
439d71cb32dSmacallan
440d71cb32dSmacallan		if (part > 0) {
441d71cb32dSmacallan			/* do leftovers */
442d71cb32dSmacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
443d71cb32dSmacallan			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
444d71cb32dSmacallan			if (part > 16) {
445d71cb32dSmacallan				write_sx_reg(p, SX_INSTRUCTIONS,
446d71cb32dSmacallan				    SX_ADDV(8, 40, 72, 15));
447d71cb32dSmacallan				write_sx_reg(p, SX_INSTRUCTIONS,
448d71cb32dSmacallan				    SX_ADDV(24, 56, 88, part - 17));
449d71cb32dSmacallan			} else {
450d71cb32dSmacallan				write_sx_reg(p, SX_INSTRUCTIONS,
451d71cb32dSmacallan				    SX_ADDV(8, 40, 72, part - 1));
452d71cb32dSmacallan			}
453d71cb32dSmacallan			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
454d71cb32dSmacallan		}
455a3a2ba44Smacallan#ifdef SX_DEBUG
456a3a2ba44Smacallan		d = (uint8_t *)(p->fb + src + srcoff);
457a3a2ba44Smacallan		for (x = 0; x < width; x++) {
458a3a2ba44Smacallan			buffer[x] = c[d[x]>>5];
459a3a2ba44Smacallan		}
460a3a2ba44Smacallan		buffer[x] = 0;
461a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
462a3a2ba44Smacallan#endif
463a3a2ba44Smacallan		/* next line */
464a3a2ba44Smacallan		src += srcpitch;
465a3a2ba44Smacallan		dst += dstpitch;
466a3a2ba44Smacallan	}
467a3a2ba44Smacallan}
468a3a2ba44Smacallan
469a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p,
470a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
471a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
472a3a2ba44Smacallan                   int width, int height)
473a3a2ba44Smacallan{
474a3a2ba44Smacallan	uint32_t srcx, dstx, m;
475a3a2ba44Smacallan	int line, x, i;
476a3a2ba44Smacallan
477a3a2ba44Smacallan	ENTER;
478a3a2ba44Smacallan
479a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
480a3a2ba44Smacallan	for (line = 0; line < height; line++) {
481a3a2ba44Smacallan		srcx = src;
482a3a2ba44Smacallan		dstx = dst;
483a3a2ba44Smacallan
484a3a2ba44Smacallan		for (x = 0; x < width; x++) {
485a3a2ba44Smacallan			/* fetch source pixel */
486a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
487a3a2ba44Smacallan			/* fetch dst pixel */
488a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
489a3a2ba44Smacallan			/* src is premultiplied with alpha */
490a3a2ba44Smacallan			/* write inverted alpha into SCAM */
491a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
492a3a2ba44Smacallan			    SX_XORV(12, 8, R_SCAM, 0));
493a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
494a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
495239808baSmacallan			    SX_SAXP16X16SR8(20, 12, 24, 3));
496a3a2ba44Smacallan			write_sx_io(p, dstx,
497a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
498a3a2ba44Smacallan			dstx += 4;
499a3a2ba44Smacallan			srcx += 4;
500a3a2ba44Smacallan		}
501a3a2ba44Smacallan		dst += dstpitch;
502a3a2ba44Smacallan		src += srcpitch;
503a3a2ba44Smacallan	}
504a3a2ba44Smacallan}
505a3a2ba44Smacallan
506a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p,
507a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
508a3a2ba44Smacallan                   uint32_t msk, uint32_t mskpitch,
509a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
510a3a2ba44Smacallan                   int width, int height)
511a3a2ba44Smacallan{
512a3a2ba44Smacallan	uint32_t srcx, dstx, mskx, m;
513a3a2ba44Smacallan	int line, x, i;
514a3a2ba44Smacallan
515a3a2ba44Smacallan	ENTER;
516a3a2ba44Smacallan
517a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
518a3a2ba44Smacallan	for (line = 0; line < height; line++) {
519a3a2ba44Smacallan		srcx = src;
520a3a2ba44Smacallan		mskx = msk;
521a3a2ba44Smacallan		dstx = dst;
522a3a2ba44Smacallan
523a3a2ba44Smacallan		for (x = 0; x < width; x++) {
524a3a2ba44Smacallan			/* fetch source pixel */
525a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
526a3a2ba44Smacallan			/* fetch mask */
527a3a2ba44Smacallan			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
528a3a2ba44Smacallan			/* fetch dst pixel */
529a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
530f7cb851fSmacallan			/* stick mask alpha into SCAM */
531a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
532f7cb851fSmacallan			    SX_ORS(9, 0, R_SCAM, 0));
533f7cb851fSmacallan			/* apply mask */
534a3a2ba44Smacallan			/* src is premultiplied with alpha */
535f7cb851fSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
536f7cb851fSmacallan			    SX_SAXP16X16SR8(12, 0, 16, 3));
537a3a2ba44Smacallan			/* write inverted alpha into SCAM */
538a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
539a3a2ba44Smacallan			    SX_XORV(16, 8, R_SCAM, 0));
540a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
541a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
542239808baSmacallan			    SX_SAXP16X16SR8(20, 16, 24, 3));
543a3a2ba44Smacallan			write_sx_io(p, dstx,
544a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
545a3a2ba44Smacallan			srcx += 4;
546a3a2ba44Smacallan			mskx += 1;
547a3a2ba44Smacallan			dstx += 4;
548a3a2ba44Smacallan		}
549a3a2ba44Smacallan		src += srcpitch;
550a3a2ba44Smacallan		msk += mskpitch;
551a3a2ba44Smacallan		dst += dstpitch;
552a3a2ba44Smacallan	}
553a3a2ba44Smacallan}
5546bdc2ffdSmacallan
5556bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
5566bdc2ffdSmacallan                   uint32_t src, uint32_t srcpitch,
5576bdc2ffdSmacallan                   uint32_t msk, uint32_t mskpitch,
5586bdc2ffdSmacallan                   uint32_t dst, uint32_t dstpitch,
5596bdc2ffdSmacallan                   int width, int height)
5606bdc2ffdSmacallan{
5616bdc2ffdSmacallan	uint32_t srcx, dstx, mskx, m;
5626bdc2ffdSmacallan	int line, x, i;
5636bdc2ffdSmacallan
5646bdc2ffdSmacallan	ENTER;
5656bdc2ffdSmacallan
5666bdc2ffdSmacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
5676bdc2ffdSmacallan	for (line = 0; line < height; line++) {
5686bdc2ffdSmacallan		srcx = src;
5696bdc2ffdSmacallan		mskx = msk;
5706bdc2ffdSmacallan		dstx = dst;
5716bdc2ffdSmacallan
5726bdc2ffdSmacallan		for (x = 0; x < width; x++) {
5736bdc2ffdSmacallan			/* fetch source pixel */
5746bdc2ffdSmacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
575239808baSmacallan			/* set src alpha to 0xff */
576239808baSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
577239808baSmacallan			    SX_ORS(8, 0, 12, 0));
5786bdc2ffdSmacallan			/* fetch mask */
5796bdc2ffdSmacallan			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
5806bdc2ffdSmacallan			/* fetch dst pixel */
5816bdc2ffdSmacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
5826bdc2ffdSmacallan			/* write alpha into SCAM */
5836bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
5846bdc2ffdSmacallan			    SX_ORS(9, 0, R_SCAM, 0));
5856bdc2ffdSmacallan			/* src * alpha + R0 */
5866bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
587239808baSmacallan			    SX_SAXP16X16SR8(12, 0, 16, 3));
5886bdc2ffdSmacallan			/* write inverted alpha into SCAM */
5896bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
5906bdc2ffdSmacallan			    SX_XORV(9, 8, R_SCAM, 0));
5916bdc2ffdSmacallan			/* dst * (1 - alpha) + R[13:15] */
5926bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
593239808baSmacallan			    SX_SAXP16X16SR8(20, 16, 24, 3));
5946bdc2ffdSmacallan			write_sx_io(p, dstx,
5956bdc2ffdSmacallan			    SX_STUQ0C(24, 0, dstx & 7));
5966bdc2ffdSmacallan			srcx += 4;
5976bdc2ffdSmacallan			mskx += 1;
5986bdc2ffdSmacallan			dstx += 4;
5996bdc2ffdSmacallan		}
6006bdc2ffdSmacallan		src += srcpitch;
6016bdc2ffdSmacallan		msk += mskpitch;
6026bdc2ffdSmacallan		dst += dstpitch;
6036bdc2ffdSmacallan	}
6046bdc2ffdSmacallan}
605fa158432Smacallan
606fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
607fa158432Smacallan                   uint32_t src, uint32_t srcpitch,
608fa158432Smacallan                   uint32_t msk, uint32_t mskpitch,
609fa158432Smacallan                   uint32_t dst, uint32_t dstpitch,
610fa158432Smacallan                   int width, int height)
611fa158432Smacallan{
612fa158432Smacallan	uint32_t srcx, dstx, mskx, m;
613fa158432Smacallan	int line, x, i;
614fa158432Smacallan
615fa158432Smacallan	ENTER;
616fa158432Smacallan
617fa158432Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
618fa158432Smacallan	for (line = 0; line < height; line++) {
619fa158432Smacallan		srcx = src;
620fa158432Smacallan		mskx = msk;
621fa158432Smacallan		dstx = dst;
622fa158432Smacallan
623fa158432Smacallan		for (x = 0; x < width; x++) {
624fa158432Smacallan			/* fetch source pixel */
625fa158432Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
626fa158432Smacallan			/* fetch mask */
627239808baSmacallan			write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7));
628fa158432Smacallan			/* fetch dst pixel */
629fa158432Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
63081a370e6Smacallan			/* set src alpha to 0xff */
631fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
63281a370e6Smacallan			    SX_ORS(8, 0, 12, 0));
633239808baSmacallan			/* mask alpha to SCAM */
634239808baSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
635239808baSmacallan			    SX_ORS(16, 0, R_SCAM, 0));
636239808baSmacallan			/* src * alpha */
637fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
638239808baSmacallan			    SX_SAXP16X16SR8(12, 0, 24, 3));
639fa158432Smacallan			/* write inverted alpha into SCAM */
640fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
641239808baSmacallan			    SX_XORS(16, 8, R_SCAM, 0));
642239808baSmacallan			/* dst * (1 - alpha) + R[24:31] */
643fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
644239808baSmacallan			    SX_SAXP16X16SR8(20, 24, 28, 3));
645fa158432Smacallan			write_sx_io(p, dstx,
64681a370e6Smacallan			    SX_STUQ0C(28, 0, dstx & 7));
647fa158432Smacallan			srcx += 4;
648fa158432Smacallan			mskx += 4;
649fa158432Smacallan			dstx += 4;
650fa158432Smacallan		}
651fa158432Smacallan		src += srcpitch;
652fa158432Smacallan		msk += mskpitch;
653fa158432Smacallan		dst += dstpitch;
654fa158432Smacallan	}
655fa158432Smacallan}
656