cg14_render.c revision c88c16f8
1c88c16f8Smacallan/* $NetBSD: cg14_render.c,v 1.8 2016/09/16 21:16:37 macallan Exp $ */
2a3a2ba44Smacallan/*
3a3a2ba44Smacallan * Copyright (c) 2013 Michael Lorenz
4a3a2ba44Smacallan * All rights reserved.
5a3a2ba44Smacallan *
6a3a2ba44Smacallan * Redistribution and use in source and binary forms, with or without
7a3a2ba44Smacallan * modification, are permitted provided that the following conditions
8a3a2ba44Smacallan * are met:
9a3a2ba44Smacallan *
10a3a2ba44Smacallan *    - Redistributions of source code must retain the above copyright
11a3a2ba44Smacallan *      notice, this list of conditions and the following disclaimer.
12a3a2ba44Smacallan *    - Redistributions in binary form must reproduce the above
13a3a2ba44Smacallan *      copyright notice, this list of conditions and the following
14a3a2ba44Smacallan *      disclaimer in the documentation and/or other materials provided
15a3a2ba44Smacallan *      with the distribution.
16a3a2ba44Smacallan *
17a3a2ba44Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18a3a2ba44Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19a3a2ba44Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20a3a2ba44Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21a3a2ba44Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22a3a2ba44Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23a3a2ba44Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24a3a2ba44Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25a3a2ba44Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26a3a2ba44Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27a3a2ba44Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28a3a2ba44Smacallan * POSSIBILITY OF SUCH DAMAGE.
29a3a2ba44Smacallan *
30a3a2ba44Smacallan */
31a3a2ba44Smacallan
32c88c16f8Smacallan#ifdef HAVE_CONFIG_H
33c88c16f8Smacallan#include "config.h"
34c88c16f8Smacallan#endif
35c88c16f8Smacallan
36a3a2ba44Smacallan#include <sys/types.h>
37a3a2ba44Smacallan
/* all drivers need this */
39a3a2ba44Smacallan#include "xf86.h"
40a3a2ba44Smacallan#include "xf86_OSproc.h"
41a3a2ba44Smacallan#include "compiler.h"
42a3a2ba44Smacallan
43a3a2ba44Smacallan#include "cg14.h"
44a3a2ba44Smacallan#include <sparc/sxreg.h>
45a3a2ba44Smacallan
/*
 * Build-time switches for this file:
 *   SX_SINGLE        - use the one-pixel-per-iteration loops in the
 *                      Over*Solid functions; the #else arms process four
 *                      pixels per iteration
 *   SX_RENDER_DEBUG  - turn ENTER / DPRINTF into real xf86Msg() calls
 *   SX_ADD_SOFTWARE  - make CG14Comp_Add8 add the bytes in software
 */
#define SX_SINGLE
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

/*
 * The guard must use the same name as the switch above, otherwise
 * uncommenting SX_RENDER_DEBUG would silently leave tracing disabled.
 */
#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Eight-step character ramp used by the SX_DEBUG dumps, indexed with
 * (value >> 5).  Exactly eight characters, no terminating NUL.
 */
char c[8] = " .,:+*oX";
59a3a2ba44Smacallan
6078cb1511Smacallan
6178cb1511Smacallanvoid CG14Comp_Over32Solid(Cg14Ptr p,
6278cb1511Smacallan                   uint32_t src, uint32_t srcpitch,
6378cb1511Smacallan                   uint32_t dst, uint32_t dstpitch,
6478cb1511Smacallan                   int width, int height)
6578cb1511Smacallan{
6678cb1511Smacallan	uint32_t msk = src, mskx, dstx, m;
6778cb1511Smacallan	int line, x, i;
6878cb1511Smacallan
6978cb1511Smacallan	ENTER;
70f7cb851fSmacallan
7178cb1511Smacallan	for (line = 0; line < height; line++) {
7278cb1511Smacallan		mskx = msk;
7378cb1511Smacallan		dstx = dst;
7478cb1511Smacallan#ifdef SX_SINGLE
7578cb1511Smacallan
7678cb1511Smacallan		for (x = 0; x < width; x++) {
7778cb1511Smacallan			m = *(volatile uint32_t *)(p->fb + mskx);
7878cb1511Smacallan			m = m >> 24;
7978cb1511Smacallan			if (m == 0) {
8078cb1511Smacallan				/* nothing to do - all transparent */
8178cb1511Smacallan			} else if (m == 0xff) {
8278cb1511Smacallan				/* all opaque */
8378cb1511Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
8478cb1511Smacallan			} else {
8578cb1511Smacallan				/* fetch alpha value, stick it into scam */
8678cb1511Smacallan				/* mask is in R[12:15] */
8778cb1511Smacallan				/*write_sx_io(p, mskx,
8878cb1511Smacallan				    SX_LDUQ0(12, 0, mskx & 7));*/
8978cb1511Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
9078cb1511Smacallan				/* fetch dst pixel */
9178cb1511Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
9278cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
9378cb1511Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
9478cb1511Smacallan				/*
9578cb1511Smacallan				 * src * alpha + R0
9678cb1511Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
9778cb1511Smacallan				 */
9878cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
9978cb1511Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
10078cb1511Smacallan
10178cb1511Smacallan				/* invert SCAM */
10278cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
10378cb1511Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
10478cb1511Smacallan#ifdef SX_DEBUG
10578cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
10678cb1511Smacallan				    SX_XORV(12, 8, 13, 0));
10778cb1511Smacallan#endif
10878cb1511Smacallan				/* dst * (1 - alpha) + R[13:15] */
10978cb1511Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
11078cb1511Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
11178cb1511Smacallan				write_sx_io(p, dstx,
11278cb1511Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
11378cb1511Smacallan			}
11478cb1511Smacallan			dstx += 4;
11578cb1511Smacallan			mskx += 4;
11678cb1511Smacallan		}
11778cb1511Smacallan#else
11878cb1511Smacallan		for (x = 0; x < width; x += 4) {
11978cb1511Smacallan			/* fetch 4 mask values */
12078cb1511Smacallan			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
12178cb1511Smacallan			/* fetch destination pixels */
12278cb1511Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
12378cb1511Smacallan			/* duplicate them for all channels */
1246bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
1256bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
1266bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
12778cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
12878cb1511Smacallan			/* generate inverted alpha */
12978cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
13078cb1511Smacallan			    SX_XORS(12, 8, 28, 15));
13178cb1511Smacallan			/* multiply source */
13278cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
13378cb1511Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
13478cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
13578cb1511Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
13678cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
13778cb1511Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
13878cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
13978cb1511Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
14078cb1511Smacallan			/* multiply dest */
14178cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
14278cb1511Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
14378cb1511Smacallan			/* add up */
14478cb1511Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
14578cb1511Smacallan			    SX_ADDV(44, 76, 92, 15));
14678cb1511Smacallan			/* write back */
14778cb1511Smacallan			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
14878cb1511Smacallan			dstx += 16;
14978cb1511Smacallan			mskx += 16;
15078cb1511Smacallan		}
15178cb1511Smacallan#endif
15278cb1511Smacallan		dst += dstpitch;
15378cb1511Smacallan		msk += srcpitch;
15478cb1511Smacallan	}
15578cb1511Smacallan}
15678cb1511Smacallan
157a3a2ba44Smacallanvoid CG14Comp_Over8Solid(Cg14Ptr p,
158a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
159a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
160a3a2ba44Smacallan                   int width, int height)
161a3a2ba44Smacallan{
162a3a2ba44Smacallan	uint32_t msk = src, mskx, dstx, m;
163a3a2ba44Smacallan	int line, x, i;
164a3a2ba44Smacallan#ifdef SX_DEBUG
165a3a2ba44Smacallan	char buffer[256];
166a3a2ba44Smacallan#endif
167a3a2ba44Smacallan	ENTER;
168a3a2ba44Smacallan
169a3a2ba44Smacallan	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
170a3a2ba44Smacallan	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
171a3a2ba44Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
172a3a2ba44Smacallan	for (line = 0; line < height; line++) {
173a3a2ba44Smacallan		mskx = msk;
174a3a2ba44Smacallan		dstx = dst;
175a3a2ba44Smacallan#ifdef SX_SINGLE
176a3a2ba44Smacallan
177a3a2ba44Smacallan		for (x = 0; x < width; x++) {
178a3a2ba44Smacallan			m = *(volatile uint8_t *)(p->fb + mskx);
179a3a2ba44Smacallan#ifdef SX_DEBUG
180a3a2ba44Smacallan			buffer[x] = c[m >> 5];
181a3a2ba44Smacallan#endif
182a3a2ba44Smacallan			if (m == 0) {
183a3a2ba44Smacallan				/* nothing to do - all transparent */
184a3a2ba44Smacallan			} else if (m == 0xff) {
185a3a2ba44Smacallan				/* all opaque */
186a3a2ba44Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
187a3a2ba44Smacallan			} else {
188a3a2ba44Smacallan				/* fetch alpha value, stick it into scam */
189a3a2ba44Smacallan				/* mask is in R[12:15] */
190a3a2ba44Smacallan				/*write_sx_io(p, mskx & ~7,
191a3a2ba44Smacallan				    SX_LDB(12, 0, mskx & 7));*/
192a3a2ba44Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
193a3a2ba44Smacallan				/* fetch dst pixel */
194a3a2ba44Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
195a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
196a3a2ba44Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
197a3a2ba44Smacallan				/*
198a3a2ba44Smacallan				 * src * alpha + R0
199a3a2ba44Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
200a3a2ba44Smacallan				 */
201a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
202a3a2ba44Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
203a3a2ba44Smacallan
204a3a2ba44Smacallan				/* invert SCAM */
205a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
206a3a2ba44Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
207a3a2ba44Smacallan#ifdef SX_DEBUG
208a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
209a3a2ba44Smacallan				    SX_XORV(12, 8, 13, 0));
210a3a2ba44Smacallan#endif
211a3a2ba44Smacallan				/* dst * (1 - alpha) + R[13:15] */
212a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
213a3a2ba44Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
214a3a2ba44Smacallan				write_sx_io(p, dstx,
215a3a2ba44Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
216a3a2ba44Smacallan			}
217a3a2ba44Smacallan			dstx += 4;
218a3a2ba44Smacallan			mskx += 1;
219a3a2ba44Smacallan		}
220a3a2ba44Smacallan#ifdef SX_DEBUG
221a3a2ba44Smacallan		buffer[x] = 0;
222a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
223a3a2ba44Smacallan#endif
224a3a2ba44Smacallan#else
225a3a2ba44Smacallan		for (x = 0; x < width; x += 4) {
226a3a2ba44Smacallan			/* fetch 4 mask values */
227a3a2ba44Smacallan			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
228a3a2ba44Smacallan			/* fetch destination pixels */
229a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
230a3a2ba44Smacallan			/* duplicate them for all channels */
231c88c16f8Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
232c88c16f8Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
233c88c16f8Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
2346bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
235a3a2ba44Smacallan			/* generate inverted alpha */
236a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
237a3a2ba44Smacallan			    SX_XORS(12, 8, 28, 15));
238a3a2ba44Smacallan			/* multiply source */
239a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
240a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
241a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
242a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
243a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
244a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
245a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
246a3a2ba44Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
247a3a2ba44Smacallan			/* multiply dest */
248a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
249a3a2ba44Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
250a3a2ba44Smacallan			/* add up */
251a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
252a3a2ba44Smacallan			    SX_ADDV(44, 76, 92, 15));
253a3a2ba44Smacallan			/* write back */
254a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
255a3a2ba44Smacallan			dstx += 16;
256a3a2ba44Smacallan			mskx += 4;
257a3a2ba44Smacallan		}
258a3a2ba44Smacallan#endif
259a3a2ba44Smacallan		dst += dstpitch;
260a3a2ba44Smacallan		msk += srcpitch;
261a3a2ba44Smacallan	}
262a3a2ba44Smacallan}
263a3a2ba44Smacallan
264a3a2ba44Smacallanvoid CG14Comp_Add32(Cg14Ptr p,
265a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
266a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
267a3a2ba44Smacallan                   int width, int height)
268a3a2ba44Smacallan{
269a3a2ba44Smacallan	int line;
270a3a2ba44Smacallan	uint32_t srcx, dstx;
271a3a2ba44Smacallan	int full, part, x;
272a3a2ba44Smacallan
273a3a2ba44Smacallan	ENTER;
274a3a2ba44Smacallan	full = width >> 3;	/* chunks of 8 */
275a3a2ba44Smacallan	part = width & 7;	/* leftovers */
276a3a2ba44Smacallan	/* we do this up to 8 pixels at a time */
277a3a2ba44Smacallan	for (line = 0; line < height; line++) {
278a3a2ba44Smacallan		srcx = src;
279a3a2ba44Smacallan		dstx = dst;
280a3a2ba44Smacallan		for (x = 0; x < full; x++) {
281a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
282a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
283a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
284a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
285a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
286a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
287a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
288a3a2ba44Smacallan			srcx += 128;
289a3a2ba44Smacallan			dstx += 128;
290a3a2ba44Smacallan		}
291a3a2ba44Smacallan
292a3a2ba44Smacallan		/* do leftovers */
293a3a2ba44Smacallan		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
294a3a2ba44Smacallan		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
295a3a2ba44Smacallan		if (part & 16) {
296a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
297a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
298a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
299a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, part - 17));
300a3a2ba44Smacallan		} else {
301a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
302a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, part - 1));
303a3a2ba44Smacallan		}
304a3a2ba44Smacallan		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
305a3a2ba44Smacallan
306a3a2ba44Smacallan		/* next line */
307a3a2ba44Smacallan		src += srcpitch;
308a3a2ba44Smacallan		dst += dstpitch;
309a3a2ba44Smacallan	}
310a3a2ba44Smacallan}
311a3a2ba44Smacallan
312a3a2ba44Smacallanvoid CG14Comp_Add8(Cg14Ptr p,
313a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
314a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
315a3a2ba44Smacallan                   int width, int height)
316a3a2ba44Smacallan{
317a3a2ba44Smacallan	int line;
318a3a2ba44Smacallan	uint32_t srcx, dstx, srcoff, dstoff;
319a3a2ba44Smacallan	int pre, full, part, x;
320a3a2ba44Smacallan	uint8_t *d;
321a3a2ba44Smacallan	char buffer[256];
322a3a2ba44Smacallan	ENTER;
323a3a2ba44Smacallan
324a3a2ba44Smacallan	srcoff = src & 7;
325a3a2ba44Smacallan	src &= ~7;
326a3a2ba44Smacallan	dstoff = dst & 7;
327a3a2ba44Smacallan	dst &= ~7;
328a3a2ba44Smacallan	full = width >> 5;	/* chunks of 32 */
329a3a2ba44Smacallan	part = width & 31;	/* leftovers */
330a3a2ba44Smacallan
331a3a2ba44Smacallan#ifdef SX_DEBUG
332a3a2ba44Smacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
333a3a2ba44Smacallan	    width, height, full, part);
334a3a2ba44Smacallan#endif
335a3a2ba44Smacallan	/* we do this up to 32 pixels at a time */
336a3a2ba44Smacallan	for (line = 0; line < height; line++) {
337a3a2ba44Smacallan		srcx = src;
338a3a2ba44Smacallan		dstx = dst;
339a3a2ba44Smacallan#ifdef SX_ADD_SOFTWARE
340a3a2ba44Smacallan		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
341a3a2ba44Smacallan		d = (uint8_t *)(p->fb + dstx + dstoff);
342a3a2ba44Smacallan		for (x = 0; x < width; x++) {
343a3a2ba44Smacallan			d[x] = min(255, s[x] + d[x]);
344a3a2ba44Smacallan		}
345a3a2ba44Smacallan#else
346a3a2ba44Smacallan		for (x = 0; x < full; x++) {
347a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
348a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
349a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
350a3a2ba44Smacallan			    SX_ADDV(8, 40, 72, 15));
351a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
352a3a2ba44Smacallan			    SX_ADDV(24, 56, 88, 15));
353a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
354a3a2ba44Smacallan			srcx += 32;
355a3a2ba44Smacallan			dstx += 32;
356a3a2ba44Smacallan		}
357a3a2ba44Smacallan
358a3a2ba44Smacallan		if (part > 0) {
359a3a2ba44Smacallan			/* do leftovers */
360a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
361a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
362a3a2ba44Smacallan			if (part > 16) {
363a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
364a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, 15));
365a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
366a3a2ba44Smacallan				    SX_ADDV(24, 56, 88, part - 17));
367a3a2ba44Smacallan			} else {
368a3a2ba44Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
369a3a2ba44Smacallan				    SX_ADDV(8, 40, 72, part - 1));
370a3a2ba44Smacallan			}
371a3a2ba44Smacallan			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
372a3a2ba44Smacallan		}
373a3a2ba44Smacallan#endif
374a3a2ba44Smacallan#ifdef SX_DEBUG
375a3a2ba44Smacallan		d = (uint8_t *)(p->fb + src + srcoff);
376a3a2ba44Smacallan		for (x = 0; x < width; x++) {
377a3a2ba44Smacallan			buffer[x] = c[d[x]>>5];
378a3a2ba44Smacallan		}
379a3a2ba44Smacallan		buffer[x] = 0;
380a3a2ba44Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
381a3a2ba44Smacallan#endif
382a3a2ba44Smacallan		/* next line */
383a3a2ba44Smacallan		src += srcpitch;
384a3a2ba44Smacallan		dst += dstpitch;
385a3a2ba44Smacallan	}
386a3a2ba44Smacallan}
387a3a2ba44Smacallan
388a3a2ba44Smacallanvoid CG14Comp_Over32(Cg14Ptr p,
389a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
390a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
391a3a2ba44Smacallan                   int width, int height)
392a3a2ba44Smacallan{
393a3a2ba44Smacallan	uint32_t srcx, dstx, m;
394a3a2ba44Smacallan	int line, x, i;
395a3a2ba44Smacallan
396a3a2ba44Smacallan	ENTER;
397a3a2ba44Smacallan
398a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
399a3a2ba44Smacallan	for (line = 0; line < height; line++) {
400a3a2ba44Smacallan		srcx = src;
401a3a2ba44Smacallan		dstx = dst;
402a3a2ba44Smacallan
403a3a2ba44Smacallan		for (x = 0; x < width; x++) {
404a3a2ba44Smacallan			/* fetch source pixel */
405a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
406a3a2ba44Smacallan			/* fetch dst pixel */
407a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
408a3a2ba44Smacallan			/* src is premultiplied with alpha */
409a3a2ba44Smacallan			/* write inverted alpha into SCAM */
410a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
411a3a2ba44Smacallan			    SX_XORV(12, 8, R_SCAM, 0));
412a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
413a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
414239808baSmacallan			    SX_SAXP16X16SR8(20, 12, 24, 3));
415a3a2ba44Smacallan			write_sx_io(p, dstx,
416a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
417a3a2ba44Smacallan			dstx += 4;
418a3a2ba44Smacallan			srcx += 4;
419a3a2ba44Smacallan		}
420a3a2ba44Smacallan		dst += dstpitch;
421a3a2ba44Smacallan		src += srcpitch;
422a3a2ba44Smacallan	}
423a3a2ba44Smacallan}
424a3a2ba44Smacallan
425a3a2ba44Smacallanvoid CG14Comp_Over32Mask(Cg14Ptr p,
426a3a2ba44Smacallan                   uint32_t src, uint32_t srcpitch,
427a3a2ba44Smacallan                   uint32_t msk, uint32_t mskpitch,
428a3a2ba44Smacallan                   uint32_t dst, uint32_t dstpitch,
429a3a2ba44Smacallan                   int width, int height)
430a3a2ba44Smacallan{
431a3a2ba44Smacallan	uint32_t srcx, dstx, mskx, m;
432a3a2ba44Smacallan	int line, x, i;
433a3a2ba44Smacallan
434a3a2ba44Smacallan	ENTER;
435a3a2ba44Smacallan
436a3a2ba44Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
437a3a2ba44Smacallan	for (line = 0; line < height; line++) {
438a3a2ba44Smacallan		srcx = src;
439a3a2ba44Smacallan		mskx = msk;
440a3a2ba44Smacallan		dstx = dst;
441a3a2ba44Smacallan
442a3a2ba44Smacallan		for (x = 0; x < width; x++) {
443a3a2ba44Smacallan			/* fetch source pixel */
444a3a2ba44Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
445a3a2ba44Smacallan			/* fetch mask */
446a3a2ba44Smacallan			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
447a3a2ba44Smacallan			/* fetch dst pixel */
448a3a2ba44Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
449f7cb851fSmacallan			/* stick mask alpha into SCAM */
450a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
451f7cb851fSmacallan			    SX_ORS(9, 0, R_SCAM, 0));
452f7cb851fSmacallan			/* apply mask */
453a3a2ba44Smacallan			/* src is premultiplied with alpha */
454f7cb851fSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
455f7cb851fSmacallan			    SX_SAXP16X16SR8(12, 0, 16, 3));
456a3a2ba44Smacallan			/* write inverted alpha into SCAM */
457a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
458a3a2ba44Smacallan			    SX_XORV(16, 8, R_SCAM, 0));
459a3a2ba44Smacallan			/* dst * (1 - alpha) + R[13:15] */
460a3a2ba44Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
461239808baSmacallan			    SX_SAXP16X16SR8(20, 16, 24, 3));
462a3a2ba44Smacallan			write_sx_io(p, dstx,
463a3a2ba44Smacallan			    SX_STUQ0C(24, 0, dstx & 7));
464a3a2ba44Smacallan			srcx += 4;
465a3a2ba44Smacallan			mskx += 1;
466a3a2ba44Smacallan			dstx += 4;
467a3a2ba44Smacallan		}
468a3a2ba44Smacallan		src += srcpitch;
469a3a2ba44Smacallan		msk += mskpitch;
470a3a2ba44Smacallan		dst += dstpitch;
471a3a2ba44Smacallan	}
472a3a2ba44Smacallan}
4736bdc2ffdSmacallan
4746bdc2ffdSmacallanvoid CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
4756bdc2ffdSmacallan                   uint32_t src, uint32_t srcpitch,
4766bdc2ffdSmacallan                   uint32_t msk, uint32_t mskpitch,
4776bdc2ffdSmacallan                   uint32_t dst, uint32_t dstpitch,
4786bdc2ffdSmacallan                   int width, int height)
4796bdc2ffdSmacallan{
4806bdc2ffdSmacallan	uint32_t srcx, dstx, mskx, m;
4816bdc2ffdSmacallan	int line, x, i;
4826bdc2ffdSmacallan
4836bdc2ffdSmacallan	ENTER;
4846bdc2ffdSmacallan
4856bdc2ffdSmacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
4866bdc2ffdSmacallan	for (line = 0; line < height; line++) {
4876bdc2ffdSmacallan		srcx = src;
4886bdc2ffdSmacallan		mskx = msk;
4896bdc2ffdSmacallan		dstx = dst;
4906bdc2ffdSmacallan
4916bdc2ffdSmacallan		for (x = 0; x < width; x++) {
4926bdc2ffdSmacallan			/* fetch source pixel */
4936bdc2ffdSmacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
494239808baSmacallan			/* set src alpha to 0xff */
495239808baSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
496239808baSmacallan			    SX_ORS(8, 0, 12, 0));
4976bdc2ffdSmacallan			/* fetch mask */
4986bdc2ffdSmacallan			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
4996bdc2ffdSmacallan			/* fetch dst pixel */
5006bdc2ffdSmacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
5016bdc2ffdSmacallan			/* write alpha into SCAM */
5026bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
5036bdc2ffdSmacallan			    SX_ORS(9, 0, R_SCAM, 0));
5046bdc2ffdSmacallan			/* src * alpha + R0 */
5056bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
506239808baSmacallan			    SX_SAXP16X16SR8(12, 0, 16, 3));
5076bdc2ffdSmacallan			/* write inverted alpha into SCAM */
5086bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
5096bdc2ffdSmacallan			    SX_XORV(9, 8, R_SCAM, 0));
5106bdc2ffdSmacallan			/* dst * (1 - alpha) + R[13:15] */
5116bdc2ffdSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
512239808baSmacallan			    SX_SAXP16X16SR8(20, 16, 24, 3));
5136bdc2ffdSmacallan			write_sx_io(p, dstx,
5146bdc2ffdSmacallan			    SX_STUQ0C(24, 0, dstx & 7));
5156bdc2ffdSmacallan			srcx += 4;
5166bdc2ffdSmacallan			mskx += 1;
5176bdc2ffdSmacallan			dstx += 4;
5186bdc2ffdSmacallan		}
5196bdc2ffdSmacallan		src += srcpitch;
5206bdc2ffdSmacallan		msk += mskpitch;
5216bdc2ffdSmacallan		dst += dstpitch;
5226bdc2ffdSmacallan	}
5236bdc2ffdSmacallan}
524fa158432Smacallan
525fa158432Smacallanvoid CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
526fa158432Smacallan                   uint32_t src, uint32_t srcpitch,
527fa158432Smacallan                   uint32_t msk, uint32_t mskpitch,
528fa158432Smacallan                   uint32_t dst, uint32_t dstpitch,
529fa158432Smacallan                   int width, int height)
530fa158432Smacallan{
531fa158432Smacallan	uint32_t srcx, dstx, mskx, m;
532fa158432Smacallan	int line, x, i;
533fa158432Smacallan
534fa158432Smacallan	ENTER;
535fa158432Smacallan
536fa158432Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
537fa158432Smacallan	for (line = 0; line < height; line++) {
538fa158432Smacallan		srcx = src;
539fa158432Smacallan		mskx = msk;
540fa158432Smacallan		dstx = dst;
541fa158432Smacallan
542fa158432Smacallan		for (x = 0; x < width; x++) {
543fa158432Smacallan			/* fetch source pixel */
544fa158432Smacallan			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
545fa158432Smacallan			/* fetch mask */
546239808baSmacallan			write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7));
547fa158432Smacallan			/* fetch dst pixel */
548fa158432Smacallan			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
54981a370e6Smacallan			/* set src alpha to 0xff */
550fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
55181a370e6Smacallan			    SX_ORS(8, 0, 12, 0));
552239808baSmacallan			/* mask alpha to SCAM */
553239808baSmacallan			write_sx_reg(p, SX_INSTRUCTIONS,
554239808baSmacallan			    SX_ORS(16, 0, R_SCAM, 0));
555239808baSmacallan			/* src * alpha */
556fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
557239808baSmacallan			    SX_SAXP16X16SR8(12, 0, 24, 3));
558fa158432Smacallan			/* write inverted alpha into SCAM */
559fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
560239808baSmacallan			    SX_XORS(16, 8, R_SCAM, 0));
561239808baSmacallan			/* dst * (1 - alpha) + R[24:31] */
562fa158432Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
563239808baSmacallan			    SX_SAXP16X16SR8(20, 24, 28, 3));
564fa158432Smacallan			write_sx_io(p, dstx,
56581a370e6Smacallan			    SX_STUQ0C(28, 0, dstx & 7));
566fa158432Smacallan			srcx += 4;
567fa158432Smacallan			mskx += 4;
568fa158432Smacallan			dstx += 4;
569fa158432Smacallan		}
570fa158432Smacallan		src += srcpitch;
571fa158432Smacallan		msk += mskpitch;
572fa158432Smacallan		dst += dstpitch;
573fa158432Smacallan	}
574fa158432Smacallan}
575