/* cg14_render.c, revision a3a2ba44 */
/* $NetBSD: cg14_render.c,v 1.1 2013/06/25 12:26:57 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
31
32#include <sys/types.h>
33
34/* all driver need this */
35#include "xf86.h"
36#include "xf86_OSproc.h"
37#include "compiler.h"
38
39#include "cg14.h"
40#include <sparc/sxreg.h>
41
42#define SX_SINGLE
43/*#define SX_RENDER_DEBUG*/
44/*#define SX_ADD_SOFTWARE*/
45
46#ifdef SX__RENDER_DEBUG
47#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
48#define DPRINTF xf86Msg
49#else
50#define ENTER
51#define DPRINTF while (0) xf86Msg
52#endif
53
54char c[8] = " .,:+*oX";
55
56void CG14Comp_Over8Solid(Cg14Ptr p,
57                   uint32_t src, uint32_t srcpitch,
58                   uint32_t dst, uint32_t dstpitch,
59                   int width, int height)
60{
61	uint32_t msk = src, mskx, dstx, m;
62	int line, x, i;
63#ifdef SX_DEBUG
64	char buffer[256];
65#endif
66	ENTER;
67
68	/* first get the source colour */
69	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
70	write_sx_reg(p, SX_QUEUED(8), 0xff);
71	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
72	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
73	    *(uint32_t *)(p->fb + p->srcoff));
74	for (line = 0; line < height; line++) {
75		mskx = msk;
76		dstx = dst;
77#ifdef SX_SINGLE
78
79		for (x = 0; x < width; x++) {
80			m = *(volatile uint8_t *)(p->fb + mskx);
81#ifdef SX_DEBUG
82			buffer[x] = c[m >> 5];
83#endif
84			if (m == 0) {
85				/* nothing to do - all transparent */
86			} else if (m == 0xff) {
87				/* all opaque */
88				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
89			} else {
90				/* fetch alpha value, stick it into scam */
91				/* mask is in R[12:15] */
92				/*write_sx_io(p, mskx & ~7,
93				    SX_LDB(12, 0, mskx & 7));*/
94				write_sx_reg(p, SX_QUEUED(12), m);
95				/* fetch dst pixel */
96				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
97				write_sx_reg(p, SX_INSTRUCTIONS,
98				    SX_ORV(12, 0, R_SCAM, 0));
99				/*
100				 * src * alpha + R0
101				 * R[9:11] * SCAM + R0 -> R[17:19]
102				 */
103				write_sx_reg(p, SX_INSTRUCTIONS,
104				    SX_SAXP16X16SR8(9, 0, 17, 2));
105
106				/* invert SCAM */
107				write_sx_reg(p, SX_INSTRUCTIONS,
108				    SX_XORV(12, 8, R_SCAM, 0));
109#ifdef SX_DEBUG
110				write_sx_reg(p, SX_INSTRUCTIONS,
111				    SX_XORV(12, 8, 13, 0));
112#endif
113				/* dst * (1 - alpha) + R[13:15] */
114				write_sx_reg(p, SX_INSTRUCTIONS,
115				    SX_SAXP16X16SR8(21, 17, 25, 2));
116				write_sx_io(p, dstx,
117				    SX_STUQ0C(24, 0, dstx & 7));
118			}
119			dstx += 4;
120			mskx += 1;
121		}
122#ifdef SX_DEBUG
123		buffer[x] = 0;
124		xf86Msg(X_ERROR, "%s\n", buffer);
125#endif
126#else
127		for (x = 0; x < width; x += 4) {
128			/* fetch 4 mask values */
129			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
130			/* fetch destination pixels */
131			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
132			/* duplicate them for all channels */
133			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
134			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
135			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
136			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
137			/* generate inverted alpha */
138			write_sx_reg(p, SX_INSTRUCTIONS,
139			    SX_XORS(12, 8, 28, 15));
140			/* multiply source */
141			write_sx_reg(p, SX_INSTRUCTIONS,
142			    SX_MUL16X16SR8(8, 12, 44, 3));
143			write_sx_reg(p, SX_INSTRUCTIONS,
144			    SX_MUL16X16SR8(8, 16, 48, 3));
145			write_sx_reg(p, SX_INSTRUCTIONS,
146			    SX_MUL16X16SR8(8, 20, 52, 3));
147			write_sx_reg(p, SX_INSTRUCTIONS,
148			    SX_MUL16X16SR8(8, 24, 56, 3));
149			/* multiply dest */
150			write_sx_reg(p, SX_INSTRUCTIONS,
151			    SX_MUL16X16SR8(28, 60, 76, 15));
152			/* add up */
153			write_sx_reg(p, SX_INSTRUCTIONS,
154			    SX_ADDV(44, 76, 92, 15));
155			/* write back */
156			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
157			dstx += 16;
158			mskx += 4;
159		}
160#endif
161		dst += dstpitch;
162		msk += srcpitch;
163	}
164}
165
166void CG14Comp_Add32(Cg14Ptr p,
167                   uint32_t src, uint32_t srcpitch,
168                   uint32_t dst, uint32_t dstpitch,
169                   int width, int height)
170{
171	int line;
172	uint32_t srcx, dstx;
173	int full, part, x;
174
175	ENTER;
176	full = width >> 3;	/* chunks of 8 */
177	part = width & 7;	/* leftovers */
178	/* we do this up to 8 pixels at a time */
179	for (line = 0; line < height; line++) {
180		srcx = src;
181		dstx = dst;
182		for (x = 0; x < full; x++) {
183			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
184			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
185			write_sx_reg(p, SX_INSTRUCTIONS,
186			    SX_ADDV(8, 40, 72, 15));
187			write_sx_reg(p, SX_INSTRUCTIONS,
188			    SX_ADDV(24, 56, 88, 15));
189			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
190			srcx += 128;
191			dstx += 128;
192		}
193
194		/* do leftovers */
195		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
196		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
197		if (part & 16) {
198			write_sx_reg(p, SX_INSTRUCTIONS,
199			    SX_ADDV(8, 40, 72, 15));
200			write_sx_reg(p, SX_INSTRUCTIONS,
201			    SX_ADDV(24, 56, 88, part - 17));
202		} else {
203			write_sx_reg(p, SX_INSTRUCTIONS,
204			    SX_ADDV(8, 40, 72, part - 1));
205		}
206		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
207
208		/* next line */
209		src += srcpitch;
210		dst += dstpitch;
211	}
212}
213
214void CG14Comp_Add8(Cg14Ptr p,
215                   uint32_t src, uint32_t srcpitch,
216                   uint32_t dst, uint32_t dstpitch,
217                   int width, int height)
218{
219	int line;
220	uint32_t srcx, dstx, srcoff, dstoff;
221	int pre, full, part, x;
222	uint8_t *d;
223	char buffer[256];
224	ENTER;
225
226	srcoff = src & 7;
227	src &= ~7;
228	dstoff = dst & 7;
229	dst &= ~7;
230	full = width >> 5;	/* chunks of 32 */
231	part = width & 31;	/* leftovers */
232
233#ifdef SX_DEBUG
234	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
235	    width, height, full, part);
236#endif
237	/* we do this up to 32 pixels at a time */
238	for (line = 0; line < height; line++) {
239		srcx = src;
240		dstx = dst;
241#ifdef SX_ADD_SOFTWARE
242		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
243		d = (uint8_t *)(p->fb + dstx + dstoff);
244		for (x = 0; x < width; x++) {
245			d[x] = min(255, s[x] + d[x]);
246		}
247#else
248		for (x = 0; x < full; x++) {
249			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
250			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
251			write_sx_reg(p, SX_INSTRUCTIONS,
252			    SX_ADDV(8, 40, 72, 15));
253			write_sx_reg(p, SX_INSTRUCTIONS,
254			    SX_ADDV(24, 56, 88, 15));
255			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
256			srcx += 32;
257			dstx += 32;
258		}
259
260		if (part > 0) {
261			/* do leftovers */
262			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
263			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
264			if (part > 16) {
265				write_sx_reg(p, SX_INSTRUCTIONS,
266				    SX_ADDV(8, 40, 72, 15));
267				write_sx_reg(p, SX_INSTRUCTIONS,
268				    SX_ADDV(24, 56, 88, part - 17));
269			} else {
270				write_sx_reg(p, SX_INSTRUCTIONS,
271				    SX_ADDV(8, 40, 72, part - 1));
272			}
273			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
274		}
275#endif
276#ifdef SX_DEBUG
277		d = (uint8_t *)(p->fb + src + srcoff);
278		for (x = 0; x < width; x++) {
279			buffer[x] = c[d[x]>>5];
280		}
281		buffer[x] = 0;
282		xf86Msg(X_ERROR, "%s\n", buffer);
283#endif
284		/* next line */
285		src += srcpitch;
286		dst += dstpitch;
287	}
288}
289
290void CG14Comp_Over32(Cg14Ptr p,
291                   uint32_t src, uint32_t srcpitch,
292                   uint32_t dst, uint32_t dstpitch,
293                   int width, int height)
294{
295	uint32_t srcx, dstx, m;
296	int line, x, i;
297
298	ENTER;
299
300	write_sx_reg(p, SX_QUEUED(8), 0xff);
301	for (line = 0; line < height; line++) {
302		srcx = src;
303		dstx = dst;
304
305		for (x = 0; x < width; x++) {
306			/* fetch source pixel */
307			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
308			/* fetch dst pixel */
309			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
310			/* src is premultiplied with alpha */
311			/* write inverted alpha into SCAM */
312			write_sx_reg(p, SX_INSTRUCTIONS,
313			    SX_XORV(12, 8, R_SCAM, 0));
314			/* dst * (1 - alpha) + R[13:15] */
315			write_sx_reg(p, SX_INSTRUCTIONS,
316			    SX_SAXP16X16SR8(21, 13, 25, 2));
317			write_sx_io(p, dstx,
318			    SX_STUQ0C(24, 0, dstx & 7));
319			dstx += 4;
320			srcx += 4;
321		}
322		dst += dstpitch;
323		src += srcpitch;
324	}
325}
326
327void CG14Comp_Over32Mask(Cg14Ptr p,
328                   uint32_t src, uint32_t srcpitch,
329                   uint32_t msk, uint32_t mskpitch,
330                   uint32_t dst, uint32_t dstpitch,
331                   int width, int height)
332{
333	uint32_t srcx, dstx, mskx, m;
334	int line, x, i;
335
336	ENTER;
337
338	write_sx_reg(p, SX_QUEUED(8), 0xff);
339	for (line = 0; line < height; line++) {
340		srcx = src;
341		mskx = msk;
342		dstx = dst;
343
344		for (x = 0; x < width; x++) {
345			/* fetch source pixel */
346			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
347			/* fetch mask */
348			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
349			/* fetch dst pixel */
350			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
351			/* apply mask */
352			write_sx_reg(p, SX_INSTRUCTIONS,
353			    SX_ANDS(12, 9, 16, 3));
354			/* src is premultiplied with alpha */
355			/* write inverted alpha into SCAM */
356			write_sx_reg(p, SX_INSTRUCTIONS,
357			    SX_XORV(16, 8, R_SCAM, 0));
358			/* dst * (1 - alpha) + R[13:15] */
359			write_sx_reg(p, SX_INSTRUCTIONS,
360			    SX_SAXP16X16SR8(21, 17, 25, 2));
361			write_sx_io(p, dstx,
362			    SX_STUQ0C(24, 0, dstx & 7));
363			srcx += 4;
364			mskx += 1;
365			dstx += 4;
366		}
367		src += srcpitch;
368		msk += mskpitch;
369		dst += dstpitch;
370	}
371}
372