cg14_render.c revision 78cb1511
1/* $NetBSD: cg14_render.c,v 1.2 2013/06/25 12:31:29 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#include <sys/types.h>
33
/* all drivers need this */
35#include "xf86.h"
36#include "xf86_OSproc.h"
37#include "compiler.h"
38
39#include "cg14.h"
40#include <sparc/sxreg.h>
41
/*
 * Build-time switches:
 *   SX_SINGLE       - the Over*Solid routines process one pixel at a time
 *   SX_RENDER_DEBUG - enable the ENTER / DPRINTF tracing below
 *   SX_ADD_SOFTWARE - CG14Comp_Add8 adds with the CPU instead of SX
 */
#define SX_SINGLE
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

/*
 * ENTER logs the name of the current function, DPRINTF forwards to
 * xf86Msg().  Without SX_RENDER_DEBUG, ENTER expands to nothing and
 * DPRINTF calls are never executed.
 * The ENTER definition carries its own ';', use sites add another one.
 */
#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Characters used by the SX_DEBUG dumps, indexed by (8 bit value >> 5).
 * Exactly 8 characters, there is no terminating NUL.
 */
char c[8] = " .,:+*oX";
55
56
57void CG14Comp_Over32Solid(Cg14Ptr p,
58                   uint32_t src, uint32_t srcpitch,
59                   uint32_t dst, uint32_t dstpitch,
60                   int width, int height)
61{
62	uint32_t msk = src, mskx, dstx, m;
63	int line, x, i;
64
65	ENTER;
66	/* first get the source colour */
67	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
68	write_sx_reg(p, SX_QUEUED(8), 0xff);
69	for (line = 0; line < height; line++) {
70		mskx = msk;
71		dstx = dst;
72#ifdef SX_SINGLE
73
74		for (x = 0; x < width; x++) {
75			m = *(volatile uint32_t *)(p->fb + mskx);
76			m = m >> 24;
77			if (m == 0) {
78				/* nothing to do - all transparent */
79			} else if (m == 0xff) {
80				/* all opaque */
81				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
82			} else {
83				/* fetch alpha value, stick it into scam */
84				/* mask is in R[12:15] */
85				/*write_sx_io(p, mskx,
86				    SX_LDUQ0(12, 0, mskx & 7));*/
87				write_sx_reg(p, SX_QUEUED(12), m);
88				/* fetch dst pixel */
89				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
90				write_sx_reg(p, SX_INSTRUCTIONS,
91				    SX_ORV(12, 0, R_SCAM, 0));
92				/*
93				 * src * alpha + R0
94				 * R[9:11] * SCAM + R0 -> R[17:19]
95				 */
96				write_sx_reg(p, SX_INSTRUCTIONS,
97				    SX_SAXP16X16SR8(9, 0, 17, 2));
98
99				/* invert SCAM */
100				write_sx_reg(p, SX_INSTRUCTIONS,
101				    SX_XORV(12, 8, R_SCAM, 0));
102#ifdef SX_DEBUG
103				write_sx_reg(p, SX_INSTRUCTIONS,
104				    SX_XORV(12, 8, 13, 0));
105#endif
106				/* dst * (1 - alpha) + R[13:15] */
107				write_sx_reg(p, SX_INSTRUCTIONS,
108				    SX_SAXP16X16SR8(21, 17, 25, 2));
109				write_sx_io(p, dstx,
110				    SX_STUQ0C(24, 0, dstx & 7));
111			}
112			dstx += 4;
113			mskx += 4;
114		}
115#else
116		for (x = 0; x < width; x += 4) {
117			/* fetch 4 mask values */
118			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
119			/* fetch destination pixels */
120			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
121			/* duplicate them for all channels */
122			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
123			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
124			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
125			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
126			/* generate inverted alpha */
127			write_sx_reg(p, SX_INSTRUCTIONS,
128			    SX_XORS(12, 8, 28, 15));
129			/* multiply source */
130			write_sx_reg(p, SX_INSTRUCTIONS,
131			    SX_MUL16X16SR8(8, 12, 44, 3));
132			write_sx_reg(p, SX_INSTRUCTIONS,
133			    SX_MUL16X16SR8(8, 16, 48, 3));
134			write_sx_reg(p, SX_INSTRUCTIONS,
135			    SX_MUL16X16SR8(8, 20, 52, 3));
136			write_sx_reg(p, SX_INSTRUCTIONS,
137			    SX_MUL16X16SR8(8, 24, 56, 3));
138			/* multiply dest */
139			write_sx_reg(p, SX_INSTRUCTIONS,
140			    SX_MUL16X16SR8(28, 60, 76, 15));
141			/* add up */
142			write_sx_reg(p, SX_INSTRUCTIONS,
143			    SX_ADDV(44, 76, 92, 15));
144			/* write back */
145			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
146			dstx += 16;
147			mskx += 16;
148		}
149#endif
150		dst += dstpitch;
151		msk += srcpitch;
152	}
153}
154
155void CG14Comp_Over8Solid(Cg14Ptr p,
156                   uint32_t src, uint32_t srcpitch,
157                   uint32_t dst, uint32_t dstpitch,
158                   int width, int height)
159{
160	uint32_t msk = src, mskx, dstx, m;
161	int line, x, i;
162#ifdef SX_DEBUG
163	char buffer[256];
164#endif
165	ENTER;
166
167	/* first get the source colour */
168	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
169	write_sx_reg(p, SX_QUEUED(8), 0xff);
170	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
171	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
172	    *(uint32_t *)(p->fb + p->srcoff));
173	for (line = 0; line < height; line++) {
174		mskx = msk;
175		dstx = dst;
176#ifdef SX_SINGLE
177
178		for (x = 0; x < width; x++) {
179			m = *(volatile uint8_t *)(p->fb + mskx);
180#ifdef SX_DEBUG
181			buffer[x] = c[m >> 5];
182#endif
183			if (m == 0) {
184				/* nothing to do - all transparent */
185			} else if (m == 0xff) {
186				/* all opaque */
187				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
188			} else {
189				/* fetch alpha value, stick it into scam */
190				/* mask is in R[12:15] */
191				/*write_sx_io(p, mskx & ~7,
192				    SX_LDB(12, 0, mskx & 7));*/
193				write_sx_reg(p, SX_QUEUED(12), m);
194				/* fetch dst pixel */
195				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
196				write_sx_reg(p, SX_INSTRUCTIONS,
197				    SX_ORV(12, 0, R_SCAM, 0));
198				/*
199				 * src * alpha + R0
200				 * R[9:11] * SCAM + R0 -> R[17:19]
201				 */
202				write_sx_reg(p, SX_INSTRUCTIONS,
203				    SX_SAXP16X16SR8(9, 0, 17, 2));
204
205				/* invert SCAM */
206				write_sx_reg(p, SX_INSTRUCTIONS,
207				    SX_XORV(12, 8, R_SCAM, 0));
208#ifdef SX_DEBUG
209				write_sx_reg(p, SX_INSTRUCTIONS,
210				    SX_XORV(12, 8, 13, 0));
211#endif
212				/* dst * (1 - alpha) + R[13:15] */
213				write_sx_reg(p, SX_INSTRUCTIONS,
214				    SX_SAXP16X16SR8(21, 17, 25, 2));
215				write_sx_io(p, dstx,
216				    SX_STUQ0C(24, 0, dstx & 7));
217			}
218			dstx += 4;
219			mskx += 1;
220		}
221#ifdef SX_DEBUG
222		buffer[x] = 0;
223		xf86Msg(X_ERROR, "%s\n", buffer);
224#endif
225#else
226		for (x = 0; x < width; x += 4) {
227			/* fetch 4 mask values */
228			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
229			/* fetch destination pixels */
230			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
231			/* duplicate them for all channels */
232			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
233			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
234			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
235			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
236			/* generate inverted alpha */
237			write_sx_reg(p, SX_INSTRUCTIONS,
238			    SX_XORS(12, 8, 28, 15));
239			/* multiply source */
240			write_sx_reg(p, SX_INSTRUCTIONS,
241			    SX_MUL16X16SR8(8, 12, 44, 3));
242			write_sx_reg(p, SX_INSTRUCTIONS,
243			    SX_MUL16X16SR8(8, 16, 48, 3));
244			write_sx_reg(p, SX_INSTRUCTIONS,
245			    SX_MUL16X16SR8(8, 20, 52, 3));
246			write_sx_reg(p, SX_INSTRUCTIONS,
247			    SX_MUL16X16SR8(8, 24, 56, 3));
248			/* multiply dest */
249			write_sx_reg(p, SX_INSTRUCTIONS,
250			    SX_MUL16X16SR8(28, 60, 76, 15));
251			/* add up */
252			write_sx_reg(p, SX_INSTRUCTIONS,
253			    SX_ADDV(44, 76, 92, 15));
254			/* write back */
255			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
256			dstx += 16;
257			mskx += 4;
258		}
259#endif
260		dst += dstpitch;
261		msk += srcpitch;
262	}
263}
264
265void CG14Comp_Add32(Cg14Ptr p,
266                   uint32_t src, uint32_t srcpitch,
267                   uint32_t dst, uint32_t dstpitch,
268                   int width, int height)
269{
270	int line;
271	uint32_t srcx, dstx;
272	int full, part, x;
273
274	ENTER;
275	full = width >> 3;	/* chunks of 8 */
276	part = width & 7;	/* leftovers */
277	/* we do this up to 8 pixels at a time */
278	for (line = 0; line < height; line++) {
279		srcx = src;
280		dstx = dst;
281		for (x = 0; x < full; x++) {
282			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
283			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
284			write_sx_reg(p, SX_INSTRUCTIONS,
285			    SX_ADDV(8, 40, 72, 15));
286			write_sx_reg(p, SX_INSTRUCTIONS,
287			    SX_ADDV(24, 56, 88, 15));
288			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
289			srcx += 128;
290			dstx += 128;
291		}
292
293		/* do leftovers */
294		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
295		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
296		if (part & 16) {
297			write_sx_reg(p, SX_INSTRUCTIONS,
298			    SX_ADDV(8, 40, 72, 15));
299			write_sx_reg(p, SX_INSTRUCTIONS,
300			    SX_ADDV(24, 56, 88, part - 17));
301		} else {
302			write_sx_reg(p, SX_INSTRUCTIONS,
303			    SX_ADDV(8, 40, 72, part - 1));
304		}
305		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
306
307		/* next line */
308		src += srcpitch;
309		dst += dstpitch;
310	}
311}
312
313void CG14Comp_Add8(Cg14Ptr p,
314                   uint32_t src, uint32_t srcpitch,
315                   uint32_t dst, uint32_t dstpitch,
316                   int width, int height)
317{
318	int line;
319	uint32_t srcx, dstx, srcoff, dstoff;
320	int pre, full, part, x;
321	uint8_t *d;
322	char buffer[256];
323	ENTER;
324
325	srcoff = src & 7;
326	src &= ~7;
327	dstoff = dst & 7;
328	dst &= ~7;
329	full = width >> 5;	/* chunks of 32 */
330	part = width & 31;	/* leftovers */
331
332#ifdef SX_DEBUG
333	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
334	    width, height, full, part);
335#endif
336	/* we do this up to 32 pixels at a time */
337	for (line = 0; line < height; line++) {
338		srcx = src;
339		dstx = dst;
340#ifdef SX_ADD_SOFTWARE
341		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
342		d = (uint8_t *)(p->fb + dstx + dstoff);
343		for (x = 0; x < width; x++) {
344			d[x] = min(255, s[x] + d[x]);
345		}
346#else
347		for (x = 0; x < full; x++) {
348			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
349			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
350			write_sx_reg(p, SX_INSTRUCTIONS,
351			    SX_ADDV(8, 40, 72, 15));
352			write_sx_reg(p, SX_INSTRUCTIONS,
353			    SX_ADDV(24, 56, 88, 15));
354			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
355			srcx += 32;
356			dstx += 32;
357		}
358
359		if (part > 0) {
360			/* do leftovers */
361			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
362			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
363			if (part > 16) {
364				write_sx_reg(p, SX_INSTRUCTIONS,
365				    SX_ADDV(8, 40, 72, 15));
366				write_sx_reg(p, SX_INSTRUCTIONS,
367				    SX_ADDV(24, 56, 88, part - 17));
368			} else {
369				write_sx_reg(p, SX_INSTRUCTIONS,
370				    SX_ADDV(8, 40, 72, part - 1));
371			}
372			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
373		}
374#endif
375#ifdef SX_DEBUG
376		d = (uint8_t *)(p->fb + src + srcoff);
377		for (x = 0; x < width; x++) {
378			buffer[x] = c[d[x]>>5];
379		}
380		buffer[x] = 0;
381		xf86Msg(X_ERROR, "%s\n", buffer);
382#endif
383		/* next line */
384		src += srcpitch;
385		dst += dstpitch;
386	}
387}
388
389void CG14Comp_Over32(Cg14Ptr p,
390                   uint32_t src, uint32_t srcpitch,
391                   uint32_t dst, uint32_t dstpitch,
392                   int width, int height)
393{
394	uint32_t srcx, dstx, m;
395	int line, x, i;
396
397	ENTER;
398
399	write_sx_reg(p, SX_QUEUED(8), 0xff);
400	for (line = 0; line < height; line++) {
401		srcx = src;
402		dstx = dst;
403
404		for (x = 0; x < width; x++) {
405			/* fetch source pixel */
406			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
407			/* fetch dst pixel */
408			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
409			/* src is premultiplied with alpha */
410			/* write inverted alpha into SCAM */
411			write_sx_reg(p, SX_INSTRUCTIONS,
412			    SX_XORV(12, 8, R_SCAM, 0));
413			/* dst * (1 - alpha) + R[13:15] */
414			write_sx_reg(p, SX_INSTRUCTIONS,
415			    SX_SAXP16X16SR8(21, 13, 25, 2));
416			write_sx_io(p, dstx,
417			    SX_STUQ0C(24, 0, dstx & 7));
418			dstx += 4;
419			srcx += 4;
420		}
421		dst += dstpitch;
422		src += srcpitch;
423	}
424}
425
426void CG14Comp_Over32Mask(Cg14Ptr p,
427                   uint32_t src, uint32_t srcpitch,
428                   uint32_t msk, uint32_t mskpitch,
429                   uint32_t dst, uint32_t dstpitch,
430                   int width, int height)
431{
432	uint32_t srcx, dstx, mskx, m;
433	int line, x, i;
434
435	ENTER;
436
437	write_sx_reg(p, SX_QUEUED(8), 0xff);
438	for (line = 0; line < height; line++) {
439		srcx = src;
440		mskx = msk;
441		dstx = dst;
442
443		for (x = 0; x < width; x++) {
444			/* fetch source pixel */
445			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
446			/* fetch mask */
447			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
448			/* fetch dst pixel */
449			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
450			/* apply mask */
451			write_sx_reg(p, SX_INSTRUCTIONS,
452			    SX_ANDS(12, 9, 16, 3));
453			/* src is premultiplied with alpha */
454			/* write inverted alpha into SCAM */
455			write_sx_reg(p, SX_INSTRUCTIONS,
456			    SX_XORV(16, 8, R_SCAM, 0));
457			/* dst * (1 - alpha) + R[13:15] */
458			write_sx_reg(p, SX_INSTRUCTIONS,
459			    SX_SAXP16X16SR8(21, 17, 25, 2));
460			write_sx_io(p, dstx,
461			    SX_STUQ0C(24, 0, dstx & 7));
462			srcx += 4;
463			mskx += 1;
464			dstx += 4;
465		}
466		src += srcpitch;
467		msk += mskpitch;
468		dst += dstpitch;
469	}
470}
471