/* $NetBSD: cg14_render.c,v 1.7 2013/07/30 21:49:38 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/types.h>
/* all drivers need this */
#include "xf86.h"
#include "xf86_OSproc.h"
#include "compiler.h"

#include "cg14.h"
#include <sparc/sxreg.h>

#define SX_SINGLE
/*#define SX_RENDER_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifdef SX_RENDER_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

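/* intensity ramp for the SX_DEBUG ASCII dumps of mask/alpha values below */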
char c[8] = " .,:+*oX";


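/*
 * Note on the alpha math in the Over*Solid functions below (an editor's
 * sketch for documentation, not part of the original driver): with an
 * 8-bit mask value m in SCAM, the SAXP16X16SR8 pair computes, per channel,
 *
 *	dst = (src * m >> 8) + (dst * (255 - m) >> 8)
 *
 * where 255 - m is obtained by XORing m with R8 (preloaded with 0xff).
 * The helper name below is hypothetical and only models one 8bpc pixel:
 */
#ifdef SX_RENDER_DEBUG
static inline uint32_t
cg14_over_solid_model(uint32_t srcpix, uint32_t dstpix, uint32_t m)
{
	uint32_t res = 0;
	int i;

	for (i = 0; i < 32; i += 8) {
		uint32_t s = (srcpix >> i) & 0xff;
		uint32_t d = (dstpix >> i) & 0xff;

		res |= (((s * m) >> 8) + ((d * (255 - m)) >> 8)) << i;
	}
	return res;
}
#endif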
void CG14Comp_Over32Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x;

	ENTER;

	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifdef SX_SINGLE

		for (x = 0; x < width; x++) {
			m = *(volatile uint32_t *)(p->fb + mskx);
			m = m >> 24;
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx,
				    SX_LDUQ0(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 4;
		}
#else
		for (x = 0; x < width; x += 4) {
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			dstx += 16;
			mskx += 16;
		}
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

void CG14Comp_Over8Solid(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t msk = src, mskx, dstx, m;
	int line, x;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
	    *(uint32_t *)(p->fb + p->srcoff));
	for (line = 0; line < height; line++) {
		mskx = msk;
		dstx = dst;
#ifdef SX_SINGLE

		for (x = 0; x < width; x++) {
			m = *(volatile uint8_t *)(p->fb + mskx);
#ifdef SX_DEBUG
			buffer[x] = c[m >> 5];
#endif
			if (m == 0) {
				/* nothing to do - all transparent */
			} else if (m == 0xff) {
				/* all opaque */
				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
			} else {
				/* fetch alpha value, stick it into scam */
				/* mask is in R[12:15] */
				/*write_sx_io(p, mskx & ~7,
				    SX_LDB(12, 0, mskx & 7));*/
				write_sx_reg(p, SX_QUEUED(12), m);
				/* fetch dst pixel */
				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ORV(12, 0, R_SCAM, 0));
				/*
				 * src * alpha + R0
				 * R[9:11] * SCAM + R0 -> R[17:19]
				 */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(9, 0, 17, 2));

				/* invert SCAM */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_XORV(12, 8, 13, 0));
#endif
				/* dst * (1 - alpha) + R[17:19] */
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_SAXP16X16SR8(21, 17, 25, 2));
				write_sx_io(p, dstx,
				    SX_STUQ0C(24, 0, dstx & 7));
			}
			dstx += 4;
			mskx += 1;
		}
#ifdef SX_DEBUG
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
#else
		for (x = 0; x < width; x += 4) {
			/* fetch 4 mask values */
			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
			/* fetch destination pixels */
			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
			/* duplicate them for all channels */
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
			/* generate inverted alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(12, 8, 28, 15));
			/* multiply source */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 12, 44, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 16, 48, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 20, 52, 3));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(8, 24, 56, 3));
			/* multiply dest */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_MUL16X16SR8(28, 60, 76, 15));
			/* add up */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(44, 76, 92, 15));
			/* write back */
			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
			dstx += 16;
			mskx += 4;
		}
#endif
		dst += dstpitch;
		msk += srcpitch;
	}
}

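/*
 * The two Add functions implement RENDER's PictOpAdd: per byte,
 * dst = dst + src, presumably clamped to 255 the same way the
 * SX_ADD_SOFTWARE fallback in CG14Comp_Add8 does with min(255, s + d).
 * Full chunks go through pairs of ADDV instructions covering 32 registers,
 * leftovers through a shorter ADDV (or two, if more than 16 remain).
 */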
void CG14Comp_Add32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx;
	int full, part, x;

	ENTER;
	full = width >> 3;	/* chunks of 8 */
	part = width & 7;	/* leftovers */
	/* we do this up to 8 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
			srcx += 128;
			dstx += 128;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
			write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
		}

		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

void CG14Comp_Add8(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	int line;
	uint32_t srcx, dstx, srcoff, dstoff;
	int full, part, x;
	uint8_t *d;
#ifdef SX_DEBUG
	char buffer[256];
#endif
	ENTER;

	srcoff = src & 7;
	src &= ~7;
	dstoff = dst & 7;
	dst &= ~7;
	full = width >> 5;	/* chunks of 32 */
	part = width & 31;	/* leftovers */

#ifdef SX_DEBUG
	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
	    width, height, full, part);
#endif
	/* we do this up to 32 pixels at a time */
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;
#ifdef SX_ADD_SOFTWARE
		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
		d = (uint8_t *)(p->fb + dstx + dstoff);
		for (x = 0; x < width; x++) {
			d[x] = min(255, s[x] + d[x]);
		}
#else
		for (x = 0; x < full; x++) {
			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(8, 40, 72, 15));
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ADDV(24, 56, 88, 15));
			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
			srcx += 32;
			dstx += 32;
		}

		if (part > 0) {
			/* do leftovers */
			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
			if (part > 16) {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, 15));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(24, 56, 88, part - 17));
			} else {
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ADDV(8, 40, 72, part - 1));
			}
			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
		}
#endif
#ifdef SX_DEBUG
		d = (uint8_t *)(p->fb + src + srcoff);
		for (x = 0; x < width; x++) {
			buffer[x] = c[d[x]>>5];
		}
		buffer[x] = 0;
		xf86Msg(X_ERROR, "%s\n", buffer);
#endif
		/* next line */
		src += srcpitch;
		dst += dstpitch;
	}
}

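/*
 * OVER with a premultiplied ARGB source: dst = src + dst * (255 - alpha).
 * A hedged single-pixel model follows (the helper name is ours, not the
 * driver's; it assumes the alpha byte sits in the first unpacked register,
 * i.e. the top byte of the pixel):
 */
#ifdef SX_RENDER_DEBUG
static inline uint32_t
cg14_over_premul_model(uint32_t srcpix, uint32_t dstpix)
{
	uint32_t a = srcpix >> 24;
	uint32_t res = 0;
	int i;

	for (i = 0; i < 32; i += 8) {
		uint32_t s = (srcpix >> i) & 0xff;
		uint32_t d = (dstpix >> i) & 0xff;
		uint32_t r = s + ((d * (255 - a)) >> 8);

		res |= (r > 255 ? 255 : r) << i;
	}
	return res;
}
#endif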
void CG14Comp_Over32(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx;
	int line, x;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* src is premultiplied with alpha */
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(12, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[12:15] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 12, 24, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			dstx += 4;
			srcx += 4;
		}
		dst += dstpitch;
		src += srcpitch;
	}
}

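/*
 * Same as above, but the premultiplied source is first scaled by an 8-bit
 * mask: tmp = src * m >> 8, then dst = tmp + dst * (255 - tmp.alpha) >> 8,
 * with tmp's alpha component (R16) inverted into SCAM for the second SAXP.
 */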
void CG14Comp_Over32Mask(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx, mskx;
	int line, x;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch mask */
			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* stick mask alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(9, 0, R_SCAM, 0));
			/* apply mask */
			/* src is premultiplied with alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 16, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(16, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[16:19] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 16, 24, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			srcx += 4;
			mskx += 1;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

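/*
 * Like CG14Comp_Over32Mask, but for a source without an alpha channel:
 * R8 (0xff) is ORed into the alpha slot first, so the mask byte alone
 * acts as the coverage/alpha value for both SAXP steps.
 */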
void CG14Comp_Over32Mask_noalpha(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx, mskx;
	int line, x;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(8, 0, 12, 0));
			/* fetch mask */
			write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* write alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(9, 0, R_SCAM, 0));
			/* src * alpha + R0 */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 16, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORV(9, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[16:19] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 16, 24, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(24, 0, dstx & 7));
			srcx += 4;
			mskx += 1;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}

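/*
 * As above, but the mask is a 32-bit ARGB picture: its alpha byte lands in
 * R16 via the unpacking LDUQ0 and serves as the coverage value in place of
 * an 8-bit mask byte.
 */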
void CG14Comp_Over32Mask32_noalpha(Cg14Ptr p,
                   uint32_t src, uint32_t srcpitch,
                   uint32_t msk, uint32_t mskpitch,
                   uint32_t dst, uint32_t dstpitch,
                   int width, int height)
{
	uint32_t srcx, dstx, mskx;
	int line, x;

	ENTER;

	write_sx_reg(p, SX_QUEUED(8), 0xff);
	for (line = 0; line < height; line++) {
		srcx = src;
		mskx = msk;
		dstx = dst;

		for (x = 0; x < width; x++) {
			/* fetch source pixel */
			write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7));
			/* fetch mask */
			write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7));
			/* fetch dst pixel */
			write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
			/* set src alpha to 0xff */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(8, 0, 12, 0));
			/* mask alpha to SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_ORS(16, 0, R_SCAM, 0));
			/* src * alpha */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(12, 0, 24, 3));
			/* write inverted alpha into SCAM */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_XORS(16, 8, R_SCAM, 0));
			/* dst * (1 - alpha) + R[24:27] */
			write_sx_reg(p, SX_INSTRUCTIONS,
			    SX_SAXP16X16SR8(20, 24, 28, 3));
			write_sx_io(p, dstx,
			    SX_STUQ0C(28, 0, dstx & 7));
			srcx += 4;
			mskx += 4;
			dstx += 4;
		}
		src += srcpitch;
		msk += mskpitch;
		dst += dstpitch;
	}
}