cg14_accel.c revision fc473876
1/* $NetBSD: cg14_accel.c,v 1.14 2019/03/01 02:22:27 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include <sys/types.h>
37
/* all drivers need this */
39#include "xf86.h"
40#include "xf86_OSproc.h"
41#include "compiler.h"
42
43#include "cg14.h"
44#include <sparc/sxreg.h>
45
/*#define SX_DEBUG*/

/*
 * Debug helpers.
 * With SX_DEBUG defined, ENTER logs the name of the function it is used in
 * and DPRINTF forwards to xf86Msg().  Without it ENTER expands to nothing and
 * DPRINTF hides the call behind a while (0) so it is never executed.
 * ENTER is always written as "ENTER;" by its users, so the macro itself
 * carries no trailing semicolon.
 */
#ifdef SX_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__)
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/* number of elements in a real array ( not a pointer ) */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
57
/*
 * SX ROP codes, indexed by the X11 raster op ( alu ) passed to
 * PrepareCopy / PrepareSolid.
 * 0xcc is SX's GXcopy equivalent.
 */
uint32_t sx_rop[] = {
	0x00,	/*  0 GXclear */
	0x88,	/*  1 GXand */
	0x44,	/*  2 GXandReverse */
	0xcc,	/*  3 GXcopy */
	0x22,	/*  4 GXandInverted */
	0xaa,	/*  5 GXnoop */
	0x66,	/*  6 GXxor */
	0xee,	/*  7 GXor */
	0x11,	/*  8 GXnor */
	0x99,	/*  9 GXequiv */
	0x55,	/* 10 GXinvert */
	0xdd,	/* 11 GXorReverse */
	0x33,	/* 12 GXcopyInverted */
	0xbb,	/* 13 GXorInverted */
	0x77,	/* 14 GXnand */
	0xff	/* 15 GXset */
};
61
62int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
63		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
64int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
65
66static void CG14Copy32(PixmapPtr, int, int, int, int, int, int);
67static void CG14Copy8(PixmapPtr, int, int, int, int, int, int);
68
69static inline void
70CG14Wait(Cg14Ptr p)
71{
72	int bail = 10000000;
73	/* we wait for the busy bit to clear */
74	while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) &&
75	       (bail > 0)) {
76		bail--;
77	};
78	if (bail == 0) {
79		xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n",
80		    read_sx_reg(p, SX_CONTROL_STATUS),
81		    read_sx_reg(p, SX_ERROR));
82	}
83}
84
85static void
86CG14WaitMarker(ScreenPtr pScreen, int Marker)
87{
88	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
89	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
90
91	CG14Wait(p);
92}
93
94static Bool
95CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
96		int xdir, int ydir, int alu, Pixel planemask)
97{
98	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
99	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
100
101	ENTER;
102	DPRINTF(X_ERROR, "bits per pixel: %d\n",
103	    pSrcPixmap->drawable.bitsPerPixel);
104
105	if (planemask != p->last_mask) {
106		CG14Wait(p);
107		write_sx_reg(p, SX_PLANEMASK, planemask);
108		p->last_mask = planemask;
109	}
110	alu = sx_rop[alu];
111	if (alu != p->last_rop) {
112		CG14Wait(p);
113		write_sx_reg(p, SX_ROP_CONTROL, alu);
114		p->last_rop = alu;
115	}
116	switch (pSrcPixmap->drawable.bitsPerPixel)  {
117		case 8:
118			p->pExa->Copy = CG14Copy8;
119			break;
120		case 32:
121			p->pExa->Copy = CG14Copy32;
122			break;
123		default:
124			xf86Msg(X_ERROR, "%s depth %d\n", __func__,
125			    pSrcPixmap->drawable.bitsPerPixel);
126	}
127	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
128	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
129	p->xdir = xdir;
130	p->ydir = ydir;
131	return TRUE;
132}
133
134static void
135CG14Copy32(PixmapPtr pDstPixmap,
136         int srcX, int srcY, int dstX, int dstY, int w, int h)
137{
138	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
139	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
140	int dstpitch, dstoff, srcpitch, srcoff;
141	int srcstart, dststart, xinc, srcinc, dstinc;
142	int line, count, s, d, num;
143
144	ENTER;
145	dstpitch = exaGetPixmapPitch(pDstPixmap);
146	dstoff = exaGetPixmapOffset(pDstPixmap);
147	srcpitch = p->srcpitch;
148	srcoff = p->srcoff;
149	/*
150	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
151	 * actually wrote anything and only sync if it did
152	 */
153	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
154	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
155
156	/*
157	 * we always copy up to 32 pixels at a time so direction doesn't
158	 * matter if w<=32
159	 */
160	if (w > 32) {
161		if (p->xdir < 0) {
162			srcstart += (w - 32) << 2;
163			dststart += (w - 32) << 2;
164			xinc = -128;
165		} else
166			xinc = 128;
167	} else
168		xinc = 128;
169	if (p->ydir < 0) {
170		srcstart += (h - 1) * srcpitch;
171		dststart += (h - 1) * dstpitch;
172		srcinc = -srcpitch;
173		dstinc = -dstpitch;
174	} else {
175		srcinc = srcpitch;
176		dstinc = dstpitch;
177	}
178	if (p->last_rop == 0xcc) {
179		/* plain old copy */
180		if ( xinc > 0) {
181			/* going left to right */
182			for (line = 0; line < h; line++) {
183				count = 0;
184				s = srcstart;
185				d = dststart;
186				while ( count < w) {
187					num = min(32, w - count);
188					write_sx_io(p, s,
189					    SX_LD(10, num - 1, s & 7));
190					write_sx_io(p, d,
191					    SX_STM(10, num - 1, d & 7));
192					s += xinc;
193					d += xinc;
194					count += 32;
195				}
196				srcstart += srcinc;
197				dststart += dstinc;
198			}
199		} else {
200			/* going right to left */
201			int i, chunks = (w >> 5);
202			for (line = 0; line < h; line++) {
203				s = srcstart;
204				d = dststart;
205				count = w;
206				for (i = 0; i < chunks; i++) {
207					write_sx_io(p, s,
208					    SX_LD(10, 31, s & 7));
209					write_sx_io(p, d,
210					    SX_STM(10, 31, d & 7));
211					s -= 128;
212					d -= 128;
213					count -= 32;
214				}
215				/* leftovers, if any */
216				if (count > 0) {
217					s += (32 - count) << 2;
218					d += (32 - count) << 2;
219					write_sx_io(p, s,
220					    SX_LD(10, count - 1, s & 7));
221					write_sx_io(p, d,
222					    SX_STM(10, count - 1, d & 7));
223				}
224				srcstart += srcinc;
225				dststart += dstinc;
226			}
227		}
228	} else {
229		/* ROPs needed */
230		if ( xinc > 0) {
231			/* going left to right */
232			for (line = 0; line < h; line++) {
233				count = 0;
234				s = srcstart;
235				d = dststart;
236				while ( count < w) {
237					num = min(32, w - count);
238					write_sx_io(p, s,
239					    SX_LD(10, num - 1, s & 7));
240					write_sx_io(p, d,
241					    SX_LD(42, num - 1, d & 7));
242					if (num > 16) {
243						write_sx_reg(p, SX_INSTRUCTIONS,
244					    	 SX_ROP(10, 42, 74, 15));
245						write_sx_reg(p, SX_INSTRUCTIONS,
246					    	 SX_ROP(26, 58, 90, num - 17));
247					} else {
248						write_sx_reg(p, SX_INSTRUCTIONS,
249					    	 SX_ROP(10, 42, 74, num - 1));
250					}
251					write_sx_io(p, d,
252					    SX_STM(74, num - 1, d & 7));
253					s += xinc;
254					d += xinc;
255					count += 32;
256				}
257				srcstart += srcinc;
258				dststart += dstinc;
259			}
260		} else {
261			/* going right to left */
262			int i, chunks = (w >> 5);
263			for (line = 0; line < h; line++) {
264				s = srcstart;
265				d = dststart;
266				count = w;
267				for (i = 0; i < chunks; i++) {
268					write_sx_io(p, s, SX_LD(10, 31, s & 7));
269					write_sx_io(p, d, SX_LD(42, 31, d & 7));
270					write_sx_reg(p, SX_INSTRUCTIONS,
271				    	    SX_ROP(10, 42, 74, 15));
272					write_sx_reg(p, SX_INSTRUCTIONS,
273				    	    SX_ROP(26, 58, 90, 15));
274					write_sx_io(p, d,
275					    SX_STM(74, 31, d & 7));
276					s -= 128;
277					d -= 128;
278					count -= 32;
279				}
280				/* leftovers, if any */
281				if (count > 0) {
282					s += (32 - count) << 2;
283					d += (32 - count) << 2;
284					write_sx_io(p, s,
285					    SX_LD(10, count - 1, s & 7));
286					write_sx_io(p, d,
287					    SX_LD(42, count - 1, d & 7));
288					if (count > 16) {
289						write_sx_reg(p, SX_INSTRUCTIONS,
290					    	    SX_ROP(10, 42, 74, 15));
291						write_sx_reg(p, SX_INSTRUCTIONS,
292					    	 SX_ROP(26, 58, 90, count - 17));
293					} else {
294						write_sx_reg(p, SX_INSTRUCTIONS,
295					    	 SX_ROP(10, 42, 74, count - 1));
296					}
297
298					write_sx_io(p, d,
299					    SX_STM(74, count - 1, d & 7));
300				}
301				srcstart += srcinc;
302				dststart += dstinc;
303			}
304		}
305	}
306	exaMarkSync(pDstPixmap->drawable.pScreen);
307}
308
309static void
310CG14Copy8(PixmapPtr pDstPixmap,
311         int srcX, int srcY, int dstX, int dstY, int w, int h)
312{
313	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
314	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
315	int dstpitch, dstoff, srcpitch, srcoff;
316	int srcstart, dststart, xinc, srcinc, dstinc;
317	int line, count, s, d, num;
318
319	ENTER;
320	dstpitch = exaGetPixmapPitch(pDstPixmap);
321	dstoff = exaGetPixmapOffset(pDstPixmap);
322	srcpitch = p->srcpitch;
323	srcoff = p->srcoff;
324	/*
325	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
326	 * actually wrote anything and only sync if it did
327	 */
328	srcstart = srcX + (srcpitch * srcY) + srcoff;
329	dststart = dstX + (dstpitch * dstY) + dstoff;
330
331	/*
332	 * we always copy up to 32 pixels at a time so direction doesn't
333	 * matter if w<=32
334	 */
335	if (w > 32) {
336		if (p->xdir < 0) {
337			srcstart += (w - 32);
338			dststart += (w - 32);
339			xinc = -32;
340		} else
341			xinc = 32;
342	} else
343		xinc = 32;
344	if (p->ydir < 0) {
345		srcstart += (h - 1) * srcpitch;
346		dststart += (h - 1) * dstpitch;
347		srcinc = -srcpitch;
348		dstinc = -dstpitch;
349	} else {
350		srcinc = srcpitch;
351		dstinc = dstpitch;
352	}
353	if (p->last_rop == 0xcc) {
354		/* plain old copy */
355		if ( xinc > 0) {
356			/* going left to right */
357			for (line = 0; line < h; line++) {
358				count = 0;
359				s = srcstart;
360				d = dststart;
361				while ( count < w) {
362					num = min(32, w - count);
363					write_sx_io(p, s,
364					    SX_LDB(10, num - 1, s & 7));
365					write_sx_io(p, d,
366					    SX_STBM(10, num - 1, d & 7));
367					s += xinc;
368					d += xinc;
369					count += 32;
370				}
371				srcstart += srcinc;
372				dststart += dstinc;
373			}
374		} else {
375			/* going right to left */
376			int i, chunks = (w >> 5);
377			for (line = 0; line < h; line++) {
378				s = srcstart;
379				d = dststart;
380				count = w;
381				for (i = 0; i < chunks; i++) {
382					write_sx_io(p, s,
383					    SX_LDB(10, 31, s & 7));
384					write_sx_io(p, d,
385					    SX_STBM(10, 31, d & 7));
386					s -= 32;
387					d -= 32;
388					count -= 32;
389				}
390				/* leftovers, if any */
391				if (count > 0) {
392					s += (32 - count);
393					d += (32 - count);
394					write_sx_io(p, s,
395					    SX_LDB(10, count - 1, s & 7));
396					write_sx_io(p, d,
397					    SX_STBM(10, count - 1, d & 7));
398				}
399				srcstart += srcinc;
400				dststart += dstinc;
401			}
402		}
403	} else {
404		/* ROPs needed */
405		if ( xinc > 0) {
406			/* going left to right */
407			for (line = 0; line < h; line++) {
408				count = 0;
409				s = srcstart;
410				d = dststart;
411				while ( count < w) {
412					num = min(32, w - count);
413					write_sx_io(p, s,
414					    SX_LDB(10, num - 1, s & 7));
415					write_sx_io(p, d,
416					    SX_LDB(42, num - 1, d & 7));
417					if (num > 16) {
418						write_sx_reg(p, SX_INSTRUCTIONS,
419					    	 SX_ROP(10, 42, 74, 15));
420						write_sx_reg(p, SX_INSTRUCTIONS,
421					    	 SX_ROP(26, 58, 90, num - 17));
422					} else {
423						write_sx_reg(p, SX_INSTRUCTIONS,
424					    	 SX_ROP(10, 42, 74, num - 1));
425					}
426					write_sx_io(p, d,
427					    SX_STBM(74, num - 1, d & 7));
428					s += xinc;
429					d += xinc;
430					count += 32;
431				}
432				srcstart += srcinc;
433				dststart += dstinc;
434			}
435		} else {
436			/* going right to left */
437			int i, chunks = (w >> 5);
438			for (line = 0; line < h; line++) {
439				s = srcstart;
440				d = dststart;
441				count = w;
442				for (i = 0; i < chunks; i++) {
443					write_sx_io(p, s, SX_LDB(10, 31, s & 7));
444					write_sx_io(p, d, SX_LDB(42, 31, d & 7));
445					write_sx_reg(p, SX_INSTRUCTIONS,
446				    	    SX_ROP(10, 42, 74, 15));
447					write_sx_reg(p, SX_INSTRUCTIONS,
448				    	    SX_ROP(26, 58, 90, 15));
449					write_sx_io(p, d,
450					    SX_STBM(74, 31, d & 7));
451					s -= 128;
452					d -= 128;
453					count -= 32;
454				}
455				/* leftovers, if any */
456				if (count > 0) {
457					s += (32 - count);
458					d += (32 - count);
459					write_sx_io(p, s,
460					    SX_LDB(10, count - 1, s & 7));
461					write_sx_io(p, d,
462					    SX_LDB(42, count - 1, d & 7));
463					if (count > 16) {
464						write_sx_reg(p, SX_INSTRUCTIONS,
465					    	    SX_ROP(10, 42, 74, 15));
466						write_sx_reg(p, SX_INSTRUCTIONS,
467					    	 SX_ROP(26, 58, 90, count - 17));
468					} else {
469						write_sx_reg(p, SX_INSTRUCTIONS,
470					    	 SX_ROP(10, 42, 74, count - 1));
471					}
472
473					write_sx_io(p, d,
474					    SX_STBM(74, count - 1, d & 7));
475				}
476				srcstart += srcinc;
477				dststart += dstinc;
478			}
479		}
480	}
481	exaMarkSync(pDstPixmap->drawable.pScreen);
482}
483
484static void
485CG14DoneCopy(PixmapPtr pDstPixmap)
486{
487}
488
489static Bool
490CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
491{
492	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
493	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
494
495	ENTER;
496	DPRINTF(X_ERROR, "bits per pixel: %d\n",
497	    pPixmap->drawable.bitsPerPixel);
498	write_sx_reg(p, SX_QUEUED(8), fg);
499	write_sx_reg(p, SX_QUEUED(9), fg);
500	if (planemask != p->last_mask) {
501		CG14Wait(p);
502		write_sx_reg(p, SX_PLANEMASK, planemask);
503		p->last_mask = planemask;
504	}
505	alu = sx_rop[alu];
506	if (alu != p->last_rop) {
507		CG14Wait(p);
508		write_sx_reg(p, SX_ROP_CONTROL, alu);
509		p->last_rop = alu;
510	}
511	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
512	return TRUE;
513}
514
515static void
516CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
517{
518	int line, x, num;
519	uint32_t ptr;
520
521	ENTER;
522	if (p->last_rop == 0xcc) {
523		/* simple fill */
524		for (line = 0; line < h; line++) {
525			x = 0;
526			while (x < w) {
527				ptr = start + (x << 2);
528				num = min(32, w - x);
529				write_sx_io(p, ptr,
530				    SX_STS(8, num - 1, ptr & 7));
531				x += 32;
532			}
533			start += pitch;
534		}
535	} else if (p->last_rop == 0xaa) {
536		/* nothing to do here */
537		return;
538	} else {
539		/* alright, let's do actual ROP stuff */
540
541		/* first repeat the fill colour into 16 registers */
542		write_sx_reg(p, SX_INSTRUCTIONS,
543		    SX_SELECT_S(8, 8, 10, 15));
544
545		for (line = 0; line < h; line++) {
546			x = 0;
547			while (x < w) {
548				ptr = start + (x << 2);
549				num = min(32, w - x);
550				/* now suck fb data into registers */
551				write_sx_io(p, ptr,
552				    SX_LD(42, num - 1, ptr & 7));
553				/*
554				 * ROP them with the fill data we left in 10
555				 * non-memory ops can only have counts up to 16
556				 */
557				if (num <= 16) {
558					write_sx_reg(p, SX_INSTRUCTIONS,
559					    SX_ROP(10, 42, 74, num - 1));
560				} else {
561					write_sx_reg(p, SX_INSTRUCTIONS,
562					    SX_ROP(10, 42, 74, 15));
563					write_sx_reg(p, SX_INSTRUCTIONS,
564					    SX_ROP(10, 58, 90, num - 17));
565				}
566				/* and write the result back into memory */
567				write_sx_io(p, ptr,
568				    SX_ST(74, num - 1, ptr & 7));
569				x += 32;
570			}
571			start += pitch;
572		}
573	}
574}
575
576static void
577CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
578{
579	int line, x, num, off;
580	uint32_t ptr;
581
582	ENTER;
583	off = start & 7;
584	start &= ~7;
585
586	if (p->last_rop == 0xcc) {
587		/* simple fill */
588		for (line = 0; line < h; line++) {
589			x = 0;
590			while (x < w) {
591				ptr = start + x;
592				num = min(32, w - x);
593				write_sx_io(p, ptr,
594				    SX_STBS(8, num - 1, off));
595				x += 32;
596			}
597			start += pitch;
598		}
599	} else if (p->last_rop == 0xaa) {
600		/* nothing to do here */
601		return;
602	} else {
603		/* alright, let's do actual ROP stuff */
604
605		/* first repeat the fill colour into 16 registers */
606		write_sx_reg(p, SX_INSTRUCTIONS,
607		    SX_SELECT_S(8, 8, 10, 15));
608
609		for (line = 0; line < h; line++) {
610			x = 0;
611			while (x < w) {
612				ptr = start + x;
613				num = min(32, w - x);
614				/* now suck fb data into registers */
615				write_sx_io(p, ptr,
616				    SX_LDB(42, num - 1, off));
617				/*
618				 * ROP them with the fill data we left in 10
619				 * non-memory ops can only have counts up to 16
620				 */
621				if (num <= 16) {
622					write_sx_reg(p, SX_INSTRUCTIONS,
623					    SX_ROP(10, 42, 74, num - 1));
624				} else {
625					write_sx_reg(p, SX_INSTRUCTIONS,
626					    SX_ROP(10, 42, 74, 15));
627					write_sx_reg(p, SX_INSTRUCTIONS,
628					    SX_ROP(10, 58, 90, num - 17));
629				}
630				/* and write the result back into memory */
631				write_sx_io(p, ptr,
632				    SX_STB(74, num - 1, off));
633				x += 32;
634			}
635			start += pitch;
636		}
637	}
638}
639
640static void
641CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
642{
643	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
644	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
645	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
646	int start, depth;
647
648	ENTER;
649	dstpitch = exaGetPixmapPitch(pPixmap);
650	dstoff = exaGetPixmapOffset(pPixmap);
651
652	depth = pPixmap->drawable.bitsPerPixel;
653	switch (depth) {
654		case 32:
655			start = dstoff + (y1 * dstpitch) + (x1 << 2);
656			CG14Solid32(p, start, dstpitch, w, h);
657			break;
658		case 8:
659			start = dstoff + (y1 * dstpitch) + x1;
660			CG14Solid8(p, start, dstpitch, w, h);
661			break;
662	}
663
664	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
665	    dstpitch, dstoff, start);
666	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
667	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
668	exaMarkSync(pPixmap->drawable.pScreen);
669}
670
671/*
672 * Memcpy-based UTS.
673 */
674static Bool
675CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
676    char *src, int src_pitch)
677{
678	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
679	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
680	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
681	int    dst_pitch  = exaGetPixmapPitch(pDst);
682
683	int bpp    = pDst->drawable.bitsPerPixel;
684	int cpp    = (bpp + 7) >> 3;
685	int wBytes = w * cpp;
686
687	ENTER;
688	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
689	dst += (x * cpp) + (y * dst_pitch);
690
691	CG14Wait(p);
692
693	while (h--) {
694		memcpy(dst, src, wBytes);
695		src += src_pitch;
696		dst += dst_pitch;
697	}
698	__asm("stbar;");
699	return TRUE;
700}
701
702/*
703 * Memcpy-based DFS.
704 */
705static Bool
706CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
707    char *dst, int dst_pitch)
708{
709	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
710	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
711	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
712	int    src_pitch  = exaGetPixmapPitch(pSrc);
713
714	ENTER;
715	int bpp    = pSrc->drawable.bitsPerPixel;
716	int cpp    = (bpp + 7) >> 3;
717	int wBytes = w * cpp;
718
719	src += (x * cpp) + (y * src_pitch);
720
721	CG14Wait(p);
722
723	while (h--) {
724		memcpy(dst, src, wBytes);
725		src += src_pitch;
726		dst += dst_pitch;
727	}
728
729	return TRUE;
730}
731
732Bool
733CG14CheckComposite(int op, PicturePtr pSrcPicture,
734                           PicturePtr pMaskPicture,
735                           PicturePtr pDstPicture)
736{
737	int i, ok = FALSE;
738
739	ENTER;
740
741	/*
742	 * SX is in theory capable of accelerating pretty much all Xrender ops,
743	 * even coordinate transformation and gradients. Support will be added
744	 * over time and likely have to spill over into its own source file.
745	 */
746
747	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
748		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
749		return FALSE;
750	}
751
752	if (pSrcPicture != NULL) {
753		i = 0;
754		while ((i < arraysize(src_formats)) && (!ok)) {
755			ok =  (pSrcPicture->format == src_formats[i]);
756			i++;
757		}
758
759		if (!ok) {
760			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
761			    __func__, pSrcPicture->format);
762			return FALSE;
763		}
764		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
765	}
766
767	if (pDstPicture != NULL) {
768		i = 0;
769		ok = FALSE;
770		while ((i < arraysize(src_formats)) && (!ok)) {
771			ok =  (pDstPicture->format == src_formats[i]);
772			i++;
773		}
774
775		if (!ok) {
776			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
777			    __func__, pDstPicture->format);
778			return FALSE;
779		}
780		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
781	}
782
783	if (pMaskPicture != NULL) {
784		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
785		    pMaskPicture->pDrawable->width,
786		    pMaskPicture->pDrawable->height);
787	}
788	return TRUE;
789}
790
791Bool
792CG14PrepareComposite(int op, PicturePtr pSrcPicture,
793                             PicturePtr pMaskPicture,
794                             PicturePtr pDstPicture,
795                             PixmapPtr  pSrc,
796                             PixmapPtr  pMask,
797                             PixmapPtr  pDst)
798{
799	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
800	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
801
802	ENTER;
803
804	p->no_source_pixmap = FALSE;
805	p->source_is_solid = FALSE;
806
807	if (pSrcPicture->format == PICT_a1) {
808		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
809		    pDstPicture->format, op);
810		if (pMaskPicture != NULL) {
811			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
812		}
813	}
814	if (pSrcPicture->pSourcePict != NULL) {
815		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
816			p->fillcolour =
817			    pSrcPicture->pSourcePict->solidFill.color;
818			DPRINTF(X_ERROR, "%s: solid src %08x\n",
819			    __func__, p->fillcolour);
820			p->no_source_pixmap = TRUE;
821			p->source_is_solid = TRUE;
822		}
823	}
824	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
825		if (pMaskPicture->pSourcePict->type ==
826		    SourcePictTypeSolidFill) {
827			p->fillcolour =
828			   pMaskPicture->pSourcePict->solidFill.color;
829			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
830			    __func__, p->fillcolour);
831		}
832	}
833	if (pMaskPicture != NULL) {
834		p->mskoff = exaGetPixmapOffset(pMask);
835		p->mskpitch = exaGetPixmapPitch(pMask);
836		p->mskformat = pMaskPicture->format;
837	} else {
838		p->mskoff = 0;
839		p->mskpitch = 0;
840		p->mskformat = 0;
841	}
842	if (pSrc != NULL) {
843		p->source_is_solid =
844		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
845		p->srcoff = exaGetPixmapOffset(pSrc);
846		p->srcpitch = exaGetPixmapPitch(pSrc);
847		if (p->source_is_solid) {
848			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
849		}
850	}
851	p->srcformat = pSrcPicture->format;
852	p->dstformat = pDstPicture->format;
853
854	if (p->source_is_solid) {
855		uint32_t temp;
856
857		/* stuff source colour into SX registers, swap as needed */
858		temp = p->fillcolour;
859		switch (p->srcformat) {
860			case PICT_a8r8g8b8:
861			case PICT_x8r8g8b8:
862				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
863				temp = temp >> 8;
864				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
865				temp = temp >> 8;
866				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
867				break;
868			case PICT_a8b8g8r8:
869			case PICT_x8b8g8r8:
870				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
871				temp = temp >> 8;
872				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
873				temp = temp >> 8;
874				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
875				break;
876		}
877		write_sx_reg(p, SX_QUEUED(8), 0xff);
878	}
879	p->op = op;
880	if (op == PictOpSrc) {
881		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
882	}
883#ifdef SX_DEBUG
884	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
885	    *(uint32_t *)(p->fb + p->srcoff));
886#endif
887	return TRUE;
888}
889
890void
891CG14Composite(PixmapPtr pDst, int srcX, int srcY,
892                              int maskX, int maskY,
893                              int dstX, int dstY,
894                              int width, int height)
895{
896	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
897	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
898	uint32_t dstoff, dstpitch;
899	uint32_t dst, msk, src;
900	int flip = 0;
901
902	ENTER;
903	dstoff = exaGetPixmapOffset(pDst);
904	dstpitch = exaGetPixmapPitch(pDst);
905
906	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
907		PICT_FORMAT_TYPE(p->dstformat));
908
909	switch (p->op) {
910		case PictOpOver:
911			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
912			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
913			    p->mskformat, p->dstformat, srcX, srcY);
914			if (p->source_is_solid) {
915				switch (p->mskformat) {
916					case PICT_a8:
917						msk = p->mskoff +
918						    (maskY * p->mskpitch) +
919						    maskX;
920						CG14Comp_Over8Solid(p,
921						    msk, p->mskpitch,
922						    dst, dstpitch,
923						    width, height);
924						break;
925					case PICT_a8r8g8b8:
926					case PICT_a8b8g8r8:
927						msk = p->mskoff +
928						    (maskY * p->mskpitch) +
929						    (maskX << 2);
930						CG14Comp_Over32Solid(p,
931						    msk, p->mskpitch,
932						    dst, dstpitch,
933						    width, height);
934						break;
935					default:
936						xf86Msg(X_ERROR,
937						  "unsupported mask format %08x\n", p->mskformat);
938				}
939			} else {
940				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
941				    p->mskformat);
942				switch (p->srcformat) {
943					case PICT_a8r8g8b8:
944					case PICT_a8b8g8r8:
945						src = p->srcoff +
946						    (srcY * p->srcpitch) +
947						    (srcX << 2);
948						dst = dstoff +
949						    (dstY * dstpitch) +
950						    (dstX << 2);
951						if (p->mskformat == PICT_a8) {
952							msk = p->mskoff +
953							    (maskY * p->mskpitch) +
954							    maskX;
955							CG14Comp_Over32Mask(p,
956							    src, p->srcpitch,
957							    msk, p->mskpitch,
958							    dst, dstpitch,
959							    width, height, flip);
960						} else {
961							CG14Comp_Over32(p,
962							    src, p->srcpitch,
963							    dst, dstpitch,
964							    width, height, flip);
965						}
966						break;
967					case PICT_x8r8g8b8:
968					case PICT_x8b8g8r8:
969						src = p->srcoff +
970						    (srcY * p->srcpitch) +
971						    (srcX << 2);
972						dst = dstoff +
973						    (dstY * dstpitch) +
974						    (dstX << 2);
975						if (p->mskformat == PICT_a8) {
976							msk = p->mskoff +
977							    (maskY * p->mskpitch) +
978							    maskX;
979							CG14Comp_Over32Mask_noalpha(p,
980							    src, p->srcpitch,
981							    msk, p->mskpitch,
982							    dst, dstpitch,
983							    width, height, flip);
984						} else if ((p->mskformat == PICT_a8r8g8b8) ||
985							   (p->mskformat == PICT_a8b8g8r8)) {
986							msk = p->mskoff +
987							    (maskY * p->mskpitch) +
988							    (maskX << 2);
989							CG14Comp_Over32Mask32_noalpha(p,
990							    src, p->srcpitch,
991							    msk, p->mskpitch,
992							    dst, dstpitch,
993							    width, height, flip);
994						} else {
995							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
996						}
997						break;
998					default:
999						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
1000						    __func__, p->srcformat);
1001				}
1002			}
1003			break;
1004		case PictOpAdd:
1005			DPRINTF(X_ERROR, "Add %08x %08x\n",
1006			    p->srcformat, p->dstformat);
1007			switch (p->srcformat) {
1008				case PICT_a8:
1009					src = p->srcoff +
1010					    (srcY * p->srcpitch) + srcX;
1011					if (p->dstformat == PICT_a8) {
1012						dst = dstoff +
1013						      (dstY * dstpitch) + dstX;
1014						CG14Comp_Add8(p,
1015						    src, p->srcpitch,
1016						    dst, dstpitch,
1017						    width, height);
1018					} else {
1019						dst = dstoff +
1020						      (dstY * dstpitch) +
1021						      (dstX << 2);
1022						CG14Comp_Add8_32(p,
1023						    src, p->srcpitch,
1024						    dst, dstpitch,
1025						    width, height);
1026					}
1027					break;
1028				case PICT_a8r8g8b8:
1029				case PICT_x8r8g8b8:
1030					src = p->srcoff +
1031					    (srcY * p->srcpitch) + (srcX << 2);
1032					dst = dstoff + (dstY * dstpitch) +
1033					    (dstX << 2);
1034					CG14Comp_Add32(p, src, p->srcpitch,
1035					    dst, dstpitch, width, height);
1036					break;
1037				default:
1038					xf86Msg(X_ERROR,
1039					    "unsupported src format\n");
1040			}
1041			break;
1042		case PictOpSrc:
1043			DPRINTF(X_ERROR, "Src %08x %08x\n",
1044			    p->srcformat, p->dstformat);
1045			if (p->mskformat != 0)
1046				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
1047			if (p->srcformat == PICT_a8) {
1048				CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
1049			} else {
1050				/* convert between RGB and BGR? */
1051				CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
1052			}
1053			break;
1054		default:
1055			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
1056	}
1057	exaMarkSync(pDst->drawable.pScreen);
1058}
1059
1060
1061
1062Bool
1063CG14InitAccel(ScreenPtr pScreen)
1064{
1065	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1066	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1067	ExaDriverPtr pExa;
1068
1069	pExa = exaDriverAlloc();
1070	if (!pExa)
1071		return FALSE;
1072
1073	p->pExa = pExa;
1074
1075	pExa->exa_major = EXA_VERSION_MAJOR;
1076	pExa->exa_minor = EXA_VERSION_MINOR;
1077
1078	pExa->memoryBase = p->fb;
1079	pExa->memorySize = p->memsize;
1080	pExa->offScreenBase = p->width * p->height * 4;
1081
1082	/*
1083	 * SX memory instructions are written to 64bit aligned addresses with
1084	 * a 3 bit displacement. Make sure the displacement remains constant
1085	 * within one column
1086	 */
1087
1088	pExa->pixmapOffsetAlign = 8;
1089	pExa->pixmapPitchAlign = 8;
1090
1091	pExa->flags = EXA_OFFSCREEN_PIXMAPS
1092		      | EXA_SUPPORTS_OFFSCREEN_OVERLAPS
1093		      /*| EXA_MIXED_PIXMAPS*/;
1094
1095	/*
1096	 * these limits are bogus
1097	 * SX doesn't deal with coordinates at all, so there is no limit but
1098	 * we have to put something here
1099	 */
1100	pExa->maxX = 4096;
1101	pExa->maxY = 4096;
1102
1103	pExa->WaitMarker = CG14WaitMarker;
1104
1105	pExa->PrepareSolid = CG14PrepareSolid;
1106	pExa->Solid = CG14Solid;
1107	pExa->DoneSolid = CG14DoneCopy;
1108	pExa->PrepareCopy = CG14PrepareCopy;
1109	pExa->Copy = CG14Copy32;
1110	pExa->DoneCopy = CG14DoneCopy;
1111	if (p->use_xrender) {
1112		pExa->CheckComposite = CG14CheckComposite;
1113		pExa->PrepareComposite = CG14PrepareComposite;
1114		pExa->Composite = CG14Composite;
1115		pExa->DoneComposite = CG14DoneCopy;
1116	}
1117
1118	/* EXA hits more optimized paths when it does not have to fallback
1119	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
1120	 */
1121	pExa->UploadToScreen = CG14UploadToScreen;
1122	pExa->DownloadFromScreen = CG14DownloadFromScreen;
1123
1124	/* do some hardware init */
1125	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
1126	p->last_mask = 0xffffffff;
1127	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
1128	p->last_rop = 0xcc;
1129	return exaDriverInit(pScreen, pExa);
1130}
1131