cg14_accel.c revision c2193d98
1/* $NetBSD: cg14_accel.c,v 1.15 2019/07/24 16:07:59 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include <sys/types.h>
37
/* all drivers need this */
39#include "xf86.h"
40#include "xf86_OSproc.h"
41#include "compiler.h"
42
43#include "cg14.h"
44
45/*#define SX_DEBUG*/
46
/*
 * Debug helpers.
 * ENTER traces function entry, DPRINTF is an xf86Msg() that compiles to
 * dead code unless SX_DEBUG is defined.  ENTER deliberately carries no
 * trailing semicolon so that "ENTER;" always expands to exactly one
 * statement.
 */
#ifdef SX_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__)
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Number of elements in a real array (not a pointer).  The argument is
 * parenthesised before indexing so that expression arguments are safe.
 */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
56
57/* 0xcc is SX's GXcopy equivalent */
58uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee,
59		      0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff};
60
61int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
62		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
63int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
64
65static void CG14Copy32(PixmapPtr, int, int, int, int, int, int);
66static void CG14Copy8(PixmapPtr, int, int, int, int, int, int);
67
68static inline void
69CG14Wait(Cg14Ptr p)
70{
71	int bail = 10000000;
72	/* we wait for the busy bit to clear */
73	while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) &&
74	       (bail > 0)) {
75		bail--;
76	};
77	if (bail == 0) {
78		xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n",
79		    read_sx_reg(p, SX_CONTROL_STATUS),
80		    read_sx_reg(p, SX_ERROR));
81	}
82}
83
84static void
85CG14WaitMarker(ScreenPtr pScreen, int Marker)
86{
87	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
88	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
89
90	CG14Wait(p);
91}
92
93static Bool
94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
95		int xdir, int ydir, int alu, Pixel planemask)
96{
97	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
98	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
99
100	ENTER;
101	DPRINTF(X_ERROR, "bits per pixel: %d\n",
102	    pSrcPixmap->drawable.bitsPerPixel);
103
104	if (planemask != p->last_mask) {
105		CG14Wait(p);
106		write_sx_reg(p, SX_PLANEMASK, planemask);
107		p->last_mask = planemask;
108	}
109	alu = sx_rop[alu];
110	if (alu != p->last_rop) {
111		CG14Wait(p);
112		write_sx_reg(p, SX_ROP_CONTROL, alu);
113		p->last_rop = alu;
114	}
115	switch (pSrcPixmap->drawable.bitsPerPixel)  {
116		case 8:
117			p->pExa->Copy = CG14Copy8;
118			break;
119		case 32:
120			p->pExa->Copy = CG14Copy32;
121			break;
122		default:
123			xf86Msg(X_ERROR, "%s depth %d\n", __func__,
124			    pSrcPixmap->drawable.bitsPerPixel);
125	}
126	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
127	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
128	p->xdir = xdir;
129	p->ydir = ydir;
130	return TRUE;
131}
132
133static void
134CG14Copy32(PixmapPtr pDstPixmap,
135         int srcX, int srcY, int dstX, int dstY, int w, int h)
136{
137	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
138	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
139	int dstpitch, dstoff, srcpitch, srcoff;
140	int srcstart, dststart, xinc, srcinc, dstinc;
141	int line, count, s, d, num;
142
143	ENTER;
144	dstpitch = exaGetPixmapPitch(pDstPixmap);
145	dstoff = exaGetPixmapOffset(pDstPixmap);
146	srcpitch = p->srcpitch;
147	srcoff = p->srcoff;
148	/*
149	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
150	 * actually wrote anything and only sync if it did
151	 */
152	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
153	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
154
155	/*
156	 * we always copy up to 32 pixels at a time so direction doesn't
157	 * matter if w<=32
158	 */
159	if (w > 32) {
160		if (p->xdir < 0) {
161			srcstart += (w - 32) << 2;
162			dststart += (w - 32) << 2;
163			xinc = -128;
164		} else
165			xinc = 128;
166	} else
167		xinc = 128;
168	if (p->ydir < 0) {
169		srcstart += (h - 1) * srcpitch;
170		dststart += (h - 1) * dstpitch;
171		srcinc = -srcpitch;
172		dstinc = -dstpitch;
173	} else {
174		srcinc = srcpitch;
175		dstinc = dstpitch;
176	}
177	if (p->last_rop == 0xcc) {
178		/* plain old copy */
179		if ( xinc > 0) {
180			/* going left to right */
181			for (line = 0; line < h; line++) {
182				count = 0;
183				s = srcstart;
184				d = dststart;
185				while ( count < w) {
186					num = min(32, w - count);
187					write_sx_io(p, s,
188					    SX_LD(10, num - 1, s & 7));
189					write_sx_io(p, d,
190					    SX_STM(10, num - 1, d & 7));
191					s += xinc;
192					d += xinc;
193					count += 32;
194				}
195				srcstart += srcinc;
196				dststart += dstinc;
197			}
198		} else {
199			/* going right to left */
200			int i, chunks = (w >> 5);
201			for (line = 0; line < h; line++) {
202				s = srcstart;
203				d = dststart;
204				count = w;
205				for (i = 0; i < chunks; i++) {
206					write_sx_io(p, s,
207					    SX_LD(10, 31, s & 7));
208					write_sx_io(p, d,
209					    SX_STM(10, 31, d & 7));
210					s -= 128;
211					d -= 128;
212					count -= 32;
213				}
214				/* leftovers, if any */
215				if (count > 0) {
216					s += (32 - count) << 2;
217					d += (32 - count) << 2;
218					write_sx_io(p, s,
219					    SX_LD(10, count - 1, s & 7));
220					write_sx_io(p, d,
221					    SX_STM(10, count - 1, d & 7));
222				}
223				srcstart += srcinc;
224				dststart += dstinc;
225			}
226		}
227	} else {
228		/* ROPs needed */
229		if ( xinc > 0) {
230			/* going left to right */
231			for (line = 0; line < h; line++) {
232				count = 0;
233				s = srcstart;
234				d = dststart;
235				while ( count < w) {
236					num = min(32, w - count);
237					write_sx_io(p, s,
238					    SX_LD(10, num - 1, s & 7));
239					write_sx_io(p, d,
240					    SX_LD(42, num - 1, d & 7));
241					if (num > 16) {
242						write_sx_reg(p, SX_INSTRUCTIONS,
243					    	 SX_ROP(10, 42, 74, 15));
244						write_sx_reg(p, SX_INSTRUCTIONS,
245					    	 SX_ROP(26, 58, 90, num - 17));
246					} else {
247						write_sx_reg(p, SX_INSTRUCTIONS,
248					    	 SX_ROP(10, 42, 74, num - 1));
249					}
250					write_sx_io(p, d,
251					    SX_STM(74, num - 1, d & 7));
252					s += xinc;
253					d += xinc;
254					count += 32;
255				}
256				srcstart += srcinc;
257				dststart += dstinc;
258			}
259		} else {
260			/* going right to left */
261			int i, chunks = (w >> 5);
262			for (line = 0; line < h; line++) {
263				s = srcstart;
264				d = dststart;
265				count = w;
266				for (i = 0; i < chunks; i++) {
267					write_sx_io(p, s, SX_LD(10, 31, s & 7));
268					write_sx_io(p, d, SX_LD(42, 31, d & 7));
269					write_sx_reg(p, SX_INSTRUCTIONS,
270				    	    SX_ROP(10, 42, 74, 15));
271					write_sx_reg(p, SX_INSTRUCTIONS,
272				    	    SX_ROP(26, 58, 90, 15));
273					write_sx_io(p, d,
274					    SX_STM(74, 31, d & 7));
275					s -= 128;
276					d -= 128;
277					count -= 32;
278				}
279				/* leftovers, if any */
280				if (count > 0) {
281					s += (32 - count) << 2;
282					d += (32 - count) << 2;
283					write_sx_io(p, s,
284					    SX_LD(10, count - 1, s & 7));
285					write_sx_io(p, d,
286					    SX_LD(42, count - 1, d & 7));
287					if (count > 16) {
288						write_sx_reg(p, SX_INSTRUCTIONS,
289					    	    SX_ROP(10, 42, 74, 15));
290						write_sx_reg(p, SX_INSTRUCTIONS,
291					    	 SX_ROP(26, 58, 90, count - 17));
292					} else {
293						write_sx_reg(p, SX_INSTRUCTIONS,
294					    	 SX_ROP(10, 42, 74, count - 1));
295					}
296
297					write_sx_io(p, d,
298					    SX_STM(74, count - 1, d & 7));
299				}
300				srcstart += srcinc;
301				dststart += dstinc;
302			}
303		}
304	}
305	exaMarkSync(pDstPixmap->drawable.pScreen);
306}
307
308static void
309CG14Copy8(PixmapPtr pDstPixmap,
310         int srcX, int srcY, int dstX, int dstY, int w, int h)
311{
312	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
313	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
314	int dstpitch, dstoff, srcpitch, srcoff;
315	int srcstart, dststart, xinc, srcinc, dstinc;
316	int line, count, s, d, num;
317
318	ENTER;
319	dstpitch = exaGetPixmapPitch(pDstPixmap);
320	dstoff = exaGetPixmapOffset(pDstPixmap);
321	srcpitch = p->srcpitch;
322	srcoff = p->srcoff;
323	/*
324	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
325	 * actually wrote anything and only sync if it did
326	 */
327	srcstart = srcX + (srcpitch * srcY) + srcoff;
328	dststart = dstX + (dstpitch * dstY) + dstoff;
329
330	/*
331	 * we always copy up to 32 pixels at a time so direction doesn't
332	 * matter if w<=32
333	 */
334	if (w > 32) {
335		if (p->xdir < 0) {
336			srcstart += (w - 32);
337			dststart += (w - 32);
338			xinc = -32;
339		} else
340			xinc = 32;
341	} else
342		xinc = 32;
343	if (p->ydir < 0) {
344		srcstart += (h - 1) * srcpitch;
345		dststart += (h - 1) * dstpitch;
346		srcinc = -srcpitch;
347		dstinc = -dstpitch;
348	} else {
349		srcinc = srcpitch;
350		dstinc = dstpitch;
351	}
352	if (p->last_rop == 0xcc) {
353		/* plain old copy */
354		if ( xinc > 0) {
355			/* going left to right */
356			for (line = 0; line < h; line++) {
357				count = 0;
358				s = srcstart;
359				d = dststart;
360				while ( count < w) {
361					num = min(32, w - count);
362					write_sx_io(p, s,
363					    SX_LDB(10, num - 1, s & 7));
364					write_sx_io(p, d,
365					    SX_STBM(10, num - 1, d & 7));
366					s += xinc;
367					d += xinc;
368					count += 32;
369				}
370				srcstart += srcinc;
371				dststart += dstinc;
372			}
373		} else {
374			/* going right to left */
375			int i, chunks = (w >> 5);
376			for (line = 0; line < h; line++) {
377				s = srcstart;
378				d = dststart;
379				count = w;
380				for (i = 0; i < chunks; i++) {
381					write_sx_io(p, s,
382					    SX_LDB(10, 31, s & 7));
383					write_sx_io(p, d,
384					    SX_STBM(10, 31, d & 7));
385					s -= 32;
386					d -= 32;
387					count -= 32;
388				}
389				/* leftovers, if any */
390				if (count > 0) {
391					s += (32 - count);
392					d += (32 - count);
393					write_sx_io(p, s,
394					    SX_LDB(10, count - 1, s & 7));
395					write_sx_io(p, d,
396					    SX_STBM(10, count - 1, d & 7));
397				}
398				srcstart += srcinc;
399				dststart += dstinc;
400			}
401		}
402	} else {
403		/* ROPs needed */
404		if ( xinc > 0) {
405			/* going left to right */
406			for (line = 0; line < h; line++) {
407				count = 0;
408				s = srcstart;
409				d = dststart;
410				while ( count < w) {
411					num = min(32, w - count);
412					write_sx_io(p, s,
413					    SX_LDB(10, num - 1, s & 7));
414					write_sx_io(p, d,
415					    SX_LDB(42, num - 1, d & 7));
416					if (num > 16) {
417						write_sx_reg(p, SX_INSTRUCTIONS,
418					    	 SX_ROP(10, 42, 74, 15));
419						write_sx_reg(p, SX_INSTRUCTIONS,
420					    	 SX_ROP(26, 58, 90, num - 17));
421					} else {
422						write_sx_reg(p, SX_INSTRUCTIONS,
423					    	 SX_ROP(10, 42, 74, num - 1));
424					}
425					write_sx_io(p, d,
426					    SX_STBM(74, num - 1, d & 7));
427					s += xinc;
428					d += xinc;
429					count += 32;
430				}
431				srcstart += srcinc;
432				dststart += dstinc;
433			}
434		} else {
435			/* going right to left */
436			int i, chunks = (w >> 5);
437			for (line = 0; line < h; line++) {
438				s = srcstart;
439				d = dststart;
440				count = w;
441				for (i = 0; i < chunks; i++) {
442					write_sx_io(p, s, SX_LDB(10, 31, s & 7));
443					write_sx_io(p, d, SX_LDB(42, 31, d & 7));
444					write_sx_reg(p, SX_INSTRUCTIONS,
445				    	    SX_ROP(10, 42, 74, 15));
446					write_sx_reg(p, SX_INSTRUCTIONS,
447				    	    SX_ROP(26, 58, 90, 15));
448					write_sx_io(p, d,
449					    SX_STBM(74, 31, d & 7));
450					s -= 128;
451					d -= 128;
452					count -= 32;
453				}
454				/* leftovers, if any */
455				if (count > 0) {
456					s += (32 - count);
457					d += (32 - count);
458					write_sx_io(p, s,
459					    SX_LDB(10, count - 1, s & 7));
460					write_sx_io(p, d,
461					    SX_LDB(42, count - 1, d & 7));
462					if (count > 16) {
463						write_sx_reg(p, SX_INSTRUCTIONS,
464					    	    SX_ROP(10, 42, 74, 15));
465						write_sx_reg(p, SX_INSTRUCTIONS,
466					    	 SX_ROP(26, 58, 90, count - 17));
467					} else {
468						write_sx_reg(p, SX_INSTRUCTIONS,
469					    	 SX_ROP(10, 42, 74, count - 1));
470					}
471
472					write_sx_io(p, d,
473					    SX_STBM(74, count - 1, d & 7));
474				}
475				srcstart += srcinc;
476				dststart += dstinc;
477			}
478		}
479	}
480	exaMarkSync(pDstPixmap->drawable.pScreen);
481}
482
483static void
484CG14DoneCopy(PixmapPtr pDstPixmap)
485{
486}
487
488static Bool
489CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
490{
491	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
492	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
493
494	ENTER;
495	DPRINTF(X_ERROR, "bits per pixel: %d\n",
496	    pPixmap->drawable.bitsPerPixel);
497	write_sx_reg(p, SX_QUEUED(8), fg);
498	write_sx_reg(p, SX_QUEUED(9), fg);
499	if (planemask != p->last_mask) {
500		CG14Wait(p);
501		write_sx_reg(p, SX_PLANEMASK, planemask);
502		p->last_mask = planemask;
503	}
504	alu = sx_rop[alu];
505	if (alu != p->last_rop) {
506		CG14Wait(p);
507		write_sx_reg(p, SX_ROP_CONTROL, alu);
508		p->last_rop = alu;
509	}
510	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
511	return TRUE;
512}
513
514static void
515CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
516{
517	int line, x, num;
518	uint32_t ptr;
519
520	ENTER;
521	if (p->last_rop == 0xcc) {
522		/* simple fill */
523		for (line = 0; line < h; line++) {
524			x = 0;
525			while (x < w) {
526				ptr = start + (x << 2);
527				num = min(32, w - x);
528				write_sx_io(p, ptr,
529				    SX_STS(8, num - 1, ptr & 7));
530				x += 32;
531			}
532			start += pitch;
533		}
534	} else if (p->last_rop == 0xaa) {
535		/* nothing to do here */
536		return;
537	} else {
538		/* alright, let's do actual ROP stuff */
539
540		/* first repeat the fill colour into 16 registers */
541		write_sx_reg(p, SX_INSTRUCTIONS,
542		    SX_SELECT_S(8, 8, 10, 15));
543
544		for (line = 0; line < h; line++) {
545			x = 0;
546			while (x < w) {
547				ptr = start + (x << 2);
548				num = min(32, w - x);
549				/* now suck fb data into registers */
550				write_sx_io(p, ptr,
551				    SX_LD(42, num - 1, ptr & 7));
552				/*
553				 * ROP them with the fill data we left in 10
554				 * non-memory ops can only have counts up to 16
555				 */
556				if (num <= 16) {
557					write_sx_reg(p, SX_INSTRUCTIONS,
558					    SX_ROP(10, 42, 74, num - 1));
559				} else {
560					write_sx_reg(p, SX_INSTRUCTIONS,
561					    SX_ROP(10, 42, 74, 15));
562					write_sx_reg(p, SX_INSTRUCTIONS,
563					    SX_ROP(10, 58, 90, num - 17));
564				}
565				/* and write the result back into memory */
566				write_sx_io(p, ptr,
567				    SX_ST(74, num - 1, ptr & 7));
568				x += 32;
569			}
570			start += pitch;
571		}
572	}
573}
574
575static void
576CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
577{
578	int line, x, num, off;
579	uint32_t ptr;
580
581	ENTER;
582	off = start & 7;
583	start &= ~7;
584
585	if (p->last_rop == 0xcc) {
586		/* simple fill */
587		for (line = 0; line < h; line++) {
588			x = 0;
589			while (x < w) {
590				ptr = start + x;
591				num = min(32, w - x);
592				write_sx_io(p, ptr,
593				    SX_STBS(8, num - 1, off));
594				x += 32;
595			}
596			start += pitch;
597		}
598	} else if (p->last_rop == 0xaa) {
599		/* nothing to do here */
600		return;
601	} else {
602		/* alright, let's do actual ROP stuff */
603
604		/* first repeat the fill colour into 16 registers */
605		write_sx_reg(p, SX_INSTRUCTIONS,
606		    SX_SELECT_S(8, 8, 10, 15));
607
608		for (line = 0; line < h; line++) {
609			x = 0;
610			while (x < w) {
611				ptr = start + x;
612				num = min(32, w - x);
613				/* now suck fb data into registers */
614				write_sx_io(p, ptr,
615				    SX_LDB(42, num - 1, off));
616				/*
617				 * ROP them with the fill data we left in 10
618				 * non-memory ops can only have counts up to 16
619				 */
620				if (num <= 16) {
621					write_sx_reg(p, SX_INSTRUCTIONS,
622					    SX_ROP(10, 42, 74, num - 1));
623				} else {
624					write_sx_reg(p, SX_INSTRUCTIONS,
625					    SX_ROP(10, 42, 74, 15));
626					write_sx_reg(p, SX_INSTRUCTIONS,
627					    SX_ROP(10, 58, 90, num - 17));
628				}
629				/* and write the result back into memory */
630				write_sx_io(p, ptr,
631				    SX_STB(74, num - 1, off));
632				x += 32;
633			}
634			start += pitch;
635		}
636	}
637}
638
639static void
640CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
641{
642	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
643	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
644	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
645	int start, depth;
646
647	ENTER;
648	dstpitch = exaGetPixmapPitch(pPixmap);
649	dstoff = exaGetPixmapOffset(pPixmap);
650
651	depth = pPixmap->drawable.bitsPerPixel;
652	switch (depth) {
653		case 32:
654			start = dstoff + (y1 * dstpitch) + (x1 << 2);
655			CG14Solid32(p, start, dstpitch, w, h);
656			break;
657		case 8:
658			start = dstoff + (y1 * dstpitch) + x1;
659			CG14Solid8(p, start, dstpitch, w, h);
660			break;
661	}
662
663	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
664	    dstpitch, dstoff, start);
665	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
666	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
667	exaMarkSync(pPixmap->drawable.pScreen);
668}
669
670/*
671 * Memcpy-based UTS.
672 */
673static Bool
674CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
675    char *src, int src_pitch)
676{
677	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
678	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
679	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
680	int    dst_pitch  = exaGetPixmapPitch(pDst);
681
682	int bpp    = pDst->drawable.bitsPerPixel;
683	int cpp    = (bpp + 7) >> 3;
684	int wBytes = w * cpp;
685
686	ENTER;
687	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
688	dst += (x * cpp) + (y * dst_pitch);
689
690	CG14Wait(p);
691
692	while (h--) {
693		memcpy(dst, src, wBytes);
694		src += src_pitch;
695		dst += dst_pitch;
696	}
697	__asm("stbar;");
698	return TRUE;
699}
700
701/*
702 * Memcpy-based DFS.
703 */
704static Bool
705CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
706    char *dst, int dst_pitch)
707{
708	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
709	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
710	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
711	int    src_pitch  = exaGetPixmapPitch(pSrc);
712
713	ENTER;
714	int bpp    = pSrc->drawable.bitsPerPixel;
715	int cpp    = (bpp + 7) >> 3;
716	int wBytes = w * cpp;
717
718	src += (x * cpp) + (y * src_pitch);
719
720	CG14Wait(p);
721
722	while (h--) {
723		memcpy(dst, src, wBytes);
724		src += src_pitch;
725		dst += dst_pitch;
726	}
727
728	return TRUE;
729}
730
731Bool
732CG14CheckComposite(int op, PicturePtr pSrcPicture,
733                           PicturePtr pMaskPicture,
734                           PicturePtr pDstPicture)
735{
736	int i, ok = FALSE;
737
738	ENTER;
739
740	/*
741	 * SX is in theory capable of accelerating pretty much all Xrender ops,
742	 * even coordinate transformation and gradients. Support will be added
743	 * over time and likely have to spill over into its own source file.
744	 */
745
746	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
747		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
748		return FALSE;
749	}
750
751	if (pSrcPicture != NULL) {
752		i = 0;
753		while ((i < arraysize(src_formats)) && (!ok)) {
754			ok =  (pSrcPicture->format == src_formats[i]);
755			i++;
756		}
757
758		if (!ok) {
759			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
760			    __func__, pSrcPicture->format);
761			return FALSE;
762		}
763		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
764	}
765
766	if (pDstPicture != NULL) {
767		i = 0;
768		ok = FALSE;
769		while ((i < arraysize(src_formats)) && (!ok)) {
770			ok =  (pDstPicture->format == src_formats[i]);
771			i++;
772		}
773
774		if (!ok) {
775			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
776			    __func__, pDstPicture->format);
777			return FALSE;
778		}
779		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
780	}
781
782	if (pMaskPicture != NULL) {
783		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
784		    pMaskPicture->pDrawable->width,
785		    pMaskPicture->pDrawable->height);
786	}
787	return TRUE;
788}
789
790Bool
791CG14PrepareComposite(int op, PicturePtr pSrcPicture,
792                             PicturePtr pMaskPicture,
793                             PicturePtr pDstPicture,
794                             PixmapPtr  pSrc,
795                             PixmapPtr  pMask,
796                             PixmapPtr  pDst)
797{
798	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
799	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
800
801	ENTER;
802
803	p->no_source_pixmap = FALSE;
804	p->source_is_solid = FALSE;
805
806	if (pSrcPicture->format == PICT_a1) {
807		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
808		    pDstPicture->format, op);
809		if (pMaskPicture != NULL) {
810			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
811		}
812	}
813	if (pSrcPicture->pSourcePict != NULL) {
814		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
815			p->fillcolour =
816			    pSrcPicture->pSourcePict->solidFill.color;
817			DPRINTF(X_ERROR, "%s: solid src %08x\n",
818			    __func__, p->fillcolour);
819			p->no_source_pixmap = TRUE;
820			p->source_is_solid = TRUE;
821		}
822	}
823	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
824		if (pMaskPicture->pSourcePict->type ==
825		    SourcePictTypeSolidFill) {
826			p->fillcolour =
827			   pMaskPicture->pSourcePict->solidFill.color;
828			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
829			    __func__, p->fillcolour);
830		}
831	}
832	if (pMaskPicture != NULL) {
833		p->mskoff = exaGetPixmapOffset(pMask);
834		p->mskpitch = exaGetPixmapPitch(pMask);
835		p->mskformat = pMaskPicture->format;
836	} else {
837		p->mskoff = 0;
838		p->mskpitch = 0;
839		p->mskformat = 0;
840	}
841	if (pSrc != NULL) {
842		p->source_is_solid =
843		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
844		p->srcoff = exaGetPixmapOffset(pSrc);
845		p->srcpitch = exaGetPixmapPitch(pSrc);
846		if (p->source_is_solid) {
847			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
848		}
849	}
850	p->srcformat = pSrcPicture->format;
851	p->dstformat = pDstPicture->format;
852
853	if (p->source_is_solid) {
854		uint32_t temp;
855
856		/* stuff source colour into SX registers, swap as needed */
857		temp = p->fillcolour;
858		switch (p->srcformat) {
859			case PICT_a8r8g8b8:
860			case PICT_x8r8g8b8:
861				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
862				temp = temp >> 8;
863				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
864				temp = temp >> 8;
865				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
866				break;
867			case PICT_a8b8g8r8:
868			case PICT_x8b8g8r8:
869				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
870				temp = temp >> 8;
871				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
872				temp = temp >> 8;
873				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
874				break;
875		}
876		write_sx_reg(p, SX_QUEUED(8), 0xff);
877	}
878	p->op = op;
879	if (op == PictOpSrc) {
880		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
881	}
882#ifdef SX_DEBUG
883	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
884	    *(uint32_t *)(p->fb + p->srcoff));
885#endif
886	return TRUE;
887}
888
889void
890CG14Composite(PixmapPtr pDst, int srcX, int srcY,
891                              int maskX, int maskY,
892                              int dstX, int dstY,
893                              int width, int height)
894{
895	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
896	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
897	uint32_t dstoff, dstpitch;
898	uint32_t dst, msk, src;
899	int flip = 0;
900
901	ENTER;
902	dstoff = exaGetPixmapOffset(pDst);
903	dstpitch = exaGetPixmapPitch(pDst);
904
905	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
906		PICT_FORMAT_TYPE(p->dstformat));
907
908	switch (p->op) {
909		case PictOpOver:
910			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
911			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
912			    p->mskformat, p->dstformat, srcX, srcY);
913			if (p->source_is_solid) {
914				switch (p->mskformat) {
915					case PICT_a8:
916						msk = p->mskoff +
917						    (maskY * p->mskpitch) +
918						    maskX;
919						CG14Comp_Over8Solid(p,
920						    msk, p->mskpitch,
921						    dst, dstpitch,
922						    width, height);
923						break;
924					case PICT_a8r8g8b8:
925					case PICT_a8b8g8r8:
926						msk = p->mskoff +
927						    (maskY * p->mskpitch) +
928						    (maskX << 2);
929						CG14Comp_Over32Solid(p,
930						    msk, p->mskpitch,
931						    dst, dstpitch,
932						    width, height);
933						break;
934					default:
935						xf86Msg(X_ERROR,
936						  "unsupported mask format %08x\n", p->mskformat);
937				}
938			} else {
939				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
940				    p->mskformat);
941				switch (p->srcformat) {
942					case PICT_a8r8g8b8:
943					case PICT_a8b8g8r8:
944						src = p->srcoff +
945						    (srcY * p->srcpitch) +
946						    (srcX << 2);
947						dst = dstoff +
948						    (dstY * dstpitch) +
949						    (dstX << 2);
950						if (p->mskformat == PICT_a8) {
951							msk = p->mskoff +
952							    (maskY * p->mskpitch) +
953							    maskX;
954							CG14Comp_Over32Mask(p,
955							    src, p->srcpitch,
956							    msk, p->mskpitch,
957							    dst, dstpitch,
958							    width, height, flip);
959						} else {
960							CG14Comp_Over32(p,
961							    src, p->srcpitch,
962							    dst, dstpitch,
963							    width, height, flip);
964						}
965						break;
966					case PICT_x8r8g8b8:
967					case PICT_x8b8g8r8:
968						src = p->srcoff +
969						    (srcY * p->srcpitch) +
970						    (srcX << 2);
971						dst = dstoff +
972						    (dstY * dstpitch) +
973						    (dstX << 2);
974						if (p->mskformat == PICT_a8) {
975							msk = p->mskoff +
976							    (maskY * p->mskpitch) +
977							    maskX;
978							CG14Comp_Over32Mask_noalpha(p,
979							    src, p->srcpitch,
980							    msk, p->mskpitch,
981							    dst, dstpitch,
982							    width, height, flip);
983						} else if ((p->mskformat == PICT_a8r8g8b8) ||
984							   (p->mskformat == PICT_a8b8g8r8)) {
985							msk = p->mskoff +
986							    (maskY * p->mskpitch) +
987							    (maskX << 2);
988							CG14Comp_Over32Mask32_noalpha(p,
989							    src, p->srcpitch,
990							    msk, p->mskpitch,
991							    dst, dstpitch,
992							    width, height, flip);
993						} else {
994							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
995						}
996						break;
997					default:
998						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
999						    __func__, p->srcformat);
1000				}
1001			}
1002			break;
1003		case PictOpAdd:
1004			DPRINTF(X_ERROR, "Add %08x %08x\n",
1005			    p->srcformat, p->dstformat);
1006			switch (p->srcformat) {
1007				case PICT_a8:
1008					src = p->srcoff +
1009					    (srcY * p->srcpitch) + srcX;
1010					if (p->dstformat == PICT_a8) {
1011						dst = dstoff +
1012						      (dstY * dstpitch) + dstX;
1013						CG14Comp_Add8(p,
1014						    src, p->srcpitch,
1015						    dst, dstpitch,
1016						    width, height);
1017					} else {
1018						dst = dstoff +
1019						      (dstY * dstpitch) +
1020						      (dstX << 2);
1021						CG14Comp_Add8_32(p,
1022						    src, p->srcpitch,
1023						    dst, dstpitch,
1024						    width, height);
1025					}
1026					break;
1027				case PICT_a8r8g8b8:
1028				case PICT_x8r8g8b8:
1029					src = p->srcoff +
1030					    (srcY * p->srcpitch) + (srcX << 2);
1031					dst = dstoff + (dstY * dstpitch) +
1032					    (dstX << 2);
1033					CG14Comp_Add32(p, src, p->srcpitch,
1034					    dst, dstpitch, width, height);
1035					break;
1036				default:
1037					xf86Msg(X_ERROR,
1038					    "unsupported src format\n");
1039			}
1040			break;
1041		case PictOpSrc:
1042			DPRINTF(X_ERROR, "Src %08x %08x\n",
1043			    p->srcformat, p->dstformat);
1044			if (p->mskformat != 0)
1045				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
1046			if (p->srcformat == PICT_a8) {
1047				CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
1048			} else {
1049				/* convert between RGB and BGR? */
1050				CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
1051			}
1052			break;
1053		default:
1054			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
1055	}
1056	exaMarkSync(pDst->drawable.pScreen);
1057}
1058
1059
1060
1061Bool
1062CG14InitAccel(ScreenPtr pScreen)
1063{
1064	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1065	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1066	ExaDriverPtr pExa;
1067
1068	pExa = exaDriverAlloc();
1069	if (!pExa)
1070		return FALSE;
1071
1072	p->pExa = pExa;
1073
1074	pExa->exa_major = EXA_VERSION_MAJOR;
1075	pExa->exa_minor = EXA_VERSION_MINOR;
1076
1077	pExa->memoryBase = p->fb;
1078	pExa->memorySize = p->memsize;
1079	pExa->offScreenBase = p->width * p->height * 4;
1080
1081	/*
1082	 * SX memory instructions are written to 64bit aligned addresses with
1083	 * a 3 bit displacement. Make sure the displacement remains constant
1084	 * within one column
1085	 */
1086
1087	pExa->pixmapOffsetAlign = 8;
1088	pExa->pixmapPitchAlign = 8;
1089
1090	pExa->flags = EXA_OFFSCREEN_PIXMAPS
1091		      | EXA_SUPPORTS_OFFSCREEN_OVERLAPS
1092		      /*| EXA_MIXED_PIXMAPS*/;
1093
1094	/*
1095	 * these limits are bogus
1096	 * SX doesn't deal with coordinates at all, so there is no limit but
1097	 * we have to put something here
1098	 */
1099	pExa->maxX = 4096;
1100	pExa->maxY = 4096;
1101
1102	pExa->WaitMarker = CG14WaitMarker;
1103
1104	pExa->PrepareSolid = CG14PrepareSolid;
1105	pExa->Solid = CG14Solid;
1106	pExa->DoneSolid = CG14DoneCopy;
1107	pExa->PrepareCopy = CG14PrepareCopy;
1108	pExa->Copy = CG14Copy32;
1109	pExa->DoneCopy = CG14DoneCopy;
1110	if (p->use_xrender) {
1111		pExa->CheckComposite = CG14CheckComposite;
1112		pExa->PrepareComposite = CG14PrepareComposite;
1113		pExa->Composite = CG14Composite;
1114		pExa->DoneComposite = CG14DoneCopy;
1115	}
1116
1117	/* EXA hits more optimized paths when it does not have to fallback
1118	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
1119	 */
1120	pExa->UploadToScreen = CG14UploadToScreen;
1121	pExa->DownloadFromScreen = CG14DownloadFromScreen;
1122
1123	p->queuecount = 0;
1124	/* do some hardware init */
1125	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
1126	p->last_mask = 0xffffffff;
1127	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
1128	p->last_rop = 0xcc;
1129	return exaDriverInit(pScreen, pExa);
1130}
1131