cg14_accel.c revision b46cab2a
1/* $NetBSD: cg14_accel.c,v 1.17 2021/12/03 06:10:07 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include <sys/types.h>
37
38/* all driver need this */
39#include "xf86.h"
40#include "xf86_OSproc.h"
41#include "compiler.h"
42
43#include "cg14.h"
44
//#define SX_DEBUG

/*
 * Debug helpers.
 * With SX_DEBUG defined, ENTER logs the name of the enclosing function and
 * DPRINTF is plain xf86Msg().  Without it ENTER expands to nothing and the
 * DPRINTF call is parked behind "while (0)", so its arguments are still
 * compiled but never evaluated at run time.
 */
#ifdef SX_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Number of elements in a real array (not a pointer).  The argument is
 * parenthesized before indexing so that expressions such as *ptr_to_array
 * are measured as intended.
 */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
56
/*
 * Lookup table indexed by the alu (raster op) value handed to the Prepare
 * hooks; the entry is what gets written to SX_ROP_CONTROL.
 * 0xcc is SX's GXcopy equivalent
 */
uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee,
		      0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff};

/*
 * Picture formats CG14CheckComposite() accepts; the same list is used for
 * both the source and the destination picture.
 */
int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
/* NOTE(review): not referenced anywhere in the visible part of this file */
int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};

/*
 * Forward declarations: CG14PrepareCopy() installs one of these two as the
 * EXA Copy hook before they are defined.
 */
static void CG14Copy32(PixmapPtr, int, int, int, int, int, int);
static void CG14Copy8(PixmapPtr, int, int, int, int, int, int);
67
68static inline void
69CG14Wait(Cg14Ptr p)
70{
71	int bail = 10000000;
72	/* we wait for the busy bit to clear */
73	while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) &&
74	       (bail > 0)) {
75		bail--;
76	};
77	if (bail == 0) {
78		xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n",
79		    read_sx_reg(p, SX_CONTROL_STATUS),
80		    read_sx_reg(p, SX_ERROR));
81	}
82}
83
84static void
85CG14WaitMarker(ScreenPtr pScreen, int Marker)
86{
87	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
88	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
89
90	CG14Wait(p);
91}
92
93static Bool
94CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
95		int xdir, int ydir, int alu, Pixel planemask)
96{
97	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
98	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
99
100	ENTER;
101	DPRINTF(X_ERROR, "bits per pixel: %d\n",
102	    pSrcPixmap->drawable.bitsPerPixel);
103
104	if (planemask != p->last_mask) {
105		CG14Wait(p);
106		write_sx_reg(p, SX_PLANEMASK, planemask);
107		p->last_mask = planemask;
108	}
109	alu = sx_rop[alu];
110	if (alu != p->last_rop) {
111		CG14Wait(p);
112		write_sx_reg(p, SX_ROP_CONTROL, alu);
113		p->last_rop = alu;
114	}
115	switch (pSrcPixmap->drawable.bitsPerPixel)  {
116		case 8:
117			p->pExa->Copy = CG14Copy8;
118			break;
119		case 32:
120			p->pExa->Copy = CG14Copy32;
121			break;
122		default:
123			xf86Msg(X_ERROR, "%s depth %d\n", __func__,
124			    pSrcPixmap->drawable.bitsPerPixel);
125	}
126	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
127	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
128	p->xdir = xdir;
129	p->ydir = ydir;
130	return TRUE;
131}
132
133static void
134CG14Copy32(PixmapPtr pDstPixmap,
135         int srcX, int srcY, int dstX, int dstY, int w, int h)
136{
137	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
138	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
139	int dstpitch, dstoff, srcpitch, srcoff;
140	int srcstart, dststart, xinc, srcinc, dstinc;
141	int line, count, s, d, num;
142
143	ENTER;
144	dstpitch = exaGetPixmapPitch(pDstPixmap);
145	dstoff = exaGetPixmapOffset(pDstPixmap);
146	srcpitch = p->srcpitch;
147	srcoff = p->srcoff;
148	/*
149	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
150	 * actually wrote anything and only sync if it did
151	 */
152	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
153	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
154
155	/*
156	 * we always copy up to 32 pixels at a time so direction doesn't
157	 * matter if w<=32
158	 */
159	if (w > 32) {
160		if (p->xdir < 0) {
161			srcstart += (w - 32) << 2;
162			dststart += (w - 32) << 2;
163			xinc = -128;
164		} else
165			xinc = 128;
166	} else
167		xinc = 128;
168	if (p->ydir < 0) {
169		srcstart += (h - 1) * srcpitch;
170		dststart += (h - 1) * dstpitch;
171		srcinc = -srcpitch;
172		dstinc = -dstpitch;
173	} else {
174		srcinc = srcpitch;
175		dstinc = dstpitch;
176	}
177	if (p->last_rop == 0xcc) {
178		/* plain old copy */
179		if ( xinc > 0) {
180			/* going left to right */
181			for (line = 0; line < h; line++) {
182				count = 0;
183				s = srcstart;
184				d = dststart;
185				while ( count < w) {
186					num = min(32, w - count);
187					write_sx_io(p, s,
188					    SX_LD(10, num - 1, s & 7));
189					write_sx_io(p, d,
190					    SX_STM(10, num - 1, d & 7));
191					s += xinc;
192					d += xinc;
193					count += 32;
194				}
195				srcstart += srcinc;
196				dststart += dstinc;
197			}
198		} else {
199			/* going right to left */
200			int i, chunks = (w >> 5);
201			for (line = 0; line < h; line++) {
202				s = srcstart;
203				d = dststart;
204				count = w;
205				for (i = 0; i < chunks; i++) {
206					write_sx_io(p, s,
207					    SX_LD(10, 31, s & 7));
208					write_sx_io(p, d,
209					    SX_STM(10, 31, d & 7));
210					s -= 128;
211					d -= 128;
212					count -= 32;
213				}
214				/* leftovers, if any */
215				if (count > 0) {
216					s += (32 - count) << 2;
217					d += (32 - count) << 2;
218					write_sx_io(p, s,
219					    SX_LD(10, count - 1, s & 7));
220					write_sx_io(p, d,
221					    SX_STM(10, count - 1, d & 7));
222				}
223				srcstart += srcinc;
224				dststart += dstinc;
225			}
226		}
227	} else {
228		/* ROPs needed */
229		if ( xinc > 0) {
230			/* going left to right */
231			for (line = 0; line < h; line++) {
232				count = 0;
233				s = srcstart;
234				d = dststart;
235				while ( count < w) {
236					num = min(32, w - count);
237					write_sx_io(p, s,
238					    SX_LD(10, num - 1, s & 7));
239					write_sx_io(p, d,
240					    SX_LD(42, num - 1, d & 7));
241					if (num > 16) {
242						write_sx_reg(p, SX_INSTRUCTIONS,
243					    	 SX_ROP(10, 42, 74, 15));
244						write_sx_reg(p, SX_INSTRUCTIONS,
245					    	 SX_ROP(26, 58, 90, num - 17));
246					} else {
247						write_sx_reg(p, SX_INSTRUCTIONS,
248					    	 SX_ROP(10, 42, 74, num - 1));
249					}
250					write_sx_io(p, d,
251					    SX_STM(74, num - 1, d & 7));
252					s += xinc;
253					d += xinc;
254					count += 32;
255				}
256				srcstart += srcinc;
257				dststart += dstinc;
258			}
259		} else {
260			/* going right to left */
261			int i, chunks = (w >> 5);
262			for (line = 0; line < h; line++) {
263				s = srcstart;
264				d = dststart;
265				count = w;
266				for (i = 0; i < chunks; i++) {
267					write_sx_io(p, s, SX_LD(10, 31, s & 7));
268					write_sx_io(p, d, SX_LD(42, 31, d & 7));
269					write_sx_reg(p, SX_INSTRUCTIONS,
270				    	    SX_ROP(10, 42, 74, 15));
271					write_sx_reg(p, SX_INSTRUCTIONS,
272				    	    SX_ROP(26, 58, 90, 15));
273					write_sx_io(p, d,
274					    SX_STM(74, 31, d & 7));
275					s -= 128;
276					d -= 128;
277					count -= 32;
278				}
279				/* leftovers, if any */
280				if (count > 0) {
281					s += (32 - count) << 2;
282					d += (32 - count) << 2;
283					write_sx_io(p, s,
284					    SX_LD(10, count - 1, s & 7));
285					write_sx_io(p, d,
286					    SX_LD(42, count - 1, d & 7));
287					if (count > 16) {
288						write_sx_reg(p, SX_INSTRUCTIONS,
289					    	    SX_ROP(10, 42, 74, 15));
290						write_sx_reg(p, SX_INSTRUCTIONS,
291					    	 SX_ROP(26, 58, 90, count - 17));
292					} else {
293						write_sx_reg(p, SX_INSTRUCTIONS,
294					    	 SX_ROP(10, 42, 74, count - 1));
295					}
296
297					write_sx_io(p, d,
298					    SX_STM(74, count - 1, d & 7));
299				}
300				srcstart += srcinc;
301				dststart += dstinc;
302			}
303		}
304	}
305	exaMarkSync(pDstPixmap->drawable.pScreen);
306}
307
308static void
309CG14Copy8(PixmapPtr pDstPixmap,
310         int srcX, int srcY, int dstX, int dstY, int w, int h)
311{
312	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
313	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
314	int dstpitch, dstoff, srcpitch, srcoff;
315	int srcstart, dststart, xinc, srcinc, dstinc;
316	int line, count, s, d, num;
317
318	ENTER;
319	dstpitch = exaGetPixmapPitch(pDstPixmap);
320	dstoff = exaGetPixmapOffset(pDstPixmap);
321	srcpitch = p->srcpitch;
322	srcoff = p->srcoff;
323	/*
324	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
325	 * actually wrote anything and only sync if it did
326	 */
327	srcstart = srcX + (srcpitch * srcY) + srcoff;
328	dststart = dstX + (dstpitch * dstY) + dstoff;
329
330	/*
331	 * we always copy up to 32 pixels at a time so direction doesn't
332	 * matter if w<=32
333	 */
334	if (w > 32) {
335		if (p->xdir < 0) {
336			srcstart += (w - 32);
337			dststart += (w - 32);
338			xinc = -32;
339		} else
340			xinc = 32;
341	} else
342		xinc = 32;
343	if (p->ydir < 0) {
344		srcstart += (h - 1) * srcpitch;
345		dststart += (h - 1) * dstpitch;
346		srcinc = -srcpitch;
347		dstinc = -dstpitch;
348	} else {
349		srcinc = srcpitch;
350		dstinc = dstpitch;
351	}
352	if (p->last_rop == 0xcc) {
353		/* plain old copy */
354		if ( xinc > 0) {
355			/* going left to right */
356			for (line = 0; line < h; line++) {
357				count = 0;
358				s = srcstart;
359				d = dststart;
360				while ( count < w) {
361					num = min(32, w - count);
362					write_sx_io(p, s,
363					    SX_LDB(10, num - 1, s & 7));
364					write_sx_io(p, d,
365					    SX_STBM(10, num - 1, d & 7));
366					s += xinc;
367					d += xinc;
368					count += 32;
369				}
370				srcstart += srcinc;
371				dststart += dstinc;
372			}
373		} else {
374			/* going right to left */
375			int i, chunks = (w >> 5);
376			for (line = 0; line < h; line++) {
377				s = srcstart;
378				d = dststart;
379				count = w;
380				for (i = 0; i < chunks; i++) {
381					write_sx_io(p, s,
382					    SX_LDB(10, 31, s & 7));
383					write_sx_io(p, d,
384					    SX_STBM(10, 31, d & 7));
385					s -= 32;
386					d -= 32;
387					count -= 32;
388				}
389				/* leftovers, if any */
390				if (count > 0) {
391					s += (32 - count);
392					d += (32 - count);
393					write_sx_io(p, s,
394					    SX_LDB(10, count - 1, s & 7));
395					write_sx_io(p, d,
396					    SX_STBM(10, count - 1, d & 7));
397				}
398				srcstart += srcinc;
399				dststart += dstinc;
400			}
401		}
402	} else {
403		/* ROPs needed */
404		if ( xinc > 0) {
405			/* going left to right */
406			for (line = 0; line < h; line++) {
407				count = 0;
408				s = srcstart;
409				d = dststart;
410				while ( count < w) {
411					num = min(32, w - count);
412					write_sx_io(p, s,
413					    SX_LDB(10, num - 1, s & 7));
414					write_sx_io(p, d,
415					    SX_LDB(42, num - 1, d & 7));
416					if (num > 16) {
417						write_sx_reg(p, SX_INSTRUCTIONS,
418					    	 SX_ROP(10, 42, 74, 15));
419						write_sx_reg(p, SX_INSTRUCTIONS,
420					    	 SX_ROP(26, 58, 90, num - 17));
421					} else {
422						write_sx_reg(p, SX_INSTRUCTIONS,
423					    	 SX_ROP(10, 42, 74, num - 1));
424					}
425					write_sx_io(p, d,
426					    SX_STBM(74, num - 1, d & 7));
427					s += xinc;
428					d += xinc;
429					count += 32;
430				}
431				srcstart += srcinc;
432				dststart += dstinc;
433			}
434		} else {
435			/* going right to left */
436			int i, chunks = (w >> 5);
437			for (line = 0; line < h; line++) {
438				s = srcstart;
439				d = dststart;
440				count = w;
441				for (i = 0; i < chunks; i++) {
442					write_sx_io(p, s, SX_LDB(10, 31, s & 7));
443					write_sx_io(p, d, SX_LDB(42, 31, d & 7));
444					write_sx_reg(p, SX_INSTRUCTIONS,
445				    	    SX_ROP(10, 42, 74, 15));
446					write_sx_reg(p, SX_INSTRUCTIONS,
447				    	    SX_ROP(26, 58, 90, 15));
448					write_sx_io(p, d,
449					    SX_STBM(74, 31, d & 7));
450					s -= 128;
451					d -= 128;
452					count -= 32;
453				}
454				/* leftovers, if any */
455				if (count > 0) {
456					s += (32 - count);
457					d += (32 - count);
458					write_sx_io(p, s,
459					    SX_LDB(10, count - 1, s & 7));
460					write_sx_io(p, d,
461					    SX_LDB(42, count - 1, d & 7));
462					if (count > 16) {
463						write_sx_reg(p, SX_INSTRUCTIONS,
464					    	    SX_ROP(10, 42, 74, 15));
465						write_sx_reg(p, SX_INSTRUCTIONS,
466					    	 SX_ROP(26, 58, 90, count - 17));
467					} else {
468						write_sx_reg(p, SX_INSTRUCTIONS,
469					    	 SX_ROP(10, 42, 74, count - 1));
470					}
471
472					write_sx_io(p, d,
473					    SX_STBM(74, count - 1, d & 7));
474				}
475				srcstart += srcinc;
476				dststart += dstinc;
477			}
478		}
479	}
480	exaMarkSync(pDstPixmap->drawable.pScreen);
481}
482
483static void
484CG14DoneCopy(PixmapPtr pDstPixmap)
485{
486}
487
488static Bool
489CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
490{
491	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
492	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
493
494	ENTER;
495	DPRINTF(X_ERROR, "bits per pixel: %d %08x\n",
496	    pPixmap->drawable.bitsPerPixel, fg);
497
498	/* repeat the colour in every sub byte if we're in 8 bit */
499	if (pPixmap->drawable.bitsPerPixel == 8) {
500		fg |= fg << 8;
501		fg |= fg << 16;
502	}
503	write_sx_reg(p, SX_QUEUED(8), fg);
504	write_sx_reg(p, SX_QUEUED(9), fg);
505	if (planemask != p->last_mask) {
506		CG14Wait(p);
507		write_sx_reg(p, SX_PLANEMASK, planemask);
508		p->last_mask = planemask;
509	}
510	alu = sx_rop[alu];
511	if (alu != p->last_rop) {
512		CG14Wait(p);
513		write_sx_reg(p, SX_ROP_CONTROL, alu);
514		p->last_rop = alu;
515	}
516	if (0) return FALSE;
517	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
518	return TRUE;
519}
520
521static void
522CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
523{
524	int line, x, num;
525	uint32_t ptr;
526
527	ENTER;
528	if (p->last_rop == 0xcc) {
529		/* simple fill */
530		for (line = 0; line < h; line++) {
531			x = 0;
532			while (x < w) {
533				ptr = start + (x << 2);
534				num = min(32, w - x);
535				write_sx_io(p, ptr,
536				    SX_STS(8, num - 1, ptr & 7));
537				x += 32;
538			}
539			start += pitch;
540		}
541	} else if (p->last_rop == 0xaa) {
542		/* nothing to do here */
543		return;
544	} else {
545		/* alright, let's do actual ROP stuff */
546
547		/* first repeat the fill colour into 16 registers */
548		write_sx_reg(p, SX_INSTRUCTIONS,
549		    SX_SELECT_S(8, 8, 10, 15));
550
551		for (line = 0; line < h; line++) {
552			x = 0;
553			while (x < w) {
554				ptr = start + (x << 2);
555				num = min(32, w - x);
556				/* now suck fb data into registers */
557				write_sx_io(p, ptr,
558				    SX_LD(42, num - 1, ptr & 7));
559				/*
560				 * ROP them with the fill data we left in 10
561				 * non-memory ops can only have counts up to 16
562				 */
563				if (num <= 16) {
564					write_sx_reg(p, SX_INSTRUCTIONS,
565					    SX_ROP(10, 42, 74, num - 1));
566				} else {
567					write_sx_reg(p, SX_INSTRUCTIONS,
568					    SX_ROP(10, 42, 74, 15));
569					write_sx_reg(p, SX_INSTRUCTIONS,
570					    SX_ROP(10, 58, 90, num - 17));
571				}
572				/* and write the result back into memory */
573				write_sx_io(p, ptr,
574				    SX_ST(74, num - 1, ptr & 7));
575				x += 32;
576			}
577			start += pitch;
578		}
579	}
580}
581
582static void
583CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
584{
585	int line, x, num, off, pre, cnt;
586	uint32_t ptr;
587
588	ENTER;
589	pre = start & 3;
590	if (pre != 0) pre = 4 - pre;
591
592	if (p->last_rop == 0xcc) {
593		/* simple fill */
594		for (line = 0; line < h; line++) {
595			ptr = start;
596			cnt = w;
597			pre = min(pre, cnt);
598			if (pre) {
599				write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7));
600				ptr += pre;
601				cnt -= pre;
602				if (cnt == 0) goto next;
603			}
604			/* now do the aligned pixels in 32bit chunks */
605			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
606			while(cnt > 3) {
607				num = min(32, cnt >> 2);
608				write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7));
609				ptr += num << 2;
610				cnt -= num << 2;
611			}
612			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
613			if (cnt > 0) {
614				write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7));
615			}
616			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
617next:
618			start += pitch;
619		}
620	} else if (p->last_rop == 0xaa) {
621		/* nothing to do here */
622		return;
623	} else {
624		/* alright, let's do actual ROP stuff */
625		off = start & 7;
626		start &= ~7;
627
628		/* first repeat the fill colour into 16 registers */
629		write_sx_reg(p, SX_INSTRUCTIONS,
630		    SX_SELECT_S(8, 8, 10, 15));
631
632		for (line = 0; line < h; line++) {
633			x = 0;
634			while (x < w) {
635				ptr = start + x;
636				num = min(32, w - x);
637				/* now suck fb data into registers */
638				write_sx_io(p, ptr,
639				    SX_LDB(42, num - 1, off));
640				/*
641				 * ROP them with the fill data we left in 10
642				 * non-memory ops can only have counts up to 16
643				 */
644				if (num <= 16) {
645					write_sx_reg(p, SX_INSTRUCTIONS,
646					    SX_ROP(10, 42, 74, num - 1));
647				} else {
648					write_sx_reg(p, SX_INSTRUCTIONS,
649					    SX_ROP(10, 42, 74, 15));
650					write_sx_reg(p, SX_INSTRUCTIONS,
651					    SX_ROP(10, 58, 90, num - 17));
652				}
653				/* and write the result back into memory */
654				write_sx_io(p, ptr,
655				    SX_STB(74, num - 1, off));
656				x += 32;
657			}
658			start += pitch;
659		}
660	}
661}
662
663static void
664CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
665{
666	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
667	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
668	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
669	int start, depth;
670
671	ENTER;
672	dstpitch = exaGetPixmapPitch(pPixmap);
673	dstoff = exaGetPixmapOffset(pPixmap);
674
675	depth = pPixmap->drawable.bitsPerPixel;
676	switch (depth) {
677		case 32:
678			start = dstoff + (y1 * dstpitch) + (x1 << 2);
679			CG14Solid32(p, start, dstpitch, w, h);
680			break;
681		case 8:
682			start = dstoff + (y1 * dstpitch) + x1;
683			CG14Solid8(p, start, dstpitch, w, h);
684			break;
685	}
686
687	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
688	    dstpitch, dstoff, start);
689	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
690	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
691	exaMarkSync(pPixmap->drawable.pScreen);
692}
693
694/*
695 * Memcpy-based UTS.
696 */
697static Bool
698CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
699    char *src, int src_pitch)
700{
701	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
702	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
703	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
704	int    dst_pitch  = exaGetPixmapPitch(pDst);
705
706	int bpp    = pDst->drawable.bitsPerPixel;
707	int cpp    = (bpp + 7) >> 3;
708	int wBytes = w * cpp;
709
710	ENTER;
711	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
712	dst += (x * cpp) + (y * dst_pitch);
713
714	CG14Wait(p);
715
716	while (h--) {
717		memcpy(dst, src, wBytes);
718		src += src_pitch;
719		dst += dst_pitch;
720	}
721	__asm("stbar;");
722	return TRUE;
723}
724
725/*
726 * Memcpy-based DFS.
727 */
728static Bool
729CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
730    char *dst, int dst_pitch)
731{
732	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
733	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
734	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
735	int    src_pitch  = exaGetPixmapPitch(pSrc);
736
737	ENTER;
738	int bpp    = pSrc->drawable.bitsPerPixel;
739	int cpp    = (bpp + 7) >> 3;
740	int wBytes = w * cpp;
741
742	src += (x * cpp) + (y * src_pitch);
743
744	CG14Wait(p);
745
746	while (h--) {
747		memcpy(dst, src, wBytes);
748		src += src_pitch;
749		dst += dst_pitch;
750	}
751
752	return TRUE;
753}
754
755Bool
756CG14CheckComposite(int op, PicturePtr pSrcPicture,
757                           PicturePtr pMaskPicture,
758                           PicturePtr pDstPicture)
759{
760	int i, ok = FALSE;
761
762	ENTER;
763
764	/*
765	 * SX is in theory capable of accelerating pretty much all Xrender ops,
766	 * even coordinate transformation and gradients. Support will be added
767	 * over time and likely have to spill over into its own source file.
768	 */
769
770	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
771		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
772		return FALSE;
773	}
774
775	if (pSrcPicture != NULL) {
776		i = 0;
777		while ((i < arraysize(src_formats)) && (!ok)) {
778			ok =  (pSrcPicture->format == src_formats[i]);
779			i++;
780		}
781
782		if (!ok) {
783			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
784			    __func__, pSrcPicture->format);
785			return FALSE;
786		}
787		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
788	}
789
790	if (pDstPicture != NULL) {
791		i = 0;
792		ok = FALSE;
793		while ((i < arraysize(src_formats)) && (!ok)) {
794			ok =  (pDstPicture->format == src_formats[i]);
795			i++;
796		}
797
798		if (!ok) {
799			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
800			    __func__, pDstPicture->format);
801			return FALSE;
802		}
803		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
804	}
805
806	if (pMaskPicture != NULL) {
807		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
808		    pMaskPicture->pDrawable->width,
809		    pMaskPicture->pDrawable->height);
810	}
811	return TRUE;
812}
813
814Bool
815CG14PrepareComposite(int op, PicturePtr pSrcPicture,
816                             PicturePtr pMaskPicture,
817                             PicturePtr pDstPicture,
818                             PixmapPtr  pSrc,
819                             PixmapPtr  pMask,
820                             PixmapPtr  pDst)
821{
822	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
823	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
824
825	ENTER;
826
827	p->no_source_pixmap = FALSE;
828	p->source_is_solid = FALSE;
829
830	if (pSrcPicture->format == PICT_a1) {
831		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
832		    pDstPicture->format, op);
833		if (pMaskPicture != NULL) {
834			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
835		}
836	}
837	if (pSrcPicture->pSourcePict != NULL) {
838		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
839			p->fillcolour =
840			    pSrcPicture->pSourcePict->solidFill.color;
841			DPRINTF(X_ERROR, "%s: solid src %08x\n",
842			    __func__, p->fillcolour);
843			p->no_source_pixmap = TRUE;
844			p->source_is_solid = TRUE;
845		}
846	}
847	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
848		if (pMaskPicture->pSourcePict->type ==
849		    SourcePictTypeSolidFill) {
850			p->fillcolour =
851			   pMaskPicture->pSourcePict->solidFill.color;
852			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
853			    __func__, p->fillcolour);
854		}
855	}
856	if (pMaskPicture != NULL) {
857		p->mskoff = exaGetPixmapOffset(pMask);
858		p->mskpitch = exaGetPixmapPitch(pMask);
859		p->mskformat = pMaskPicture->format;
860	} else {
861		p->mskoff = 0;
862		p->mskpitch = 0;
863		p->mskformat = 0;
864	}
865	if (pSrc != NULL) {
866		p->source_is_solid =
867		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
868		p->srcoff = exaGetPixmapOffset(pSrc);
869		p->srcpitch = exaGetPixmapPitch(pSrc);
870		if (p->source_is_solid) {
871			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
872		}
873	}
874	p->srcformat = pSrcPicture->format;
875	p->dstformat = pDstPicture->format;
876
877	if (p->source_is_solid) {
878		uint32_t temp;
879
880		/* stuff source colour into SX registers, swap as needed */
881		temp = p->fillcolour;
882		switch (p->srcformat) {
883			case PICT_a8r8g8b8:
884			case PICT_x8r8g8b8:
885				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
886				temp = temp >> 8;
887				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
888				temp = temp >> 8;
889				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
890				break;
891			case PICT_a8b8g8r8:
892			case PICT_x8b8g8r8:
893				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
894				temp = temp >> 8;
895				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
896				temp = temp >> 8;
897				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
898				break;
899		}
900		write_sx_reg(p, SX_QUEUED(8), 0xff);
901	}
902	p->op = op;
903	if (op == PictOpSrc) {
904		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
905	}
906#ifdef SX_DEBUG
907	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
908	    *(uint32_t *)(p->fb + p->srcoff));
909#endif
910	return TRUE;
911}
912
913void
914CG14Composite(PixmapPtr pDst, int srcX, int srcY,
915                              int maskX, int maskY,
916                              int dstX, int dstY,
917                              int width, int height)
918{
919	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
920	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
921	uint32_t dstoff, dstpitch;
922	uint32_t dst, msk, src;
923	int flip = 0;
924
925	ENTER;
926	dstoff = exaGetPixmapOffset(pDst);
927	dstpitch = exaGetPixmapPitch(pDst);
928
929	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
930		PICT_FORMAT_TYPE(p->dstformat));
931
932	switch (p->op) {
933		case PictOpOver:
934			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
935			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
936			    p->mskformat, p->dstformat, srcX, srcY);
937			if (p->source_is_solid) {
938				switch (p->mskformat) {
939					case PICT_a8:
940						msk = p->mskoff +
941						    (maskY * p->mskpitch) +
942						    maskX;
943						CG14Comp_Over8Solid(p,
944						    msk, p->mskpitch,
945						    dst, dstpitch,
946						    width, height);
947						break;
948					case PICT_a8r8g8b8:
949					case PICT_a8b8g8r8:
950						msk = p->mskoff +
951						    (maskY * p->mskpitch) +
952						    (maskX << 2);
953						CG14Comp_Over32Solid(p,
954						    msk, p->mskpitch,
955						    dst, dstpitch,
956						    width, height);
957						break;
958					default:
959						xf86Msg(X_ERROR,
960						  "unsupported mask format %08x\n", p->mskformat);
961				}
962			} else {
963				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
964				    p->mskformat);
965				switch (p->srcformat) {
966					case PICT_a8r8g8b8:
967					case PICT_a8b8g8r8:
968						src = p->srcoff +
969						    (srcY * p->srcpitch) +
970						    (srcX << 2);
971						dst = dstoff +
972						    (dstY * dstpitch) +
973						    (dstX << 2);
974						if (p->mskformat == PICT_a8) {
975							msk = p->mskoff +
976							    (maskY * p->mskpitch) +
977							    maskX;
978							CG14Comp_Over32Mask(p,
979							    src, p->srcpitch,
980							    msk, p->mskpitch,
981							    dst, dstpitch,
982							    width, height, flip);
983						} else {
984							CG14Comp_Over32(p,
985							    src, p->srcpitch,
986							    dst, dstpitch,
987							    width, height, flip);
988						}
989						break;
990					case PICT_x8r8g8b8:
991					case PICT_x8b8g8r8:
992						src = p->srcoff +
993						    (srcY * p->srcpitch) +
994						    (srcX << 2);
995						dst = dstoff +
996						    (dstY * dstpitch) +
997						    (dstX << 2);
998						if (p->mskformat == PICT_a8) {
999							msk = p->mskoff +
1000							    (maskY * p->mskpitch) +
1001							    maskX;
1002							CG14Comp_Over32Mask_noalpha(p,
1003							    src, p->srcpitch,
1004							    msk, p->mskpitch,
1005							    dst, dstpitch,
1006							    width, height, flip);
1007						} else if ((p->mskformat == PICT_a8r8g8b8) ||
1008							   (p->mskformat == PICT_a8b8g8r8)) {
1009							msk = p->mskoff +
1010							    (maskY * p->mskpitch) +
1011							    (maskX << 2);
1012							CG14Comp_Over32Mask32_noalpha(p,
1013							    src, p->srcpitch,
1014							    msk, p->mskpitch,
1015							    dst, dstpitch,
1016							    width, height, flip);
1017						} else {
1018							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
1019						}
1020						break;
1021					default:
1022						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
1023						    __func__, p->srcformat);
1024				}
1025			}
1026			break;
1027		case PictOpAdd:
1028			DPRINTF(X_ERROR, "Add %08x %08x\n",
1029			    p->srcformat, p->dstformat);
1030			switch (p->srcformat) {
1031				case PICT_a8:
1032					src = p->srcoff +
1033					    (srcY * p->srcpitch) + srcX;
1034					if (p->dstformat == PICT_a8) {
1035						dst = dstoff +
1036						      (dstY * dstpitch) + dstX;
1037						CG14Comp_Add8(p,
1038						    src, p->srcpitch,
1039						    dst, dstpitch,
1040						    width, height);
1041					} else {
1042						dst = dstoff +
1043						      (dstY * dstpitch) +
1044						      (dstX << 2);
1045						CG14Comp_Add8_32(p,
1046						    src, p->srcpitch,
1047						    dst, dstpitch,
1048						    width, height);
1049					}
1050					break;
1051				case PICT_a8r8g8b8:
1052				case PICT_x8r8g8b8:
1053					src = p->srcoff +
1054					    (srcY * p->srcpitch) + (srcX << 2);
1055					dst = dstoff + (dstY * dstpitch) +
1056					    (dstX << 2);
1057					CG14Comp_Add32(p, src, p->srcpitch,
1058					    dst, dstpitch, width, height);
1059					break;
1060				default:
1061					xf86Msg(X_ERROR,
1062					    "unsupported src format\n");
1063			}
1064			break;
1065		case PictOpSrc:
1066			DPRINTF(X_ERROR, "Src %08x %08x\n",
1067			    p->srcformat, p->dstformat);
1068			if (p->mskformat != 0)
1069				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
1070			if (p->srcformat == PICT_a8) {
1071				CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
1072			} else {
1073				/* convert between RGB and BGR? */
1074				CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
1075			}
1076			break;
1077		default:
1078			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
1079	}
1080	exaMarkSync(pDst->drawable.pScreen);
1081}
1082
1083
1084
1085Bool
1086CG14InitAccel(ScreenPtr pScreen)
1087{
1088	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1089	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1090	ExaDriverPtr pExa;
1091
1092	pExa = exaDriverAlloc();
1093	if (!pExa)
1094		return FALSE;
1095
1096	p->pExa = pExa;
1097
1098	pExa->exa_major = EXA_VERSION_MAJOR;
1099	pExa->exa_minor = EXA_VERSION_MINOR;
1100
1101	pExa->memoryBase = p->fb;
1102	pExa->memorySize = p->memsize;
1103	pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3);
1104
1105	/*
1106	 * SX memory instructions are written to 64bit aligned addresses with
1107	 * a 3 bit displacement. Make sure the displacement remains constant
1108	 * within one column
1109	 */
1110
1111	pExa->pixmapOffsetAlign = 8;
1112	pExa->pixmapPitchAlign = 8;
1113
1114	pExa->flags = EXA_OFFSCREEN_PIXMAPS
1115		      | EXA_SUPPORTS_OFFSCREEN_OVERLAPS
1116		      /*| EXA_MIXED_PIXMAPS*/;
1117
1118	/*
1119	 * these limits are bogus
1120	 * SX doesn't deal with coordinates at all, so there is no limit but
1121	 * we have to put something here
1122	 */
1123	pExa->maxX = 4096;
1124	pExa->maxY = 4096;
1125
1126	pExa->WaitMarker = CG14WaitMarker;
1127
1128	pExa->PrepareSolid = CG14PrepareSolid;
1129	pExa->Solid = CG14Solid;
1130	pExa->DoneSolid = CG14DoneCopy;
1131	pExa->PrepareCopy = CG14PrepareCopy;
1132	pExa->Copy = CG14Copy32;
1133	pExa->DoneCopy = CG14DoneCopy;
1134	if (p->use_xrender) {
1135		pExa->CheckComposite = CG14CheckComposite;
1136		pExa->PrepareComposite = CG14PrepareComposite;
1137		pExa->Composite = CG14Composite;
1138		pExa->DoneComposite = CG14DoneCopy;
1139	}
1140
1141	/* EXA hits more optimized paths when it does not have to fallback
1142	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
1143	 */
1144	pExa->UploadToScreen = CG14UploadToScreen;
1145	pExa->DownloadFromScreen = CG14DownloadFromScreen;
1146
1147	p->queuecount = 0;
1148	/* do some hardware init */
1149	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
1150	p->last_mask = 0xffffffff;
1151	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
1152	p->last_rop = 0xcc;
1153	return exaDriverInit(pScreen, pExa);
1154}
1155