cg14_accel.c revision c88c16f8
1/* $NetBSD: cg14_accel.c,v 1.8 2016/09/16 21:16:37 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include <sys/types.h>
37
/* all drivers need this */
39#include "xf86.h"
40#include "xf86_OSproc.h"
41#include "compiler.h"
42
43#include "cg14.h"
44#include <sparc/sxreg.h>
45
/*
 * Build-time switches. SX_SINGLE is always defined; SX_DEBUG and
 * SX_ADD_SOFTWARE are disabled by default. Only SX_DEBUG is tested in the
 * code visible in this file - the other two are presumably consumed
 * elsewhere in the driver (TODO confirm).
 */
#define SX_SINGLE
/*#define SX_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

/*
 * Tracing helpers.
 *
 * With SX_DEBUG, ENTER logs the name of the current function and DPRINTF is
 * simply xf86Msg. Note that ENTER already ends in a semicolon, so the usual
 * "ENTER;" leaves an extra empty statement behind.
 *
 * Without SX_DEBUG, ENTER expands to nothing and DPRINTF(...) becomes
 * "while (0) xf86Msg(...)": the call still has to compile, but it is never
 * executed and its arguments are never evaluated.
 */
#ifdef SX_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Number of elements in an array. Only valid for a real array object - a
 * pointer (including an array function parameter) gives a wrong result.
 */
#define arraysize(ary)        (sizeof(ary) / sizeof(ary[0]))
59
/*
 * Translation table from the raster operation code handed to the
 * PrepareCopy / PrepareSolid hooks (used as the index) to the value those
 * hooks write to SX_ROP_CONTROL. The table has 16 entries.
 * 0xcc is SX's GXcopy equivalent
 */
uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee,
		      0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff};

/* source picture formats that CG14CheckComposite() lets through */
int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
/*
 * Not referenced by the code visible in this file; presumably the formats
 * usable as a mask / texture - TODO confirm against the other users.
 */
int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
67
68static inline void
69CG14Wait(Cg14Ptr p)
70{
71	/* we just wait until the instruction queue is empty */
72	while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {};
73}
74
75static void
76CG14WaitMarker(ScreenPtr pScreen, int Marker)
77{
78	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
79	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
80
81	CG14Wait(p);
82}
83
84static Bool
85CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
86		int xdir, int ydir, int alu, Pixel planemask)
87{
88	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
89	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
90
91	ENTER;
92	DPRINTF(X_ERROR, "bits per pixel: %d\n",
93	    pSrcPixmap->drawable.bitsPerPixel);
94
95	if (planemask != p->last_mask) {
96		CG14Wait(p);
97		write_sx_reg(p, SX_PLANEMASK, planemask);
98		p->last_mask = planemask;
99	}
100	alu = sx_rop[alu];
101	if (alu != p->last_rop) {
102		CG14Wait(p);
103		write_sx_reg(p, SX_ROP_CONTROL, alu);
104		p->last_rop = alu;
105	}
106	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
107	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
108	p->xdir = xdir;
109	p->ydir = ydir;
110	return TRUE;
111}
112
/*
 * EXA Copy hook: copy a w x h pixel rectangle from (srcX, srcY) in the
 * source set up by CG14PrepareCopy() to (dstX, dstY) in pDstPixmap.
 *
 * All addresses are computed with (x << 2), so both pixmaps are treated as
 * having 4 bytes per pixel. Each scanline is moved in chunks of at most 32
 * pixels (128 bytes). For every chunk a load instruction is written to the
 * source address and a store instruction to the destination address; when
 * the current raster op is not 0xcc (plain copy) the destination is loaded
 * as well and SX_ROP instructions are issued in between.
 *
 * The copy direction saved in p->xdir / p->ydir selects whether chunks are
 * walked left to right or right to left, and whether lines are walked top
 * down or bottom up.
 *
 * Nothing is waited for here; the function only calls exaMarkSync() when
 * it is done.
 */
static void
CG14Copy(PixmapPtr pDstPixmap,
         int srcX, int srcY, int dstX, int dstY, int w, int h)
{
	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
	int dstpitch, dstoff, srcpitch, srcoff;
	int srcstart, dststart, xinc, srcinc, dstinc;
	int line, count, s, d, num;

	ENTER;
	dstpitch = exaGetPixmapPitch(pDstPixmap);
	dstoff = exaGetPixmapOffset(pDstPixmap);
	/* source geometry was stashed by CG14PrepareCopy() */
	srcpitch = p->srcpitch;
	srcoff = p->srcoff;
	/*
	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
	 * actually wrote anything and only sync if it did
	 */
	/* byte offsets of the top left pixel, 4 bytes per pixel */
	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;

	/*
	 * we always copy up to 32 pixels at a time so direction doesn't
	 * matter if w<=32
	 */
	if (w > 32) {
		if (p->xdir < 0) {
			/* start at the rightmost full 32 pixel chunk */
			srcstart += (w - 32) << 2;
			dststart += (w - 32) << 2;
			xinc = -128;
		} else
			xinc = 128;
	} else
		xinc = 128;
	if (p->ydir < 0) {
		/* start at the last line and walk upwards */
		srcstart += (h - 1) * srcpitch;
		dststart += (h - 1) * dstpitch;
		srcinc = -srcpitch;
		dstinc = -dstpitch;
	} else {
		srcinc = srcpitch;
		dstinc = dstpitch;
	}
	if (p->last_rop == 0xcc) {
		/* plain old copy */
		if ( xinc > 0) {
			/* going left to right */
			for (line = 0; line < h; line++) {
				count = 0;
				s = srcstart;
				d = dststart;
				while ( count < w) {
					/* last chunk of a line may be short */
					num = min(32, w - count);
					write_sx_io(p, s,
					    SX_LD(10, num - 1, s & 7));
					write_sx_io(p, d,
					    SX_STM(10, num - 1, d & 7));
					s += xinc;
					d += xinc;
					count += 32;
				}
				srcstart += srcinc;
				dststart += dstinc;
			}
		} else {
			/* going right to left */
			int i, chunks = (w >> 5);
			for (line = 0; line < h; line++) {
				s = srcstart;
				d = dststart;
				count = w;
				/* full 32 pixel chunks first */
				for (i = 0; i < chunks; i++) {
					write_sx_io(p, s,
					    SX_LD(10, 31, s & 7));
					write_sx_io(p, d,
					    SX_STM(10, 31, d & 7));
					s -= 128;
					d -= 128;
					count -= 32;
				}
				/* leftovers, if any */
				if (count > 0) {
					/*
					 * s/d now sit 32 pixels left of the
					 * last chunk; move right so that the
					 * remaining count pixels end where
					 * that chunk began
					 */
					s += (32 - count) << 2;
					d += (32 - count) << 2;
					write_sx_io(p, s,
					    SX_LD(10, count - 1, s & 7));
					write_sx_io(p, d,
					    SX_STM(10, count - 1, d & 7));
				}
				srcstart += srcinc;
				dststart += dstinc;
			}
		}
	} else {
		/* ROPs needed */
		if ( xinc > 0) {
			/* going left to right */
			for (line = 0; line < h; line++) {
				count = 0;
				s = srcstart;
				d = dststart;
				while ( count < w) {
					num = min(32, w - count);
					/* source into 10.., destination into 42.. */
					write_sx_io(p, s,
					    SX_LD(10, num - 1, s & 7));
					write_sx_io(p, d,
					    SX_LD(42, num - 1, d & 7));
					/*
					 * the ROP is issued in pieces of at
					 * most 16 (see the note in
					 * CG14Solid32() about non-memory op
					 * counts), result lands in 74..
					 */
					if (num > 16) {
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	 SX_ROP(10, 42, 74, 15));
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	 SX_ROP(26, 58, 90, num - 17));
					} else {
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	 SX_ROP(10, 42, 74, num - 1));
					}
					write_sx_io(p, d,
					    SX_STM(74, num - 1, d & 7));
					s += xinc;
					d += xinc;
					count += 32;
				}
				srcstart += srcinc;
				dststart += dstinc;
			}
		} else {
			/* going right to left */
			int i, chunks = (w >> 5);
			for (line = 0; line < h; line++) {
				s = srcstart;
				d = dststart;
				count = w;
				/* full 32 pixel chunks: always two 16-wide ROPs */
				for (i = 0; i < chunks; i++) {
					write_sx_io(p, s, SX_LD(10, 31, s & 7));
					write_sx_io(p, d, SX_LD(42, 31, d & 7));
					write_sx_reg(p, SX_INSTRUCTIONS,
				    	    SX_ROP(10, 42, 74, 15));
					write_sx_reg(p, SX_INSTRUCTIONS,
				    	    SX_ROP(26, 58, 90, 15));
					write_sx_io(p, d,
					    SX_STM(74, 31, d & 7));
					s -= 128;
					d -= 128;
					count -= 32;
				}
				/* leftovers, if any */
				if (count > 0) {
					/* same repositioning as in the plain copy */
					s += (32 - count) << 2;
					d += (32 - count) << 2;
					write_sx_io(p, s,
					    SX_LD(10, count - 1, s & 7));
					write_sx_io(p, d,
					    SX_LD(42, count - 1, d & 7));
					if (count > 16) {
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	    SX_ROP(10, 42, 74, 15));
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	 SX_ROP(26, 58, 90, count - 17));
					} else {
						write_sx_reg(p, SX_INSTRUCTIONS,
					    	 SX_ROP(10, 42, 74, count - 1));
					}

					write_sx_io(p, d,
					    SX_STM(74, count - 1, d & 7));
				}
				srcstart += srcinc;
				dststart += dstinc;
			}
		}
	}
	/* tell EXA that the hardware may still be busy */
	exaMarkSync(pDstPixmap->drawable.pScreen);
}
287
288static void
289CG14DoneCopy(PixmapPtr pDstPixmap)
290{
291}
292
293static Bool
294CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
295{
296	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
297	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
298
299	ENTER;
300	DPRINTF(X_ERROR, "bits per pixel: %d\n",
301	    pPixmap->drawable.bitsPerPixel);
302	write_sx_reg(p, SX_QUEUED(8), fg);
303	write_sx_reg(p, SX_QUEUED(9), fg);
304	if (planemask != p->last_mask) {
305		CG14Wait(p);
306		write_sx_reg(p, SX_PLANEMASK, planemask);
307		p->last_mask = planemask;
308	}
309	alu = sx_rop[alu];
310	if (alu != p->last_rop) {
311		CG14Wait(p);
312		write_sx_reg(p, SX_ROP_CONTROL, alu);
313		p->last_rop = alu;
314	}
315	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
316	return TRUE;
317}
318
319static void
320CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
321{
322	int line, x, num;
323	uint32_t ptr;
324
325	ENTER;
326	if (p->last_rop == 0xcc) {
327		/* simple fill */
328		for (line = 0; line < h; line++) {
329			x = 0;
330			while (x < w) {
331				ptr = start + (x << 2);
332				num = min(32, w - x);
333				write_sx_io(p, ptr,
334				    SX_STS(8, num - 1, ptr & 7));
335				x += 32;
336			}
337			start += pitch;
338		}
339	} else if (p->last_rop == 0xaa) {
340		/* nothing to do here */
341		return;
342	} else {
343		/* alright, let's do actual ROP stuff */
344
345		/* first repeat the fill colour into 16 registers */
346		write_sx_reg(p, SX_INSTRUCTIONS,
347		    SX_SELECT_S(8, 8, 10, 15));
348
349		for (line = 0; line < h; line++) {
350			x = 0;
351			while (x < w) {
352				ptr = start + (x << 2);
353				num = min(32, w - x);
354				/* now suck fb data into registers */
355				write_sx_io(p, ptr,
356				    SX_LD(42, num - 1, ptr & 7));
357				/*
358				 * ROP them with the fill data we left in 10
359				 * non-memory ops can only have counts up to 16
360				 */
361				if (num <= 16) {
362					write_sx_reg(p, SX_INSTRUCTIONS,
363					    SX_ROP(10, 42, 74, num - 1));
364				} else {
365					write_sx_reg(p, SX_INSTRUCTIONS,
366					    SX_ROP(10, 42, 74, 15));
367					write_sx_reg(p, SX_INSTRUCTIONS,
368					    SX_ROP(10, 58, 90, num - 17));
369				}
370				/* and write the result back into memory */
371				write_sx_io(p, ptr,
372				    SX_ST(74, num - 1, ptr & 7));
373				x += 32;
374			}
375			start += pitch;
376		}
377	}
378}
379
380static void
381CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
382{
383	int line, x, num, off;
384	uint32_t ptr;
385
386	ENTER;
387	off = start & 7;
388	start &= ~7;
389
390	if (p->last_rop == 0xcc) {
391		/* simple fill */
392		for (line = 0; line < h; line++) {
393			x = 0;
394			while (x < w) {
395				ptr = start + x;
396				num = min(32, w - x);
397				write_sx_io(p, ptr,
398				    SX_STBS(8, num - 1, off));
399				x += 32;
400			}
401			start += pitch;
402		}
403	} else if (p->last_rop == 0xaa) {
404		/* nothing to do here */
405		return;
406	} else {
407		/* alright, let's do actual ROP stuff */
408
409		/* first repeat the fill colour into 16 registers */
410		write_sx_reg(p, SX_INSTRUCTIONS,
411		    SX_SELECT_S(8, 8, 10, 15));
412
413		for (line = 0; line < h; line++) {
414			x = 0;
415			while (x < w) {
416				ptr = start + x;
417				num = min(32, w - x);
418				/* now suck fb data into registers */
419				write_sx_io(p, ptr,
420				    SX_LDB(42, num - 1, off));
421				/*
422				 * ROP them with the fill data we left in 10
423				 * non-memory ops can only have counts up to 16
424				 */
425				if (num <= 16) {
426					write_sx_reg(p, SX_INSTRUCTIONS,
427					    SX_ROP(10, 42, 74, num - 1));
428				} else {
429					write_sx_reg(p, SX_INSTRUCTIONS,
430					    SX_ROP(10, 42, 74, 15));
431					write_sx_reg(p, SX_INSTRUCTIONS,
432					    SX_ROP(10, 58, 90, num - 17));
433				}
434				/* and write the result back into memory */
435				write_sx_io(p, ptr,
436				    SX_STB(74, num - 1, off));
437				x += 32;
438			}
439			start += pitch;
440		}
441	}
442}
443
444static void
445CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
446{
447	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
448	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
449	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
450	int start, depth;
451
452	ENTER;
453	dstpitch = exaGetPixmapPitch(pPixmap);
454	dstoff = exaGetPixmapOffset(pPixmap);
455
456	depth = pPixmap->drawable.bitsPerPixel;
457	switch (depth) {
458		case 32:
459			start = dstoff + (y1 * dstpitch) + (x1 << 2);
460			CG14Solid32(p, start, dstpitch, w, h);
461			break;
462		case 8:
463			start = dstoff + (y1 * dstpitch) + x1;
464			CG14Solid8(p, start, dstpitch, w, h);
465			break;
466	}
467
468	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
469	    dstpitch, dstoff, start);
470	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
471	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
472	exaMarkSync(pPixmap->drawable.pScreen);
473}
474
475/*
476 * Memcpy-based UTS.
477 */
478static Bool
479CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
480    char *src, int src_pitch)
481{
482	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
483	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
484	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
485	int    dst_pitch  = exaGetPixmapPitch(pDst);
486
487	int bpp    = pDst->drawable.bitsPerPixel;
488	int cpp    = (bpp + 7) >> 3;
489	int wBytes = w * cpp;
490
491	ENTER;
492	dst += (x * cpp) + (y * dst_pitch);
493
494	CG14Wait(p);
495
496	while (h--) {
497		memcpy(dst, src, wBytes);
498		src += src_pitch;
499		dst += dst_pitch;
500	}
501	__asm("stbar;");
502	return TRUE;
503}
504
505/*
506 * Memcpy-based DFS.
507 */
508static Bool
509CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
510    char *dst, int dst_pitch)
511{
512	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
513	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
514	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
515	int    src_pitch  = exaGetPixmapPitch(pSrc);
516
517	ENTER;
518	int bpp    = pSrc->drawable.bitsPerPixel;
519	int cpp    = (bpp + 7) >> 3;
520	int wBytes = w * cpp;
521
522	src += (x * cpp) + (y * src_pitch);
523
524	CG14Wait(p);
525
526	while (h--) {
527		memcpy(dst, src, wBytes);
528		src += src_pitch;
529		dst += dst_pitch;
530	}
531
532	return TRUE;
533}
534
535Bool
536CG14CheckComposite(int op, PicturePtr pSrcPicture,
537                           PicturePtr pMaskPicture,
538                           PicturePtr pDstPicture)
539{
540	int i, ok = FALSE;
541
542	ENTER;
543
544	/*
545	 * SX is in theory capable of accelerating pretty much all Xrender ops,
546	 * even coordinate transformation and gradients. Support will be added
547	 * over time and likely have to spill over into its own source file.
548	 */
549
550	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
551		xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op);
552		return FALSE;
553	}
554	i = 0;
555	while ((i < arraysize(src_formats)) && (!ok)) {
556		ok =  (pSrcPicture->format == src_formats[i]);
557		i++;
558	}
559
560	if (!ok) {
561		xf86Msg(X_ERROR, "%s: unsupported src format %x\n",
562		    __func__, pSrcPicture->format);
563		return FALSE;
564	}
565
566	DPRINTF(X_ERROR, "src is %x, %d: %d %d\n", pSrcPicture->format, op,
567	    pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
568
569	if (pMaskPicture != NULL) {
570		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
571		    pMaskPicture->pDrawable->width,
572		    pMaskPicture->pDrawable->height);
573	}
574	return TRUE;
575}
576
577Bool
578CG14PrepareComposite(int op, PicturePtr pSrcPicture,
579                             PicturePtr pMaskPicture,
580                             PicturePtr pDstPicture,
581                             PixmapPtr  pSrc,
582                             PixmapPtr  pMask,
583                             PixmapPtr  pDst)
584{
585	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
586	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
587
588	ENTER;
589
590	p->no_source_pixmap = FALSE;
591	p->source_is_solid = FALSE;
592
593	if (pSrcPicture->format == PICT_a1) {
594		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
595		    pDstPicture->format, op);
596		if (pMaskPicture != NULL) {
597			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
598		}
599	}
600	if (pSrcPicture->pSourcePict != NULL) {
601		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
602			p->fillcolour =
603			    pSrcPicture->pSourcePict->solidFill.color;
604			DPRINTF(X_ERROR, "%s: solid src %08x\n",
605			    __func__, p->fillcolour);
606			p->no_source_pixmap = TRUE;
607			p->source_is_solid = TRUE;
608		}
609	}
610	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
611		if (pMaskPicture->pSourcePict->type ==
612		    SourcePictTypeSolidFill) {
613			p->fillcolour =
614			   pMaskPicture->pSourcePict->solidFill.color;
615			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
616			    __func__, p->fillcolour);
617		}
618	}
619	if (pMaskPicture != NULL) {
620		p->mskoff = exaGetPixmapOffset(pMask);
621		p->mskpitch = exaGetPixmapPitch(pMask);
622		p->mskformat = pMaskPicture->format;
623	} else {
624		p->mskoff = 0;
625		p->mskpitch = 0;
626		p->mskformat = 0;
627	}
628	if (pSrc != NULL) {
629		p->source_is_solid =
630		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
631		p->srcoff = exaGetPixmapOffset(pSrc);
632		p->srcpitch = exaGetPixmapPitch(pSrc);
633		if (p->source_is_solid) {
634			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
635		}
636	}
637	p->srcformat = pSrcPicture->format;
638	p->dstformat = pDstPicture->format;
639
640	if (p->source_is_solid) {
641		uint32_t temp;
642
643		/* stuff source colour into SX registers, swap as needed */
644		temp = p->fillcolour;
645		switch (p->srcformat) {
646			case PICT_a8r8g8b8:
647			case PICT_x8r8g8b8:
648				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
649				temp = temp >> 8;
650				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
651				temp = temp >> 8;
652				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
653				break;
654			case PICT_a8b8g8r8:
655			case PICT_x8b8g8r8:
656				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
657				temp = temp >> 8;
658				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
659				temp = temp >> 8;
660				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
661				break;
662		}
663		write_sx_reg(p, SX_QUEUED(8), 0xff);
664	}
665	p->op = op;
666	if (op == PictOpSrc) {
667		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
668	}
669#ifdef SX_DEBUG
670	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
671	    *(uint32_t *)(p->fb + p->srcoff));
672#endif
673	return TRUE;
674}
675
676void
677CG14Composite(PixmapPtr pDst, int srcX, int srcY,
678                              int maskX, int maskY,
679                              int dstX, int dstY,
680                              int width, int height)
681{
682	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
683	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
684	uint32_t dstoff, dstpitch;
685	uint32_t dst, msk, src;
686
687	ENTER;
688	dstoff = exaGetPixmapOffset(pDst);
689	dstpitch = exaGetPixmapPitch(pDst);
690
691	switch (p->op) {
692		case PictOpOver:
693			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
694			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
695			    p->mskformat, p->dstformat, srcX, srcY);
696			if (p->source_is_solid) {
697				switch (p->mskformat) {
698					case PICT_a8:
699						msk = p->mskoff +
700						    (maskY * p->mskpitch) +
701						    maskX;
702						CG14Comp_Over8Solid(p,
703						    msk, p->mskpitch,
704						    dst, dstpitch,
705						    width, height);
706						break;
707					case PICT_a8r8g8b8:
708					case PICT_a8b8g8r8:
709						msk = p->mskoff +
710						    (maskY * p->mskpitch) +
711						    (maskX << 2);
712						CG14Comp_Over32Solid(p,
713						    msk, p->mskpitch,
714						    dst, dstpitch,
715						    width, height);
716						break;
717					default:
718						xf86Msg(X_ERROR,
719						  "unsupported mask format\n");
720				}
721			} else {
722				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
723				    p->mskformat);
724				switch (p->srcformat) {
725					case PICT_a8r8g8b8:
726					case PICT_a8b8g8r8:
727						src = p->srcoff +
728						    (srcY * p->srcpitch) +
729						    (srcX << 2);
730						dst = dstoff +
731						    (dstY * dstpitch) +
732						    (dstX << 2);
733						if (p->mskformat == PICT_a8) {
734							msk = p->mskoff +
735							    (maskY * p->mskpitch) +
736							    maskX;
737							CG14Comp_Over32Mask(p,
738							    src, p->srcpitch,
739							    msk, p->mskpitch,
740							    dst, dstpitch,
741							    width, height);
742						} else {
743							CG14Comp_Over32(p,
744							    src, p->srcpitch,
745							    dst, dstpitch,
746							    width, height);
747						}
748						break;
749					case PICT_x8r8g8b8:
750					case PICT_x8b8g8r8:
751						src = p->srcoff +
752						    (srcY * p->srcpitch) +
753						    (srcX << 2);
754						dst = dstoff +
755						    (dstY * dstpitch) +
756						    (dstX << 2);
757						if (p->mskformat == PICT_a8) {
758							msk = p->mskoff +
759							    (maskY * p->mskpitch) +
760							    maskX;
761							CG14Comp_Over32Mask_noalpha(p,
762							    src, p->srcpitch,
763							    msk, p->mskpitch,
764							    dst, dstpitch,
765							    width, height);
766						} else if ((p->mskformat == PICT_a8r8g8b8) ||
767							   (p->mskformat == PICT_a8b8g8r8)) {
768							msk = p->mskoff +
769							    (maskY * p->mskpitch) +
770							    (maskX << 2);
771							CG14Comp_Over32Mask32_noalpha(p,
772							    src, p->srcpitch,
773							    msk, p->mskpitch,
774							    dst, dstpitch,
775							    width, height);
776						} else {
777							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
778						}
779						break;
780					default:
781						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
782						    __func__, p->srcformat);
783				}
784			}
785			break;
786		case PictOpAdd:
787			DPRINTF(X_ERROR, "Add %08x %08x\n",
788			    p->srcformat, p->dstformat);
789			switch (p->srcformat) {
790				case PICT_a8:
791					src = p->srcoff +
792					    (srcY * p->srcpitch) + srcX;
793					dst = dstoff + (dstY * dstpitch) + dstX;
794					CG14Comp_Add8(p, src, p->srcpitch,
795					    dst, dstpitch, width, height);
796					break;
797				case PICT_a8r8g8b8:
798				case PICT_x8r8g8b8:
799					src = p->srcoff +
800					    (srcY * p->srcpitch) + (srcX << 2);
801					dst = dstoff + (dstY * dstpitch) +
802					    (dstX << 2);
803					CG14Comp_Add32(p, src, p->srcpitch,
804					    dst, dstpitch, width, height);
805					break;
806				default:
807					xf86Msg(X_ERROR,
808					    "unsupported src format\n");
809			}
810			break;
811		case PictOpSrc:
812			DPRINTF(X_ERROR, "Src %08x %08x\n",
813			    p->srcformat, p->dstformat);
814			if (p->mskformat != 0)
815				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
816			CG14Copy(pDst, srcX, srcY, dstX, dstY, width, height);
817			break;
818		default:
819			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
820	}
821	exaMarkSync(pDst->drawable.pScreen);
822}
823
824
825
826Bool
827CG14InitAccel(ScreenPtr pScreen)
828{
829	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
830	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
831	ExaDriverPtr pExa;
832
833	pExa = exaDriverAlloc();
834	if (!pExa)
835		return FALSE;
836
837	p->pExa = pExa;
838
839	pExa->exa_major = EXA_VERSION_MAJOR;
840	pExa->exa_minor = EXA_VERSION_MINOR;
841
842	pExa->memoryBase = p->fb;
843	pExa->memorySize = p->memsize;
844	pExa->offScreenBase = p->width * p->height * 4;
845
846	/*
847	 * SX memory instructions are written to 64bit aligned addresses with
848	 * a 3 bit displacement. Make sure the displacement remains constant
849	 * within one column
850	 */
851
852	pExa->pixmapOffsetAlign = 8;
853	pExa->pixmapPitchAlign = 8;
854
855	pExa->flags = EXA_OFFSCREEN_PIXMAPS |
856		      /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/
857		      EXA_MIXED_PIXMAPS;
858
859	/*
860	 * these limits are bogus
861	 * SX doesn't deal with coordinates at all, so there is no limit but
862	 * we have to put something here
863	 */
864	pExa->maxX = 4096;
865	pExa->maxY = 4096;
866
867	pExa->WaitMarker = CG14WaitMarker;
868
869	pExa->PrepareSolid = CG14PrepareSolid;
870	pExa->Solid = CG14Solid;
871	pExa->DoneSolid = CG14DoneCopy;
872	pExa->PrepareCopy = CG14PrepareCopy;
873	pExa->Copy = CG14Copy;
874	pExa->DoneCopy = CG14DoneCopy;
875	if (p->use_xrender) {
876		pExa->CheckComposite = CG14CheckComposite;
877		pExa->PrepareComposite = CG14PrepareComposite;
878		pExa->Composite = CG14Composite;
879		pExa->DoneComposite = CG14DoneCopy;
880	}
881
882	/* EXA hits more optimized paths when it does not have to fallback
883	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
884	 */
885	pExa->UploadToScreen = CG14UploadToScreen;
886	pExa->DownloadFromScreen = CG14DownloadFromScreen;
887
888	/* do some hardware init */
889	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
890	p->last_mask = 0xffffffff;
891	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
892	p->last_rop = 0xcc;
893	return exaDriverInit(pScreen, pExa);
894}
895