cg14_accel.c revision f7cb851f
1/* $NetBSD: cg14_accel.c,v 1.5 2013/07/03 15:29:34 macallan Exp $ */
2/*
3 * Copyright (c) 2013 Michael Lorenz
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *    - Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    - Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials provided
15 *      with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#include <sys/types.h>
33
/* all drivers need this */
35#include "xf86.h"
36#include "xf86_OSproc.h"
37#include "compiler.h"
38
39#include "cg14.h"
40#include <sparc/sxreg.h>
41
#define SX_SINGLE
/*#define SX_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

/*
 * Debug helpers.
 * With SX_DEBUG defined, ENTER logs the name of the current function and
 * DPRINTF is xf86Msg(). Without it ENTER expands to nothing and DPRINTF
 * turns the call into the body of a `while (0)` loop, so the arguments are
 * still compiled but never evaluated.
 */
#ifdef SX_DEBUG
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
#define DPRINTF while (0) xf86Msg
#endif

/*
 * Number of elements in an array object. Only valid for real arrays, not
 * for pointers. The argument is parenthesized before being indexed so that
 * any expression can be passed safely.
 */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
55
/*
 * Translation table from an X11 raster operation (the GX* alu value, used
 * as index) to the value written into SX_ROP_CONTROL.
 * 0xcc is SX's GXcopy equivalent.
 */
uint32_t sx_rop[16] = {
	0x00,	/*  0 GXclear */
	0x88,	/*  1 GXand */
	0x44,	/*  2 GXandReverse */
	0xcc,	/*  3 GXcopy */
	0x22,	/*  4 GXandInverted */
	0xaa,	/*  5 GXnoop */
	0x66,	/*  6 GXxor */
	0xee,	/*  7 GXor */
	0x11,	/*  8 GXnor */
	0x99,	/*  9 GXequiv */
	0x55,	/* 10 GXinvert */
	0xdd,	/* 11 GXorReverse */
	0x33,	/* 12 GXcopyInverted */
	0xbb,	/* 13 GXorInverted */
	0x77,	/* 14 GXnand */
	0xff	/* 15 GXset */
};
59
60int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
61		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
62int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
63
64static inline void
65CG14Wait(Cg14Ptr p)
66{
67	/* we just wait until the instruction queue is empty */
68	while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {};
69}
70
71static void
72CG14WaitMarker(ScreenPtr pScreen, int Marker)
73{
74	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
76
77	CG14Wait(p);
78}
79
80static Bool
81CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
82		int xdir, int ydir, int alu, Pixel planemask)
83{
84	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
85	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
86
87	ENTER;
88	DPRINTF(X_ERROR, "bits per pixel: %d\n",
89	    pSrcPixmap->drawable.bitsPerPixel);
90
91	if (planemask != p->last_mask) {
92		CG14Wait(p);
93		write_sx_reg(p, SX_PLANEMASK, planemask);
94		p->last_mask = planemask;
95	}
96	alu = sx_rop[alu];
97	if (alu != p->last_rop) {
98		CG14Wait(p);
99		write_sx_reg(p, SX_ROP_CONTROL, alu);
100		p->last_rop = alu;
101	}
102	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
103	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
104	p->xdir = xdir;
105	p->ydir = ydir;
106	return TRUE;
107}
108
109static void
110CG14Copy(PixmapPtr pDstPixmap,
111         int srcX, int srcY, int dstX, int dstY, int w, int h)
112{
113	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
114	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
115	int dstpitch, dstoff, srcpitch, srcoff;
116	int srcstart, dststart, xinc, srcinc, dstinc;
117	int line, count, s, d, num;
118
119	ENTER;
120	dstpitch = exaGetPixmapPitch(pDstPixmap);
121	dstoff = exaGetPixmapOffset(pDstPixmap);
122	srcpitch = p->srcpitch;
123	srcoff = p->srcoff;
124	/*
125	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
126	 * actually wrote anything and only sync if it did
127	 */
128	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
129	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
130
131	/*
132	 * we always copy up to 32 pixels at a time so direction doesn't
133	 * matter if w<=32
134	 */
135	if (w > 32) {
136		if (p->xdir < 0) {
137			srcstart += (w - 32) << 2;
138			dststart += (w - 32) << 2;
139			xinc = -128;
140		} else
141			xinc = 128;
142	} else
143		xinc = 128;
144	if (p->ydir < 0) {
145		srcstart += (h - 1) * srcpitch;
146		dststart += (h - 1) * dstpitch;
147		srcinc = -srcpitch;
148		dstinc = -dstpitch;
149	} else {
150		srcinc = srcpitch;
151		dstinc = dstpitch;
152	}
153	if (p->last_rop == 0xcc) {
154		/* plain old copy */
155		if ( xinc > 0) {
156			/* going left to right */
157			for (line = 0; line < h; line++) {
158				count = 0;
159				s = srcstart;
160				d = dststart;
161				while ( count < w) {
162					num = min(32, w - count);
163					write_sx_io(p, s,
164					    SX_LD(10, num - 1, s & 7));
165					write_sx_io(p, d,
166					    SX_STM(10, num - 1, d & 7));
167					s += xinc;
168					d += xinc;
169					count += 32;
170				}
171				srcstart += srcinc;
172				dststart += dstinc;
173			}
174		} else {
175			/* going right to left */
176			int i, chunks = (w >> 5);
177			for (line = 0; line < h; line++) {
178				s = srcstart;
179				d = dststart;
180				count = w;
181				for (i = 0; i < chunks; i++) {
182					write_sx_io(p, s,
183					    SX_LD(10, 31, s & 7));
184					write_sx_io(p, d,
185					    SX_STM(10, 31, d & 7));
186					s -= 128;
187					d -= 128;
188					count -= 32;
189				}
190				/* leftovers, if any */
191				if (count > 0) {
192					s += (32 - count) << 2;
193					d += (32 - count) << 2;
194					write_sx_io(p, s,
195					    SX_LD(10, count - 1, s & 7));
196					write_sx_io(p, d,
197					    SX_STM(10, count - 1, d & 7));
198				}
199				srcstart += srcinc;
200				dststart += dstinc;
201			}
202		}
203	} else {
204		/* ROPs needed */
205		if ( xinc > 0) {
206			/* going left to right */
207			for (line = 0; line < h; line++) {
208				count = 0;
209				s = srcstart;
210				d = dststart;
211				while ( count < w) {
212					num = min(32, w - count);
213					write_sx_io(p, s,
214					    SX_LD(10, num - 1, s & 7));
215					write_sx_io(p, d,
216					    SX_LD(42, num - 1, d & 7));
217					if (num > 16) {
218						write_sx_reg(p, SX_INSTRUCTIONS,
219					    	 SX_ROP(10, 42, 74, 15));
220						write_sx_reg(p, SX_INSTRUCTIONS,
221					    	 SX_ROP(26, 58, 90, num - 17));
222					} else {
223						write_sx_reg(p, SX_INSTRUCTIONS,
224					    	 SX_ROP(10, 42, 74, num - 1));
225					}
226					write_sx_io(p, d,
227					    SX_STM(74, num - 1, d & 7));
228					s += xinc;
229					d += xinc;
230					count += 32;
231				}
232				srcstart += srcinc;
233				dststart += dstinc;
234			}
235		} else {
236			/* going right to left */
237			int i, chunks = (w >> 5);
238			for (line = 0; line < h; line++) {
239				s = srcstart;
240				d = dststart;
241				count = w;
242				for (i = 0; i < chunks; i++) {
243					write_sx_io(p, s, SX_LD(10, 31, s & 7));
244					write_sx_io(p, d, SX_LD(42, 31, d & 7));
245					write_sx_reg(p, SX_INSTRUCTIONS,
246				    	    SX_ROP(10, 42, 74, 15));
247					write_sx_reg(p, SX_INSTRUCTIONS,
248				    	    SX_ROP(26, 58, 90, 15));
249					write_sx_io(p, d,
250					    SX_STM(74, 31, d & 7));
251					s -= 128;
252					d -= 128;
253					count -= 32;
254				}
255				/* leftovers, if any */
256				if (count > 0) {
257					s += (32 - count) << 2;
258					d += (32 - count) << 2;
259					write_sx_io(p, s,
260					    SX_LD(10, count - 1, s & 7));
261					write_sx_io(p, d,
262					    SX_LD(42, count - 1, d & 7));
263					if (count > 16) {
264						write_sx_reg(p, SX_INSTRUCTIONS,
265					    	    SX_ROP(10, 42, 74, 15));
266						write_sx_reg(p, SX_INSTRUCTIONS,
267					    	 SX_ROP(26, 58, 90, count - 17));
268					} else {
269						write_sx_reg(p, SX_INSTRUCTIONS,
270					    	 SX_ROP(10, 42, 74, count - 1));
271					}
272
273					write_sx_io(p, d,
274					    SX_STM(74, count - 1, d & 7));
275				}
276				srcstart += srcinc;
277				dststart += dstinc;
278			}
279		}
280	}
281	exaMarkSync(pDstPixmap->drawable.pScreen);
282}
283
284static void
285CG14DoneCopy(PixmapPtr pDstPixmap)
286{
287}
288
289static Bool
290CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
291{
292	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
293	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
294
295	ENTER;
296	DPRINTF(X_ERROR, "bits per pixel: %d\n",
297	    pPixmap->drawable.bitsPerPixel);
298	write_sx_reg(p, SX_QUEUED(8), fg);
299	write_sx_reg(p, SX_QUEUED(9), fg);
300	if (planemask != p->last_mask) {
301		CG14Wait(p);
302		write_sx_reg(p, SX_PLANEMASK, planemask);
303		p->last_mask = planemask;
304	}
305	alu = sx_rop[alu];
306	if (alu != p->last_rop) {
307		CG14Wait(p);
308		write_sx_reg(p, SX_ROP_CONTROL, alu);
309		p->last_rop = alu;
310	}
311	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
312	return TRUE;
313}
314
315static void
316CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
317{
318	int line, x, num;
319	uint32_t ptr;
320
321	ENTER;
322	if (p->last_rop == 0xcc) {
323		/* simple fill */
324		for (line = 0; line < h; line++) {
325			x = 0;
326			while (x < w) {
327				ptr = start + (x << 2);
328				num = min(32, w - x);
329				write_sx_io(p, ptr,
330				    SX_STS(8, num - 1, ptr & 7));
331				x += 32;
332			}
333			start += pitch;
334		}
335	} else if (p->last_rop == 0xaa) {
336		/* nothing to do here */
337		return;
338	} else {
339		/* alright, let's do actual ROP stuff */
340
341		/* first repeat the fill colour into 16 registers */
342		write_sx_reg(p, SX_INSTRUCTIONS,
343		    SX_SELECT_S(8, 8, 10, 15));
344
345		for (line = 0; line < h; line++) {
346			x = 0;
347			while (x < w) {
348				ptr = start + (x << 2);
349				num = min(32, w - x);
350				/* now suck fb data into registers */
351				write_sx_io(p, ptr,
352				    SX_LD(42, num - 1, ptr & 7));
353				/*
354				 * ROP them with the fill data we left in 10
355				 * non-memory ops can only have counts up to 16
356				 */
357				if (num <= 16) {
358					write_sx_reg(p, SX_INSTRUCTIONS,
359					    SX_ROP(10, 42, 74, num - 1));
360				} else {
361					write_sx_reg(p, SX_INSTRUCTIONS,
362					    SX_ROP(10, 42, 74, 15));
363					write_sx_reg(p, SX_INSTRUCTIONS,
364					    SX_ROP(10, 58, 90, num - 17));
365				}
366				/* and write the result back into memory */
367				write_sx_io(p, ptr,
368				    SX_ST(74, num - 1, ptr & 7));
369				x += 32;
370			}
371			start += pitch;
372		}
373	}
374}
375
376static void
377CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
378{
379	int line, x, num, off;
380	uint32_t ptr;
381
382	ENTER;
383	off = start & 7;
384	start &= ~7;
385
386	if (p->last_rop == 0xcc) {
387		/* simple fill */
388		for (line = 0; line < h; line++) {
389			x = 0;
390			while (x < w) {
391				ptr = start + x;
392				num = min(32, w - x);
393				write_sx_io(p, ptr,
394				    SX_STBS(8, num - 1, off));
395				x += 32;
396			}
397			start += pitch;
398		}
399	} else if (p->last_rop == 0xaa) {
400		/* nothing to do here */
401		return;
402	} else {
403		/* alright, let's do actual ROP stuff */
404
405		/* first repeat the fill colour into 16 registers */
406		write_sx_reg(p, SX_INSTRUCTIONS,
407		    SX_SELECT_S(8, 8, 10, 15));
408
409		for (line = 0; line < h; line++) {
410			x = 0;
411			while (x < w) {
412				ptr = start + x;
413				num = min(32, w - x);
414				/* now suck fb data into registers */
415				write_sx_io(p, ptr,
416				    SX_LDB(42, num - 1, off));
417				/*
418				 * ROP them with the fill data we left in 10
419				 * non-memory ops can only have counts up to 16
420				 */
421				if (num <= 16) {
422					write_sx_reg(p, SX_INSTRUCTIONS,
423					    SX_ROP(10, 42, 74, num - 1));
424				} else {
425					write_sx_reg(p, SX_INSTRUCTIONS,
426					    SX_ROP(10, 42, 74, 15));
427					write_sx_reg(p, SX_INSTRUCTIONS,
428					    SX_ROP(10, 58, 90, num - 17));
429				}
430				/* and write the result back into memory */
431				write_sx_io(p, ptr,
432				    SX_STB(74, num - 1, off));
433				x += 32;
434			}
435			start += pitch;
436		}
437	}
438}
439
440static void
441CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
442{
443	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
444	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
445	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
446	int start, depth;
447
448	ENTER;
449	dstpitch = exaGetPixmapPitch(pPixmap);
450	dstoff = exaGetPixmapOffset(pPixmap);
451
452	depth = pPixmap->drawable.bitsPerPixel;
453	switch (depth) {
454		case 32:
455			start = dstoff + (y1 * dstpitch) + (x1 << 2);
456			CG14Solid32(p, start, dstpitch, w, h);
457			break;
458		case 8:
459			start = dstoff + (y1 * dstpitch) + x1;
460			CG14Solid8(p, start, dstpitch, w, h);
461			break;
462	}
463
464	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
465	    dstpitch, dstoff, start);
466	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
467	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
468	exaMarkSync(pPixmap->drawable.pScreen);
469}
470
471/*
472 * Memcpy-based UTS.
473 */
474static Bool
475CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
476    char *src, int src_pitch)
477{
478	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
479	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
480	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
481	int    dst_pitch  = exaGetPixmapPitch(pDst);
482
483	int bpp    = pDst->drawable.bitsPerPixel;
484	int cpp    = (bpp + 7) >> 3;
485	int wBytes = w * cpp;
486
487	ENTER;
488	dst += (x * cpp) + (y * dst_pitch);
489
490	CG14Wait(p);
491
492	while (h--) {
493		memcpy(dst, src, wBytes);
494		src += src_pitch;
495		dst += dst_pitch;
496	}
497	__asm("stbar;");
498	return TRUE;
499}
500
501/*
502 * Memcpy-based DFS.
503 */
504static Bool
505CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
506    char *dst, int dst_pitch)
507{
508	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
509	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
510	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
511	int    src_pitch  = exaGetPixmapPitch(pSrc);
512
513	ENTER;
514	int bpp    = pSrc->drawable.bitsPerPixel;
515	int cpp    = (bpp + 7) >> 3;
516	int wBytes = w * cpp;
517
518	src += (x * cpp) + (y * src_pitch);
519
520	CG14Wait(p);
521
522	while (h--) {
523		memcpy(dst, src, wBytes);
524		src += src_pitch;
525		dst += dst_pitch;
526	}
527
528	return TRUE;
529}
530
531Bool
532CG14CheckComposite(int op, PicturePtr pSrcPicture,
533                           PicturePtr pMaskPicture,
534                           PicturePtr pDstPicture)
535{
536	int i, ok = FALSE;
537
538	ENTER;
539
540	/*
541	 * SX is in theory capable of accelerating pretty much all Xrender ops,
542	 * even coordinate transformation and gradients. Support will be added
543	 * over time and likely have to spill over into its own source file.
544	 */
545
546	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
547		xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op);
548		return FALSE;
549	}
550	i = 0;
551	while ((i < arraysize(src_formats)) && (!ok)) {
552		ok =  (pSrcPicture->format == src_formats[i]);
553		i++;
554	}
555
556	if (!ok) {
557		xf86Msg(X_ERROR, "%s: unsupported src format %x\n",
558		    __func__, pSrcPicture->format);
559		return FALSE;
560	}
561
562	DPRINTF(X_ERROR, "src is %x, %d: %d %d\n", pSrcPicture->format, op,
563	    pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
564
565	if (pMaskPicture != NULL) {
566		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
567		    pMaskPicture->pDrawable->width,
568		    pMaskPicture->pDrawable->height);
569	}
570	return TRUE;
571}
572
573Bool
574CG14PrepareComposite(int op, PicturePtr pSrcPicture,
575                             PicturePtr pMaskPicture,
576                             PicturePtr pDstPicture,
577                             PixmapPtr  pSrc,
578                             PixmapPtr  pMask,
579                             PixmapPtr  pDst)
580{
581	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
582	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
583
584	ENTER;
585
586	p->no_source_pixmap = FALSE;
587	p->source_is_solid = FALSE;
588
589	if (pSrcPicture->format == PICT_a1) {
590		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
591		    pDstPicture->format, op);
592		if (pMaskPicture != NULL) {
593			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
594		}
595	}
596	if (pSrcPicture->pSourcePict != NULL) {
597		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
598			p->fillcolour =
599			    pSrcPicture->pSourcePict->solidFill.color;
600			DPRINTF(X_ERROR, "%s: solid src %08x\n",
601			    __func__, p->fillcolour);
602			p->no_source_pixmap = TRUE;
603			p->source_is_solid = TRUE;
604		}
605	}
606	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
607		if (pMaskPicture->pSourcePict->type ==
608		    SourcePictTypeSolidFill) {
609			p->fillcolour =
610			   pMaskPicture->pSourcePict->solidFill.color;
611			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
612			    __func__, p->fillcolour);
613		}
614	}
615	if (pMaskPicture != NULL) {
616		p->mskoff = exaGetPixmapOffset(pMask);
617		p->mskpitch = exaGetPixmapPitch(pMask);
618		p->mskformat = pMaskPicture->format;
619	} else {
620		p->mskoff = 0;
621		p->mskpitch = 0;
622		p->mskformat = 0;
623	}
624	if (pSrc != NULL) {
625		p->source_is_solid =
626		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
627		p->srcoff = exaGetPixmapOffset(pSrc);
628		p->srcpitch = exaGetPixmapPitch(pSrc);
629		if (p->source_is_solid) {
630			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
631		}
632	}
633	p->srcformat = pSrcPicture->format;
634	p->dstformat = pDstPicture->format;
635
636	if (p->source_is_solid) {
637		uint32_t temp;
638
639		/* stuff source colour into SX registers, swap as needed */
640		temp = p->fillcolour;
641		switch (p->srcformat) {
642			case PICT_a8r8g8b8:
643			case PICT_x8r8g8b8:
644				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
645				temp = temp >> 8;
646				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
647				temp = temp >> 8;
648				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
649				break;
650			case PICT_a8b8g8r8:
651			case PICT_x8b8g8r8:
652				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
653				temp = temp >> 8;
654				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
655				temp = temp >> 8;
656				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
657				break;
658		}
659		write_sx_reg(p, SX_QUEUED(8), 0xff);
660	}
661	p->op = op;
662	if (op == PictOpSrc) {
663		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
664	}
665#ifdef SX_DEBUG
666	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
667	    *(uint32_t *)(p->fb + p->srcoff));
668#endif
669	return TRUE;
670}
671
672void
673CG14Composite(PixmapPtr pDst, int srcX, int srcY,
674                              int maskX, int maskY,
675                              int dstX, int dstY,
676                              int width, int height)
677{
678	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
679	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
680	uint32_t dstoff, dstpitch;
681	uint32_t dst, msk, src;
682
683	ENTER;
684	dstoff = exaGetPixmapOffset(pDst);
685	dstpitch = exaGetPixmapPitch(pDst);
686
687	switch (p->op) {
688		case PictOpOver:
689			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
690			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
691			    p->mskformat, p->dstformat, srcX, srcY);
692			if (p->source_is_solid) {
693				switch (p->mskformat) {
694					case PICT_a8:
695						msk = p->mskoff +
696						    (maskY * p->mskpitch) +
697						    maskX;
698						CG14Comp_Over8Solid(p,
699						    msk, p->mskpitch,
700						    dst, dstpitch,
701						    width, height);
702						break;
703					case PICT_a8r8g8b8:
704					case PICT_a8b8g8r8:
705						msk = p->mskoff +
706						    (maskY * p->mskpitch) +
707						    (maskX << 2);
708						CG14Comp_Over32Solid(p,
709						    msk, p->mskpitch,
710						    dst, dstpitch,
711						    width, height);
712						break;
713					default:
714						xf86Msg(X_ERROR,
715						  "unsupported mask format\n");
716				}
717			} else {
718				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
719				    p->mskformat);
720				switch (p->srcformat) {
721					case PICT_a8r8g8b8:
722					case PICT_a8b8g8r8:
723						src = p->srcoff +
724						    (srcY * p->srcpitch) +
725						    (srcX << 2);
726						dst = dstoff +
727						    (dstY * dstpitch) +
728						    (dstX << 2);
729						if (p->mskformat == PICT_a8) {
730							msk = p->mskoff +
731							    (maskY * p->mskpitch) +
732							    maskX;
733							CG14Comp_Over32Mask(p,
734							    src, p->srcpitch,
735							    msk, p->mskpitch,
736							    dst, dstpitch,
737							    width, height);
738						} else {
739							CG14Comp_Over32(p,
740							    src, p->srcpitch,
741							    dst, dstpitch,
742							    width, height);
743						}
744						break;
745					case PICT_x8r8g8b8:
746					case PICT_x8b8g8r8:
747						src = p->srcoff +
748						    (srcY * p->srcpitch) +
749						    (srcX << 2);
750						dst = dstoff +
751						    (dstY * dstpitch) +
752						    (dstX << 2);
753						if (p->mskformat == PICT_a8) {
754							msk = p->mskoff +
755							    (maskY * p->mskpitch) +
756							    maskX;
757							CG14Comp_Over32Mask_noalpha(p,
758							    src, p->srcpitch,
759							    msk, p->mskpitch,
760							    dst, dstpitch,
761							    width, height);
762						} else {
763							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
764						}
765						break;
766					default:
767						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
768						    __func__, p->srcformat);
769				}
770			}
771			break;
772		case PictOpAdd:
773			DPRINTF(X_ERROR, "Add %08x %08x\n",
774			    p->srcformat, p->dstformat);
775			switch (p->srcformat) {
776				case PICT_a8:
777					src = p->srcoff +
778					    (srcY * p->srcpitch) + srcX;
779					dst = dstoff + (dstY * dstpitch) + dstX;
780					CG14Comp_Add8(p, src, p->srcpitch,
781					    dst, dstpitch, width, height);
782					break;
783				case PICT_a8r8g8b8:
784				case PICT_x8r8g8b8:
785					src = p->srcoff +
786					    (srcY * p->srcpitch) + (srcX << 2);
787					dst = dstoff + (dstY * dstpitch) +
788					    (dstX << 2);
789					CG14Comp_Add32(p, src, p->srcpitch,
790					    dst, dstpitch, width, height);
791					break;
792				default:
793					xf86Msg(X_ERROR,
794					    "unsupported src format\n");
795			}
796			break;
797		case PictOpSrc:
798			DPRINTF(X_ERROR, "Src %08x %08x\n",
799			    p->srcformat, p->dstformat);
800			CG14Copy(pDst, srcX, srcY, dstX, dstY, width, height);
801			break;
802		default:
803			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
804	}
805	exaMarkSync(pDst->drawable.pScreen);
806}
807
808
809
810Bool
811CG14InitAccel(ScreenPtr pScreen)
812{
813	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
814	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
815	ExaDriverPtr pExa;
816
817	pExa = exaDriverAlloc();
818	if (!pExa)
819		return FALSE;
820
821	p->pExa = pExa;
822
823	pExa->exa_major = EXA_VERSION_MAJOR;
824	pExa->exa_minor = EXA_VERSION_MINOR;
825
826	pExa->memoryBase = p->fb;
827	pExa->memorySize = p->memsize;
828	pExa->offScreenBase = p->width * p->height * 4;
829
830	/*
831	 * SX memory instructions are written to 64bit aligned addresses with
832	 * a 3 bit displacement. Make sure the displacement remains constant
833	 * within one column
834	 */
835
836	pExa->pixmapOffsetAlign = 8;
837	pExa->pixmapPitchAlign = 8;
838
839	pExa->flags = EXA_OFFSCREEN_PIXMAPS |
840		      /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/
841		      EXA_MIXED_PIXMAPS;
842
843	/*
844	 * these limits are bogus
845	 * SX doesn't deal with coordinates at all, so there is no limit but
846	 * we have to put something here
847	 */
848	pExa->maxX = 4096;
849	pExa->maxY = 4096;
850
851	pExa->WaitMarker = CG14WaitMarker;
852
853	pExa->PrepareSolid = CG14PrepareSolid;
854	pExa->Solid = CG14Solid;
855	pExa->DoneSolid = CG14DoneCopy;
856	pExa->PrepareCopy = CG14PrepareCopy;
857	pExa->Copy = CG14Copy;
858	pExa->DoneCopy = CG14DoneCopy;
859	if (p->use_xrender) {
860		pExa->CheckComposite = CG14CheckComposite;
861		pExa->PrepareComposite = CG14PrepareComposite;
862		pExa->Composite = CG14Composite;
863		pExa->DoneComposite = CG14DoneCopy;
864	}
865
866	/* EXA hits more optimized paths when it does not have to fallback
867	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
868	 */
869	pExa->UploadToScreen = CG14UploadToScreen;
870	pExa->DownloadFromScreen = CG14DownloadFromScreen;
871
872	/* do some hardware init */
873	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
874	p->last_mask = 0xffffffff;
875	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
876	p->last_rop = 0xcc;
877	return exaDriverInit(pScreen, pExa);
878}
879