cg14_accel.c revision fc473876
1fc473876Smacallan/* $NetBSD: cg14_accel.c,v 1.14 2019/03/01 02:22:27 macallan Exp $ */
24261fa58Smacallan/*
34261fa58Smacallan * Copyright (c) 2013 Michael Lorenz
44261fa58Smacallan * All rights reserved.
54261fa58Smacallan *
64261fa58Smacallan * Redistribution and use in source and binary forms, with or without
74261fa58Smacallan * modification, are permitted provided that the following conditions
84261fa58Smacallan * are met:
94261fa58Smacallan *
104261fa58Smacallan *    - Redistributions of source code must retain the above copyright
114261fa58Smacallan *      notice, this list of conditions and the following disclaimer.
124261fa58Smacallan *    - Redistributions in binary form must reproduce the above
134261fa58Smacallan *      copyright notice, this list of conditions and the following
144261fa58Smacallan *      disclaimer in the documentation and/or other materials provided
154261fa58Smacallan *      with the distribution.
164261fa58Smacallan *
174261fa58Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
184261fa58Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
194261fa58Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
204261fa58Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
214261fa58Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
224261fa58Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
234261fa58Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
244261fa58Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
254261fa58Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
264261fa58Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
274261fa58Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
284261fa58Smacallan * POSSIBILITY OF SUCH DAMAGE.
294261fa58Smacallan *
304261fa58Smacallan */
31c88c16f8Smacallan
32c88c16f8Smacallan#ifdef HAVE_CONFIG_H
33c88c16f8Smacallan#include "config.h"
34c88c16f8Smacallan#endif
35c88c16f8Smacallan
364261fa58Smacallan#include <sys/types.h>
374261fa58Smacallan
384261fa58Smacallan/* all driver need this */
394261fa58Smacallan#include "xf86.h"
404261fa58Smacallan#include "xf86_OSproc.h"
414261fa58Smacallan#include "compiler.h"
424261fa58Smacallan
434261fa58Smacallan#include "cg14.h"
444261fa58Smacallan#include <sparc/sxreg.h>
454261fa58Smacallan
/* Uncomment to enable function-entry traces (ENTER) and DPRINTF output. */
/*#define SX_DEBUG*/

#ifdef SX_DEBUG
/* ENTER logs the name of the function being entered */
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#else
#define ENTER
/* the call stays syntactically valid but sits in a loop that never runs */
#define DPRINTF while (0) xf86Msg
#endif

/* number of elements in a real array (not a pointer) */
#define arraysize(ary)        (sizeof(ary) / sizeof(ary[0]))

/*
 * Table of SX ROP control values, indexed by the alu argument that
 * CG14PrepareCopy() and CG14PrepareSolid() receive.
 * 0xcc is SX's GXcopy equivalent
 */
uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee,
		      0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff};

/* source picture formats CG14CheckComposite() compares against */
int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
/* NOTE(review): presumably the accepted mask/texture formats - user not visible here */
int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};

/* depth-specific copy routines, installed by CG14PrepareCopy() */
static void CG14Copy32(PixmapPtr, int, int, int, int, int, int);
static void CG14Copy8(PixmapPtr, int, int, int, int, int, int);
68f71acd79Smacallan
694261fa58Smacallanstatic inline void
704261fa58SmacallanCG14Wait(Cg14Ptr p)
714261fa58Smacallan{
72fc473876Smacallan	int bail = 10000000;
73fc473876Smacallan	/* we wait for the busy bit to clear */
74fc473876Smacallan	while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) &&
75fc473876Smacallan	       (bail > 0)) {
76fc473876Smacallan		bail--;
77fc473876Smacallan	};
78fc473876Smacallan	if (bail == 0) {
79fc473876Smacallan		xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n",
80fc473876Smacallan		    read_sx_reg(p, SX_CONTROL_STATUS),
81fc473876Smacallan		    read_sx_reg(p, SX_ERROR));
82fc473876Smacallan	}
834261fa58Smacallan}
844261fa58Smacallan
854261fa58Smacallanstatic void
864261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker)
874261fa58Smacallan{
884261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
894261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
904261fa58Smacallan
914261fa58Smacallan	CG14Wait(p);
924261fa58Smacallan}
934261fa58Smacallan
944261fa58Smacallanstatic Bool
954261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
964261fa58Smacallan		int xdir, int ydir, int alu, Pixel planemask)
974261fa58Smacallan{
984261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
994261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1004261fa58Smacallan
1014261fa58Smacallan	ENTER;
1024261fa58Smacallan	DPRINTF(X_ERROR, "bits per pixel: %d\n",
1034261fa58Smacallan	    pSrcPixmap->drawable.bitsPerPixel);
1044261fa58Smacallan
1054261fa58Smacallan	if (planemask != p->last_mask) {
1064261fa58Smacallan		CG14Wait(p);
1074261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
1084261fa58Smacallan		p->last_mask = planemask;
1094261fa58Smacallan	}
1104261fa58Smacallan	alu = sx_rop[alu];
1114261fa58Smacallan	if (alu != p->last_rop) {
1124261fa58Smacallan		CG14Wait(p);
1134261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
1144261fa58Smacallan		p->last_rop = alu;
1154261fa58Smacallan	}
116f71acd79Smacallan	switch (pSrcPixmap->drawable.bitsPerPixel)  {
117f71acd79Smacallan		case 8:
118f71acd79Smacallan			p->pExa->Copy = CG14Copy8;
119f71acd79Smacallan			break;
120f71acd79Smacallan		case 32:
121f71acd79Smacallan			p->pExa->Copy = CG14Copy32;
122f71acd79Smacallan			break;
123f71acd79Smacallan		default:
124f71acd79Smacallan			xf86Msg(X_ERROR, "%s depth %d\n", __func__,
125f71acd79Smacallan			    pSrcPixmap->drawable.bitsPerPixel);
126f71acd79Smacallan	}
1274261fa58Smacallan	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
1284261fa58Smacallan	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
1294261fa58Smacallan	p->xdir = xdir;
1304261fa58Smacallan	p->ydir = ydir;
1314261fa58Smacallan	return TRUE;
1324261fa58Smacallan}
1334261fa58Smacallan
1344261fa58Smacallanstatic void
135f71acd79SmacallanCG14Copy32(PixmapPtr pDstPixmap,
1364261fa58Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
1374261fa58Smacallan{
1384261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1394261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1404261fa58Smacallan	int dstpitch, dstoff, srcpitch, srcoff;
1414261fa58Smacallan	int srcstart, dststart, xinc, srcinc, dstinc;
1424261fa58Smacallan	int line, count, s, d, num;
1434261fa58Smacallan
1444261fa58Smacallan	ENTER;
1454261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
1464261fa58Smacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
1474261fa58Smacallan	srcpitch = p->srcpitch;
1484261fa58Smacallan	srcoff = p->srcoff;
1494261fa58Smacallan	/*
1504261fa58Smacallan	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
1514261fa58Smacallan	 * actually wrote anything and only sync if it did
1524261fa58Smacallan	 */
1534261fa58Smacallan	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
1544261fa58Smacallan	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
1554261fa58Smacallan
1564261fa58Smacallan	/*
1574261fa58Smacallan	 * we always copy up to 32 pixels at a time so direction doesn't
1584261fa58Smacallan	 * matter if w<=32
1594261fa58Smacallan	 */
1604261fa58Smacallan	if (w > 32) {
1614261fa58Smacallan		if (p->xdir < 0) {
1624261fa58Smacallan			srcstart += (w - 32) << 2;
1634261fa58Smacallan			dststart += (w - 32) << 2;
1644261fa58Smacallan			xinc = -128;
1654261fa58Smacallan		} else
1664261fa58Smacallan			xinc = 128;
1674261fa58Smacallan	} else
1684261fa58Smacallan		xinc = 128;
1694261fa58Smacallan	if (p->ydir < 0) {
1704261fa58Smacallan		srcstart += (h - 1) * srcpitch;
1714261fa58Smacallan		dststart += (h - 1) * dstpitch;
1724261fa58Smacallan		srcinc = -srcpitch;
1734261fa58Smacallan		dstinc = -dstpitch;
1744261fa58Smacallan	} else {
1754261fa58Smacallan		srcinc = srcpitch;
1764261fa58Smacallan		dstinc = dstpitch;
1774261fa58Smacallan	}
1784261fa58Smacallan	if (p->last_rop == 0xcc) {
1794261fa58Smacallan		/* plain old copy */
1804261fa58Smacallan		if ( xinc > 0) {
1814261fa58Smacallan			/* going left to right */
1824261fa58Smacallan			for (line = 0; line < h; line++) {
1834261fa58Smacallan				count = 0;
1844261fa58Smacallan				s = srcstart;
1854261fa58Smacallan				d = dststart;
1864261fa58Smacallan				while ( count < w) {
1874261fa58Smacallan					num = min(32, w - count);
1884261fa58Smacallan					write_sx_io(p, s,
1894261fa58Smacallan					    SX_LD(10, num - 1, s & 7));
1904261fa58Smacallan					write_sx_io(p, d,
1914261fa58Smacallan					    SX_STM(10, num - 1, d & 7));
1924261fa58Smacallan					s += xinc;
1934261fa58Smacallan					d += xinc;
1944261fa58Smacallan					count += 32;
1954261fa58Smacallan				}
1964261fa58Smacallan				srcstart += srcinc;
1974261fa58Smacallan				dststart += dstinc;
1984261fa58Smacallan			}
1994261fa58Smacallan		} else {
2004261fa58Smacallan			/* going right to left */
2014261fa58Smacallan			int i, chunks = (w >> 5);
2024261fa58Smacallan			for (line = 0; line < h; line++) {
2034261fa58Smacallan				s = srcstart;
2044261fa58Smacallan				d = dststart;
2054261fa58Smacallan				count = w;
2064261fa58Smacallan				for (i = 0; i < chunks; i++) {
2074261fa58Smacallan					write_sx_io(p, s,
2084261fa58Smacallan					    SX_LD(10, 31, s & 7));
2094261fa58Smacallan					write_sx_io(p, d,
2104261fa58Smacallan					    SX_STM(10, 31, d & 7));
2114261fa58Smacallan					s -= 128;
2124261fa58Smacallan					d -= 128;
2134261fa58Smacallan					count -= 32;
2144261fa58Smacallan				}
2154261fa58Smacallan				/* leftovers, if any */
2164261fa58Smacallan				if (count > 0) {
2174261fa58Smacallan					s += (32 - count) << 2;
2184261fa58Smacallan					d += (32 - count) << 2;
2194261fa58Smacallan					write_sx_io(p, s,
2204261fa58Smacallan					    SX_LD(10, count - 1, s & 7));
2214261fa58Smacallan					write_sx_io(p, d,
2224261fa58Smacallan					    SX_STM(10, count - 1, d & 7));
2234261fa58Smacallan				}
2244261fa58Smacallan				srcstart += srcinc;
2254261fa58Smacallan				dststart += dstinc;
2264261fa58Smacallan			}
2274261fa58Smacallan		}
2284261fa58Smacallan	} else {
2294261fa58Smacallan		/* ROPs needed */
2304261fa58Smacallan		if ( xinc > 0) {
2314261fa58Smacallan			/* going left to right */
2324261fa58Smacallan			for (line = 0; line < h; line++) {
2334261fa58Smacallan				count = 0;
2344261fa58Smacallan				s = srcstart;
2354261fa58Smacallan				d = dststart;
2364261fa58Smacallan				while ( count < w) {
2374261fa58Smacallan					num = min(32, w - count);
2384261fa58Smacallan					write_sx_io(p, s,
2394261fa58Smacallan					    SX_LD(10, num - 1, s & 7));
2404261fa58Smacallan					write_sx_io(p, d,
2414261fa58Smacallan					    SX_LD(42, num - 1, d & 7));
2424261fa58Smacallan					if (num > 16) {
2434261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2444261fa58Smacallan					    	 SX_ROP(10, 42, 74, 15));
2454261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2464261fa58Smacallan					    	 SX_ROP(26, 58, 90, num - 17));
2474261fa58Smacallan					} else {
2484261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2494261fa58Smacallan					    	 SX_ROP(10, 42, 74, num - 1));
2504261fa58Smacallan					}
2514261fa58Smacallan					write_sx_io(p, d,
2524261fa58Smacallan					    SX_STM(74, num - 1, d & 7));
2534261fa58Smacallan					s += xinc;
2544261fa58Smacallan					d += xinc;
2554261fa58Smacallan					count += 32;
2564261fa58Smacallan				}
2574261fa58Smacallan				srcstart += srcinc;
2584261fa58Smacallan				dststart += dstinc;
2594261fa58Smacallan			}
2604261fa58Smacallan		} else {
2614261fa58Smacallan			/* going right to left */
2624261fa58Smacallan			int i, chunks = (w >> 5);
2634261fa58Smacallan			for (line = 0; line < h; line++) {
2644261fa58Smacallan				s = srcstart;
2654261fa58Smacallan				d = dststart;
2664261fa58Smacallan				count = w;
2674261fa58Smacallan				for (i = 0; i < chunks; i++) {
2684261fa58Smacallan					write_sx_io(p, s, SX_LD(10, 31, s & 7));
2694261fa58Smacallan					write_sx_io(p, d, SX_LD(42, 31, d & 7));
2704261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
2714261fa58Smacallan				    	    SX_ROP(10, 42, 74, 15));
2724261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
2734261fa58Smacallan				    	    SX_ROP(26, 58, 90, 15));
2744261fa58Smacallan					write_sx_io(p, d,
2754261fa58Smacallan					    SX_STM(74, 31, d & 7));
2764261fa58Smacallan					s -= 128;
2774261fa58Smacallan					d -= 128;
2784261fa58Smacallan					count -= 32;
2794261fa58Smacallan				}
2804261fa58Smacallan				/* leftovers, if any */
2814261fa58Smacallan				if (count > 0) {
2824261fa58Smacallan					s += (32 - count) << 2;
2834261fa58Smacallan					d += (32 - count) << 2;
2844261fa58Smacallan					write_sx_io(p, s,
2854261fa58Smacallan					    SX_LD(10, count - 1, s & 7));
2864261fa58Smacallan					write_sx_io(p, d,
2874261fa58Smacallan					    SX_LD(42, count - 1, d & 7));
2884261fa58Smacallan					if (count > 16) {
2894261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2904261fa58Smacallan					    	    SX_ROP(10, 42, 74, 15));
2914261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2924261fa58Smacallan					    	 SX_ROP(26, 58, 90, count - 17));
2934261fa58Smacallan					} else {
2944261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2954261fa58Smacallan					    	 SX_ROP(10, 42, 74, count - 1));
2964261fa58Smacallan					}
2974261fa58Smacallan
2984261fa58Smacallan					write_sx_io(p, d,
2994261fa58Smacallan					    SX_STM(74, count - 1, d & 7));
3004261fa58Smacallan				}
3014261fa58Smacallan				srcstart += srcinc;
3024261fa58Smacallan				dststart += dstinc;
3034261fa58Smacallan			}
3044261fa58Smacallan		}
3054261fa58Smacallan	}
3064261fa58Smacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
3074261fa58Smacallan}
3084261fa58Smacallan
309f71acd79Smacallanstatic void
310f71acd79SmacallanCG14Copy8(PixmapPtr pDstPixmap,
311f71acd79Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
312f71acd79Smacallan{
313f71acd79Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
314f71acd79Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
315f71acd79Smacallan	int dstpitch, dstoff, srcpitch, srcoff;
316f71acd79Smacallan	int srcstart, dststart, xinc, srcinc, dstinc;
317f71acd79Smacallan	int line, count, s, d, num;
318f71acd79Smacallan
319f71acd79Smacallan	ENTER;
320f71acd79Smacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
321f71acd79Smacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
322f71acd79Smacallan	srcpitch = p->srcpitch;
323f71acd79Smacallan	srcoff = p->srcoff;
324f71acd79Smacallan	/*
325f71acd79Smacallan	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
326f71acd79Smacallan	 * actually wrote anything and only sync if it did
327f71acd79Smacallan	 */
328f71acd79Smacallan	srcstart = srcX + (srcpitch * srcY) + srcoff;
329f71acd79Smacallan	dststart = dstX + (dstpitch * dstY) + dstoff;
330f71acd79Smacallan
331f71acd79Smacallan	/*
332f71acd79Smacallan	 * we always copy up to 32 pixels at a time so direction doesn't
333f71acd79Smacallan	 * matter if w<=32
334f71acd79Smacallan	 */
335f71acd79Smacallan	if (w > 32) {
336f71acd79Smacallan		if (p->xdir < 0) {
337f71acd79Smacallan			srcstart += (w - 32);
338f71acd79Smacallan			dststart += (w - 32);
339f71acd79Smacallan			xinc = -32;
340f71acd79Smacallan		} else
341f71acd79Smacallan			xinc = 32;
342f71acd79Smacallan	} else
343f71acd79Smacallan		xinc = 32;
344f71acd79Smacallan	if (p->ydir < 0) {
345f71acd79Smacallan		srcstart += (h - 1) * srcpitch;
346f71acd79Smacallan		dststart += (h - 1) * dstpitch;
347f71acd79Smacallan		srcinc = -srcpitch;
348f71acd79Smacallan		dstinc = -dstpitch;
349f71acd79Smacallan	} else {
350f71acd79Smacallan		srcinc = srcpitch;
351f71acd79Smacallan		dstinc = dstpitch;
352f71acd79Smacallan	}
353f71acd79Smacallan	if (p->last_rop == 0xcc) {
354f71acd79Smacallan		/* plain old copy */
355f71acd79Smacallan		if ( xinc > 0) {
356f71acd79Smacallan			/* going left to right */
357f71acd79Smacallan			for (line = 0; line < h; line++) {
358f71acd79Smacallan				count = 0;
359f71acd79Smacallan				s = srcstart;
360f71acd79Smacallan				d = dststart;
361f71acd79Smacallan				while ( count < w) {
362f71acd79Smacallan					num = min(32, w - count);
363f71acd79Smacallan					write_sx_io(p, s,
364f71acd79Smacallan					    SX_LDB(10, num - 1, s & 7));
365f71acd79Smacallan					write_sx_io(p, d,
366f71acd79Smacallan					    SX_STBM(10, num - 1, d & 7));
367f71acd79Smacallan					s += xinc;
368f71acd79Smacallan					d += xinc;
369f71acd79Smacallan					count += 32;
370f71acd79Smacallan				}
371f71acd79Smacallan				srcstart += srcinc;
372f71acd79Smacallan				dststart += dstinc;
373f71acd79Smacallan			}
374f71acd79Smacallan		} else {
375f71acd79Smacallan			/* going right to left */
376f71acd79Smacallan			int i, chunks = (w >> 5);
377f71acd79Smacallan			for (line = 0; line < h; line++) {
378f71acd79Smacallan				s = srcstart;
379f71acd79Smacallan				d = dststart;
380f71acd79Smacallan				count = w;
381f71acd79Smacallan				for (i = 0; i < chunks; i++) {
382f71acd79Smacallan					write_sx_io(p, s,
383f71acd79Smacallan					    SX_LDB(10, 31, s & 7));
384f71acd79Smacallan					write_sx_io(p, d,
385f71acd79Smacallan					    SX_STBM(10, 31, d & 7));
386f71acd79Smacallan					s -= 32;
387f71acd79Smacallan					d -= 32;
388f71acd79Smacallan					count -= 32;
389f71acd79Smacallan				}
390f71acd79Smacallan				/* leftovers, if any */
391f71acd79Smacallan				if (count > 0) {
392f71acd79Smacallan					s += (32 - count);
393f71acd79Smacallan					d += (32 - count);
394f71acd79Smacallan					write_sx_io(p, s,
395f71acd79Smacallan					    SX_LDB(10, count - 1, s & 7));
396f71acd79Smacallan					write_sx_io(p, d,
397f71acd79Smacallan					    SX_STBM(10, count - 1, d & 7));
398f71acd79Smacallan				}
399f71acd79Smacallan				srcstart += srcinc;
400f71acd79Smacallan				dststart += dstinc;
401f71acd79Smacallan			}
402f71acd79Smacallan		}
403f71acd79Smacallan	} else {
404f71acd79Smacallan		/* ROPs needed */
405f71acd79Smacallan		if ( xinc > 0) {
406f71acd79Smacallan			/* going left to right */
407f71acd79Smacallan			for (line = 0; line < h; line++) {
408f71acd79Smacallan				count = 0;
409f71acd79Smacallan				s = srcstart;
410f71acd79Smacallan				d = dststart;
411f71acd79Smacallan				while ( count < w) {
412f71acd79Smacallan					num = min(32, w - count);
413f71acd79Smacallan					write_sx_io(p, s,
414f71acd79Smacallan					    SX_LDB(10, num - 1, s & 7));
415f71acd79Smacallan					write_sx_io(p, d,
416f71acd79Smacallan					    SX_LDB(42, num - 1, d & 7));
417f71acd79Smacallan					if (num > 16) {
418f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
419f71acd79Smacallan					    	 SX_ROP(10, 42, 74, 15));
420f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
421f71acd79Smacallan					    	 SX_ROP(26, 58, 90, num - 17));
422f71acd79Smacallan					} else {
423f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
424f71acd79Smacallan					    	 SX_ROP(10, 42, 74, num - 1));
425f71acd79Smacallan					}
426f71acd79Smacallan					write_sx_io(p, d,
427f71acd79Smacallan					    SX_STBM(74, num - 1, d & 7));
428f71acd79Smacallan					s += xinc;
429f71acd79Smacallan					d += xinc;
430f71acd79Smacallan					count += 32;
431f71acd79Smacallan				}
432f71acd79Smacallan				srcstart += srcinc;
433f71acd79Smacallan				dststart += dstinc;
434f71acd79Smacallan			}
435f71acd79Smacallan		} else {
436f71acd79Smacallan			/* going right to left */
437f71acd79Smacallan			int i, chunks = (w >> 5);
438f71acd79Smacallan			for (line = 0; line < h; line++) {
439f71acd79Smacallan				s = srcstart;
440f71acd79Smacallan				d = dststart;
441f71acd79Smacallan				count = w;
442f71acd79Smacallan				for (i = 0; i < chunks; i++) {
443f71acd79Smacallan					write_sx_io(p, s, SX_LDB(10, 31, s & 7));
444f71acd79Smacallan					write_sx_io(p, d, SX_LDB(42, 31, d & 7));
445f71acd79Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
446f71acd79Smacallan				    	    SX_ROP(10, 42, 74, 15));
447f71acd79Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
448f71acd79Smacallan				    	    SX_ROP(26, 58, 90, 15));
449f71acd79Smacallan					write_sx_io(p, d,
450f71acd79Smacallan					    SX_STBM(74, 31, d & 7));
451f71acd79Smacallan					s -= 128;
452f71acd79Smacallan					d -= 128;
453f71acd79Smacallan					count -= 32;
454f71acd79Smacallan				}
455f71acd79Smacallan				/* leftovers, if any */
456f71acd79Smacallan				if (count > 0) {
457f71acd79Smacallan					s += (32 - count);
458f71acd79Smacallan					d += (32 - count);
459f71acd79Smacallan					write_sx_io(p, s,
460f71acd79Smacallan					    SX_LDB(10, count - 1, s & 7));
461f71acd79Smacallan					write_sx_io(p, d,
462f71acd79Smacallan					    SX_LDB(42, count - 1, d & 7));
463f71acd79Smacallan					if (count > 16) {
464f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
465f71acd79Smacallan					    	    SX_ROP(10, 42, 74, 15));
466f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
467f71acd79Smacallan					    	 SX_ROP(26, 58, 90, count - 17));
468f71acd79Smacallan					} else {
469f71acd79Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
470f71acd79Smacallan					    	 SX_ROP(10, 42, 74, count - 1));
471f71acd79Smacallan					}
472f71acd79Smacallan
473f71acd79Smacallan					write_sx_io(p, d,
474f71acd79Smacallan					    SX_STBM(74, count - 1, d & 7));
475f71acd79Smacallan				}
476f71acd79Smacallan				srcstart += srcinc;
477f71acd79Smacallan				dststart += dstinc;
478f71acd79Smacallan			}
479f71acd79Smacallan		}
480f71acd79Smacallan	}
481f71acd79Smacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
482f71acd79Smacallan}
483f71acd79Smacallan
4844261fa58Smacallanstatic void
4854261fa58SmacallanCG14DoneCopy(PixmapPtr pDstPixmap)
4864261fa58Smacallan{
4874261fa58Smacallan}
4884261fa58Smacallan
4894261fa58Smacallanstatic Bool
4904261fa58SmacallanCG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
4914261fa58Smacallan{
4924261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
4934261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
4944261fa58Smacallan
4954261fa58Smacallan	ENTER;
4966bdc2ffdSmacallan	DPRINTF(X_ERROR, "bits per pixel: %d\n",
4976bdc2ffdSmacallan	    pPixmap->drawable.bitsPerPixel);
4984261fa58Smacallan	write_sx_reg(p, SX_QUEUED(8), fg);
4994261fa58Smacallan	write_sx_reg(p, SX_QUEUED(9), fg);
5004261fa58Smacallan	if (planemask != p->last_mask) {
5014261fa58Smacallan		CG14Wait(p);
5024261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
5034261fa58Smacallan		p->last_mask = planemask;
5044261fa58Smacallan	}
5054261fa58Smacallan	alu = sx_rop[alu];
5064261fa58Smacallan	if (alu != p->last_rop) {
5074261fa58Smacallan		CG14Wait(p);
5084261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
5094261fa58Smacallan		p->last_rop = alu;
5104261fa58Smacallan	}
5114261fa58Smacallan	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
5124261fa58Smacallan	return TRUE;
5134261fa58Smacallan}
5144261fa58Smacallan
5154261fa58Smacallanstatic void
5164261fa58SmacallanCG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
5174261fa58Smacallan{
5184261fa58Smacallan	int line, x, num;
5194261fa58Smacallan	uint32_t ptr;
5204261fa58Smacallan
5214261fa58Smacallan	ENTER;
5224261fa58Smacallan	if (p->last_rop == 0xcc) {
5234261fa58Smacallan		/* simple fill */
5244261fa58Smacallan		for (line = 0; line < h; line++) {
5254261fa58Smacallan			x = 0;
5264261fa58Smacallan			while (x < w) {
5274261fa58Smacallan				ptr = start + (x << 2);
5284261fa58Smacallan				num = min(32, w - x);
5294261fa58Smacallan				write_sx_io(p, ptr,
5304261fa58Smacallan				    SX_STS(8, num - 1, ptr & 7));
5314261fa58Smacallan				x += 32;
5324261fa58Smacallan			}
5334261fa58Smacallan			start += pitch;
5344261fa58Smacallan		}
5354261fa58Smacallan	} else if (p->last_rop == 0xaa) {
5364261fa58Smacallan		/* nothing to do here */
5374261fa58Smacallan		return;
5384261fa58Smacallan	} else {
5394261fa58Smacallan		/* alright, let's do actual ROP stuff */
5404261fa58Smacallan
5414261fa58Smacallan		/* first repeat the fill colour into 16 registers */
5424261fa58Smacallan		write_sx_reg(p, SX_INSTRUCTIONS,
5434261fa58Smacallan		    SX_SELECT_S(8, 8, 10, 15));
5444261fa58Smacallan
5454261fa58Smacallan		for (line = 0; line < h; line++) {
5464261fa58Smacallan			x = 0;
5474261fa58Smacallan			while (x < w) {
5484261fa58Smacallan				ptr = start + (x << 2);
5494261fa58Smacallan				num = min(32, w - x);
5504261fa58Smacallan				/* now suck fb data into registers */
5514261fa58Smacallan				write_sx_io(p, ptr,
5524261fa58Smacallan				    SX_LD(42, num - 1, ptr & 7));
5534261fa58Smacallan				/*
5544261fa58Smacallan				 * ROP them with the fill data we left in 10
5554261fa58Smacallan				 * non-memory ops can only have counts up to 16
5564261fa58Smacallan				 */
5574261fa58Smacallan				if (num <= 16) {
5584261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
5594261fa58Smacallan					    SX_ROP(10, 42, 74, num - 1));
5604261fa58Smacallan				} else {
5614261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
5624261fa58Smacallan					    SX_ROP(10, 42, 74, 15));
5634261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
5644261fa58Smacallan					    SX_ROP(10, 58, 90, num - 17));
5654261fa58Smacallan				}
5664261fa58Smacallan				/* and write the result back into memory */
5674261fa58Smacallan				write_sx_io(p, ptr,
5684261fa58Smacallan				    SX_ST(74, num - 1, ptr & 7));
5694261fa58Smacallan				x += 32;
5704261fa58Smacallan			}
5714261fa58Smacallan			start += pitch;
5724261fa58Smacallan		}
5734261fa58Smacallan	}
5744261fa58Smacallan}
5754261fa58Smacallan
5764261fa58Smacallanstatic void
5774261fa58SmacallanCG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
5784261fa58Smacallan{
5794261fa58Smacallan	int line, x, num, off;
5804261fa58Smacallan	uint32_t ptr;
5814261fa58Smacallan
5824261fa58Smacallan	ENTER;
5834261fa58Smacallan	off = start & 7;
5844261fa58Smacallan	start &= ~7;
5854261fa58Smacallan
5864261fa58Smacallan	if (p->last_rop == 0xcc) {
5874261fa58Smacallan		/* simple fill */
5884261fa58Smacallan		for (line = 0; line < h; line++) {
5894261fa58Smacallan			x = 0;
5904261fa58Smacallan			while (x < w) {
5914261fa58Smacallan				ptr = start + x;
5924261fa58Smacallan				num = min(32, w - x);
5934261fa58Smacallan				write_sx_io(p, ptr,
5944261fa58Smacallan				    SX_STBS(8, num - 1, off));
5954261fa58Smacallan				x += 32;
5964261fa58Smacallan			}
5974261fa58Smacallan			start += pitch;
5984261fa58Smacallan		}
5994261fa58Smacallan	} else if (p->last_rop == 0xaa) {
6004261fa58Smacallan		/* nothing to do here */
6014261fa58Smacallan		return;
6024261fa58Smacallan	} else {
6034261fa58Smacallan		/* alright, let's do actual ROP stuff */
6044261fa58Smacallan
6054261fa58Smacallan		/* first repeat the fill colour into 16 registers */
6064261fa58Smacallan		write_sx_reg(p, SX_INSTRUCTIONS,
6074261fa58Smacallan		    SX_SELECT_S(8, 8, 10, 15));
6084261fa58Smacallan
6094261fa58Smacallan		for (line = 0; line < h; line++) {
6104261fa58Smacallan			x = 0;
6114261fa58Smacallan			while (x < w) {
6124261fa58Smacallan				ptr = start + x;
6134261fa58Smacallan				num = min(32, w - x);
6144261fa58Smacallan				/* now suck fb data into registers */
6154261fa58Smacallan				write_sx_io(p, ptr,
6164261fa58Smacallan				    SX_LDB(42, num - 1, off));
6174261fa58Smacallan				/*
6184261fa58Smacallan				 * ROP them with the fill data we left in 10
6194261fa58Smacallan				 * non-memory ops can only have counts up to 16
6204261fa58Smacallan				 */
6214261fa58Smacallan				if (num <= 16) {
6224261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
6234261fa58Smacallan					    SX_ROP(10, 42, 74, num - 1));
6244261fa58Smacallan				} else {
6254261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
6264261fa58Smacallan					    SX_ROP(10, 42, 74, 15));
6274261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
6284261fa58Smacallan					    SX_ROP(10, 58, 90, num - 17));
6294261fa58Smacallan				}
6304261fa58Smacallan				/* and write the result back into memory */
6314261fa58Smacallan				write_sx_io(p, ptr,
6324261fa58Smacallan				    SX_STB(74, num - 1, off));
6334261fa58Smacallan				x += 32;
6344261fa58Smacallan			}
6354261fa58Smacallan			start += pitch;
6364261fa58Smacallan		}
6374261fa58Smacallan	}
6384261fa58Smacallan}
6394261fa58Smacallan
6404261fa58Smacallanstatic void
6414261fa58SmacallanCG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
6424261fa58Smacallan{
6434261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
6444261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
6454261fa58Smacallan	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
6464261fa58Smacallan	int start, depth;
6474261fa58Smacallan
6484261fa58Smacallan	ENTER;
6494261fa58Smacallan	dstpitch = exaGetPixmapPitch(pPixmap);
6504261fa58Smacallan	dstoff = exaGetPixmapOffset(pPixmap);
6514261fa58Smacallan
6524261fa58Smacallan	depth = pPixmap->drawable.bitsPerPixel;
6534261fa58Smacallan	switch (depth) {
6544261fa58Smacallan		case 32:
6554261fa58Smacallan			start = dstoff + (y1 * dstpitch) + (x1 << 2);
6564261fa58Smacallan			CG14Solid32(p, start, dstpitch, w, h);
6574261fa58Smacallan			break;
6584261fa58Smacallan		case 8:
6594261fa58Smacallan			start = dstoff + (y1 * dstpitch) + x1;
6604261fa58Smacallan			CG14Solid8(p, start, dstpitch, w, h);
6614261fa58Smacallan			break;
6624261fa58Smacallan	}
6634261fa58Smacallan
6644261fa58Smacallan	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
6654261fa58Smacallan	    dstpitch, dstoff, start);
6664261fa58Smacallan	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
6674261fa58Smacallan	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
6684261fa58Smacallan	exaMarkSync(pPixmap->drawable.pScreen);
6694261fa58Smacallan}
6704261fa58Smacallan
6714261fa58Smacallan/*
6724261fa58Smacallan * Memcpy-based UTS.
6734261fa58Smacallan */
6744261fa58Smacallanstatic Bool
6754261fa58SmacallanCG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
6764261fa58Smacallan    char *src, int src_pitch)
6774261fa58Smacallan{
6784261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
6794261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
6804261fa58Smacallan	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
6814261fa58Smacallan	int    dst_pitch  = exaGetPixmapPitch(pDst);
6824261fa58Smacallan
6834261fa58Smacallan	int bpp    = pDst->drawable.bitsPerPixel;
6844261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
6854261fa58Smacallan	int wBytes = w * cpp;
6864261fa58Smacallan
6874261fa58Smacallan	ENTER;
688f71acd79Smacallan	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
6894261fa58Smacallan	dst += (x * cpp) + (y * dst_pitch);
6904261fa58Smacallan
6914261fa58Smacallan	CG14Wait(p);
6924261fa58Smacallan
6934261fa58Smacallan	while (h--) {
6944261fa58Smacallan		memcpy(dst, src, wBytes);
6954261fa58Smacallan		src += src_pitch;
6964261fa58Smacallan		dst += dst_pitch;
6974261fa58Smacallan	}
6984261fa58Smacallan	__asm("stbar;");
6994261fa58Smacallan	return TRUE;
7004261fa58Smacallan}
7014261fa58Smacallan
7024261fa58Smacallan/*
7034261fa58Smacallan * Memcpy-based DFS.
7044261fa58Smacallan */
7054261fa58Smacallanstatic Bool
7064261fa58SmacallanCG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
7074261fa58Smacallan    char *dst, int dst_pitch)
7084261fa58Smacallan{
7094261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
7104261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
7114261fa58Smacallan	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
7124261fa58Smacallan	int    src_pitch  = exaGetPixmapPitch(pSrc);
7134261fa58Smacallan
7144261fa58Smacallan	ENTER;
7154261fa58Smacallan	int bpp    = pSrc->drawable.bitsPerPixel;
7164261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
7174261fa58Smacallan	int wBytes = w * cpp;
7184261fa58Smacallan
7194261fa58Smacallan	src += (x * cpp) + (y * src_pitch);
7204261fa58Smacallan
7214261fa58Smacallan	CG14Wait(p);
7224261fa58Smacallan
7234261fa58Smacallan	while (h--) {
7244261fa58Smacallan		memcpy(dst, src, wBytes);
7254261fa58Smacallan		src += src_pitch;
7264261fa58Smacallan		dst += dst_pitch;
7274261fa58Smacallan	}
7284261fa58Smacallan
7294261fa58Smacallan	return TRUE;
7304261fa58Smacallan}
7314261fa58Smacallan
7324261fa58SmacallanBool
7334261fa58SmacallanCG14CheckComposite(int op, PicturePtr pSrcPicture,
7344261fa58Smacallan                           PicturePtr pMaskPicture,
7354261fa58Smacallan                           PicturePtr pDstPicture)
7364261fa58Smacallan{
7374261fa58Smacallan	int i, ok = FALSE;
7384261fa58Smacallan
7394261fa58Smacallan	ENTER;
7404261fa58Smacallan
7414261fa58Smacallan	/*
7424261fa58Smacallan	 * SX is in theory capable of accelerating pretty much all Xrender ops,
7434261fa58Smacallan	 * even coordinate transformation and gradients. Support will be added
7444261fa58Smacallan	 * over time and likely have to spill over into its own source file.
7454261fa58Smacallan	 */
7464261fa58Smacallan
747a3a2ba44Smacallan	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
748fe97f391Smacallan		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
7494261fa58Smacallan		return FALSE;
7504261fa58Smacallan	}
7514261fa58Smacallan
7524bd47ccfSmacallan	if (pSrcPicture != NULL) {
7534bd47ccfSmacallan		i = 0;
7544bd47ccfSmacallan		while ((i < arraysize(src_formats)) && (!ok)) {
7554bd47ccfSmacallan			ok =  (pSrcPicture->format == src_formats[i]);
7564bd47ccfSmacallan			i++;
7574bd47ccfSmacallan		}
7584bd47ccfSmacallan
7594bd47ccfSmacallan		if (!ok) {
7604bd47ccfSmacallan			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
7614bd47ccfSmacallan			    __func__, pSrcPicture->format);
7624bd47ccfSmacallan			return FALSE;
7634bd47ccfSmacallan		}
7644bd47ccfSmacallan		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
7654261fa58Smacallan	}
7664261fa58Smacallan
7674bd47ccfSmacallan	if (pDstPicture != NULL) {
7684bd47ccfSmacallan		i = 0;
7694bd47ccfSmacallan		ok = FALSE;
7704bd47ccfSmacallan		while ((i < arraysize(src_formats)) && (!ok)) {
7714bd47ccfSmacallan			ok =  (pDstPicture->format == src_formats[i]);
7724bd47ccfSmacallan			i++;
7734bd47ccfSmacallan		}
7744bd47ccfSmacallan
7754bd47ccfSmacallan		if (!ok) {
7764bd47ccfSmacallan			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
7774bd47ccfSmacallan			    __func__, pDstPicture->format);
7784bd47ccfSmacallan			return FALSE;
7794bd47ccfSmacallan		}
7804bd47ccfSmacallan		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
7814bd47ccfSmacallan	}
7824261fa58Smacallan
7834261fa58Smacallan	if (pMaskPicture != NULL) {
7844261fa58Smacallan		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
7854261fa58Smacallan		    pMaskPicture->pDrawable->width,
7864261fa58Smacallan		    pMaskPicture->pDrawable->height);
7874261fa58Smacallan	}
7884261fa58Smacallan	return TRUE;
7894261fa58Smacallan}
7904261fa58Smacallan
7914261fa58SmacallanBool
7924261fa58SmacallanCG14PrepareComposite(int op, PicturePtr pSrcPicture,
7934261fa58Smacallan                             PicturePtr pMaskPicture,
7944261fa58Smacallan                             PicturePtr pDstPicture,
7954261fa58Smacallan                             PixmapPtr  pSrc,
7964261fa58Smacallan                             PixmapPtr  pMask,
7974261fa58Smacallan                             PixmapPtr  pDst)
7984261fa58Smacallan{
7994261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
8004261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
8014261fa58Smacallan
8024261fa58Smacallan	ENTER;
8034261fa58Smacallan
804f7cb851fSmacallan	p->no_source_pixmap = FALSE;
805f7cb851fSmacallan	p->source_is_solid = FALSE;
806f7cb851fSmacallan
807a3a2ba44Smacallan	if (pSrcPicture->format == PICT_a1) {
8086bdc2ffdSmacallan		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
8096bdc2ffdSmacallan		    pDstPicture->format, op);
810a3a2ba44Smacallan		if (pMaskPicture != NULL) {
811a3a2ba44Smacallan			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
812a3a2ba44Smacallan		}
813f7cb851fSmacallan	}
8144261fa58Smacallan	if (pSrcPicture->pSourcePict != NULL) {
8154261fa58Smacallan		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
8164261fa58Smacallan			p->fillcolour =
8174261fa58Smacallan			    pSrcPicture->pSourcePict->solidFill.color;
818f7cb851fSmacallan			DPRINTF(X_ERROR, "%s: solid src %08x\n",
8194261fa58Smacallan			    __func__, p->fillcolour);
820f7cb851fSmacallan			p->no_source_pixmap = TRUE;
821f7cb851fSmacallan			p->source_is_solid = TRUE;
8224261fa58Smacallan		}
8234261fa58Smacallan	}
8244261fa58Smacallan	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
8254261fa58Smacallan		if (pMaskPicture->pSourcePict->type ==
8264261fa58Smacallan		    SourcePictTypeSolidFill) {
8274261fa58Smacallan			p->fillcolour =
8284261fa58Smacallan			   pMaskPicture->pSourcePict->solidFill.color;
829a3a2ba44Smacallan			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
8304261fa58Smacallan			    __func__, p->fillcolour);
8314261fa58Smacallan		}
8324261fa58Smacallan	}
8334261fa58Smacallan	if (pMaskPicture != NULL) {
834239808baSmacallan		p->mskoff = exaGetPixmapOffset(pMask);
8354261fa58Smacallan		p->mskpitch = exaGetPixmapPitch(pMask);
8364261fa58Smacallan		p->mskformat = pMaskPicture->format;
837a3a2ba44Smacallan	} else {
838239808baSmacallan		p->mskoff = 0;
839a3a2ba44Smacallan		p->mskpitch = 0;
840a3a2ba44Smacallan		p->mskformat = 0;
8414261fa58Smacallan	}
842f7cb851fSmacallan	if (pSrc != NULL) {
843f7cb851fSmacallan		p->source_is_solid =
844f7cb851fSmacallan		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
845f7cb851fSmacallan		p->srcoff = exaGetPixmapOffset(pSrc);
846f7cb851fSmacallan		p->srcpitch = exaGetPixmapPitch(pSrc);
847f7cb851fSmacallan		if (p->source_is_solid) {
848f7cb851fSmacallan			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
849f7cb851fSmacallan		}
850f7cb851fSmacallan	}
8514261fa58Smacallan	p->srcformat = pSrcPicture->format;
8524261fa58Smacallan	p->dstformat = pDstPicture->format;
853f7cb851fSmacallan
854f7cb851fSmacallan	if (p->source_is_solid) {
855f7cb851fSmacallan		uint32_t temp;
856f7cb851fSmacallan
857f7cb851fSmacallan		/* stuff source colour into SX registers, swap as needed */
858f7cb851fSmacallan		temp = p->fillcolour;
859f7cb851fSmacallan		switch (p->srcformat) {
860f7cb851fSmacallan			case PICT_a8r8g8b8:
861f7cb851fSmacallan			case PICT_x8r8g8b8:
862f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
863f7cb851fSmacallan				temp = temp >> 8;
864f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
865f7cb851fSmacallan				temp = temp >> 8;
866f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
867f7cb851fSmacallan				break;
868f7cb851fSmacallan			case PICT_a8b8g8r8:
869f7cb851fSmacallan			case PICT_x8b8g8r8:
870f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
871f7cb851fSmacallan				temp = temp >> 8;
872f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
873f7cb851fSmacallan				temp = temp >> 8;
874f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
875f7cb851fSmacallan				break;
876f7cb851fSmacallan		}
877f7cb851fSmacallan		write_sx_reg(p, SX_QUEUED(8), 0xff);
878f7cb851fSmacallan	}
8794261fa58Smacallan	p->op = op;
880a3a2ba44Smacallan	if (op == PictOpSrc) {
881a3a2ba44Smacallan		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
882a3a2ba44Smacallan	}
8834261fa58Smacallan#ifdef SX_DEBUG
8844261fa58Smacallan	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
8854261fa58Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
8864261fa58Smacallan#endif
8874261fa58Smacallan	return TRUE;
8884261fa58Smacallan}
8894261fa58Smacallan
8904261fa58Smacallanvoid
8914261fa58SmacallanCG14Composite(PixmapPtr pDst, int srcX, int srcY,
8924261fa58Smacallan                              int maskX, int maskY,
8934261fa58Smacallan                              int dstX, int dstY,
8944261fa58Smacallan                              int width, int height)
8954261fa58Smacallan{
8964261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
8974261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
8984261fa58Smacallan	uint32_t dstoff, dstpitch;
8994261fa58Smacallan	uint32_t dst, msk, src;
900e311bbeeSmacallan	int flip = 0;
9014261fa58Smacallan
9024261fa58Smacallan	ENTER;
9034261fa58Smacallan	dstoff = exaGetPixmapOffset(pDst);
9044261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDst);
9054261fa58Smacallan
906e311bbeeSmacallan	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
907e311bbeeSmacallan		PICT_FORMAT_TYPE(p->dstformat));
908e311bbeeSmacallan
9094261fa58Smacallan	switch (p->op) {
9104261fa58Smacallan		case PictOpOver:
9114261fa58Smacallan			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
9124261fa58Smacallan			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
9134261fa58Smacallan			    p->mskformat, p->dstformat, srcX, srcY);
914a3a2ba44Smacallan			if (p->source_is_solid) {
915a3a2ba44Smacallan				switch (p->mskformat) {
916a3a2ba44Smacallan					case PICT_a8:
917a3a2ba44Smacallan						msk = p->mskoff +
918a3a2ba44Smacallan						    (maskY * p->mskpitch) +
919a3a2ba44Smacallan						    maskX;
920a3a2ba44Smacallan						CG14Comp_Over8Solid(p,
921a3a2ba44Smacallan						    msk, p->mskpitch,
922a3a2ba44Smacallan						    dst, dstpitch,
923a3a2ba44Smacallan						    width, height);
924a3a2ba44Smacallan						break;
925a3a2ba44Smacallan					case PICT_a8r8g8b8:
926a3a2ba44Smacallan					case PICT_a8b8g8r8:
927a3a2ba44Smacallan						msk = p->mskoff +
928a3a2ba44Smacallan						    (maskY * p->mskpitch) +
929a3a2ba44Smacallan						    (maskX << 2);
930a3a2ba44Smacallan						CG14Comp_Over32Solid(p,
931a3a2ba44Smacallan						    msk, p->mskpitch,
932a3a2ba44Smacallan						    dst, dstpitch,
933a3a2ba44Smacallan						    width, height);
934a3a2ba44Smacallan						break;
935a3a2ba44Smacallan					default:
936a3a2ba44Smacallan						xf86Msg(X_ERROR,
937f71acd79Smacallan						  "unsupported mask format %08x\n", p->mskformat);
938a3a2ba44Smacallan				}
939a3a2ba44Smacallan			} else {
9406bdc2ffdSmacallan				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
9416bdc2ffdSmacallan				    p->mskformat);
942a3a2ba44Smacallan				switch (p->srcformat) {
943a3a2ba44Smacallan					case PICT_a8r8g8b8:
944a3a2ba44Smacallan					case PICT_a8b8g8r8:
945a3a2ba44Smacallan						src = p->srcoff +
946a3a2ba44Smacallan						    (srcY * p->srcpitch) +
947a3a2ba44Smacallan						    (srcX << 2);
948a3a2ba44Smacallan						dst = dstoff +
949a3a2ba44Smacallan						    (dstY * dstpitch) +
950a3a2ba44Smacallan						    (dstX << 2);
951a3a2ba44Smacallan						if (p->mskformat == PICT_a8) {
952a3a2ba44Smacallan							msk = p->mskoff +
953a3a2ba44Smacallan							    (maskY * p->mskpitch) +
954a3a2ba44Smacallan							    maskX;
955a3a2ba44Smacallan							CG14Comp_Over32Mask(p,
956a3a2ba44Smacallan							    src, p->srcpitch,
957a3a2ba44Smacallan							    msk, p->mskpitch,
958a3a2ba44Smacallan							    dst, dstpitch,
959e311bbeeSmacallan							    width, height, flip);
960a3a2ba44Smacallan						} else {
961a3a2ba44Smacallan							CG14Comp_Over32(p,
962a3a2ba44Smacallan							    src, p->srcpitch,
963a3a2ba44Smacallan							    dst, dstpitch,
964e311bbeeSmacallan							    width, height, flip);
965a3a2ba44Smacallan						}
966a3a2ba44Smacallan						break;
967a3a2ba44Smacallan					case PICT_x8r8g8b8:
968a3a2ba44Smacallan					case PICT_x8b8g8r8:
9696bdc2ffdSmacallan						src = p->srcoff +
9706bdc2ffdSmacallan						    (srcY * p->srcpitch) +
9716bdc2ffdSmacallan						    (srcX << 2);
9726bdc2ffdSmacallan						dst = dstoff +
9736bdc2ffdSmacallan						    (dstY * dstpitch) +
9746bdc2ffdSmacallan						    (dstX << 2);
9756bdc2ffdSmacallan						if (p->mskformat == PICT_a8) {
9766bdc2ffdSmacallan							msk = p->mskoff +
9776bdc2ffdSmacallan							    (maskY * p->mskpitch) +
9786bdc2ffdSmacallan							    maskX;
9796bdc2ffdSmacallan							CG14Comp_Over32Mask_noalpha(p,
9806bdc2ffdSmacallan							    src, p->srcpitch,
9816bdc2ffdSmacallan							    msk, p->mskpitch,
982fa158432Smacallan							    dst, dstpitch,
983e311bbeeSmacallan							    width, height, flip);
984fa158432Smacallan						} else if ((p->mskformat == PICT_a8r8g8b8) ||
985fa158432Smacallan							   (p->mskformat == PICT_a8b8g8r8)) {
986fa158432Smacallan							msk = p->mskoff +
987fa158432Smacallan							    (maskY * p->mskpitch) +
988fa158432Smacallan							    (maskX << 2);
989fa158432Smacallan							CG14Comp_Over32Mask32_noalpha(p,
990fa158432Smacallan							    src, p->srcpitch,
991fa158432Smacallan							    msk, p->mskpitch,
9926bdc2ffdSmacallan							    dst, dstpitch,
993e311bbeeSmacallan							    width, height, flip);
9946bdc2ffdSmacallan						} else {
9956bdc2ffdSmacallan							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
9966bdc2ffdSmacallan						}
997a3a2ba44Smacallan						break;
998a3a2ba44Smacallan					default:
999a3a2ba44Smacallan						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
1000a3a2ba44Smacallan						    __func__, p->srcformat);
1001a3a2ba44Smacallan				}
1002a3a2ba44Smacallan			}
10034261fa58Smacallan			break;
10044261fa58Smacallan		case PictOpAdd:
10054261fa58Smacallan			DPRINTF(X_ERROR, "Add %08x %08x\n",
10064261fa58Smacallan			    p->srcformat, p->dstformat);
10074261fa58Smacallan			switch (p->srcformat) {
10084261fa58Smacallan				case PICT_a8:
10094261fa58Smacallan					src = p->srcoff +
10104261fa58Smacallan					    (srcY * p->srcpitch) + srcX;
1011d71cb32dSmacallan					if (p->dstformat == PICT_a8) {
1012d71cb32dSmacallan						dst = dstoff +
1013d71cb32dSmacallan						      (dstY * dstpitch) + dstX;
1014d71cb32dSmacallan						CG14Comp_Add8(p,
1015d71cb32dSmacallan						    src, p->srcpitch,
1016d71cb32dSmacallan						    dst, dstpitch,
1017d71cb32dSmacallan						    width, height);
1018d71cb32dSmacallan					} else {
1019d71cb32dSmacallan						dst = dstoff +
1020d71cb32dSmacallan						      (dstY * dstpitch) +
1021d71cb32dSmacallan						      (dstX << 2);
1022d71cb32dSmacallan						CG14Comp_Add8_32(p,
1023d71cb32dSmacallan						    src, p->srcpitch,
1024d71cb32dSmacallan						    dst, dstpitch,
1025d71cb32dSmacallan						    width, height);
1026d71cb32dSmacallan					}
10274261fa58Smacallan					break;
10284261fa58Smacallan				case PICT_a8r8g8b8:
10294261fa58Smacallan				case PICT_x8r8g8b8:
10304261fa58Smacallan					src = p->srcoff +
10314261fa58Smacallan					    (srcY * p->srcpitch) + (srcX << 2);
10324261fa58Smacallan					dst = dstoff + (dstY * dstpitch) +
10334261fa58Smacallan					    (dstX << 2);
10344261fa58Smacallan					CG14Comp_Add32(p, src, p->srcpitch,
10354261fa58Smacallan					    dst, dstpitch, width, height);
10364261fa58Smacallan					break;
10374261fa58Smacallan				default:
10384261fa58Smacallan					xf86Msg(X_ERROR,
10394261fa58Smacallan					    "unsupported src format\n");
10404261fa58Smacallan			}
10414261fa58Smacallan			break;
1042a3a2ba44Smacallan		case PictOpSrc:
1043a3a2ba44Smacallan			DPRINTF(X_ERROR, "Src %08x %08x\n",
1044a3a2ba44Smacallan			    p->srcformat, p->dstformat);
1045239808baSmacallan			if (p->mskformat != 0)
1046239808baSmacallan				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
1047f71acd79Smacallan			if (p->srcformat == PICT_a8) {
1048f71acd79Smacallan				CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
1049f71acd79Smacallan			} else {
1050f71acd79Smacallan				/* convert between RGB and BGR? */
1051f71acd79Smacallan				CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
1052f71acd79Smacallan			}
1053a3a2ba44Smacallan			break;
10544261fa58Smacallan		default:
10554261fa58Smacallan			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
10564261fa58Smacallan	}
10574261fa58Smacallan	exaMarkSync(pDst->drawable.pScreen);
10584261fa58Smacallan}
10594261fa58Smacallan
10604261fa58Smacallan
10614261fa58Smacallan
10624261fa58SmacallanBool
10634261fa58SmacallanCG14InitAccel(ScreenPtr pScreen)
10644261fa58Smacallan{
10654261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
10664261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
10674261fa58Smacallan	ExaDriverPtr pExa;
10684261fa58Smacallan
10694261fa58Smacallan	pExa = exaDriverAlloc();
10704261fa58Smacallan	if (!pExa)
10714261fa58Smacallan		return FALSE;
10724261fa58Smacallan
10734261fa58Smacallan	p->pExa = pExa;
10744261fa58Smacallan
10754261fa58Smacallan	pExa->exa_major = EXA_VERSION_MAJOR;
10764261fa58Smacallan	pExa->exa_minor = EXA_VERSION_MINOR;
10774261fa58Smacallan
10784261fa58Smacallan	pExa->memoryBase = p->fb;
10794261fa58Smacallan	pExa->memorySize = p->memsize;
10804261fa58Smacallan	pExa->offScreenBase = p->width * p->height * 4;
10814261fa58Smacallan
10824261fa58Smacallan	/*
10834261fa58Smacallan	 * SX memory instructions are written to 64bit aligned addresses with
10844261fa58Smacallan	 * a 3 bit displacement. Make sure the displacement remains constant
10854261fa58Smacallan	 * within one column
10864261fa58Smacallan	 */
10874261fa58Smacallan
10884261fa58Smacallan	pExa->pixmapOffsetAlign = 8;
10894261fa58Smacallan	pExa->pixmapPitchAlign = 8;
10904261fa58Smacallan
1091fe97f391Smacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS
1092f71acd79Smacallan		      | EXA_SUPPORTS_OFFSCREEN_OVERLAPS
1093f71acd79Smacallan		      /*| EXA_MIXED_PIXMAPS*/;
10944261fa58Smacallan
10954261fa58Smacallan	/*
10964261fa58Smacallan	 * these limits are bogus
10974261fa58Smacallan	 * SX doesn't deal with coordinates at all, so there is no limit but
10984261fa58Smacallan	 * we have to put something here
10994261fa58Smacallan	 */
11004261fa58Smacallan	pExa->maxX = 4096;
11014261fa58Smacallan	pExa->maxY = 4096;
11024261fa58Smacallan
11034261fa58Smacallan	pExa->WaitMarker = CG14WaitMarker;
11044261fa58Smacallan
11054261fa58Smacallan	pExa->PrepareSolid = CG14PrepareSolid;
11064261fa58Smacallan	pExa->Solid = CG14Solid;
11074261fa58Smacallan	pExa->DoneSolid = CG14DoneCopy;
11084261fa58Smacallan	pExa->PrepareCopy = CG14PrepareCopy;
1109f71acd79Smacallan	pExa->Copy = CG14Copy32;
11104261fa58Smacallan	pExa->DoneCopy = CG14DoneCopy;
11114261fa58Smacallan	if (p->use_xrender) {
11124261fa58Smacallan		pExa->CheckComposite = CG14CheckComposite;
11134261fa58Smacallan		pExa->PrepareComposite = CG14PrepareComposite;
11144261fa58Smacallan		pExa->Composite = CG14Composite;
11154261fa58Smacallan		pExa->DoneComposite = CG14DoneCopy;
11164261fa58Smacallan	}
11174261fa58Smacallan
11184261fa58Smacallan	/* EXA hits more optimized paths when it does not have to fallback
11194261fa58Smacallan	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
11204261fa58Smacallan	 */
11214261fa58Smacallan	pExa->UploadToScreen = CG14UploadToScreen;
11224261fa58Smacallan	pExa->DownloadFromScreen = CG14DownloadFromScreen;
11234261fa58Smacallan
11244261fa58Smacallan	/* do some hardware init */
11254261fa58Smacallan	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
11264261fa58Smacallan	p->last_mask = 0xffffffff;
11274261fa58Smacallan	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
11284261fa58Smacallan	p->last_rop = 0xcc;
11294261fa58Smacallan	return exaDriverInit(pScreen, pExa);
11304261fa58Smacallan}
1131