1a61c6651Smacallan/* $NetBSD: cg14_accel.c,v 1.33 2024/05/13 10:13:10 macallan Exp $ */
24261fa58Smacallan/*
34261fa58Smacallan * Copyright (c) 2013 Michael Lorenz
44261fa58Smacallan * All rights reserved.
54261fa58Smacallan *
64261fa58Smacallan * Redistribution and use in source and binary forms, with or without
74261fa58Smacallan * modification, are permitted provided that the following conditions
84261fa58Smacallan * are met:
94261fa58Smacallan *
104261fa58Smacallan *    - Redistributions of source code must retain the above copyright
114261fa58Smacallan *      notice, this list of conditions and the following disclaimer.
124261fa58Smacallan *    - Redistributions in binary form must reproduce the above
134261fa58Smacallan *      copyright notice, this list of conditions and the following
144261fa58Smacallan *      disclaimer in the documentation and/or other materials provided
154261fa58Smacallan *      with the distribution.
164261fa58Smacallan *
174261fa58Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
184261fa58Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
194261fa58Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
204261fa58Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
214261fa58Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
224261fa58Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
234261fa58Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
244261fa58Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
254261fa58Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
264261fa58Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
274261fa58Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
284261fa58Smacallan * POSSIBILITY OF SUCH DAMAGE.
294261fa58Smacallan *
304261fa58Smacallan */
31c88c16f8Smacallan
32c88c16f8Smacallan#ifdef HAVE_CONFIG_H
33c88c16f8Smacallan#include "config.h"
34c88c16f8Smacallan#endif
35c88c16f8Smacallan
364261fa58Smacallan#include <sys/types.h>
374261fa58Smacallan
/* all drivers need this */
394261fa58Smacallan#include "xf86.h"
404261fa58Smacallan#include "xf86_OSproc.h"
414261fa58Smacallan#include "compiler.h"
424261fa58Smacallan
434261fa58Smacallan#include "cg14.h"
444261fa58Smacallan
45b0f02aefSmacallan/*#define SX_DEBUG*/
46b0f02aefSmacallan/*#define SX_TRACE*/
474261fa58Smacallan
/*
 * Debug helpers.
 *
 * ENTER logs the name of the current function through xf86Msg() when
 * SX_TRACE is defined and expands to nothing otherwise.  The trailing
 * semicolon is part of the macro.
 *
 * DPRINTF is xf86Msg() when SX_DEBUG is defined.  Without SX_DEBUG the call
 * is still compiled, but it sits behind while (0) and is never executed.
 */
#if defined(SX_TRACE)
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#else
#define ENTER
#endif

#if defined(SX_DEBUG)
#define DPRINTF xf86Msg
#else
#define DPRINTF while (0) xf86Msg
#endif
594261fa58Smacallan
/*
 * Number of elements in an array.  Only meaningful for real arrays, a
 * pointer yields a meaningless result.  The argument is parenthesized so
 * that an expression can be passed safely.
 */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
614261fa58Smacallan
/*
 * SX ROP codes, indexed by the X11 raster operation (alu) handed to the
 * Prepare*() hooks.  0xcc is SX's GXcopy equivalent.
 */
uint32_t sx_rop[] = {
	0x00,	/* 0x0 GXclear */
	0x88,	/* 0x1 GXand */
	0x44,	/* 0x2 GXandReverse */
	0xcc,	/* 0x3 GXcopy */
	0x22,	/* 0x4 GXandInverted */
	0xaa,	/* 0x5 GXnoop */
	0x66,	/* 0x6 GXxor */
	0xee,	/* 0x7 GXor */
	0x11,	/* 0x8 GXnor */
	0x99,	/* 0x9 GXequiv */
	0x55,	/* 0xa GXinvert */
	0xdd,	/* 0xb GXorReverse */
	0x33,	/* 0xc GXcopyInverted */
	0xbb,	/* 0xd GXorInverted */
	0x77,	/* 0xe GXnand */
	0xff	/* 0xf GXset */
};
654261fa58Smacallan
664261fa58Smacallanint src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
674261fa58Smacallan		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
684261fa58Smacallanint tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
694261fa58Smacallan
70f71acd79Smacallanstatic void CG14Copy32(PixmapPtr, int, int, int, int, int, int);
71a61c6651Smacallanstatic void CG14Copy16(PixmapPtr, int, int, int, int, int, int);
72f71acd79Smacallanstatic void CG14Copy8(PixmapPtr, int, int, int, int, int, int);
73f71acd79Smacallan
744261fa58Smacallanstatic inline void
754261fa58SmacallanCG14Wait(Cg14Ptr p)
764261fa58Smacallan{
77fc473876Smacallan	int bail = 10000000;
78fc473876Smacallan	/* we wait for the busy bit to clear */
79fc473876Smacallan	while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) &&
80fc473876Smacallan	       (bail > 0)) {
81fc473876Smacallan		bail--;
82fc473876Smacallan	};
83fc473876Smacallan	if (bail == 0) {
84fc473876Smacallan		xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n",
85fc473876Smacallan		    read_sx_reg(p, SX_CONTROL_STATUS),
86fc473876Smacallan		    read_sx_reg(p, SX_ERROR));
87fc473876Smacallan	}
884261fa58Smacallan}
894261fa58Smacallan
904261fa58Smacallanstatic void
914261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker)
924261fa58Smacallan{
934261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
944261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
954261fa58Smacallan
964261fa58Smacallan	CG14Wait(p);
974261fa58Smacallan}
984261fa58Smacallan
994261fa58Smacallanstatic Bool
1004261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
1014261fa58Smacallan		int xdir, int ydir, int alu, Pixel planemask)
1024261fa58Smacallan{
1034261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1044261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1054261fa58Smacallan
1064261fa58Smacallan	ENTER;
107b0f02aefSmacallan	DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__,
10881c68cf8Smacallan	    pSrcPixmap->drawable.bitsPerPixel, alu);
1094261fa58Smacallan
1104261fa58Smacallan	if (planemask != p->last_mask) {
1114261fa58Smacallan		CG14Wait(p);
1124261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
1134261fa58Smacallan		p->last_mask = planemask;
1144261fa58Smacallan	}
1154261fa58Smacallan	alu = sx_rop[alu];
1164261fa58Smacallan	if (alu != p->last_rop) {
1174261fa58Smacallan		CG14Wait(p);
1184261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
1194261fa58Smacallan		p->last_rop = alu;
1204261fa58Smacallan	}
121f71acd79Smacallan	switch (pSrcPixmap->drawable.bitsPerPixel)  {
122f71acd79Smacallan		case 8:
123f71acd79Smacallan			p->pExa->Copy = CG14Copy8;
124f71acd79Smacallan			break;
125a61c6651Smacallan		case 16:
126a61c6651Smacallan			p->pExa->Copy = CG14Copy16;
127a61c6651Smacallan			break;
128f71acd79Smacallan		case 32:
129f71acd79Smacallan			p->pExa->Copy = CG14Copy32;
130f71acd79Smacallan			break;
131f71acd79Smacallan		default:
132b0f02aefSmacallan			DPRINTF(X_ERROR, "%s depth %d\n", __func__,
133f71acd79Smacallan			    pSrcPixmap->drawable.bitsPerPixel);
134f71acd79Smacallan	}
1354261fa58Smacallan	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
1364261fa58Smacallan	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
1374261fa58Smacallan	p->xdir = xdir;
1384261fa58Smacallan	p->ydir = ydir;
1394261fa58Smacallan	return TRUE;
1404261fa58Smacallan}
1414261fa58Smacallan
1424261fa58Smacallanstatic void
143f71acd79SmacallanCG14Copy32(PixmapPtr pDstPixmap,
1444261fa58Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
1454261fa58Smacallan{
1464261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1474261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1484261fa58Smacallan	int dstpitch, dstoff, srcpitch, srcoff;
1494261fa58Smacallan	int srcstart, dststart, xinc, srcinc, dstinc;
1504261fa58Smacallan	int line, count, s, d, num;
1514261fa58Smacallan
1524261fa58Smacallan	ENTER;
1534261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
1544261fa58Smacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
1554261fa58Smacallan	srcpitch = p->srcpitch;
1564261fa58Smacallan	srcoff = p->srcoff;
1574261fa58Smacallan	/*
1584261fa58Smacallan	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
1594261fa58Smacallan	 * actually wrote anything and only sync if it did
1604261fa58Smacallan	 */
1614261fa58Smacallan	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
1624261fa58Smacallan	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
1634261fa58Smacallan
1644261fa58Smacallan	/*
1654261fa58Smacallan	 * we always copy up to 32 pixels at a time so direction doesn't
1664261fa58Smacallan	 * matter if w<=32
1674261fa58Smacallan	 */
1684261fa58Smacallan	if (w > 32) {
1694261fa58Smacallan		if (p->xdir < 0) {
1704261fa58Smacallan			srcstart += (w - 32) << 2;
1714261fa58Smacallan			dststart += (w - 32) << 2;
1724261fa58Smacallan			xinc = -128;
1734261fa58Smacallan		} else
1744261fa58Smacallan			xinc = 128;
1754261fa58Smacallan	} else
1764261fa58Smacallan		xinc = 128;
1774261fa58Smacallan	if (p->ydir < 0) {
1784261fa58Smacallan		srcstart += (h - 1) * srcpitch;
1794261fa58Smacallan		dststart += (h - 1) * dstpitch;
1804261fa58Smacallan		srcinc = -srcpitch;
1814261fa58Smacallan		dstinc = -dstpitch;
1824261fa58Smacallan	} else {
1834261fa58Smacallan		srcinc = srcpitch;
1844261fa58Smacallan		dstinc = dstpitch;
1854261fa58Smacallan	}
1864261fa58Smacallan	if (p->last_rop == 0xcc) {
1874261fa58Smacallan		/* plain old copy */
1884261fa58Smacallan		if ( xinc > 0) {
1894261fa58Smacallan			/* going left to right */
1904261fa58Smacallan			for (line = 0; line < h; line++) {
1914261fa58Smacallan				count = 0;
1924261fa58Smacallan				s = srcstart;
1934261fa58Smacallan				d = dststart;
1944261fa58Smacallan				while ( count < w) {
1954261fa58Smacallan					num = min(32, w - count);
19672fd264fSmacallan					sxm(SX_LD, s, 10, num - 1);
19772fd264fSmacallan					sxm(SX_STM, d, 10, num - 1);
1984261fa58Smacallan					s += xinc;
1994261fa58Smacallan					d += xinc;
2004261fa58Smacallan					count += 32;
2014261fa58Smacallan				}
2024261fa58Smacallan				srcstart += srcinc;
2034261fa58Smacallan				dststart += dstinc;
2044261fa58Smacallan			}
2054261fa58Smacallan		} else {
2064261fa58Smacallan			/* going right to left */
2074261fa58Smacallan			int i, chunks = (w >> 5);
2084261fa58Smacallan			for (line = 0; line < h; line++) {
2094261fa58Smacallan				s = srcstart;
2104261fa58Smacallan				d = dststart;
2114261fa58Smacallan				count = w;
2124261fa58Smacallan				for (i = 0; i < chunks; i++) {
21372fd264fSmacallan					sxm(SX_LD, s, 10, 31);
21472fd264fSmacallan					sxm(SX_STM, d, 10, 31);
2154261fa58Smacallan					s -= 128;
2164261fa58Smacallan					d -= 128;
2174261fa58Smacallan					count -= 32;
2184261fa58Smacallan				}
2194261fa58Smacallan				/* leftovers, if any */
2204261fa58Smacallan				if (count > 0) {
2214261fa58Smacallan					s += (32 - count) << 2;
2224261fa58Smacallan					d += (32 - count) << 2;
22372fd264fSmacallan					sxm(SX_LD, s, 10, count - 1);
22472fd264fSmacallan					sxm(SX_STM, d, 10, count - 1);
2254261fa58Smacallan				}
2264261fa58Smacallan				srcstart += srcinc;
2274261fa58Smacallan				dststart += dstinc;
2284261fa58Smacallan			}
2294261fa58Smacallan		}
2304261fa58Smacallan	} else {
2314261fa58Smacallan		/* ROPs needed */
2324261fa58Smacallan		if ( xinc > 0) {
2334261fa58Smacallan			/* going left to right */
2344261fa58Smacallan			for (line = 0; line < h; line++) {
2354261fa58Smacallan				count = 0;
2364261fa58Smacallan				s = srcstart;
2374261fa58Smacallan				d = dststart;
2384261fa58Smacallan				while ( count < w) {
2394261fa58Smacallan					num = min(32, w - count);
24072fd264fSmacallan					sxm(SX_LD, s, 10, num - 1);
24172fd264fSmacallan					sxm(SX_LD, d, 42, num - 1);
2424261fa58Smacallan					if (num > 16) {
243230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, 15);
244230e26c7Smacallan						sxi(SX_ROP, 26, 58, 90, num - 17);
2454261fa58Smacallan					} else {
246230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, num - 1);
2474261fa58Smacallan					}
24872fd264fSmacallan					sxm(SX_STM, d, 74, num - 1);
2494261fa58Smacallan					s += xinc;
2504261fa58Smacallan					d += xinc;
2514261fa58Smacallan					count += 32;
2524261fa58Smacallan				}
2534261fa58Smacallan				srcstart += srcinc;
2544261fa58Smacallan				dststart += dstinc;
2554261fa58Smacallan			}
2564261fa58Smacallan		} else {
2574261fa58Smacallan			/* going right to left */
2584261fa58Smacallan			int i, chunks = (w >> 5);
2594261fa58Smacallan			for (line = 0; line < h; line++) {
2604261fa58Smacallan				s = srcstart;
2614261fa58Smacallan				d = dststart;
2624261fa58Smacallan				count = w;
2634261fa58Smacallan				for (i = 0; i < chunks; i++) {
26472fd264fSmacallan					sxm(SX_LD, s, 10, 31);
26572fd264fSmacallan					sxm(SX_LD, d, 42, 31);
266230e26c7Smacallan					sxi(SX_ROP, 10, 42, 74, 15);
267230e26c7Smacallan					sxi(SX_ROP, 26, 58, 90, 15);
26872fd264fSmacallan					sxm(SX_STM, d, 74, 31);
2694261fa58Smacallan					s -= 128;
2704261fa58Smacallan					d -= 128;
2714261fa58Smacallan					count -= 32;
2724261fa58Smacallan				}
2734261fa58Smacallan				/* leftovers, if any */
2744261fa58Smacallan				if (count > 0) {
2754261fa58Smacallan					s += (32 - count) << 2;
2764261fa58Smacallan					d += (32 - count) << 2;
27772fd264fSmacallan					sxm(SX_LD, s, 10, count - 1);
27872fd264fSmacallan					sxm(SX_LD, d, 42, count - 1);
2794261fa58Smacallan					if (count > 16) {
280230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, 15);
281230e26c7Smacallan						sxi(SX_ROP, 26, 58, 90, count - 17);
2824261fa58Smacallan					} else {
283230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, count - 1);
2844261fa58Smacallan					}
28572fd264fSmacallan					sxm(SX_STM, d, 74, count - 1);
2864261fa58Smacallan				}
2874261fa58Smacallan				srcstart += srcinc;
2884261fa58Smacallan				dststart += dstinc;
2894261fa58Smacallan			}
2904261fa58Smacallan		}
2914261fa58Smacallan	}
2924261fa58Smacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
2934261fa58Smacallan}
2944261fa58Smacallan
29581c68cf8Smacallan/*
29681c68cf8Smacallan * copy with same alignment, left to right, no ROP
29781c68cf8Smacallan */
29881c68cf8Smacallanstatic void
29972fd264fSmacallanCG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
30072fd264fSmacallan    int srcpitch, int dstpitch)
30181c68cf8Smacallan{
30281c68cf8Smacallan	int saddr, daddr, pre, cnt, wrds;
30381c68cf8Smacallan
30481c68cf8Smacallan	ENTER;
30581c68cf8Smacallan
30681c68cf8Smacallan	pre = srcstart & 3;
30781c68cf8Smacallan	if (pre != 0) pre = 4 - pre;
30881c68cf8Smacallan	pre = min(pre, w);
30981c68cf8Smacallan
31081c68cf8Smacallan	while (h > 0) {
31181c68cf8Smacallan		saddr = srcstart;
31281c68cf8Smacallan		daddr = dststart;
31381c68cf8Smacallan		cnt = w;
31481c68cf8Smacallan		if (pre > 0) {
31572fd264fSmacallan			sxm(SX_LDB, saddr, 8, pre - 1);
31672fd264fSmacallan			sxm(SX_STB, daddr, 8, pre - 1);
31781c68cf8Smacallan			saddr += pre;
31881c68cf8Smacallan			daddr += pre;
31981c68cf8Smacallan			cnt -= pre;
32081c68cf8Smacallan			if (cnt == 0) goto next;
32181c68cf8Smacallan		}
32281c68cf8Smacallan		while (cnt > 3) {
32381c68cf8Smacallan			wrds = min(32, cnt >> 2);
32472fd264fSmacallan			sxm(SX_LD, saddr, 8, wrds - 1);
32572fd264fSmacallan			sxm(SX_ST, daddr, 8, wrds - 1);
32681c68cf8Smacallan			saddr += wrds << 2;
32781c68cf8Smacallan			daddr += wrds << 2;
32881c68cf8Smacallan			cnt -= wrds << 2;
32981c68cf8Smacallan		}
33081c68cf8Smacallan		if (cnt > 0) {
33172fd264fSmacallan			sxm(SX_LDB, saddr, 8, cnt - 1);
33272fd264fSmacallan			sxm(SX_STB, daddr, 8, cnt - 1);
33381c68cf8Smacallan		}
33481c68cf8Smacallannext:
33581c68cf8Smacallan		srcstart += srcpitch;
33681c68cf8Smacallan		dststart += dstpitch;
33781c68cf8Smacallan		h--;
33881c68cf8Smacallan	}
33981c68cf8Smacallan}
34081c68cf8Smacallan
34181c68cf8Smacallan/*
34281c68cf8Smacallan * copy with same alignment, left to right, ROP
34381c68cf8Smacallan */
34481c68cf8Smacallanstatic void
34572fd264fSmacallanCG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
34672fd264fSmacallan    int srcpitch, int dstpitch)
34781c68cf8Smacallan{
34881c68cf8Smacallan	int saddr, daddr, pre, cnt, wrds;
34981c68cf8Smacallan
35081c68cf8Smacallan	ENTER;
35181c68cf8Smacallan
35281c68cf8Smacallan	pre = srcstart & 3;
35381c68cf8Smacallan	if (pre != 0) pre = 4 - pre;
35481c68cf8Smacallan	pre = min(pre, w);
35581c68cf8Smacallan
35681c68cf8Smacallan	while (h > 0) {
35781c68cf8Smacallan		saddr = srcstart;
35881c68cf8Smacallan		daddr = dststart;
35981c68cf8Smacallan		cnt = w;
36081c68cf8Smacallan		if (pre > 0) {
36172fd264fSmacallan			sxm(SX_LDB, saddr, 8, pre - 1);
36272fd264fSmacallan			sxm(SX_LDB, daddr, 40, pre - 1);
363230e26c7Smacallan			sxi(SX_ROP, 8, 40, 72, pre - 1);
36472fd264fSmacallan			sxm(SX_STB, daddr, 72, pre - 1);
36581c68cf8Smacallan			saddr += pre;
36681c68cf8Smacallan			daddr += pre;
36781c68cf8Smacallan			cnt -= pre;
36881c68cf8Smacallan			if (cnt == 0) goto next;
36981c68cf8Smacallan		}
37081c68cf8Smacallan		while (cnt > 3) {
37181c68cf8Smacallan			wrds = min(32, cnt >> 2);
37272fd264fSmacallan			sxm(SX_LD, saddr, 8, wrds - 1);
37372fd264fSmacallan			sxm(SX_LD, daddr, 40, wrds - 1);
37481c68cf8Smacallan			if (cnt > 16) {
375230e26c7Smacallan				sxi(SX_ROP, 8, 40, 72, 15);
376230e26c7Smacallan				sxi(SX_ROP, 8, 56, 88, wrds - 17);
37781c68cf8Smacallan			} else
378230e26c7Smacallan				sxi(SX_ROP, 8, 40, 72, wrds - 1);
37972fd264fSmacallan			sxm(SX_ST, daddr, 72, wrds - 1);
38081c68cf8Smacallan			saddr += wrds << 2;
38181c68cf8Smacallan			daddr += wrds << 2;
38281c68cf8Smacallan			cnt -= wrds << 2;
38381c68cf8Smacallan		}
38481c68cf8Smacallan		if (cnt > 0) {
38572fd264fSmacallan			sxm(SX_LDB, saddr, 8, cnt - 1);
38672fd264fSmacallan			sxm(SX_LDB, daddr, 40, cnt - 1);
387230e26c7Smacallan			sxi(SX_ROP, 8, 40, 72, cnt - 1);
38872fd264fSmacallan			sxm(SX_STB, daddr, 72, cnt - 1);
38981c68cf8Smacallan		}
39081c68cf8Smacallannext:
39181c68cf8Smacallan		srcstart += srcpitch;
39281c68cf8Smacallan		dststart += dstpitch;
39381c68cf8Smacallan		h--;
39481c68cf8Smacallan	}
39581c68cf8Smacallan}
39681c68cf8Smacallan
397f787bc61Smacallan/* up to 124 pixels so direction doesn't matter, unaligned, ROP */
398f787bc61Smacallanstatic void
399f787bc61SmacallanCG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
400f787bc61Smacallan{
401f787bc61Smacallan	int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
4029d7fb28bSmacallan	int ssreg;
403f787bc61Smacallan#ifdef DEBUG
404f787bc61Smacallan	int taddr = 4 + dstpitch * 50;
405f787bc61Smacallan#endif
406f787bc61Smacallan	uint32_t lmask, rmask;
407f787bc61Smacallan	ENTER;
408f787bc61Smacallan
409f787bc61Smacallan	pre = dststart & 3;
410f787bc61Smacallan	lmask = 0xffffffff >> pre;
411f787bc61Smacallan	spre = srcstart & 3;
412f787bc61Smacallan	/*
413f787bc61Smacallan	 * make sure we count all the words needed to cover the destination
414f787bc61Smacallan	 * line, covering potential partials on both ends
415f787bc61Smacallan	 */
416f787bc61Smacallan	wrds = (w + pre + 3) >> 2;
417f787bc61Smacallan	swrds = (w + spre + 3) >> 2;
418f787bc61Smacallan
419f787bc61Smacallan	if (spre < pre) {
420f787bc61Smacallan		dist = 32 - (pre - spre) * 8;
421f787bc61Smacallan		sreg = 9;
422f787bc61Smacallan	} else {
423f787bc61Smacallan		dist = (spre - pre) * 8;
424f787bc61Smacallan		sreg = 8;
425f787bc61Smacallan	}
426f787bc61Smacallan
427f787bc61Smacallan	/*
428f787bc61Smacallan	 * mask out trailing pixels to avoid partial writes
429f787bc61Smacallan	 */
430f787bc61Smacallan	post = (dststart + w) & 3;
43176a85281Smacallan	if (post != 0) {
43276a85281Smacallan		rmask = ~(0xffffffff >> (post * 8));
43376a85281Smacallan		write_sx_reg(p, SX_QUEUED(7), rmask);
43476a85281Smacallan		write_sx_reg(p, SX_QUEUED(6), ~rmask);
43576a85281Smacallan	}
43676a85281Smacallan
437f787bc61Smacallan	DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__,
438f787bc61Smacallan	    w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask);
439f787bc61Smacallan
440f787bc61Smacallan	/* mask out the leading pixels in dst by using a mask and ROP */
44176a85281Smacallan	if (pre != 0) {
442c1537409Smacallan		CG14Wait(p);
44376a85281Smacallan		write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa);
44476a85281Smacallan		write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
44576a85281Smacallan	}
446f787bc61Smacallan
447f787bc61Smacallan	saddr = srcstart & ~3;
448f787bc61Smacallan	daddr = dststart & ~3;
44976a85281Smacallan
450f787bc61Smacallan	while (h > 0) {
45172fd264fSmacallan		sxm(SX_LD, daddr, 80, wrds - 1);
45272fd264fSmacallan		sxm(SX_LD, saddr, sreg, swrds - 1);
453f787bc61Smacallan		if (wrds > 15) {
4549d7fb28bSmacallan			if (dist != 0) {
455230e26c7Smacallan				sxi(SX_FUNNEL_I, 8, dist, 40, 15);
456230e26c7Smacallan				sxi(SX_FUNNEL_I, 24, dist, 56, wrds - 16);
4579d7fb28bSmacallan				/* shifted source pixels are now at register 40+ */
4589d7fb28bSmacallan				ssreg = 40;
4599d7fb28bSmacallan			} else ssreg = 8;
460f787bc61Smacallan			if (pre != 0) {
461f787bc61Smacallan				/* mask out leading junk */
462f787bc61Smacallan				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
463230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, 8, 0);
464f787bc61Smacallan				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
465230e26c7Smacallan				sxi(SX_ROPB, ssreg + 1, 81, 9, 14);
466f787bc61Smacallan			} else {
467230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, 8, 15);
468f787bc61Smacallan			}
469230e26c7Smacallan			sxi(SX_ROPB, ssreg + 16, 96, 24, wrds - 16);
470f787bc61Smacallan		} else {
4719d7fb28bSmacallan			if (dist != 0) {
472230e26c7Smacallan				sxi(SX_FUNNEL_I, 8, dist, 40, wrds);
4739d7fb28bSmacallan				ssreg = 40;
4749d7fb28bSmacallan			} else ssreg = 8;
475f787bc61Smacallan			if (pre != 0) {
476f787bc61Smacallan				/* mask out leading junk */
477f787bc61Smacallan				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
478230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, 8, 0);
479f787bc61Smacallan				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
480230e26c7Smacallan				sxi(SX_ROPB, ssreg + 1, 81, 9, wrds);
481f787bc61Smacallan			} else {
482230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, 8, wrds);
483f787bc61Smacallan			}
484f787bc61Smacallan		}
485f787bc61Smacallan		if (post != 0) {
486f787bc61Smacallan			/*
487f787bc61Smacallan			 * if the last word to be written out is a partial we
488f787bc61Smacallan			 * mask out the leftovers and replace them with
489f787bc61Smacallan			 * background pixels
490f787bc61Smacallan			 * we could pull the same ROP * mask trick as we do on
491f787bc61Smacallan			 * the left end but it's less annoying this way and
492f787bc61Smacallan			 * the instruction count is the same
493f787bc61Smacallan			 */
494230e26c7Smacallan			sxi(SX_ANDS, 7 + wrds, 7, 5, 0);
495230e26c7Smacallan			sxi(SX_ANDS, 79 + wrds, 6, 4, 0);
496230e26c7Smacallan			sxi(SX_ORS, 5, 4, 7 + wrds, 0);
497f787bc61Smacallan		}
498f787bc61Smacallan#ifdef DEBUG
49972fd264fSmacallan		sxm(SX_ST, taddr, 40, wrds - 1);
500f787bc61Smacallan		taddr += dstpitch;
501f787bc61Smacallan#endif
50272fd264fSmacallan		sxm(SX_ST, daddr, 8, wrds - 1);
503f787bc61Smacallan		saddr += srcpitch;
504f787bc61Smacallan		daddr += dstpitch;
505f787bc61Smacallan		h--;
506f787bc61Smacallan	}
507f787bc61Smacallan}
508f787bc61Smacallan
50976a85281Smacallan/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */
51076a85281Smacallanstatic void
51172fd264fSmacallanCG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
51272fd264fSmacallan    int srcpitch, int dstpitch)
51376a85281Smacallan{
51476a85281Smacallan	int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
51576a85281Smacallan	int ssreg;
51676a85281Smacallan#ifdef DEBUG
51776a85281Smacallan	int taddr = 4 + dstpitch * 50;
51876a85281Smacallan#endif
51976a85281Smacallan	uint32_t lmask, rmask;
52076a85281Smacallan	ENTER;
52176a85281Smacallan
52276a85281Smacallan	pre = dststart & 3;
52376a85281Smacallan	lmask = 0xffffffff >> pre;
52476a85281Smacallan	spre = srcstart & 3;
52576a85281Smacallan	/*
52676a85281Smacallan	 * make sure we count all the words needed to cover the destination
52776a85281Smacallan	 * line, covering potential partials on both ends
52876a85281Smacallan	 */
52976a85281Smacallan	wrds = (w + pre + 3) >> 2;
53076a85281Smacallan	swrds = (w + spre + 3) >> 2;
53176a85281Smacallan
53276a85281Smacallan	if (spre < pre) {
53376a85281Smacallan		dist = 32 - (pre - spre) * 8;
53476a85281Smacallan		sreg = 9;
53576a85281Smacallan	} else {
53676a85281Smacallan		dist = (spre - pre) * 8;
53776a85281Smacallan		sreg = 8;
53876a85281Smacallan	}
53976a85281Smacallan
54076a85281Smacallan	/*
54176a85281Smacallan	 * mask out trailing pixels to avoid partial writes
54276a85281Smacallan	 */
54376a85281Smacallan	post = (dststart + w) & 3;
54476a85281Smacallan	if (post != 0) {
54576a85281Smacallan		rmask = ~(0xffffffff >> (post * 8));
54676a85281Smacallan		write_sx_reg(p, SX_QUEUED(7), rmask);
54776a85281Smacallan		write_sx_reg(p, SX_QUEUED(6), ~rmask);
54876a85281Smacallan	}
54976a85281Smacallan
55076a85281Smacallan	DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__,
55176a85281Smacallan	    w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask);
55276a85281Smacallan
55376a85281Smacallan	/* mask out the leading pixels in dst by using a mask and ROP */
55476a85281Smacallan	if (pre != 0) {
555c1537409Smacallan		CG14Wait(p);
55676a85281Smacallan		write_sx_reg(p, SX_ROP_CONTROL, 0xca);
55776a85281Smacallan		write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
55876a85281Smacallan	}
55976a85281Smacallan
56076a85281Smacallan	saddr = srcstart & ~3;
56176a85281Smacallan	daddr = dststart & ~3;
56276a85281Smacallan
56376a85281Smacallan	while (h > 0) {
56472fd264fSmacallan		sxm(SX_LD, saddr, sreg, swrds - 1);
56576a85281Smacallan		if (wrds > 15) {
56676a85281Smacallan			if (dist != 0) {
567230e26c7Smacallan				sxi(SX_FUNNEL_I, 8, dist, 40, 15);
568230e26c7Smacallan				sxi(SX_FUNNEL_I, 24, dist, 56, wrds - 16);
56972fd264fSmacallan				/* shifted source pixels are now at reg 40+ */
57076a85281Smacallan				ssreg = 40;
57176a85281Smacallan			} else ssreg = 8;
57276a85281Smacallan			if (pre != 0) {
57376a85281Smacallan				/* read only the first word */
57472fd264fSmacallan				sxm(SX_LD, daddr, 80, 0);
57576a85281Smacallan				/* mask out leading junk */
576230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, ssreg, 0);
57776a85281Smacallan			}
57876a85281Smacallan		} else {
57976a85281Smacallan			if (dist != 0) {
580230e26c7Smacallan				sxi(SX_FUNNEL_I, 8, dist, 40, wrds);
58176a85281Smacallan				ssreg = 40;
58276a85281Smacallan			} else ssreg = 8;
58376a85281Smacallan			if (pre != 0) {
58476a85281Smacallan				/* read only the first word */
58572fd264fSmacallan				sxm(SX_LD, daddr, 80, 0);
58676a85281Smacallan				/* mask out leading junk */
587230e26c7Smacallan				sxi(SX_ROPB, ssreg, 80, ssreg, 0);
58876a85281Smacallan			}
58976a85281Smacallan		}
59076a85281Smacallan		if (post != 0) {
59176a85281Smacallan			int laddr = daddr + ((wrds - 1) << 2);
59276a85281Smacallan			/*
59376a85281Smacallan			 * if the last word to be written out is a partial we
59476a85281Smacallan			 * mask out the leftovers and replace them with
59576a85281Smacallan			 * background pixels
59676a85281Smacallan			 * we could pull the same ROP * mask trick as we do on
59776a85281Smacallan			 * the left end but it's less annoying this way and
59876a85281Smacallan			 * the instruction count is the same
59976a85281Smacallan			 */
60072fd264fSmacallan			sxm(SX_LD, laddr, 81, 0);
601230e26c7Smacallan			sxi(SX_ANDS, ssreg + wrds - 1, 7, 5, 0);
602230e26c7Smacallan			sxi(SX_ANDS, 81, 6, 4, 0);
603230e26c7Smacallan			sxi(SX_ORS, 5, 4, ssreg + wrds - 1, 0);
60476a85281Smacallan		}
60576a85281Smacallan#ifdef DEBUG
60672fd264fSmacallan		sxm(SX_ST, taddr, 40, wrds - 1);
60776a85281Smacallan		taddr += dstpitch;
60876a85281Smacallan#endif
60972fd264fSmacallan		sxm(SX_ST, daddr, ssreg, wrds - 1);
61076a85281Smacallan		saddr += srcpitch;
61176a85281Smacallan		daddr += dstpitch;
61276a85281Smacallan		h--;
61376a85281Smacallan	}
61476a85281Smacallan}
61576a85281Smacallan
616a61c6651Smacallanstatic void
617a61c6651SmacallanCG14Copy16(PixmapPtr pDstPixmap,
618a61c6651Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
619a61c6651Smacallan{
620a61c6651Smacallan	CG14Copy8(pDstPixmap, srcX << 1, srcY, dstX << 1, dstY, w << 1, h);
621a61c6651Smacallan}
622a61c6651Smacallan
623f71acd79Smacallanstatic void
624f71acd79SmacallanCG14Copy8(PixmapPtr pDstPixmap,
625f71acd79Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
626f71acd79Smacallan{
627f71acd79Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
628f71acd79Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
629f71acd79Smacallan	int dstpitch, dstoff, srcpitch, srcoff;
630f71acd79Smacallan	int srcstart, dststart, xinc, srcinc, dstinc;
631f71acd79Smacallan	int line, count, s, d, num;
632f71acd79Smacallan
633f71acd79Smacallan	ENTER;
634f71acd79Smacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
635f71acd79Smacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
636f71acd79Smacallan	srcpitch = p->srcpitch;
637f71acd79Smacallan	srcoff = p->srcoff;
638f71acd79Smacallan	/*
639f71acd79Smacallan	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
640f71acd79Smacallan	 * actually wrote anything and only sync if it did
641f71acd79Smacallan	 */
642f71acd79Smacallan	srcstart = srcX + (srcpitch * srcY) + srcoff;
643f71acd79Smacallan	dststart = dstX + (dstpitch * dstY) + dstoff;
644f71acd79Smacallan
645f71acd79Smacallan	if (p->ydir < 0) {
646f71acd79Smacallan		srcstart += (h - 1) * srcpitch;
647f71acd79Smacallan		dststart += (h - 1) * dstpitch;
648f71acd79Smacallan		srcinc = -srcpitch;
649f71acd79Smacallan		dstinc = -dstpitch;
650f71acd79Smacallan	} else {
651f71acd79Smacallan		srcinc = srcpitch;
652f71acd79Smacallan		dstinc = dstpitch;
653f71acd79Smacallan	}
654f787bc61Smacallan
655f787bc61Smacallan	/*
656f787bc61Smacallan	 * this copies up to 124 pixels wide in one go, so horizontal
657f787bc61Smacallan	 * direction / overlap don't matter
658f787bc61Smacallan	 * uses all 32bit accesses and funnel shifter for unaligned copies
659f787bc61Smacallan	 */
660f787bc61Smacallan	if ((w < 125) && (w > 8)) {
66176a85281Smacallan		switch (p->last_rop) {
66276a85281Smacallan			case 0xcc:
66372fd264fSmacallan				CG14Copy8_short_norop(p,
66472fd264fSmacallan				    srcstart, dststart, w, h, srcinc, dstinc);
66576a85281Smacallan				break;
66676a85281Smacallan			default:
66772fd264fSmacallan				CG14Copy8_short_rop(p,
66872fd264fSmacallan				    srcstart, dststart, w, h, srcinc, dstinc);
66976a85281Smacallan		}
670f787bc61Smacallan		return;
671f787bc61Smacallan	}
672f787bc61Smacallan
673f787bc61Smacallan	/*
674f787bc61Smacallan	 * only invert x direction if absolutely necessary, it's a pain to
675f787bc61Smacallan	 * go backwards on SX so avoid as much as possible
676f787bc61Smacallan	 */
677f787bc61Smacallan	if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) {
678f787bc61Smacallan		xinc = -32;
679f787bc61Smacallan	} else
680f787bc61Smacallan		xinc = 32;
681f787bc61Smacallan
682f787bc61Smacallan	/*
683f787bc61Smacallan	 * for aligned copies we can go all 32bit and avoid VRAM reads in the
684f787bc61Smacallan	 * most common case
685f787bc61Smacallan	 */
68681c68cf8Smacallan	if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
68781c68cf8Smacallan		switch (p->last_rop) {
68881c68cf8Smacallan			case 0xcc:
68972fd264fSmacallan				CG14Copy8_aligned_norop(p,
69072fd264fSmacallan				    srcstart, dststart, w, h, srcinc, dstinc);
69181c68cf8Smacallan				break;
69281c68cf8Smacallan			default:
69372fd264fSmacallan				CG14Copy8_aligned_rop(p,
69472fd264fSmacallan				    srcstart, dststart, w, h, srcinc, dstinc);
69581c68cf8Smacallan		}
69681c68cf8Smacallan		return;
69781c68cf8Smacallan	}
698f787bc61Smacallan
69986527ef6Smacallan	/*
70086527ef6Smacallan	 * if we make it here we either have something large and unaligned,
70186527ef6Smacallan	 * something we need to do right to left, or something tiny.
70286527ef6Smacallan	 * we handle the non-tiny cases by breaking them down into chunks that
70386527ef6Smacallan	 * Copy8_short_*() can handle, making sure the destinations are 32bit
70486527ef6Smacallan	 * aligned whenever possible
70586527ef6Smacallan	 * since we copy by block, not by line we need to go backwards even if
70686527ef6Smacallan	 * we don't copy within the same line
70786527ef6Smacallan	 */
70886527ef6Smacallan	if (w > 8) {
70986527ef6Smacallan		int next, wi, end = dststart + w;
71072fd264fSmacallan		DPRINTF(X_ERROR, "%s %08x %08x %d\n",
71172fd264fSmacallan		    __func__, srcstart, dststart, w);
71286527ef6Smacallan		if ((p->xdir < 0) && (srcoff == dstoff)) {
71386527ef6Smacallan			srcstart += w;
71486527ef6Smacallan			next = max((end - 120) & ~3, dststart);
71586527ef6Smacallan			wi = end - next;
71686527ef6Smacallan			srcstart -= wi;
71786527ef6Smacallan			while (wi > 0) {
71872fd264fSmacallan				DPRINTF(X_ERROR, "%s RL %08x %08x %d\n",
71972fd264fSmacallan				    __func__, srcstart, next, wi);
72086527ef6Smacallan				if (p->last_rop == 0xcc) {
72172fd264fSmacallan					CG14Copy8_short_norop(p, srcstart,
72272fd264fSmacallan					    next, wi, h, srcinc, dstinc);
72386527ef6Smacallan				} else
72472fd264fSmacallan					CG14Copy8_short_rop(p, srcstart,
72572fd264fSmacallan					    next, wi, h, srcinc, dstinc);
72686527ef6Smacallan				end = next;
72786527ef6Smacallan				/*
72886527ef6Smacallan				 * avoid extremely narrow copies so I don't
72986527ef6Smacallan				 * have to deal with dangling start and end
73086527ef6Smacallan				 * pixels in the same word
73186527ef6Smacallan				 */
73286527ef6Smacallan				if ((end - dststart) < 140) {
73386527ef6Smacallan					next = max((end - 80) & ~3, dststart);
73486527ef6Smacallan				} else {
73586527ef6Smacallan					next = max((end - 120) & ~3, dststart);
73686527ef6Smacallan				}
73786527ef6Smacallan				wi = end - next;
73886527ef6Smacallan				srcstart -= wi;
73986527ef6Smacallan			}
74086527ef6Smacallan		} else {
74186527ef6Smacallan			next = min(end, (dststart + 124) & ~3);
74286527ef6Smacallan			wi = next - dststart;
74386527ef6Smacallan			while (wi > 0) {
74472fd264fSmacallan				DPRINTF(X_ERROR, "%s LR %08x %08x %d\n",
74572fd264fSmacallan				    __func__, srcstart, next, wi);
74686527ef6Smacallan				if (p->last_rop == 0xcc) {
74772fd264fSmacallan					CG14Copy8_short_norop(p,
74872fd264fSmacallan					    srcstart, dststart, wi, h,
74972fd264fSmacallan					    srcinc, dstinc);
75086527ef6Smacallan				} else
75172fd264fSmacallan					CG14Copy8_short_rop(p,
75272fd264fSmacallan					    srcstart, dststart, wi, h,
75372fd264fSmacallan					    srcinc, dstinc);
75486527ef6Smacallan				srcstart += wi;
75586527ef6Smacallan				dststart = next;
75686527ef6Smacallan				if ((end - dststart) < 140) {
75786527ef6Smacallan					next = min(end, (dststart + 84) & ~3);
75886527ef6Smacallan				} else {
75986527ef6Smacallan					next = min(end, (dststart + 124) & ~3);
76086527ef6Smacallan				}
76186527ef6Smacallan				wi = next - dststart;
76286527ef6Smacallan			}
76386527ef6Smacallan		}
76486527ef6Smacallan		return;
76586527ef6Smacallan	}
76686527ef6Smacallan	if (xinc < 0) {
76786527ef6Smacallan		srcstart += (w - 32);
76886527ef6Smacallan		dststart += (w - 32);
76986527ef6Smacallan	}
77086527ef6Smacallan
77186527ef6Smacallan	DPRINTF(X_ERROR, "%s fallback to byte-wise %d %d\n", __func__, w, h);
772f71acd79Smacallan	if (p->last_rop == 0xcc) {
773f71acd79Smacallan		/* plain old copy */
774f71acd79Smacallan		if ( xinc > 0) {
775f71acd79Smacallan			/* going left to right */
776f71acd79Smacallan			for (line = 0; line < h; line++) {
777f71acd79Smacallan				count = 0;
778f71acd79Smacallan				s = srcstart;
779f71acd79Smacallan				d = dststart;
780f71acd79Smacallan				while ( count < w) {
781f71acd79Smacallan					num = min(32, w - count);
78272fd264fSmacallan					sxm(SX_LDB, s, 10, num - 1);
78372fd264fSmacallan					sxm(SX_STBM, d, 10, num - 1);
784f71acd79Smacallan					s += xinc;
785f71acd79Smacallan					d += xinc;
786f71acd79Smacallan					count += 32;
787f71acd79Smacallan				}
788f71acd79Smacallan				srcstart += srcinc;
789f71acd79Smacallan				dststart += dstinc;
790f71acd79Smacallan			}
791f71acd79Smacallan		} else {
792f71acd79Smacallan			/* going right to left */
793f71acd79Smacallan			int i, chunks = (w >> 5);
794f71acd79Smacallan			for (line = 0; line < h; line++) {
795f71acd79Smacallan				s = srcstart;
796f71acd79Smacallan				d = dststart;
797f71acd79Smacallan				count = w;
798f71acd79Smacallan				for (i = 0; i < chunks; i++) {
79972fd264fSmacallan					sxm(SX_LDB, s, 10, 31);
80072fd264fSmacallan					sxm(SX_STBM, d, 10, 31);
801f71acd79Smacallan					s -= 32;
802f71acd79Smacallan					d -= 32;
803f71acd79Smacallan					count -= 32;
804f71acd79Smacallan				}
805f71acd79Smacallan				/* leftovers, if any */
806f71acd79Smacallan				if (count > 0) {
807f71acd79Smacallan					s += (32 - count);
808f71acd79Smacallan					d += (32 - count);
80972fd264fSmacallan					sxm(SX_LDB, s, 10, count - 1);
81072fd264fSmacallan					sxm(SX_STBM, d, 10, count - 1);
811f71acd79Smacallan				}
812f71acd79Smacallan				srcstart += srcinc;
813f71acd79Smacallan				dststart += dstinc;
814f71acd79Smacallan			}
815f71acd79Smacallan		}
816f71acd79Smacallan	} else {
817f71acd79Smacallan		/* ROPs needed */
818f71acd79Smacallan		if ( xinc > 0) {
819f71acd79Smacallan			/* going left to right */
820f71acd79Smacallan			for (line = 0; line < h; line++) {
821f71acd79Smacallan				count = 0;
822f71acd79Smacallan				s = srcstart;
823f71acd79Smacallan				d = dststart;
824f71acd79Smacallan				while ( count < w) {
825f71acd79Smacallan					num = min(32, w - count);
82672fd264fSmacallan					sxm(SX_LDB, s, 10, num - 1);
82772fd264fSmacallan					sxm(SX_LDB, d, 42, num - 1);
828f71acd79Smacallan					if (num > 16) {
829230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, 15);
830230e26c7Smacallan						sxi(SX_ROP, 26, 58, 90, num - 17);
831f71acd79Smacallan					} else {
832230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, num - 1);
833f71acd79Smacallan					}
83472fd264fSmacallan					sxm(SX_STBM, d, 74, num - 1);
835f71acd79Smacallan					s += xinc;
836f71acd79Smacallan					d += xinc;
837f71acd79Smacallan					count += 32;
838f71acd79Smacallan				}
839f71acd79Smacallan				srcstart += srcinc;
840f71acd79Smacallan				dststart += dstinc;
841f71acd79Smacallan			}
842f71acd79Smacallan		} else {
843f71acd79Smacallan			/* going right to left */
844f71acd79Smacallan			int i, chunks = (w >> 5);
845f71acd79Smacallan			for (line = 0; line < h; line++) {
846f71acd79Smacallan				s = srcstart;
847f71acd79Smacallan				d = dststart;
848f71acd79Smacallan				count = w;
849f71acd79Smacallan				for (i = 0; i < chunks; i++) {
85072fd264fSmacallan					sxm(SX_LDB, s, 10, 31);
85172fd264fSmacallan					sxm(SX_LDB, d, 42, 31);
852230e26c7Smacallan					sxi(SX_ROP, 10, 42, 74, 15);
853230e26c7Smacallan					sxi(SX_ROP, 26, 58, 90, 15);
85472fd264fSmacallan					sxm(SX_STBM, d, 74, 31);
855f71acd79Smacallan					s -= 128;
856f71acd79Smacallan					d -= 128;
857f71acd79Smacallan					count -= 32;
858f71acd79Smacallan				}
859f71acd79Smacallan				/* leftovers, if any */
860f71acd79Smacallan				if (count > 0) {
861f71acd79Smacallan					s += (32 - count);
862f71acd79Smacallan					d += (32 - count);
86372fd264fSmacallan					sxm(SX_LDB, s, 10, count - 1);
86472fd264fSmacallan					sxm(SX_LDB, d, 42, count - 1);
865f71acd79Smacallan					if (count > 16) {
866230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, 15);
867230e26c7Smacallan						sxi(SX_ROP, 26, 58, 90, count - 17);
868f71acd79Smacallan					} else {
869230e26c7Smacallan						sxi(SX_ROP, 10, 42, 74, count - 1);
870f71acd79Smacallan					}
87172fd264fSmacallan					sxm(SX_STBM, d, 74, count - 1);
872f71acd79Smacallan				}
873f71acd79Smacallan				srcstart += srcinc;
874f71acd79Smacallan				dststart += dstinc;
875f71acd79Smacallan			}
876f71acd79Smacallan		}
877f71acd79Smacallan	}
878f71acd79Smacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
879f71acd79Smacallan}
880f71acd79Smacallan
8814261fa58Smacallanstatic void
8824261fa58SmacallanCG14DoneCopy(PixmapPtr pDstPixmap)
8834261fa58Smacallan{
8844261fa58Smacallan}
8854261fa58Smacallan
8864261fa58Smacallanstatic Bool
8874261fa58SmacallanCG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
8884261fa58Smacallan{
8894261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
8904261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
8914261fa58Smacallan
8924261fa58Smacallan	ENTER;
893faf11d72Schristos	DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n",
894b8ad197aSmacallan	    pPixmap->drawable.bitsPerPixel, fg);
895b8ad197aSmacallan
896dbf8597cSmacallan	/*
897dbf8597cSmacallan	 * GXset and GXclear are really just specual cases of GXcopy with
898dbf8597cSmacallan	 * fixed fill colour
899dbf8597cSmacallan	 */
900dbf8597cSmacallan	switch (alu) {
901dbf8597cSmacallan		case GXclear:
902dbf8597cSmacallan			alu = GXcopy;
903dbf8597cSmacallan			fg = 0;
904dbf8597cSmacallan			break;
905dbf8597cSmacallan		case GXset:
906dbf8597cSmacallan			alu = GXcopy;
907dbf8597cSmacallan			fg = 0xffffffff;
908dbf8597cSmacallan			break;
909dbf8597cSmacallan	}
910a61c6651Smacallan	/* repeat the colour in every sub byte if we're in 8 or 16 bit */
911b8ad197aSmacallan	if (pPixmap->drawable.bitsPerPixel == 8) {
912b8ad197aSmacallan		fg |= fg << 8;
913b8ad197aSmacallan		fg |= fg << 16;
914a61c6651Smacallan	} else if (pPixmap->drawable.bitsPerPixel == 16) {
915a61c6651Smacallan		fg |= fg << 16;
916b8ad197aSmacallan	}
9174261fa58Smacallan	write_sx_reg(p, SX_QUEUED(8), fg);
9184261fa58Smacallan	write_sx_reg(p, SX_QUEUED(9), fg);
9194261fa58Smacallan	if (planemask != p->last_mask) {
9204261fa58Smacallan		CG14Wait(p);
9214261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
9224261fa58Smacallan		p->last_mask = planemask;
9234261fa58Smacallan	}
9244261fa58Smacallan	alu = sx_rop[alu];
9254261fa58Smacallan	if (alu != p->last_rop) {
9264261fa58Smacallan		CG14Wait(p);
9274261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
9284261fa58Smacallan		p->last_rop = alu;
9294261fa58Smacallan	}
930dbf8597cSmacallan
9314261fa58Smacallan	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
9324261fa58Smacallan	return TRUE;
9334261fa58Smacallan}
9344261fa58Smacallan
9354261fa58Smacallanstatic void
9364261fa58SmacallanCG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
9374261fa58Smacallan{
9384261fa58Smacallan	int line, x, num;
9394261fa58Smacallan	uint32_t ptr;
9404261fa58Smacallan
9414261fa58Smacallan	ENTER;
9424261fa58Smacallan	if (p->last_rop == 0xcc) {
9434261fa58Smacallan		/* simple fill */
9444261fa58Smacallan		for (line = 0; line < h; line++) {
9454261fa58Smacallan			x = 0;
9464261fa58Smacallan			while (x < w) {
9474261fa58Smacallan				ptr = start + (x << 2);
9484261fa58Smacallan				num = min(32, w - x);
94972fd264fSmacallan				sxm(SX_STS, ptr, 8, num - 1);
9504261fa58Smacallan				x += 32;
9514261fa58Smacallan			}
9524261fa58Smacallan			start += pitch;
9534261fa58Smacallan		}
9544261fa58Smacallan	} else if (p->last_rop == 0xaa) {
9554261fa58Smacallan		/* nothing to do here */
9564261fa58Smacallan		return;
9574261fa58Smacallan	} else {
9584261fa58Smacallan		/* alright, let's do actual ROP stuff */
9594261fa58Smacallan
9604261fa58Smacallan		/* first repeat the fill colour into 16 registers */
961230e26c7Smacallan		sxi(SX_SELECT_S, 8, 8, 10, 15);
9624261fa58Smacallan
9634261fa58Smacallan		for (line = 0; line < h; line++) {
9644261fa58Smacallan			x = 0;
9654261fa58Smacallan			while (x < w) {
9664261fa58Smacallan				ptr = start + (x << 2);
9674261fa58Smacallan				num = min(32, w - x);
9684261fa58Smacallan				/* now suck fb data into registers */
96972fd264fSmacallan				sxm(SX_LD, ptr, 42, num - 1);
9704261fa58Smacallan				/*
9714261fa58Smacallan				 * ROP them with the fill data we left in 10
9724261fa58Smacallan				 * non-memory ops can only have counts up to 16
9734261fa58Smacallan				 */
9744261fa58Smacallan				if (num <= 16) {
975230e26c7Smacallan					sxi(SX_ROP, 10, 42, 74, num - 1);
9764261fa58Smacallan				} else {
977230e26c7Smacallan					sxi(SX_ROP, 10, 42, 74, 15);
978230e26c7Smacallan					sxi(SX_ROP, 10, 58, 90, num - 17);
9794261fa58Smacallan				}
9804261fa58Smacallan				/* and write the result back into memory */
98172fd264fSmacallan				sxm(SX_ST, ptr, 74, num - 1);
9824261fa58Smacallan				x += 32;
9834261fa58Smacallan			}
9844261fa58Smacallan			start += pitch;
9854261fa58Smacallan		}
9864261fa58Smacallan	}
9874261fa58Smacallan}
9884261fa58Smacallan
989a61c6651Smacallanstatic void
990a61c6651SmacallanCG14Solid16(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
991a61c6651Smacallan{
992a61c6651Smacallan	int line, num, pre, cnt;
993a61c6651Smacallan	uint32_t ptr;
994a61c6651Smacallan
995a61c6651Smacallan	ENTER;
996a61c6651Smacallan	pre = start & 2;
997a61c6651Smacallan	if (pre != 0) pre = 1;
998a61c6651Smacallan
999a61c6651Smacallan	if (p->last_rop == 0xcc) {
1000a61c6651Smacallan		/* simple fill */
1001a61c6651Smacallan		for (line = 0; line < h; line++) {
1002a61c6651Smacallan			ptr = start;
1003a61c6651Smacallan			cnt = w;
1004a61c6651Smacallan			if (pre) {
1005a61c6651Smacallan				sxm(SX_STW, ptr, 8, 0);
1006a61c6651Smacallan				ptr += 2;
1007a61c6651Smacallan				cnt -= 1;
1008a61c6651Smacallan				if (cnt == 0) goto next;
1009a61c6651Smacallan			}
1010a61c6651Smacallan			/* now do the aligned pixels in 32bit chunks */
1011a61c6651Smacallan			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
1012a61c6651Smacallan			while(cnt > 1) {
1013a61c6651Smacallan				num = min(32, cnt >> 1);
1014a61c6651Smacallan				sxm(SX_STS, ptr, 8, num - 1);
1015a61c6651Smacallan				ptr += num << 2;
1016a61c6651Smacallan				cnt -= num << 1;
1017a61c6651Smacallan			}
1018a61c6651Smacallan			if (cnt > 1) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
1019a61c6651Smacallan			if (cnt > 0) {
1020a61c6651Smacallan				sxm(SX_STW, ptr, 8, 0);
1021a61c6651Smacallan			}
1022a61c6651Smacallannext:
1023a61c6651Smacallan			start += pitch;
1024a61c6651Smacallan		}
1025a61c6651Smacallan	} else if (p->last_rop == 0xaa) {
1026a61c6651Smacallan		/* nothing to do here */
1027a61c6651Smacallan		return;
1028a61c6651Smacallan	} else {
1029a61c6651Smacallan		/* alright, let's do actual ROP stuff */
1030a61c6651Smacallan
1031a61c6651Smacallan		/* first repeat the fill colour into 16 registers */
1032a61c6651Smacallan		sxi(SX_SELECT_S, 8, 8, 10, 15);
1033a61c6651Smacallan
1034a61c6651Smacallan		for (line = 0; line < h; line++) {
1035a61c6651Smacallan			ptr = start;
1036a61c6651Smacallan			cnt = w;
1037a61c6651Smacallan			pre = min(pre, cnt);
1038a61c6651Smacallan			if (pre) {
1039a61c6651Smacallan				sxm(SX_LDW, ptr, 26, 0);
1040a61c6651Smacallan				sxi(SX_ROP, 10, 26, 42, 0);
1041a61c6651Smacallan				sxm(SX_STW, ptr, 42, 0);
1042a61c6651Smacallan				ptr += 2;
1043a61c6651Smacallan				cnt -= 1;
1044a61c6651Smacallan				if (cnt == 0) goto next2;
1045a61c6651Smacallan			}
1046a61c6651Smacallan			/* now do the aligned pixels in 32bit chunks */
1047a61c6651Smacallan			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
1048a61c6651Smacallan			while(cnt > 1) {
1049a61c6651Smacallan				num = min(32, cnt >> 1);
1050a61c6651Smacallan				sxm(SX_LD, ptr, 26, num - 1);
1051a61c6651Smacallan				if (num <= 16) {
1052a61c6651Smacallan					sxi(SX_ROP, 10, 26, 58, num - 1);
1053a61c6651Smacallan				} else {
1054a61c6651Smacallan					sxi(SX_ROP, 10, 26, 58, 15);
1055a61c6651Smacallan					sxi(SX_ROP, 10, 42, 74, num - 17);
1056a61c6651Smacallan				}
1057a61c6651Smacallan				sxm(SX_ST, ptr, 58, num - 1);
1058a61c6651Smacallan				ptr += num << 2;
1059a61c6651Smacallan				cnt -= num << 1;
1060a61c6651Smacallan			}
1061a61c6651Smacallan			if (cnt > 1) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
1062a61c6651Smacallan			if (cnt > 0) {
1063a61c6651Smacallan				sxm(SX_LDW, ptr, 26, 0);
1064a61c6651Smacallan				sxi(SX_ROP, 10, 26, 42, 0);
1065a61c6651Smacallan				sxm(SX_STW, ptr, 42, 0);
1066a61c6651Smacallan			}
1067a61c6651Smacallannext2:
1068a61c6651Smacallan			start += pitch;
1069a61c6651Smacallan		}
1070a61c6651Smacallan	}
1071a61c6651Smacallan}
1072a61c6651Smacallan
10734261fa58Smacallanstatic void
10744261fa58SmacallanCG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
10754261fa58Smacallan{
1076dbf8597cSmacallan	int line, num, pre, cnt;
10774261fa58Smacallan	uint32_t ptr;
10784261fa58Smacallan
10794261fa58Smacallan	ENTER;
1080b8ad197aSmacallan	pre = start & 3;
1081b8ad197aSmacallan	if (pre != 0) pre = 4 - pre;
10824261fa58Smacallan
10834261fa58Smacallan	if (p->last_rop == 0xcc) {
10844261fa58Smacallan		/* simple fill */
10854261fa58Smacallan		for (line = 0; line < h; line++) {
1086b8ad197aSmacallan			ptr = start;
1087b8ad197aSmacallan			cnt = w;
1088b46cab2aSmacallan			pre = min(pre, cnt);
1089b8ad197aSmacallan			if (pre) {
109072fd264fSmacallan				sxm(SX_STBS, ptr, 8, pre - 1);
1091b8ad197aSmacallan				ptr += pre;
1092b8ad197aSmacallan				cnt -= pre;
1093b46cab2aSmacallan				if (cnt == 0) goto next;
1094b8ad197aSmacallan			}
1095b8ad197aSmacallan			/* now do the aligned pixels in 32bit chunks */
1096b8ad197aSmacallan			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
1097b8ad197aSmacallan			while(cnt > 3) {
1098b8ad197aSmacallan				num = min(32, cnt >> 2);
109972fd264fSmacallan				sxm(SX_STS, ptr, 8, num - 1);
1100b8ad197aSmacallan				ptr += num << 2;
1101b8ad197aSmacallan				cnt -= num << 2;
1102b8ad197aSmacallan			}
1103b8ad197aSmacallan			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
1104b8ad197aSmacallan			if (cnt > 0) {
110572fd264fSmacallan				sxm(SX_STBS, ptr, 8, cnt - 1);
11064261fa58Smacallan			}
1107b8ad197aSmacallan			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
1108b46cab2aSmacallannext:
11094261fa58Smacallan			start += pitch;
11104261fa58Smacallan		}
11114261fa58Smacallan	} else if (p->last_rop == 0xaa) {
11124261fa58Smacallan		/* nothing to do here */
11134261fa58Smacallan		return;
11144261fa58Smacallan	} else {
11154261fa58Smacallan		/* alright, let's do actual ROP stuff */
11164261fa58Smacallan
11174261fa58Smacallan		/* first repeat the fill colour into 16 registers */
1118230e26c7Smacallan		sxi(SX_SELECT_S, 8, 8, 10, 15);
11194261fa58Smacallan
11204261fa58Smacallan		for (line = 0; line < h; line++) {
1121dbf8597cSmacallan			ptr = start;
1122dbf8597cSmacallan			cnt = w;
1123dbf8597cSmacallan			pre = min(pre, cnt);
1124dbf8597cSmacallan			if (pre) {
112572fd264fSmacallan				sxm(SX_LDB, ptr, 26, pre - 1);
1126230e26c7Smacallan				sxi(SX_ROP, 10, 26, 42, pre - 1);
112772fd264fSmacallan				sxm(SX_STB, ptr, 42, pre - 1);
1128dbf8597cSmacallan				ptr += pre;
1129dbf8597cSmacallan				cnt -= pre;
1130dbf8597cSmacallan				if (cnt == 0) goto next2;
1131dbf8597cSmacallan			}
1132dbf8597cSmacallan			/* now do the aligned pixels in 32bit chunks */
1133dbf8597cSmacallan			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
1134dbf8597cSmacallan			while(cnt > 3) {
1135dbf8597cSmacallan				num = min(32, cnt >> 2);
113672fd264fSmacallan				sxm(SX_LD, ptr, 26, num - 1);
11374261fa58Smacallan				if (num <= 16) {
1138230e26c7Smacallan					sxi(SX_ROP, 10, 26, 58, num - 1);
11394261fa58Smacallan				} else {
1140230e26c7Smacallan					sxi(SX_ROP, 10, 26, 58, 15);
1141230e26c7Smacallan					sxi(SX_ROP, 10, 42, 74, num - 17);
11424261fa58Smacallan				}
114372fd264fSmacallan				sxm(SX_ST, ptr, 58, num - 1);
1144dbf8597cSmacallan				ptr += num << 2;
1145dbf8597cSmacallan				cnt -= num << 2;
11464261fa58Smacallan			}
1147dbf8597cSmacallan			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
1148dbf8597cSmacallan			if (cnt > 0) {
114972fd264fSmacallan				sxm(SX_LDB, ptr, 26, cnt - 1);
1150230e26c7Smacallan				sxi(SX_ROP, 10, 26, 42, cnt - 1);
115172fd264fSmacallan				sxm(SX_STB, ptr, 42, cnt - 1);
1152dbf8597cSmacallan			}
1153dbf8597cSmacallan			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
1154dbf8597cSmacallannext2:
11554261fa58Smacallan			start += pitch;
11564261fa58Smacallan		}
11574261fa58Smacallan	}
11584261fa58Smacallan}
11594261fa58Smacallan
11604261fa58Smacallanstatic void
11614261fa58SmacallanCG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
11624261fa58Smacallan{
11634261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
11644261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
11654261fa58Smacallan	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
11664261fa58Smacallan	int start, depth;
11674261fa58Smacallan
11684261fa58Smacallan	ENTER;
11694261fa58Smacallan	dstpitch = exaGetPixmapPitch(pPixmap);
11704261fa58Smacallan	dstoff = exaGetPixmapOffset(pPixmap);
11714261fa58Smacallan
11724261fa58Smacallan	depth = pPixmap->drawable.bitsPerPixel;
11734261fa58Smacallan	switch (depth) {
11744261fa58Smacallan		case 32:
11754261fa58Smacallan			start = dstoff + (y1 * dstpitch) + (x1 << 2);
11764261fa58Smacallan			CG14Solid32(p, start, dstpitch, w, h);
11774261fa58Smacallan			break;
1178a61c6651Smacallan		case 16:
1179a61c6651Smacallan			start = dstoff + (y1 * dstpitch) + (x1 << 1);
1180a61c6651Smacallan			CG14Solid16(p, start, dstpitch, w, h);
1181a61c6651Smacallan			break;
11824261fa58Smacallan		case 8:
11834261fa58Smacallan			start = dstoff + (y1 * dstpitch) + x1;
11844261fa58Smacallan			CG14Solid8(p, start, dstpitch, w, h);
11854261fa58Smacallan			break;
11864261fa58Smacallan	}
11874261fa58Smacallan
11884261fa58Smacallan	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
11894261fa58Smacallan	    dstpitch, dstoff, start);
11904261fa58Smacallan	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
11914261fa58Smacallan	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
11924261fa58Smacallan	exaMarkSync(pPixmap->drawable.pScreen);
11934261fa58Smacallan}
11944261fa58Smacallan
11954261fa58Smacallan/*
11964261fa58Smacallan * Memcpy-based UTS.
11974261fa58Smacallan */
11984261fa58Smacallanstatic Bool
11994261fa58SmacallanCG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
12004261fa58Smacallan    char *src, int src_pitch)
12014261fa58Smacallan{
12024261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
12034261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
12044261fa58Smacallan	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
12054261fa58Smacallan	int    dst_pitch  = exaGetPixmapPitch(pDst);
12064261fa58Smacallan
12074261fa58Smacallan	int bpp    = pDst->drawable.bitsPerPixel;
12084261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
12094261fa58Smacallan	int wBytes = w * cpp;
12104261fa58Smacallan
12114261fa58Smacallan	ENTER;
1212f71acd79Smacallan	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
12134261fa58Smacallan	dst += (x * cpp) + (y * dst_pitch);
12144261fa58Smacallan
12154261fa58Smacallan	CG14Wait(p);
12164261fa58Smacallan
12174261fa58Smacallan	while (h--) {
12184261fa58Smacallan		memcpy(dst, src, wBytes);
12194261fa58Smacallan		src += src_pitch;
12204261fa58Smacallan		dst += dst_pitch;
12214261fa58Smacallan	}
12224261fa58Smacallan	__asm("stbar;");
12234261fa58Smacallan	return TRUE;
12244261fa58Smacallan}
12254261fa58Smacallan
12264261fa58Smacallan/*
12274261fa58Smacallan * Memcpy-based DFS.
12284261fa58Smacallan */
12294261fa58Smacallanstatic Bool
12304261fa58SmacallanCG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
12314261fa58Smacallan    char *dst, int dst_pitch)
12324261fa58Smacallan{
12334261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
12344261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
12354261fa58Smacallan	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
12364261fa58Smacallan	int    src_pitch  = exaGetPixmapPitch(pSrc);
12374261fa58Smacallan
12384261fa58Smacallan	ENTER;
12394261fa58Smacallan	int bpp    = pSrc->drawable.bitsPerPixel;
12404261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
12414261fa58Smacallan	int wBytes = w * cpp;
12424261fa58Smacallan
12434261fa58Smacallan	src += (x * cpp) + (y * src_pitch);
12444261fa58Smacallan
12454261fa58Smacallan	CG14Wait(p);
12464261fa58Smacallan
12474261fa58Smacallan	while (h--) {
12484261fa58Smacallan		memcpy(dst, src, wBytes);
12494261fa58Smacallan		src += src_pitch;
12504261fa58Smacallan		dst += dst_pitch;
12514261fa58Smacallan	}
12524261fa58Smacallan
12534261fa58Smacallan	return TRUE;
12544261fa58Smacallan}
12554261fa58Smacallan
12564261fa58SmacallanBool
12574261fa58SmacallanCG14CheckComposite(int op, PicturePtr pSrcPicture,
12584261fa58Smacallan                           PicturePtr pMaskPicture,
12594261fa58Smacallan                           PicturePtr pDstPicture)
12604261fa58Smacallan{
12614261fa58Smacallan	int i, ok = FALSE;
12624261fa58Smacallan
12634261fa58Smacallan	ENTER;
12644261fa58Smacallan
12654261fa58Smacallan	/*
12664261fa58Smacallan	 * SX is in theory capable of accelerating pretty much all Xrender ops,
12674261fa58Smacallan	 * even coordinate transformation and gradients. Support will be added
12684261fa58Smacallan	 * over time and likely have to spill over into its own source file.
12694261fa58Smacallan	 */
12704261fa58Smacallan
12716fd6e0f4Smacallan	if ((op != PictOpOver) && (op != PictOpAdd)/* && (op != PictOpSrc)*/) {
1272fe97f391Smacallan		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
12734261fa58Smacallan		return FALSE;
12744261fa58Smacallan	}
12754261fa58Smacallan
12764bd47ccfSmacallan	if (pSrcPicture != NULL) {
12774bd47ccfSmacallan		i = 0;
12784bd47ccfSmacallan		while ((i < arraysize(src_formats)) && (!ok)) {
12794bd47ccfSmacallan			ok =  (pSrcPicture->format == src_formats[i]);
12804bd47ccfSmacallan			i++;
12814bd47ccfSmacallan		}
12824bd47ccfSmacallan
12834bd47ccfSmacallan		if (!ok) {
12844bd47ccfSmacallan			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
12854bd47ccfSmacallan			    __func__, pSrcPicture->format);
12864bd47ccfSmacallan			return FALSE;
12874bd47ccfSmacallan		}
12884bd47ccfSmacallan		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
12894261fa58Smacallan	}
12904261fa58Smacallan
12914bd47ccfSmacallan	if (pDstPicture != NULL) {
12924bd47ccfSmacallan		i = 0;
12934bd47ccfSmacallan		ok = FALSE;
12944bd47ccfSmacallan		while ((i < arraysize(src_formats)) && (!ok)) {
12954bd47ccfSmacallan			ok =  (pDstPicture->format == src_formats[i]);
12964bd47ccfSmacallan			i++;
12974bd47ccfSmacallan		}
12984bd47ccfSmacallan
12994bd47ccfSmacallan		if (!ok) {
13004bd47ccfSmacallan			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
13014bd47ccfSmacallan			    __func__, pDstPicture->format);
13024bd47ccfSmacallan			return FALSE;
13034bd47ccfSmacallan		}
13044bd47ccfSmacallan		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
13054bd47ccfSmacallan	}
13064261fa58Smacallan
13074261fa58Smacallan	if (pMaskPicture != NULL) {
13084261fa58Smacallan		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
13094261fa58Smacallan		    pMaskPicture->pDrawable->width,
13104261fa58Smacallan		    pMaskPicture->pDrawable->height);
13114261fa58Smacallan	}
13124261fa58Smacallan	return TRUE;
13134261fa58Smacallan}
13144261fa58Smacallan
13154261fa58SmacallanBool
13164261fa58SmacallanCG14PrepareComposite(int op, PicturePtr pSrcPicture,
13174261fa58Smacallan                             PicturePtr pMaskPicture,
13184261fa58Smacallan                             PicturePtr pDstPicture,
13194261fa58Smacallan                             PixmapPtr  pSrc,
13204261fa58Smacallan                             PixmapPtr  pMask,
13214261fa58Smacallan                             PixmapPtr  pDst)
13224261fa58Smacallan{
13234261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
13244261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
13254261fa58Smacallan
13264261fa58Smacallan	ENTER;
13274261fa58Smacallan
1328f7cb851fSmacallan	p->no_source_pixmap = FALSE;
1329f7cb851fSmacallan	p->source_is_solid = FALSE;
1330f7cb851fSmacallan
1331a3a2ba44Smacallan	if (pSrcPicture->format == PICT_a1) {
13326fd6e0f4Smacallan		DPRINTF(X_ERROR, "src mono, dst %x, op %d\n",
13336bdc2ffdSmacallan		    pDstPicture->format, op);
1334a3a2ba44Smacallan		if (pMaskPicture != NULL) {
13356fd6e0f4Smacallan			DPRINTF(X_ERROR, "msk %x\n", pMaskPicture->format);
1336a3a2ba44Smacallan		}
1337f7cb851fSmacallan	}
13384261fa58Smacallan	if (pSrcPicture->pSourcePict != NULL) {
13394261fa58Smacallan		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
13404261fa58Smacallan			p->fillcolour =
13414261fa58Smacallan			    pSrcPicture->pSourcePict->solidFill.color;
1342f7cb851fSmacallan			DPRINTF(X_ERROR, "%s: solid src %08x\n",
13434261fa58Smacallan			    __func__, p->fillcolour);
1344f7cb851fSmacallan			p->no_source_pixmap = TRUE;
1345f7cb851fSmacallan			p->source_is_solid = TRUE;
13464261fa58Smacallan		}
13474261fa58Smacallan	}
13484261fa58Smacallan	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
13494261fa58Smacallan		if (pMaskPicture->pSourcePict->type ==
13504261fa58Smacallan		    SourcePictTypeSolidFill) {
13514261fa58Smacallan			p->fillcolour =
13524261fa58Smacallan			   pMaskPicture->pSourcePict->solidFill.color;
13536fd6e0f4Smacallan			DPRINTF(X_ERROR, "%s: solid mask %08x\n",
13544261fa58Smacallan			    __func__, p->fillcolour);
13554261fa58Smacallan		}
13564261fa58Smacallan	}
13574261fa58Smacallan	if (pMaskPicture != NULL) {
1358239808baSmacallan		p->mskoff = exaGetPixmapOffset(pMask);
13594261fa58Smacallan		p->mskpitch = exaGetPixmapPitch(pMask);
13604261fa58Smacallan		p->mskformat = pMaskPicture->format;
1361a3a2ba44Smacallan	} else {
1362239808baSmacallan		p->mskoff = 0;
1363a3a2ba44Smacallan		p->mskpitch = 0;
1364a3a2ba44Smacallan		p->mskformat = 0;
13654261fa58Smacallan	}
1366f7cb851fSmacallan	if (pSrc != NULL) {
1367f7cb851fSmacallan		p->source_is_solid =
1368f7cb851fSmacallan		   ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1));
1369f7cb851fSmacallan		p->srcoff = exaGetPixmapOffset(pSrc);
1370f7cb851fSmacallan		p->srcpitch = exaGetPixmapPitch(pSrc);
1371f7cb851fSmacallan		if (p->source_is_solid) {
1372f7cb851fSmacallan			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
1373f7cb851fSmacallan		}
1374f7cb851fSmacallan	}
13754261fa58Smacallan	p->srcformat = pSrcPicture->format;
13764261fa58Smacallan	p->dstformat = pDstPicture->format;
1377f7cb851fSmacallan
1378f7cb851fSmacallan	if (p->source_is_solid) {
1379f7cb851fSmacallan		uint32_t temp;
1380f7cb851fSmacallan
1381f7cb851fSmacallan		/* stuff source colour into SX registers, swap as needed */
1382f7cb851fSmacallan		temp = p->fillcolour;
13836fd6e0f4Smacallan		DPRINTF(X_ERROR, "solid %08x\n", temp);
1384f7cb851fSmacallan		switch (p->srcformat) {
1385f7cb851fSmacallan			case PICT_a8r8g8b8:
1386f7cb851fSmacallan			case PICT_x8r8g8b8:
1387f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
1388f7cb851fSmacallan				temp = temp >> 8;
1389f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
1390f7cb851fSmacallan				temp = temp >> 8;
1391f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
1392f7cb851fSmacallan				break;
1393f7cb851fSmacallan			case PICT_a8b8g8r8:
1394f7cb851fSmacallan			case PICT_x8b8g8r8:
1395f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
1396f7cb851fSmacallan				temp = temp >> 8;
1397f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
1398f7cb851fSmacallan				temp = temp >> 8;
1399f7cb851fSmacallan				write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
1400f7cb851fSmacallan				break;
1401f7cb851fSmacallan		}
1402f7cb851fSmacallan		write_sx_reg(p, SX_QUEUED(8), 0xff);
1403f7cb851fSmacallan	}
14044261fa58Smacallan	p->op = op;
1405a3a2ba44Smacallan	if (op == PictOpSrc) {
14066fd6e0f4Smacallan		if (pSrc == NULL) {
14076fd6e0f4Smacallan			DPRINTF(X_ERROR, "src type %d\n", pSrcPicture->pSourcePict->type);
14086fd6e0f4Smacallan			return FALSE;
14096fd6e0f4Smacallan		}
1410a3a2ba44Smacallan		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
1411a3a2ba44Smacallan	}
14124261fa58Smacallan#ifdef SX_DEBUG
14134261fa58Smacallan	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
14144261fa58Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
14154261fa58Smacallan#endif
14164261fa58Smacallan	return TRUE;
14174261fa58Smacallan}
14184261fa58Smacallan
14194261fa58Smacallanvoid
14204261fa58SmacallanCG14Composite(PixmapPtr pDst, int srcX, int srcY,
14214261fa58Smacallan                              int maskX, int maskY,
14224261fa58Smacallan                              int dstX, int dstY,
14234261fa58Smacallan                              int width, int height)
14244261fa58Smacallan{
14254261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
14264261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
14274261fa58Smacallan	uint32_t dstoff, dstpitch;
14284261fa58Smacallan	uint32_t dst, msk, src;
1429e311bbeeSmacallan	int flip = 0;
14304261fa58Smacallan
14314261fa58Smacallan	ENTER;
14324261fa58Smacallan	dstoff = exaGetPixmapOffset(pDst);
14334261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDst);
14344261fa58Smacallan
1435e311bbeeSmacallan	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
1436e311bbeeSmacallan		PICT_FORMAT_TYPE(p->dstformat));
1437e311bbeeSmacallan
14384261fa58Smacallan	switch (p->op) {
14394261fa58Smacallan		case PictOpOver:
14404261fa58Smacallan			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
14414261fa58Smacallan			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
14424261fa58Smacallan			    p->mskformat, p->dstformat, srcX, srcY);
1443a3a2ba44Smacallan			if (p->source_is_solid) {
1444a3a2ba44Smacallan				switch (p->mskformat) {
1445a3a2ba44Smacallan					case PICT_a8:
1446a3a2ba44Smacallan						msk = p->mskoff +
1447a3a2ba44Smacallan						    (maskY * p->mskpitch) +
1448a3a2ba44Smacallan						    maskX;
1449a3a2ba44Smacallan						CG14Comp_Over8Solid(p,
1450a3a2ba44Smacallan						    msk, p->mskpitch,
1451a3a2ba44Smacallan						    dst, dstpitch,
1452a3a2ba44Smacallan						    width, height);
1453a3a2ba44Smacallan						break;
1454a3a2ba44Smacallan					case PICT_a8r8g8b8:
1455a3a2ba44Smacallan					case PICT_a8b8g8r8:
1456a3a2ba44Smacallan						msk = p->mskoff +
1457a3a2ba44Smacallan						    (maskY * p->mskpitch) +
1458a3a2ba44Smacallan						    (maskX << 2);
1459a3a2ba44Smacallan						CG14Comp_Over32Solid(p,
1460a3a2ba44Smacallan						    msk, p->mskpitch,
1461a3a2ba44Smacallan						    dst, dstpitch,
1462a3a2ba44Smacallan						    width, height);
1463a3a2ba44Smacallan						break;
14643be5cdccSmacallan					case 0:
14653be5cdccSmacallan						DPRINTF(X_ERROR, "%s: Over with solid %08x and no mask\n", __func__, p->fillcolour);
14663be5cdccSmacallan						CG14PrepareSolid(pDst, GXcopy, 0xffffffff, p->fillcolour);
14673be5cdccSmacallan						CG14Solid(pDst, dstX, dstY, width, height);
14683be5cdccSmacallan						break;
1469a3a2ba44Smacallan					default:
1470a3a2ba44Smacallan						xf86Msg(X_ERROR,
1471f71acd79Smacallan						  "unsupported mask format %08x\n", p->mskformat);
1472a3a2ba44Smacallan				}
1473a3a2ba44Smacallan			} else {
14746bdc2ffdSmacallan				DPRINTF(X_ERROR, "non-solid over with msk %x\n",
14756bdc2ffdSmacallan				    p->mskformat);
1476a3a2ba44Smacallan				switch (p->srcformat) {
1477a3a2ba44Smacallan					case PICT_a8r8g8b8:
1478a3a2ba44Smacallan					case PICT_a8b8g8r8:
1479a3a2ba44Smacallan						src = p->srcoff +
1480a3a2ba44Smacallan						    (srcY * p->srcpitch) +
1481a3a2ba44Smacallan						    (srcX << 2);
1482a3a2ba44Smacallan						dst = dstoff +
1483a3a2ba44Smacallan						    (dstY * dstpitch) +
1484a3a2ba44Smacallan						    (dstX << 2);
1485a3a2ba44Smacallan						if (p->mskformat == PICT_a8) {
1486a3a2ba44Smacallan							msk = p->mskoff +
1487a3a2ba44Smacallan							    (maskY * p->mskpitch) +
1488a3a2ba44Smacallan							    maskX;
1489a3a2ba44Smacallan							CG14Comp_Over32Mask(p,
1490a3a2ba44Smacallan							    src, p->srcpitch,
1491a3a2ba44Smacallan							    msk, p->mskpitch,
1492a3a2ba44Smacallan							    dst, dstpitch,
1493e311bbeeSmacallan							    width, height, flip);
1494a3a2ba44Smacallan						} else {
1495a3a2ba44Smacallan							CG14Comp_Over32(p,
1496a3a2ba44Smacallan							    src, p->srcpitch,
1497a3a2ba44Smacallan							    dst, dstpitch,
1498e311bbeeSmacallan							    width, height, flip);
1499a3a2ba44Smacallan						}
1500a3a2ba44Smacallan						break;
1501a3a2ba44Smacallan					case PICT_x8r8g8b8:
1502a3a2ba44Smacallan					case PICT_x8b8g8r8:
15036bdc2ffdSmacallan						src = p->srcoff +
15046bdc2ffdSmacallan						    (srcY * p->srcpitch) +
15056bdc2ffdSmacallan						    (srcX << 2);
15066bdc2ffdSmacallan						dst = dstoff +
15076bdc2ffdSmacallan						    (dstY * dstpitch) +
15086bdc2ffdSmacallan						    (dstX << 2);
15096bdc2ffdSmacallan						if (p->mskformat == PICT_a8) {
15106bdc2ffdSmacallan							msk = p->mskoff +
15116bdc2ffdSmacallan							    (maskY * p->mskpitch) +
15126bdc2ffdSmacallan							    maskX;
15136bdc2ffdSmacallan							CG14Comp_Over32Mask_noalpha(p,
15146bdc2ffdSmacallan							    src, p->srcpitch,
15156bdc2ffdSmacallan							    msk, p->mskpitch,
1516fa158432Smacallan							    dst, dstpitch,
1517e311bbeeSmacallan							    width, height, flip);
1518fa158432Smacallan						} else if ((p->mskformat == PICT_a8r8g8b8) ||
1519fa158432Smacallan							   (p->mskformat == PICT_a8b8g8r8)) {
1520fa158432Smacallan							msk = p->mskoff +
1521fa158432Smacallan							    (maskY * p->mskpitch) +
1522fa158432Smacallan							    (maskX << 2);
1523fa158432Smacallan							CG14Comp_Over32Mask32_noalpha(p,
1524fa158432Smacallan							    src, p->srcpitch,
1525fa158432Smacallan							    msk, p->mskpitch,
15266bdc2ffdSmacallan							    dst, dstpitch,
1527e311bbeeSmacallan							    width, height, flip);
15286bdc2ffdSmacallan						} else {
15296bdc2ffdSmacallan							xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
15306bdc2ffdSmacallan						}
1531a3a2ba44Smacallan						break;
1532a3a2ba44Smacallan					default:
1533a3a2ba44Smacallan						xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
1534a3a2ba44Smacallan						    __func__, p->srcformat);
1535a3a2ba44Smacallan				}
1536a3a2ba44Smacallan			}
15374261fa58Smacallan			break;
15384261fa58Smacallan		case PictOpAdd:
15394261fa58Smacallan			DPRINTF(X_ERROR, "Add %08x %08x\n",
15404261fa58Smacallan			    p->srcformat, p->dstformat);
15414261fa58Smacallan			switch (p->srcformat) {
15424261fa58Smacallan				case PICT_a8:
15434261fa58Smacallan					src = p->srcoff +
15444261fa58Smacallan					    (srcY * p->srcpitch) + srcX;
1545d71cb32dSmacallan					if (p->dstformat == PICT_a8) {
1546d71cb32dSmacallan						dst = dstoff +
1547d71cb32dSmacallan						      (dstY * dstpitch) + dstX;
1548d71cb32dSmacallan						CG14Comp_Add8(p,
1549d71cb32dSmacallan						    src, p->srcpitch,
1550d71cb32dSmacallan						    dst, dstpitch,
1551d71cb32dSmacallan						    width, height);
1552d71cb32dSmacallan					} else {
1553d71cb32dSmacallan						dst = dstoff +
1554d71cb32dSmacallan						      (dstY * dstpitch) +
1555d71cb32dSmacallan						      (dstX << 2);
1556d71cb32dSmacallan						CG14Comp_Add8_32(p,
1557d71cb32dSmacallan						    src, p->srcpitch,
1558d71cb32dSmacallan						    dst, dstpitch,
1559d71cb32dSmacallan						    width, height);
1560d71cb32dSmacallan					}
15614261fa58Smacallan					break;
15624261fa58Smacallan				case PICT_a8r8g8b8:
15634261fa58Smacallan				case PICT_x8r8g8b8:
15644261fa58Smacallan					src = p->srcoff +
15654261fa58Smacallan					    (srcY * p->srcpitch) + (srcX << 2);
15664261fa58Smacallan					dst = dstoff + (dstY * dstpitch) +
15674261fa58Smacallan					    (dstX << 2);
15684261fa58Smacallan					CG14Comp_Add32(p, src, p->srcpitch,
15694261fa58Smacallan					    dst, dstpitch, width, height);
15704261fa58Smacallan					break;
15714261fa58Smacallan				default:
15724261fa58Smacallan					xf86Msg(X_ERROR,
15734261fa58Smacallan					    "unsupported src format\n");
15744261fa58Smacallan			}
15754261fa58Smacallan			break;
1576a3a2ba44Smacallan		case PictOpSrc:
1577a3a2ba44Smacallan			DPRINTF(X_ERROR, "Src %08x %08x\n",
1578a3a2ba44Smacallan			    p->srcformat, p->dstformat);
1579239808baSmacallan			if (p->mskformat != 0)
1580239808baSmacallan				xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
1581f71acd79Smacallan			if (p->srcformat == PICT_a8) {
1582f71acd79Smacallan				CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
1583f71acd79Smacallan			} else {
1584f71acd79Smacallan				/* convert between RGB and BGR? */
1585f71acd79Smacallan				CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
1586f71acd79Smacallan			}
1587a3a2ba44Smacallan			break;
15884261fa58Smacallan		default:
15894261fa58Smacallan			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
15904261fa58Smacallan	}
15914261fa58Smacallan	exaMarkSync(pDst->drawable.pScreen);
15924261fa58Smacallan}
15934261fa58Smacallan
15944261fa58Smacallan
15954261fa58Smacallan
15964261fa58SmacallanBool
15974261fa58SmacallanCG14InitAccel(ScreenPtr pScreen)
15984261fa58Smacallan{
15994261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
16004261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
16014261fa58Smacallan	ExaDriverPtr pExa;
16024261fa58Smacallan
16034261fa58Smacallan	pExa = exaDriverAlloc();
16044261fa58Smacallan	if (!pExa)
16054261fa58Smacallan		return FALSE;
16064261fa58Smacallan
16074261fa58Smacallan	p->pExa = pExa;
16084261fa58Smacallan
16094261fa58Smacallan	pExa->exa_major = EXA_VERSION_MAJOR;
16104261fa58Smacallan	pExa->exa_minor = EXA_VERSION_MINOR;
16114261fa58Smacallan
16124261fa58Smacallan	pExa->memoryBase = p->fb;
16134261fa58Smacallan	pExa->memorySize = p->memsize;
1614031e8e94Smacallan	pExa->offScreenBase = p->width * p->height * (pScrn->bitsPerPixel >> 3);
16154261fa58Smacallan
16164261fa58Smacallan	/*
16174261fa58Smacallan	 * SX memory instructions are written to 64bit aligned addresses with
16184261fa58Smacallan	 * a 3 bit displacement. Make sure the displacement remains constant
16194261fa58Smacallan	 * within one column
16204261fa58Smacallan	 */
16214261fa58Smacallan
16224261fa58Smacallan	pExa->pixmapOffsetAlign = 8;
16234261fa58Smacallan	pExa->pixmapPitchAlign = 8;
16244261fa58Smacallan
1625fe97f391Smacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS
1626f71acd79Smacallan		      | EXA_SUPPORTS_OFFSCREEN_OVERLAPS
1627f71acd79Smacallan		      /*| EXA_MIXED_PIXMAPS*/;
16284261fa58Smacallan
16294261fa58Smacallan	/*
16304261fa58Smacallan	 * these limits are bogus
16314261fa58Smacallan	 * SX doesn't deal with coordinates at all, so there is no limit but
16324261fa58Smacallan	 * we have to put something here
16334261fa58Smacallan	 */
16344261fa58Smacallan	pExa->maxX = 4096;
16354261fa58Smacallan	pExa->maxY = 4096;
16364261fa58Smacallan
16374261fa58Smacallan	pExa->WaitMarker = CG14WaitMarker;
16384261fa58Smacallan
16394261fa58Smacallan	pExa->PrepareSolid = CG14PrepareSolid;
16404261fa58Smacallan	pExa->Solid = CG14Solid;
16414261fa58Smacallan	pExa->DoneSolid = CG14DoneCopy;
16424261fa58Smacallan	pExa->PrepareCopy = CG14PrepareCopy;
1643f71acd79Smacallan	pExa->Copy = CG14Copy32;
16444261fa58Smacallan	pExa->DoneCopy = CG14DoneCopy;
16454261fa58Smacallan	if (p->use_xrender) {
16464261fa58Smacallan		pExa->CheckComposite = CG14CheckComposite;
16474261fa58Smacallan		pExa->PrepareComposite = CG14PrepareComposite;
16484261fa58Smacallan		pExa->Composite = CG14Composite;
16494261fa58Smacallan		pExa->DoneComposite = CG14DoneCopy;
16504261fa58Smacallan	}
16514261fa58Smacallan
16524261fa58Smacallan	/* EXA hits more optimized paths when it does not have to fallback
16534261fa58Smacallan	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
16544261fa58Smacallan	 */
16554261fa58Smacallan	pExa->UploadToScreen = CG14UploadToScreen;
16564261fa58Smacallan	pExa->DownloadFromScreen = CG14DownloadFromScreen;
16574261fa58Smacallan
1658c2193d98Smacallan	p->queuecount = 0;
16594261fa58Smacallan	/* do some hardware init */
16604261fa58Smacallan	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
16614261fa58Smacallan	p->last_mask = 0xffffffff;
16624261fa58Smacallan	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
16634261fa58Smacallan	p->last_rop = 0xcc;
16644261fa58Smacallan	return exaDriverInit(pScreen, pExa);
16654261fa58Smacallan}
1666