cg14_accel.c revision 4261fa58
14261fa58Smacallan/* $NetBSD: cg14_accel.c,v 1.1 2013/06/19 13:26:01 macallan Exp $ */
24261fa58Smacallan/*
34261fa58Smacallan * Copyright (c) 2013 Michael Lorenz
44261fa58Smacallan * All rights reserved.
54261fa58Smacallan *
64261fa58Smacallan * Redistribution and use in source and binary forms, with or without
74261fa58Smacallan * modification, are permitted provided that the following conditions
84261fa58Smacallan * are met:
94261fa58Smacallan *
104261fa58Smacallan *    - Redistributions of source code must retain the above copyright
114261fa58Smacallan *      notice, this list of conditions and the following disclaimer.
124261fa58Smacallan *    - Redistributions in binary form must reproduce the above
134261fa58Smacallan *      copyright notice, this list of conditions and the following
144261fa58Smacallan *      disclaimer in the documentation and/or other materials provided
154261fa58Smacallan *      with the distribution.
164261fa58Smacallan *
174261fa58Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
184261fa58Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
194261fa58Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
204261fa58Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
214261fa58Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
224261fa58Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
234261fa58Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
244261fa58Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
254261fa58Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
264261fa58Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
274261fa58Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
284261fa58Smacallan * POSSIBILITY OF SUCH DAMAGE.
294261fa58Smacallan *
304261fa58Smacallan */
314261fa58Smacallan
324261fa58Smacallan#include <sys/types.h>
334261fa58Smacallan
/* all drivers need this */
354261fa58Smacallan#include "xf86.h"
364261fa58Smacallan#include "xf86_OSproc.h"
374261fa58Smacallan#include "compiler.h"
384261fa58Smacallan
394261fa58Smacallan#include "cg14.h"
404261fa58Smacallan#include <sparc/sxreg.h>
414261fa58Smacallan
/*
 * Build-time switches:
 *   SX_SINGLE       - use the one-pixel-at-a-time loop in the composite
 *                     helpers instead of the four-pixel variant
 *   SX_DEBUG        - turn ENTER / DPRINTF into real xf86Msg() calls
 *   SX_ADD_SOFTWARE - not referenced in this part of the file
 */
#define SX_SINGLE
/*#define SX_DEBUG*/
/*#define SX_ADD_SOFTWARE*/

#ifndef SX_DEBUG
/* tracing off: ENTER expands to nothing, DPRINTF calls never execute */
#define ENTER
#define DPRINTF while (0) xf86Msg
#else
/* tracing on: ENTER logs the name of the function being entered */
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
#define DPRINTF xf86Msg
#endif

/* element count of a real array (must not be used on a pointer) */
#define arraysize(ary)        (sizeof(ary) / sizeof((ary)[0]))
554261fa58Smacallan
/*
 * SX raster-op codes, indexed by the X11 GC function (alu) value that the
 * EXA hooks receive. 0xcc is SX's GXcopy equivalent.
 */
uint32_t sx_rop[] = {
	0x00,	/*  0: GXclear */
	0x88,	/*  1: GXand */
	0x44,	/*  2: GXandReverse */
	0xcc,	/*  3: GXcopy */
	0x22,	/*  4: GXandInverted */
	0xaa,	/*  5: GXnoop */
	0x66,	/*  6: GXxor */
	0xee,	/*  7: GXor */
	0x11,	/*  8: GXnor */
	0x99,	/*  9: GXequiv */
	0x55,	/* 10: GXinvert */
	0xdd,	/* 11: GXorReverse */
	0x33,	/* 12: GXcopyInverted */
	0xbb,	/* 13: GXorInverted */
	0x77,	/* 14: GXnand */
	0xff	/* 15: GXset */
};
594261fa58Smacallan
604261fa58Smacallanint src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
614261fa58Smacallan		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
624261fa58Smacallanint tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
634261fa58Smacallan
/*
 * Eight characters, deliberately stored without a terminating NUL.
 * NOTE(review): presumably an intensity ramp for debug dumps - no user is
 * visible in this part of the file.
 */
char c[8] = { ' ', '.', ',', ':', '+', '*', 'o', 'X' };
654261fa58Smacallan
664261fa58Smacallan/* write an SX register */
674261fa58Smacallanstatic inline void
684261fa58Smacallanwrite_sx_reg(Cg14Ptr p, int reg, uint32_t val)
694261fa58Smacallan{
704261fa58Smacallan	*(volatile uint32_t *)(p->sxreg + reg) = val;
714261fa58Smacallan}
724261fa58Smacallan
734261fa58Smacallan/* read an SX register */
744261fa58Smacallanstatic inline uint32_t
754261fa58Smacallanread_sx_reg(Cg14Ptr p, int reg)
764261fa58Smacallan{
774261fa58Smacallan	return *(volatile uint32_t *)(p->sxreg + reg);
784261fa58Smacallan}
794261fa58Smacallan
804261fa58Smacallan/* write a memory referencing instruction */
814261fa58Smacallanstatic inline void
824261fa58Smacallanwrite_sx_io(Cg14Ptr p, int reg, uint32_t val)
834261fa58Smacallan{
844261fa58Smacallan	*(volatile uint32_t *)(p->sxio + reg) = val;
854261fa58Smacallan}
864261fa58Smacallan
874261fa58Smacallanstatic inline void
884261fa58SmacallanCG14Wait(Cg14Ptr p)
894261fa58Smacallan{
904261fa58Smacallan	/* we just wait until the instruction queue is empty */
914261fa58Smacallan	while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {};
924261fa58Smacallan}
934261fa58Smacallan
944261fa58Smacallanstatic void
954261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker)
964261fa58Smacallan{
974261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
984261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
994261fa58Smacallan
1004261fa58Smacallan	CG14Wait(p);
1014261fa58Smacallan}
1024261fa58Smacallan
1034261fa58Smacallanstatic Bool
1044261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
1054261fa58Smacallan		int xdir, int ydir, int alu, Pixel planemask)
1064261fa58Smacallan{
1074261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1084261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1094261fa58Smacallan
1104261fa58Smacallan	ENTER;
1114261fa58Smacallan	DPRINTF(X_ERROR, "bits per pixel: %d\n",
1124261fa58Smacallan	    pSrcPixmap->drawable.bitsPerPixel);
1134261fa58Smacallan
1144261fa58Smacallan	if (planemask != p->last_mask) {
1154261fa58Smacallan		CG14Wait(p);
1164261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
1174261fa58Smacallan		p->last_mask = planemask;
1184261fa58Smacallan	}
1194261fa58Smacallan	alu = sx_rop[alu];
1204261fa58Smacallan	if (alu != p->last_rop) {
1214261fa58Smacallan		CG14Wait(p);
1224261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
1234261fa58Smacallan		p->last_rop = alu;
1244261fa58Smacallan	}
1254261fa58Smacallan	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
1264261fa58Smacallan	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
1274261fa58Smacallan	p->xdir = xdir;
1284261fa58Smacallan	p->ydir = ydir;
1294261fa58Smacallan	return TRUE;
1304261fa58Smacallan}
1314261fa58Smacallan
1324261fa58Smacallanstatic void
1334261fa58SmacallanCG14Copy(PixmapPtr pDstPixmap,
1344261fa58Smacallan         int srcX, int srcY, int dstX, int dstY, int w, int h)
1354261fa58Smacallan{
1364261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1374261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
1384261fa58Smacallan	int dstpitch, dstoff, srcpitch, srcoff;
1394261fa58Smacallan	int srcstart, dststart, xinc, srcinc, dstinc;
1404261fa58Smacallan	int line, count, s, d, num;
1414261fa58Smacallan
1424261fa58Smacallan	ENTER;
1434261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
1444261fa58Smacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
1454261fa58Smacallan	srcpitch = p->srcpitch;
1464261fa58Smacallan	srcoff = p->srcoff;
1474261fa58Smacallan	/*
1484261fa58Smacallan	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
1494261fa58Smacallan	 * actually wrote anything and only sync if it did
1504261fa58Smacallan	 */
1514261fa58Smacallan	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
1524261fa58Smacallan	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
1534261fa58Smacallan
1544261fa58Smacallan	/*
1554261fa58Smacallan	 * we always copy up to 32 pixels at a time so direction doesn't
1564261fa58Smacallan	 * matter if w<=32
1574261fa58Smacallan	 */
1584261fa58Smacallan	if (w > 32) {
1594261fa58Smacallan		if (p->xdir < 0) {
1604261fa58Smacallan			srcstart += (w - 32) << 2;
1614261fa58Smacallan			dststart += (w - 32) << 2;
1624261fa58Smacallan			xinc = -128;
1634261fa58Smacallan		} else
1644261fa58Smacallan			xinc = 128;
1654261fa58Smacallan	} else
1664261fa58Smacallan		xinc = 128;
1674261fa58Smacallan	if (p->ydir < 0) {
1684261fa58Smacallan		srcstart += (h - 1) * srcpitch;
1694261fa58Smacallan		dststart += (h - 1) * dstpitch;
1704261fa58Smacallan		srcinc = -srcpitch;
1714261fa58Smacallan		dstinc = -dstpitch;
1724261fa58Smacallan	} else {
1734261fa58Smacallan		srcinc = srcpitch;
1744261fa58Smacallan		dstinc = dstpitch;
1754261fa58Smacallan	}
1764261fa58Smacallan	if (p->last_rop == 0xcc) {
1774261fa58Smacallan		/* plain old copy */
1784261fa58Smacallan		if ( xinc > 0) {
1794261fa58Smacallan			/* going left to right */
1804261fa58Smacallan			for (line = 0; line < h; line++) {
1814261fa58Smacallan				count = 0;
1824261fa58Smacallan				s = srcstart;
1834261fa58Smacallan				d = dststart;
1844261fa58Smacallan				while ( count < w) {
1854261fa58Smacallan					num = min(32, w - count);
1864261fa58Smacallan					write_sx_io(p, s,
1874261fa58Smacallan					    SX_LD(10, num - 1, s & 7));
1884261fa58Smacallan					write_sx_io(p, d,
1894261fa58Smacallan					    SX_STM(10, num - 1, d & 7));
1904261fa58Smacallan					s += xinc;
1914261fa58Smacallan					d += xinc;
1924261fa58Smacallan					count += 32;
1934261fa58Smacallan				}
1944261fa58Smacallan				srcstart += srcinc;
1954261fa58Smacallan				dststart += dstinc;
1964261fa58Smacallan			}
1974261fa58Smacallan		} else {
1984261fa58Smacallan			/* going right to left */
1994261fa58Smacallan			int i, chunks = (w >> 5);
2004261fa58Smacallan			for (line = 0; line < h; line++) {
2014261fa58Smacallan				s = srcstart;
2024261fa58Smacallan				d = dststart;
2034261fa58Smacallan				count = w;
2044261fa58Smacallan				for (i = 0; i < chunks; i++) {
2054261fa58Smacallan					write_sx_io(p, s,
2064261fa58Smacallan					    SX_LD(10, 31, s & 7));
2074261fa58Smacallan					write_sx_io(p, d,
2084261fa58Smacallan					    SX_STM(10, 31, d & 7));
2094261fa58Smacallan					s -= 128;
2104261fa58Smacallan					d -= 128;
2114261fa58Smacallan					count -= 32;
2124261fa58Smacallan				}
2134261fa58Smacallan				/* leftovers, if any */
2144261fa58Smacallan				if (count > 0) {
2154261fa58Smacallan					s += (32 - count) << 2;
2164261fa58Smacallan					d += (32 - count) << 2;
2174261fa58Smacallan					write_sx_io(p, s,
2184261fa58Smacallan					    SX_LD(10, count - 1, s & 7));
2194261fa58Smacallan					write_sx_io(p, d,
2204261fa58Smacallan					    SX_STM(10, count - 1, d & 7));
2214261fa58Smacallan				}
2224261fa58Smacallan				srcstart += srcinc;
2234261fa58Smacallan				dststart += dstinc;
2244261fa58Smacallan			}
2254261fa58Smacallan		}
2264261fa58Smacallan	} else {
2274261fa58Smacallan		/* ROPs needed */
2284261fa58Smacallan		if ( xinc > 0) {
2294261fa58Smacallan			/* going left to right */
2304261fa58Smacallan			for (line = 0; line < h; line++) {
2314261fa58Smacallan				count = 0;
2324261fa58Smacallan				s = srcstart;
2334261fa58Smacallan				d = dststart;
2344261fa58Smacallan				while ( count < w) {
2354261fa58Smacallan					num = min(32, w - count);
2364261fa58Smacallan					write_sx_io(p, s,
2374261fa58Smacallan					    SX_LD(10, num - 1, s & 7));
2384261fa58Smacallan					write_sx_io(p, d,
2394261fa58Smacallan					    SX_LD(42, num - 1, d & 7));
2404261fa58Smacallan					if (num > 16) {
2414261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2424261fa58Smacallan					    	 SX_ROP(10, 42, 74, 15));
2434261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2444261fa58Smacallan					    	 SX_ROP(26, 58, 90, num - 17));
2454261fa58Smacallan					} else {
2464261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2474261fa58Smacallan					    	 SX_ROP(10, 42, 74, num - 1));
2484261fa58Smacallan					}
2494261fa58Smacallan					write_sx_io(p, d,
2504261fa58Smacallan					    SX_STM(74, num - 1, d & 7));
2514261fa58Smacallan					s += xinc;
2524261fa58Smacallan					d += xinc;
2534261fa58Smacallan					count += 32;
2544261fa58Smacallan				}
2554261fa58Smacallan				srcstart += srcinc;
2564261fa58Smacallan				dststart += dstinc;
2574261fa58Smacallan			}
2584261fa58Smacallan		} else {
2594261fa58Smacallan			/* going right to left */
2604261fa58Smacallan			int i, chunks = (w >> 5);
2614261fa58Smacallan			for (line = 0; line < h; line++) {
2624261fa58Smacallan				s = srcstart;
2634261fa58Smacallan				d = dststart;
2644261fa58Smacallan				count = w;
2654261fa58Smacallan				for (i = 0; i < chunks; i++) {
2664261fa58Smacallan					write_sx_io(p, s, SX_LD(10, 31, s & 7));
2674261fa58Smacallan					write_sx_io(p, d, SX_LD(42, 31, d & 7));
2684261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
2694261fa58Smacallan				    	    SX_ROP(10, 42, 74, 15));
2704261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
2714261fa58Smacallan				    	    SX_ROP(26, 58, 90, 15));
2724261fa58Smacallan					write_sx_io(p, d,
2734261fa58Smacallan					    SX_STM(74, 31, d & 7));
2744261fa58Smacallan					s -= 128;
2754261fa58Smacallan					d -= 128;
2764261fa58Smacallan					count -= 32;
2774261fa58Smacallan				}
2784261fa58Smacallan				/* leftovers, if any */
2794261fa58Smacallan				if (count > 0) {
2804261fa58Smacallan					s += (32 - count) << 2;
2814261fa58Smacallan					d += (32 - count) << 2;
2824261fa58Smacallan					write_sx_io(p, s,
2834261fa58Smacallan					    SX_LD(10, count - 1, s & 7));
2844261fa58Smacallan					write_sx_io(p, d,
2854261fa58Smacallan					    SX_LD(42, count - 1, d & 7));
2864261fa58Smacallan					if (count > 16) {
2874261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2884261fa58Smacallan					    	    SX_ROP(10, 42, 74, 15));
2894261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2904261fa58Smacallan					    	 SX_ROP(26, 58, 90, count - 17));
2914261fa58Smacallan					} else {
2924261fa58Smacallan						write_sx_reg(p, SX_INSTRUCTIONS,
2934261fa58Smacallan					    	 SX_ROP(10, 42, 74, count - 1));
2944261fa58Smacallan					}
2954261fa58Smacallan
2964261fa58Smacallan					write_sx_io(p, d,
2974261fa58Smacallan					    SX_STM(74, count - 1, d & 7));
2984261fa58Smacallan				}
2994261fa58Smacallan				srcstart += srcinc;
3004261fa58Smacallan				dststart += dstinc;
3014261fa58Smacallan			}
3024261fa58Smacallan		}
3034261fa58Smacallan	}
3044261fa58Smacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
3054261fa58Smacallan}
3064261fa58Smacallan
3074261fa58Smacallanstatic void
3084261fa58SmacallanCG14DoneCopy(PixmapPtr pDstPixmap)
3094261fa58Smacallan{
3104261fa58Smacallan}
3114261fa58Smacallan
3124261fa58Smacallanstatic Bool
3134261fa58SmacallanCG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
3144261fa58Smacallan{
3154261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
3164261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
3174261fa58Smacallan
3184261fa58Smacallan	ENTER;
3194261fa58Smacallan	DPRINTF(X_ERROR, "bits per pixel: %d\n", pPixmap->drawable.bitsPerPixel);
3204261fa58Smacallan	write_sx_reg(p, SX_QUEUED(8), fg);
3214261fa58Smacallan	write_sx_reg(p, SX_QUEUED(9), fg);
3224261fa58Smacallan	if (planemask != p->last_mask) {
3234261fa58Smacallan		CG14Wait(p);
3244261fa58Smacallan		write_sx_reg(p, SX_PLANEMASK, planemask);
3254261fa58Smacallan		p->last_mask = planemask;
3264261fa58Smacallan	}
3274261fa58Smacallan	alu = sx_rop[alu];
3284261fa58Smacallan	if (alu != p->last_rop) {
3294261fa58Smacallan		CG14Wait(p);
3304261fa58Smacallan		write_sx_reg(p, SX_ROP_CONTROL, alu);
3314261fa58Smacallan		p->last_rop = alu;
3324261fa58Smacallan	}
3334261fa58Smacallan	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
3344261fa58Smacallan	return TRUE;
3354261fa58Smacallan}
3364261fa58Smacallan
3374261fa58Smacallanstatic void
3384261fa58SmacallanCG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
3394261fa58Smacallan{
3404261fa58Smacallan	int line, x, num;
3414261fa58Smacallan	uint32_t ptr;
3424261fa58Smacallan
3434261fa58Smacallan	ENTER;
3444261fa58Smacallan	if (p->last_rop == 0xcc) {
3454261fa58Smacallan		/* simple fill */
3464261fa58Smacallan		for (line = 0; line < h; line++) {
3474261fa58Smacallan			x = 0;
3484261fa58Smacallan			while (x < w) {
3494261fa58Smacallan				ptr = start + (x << 2);
3504261fa58Smacallan				num = min(32, w - x);
3514261fa58Smacallan				write_sx_io(p, ptr,
3524261fa58Smacallan				    SX_STS(8, num - 1, ptr & 7));
3534261fa58Smacallan				x += 32;
3544261fa58Smacallan			}
3554261fa58Smacallan			start += pitch;
3564261fa58Smacallan		}
3574261fa58Smacallan	} else if (p->last_rop == 0xaa) {
3584261fa58Smacallan		/* nothing to do here */
3594261fa58Smacallan		return;
3604261fa58Smacallan	} else {
3614261fa58Smacallan		/* alright, let's do actual ROP stuff */
3624261fa58Smacallan
3634261fa58Smacallan		/* first repeat the fill colour into 16 registers */
3644261fa58Smacallan		write_sx_reg(p, SX_INSTRUCTIONS,
3654261fa58Smacallan		    SX_SELECT_S(8, 8, 10, 15));
3664261fa58Smacallan
3674261fa58Smacallan		for (line = 0; line < h; line++) {
3684261fa58Smacallan			x = 0;
3694261fa58Smacallan			while (x < w) {
3704261fa58Smacallan				ptr = start + (x << 2);
3714261fa58Smacallan				num = min(32, w - x);
3724261fa58Smacallan				/* now suck fb data into registers */
3734261fa58Smacallan				write_sx_io(p, ptr,
3744261fa58Smacallan				    SX_LD(42, num - 1, ptr & 7));
3754261fa58Smacallan				/*
3764261fa58Smacallan				 * ROP them with the fill data we left in 10
3774261fa58Smacallan				 * non-memory ops can only have counts up to 16
3784261fa58Smacallan				 */
3794261fa58Smacallan				if (num <= 16) {
3804261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
3814261fa58Smacallan					    SX_ROP(10, 42, 74, num - 1));
3824261fa58Smacallan				} else {
3834261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
3844261fa58Smacallan					    SX_ROP(10, 42, 74, 15));
3854261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
3864261fa58Smacallan					    SX_ROP(10, 58, 90, num - 17));
3874261fa58Smacallan				}
3884261fa58Smacallan				/* and write the result back into memory */
3894261fa58Smacallan				write_sx_io(p, ptr,
3904261fa58Smacallan				    SX_ST(74, num - 1, ptr & 7));
3914261fa58Smacallan				x += 32;
3924261fa58Smacallan			}
3934261fa58Smacallan			start += pitch;
3944261fa58Smacallan		}
3954261fa58Smacallan	}
3964261fa58Smacallan}
3974261fa58Smacallan
3984261fa58Smacallanstatic void
3994261fa58SmacallanCG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
4004261fa58Smacallan{
4014261fa58Smacallan	int line, x, num, off;
4024261fa58Smacallan	uint32_t ptr;
4034261fa58Smacallan
4044261fa58Smacallan	ENTER;
4054261fa58Smacallan	off = start & 7;
4064261fa58Smacallan	start &= ~7;
4074261fa58Smacallan
4084261fa58Smacallan	if (p->last_rop == 0xcc) {
4094261fa58Smacallan		/* simple fill */
4104261fa58Smacallan		for (line = 0; line < h; line++) {
4114261fa58Smacallan			x = 0;
4124261fa58Smacallan			while (x < w) {
4134261fa58Smacallan				ptr = start + x;
4144261fa58Smacallan				num = min(32, w - x);
4154261fa58Smacallan				write_sx_io(p, ptr,
4164261fa58Smacallan				    SX_STBS(8, num - 1, off));
4174261fa58Smacallan				x += 32;
4184261fa58Smacallan			}
4194261fa58Smacallan			start += pitch;
4204261fa58Smacallan		}
4214261fa58Smacallan	} else if (p->last_rop == 0xaa) {
4224261fa58Smacallan		/* nothing to do here */
4234261fa58Smacallan		return;
4244261fa58Smacallan	} else {
4254261fa58Smacallan		/* alright, let's do actual ROP stuff */
4264261fa58Smacallan
4274261fa58Smacallan		/* first repeat the fill colour into 16 registers */
4284261fa58Smacallan		write_sx_reg(p, SX_INSTRUCTIONS,
4294261fa58Smacallan		    SX_SELECT_S(8, 8, 10, 15));
4304261fa58Smacallan
4314261fa58Smacallan		for (line = 0; line < h; line++) {
4324261fa58Smacallan			x = 0;
4334261fa58Smacallan			while (x < w) {
4344261fa58Smacallan				ptr = start + x;
4354261fa58Smacallan				num = min(32, w - x);
4364261fa58Smacallan				/* now suck fb data into registers */
4374261fa58Smacallan				write_sx_io(p, ptr,
4384261fa58Smacallan				    SX_LDB(42, num - 1, off));
4394261fa58Smacallan				/*
4404261fa58Smacallan				 * ROP them with the fill data we left in 10
4414261fa58Smacallan				 * non-memory ops can only have counts up to 16
4424261fa58Smacallan				 */
4434261fa58Smacallan				if (num <= 16) {
4444261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
4454261fa58Smacallan					    SX_ROP(10, 42, 74, num - 1));
4464261fa58Smacallan				} else {
4474261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
4484261fa58Smacallan					    SX_ROP(10, 42, 74, 15));
4494261fa58Smacallan					write_sx_reg(p, SX_INSTRUCTIONS,
4504261fa58Smacallan					    SX_ROP(10, 58, 90, num - 17));
4514261fa58Smacallan				}
4524261fa58Smacallan				/* and write the result back into memory */
4534261fa58Smacallan				write_sx_io(p, ptr,
4544261fa58Smacallan				    SX_STB(74, num - 1, off));
4554261fa58Smacallan				x += 32;
4564261fa58Smacallan			}
4574261fa58Smacallan			start += pitch;
4584261fa58Smacallan		}
4594261fa58Smacallan	}
4604261fa58Smacallan}
4614261fa58Smacallan
4624261fa58Smacallanstatic void
4634261fa58SmacallanCG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
4644261fa58Smacallan{
4654261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
4664261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
4674261fa58Smacallan	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
4684261fa58Smacallan	int start, depth;
4694261fa58Smacallan
4704261fa58Smacallan	ENTER;
4714261fa58Smacallan	dstpitch = exaGetPixmapPitch(pPixmap);
4724261fa58Smacallan	dstoff = exaGetPixmapOffset(pPixmap);
4734261fa58Smacallan
4744261fa58Smacallan	depth = pPixmap->drawable.bitsPerPixel;
4754261fa58Smacallan	switch (depth) {
4764261fa58Smacallan		case 32:
4774261fa58Smacallan			start = dstoff + (y1 * dstpitch) + (x1 << 2);
4784261fa58Smacallan			CG14Solid32(p, start, dstpitch, w, h);
4794261fa58Smacallan			break;
4804261fa58Smacallan		case 8:
4814261fa58Smacallan			start = dstoff + (y1 * dstpitch) + x1;
4824261fa58Smacallan			CG14Solid8(p, start, dstpitch, w, h);
4834261fa58Smacallan			break;
4844261fa58Smacallan	}
4854261fa58Smacallan
4864261fa58Smacallan	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
4874261fa58Smacallan	    dstpitch, dstoff, start);
4884261fa58Smacallan	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
4894261fa58Smacallan	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
4904261fa58Smacallan	exaMarkSync(pPixmap->drawable.pScreen);
4914261fa58Smacallan}
4924261fa58Smacallan
4934261fa58Smacallan/*
4944261fa58Smacallan * Memcpy-based UTS.
4954261fa58Smacallan */
4964261fa58Smacallanstatic Bool
4974261fa58SmacallanCG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
4984261fa58Smacallan    char *src, int src_pitch)
4994261fa58Smacallan{
5004261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
5014261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
5024261fa58Smacallan	char  *dst        = p->fb + exaGetPixmapOffset(pDst);
5034261fa58Smacallan	int    dst_pitch  = exaGetPixmapPitch(pDst);
5044261fa58Smacallan
5054261fa58Smacallan	int bpp    = pDst->drawable.bitsPerPixel;
5064261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
5074261fa58Smacallan	int wBytes = w * cpp;
5084261fa58Smacallan
5094261fa58Smacallan	ENTER;
5104261fa58Smacallan	dst += (x * cpp) + (y * dst_pitch);
5114261fa58Smacallan
5124261fa58Smacallan	CG14Wait(p);
5134261fa58Smacallan
5144261fa58Smacallan	while (h--) {
5154261fa58Smacallan		memcpy(dst, src, wBytes);
5164261fa58Smacallan		src += src_pitch;
5174261fa58Smacallan		dst += dst_pitch;
5184261fa58Smacallan	}
5194261fa58Smacallan	__asm("stbar;");
5204261fa58Smacallan	return TRUE;
5214261fa58Smacallan}
5224261fa58Smacallan
5234261fa58Smacallan/*
5244261fa58Smacallan * Memcpy-based DFS.
5254261fa58Smacallan */
5264261fa58Smacallanstatic Bool
5274261fa58SmacallanCG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
5284261fa58Smacallan    char *dst, int dst_pitch)
5294261fa58Smacallan{
5304261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
5314261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
5324261fa58Smacallan	char  *src        = p->fb + exaGetPixmapOffset(pSrc);
5334261fa58Smacallan	int    src_pitch  = exaGetPixmapPitch(pSrc);
5344261fa58Smacallan
5354261fa58Smacallan	ENTER;
5364261fa58Smacallan	int bpp    = pSrc->drawable.bitsPerPixel;
5374261fa58Smacallan	int cpp    = (bpp + 7) >> 3;
5384261fa58Smacallan	int wBytes = w * cpp;
5394261fa58Smacallan
5404261fa58Smacallan	src += (x * cpp) + (y * src_pitch);
5414261fa58Smacallan
5424261fa58Smacallan	CG14Wait(p);
5434261fa58Smacallan
5444261fa58Smacallan	while (h--) {
5454261fa58Smacallan		memcpy(dst, src, wBytes);
5464261fa58Smacallan		src += src_pitch;
5474261fa58Smacallan		dst += dst_pitch;
5484261fa58Smacallan	}
5494261fa58Smacallan
5504261fa58Smacallan	return TRUE;
5514261fa58Smacallan}
5524261fa58Smacallan
5534261fa58SmacallanBool
5544261fa58SmacallanCG14CheckComposite(int op, PicturePtr pSrcPicture,
5554261fa58Smacallan                           PicturePtr pMaskPicture,
5564261fa58Smacallan                           PicturePtr pDstPicture)
5574261fa58Smacallan{
5584261fa58Smacallan	int i, ok = FALSE;
5594261fa58Smacallan
5604261fa58Smacallan	ENTER;
5614261fa58Smacallan
5624261fa58Smacallan	/*
5634261fa58Smacallan	 * SX is in theory capable of accelerating pretty much all Xrender ops,
5644261fa58Smacallan	 * even coordinate transformation and gradients. Support will be added
5654261fa58Smacallan	 * over time and likely have to spill over into its own source file.
5664261fa58Smacallan	 */
5674261fa58Smacallan
5684261fa58Smacallan	if ((op != PictOpOver) && (op != PictOpAdd)) {
5694261fa58Smacallan		xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op);
5704261fa58Smacallan		return FALSE;
5714261fa58Smacallan	}
5724261fa58Smacallan	i = 0;
5734261fa58Smacallan	while ((i < arraysize(src_formats)) && (!ok)) {
5744261fa58Smacallan		ok =  (pSrcPicture->format == src_formats[i]);
5754261fa58Smacallan		i++;
5764261fa58Smacallan	}
5774261fa58Smacallan
5784261fa58Smacallan	if (!ok) {
5794261fa58Smacallan		xf86Msg(X_ERROR, "%s: unsupported src format %x\n",
5804261fa58Smacallan		    __func__, pSrcPicture->format);
5814261fa58Smacallan		return FALSE;
5824261fa58Smacallan	}
5834261fa58Smacallan
5844261fa58Smacallan	DPRINTF(X_ERROR, "src is %x %d %d\n", pSrcPicture->format,
5854261fa58Smacallan	    pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
5864261fa58Smacallan
5874261fa58Smacallan	if (pMaskPicture != NULL) {
5884261fa58Smacallan		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
5894261fa58Smacallan		    pMaskPicture->pDrawable->width,
5904261fa58Smacallan		    pMaskPicture->pDrawable->height);
5914261fa58Smacallan	}
5924261fa58Smacallan	return TRUE;
5934261fa58Smacallan}
5944261fa58Smacallan
5954261fa58SmacallanBool
5964261fa58SmacallanCG14PrepareComposite(int op, PicturePtr pSrcPicture,
5974261fa58Smacallan                             PicturePtr pMaskPicture,
5984261fa58Smacallan                             PicturePtr pDstPicture,
5994261fa58Smacallan                             PixmapPtr  pSrc,
6004261fa58Smacallan                             PixmapPtr  pMask,
6014261fa58Smacallan                             PixmapPtr  pDst)
6024261fa58Smacallan{
6034261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
6044261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
6054261fa58Smacallan
6064261fa58Smacallan	ENTER;
6074261fa58Smacallan
6084261fa58Smacallan	if (pSrcPicture->pSourcePict != NULL) {
6094261fa58Smacallan		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
6104261fa58Smacallan			p->fillcolour =
6114261fa58Smacallan			    pSrcPicture->pSourcePict->solidFill.color;
6124261fa58Smacallan			DPRINTF(X_ERROR, "%s: solid src %08x\n",
6134261fa58Smacallan			    __func__, p->fillcolour);
6144261fa58Smacallan		}
6154261fa58Smacallan	}
6164261fa58Smacallan	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
6174261fa58Smacallan		if (pMaskPicture->pSourcePict->type ==
6184261fa58Smacallan		    SourcePictTypeSolidFill) {
6194261fa58Smacallan			p->fillcolour =
6204261fa58Smacallan			   pMaskPicture->pSourcePict->solidFill.color;
6214261fa58Smacallan			DPRINTF(X_ERROR, "%s: solid mask %08x\n",
6224261fa58Smacallan			    __func__, p->fillcolour);
6234261fa58Smacallan		}
6244261fa58Smacallan	}
6254261fa58Smacallan	if (pMaskPicture != NULL) {
6264261fa58Smacallan		p->mskoff = exaGetPixmapOffset(pMask);
6274261fa58Smacallan		p->mskpitch = exaGetPixmapPitch(pMask);
6284261fa58Smacallan		p->mskformat = pMaskPicture->format;
6294261fa58Smacallan	}
6304261fa58Smacallan	p->srcoff = exaGetPixmapOffset(pSrc);
6314261fa58Smacallan	p->srcpitch = exaGetPixmapPitch(pSrc);
6324261fa58Smacallan	p->srcformat = pSrcPicture->format;
6334261fa58Smacallan	p->dstformat = pDstPicture->format;
6344261fa58Smacallan	p->op = op;
6354261fa58Smacallan#ifdef SX_DEBUG
6364261fa58Smacallan	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
6374261fa58Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
6384261fa58Smacallan#endif
6394261fa58Smacallan	return TRUE;
6404261fa58Smacallan}
6414261fa58Smacallan
6424261fa58Smacallanvoid CG14Comp_Over32(Cg14Ptr p,
6434261fa58Smacallan                   uint32_t src, uint32_t srcpitch,
6444261fa58Smacallan                   uint32_t dst, uint32_t dstpitch,
6454261fa58Smacallan                   int width, int height)
6464261fa58Smacallan{
6474261fa58Smacallan	uint32_t msk = src, mskx, dstx, m;
6484261fa58Smacallan	int line, x, i;
6494261fa58Smacallan
6504261fa58Smacallan	ENTER;
6514261fa58Smacallan	/* first get the source colour */
6524261fa58Smacallan	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
6534261fa58Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
6544261fa58Smacallan	for (line = 0; line < height; line++) {
6554261fa58Smacallan		mskx = msk;
6564261fa58Smacallan		dstx = dst;
6574261fa58Smacallan#ifdef SX_SINGLE
6584261fa58Smacallan
6594261fa58Smacallan		for (x = 0; x < width; x++) {
6604261fa58Smacallan			m = *(volatile uint32_t *)(p->fb + mskx);
6614261fa58Smacallan			m = m >> 24;
6624261fa58Smacallan			if (m == 0) {
6634261fa58Smacallan				/* nothing to do - all transparent */
6644261fa58Smacallan			} else if (m == 0xff) {
6654261fa58Smacallan				/* all opaque */
6664261fa58Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
6674261fa58Smacallan			} else {
6684261fa58Smacallan				/* fetch alpha value, stick it into scam */
6694261fa58Smacallan				/* mask is in R[12:15] */
6704261fa58Smacallan				/*write_sx_io(p, mskx,
6714261fa58Smacallan				    SX_LDUQ0(12, 0, mskx & 7));*/
6724261fa58Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
6734261fa58Smacallan				/* fetch dst pixel */
6744261fa58Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
6754261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
6764261fa58Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
6774261fa58Smacallan				/*
6784261fa58Smacallan				 * src * alpha + R0
6794261fa58Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
6804261fa58Smacallan				 */
6814261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
6824261fa58Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
6834261fa58Smacallan
6844261fa58Smacallan				/* invert SCAM */
6854261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
6864261fa58Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
6874261fa58Smacallan#ifdef SX_DEBUG
6884261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
6894261fa58Smacallan				    SX_XORV(12, 8, 13, 0));
6904261fa58Smacallan#endif
6914261fa58Smacallan				/* dst * (1 - alpha) + R[13:15] */
6924261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
6934261fa58Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
6944261fa58Smacallan				write_sx_io(p, dstx,
6954261fa58Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
6964261fa58Smacallan			}
6974261fa58Smacallan			dstx += 4;
6984261fa58Smacallan			mskx += 4;
6994261fa58Smacallan		}
7004261fa58Smacallan#else
7014261fa58Smacallan		for (x = 0; x < width; x += 4) {
7024261fa58Smacallan			/* fetch 4 mask values */
7034261fa58Smacallan			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
7044261fa58Smacallan			/* fetch destination pixels */
7054261fa58Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
7064261fa58Smacallan			/* duplicate them for all channels */
7074261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
7084261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
7094261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
7104261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
7114261fa58Smacallan			/* generate inverted alpha */
7124261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7134261fa58Smacallan			    SX_XORS(12, 8, 28, 15));
7144261fa58Smacallan			/* multiply source */
7154261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7164261fa58Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
7174261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7184261fa58Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
7194261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7204261fa58Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
7214261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7224261fa58Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
7234261fa58Smacallan			/* multiply dest */
7244261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7254261fa58Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
7264261fa58Smacallan			/* add up */
7274261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
7284261fa58Smacallan			    SX_ADDV(44, 76, 92, 15));
7294261fa58Smacallan			/* write back */
7304261fa58Smacallan			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
7314261fa58Smacallan			dstx += 16;
7324261fa58Smacallan			mskx += 16;
7334261fa58Smacallan		}
7344261fa58Smacallan#endif
7354261fa58Smacallan		dst += dstpitch;
7364261fa58Smacallan		msk += srcpitch;
7374261fa58Smacallan	}
7384261fa58Smacallan}
7394261fa58Smacallan
7404261fa58Smacallanvoid CG14Comp_Over8(Cg14Ptr p,
7414261fa58Smacallan                   uint32_t src, uint32_t srcpitch,
7424261fa58Smacallan                   uint32_t dst, uint32_t dstpitch,
7434261fa58Smacallan                   int width, int height)
7444261fa58Smacallan{
7454261fa58Smacallan	uint32_t msk = src, mskx, dstx, m;
7464261fa58Smacallan	int line, x, i;
7474261fa58Smacallan#ifdef SX_DEBUG
7484261fa58Smacallan	char buffer[256];
7494261fa58Smacallan#endif
7504261fa58Smacallan	ENTER;
7514261fa58Smacallan
7524261fa58Smacallan	/* first get the source colour */
7534261fa58Smacallan	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
7544261fa58Smacallan	write_sx_reg(p, SX_QUEUED(8), 0xff);
7554261fa58Smacallan	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
7564261fa58Smacallan	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
7574261fa58Smacallan	    *(uint32_t *)(p->fb + p->srcoff));
7584261fa58Smacallan	for (line = 0; line < height; line++) {
7594261fa58Smacallan		mskx = msk;
7604261fa58Smacallan		dstx = dst;
7614261fa58Smacallan#ifdef SX_SINGLE
7624261fa58Smacallan
7634261fa58Smacallan		for (x = 0; x < width; x++) {
7644261fa58Smacallan			m = *(volatile uint8_t *)(p->fb + mskx);
7654261fa58Smacallan#ifdef SX_DEBUG
7664261fa58Smacallan			buffer[x] = c[m >> 5];
7674261fa58Smacallan#endif
7684261fa58Smacallan			if (m == 0) {
7694261fa58Smacallan				/* nothing to do - all transparent */
7704261fa58Smacallan			} else if (m == 0xff) {
7714261fa58Smacallan				/* all opaque */
7724261fa58Smacallan				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
7734261fa58Smacallan			} else {
7744261fa58Smacallan				/* fetch alpha value, stick it into scam */
7754261fa58Smacallan				/* mask is in R[12:15] */
7764261fa58Smacallan				/*write_sx_io(p, mskx & ~7,
7774261fa58Smacallan				    SX_LDB(12, 0, mskx & 7));*/
7784261fa58Smacallan				write_sx_reg(p, SX_QUEUED(12), m);
7794261fa58Smacallan				/* fetch dst pixel */
7804261fa58Smacallan				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
7814261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
7824261fa58Smacallan				    SX_ORV(12, 0, R_SCAM, 0));
7834261fa58Smacallan				/*
7844261fa58Smacallan				 * src * alpha + R0
7854261fa58Smacallan				 * R[9:11] * SCAM + R0 -> R[17:19]
7864261fa58Smacallan				 */
7874261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
7884261fa58Smacallan				    SX_SAXP16X16SR8(9, 0, 17, 2));
7894261fa58Smacallan
7904261fa58Smacallan				/* invert SCAM */
7914261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
7924261fa58Smacallan				    SX_XORV(12, 8, R_SCAM, 0));
7934261fa58Smacallan#ifdef SX_DEBUG
7944261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
7954261fa58Smacallan				    SX_XORV(12, 8, 13, 0));
7964261fa58Smacallan#endif
7974261fa58Smacallan				/* dst * (1 - alpha) + R[13:15] */
7984261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
7994261fa58Smacallan				    SX_SAXP16X16SR8(21, 17, 25, 2));
8004261fa58Smacallan				write_sx_io(p, dstx,
8014261fa58Smacallan				    SX_STUQ0C(24, 0, dstx & 7));
8024261fa58Smacallan			}
8034261fa58Smacallan			dstx += 4;
8044261fa58Smacallan			mskx += 1;
8054261fa58Smacallan		}
8064261fa58Smacallan#ifdef SX_DEBUG
8074261fa58Smacallan		buffer[x] = 0;
8084261fa58Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
8094261fa58Smacallan#endif
8104261fa58Smacallan#else
8114261fa58Smacallan		for (x = 0; x < width; x += 4) {
8124261fa58Smacallan			/* fetch 4 mask values */
8134261fa58Smacallan			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
8144261fa58Smacallan			/* fetch destination pixels */
8154261fa58Smacallan			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
8164261fa58Smacallan			/* duplicate them for all channels */
8174261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
8184261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
8194261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
8204261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
8214261fa58Smacallan			/* generate inverted alpha */
8224261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8234261fa58Smacallan			    SX_XORS(12, 8, 28, 15));
8244261fa58Smacallan			/* multiply source */
8254261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8264261fa58Smacallan			    SX_MUL16X16SR8(8, 12, 44, 3));
8274261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8284261fa58Smacallan			    SX_MUL16X16SR8(8, 16, 48, 3));
8294261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8304261fa58Smacallan			    SX_MUL16X16SR8(8, 20, 52, 3));
8314261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8324261fa58Smacallan			    SX_MUL16X16SR8(8, 24, 56, 3));
8334261fa58Smacallan			/* multiply dest */
8344261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8354261fa58Smacallan			    SX_MUL16X16SR8(28, 60, 76, 15));
8364261fa58Smacallan			/* add up */
8374261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8384261fa58Smacallan			    SX_ADDV(44, 76, 92, 15));
8394261fa58Smacallan			/* write back */
8404261fa58Smacallan			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
8414261fa58Smacallan			dstx += 16;
8424261fa58Smacallan			mskx += 4;
8434261fa58Smacallan		}
8444261fa58Smacallan#endif
8454261fa58Smacallan		dst += dstpitch;
8464261fa58Smacallan		msk += srcpitch;
8474261fa58Smacallan	}
8484261fa58Smacallan}
8494261fa58Smacallan
8504261fa58Smacallanvoid CG14Comp_Add32(Cg14Ptr p,
8514261fa58Smacallan                   uint32_t src, uint32_t srcpitch,
8524261fa58Smacallan                   uint32_t dst, uint32_t dstpitch,
8534261fa58Smacallan                   int width, int height)
8544261fa58Smacallan{
8554261fa58Smacallan	int line;
8564261fa58Smacallan	uint32_t srcx, dstx;
8574261fa58Smacallan	int full, part, x;
8584261fa58Smacallan
8594261fa58Smacallan	ENTER;
8604261fa58Smacallan	full = width >> 3;	/* chunks of 8 */
8614261fa58Smacallan	part = width & 7;	/* leftovers */
8624261fa58Smacallan	/* we do this up to 8 pixels at a time */
8634261fa58Smacallan	for (line = 0; line < height; line++) {
8644261fa58Smacallan		srcx = src;
8654261fa58Smacallan		dstx = dst;
8664261fa58Smacallan		for (x = 0; x < full; x++) {
8674261fa58Smacallan			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
8684261fa58Smacallan			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
8694261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8704261fa58Smacallan			    SX_ADDV(8, 40, 72, 15));
8714261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8724261fa58Smacallan			    SX_ADDV(24, 56, 88, 15));
8734261fa58Smacallan			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
8744261fa58Smacallan			srcx += 128;
8754261fa58Smacallan			dstx += 128;
8764261fa58Smacallan		}
8774261fa58Smacallan
8784261fa58Smacallan		/* do leftovers */
8794261fa58Smacallan		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
8804261fa58Smacallan		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
8814261fa58Smacallan		if (part & 16) {
8824261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8834261fa58Smacallan			    SX_ADDV(8, 40, 72, 15));
8844261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8854261fa58Smacallan			    SX_ADDV(24, 56, 88, part - 17));
8864261fa58Smacallan		} else {
8874261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
8884261fa58Smacallan			    SX_ADDV(8, 40, 72, part - 1));
8894261fa58Smacallan		}
8904261fa58Smacallan		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
8914261fa58Smacallan
8924261fa58Smacallan		/* next line */
8934261fa58Smacallan		src += srcpitch;
8944261fa58Smacallan		dst += dstpitch;
8954261fa58Smacallan	}
8964261fa58Smacallan}
8974261fa58Smacallan
8984261fa58Smacallanvoid CG14Comp_Add8(Cg14Ptr p,
8994261fa58Smacallan                   uint32_t src, uint32_t srcpitch,
9004261fa58Smacallan                   uint32_t dst, uint32_t dstpitch,
9014261fa58Smacallan                   int width, int height)
9024261fa58Smacallan{
9034261fa58Smacallan	int line;
9044261fa58Smacallan	uint32_t srcx, dstx, srcoff, dstoff;
9054261fa58Smacallan	int pre, full, part, x;
9064261fa58Smacallan	uint8_t *d;
9074261fa58Smacallan	char buffer[256];
9084261fa58Smacallan	ENTER;
9094261fa58Smacallan
9104261fa58Smacallan	srcoff = src & 7;
9114261fa58Smacallan	src &= ~7;
9124261fa58Smacallan	dstoff = dst & 7;
9134261fa58Smacallan	dst &= ~7;
9144261fa58Smacallan	full = width >> 5;	/* chunks of 32 */
9154261fa58Smacallan	part = width & 31;	/* leftovers */
9164261fa58Smacallan
9174261fa58Smacallan#ifdef SX_DEBUG
9184261fa58Smacallan	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
9194261fa58Smacallan	    width, height, full, part);
9204261fa58Smacallan#endif
9214261fa58Smacallan	/* we do this up to 32 pixels at a time */
9224261fa58Smacallan	for (line = 0; line < height; line++) {
9234261fa58Smacallan		srcx = src;
9244261fa58Smacallan		dstx = dst;
9254261fa58Smacallan#ifdef SX_ADD_SOFTWARE
9264261fa58Smacallan		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
9274261fa58Smacallan		d = (uint8_t *)(p->fb + dstx + dstoff);
9284261fa58Smacallan		for (x = 0; x < width; x++) {
9294261fa58Smacallan			d[x] = min(255, s[x] + d[x]);
9304261fa58Smacallan		}
9314261fa58Smacallan#else
9324261fa58Smacallan		for (x = 0; x < full; x++) {
9334261fa58Smacallan			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
9344261fa58Smacallan			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
9354261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
9364261fa58Smacallan			    SX_ADDV(8, 40, 72, 15));
9374261fa58Smacallan			write_sx_reg(p, SX_INSTRUCTIONS,
9384261fa58Smacallan			    SX_ADDV(24, 56, 88, 15));
9394261fa58Smacallan			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
9404261fa58Smacallan			srcx += 32;
9414261fa58Smacallan			dstx += 32;
9424261fa58Smacallan		}
9434261fa58Smacallan
9444261fa58Smacallan		if (part > 0) {
9454261fa58Smacallan			/* do leftovers */
9464261fa58Smacallan			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
9474261fa58Smacallan			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
9484261fa58Smacallan			if (part > 16) {
9494261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
9504261fa58Smacallan				    SX_ADDV(8, 40, 72, 15));
9514261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
9524261fa58Smacallan				    SX_ADDV(24, 56, 88, part - 17));
9534261fa58Smacallan			} else {
9544261fa58Smacallan				write_sx_reg(p, SX_INSTRUCTIONS,
9554261fa58Smacallan				    SX_ADDV(8, 40, 72, part - 1));
9564261fa58Smacallan			}
9574261fa58Smacallan			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
9584261fa58Smacallan		}
9594261fa58Smacallan#endif
9604261fa58Smacallan#ifdef SX_DEBUG
9614261fa58Smacallan		d = (uint8_t *)(p->fb + src + srcoff);
9624261fa58Smacallan		for (x = 0; x < width; x++) {
9634261fa58Smacallan			buffer[x] = c[d[x]>>5];
9644261fa58Smacallan		}
9654261fa58Smacallan		buffer[x] = 0;
9664261fa58Smacallan		xf86Msg(X_ERROR, "%s\n", buffer);
9674261fa58Smacallan#endif
9684261fa58Smacallan		/* next line */
9694261fa58Smacallan		src += srcpitch;
9704261fa58Smacallan		dst += dstpitch;
9714261fa58Smacallan	}
9724261fa58Smacallan}
9734261fa58Smacallan
9744261fa58Smacallanvoid
9754261fa58SmacallanCG14Composite(PixmapPtr pDst, int srcX, int srcY,
9764261fa58Smacallan                              int maskX, int maskY,
9774261fa58Smacallan                              int dstX, int dstY,
9784261fa58Smacallan                              int width, int height)
9794261fa58Smacallan{
9804261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
9814261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
9824261fa58Smacallan	uint32_t dstoff, dstpitch;
9834261fa58Smacallan	uint32_t dst, msk, src;
9844261fa58Smacallan
9854261fa58Smacallan	ENTER;
9864261fa58Smacallan	dstoff = exaGetPixmapOffset(pDst);
9874261fa58Smacallan	dstpitch = exaGetPixmapPitch(pDst);
9884261fa58Smacallan
9894261fa58Smacallan	switch (p->op) {
9904261fa58Smacallan		case PictOpOver:
9914261fa58Smacallan			dst = dstoff + (dstY * dstpitch) + (dstX << 2);
9924261fa58Smacallan			DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
9934261fa58Smacallan			    p->mskformat, p->dstformat, srcX, srcY);
9944261fa58Smacallan			switch (p->mskformat) {
9954261fa58Smacallan				case PICT_a8:
9964261fa58Smacallan					msk = p->mskoff +
9974261fa58Smacallan					    (maskY * p->mskpitch) + maskX;
9984261fa58Smacallan					CG14Comp_Over8(p, msk, p->mskpitch,
9994261fa58Smacallan					    dst, dstpitch, width, height);
10004261fa58Smacallan					break;
10014261fa58Smacallan				case PICT_a8r8g8b8:
10024261fa58Smacallan				case PICT_a8b8g8r8:
10034261fa58Smacallan					msk = p->mskoff +
10044261fa58Smacallan					    (maskY * p->mskpitch) +
10054261fa58Smacallan					    (maskX << 2);
10064261fa58Smacallan					CG14Comp_Over32(p, msk, p->mskpitch,
10074261fa58Smacallan					    dst, dstpitch, width, height);
10084261fa58Smacallan					break;
10094261fa58Smacallan				default:
10104261fa58Smacallan					xf86Msg(X_ERROR,
10114261fa58Smacallan					    "unsupported mask format\n");
10124261fa58Smacallan			}
10134261fa58Smacallan			break;
10144261fa58Smacallan		case PictOpAdd:
10154261fa58Smacallan			DPRINTF(X_ERROR, "Add %08x %08x\n",
10164261fa58Smacallan			    p->srcformat, p->dstformat);
10174261fa58Smacallan			switch (p->srcformat) {
10184261fa58Smacallan				case PICT_a8:
10194261fa58Smacallan					src = p->srcoff +
10204261fa58Smacallan					    (srcY * p->srcpitch) + srcX;
10214261fa58Smacallan					dst = dstoff + (dstY * dstpitch) + dstX;
10224261fa58Smacallan					CG14Comp_Add8(p, src, p->srcpitch,
10234261fa58Smacallan					    dst, dstpitch, width, height);
10244261fa58Smacallan					break;
10254261fa58Smacallan				case PICT_a8r8g8b8:
10264261fa58Smacallan				case PICT_x8r8g8b8:
10274261fa58Smacallan					src = p->srcoff +
10284261fa58Smacallan					    (srcY * p->srcpitch) + (srcX << 2);
10294261fa58Smacallan					dst = dstoff + (dstY * dstpitch) +
10304261fa58Smacallan					    (dstX << 2);
10314261fa58Smacallan					CG14Comp_Add32(p, src, p->srcpitch,
10324261fa58Smacallan					    dst, dstpitch, width, height);
10334261fa58Smacallan					break;
10344261fa58Smacallan				default:
10354261fa58Smacallan					xf86Msg(X_ERROR,
10364261fa58Smacallan					    "unsupported src format\n");
10374261fa58Smacallan			}
10384261fa58Smacallan			break;
10394261fa58Smacallan		default:
10404261fa58Smacallan			xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
10414261fa58Smacallan	}
10424261fa58Smacallan	exaMarkSync(pDst->drawable.pScreen);
10434261fa58Smacallan}
10444261fa58Smacallan
10454261fa58Smacallan
10464261fa58Smacallan
10474261fa58SmacallanBool
10484261fa58SmacallanCG14InitAccel(ScreenPtr pScreen)
10494261fa58Smacallan{
10504261fa58Smacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
10514261fa58Smacallan	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
10524261fa58Smacallan	ExaDriverPtr pExa;
10534261fa58Smacallan
10544261fa58Smacallan	pExa = exaDriverAlloc();
10554261fa58Smacallan	if (!pExa)
10564261fa58Smacallan		return FALSE;
10574261fa58Smacallan
10584261fa58Smacallan	p->pExa = pExa;
10594261fa58Smacallan
10604261fa58Smacallan	pExa->exa_major = EXA_VERSION_MAJOR;
10614261fa58Smacallan	pExa->exa_minor = EXA_VERSION_MINOR;
10624261fa58Smacallan
10634261fa58Smacallan	pExa->memoryBase = p->fb;
10644261fa58Smacallan	pExa->memorySize = p->memsize;
10654261fa58Smacallan	pExa->offScreenBase = p->width * p->height * 4;
10664261fa58Smacallan
10674261fa58Smacallan	/*
10684261fa58Smacallan	 * SX memory instructions are written to 64bit aligned addresses with
10694261fa58Smacallan	 * a 3 bit displacement. Make sure the displacement remains constant
10704261fa58Smacallan	 * within one column
10714261fa58Smacallan	 */
10724261fa58Smacallan
10734261fa58Smacallan	pExa->pixmapOffsetAlign = 8;
10744261fa58Smacallan	pExa->pixmapPitchAlign = 8;
10754261fa58Smacallan
10764261fa58Smacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS |
10774261fa58Smacallan		      /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/
10784261fa58Smacallan		      EXA_MIXED_PIXMAPS;
10794261fa58Smacallan
10804261fa58Smacallan	/*
10814261fa58Smacallan	 * these limits are bogus
10824261fa58Smacallan	 * SX doesn't deal with coordinates at all, so there is no limit but
10834261fa58Smacallan	 * we have to put something here
10844261fa58Smacallan	 */
10854261fa58Smacallan	pExa->maxX = 4096;
10864261fa58Smacallan	pExa->maxY = 4096;
10874261fa58Smacallan
10884261fa58Smacallan	pExa->WaitMarker = CG14WaitMarker;
10894261fa58Smacallan
10904261fa58Smacallan	pExa->PrepareSolid = CG14PrepareSolid;
10914261fa58Smacallan	pExa->Solid = CG14Solid;
10924261fa58Smacallan	pExa->DoneSolid = CG14DoneCopy;
10934261fa58Smacallan	pExa->PrepareCopy = CG14PrepareCopy;
10944261fa58Smacallan	pExa->Copy = CG14Copy;
10954261fa58Smacallan	pExa->DoneCopy = CG14DoneCopy;
10964261fa58Smacallan	if (p->use_xrender) {
10974261fa58Smacallan		pExa->CheckComposite = CG14CheckComposite;
10984261fa58Smacallan		pExa->PrepareComposite = CG14PrepareComposite;
10994261fa58Smacallan		pExa->Composite = CG14Composite;
11004261fa58Smacallan		pExa->DoneComposite = CG14DoneCopy;
11014261fa58Smacallan	}
11024261fa58Smacallan
11034261fa58Smacallan	/* EXA hits more optimized paths when it does not have to fallback
11044261fa58Smacallan	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
11054261fa58Smacallan	 */
11064261fa58Smacallan	pExa->UploadToScreen = CG14UploadToScreen;
11074261fa58Smacallan	pExa->DownloadFromScreen = CG14DownloadFromScreen;
11084261fa58Smacallan
11094261fa58Smacallan	/* do some hardware init */
11104261fa58Smacallan	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
11114261fa58Smacallan	p->last_mask = 0xffffffff;
11124261fa58Smacallan	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
11134261fa58Smacallan	p->last_rop = 0xcc;
11144261fa58Smacallan	return exaDriverInit(pScreen, pExa);
11154261fa58Smacallan}
1116