cg14_accel.c revision 86527ef6
186527ef6Smacallan/* $NetBSD: cg14_accel.c,v 1.25 2021/12/10 21:57:13 macallan Exp $ */ 24261fa58Smacallan/* 34261fa58Smacallan * Copyright (c) 2013 Michael Lorenz 44261fa58Smacallan * All rights reserved. 54261fa58Smacallan * 64261fa58Smacallan * Redistribution and use in source and binary forms, with or without 74261fa58Smacallan * modification, are permitted provided that the following conditions 84261fa58Smacallan * are met: 94261fa58Smacallan * 104261fa58Smacallan * - Redistributions of source code must retain the above copyright 114261fa58Smacallan * notice, this list of conditions and the following disclaimer. 124261fa58Smacallan * - Redistributions in binary form must reproduce the above 134261fa58Smacallan * copyright notice, this list of conditions and the following 144261fa58Smacallan * disclaimer in the documentation and/or other materials provided 154261fa58Smacallan * with the distribution. 164261fa58Smacallan * 174261fa58Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 184261fa58Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 194261fa58Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 204261fa58Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 214261fa58Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 224261fa58Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 234261fa58Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 244261fa58Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 254261fa58Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 264261fa58Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 274261fa58Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 284261fa58Smacallan * POSSIBILITY OF SUCH DAMAGE. 
294261fa58Smacallan * 304261fa58Smacallan */ 31c88c16f8Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 364261fa58Smacallan#include <sys/types.h> 374261fa58Smacallan 384261fa58Smacallan/* all driver need this */ 394261fa58Smacallan#include "xf86.h" 404261fa58Smacallan#include "xf86_OSproc.h" 414261fa58Smacallan#include "compiler.h" 424261fa58Smacallan 434261fa58Smacallan#include "cg14.h" 444261fa58Smacallan 45b8ad197aSmacallan//#define SX_DEBUG 464261fa58Smacallan 474261fa58Smacallan#ifdef SX_DEBUG 484261fa58Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 494261fa58Smacallan#define DPRINTF xf86Msg 504261fa58Smacallan#else 514261fa58Smacallan#define ENTER 524261fa58Smacallan#define DPRINTF while (0) xf86Msg 534261fa58Smacallan#endif 544261fa58Smacallan 554261fa58Smacallan#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 564261fa58Smacallan 574261fa58Smacallan/* 0xcc is SX's GXcopy equivalent */ 584261fa58Smacallanuint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 594261fa58Smacallan 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 604261fa58Smacallan 614261fa58Smacallanint src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 624261fa58Smacallan PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 634261fa58Smacallanint tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 644261fa58Smacallan 65f71acd79Smacallanstatic void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 66f71acd79Smacallanstatic void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 67f71acd79Smacallan 684261fa58Smacallanstatic inline void 694261fa58SmacallanCG14Wait(Cg14Ptr p) 704261fa58Smacallan{ 71fc473876Smacallan int bail = 10000000; 72fc473876Smacallan /* we wait for the busy bit to clear */ 73fc473876Smacallan while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 74fc473876Smacallan (bail > 0)) { 75fc473876Smacallan bail--; 76fc473876Smacallan }; 77fc473876Smacallan if (bail 
== 0) { 78fc473876Smacallan xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 79fc473876Smacallan read_sx_reg(p, SX_CONTROL_STATUS), 80fc473876Smacallan read_sx_reg(p, SX_ERROR)); 81fc473876Smacallan } 824261fa58Smacallan} 834261fa58Smacallan 844261fa58Smacallanstatic void 854261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker) 864261fa58Smacallan{ 874261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 884261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 894261fa58Smacallan 904261fa58Smacallan CG14Wait(p); 914261fa58Smacallan} 924261fa58Smacallan 934261fa58Smacallanstatic Bool 944261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 954261fa58Smacallan int xdir, int ydir, int alu, Pixel planemask) 964261fa58Smacallan{ 974261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 984261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 994261fa58Smacallan 1004261fa58Smacallan ENTER; 1018c65af2dSmacallan DPRINTF(X_ERROR, "%s bpp %d rop %x\n", __func__, 10281c68cf8Smacallan pSrcPixmap->drawable.bitsPerPixel, alu); 1034261fa58Smacallan 1044261fa58Smacallan if (planemask != p->last_mask) { 1054261fa58Smacallan CG14Wait(p); 1064261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, planemask); 1074261fa58Smacallan p->last_mask = planemask; 1084261fa58Smacallan } 1094261fa58Smacallan alu = sx_rop[alu]; 1104261fa58Smacallan if (alu != p->last_rop) { 1114261fa58Smacallan CG14Wait(p); 1124261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, alu); 1134261fa58Smacallan p->last_rop = alu; 1144261fa58Smacallan } 115f71acd79Smacallan switch (pSrcPixmap->drawable.bitsPerPixel) { 116f71acd79Smacallan case 8: 117f71acd79Smacallan p->pExa->Copy = CG14Copy8; 118f71acd79Smacallan break; 119f71acd79Smacallan case 32: 120f71acd79Smacallan p->pExa->Copy = CG14Copy32; 121f71acd79Smacallan break; 122f71acd79Smacallan default: 123f71acd79Smacallan xf86Msg(X_ERROR, "%s depth %d\n", __func__, 124f71acd79Smacallan 
pSrcPixmap->drawable.bitsPerPixel); 125f71acd79Smacallan } 1264261fa58Smacallan p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 1274261fa58Smacallan p->srcoff = exaGetPixmapOffset(pSrcPixmap); 1284261fa58Smacallan p->xdir = xdir; 1294261fa58Smacallan p->ydir = ydir; 1304261fa58Smacallan return TRUE; 1314261fa58Smacallan} 1324261fa58Smacallan 1334261fa58Smacallanstatic void 134f71acd79SmacallanCG14Copy32(PixmapPtr pDstPixmap, 1354261fa58Smacallan int srcX, int srcY, int dstX, int dstY, int w, int h) 1364261fa58Smacallan{ 1374261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 1384261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1394261fa58Smacallan int dstpitch, dstoff, srcpitch, srcoff; 1404261fa58Smacallan int srcstart, dststart, xinc, srcinc, dstinc; 1414261fa58Smacallan int line, count, s, d, num; 1424261fa58Smacallan 1434261fa58Smacallan ENTER; 1444261fa58Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 1454261fa58Smacallan dstoff = exaGetPixmapOffset(pDstPixmap); 1464261fa58Smacallan srcpitch = p->srcpitch; 1474261fa58Smacallan srcoff = p->srcoff; 1484261fa58Smacallan /* 1494261fa58Smacallan * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 1504261fa58Smacallan * actually wrote anything and only sync if it did 1514261fa58Smacallan */ 1524261fa58Smacallan srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 1534261fa58Smacallan dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 1544261fa58Smacallan 1554261fa58Smacallan /* 1564261fa58Smacallan * we always copy up to 32 pixels at a time so direction doesn't 1574261fa58Smacallan * matter if w<=32 1584261fa58Smacallan */ 1594261fa58Smacallan if (w > 32) { 1604261fa58Smacallan if (p->xdir < 0) { 1614261fa58Smacallan srcstart += (w - 32) << 2; 1624261fa58Smacallan dststart += (w - 32) << 2; 1634261fa58Smacallan xinc = -128; 1644261fa58Smacallan } else 1654261fa58Smacallan xinc = 128; 1664261fa58Smacallan } else 1674261fa58Smacallan xinc = 128; 
1684261fa58Smacallan if (p->ydir < 0) { 1694261fa58Smacallan srcstart += (h - 1) * srcpitch; 1704261fa58Smacallan dststart += (h - 1) * dstpitch; 1714261fa58Smacallan srcinc = -srcpitch; 1724261fa58Smacallan dstinc = -dstpitch; 1734261fa58Smacallan } else { 1744261fa58Smacallan srcinc = srcpitch; 1754261fa58Smacallan dstinc = dstpitch; 1764261fa58Smacallan } 1774261fa58Smacallan if (p->last_rop == 0xcc) { 1784261fa58Smacallan /* plain old copy */ 1794261fa58Smacallan if ( xinc > 0) { 1804261fa58Smacallan /* going left to right */ 1814261fa58Smacallan for (line = 0; line < h; line++) { 1824261fa58Smacallan count = 0; 1834261fa58Smacallan s = srcstart; 1844261fa58Smacallan d = dststart; 1854261fa58Smacallan while ( count < w) { 1864261fa58Smacallan num = min(32, w - count); 1874261fa58Smacallan write_sx_io(p, s, 1884261fa58Smacallan SX_LD(10, num - 1, s & 7)); 1894261fa58Smacallan write_sx_io(p, d, 1904261fa58Smacallan SX_STM(10, num - 1, d & 7)); 1914261fa58Smacallan s += xinc; 1924261fa58Smacallan d += xinc; 1934261fa58Smacallan count += 32; 1944261fa58Smacallan } 1954261fa58Smacallan srcstart += srcinc; 1964261fa58Smacallan dststart += dstinc; 1974261fa58Smacallan } 1984261fa58Smacallan } else { 1994261fa58Smacallan /* going right to left */ 2004261fa58Smacallan int i, chunks = (w >> 5); 2014261fa58Smacallan for (line = 0; line < h; line++) { 2024261fa58Smacallan s = srcstart; 2034261fa58Smacallan d = dststart; 2044261fa58Smacallan count = w; 2054261fa58Smacallan for (i = 0; i < chunks; i++) { 2064261fa58Smacallan write_sx_io(p, s, 2074261fa58Smacallan SX_LD(10, 31, s & 7)); 2084261fa58Smacallan write_sx_io(p, d, 2094261fa58Smacallan SX_STM(10, 31, d & 7)); 2104261fa58Smacallan s -= 128; 2114261fa58Smacallan d -= 128; 2124261fa58Smacallan count -= 32; 2134261fa58Smacallan } 2144261fa58Smacallan /* leftovers, if any */ 2154261fa58Smacallan if (count > 0) { 2164261fa58Smacallan s += (32 - count) << 2; 2174261fa58Smacallan d += (32 - count) << 2; 2184261fa58Smacallan 
write_sx_io(p, s, 2194261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2204261fa58Smacallan write_sx_io(p, d, 2214261fa58Smacallan SX_STM(10, count - 1, d & 7)); 2224261fa58Smacallan } 2234261fa58Smacallan srcstart += srcinc; 2244261fa58Smacallan dststart += dstinc; 2254261fa58Smacallan } 2264261fa58Smacallan } 2274261fa58Smacallan } else { 2284261fa58Smacallan /* ROPs needed */ 2294261fa58Smacallan if ( xinc > 0) { 2304261fa58Smacallan /* going left to right */ 2314261fa58Smacallan for (line = 0; line < h; line++) { 2324261fa58Smacallan count = 0; 2334261fa58Smacallan s = srcstart; 2344261fa58Smacallan d = dststart; 2354261fa58Smacallan while ( count < w) { 2364261fa58Smacallan num = min(32, w - count); 2374261fa58Smacallan write_sx_io(p, s, 2384261fa58Smacallan SX_LD(10, num - 1, s & 7)); 2394261fa58Smacallan write_sx_io(p, d, 2404261fa58Smacallan SX_LD(42, num - 1, d & 7)); 2414261fa58Smacallan if (num > 16) { 2424261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2434261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2444261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2454261fa58Smacallan SX_ROP(26, 58, 90, num - 17)); 2464261fa58Smacallan } else { 2474261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2484261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 2494261fa58Smacallan } 2504261fa58Smacallan write_sx_io(p, d, 2514261fa58Smacallan SX_STM(74, num - 1, d & 7)); 2524261fa58Smacallan s += xinc; 2534261fa58Smacallan d += xinc; 2544261fa58Smacallan count += 32; 2554261fa58Smacallan } 2564261fa58Smacallan srcstart += srcinc; 2574261fa58Smacallan dststart += dstinc; 2584261fa58Smacallan } 2594261fa58Smacallan } else { 2604261fa58Smacallan /* going right to left */ 2614261fa58Smacallan int i, chunks = (w >> 5); 2624261fa58Smacallan for (line = 0; line < h; line++) { 2634261fa58Smacallan s = srcstart; 2644261fa58Smacallan d = dststart; 2654261fa58Smacallan count = w; 2664261fa58Smacallan for (i = 0; i < chunks; i++) { 2674261fa58Smacallan write_sx_io(p, s, SX_LD(10, 31, s & 7)); 
2684261fa58Smacallan write_sx_io(p, d, SX_LD(42, 31, d & 7)); 2694261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2704261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2714261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2724261fa58Smacallan SX_ROP(26, 58, 90, 15)); 2734261fa58Smacallan write_sx_io(p, d, 2744261fa58Smacallan SX_STM(74, 31, d & 7)); 2754261fa58Smacallan s -= 128; 2764261fa58Smacallan d -= 128; 2774261fa58Smacallan count -= 32; 2784261fa58Smacallan } 2794261fa58Smacallan /* leftovers, if any */ 2804261fa58Smacallan if (count > 0) { 2814261fa58Smacallan s += (32 - count) << 2; 2824261fa58Smacallan d += (32 - count) << 2; 2834261fa58Smacallan write_sx_io(p, s, 2844261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2854261fa58Smacallan write_sx_io(p, d, 2864261fa58Smacallan SX_LD(42, count - 1, d & 7)); 2874261fa58Smacallan if (count > 16) { 2884261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2894261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2904261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2914261fa58Smacallan SX_ROP(26, 58, 90, count - 17)); 2924261fa58Smacallan } else { 2934261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2944261fa58Smacallan SX_ROP(10, 42, 74, count - 1)); 2954261fa58Smacallan } 2964261fa58Smacallan 2974261fa58Smacallan write_sx_io(p, d, 2984261fa58Smacallan SX_STM(74, count - 1, d & 7)); 2994261fa58Smacallan } 3004261fa58Smacallan srcstart += srcinc; 3014261fa58Smacallan dststart += dstinc; 3024261fa58Smacallan } 3034261fa58Smacallan } 3044261fa58Smacallan } 3054261fa58Smacallan exaMarkSync(pDstPixmap->drawable.pScreen); 3064261fa58Smacallan} 3074261fa58Smacallan 30881c68cf8Smacallan/* 30981c68cf8Smacallan * copy with same alignment, left to right, no ROP 31081c68cf8Smacallan */ 31181c68cf8Smacallanstatic void 31281c68cf8SmacallanCG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 31381c68cf8Smacallan{ 31481c68cf8Smacallan int saddr, daddr, pre, cnt, wrds; 31581c68cf8Smacallan 
31681c68cf8Smacallan ENTER; 31781c68cf8Smacallan 31881c68cf8Smacallan pre = srcstart & 3; 31981c68cf8Smacallan if (pre != 0) pre = 4 - pre; 32081c68cf8Smacallan pre = min(pre, w); 32181c68cf8Smacallan 32281c68cf8Smacallan while (h > 0) { 32381c68cf8Smacallan saddr = srcstart; 32481c68cf8Smacallan daddr = dststart; 32581c68cf8Smacallan cnt = w; 32681c68cf8Smacallan if (pre > 0) { 32781c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 32881c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7)); 32981c68cf8Smacallan saddr += pre; 33081c68cf8Smacallan daddr += pre; 33181c68cf8Smacallan cnt -= pre; 33281c68cf8Smacallan if (cnt == 0) goto next; 33381c68cf8Smacallan } 33481c68cf8Smacallan while (cnt > 3) { 33581c68cf8Smacallan wrds = min(32, cnt >> 2); 33681c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 33781c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 33881c68cf8Smacallan saddr += wrds << 2; 33981c68cf8Smacallan daddr += wrds << 2; 34081c68cf8Smacallan cnt -= wrds << 2; 34181c68cf8Smacallan } 34281c68cf8Smacallan if (cnt > 0) { 34381c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 34481c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7)); 34581c68cf8Smacallan } 34681c68cf8Smacallannext: 34781c68cf8Smacallan srcstart += srcpitch; 34881c68cf8Smacallan dststart += dstpitch; 34981c68cf8Smacallan h--; 35081c68cf8Smacallan } 35181c68cf8Smacallan} 35281c68cf8Smacallan 35381c68cf8Smacallan/* 35481c68cf8Smacallan * copy with same alignment, left to right, ROP 35581c68cf8Smacallan */ 35681c68cf8Smacallanstatic void 35781c68cf8SmacallanCG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 35881c68cf8Smacallan{ 35981c68cf8Smacallan int saddr, daddr, pre, cnt, wrds; 36081c68cf8Smacallan 36181c68cf8Smacallan ENTER; 36281c68cf8Smacallan 36381c68cf8Smacallan pre = srcstart & 3; 
36481c68cf8Smacallan if (pre != 0) pre = 4 - pre; 36581c68cf8Smacallan pre = min(pre, w); 36681c68cf8Smacallan 36781c68cf8Smacallan while (h > 0) { 36881c68cf8Smacallan saddr = srcstart; 36981c68cf8Smacallan daddr = dststart; 37081c68cf8Smacallan cnt = w; 37181c68cf8Smacallan if (pre > 0) { 37281c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7)); 37381c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7)); 37481c68cf8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1)); 37581c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7)); 37681c68cf8Smacallan saddr += pre; 37781c68cf8Smacallan daddr += pre; 37881c68cf8Smacallan cnt -= pre; 37981c68cf8Smacallan if (cnt == 0) goto next; 38081c68cf8Smacallan } 38181c68cf8Smacallan while (cnt > 3) { 38281c68cf8Smacallan wrds = min(32, cnt >> 2); 38381c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7)); 38481c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7)); 38581c68cf8Smacallan if (cnt > 16) { 38681c68cf8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15)); 38781c68cf8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17)); 38881c68cf8Smacallan } else 38981c68cf8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1)); 39081c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7)); 39181c68cf8Smacallan saddr += wrds << 2; 39281c68cf8Smacallan daddr += wrds << 2; 39381c68cf8Smacallan cnt -= wrds << 2; 39481c68cf8Smacallan } 39581c68cf8Smacallan if (cnt > 0) { 39681c68cf8Smacallan write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7)); 39781c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7)); 39881c68cf8Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1)); 39981c68cf8Smacallan write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7)); 40081c68cf8Smacallan } 40181c68cf8Smacallannext: 
40281c68cf8Smacallan srcstart += srcpitch; 40381c68cf8Smacallan dststart += dstpitch; 40481c68cf8Smacallan h--; 40581c68cf8Smacallan } 40681c68cf8Smacallan} 40781c68cf8Smacallan 408f787bc61Smacallan/* up to 124 pixels so direction doesn't matter, unaligned, ROP */ 409f787bc61Smacallanstatic void 410f787bc61SmacallanCG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 411f787bc61Smacallan{ 412f787bc61Smacallan int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 4139d7fb28bSmacallan int ssreg; 414f787bc61Smacallan#ifdef DEBUG 415f787bc61Smacallan int taddr = 4 + dstpitch * 50; 416f787bc61Smacallan#endif 417f787bc61Smacallan uint32_t lmask, rmask; 418f787bc61Smacallan ENTER; 419f787bc61Smacallan 420f787bc61Smacallan pre = dststart & 3; 421f787bc61Smacallan lmask = 0xffffffff >> pre; 422f787bc61Smacallan spre = srcstart & 3; 423f787bc61Smacallan /* 424f787bc61Smacallan * make sure we count all the words needed to cover the destination 425f787bc61Smacallan * line, covering potential partials on both ends 426f787bc61Smacallan */ 427f787bc61Smacallan wrds = (w + pre + 3) >> 2; 428f787bc61Smacallan swrds = (w + spre + 3) >> 2; 429f787bc61Smacallan 430f787bc61Smacallan if (spre < pre) { 431f787bc61Smacallan dist = 32 - (pre - spre) * 8; 432f787bc61Smacallan sreg = 9; 433f787bc61Smacallan } else { 434f787bc61Smacallan dist = (spre - pre) * 8; 435f787bc61Smacallan sreg = 8; 436f787bc61Smacallan } 437f787bc61Smacallan 438f787bc61Smacallan /* 439f787bc61Smacallan * mask out trailing pixels to avoid partial writes 440f787bc61Smacallan */ 441f787bc61Smacallan post = (dststart + w) & 3; 44276a85281Smacallan if (post != 0) { 44376a85281Smacallan rmask = ~(0xffffffff >> (post * 8)); 44476a85281Smacallan write_sx_reg(p, SX_QUEUED(7), rmask); 44576a85281Smacallan write_sx_reg(p, SX_QUEUED(6), ~rmask); 44676a85281Smacallan } 44776a85281Smacallan 448f787bc61Smacallan DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d 
%08x\n", __func__, 449f787bc61Smacallan w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 450f787bc61Smacallan 451f787bc61Smacallan /* mask out the leading pixels in dst by using a mask and ROP */ 45276a85281Smacallan if (pre != 0) { 45376a85281Smacallan write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); 45476a85281Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 45576a85281Smacallan } 456f787bc61Smacallan 457f787bc61Smacallan saddr = srcstart & ~3; 458f787bc61Smacallan daddr = dststart & ~3; 45976a85281Smacallan 460f787bc61Smacallan while (h > 0) { 461f787bc61Smacallan write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7)); 462f787bc61Smacallan write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 463f787bc61Smacallan if (wrds > 15) { 4649d7fb28bSmacallan if (dist != 0) { 4659d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 4669d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 4679d7fb28bSmacallan /* shifted source pixels are now at register 40+ */ 4689d7fb28bSmacallan ssreg = 40; 4699d7fb28bSmacallan } else ssreg = 8; 470f787bc61Smacallan if (pre != 0) { 471f787bc61Smacallan /* mask out leading junk */ 472f787bc61Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 4739d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 474f787bc61Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 4759d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14)); 476f787bc61Smacallan } else { 4779d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15)); 478f787bc61Smacallan } 4799d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16)); 480f787bc61Smacallan } else { 4819d7fb28bSmacallan if (dist != 0) { 4829d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 4839d7fb28bSmacallan ssreg = 40; 4849d7fb28bSmacallan } else ssreg = 8; 
485f787bc61Smacallan if (pre != 0) { 486f787bc61Smacallan /* mask out leading junk */ 487f787bc61Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 4889d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); 489f787bc61Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); 4909d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds)); 491f787bc61Smacallan } else { 4929d7fb28bSmacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds)); 493f787bc61Smacallan } 494f787bc61Smacallan } 495f787bc61Smacallan if (post != 0) { 496f787bc61Smacallan /* 497f787bc61Smacallan * if the last word to be written out is a partial we 498f787bc61Smacallan * mask out the leftovers and replace them with 499f787bc61Smacallan * background pixels 500f787bc61Smacallan * we could pull the same ROP * mask trick as we do on 501f787bc61Smacallan * the left end but it's less annoying this way and 502f787bc61Smacallan * the instruction count is the same 503f787bc61Smacallan */ 504f787bc61Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0)); 505f787bc61Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0)); 506f787bc61Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0)); 507f787bc61Smacallan } 508f787bc61Smacallan#ifdef DEBUG 509f787bc61Smacallan write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 510f787bc61Smacallan taddr += dstpitch; 511f787bc61Smacallan#endif 512f787bc61Smacallan write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7)); 513f787bc61Smacallan saddr += srcpitch; 514f787bc61Smacallan daddr += dstpitch; 515f787bc61Smacallan h--; 516f787bc61Smacallan } 517f787bc61Smacallan} 518f787bc61Smacallan 51976a85281Smacallan/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 52076a85281Smacallanstatic void 52176a85281SmacallanCG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) 52276a85281Smacallan{ 
52376a85281Smacallan int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; 52476a85281Smacallan int ssreg; 52576a85281Smacallan#ifdef DEBUG 52676a85281Smacallan int taddr = 4 + dstpitch * 50; 52776a85281Smacallan#endif 52876a85281Smacallan uint32_t lmask, rmask; 52976a85281Smacallan ENTER; 53076a85281Smacallan 53176a85281Smacallan pre = dststart & 3; 53276a85281Smacallan lmask = 0xffffffff >> pre; 53376a85281Smacallan spre = srcstart & 3; 53476a85281Smacallan /* 53576a85281Smacallan * make sure we count all the words needed to cover the destination 53676a85281Smacallan * line, covering potential partials on both ends 53776a85281Smacallan */ 53876a85281Smacallan wrds = (w + pre + 3) >> 2; 53976a85281Smacallan swrds = (w + spre + 3) >> 2; 54076a85281Smacallan 54176a85281Smacallan if (spre < pre) { 54276a85281Smacallan dist = 32 - (pre - spre) * 8; 54376a85281Smacallan sreg = 9; 54476a85281Smacallan } else { 54576a85281Smacallan dist = (spre - pre) * 8; 54676a85281Smacallan sreg = 8; 54776a85281Smacallan } 54876a85281Smacallan 54976a85281Smacallan /* 55076a85281Smacallan * mask out trailing pixels to avoid partial writes 55176a85281Smacallan */ 55276a85281Smacallan post = (dststart + w) & 3; 55376a85281Smacallan if (post != 0) { 55476a85281Smacallan rmask = ~(0xffffffff >> (post * 8)); 55576a85281Smacallan write_sx_reg(p, SX_QUEUED(7), rmask); 55676a85281Smacallan write_sx_reg(p, SX_QUEUED(6), ~rmask); 55776a85281Smacallan } 55876a85281Smacallan 55976a85281Smacallan DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, 56076a85281Smacallan w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); 56176a85281Smacallan 56276a85281Smacallan /* mask out the leading pixels in dst by using a mask and ROP */ 56376a85281Smacallan if (pre != 0) { 56476a85281Smacallan write_sx_reg(p, SX_ROP_CONTROL, 0xca); 56576a85281Smacallan write_sx_reg(p, SX_QUEUED(R_MASK), lmask); 56676a85281Smacallan } 56776a85281Smacallan 56876a85281Smacallan saddr = 
srcstart & ~3; 56976a85281Smacallan daddr = dststart & ~3; 57076a85281Smacallan 57176a85281Smacallan while (h > 0) { 57276a85281Smacallan write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); 57376a85281Smacallan if (wrds > 15) { 57476a85281Smacallan if (dist != 0) { 57576a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); 57676a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); 57776a85281Smacallan /* shifted source pixels are now at register 40+ */ 57876a85281Smacallan ssreg = 40; 57976a85281Smacallan } else ssreg = 8; 58076a85281Smacallan if (pre != 0) { 58176a85281Smacallan /* read only the first word */ 58276a85281Smacallan write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 58376a85281Smacallan /* mask out leading junk */ 58476a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 58576a85281Smacallan } 58676a85281Smacallan } else { 58776a85281Smacallan if (dist != 0) { 58876a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); 58976a85281Smacallan ssreg = 40; 59076a85281Smacallan } else ssreg = 8; 59176a85281Smacallan if (pre != 0) { 59276a85281Smacallan /* read only the first word */ 59376a85281Smacallan write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); 59476a85281Smacallan /* mask out leading junk */ 59576a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); 59676a85281Smacallan } 59776a85281Smacallan } 59876a85281Smacallan if (post != 0) { 59976a85281Smacallan int laddr = daddr + ((wrds - 1) << 2); 60076a85281Smacallan /* 60176a85281Smacallan * if the last word to be written out is a partial we 60276a85281Smacallan * mask out the leftovers and replace them with 60376a85281Smacallan * background pixels 60476a85281Smacallan * we could pull the same ROP * mask trick as we do on 60576a85281Smacallan * the left end but it's less annoying this way and 60676a85281Smacallan * the instruction count is the same 
60776a85281Smacallan */ 60876a85281Smacallan write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7)); 60976a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0)); 61076a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0)); 61176a85281Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0)); 61276a85281Smacallan } 61376a85281Smacallan#ifdef DEBUG 61476a85281Smacallan write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); 61576a85281Smacallan taddr += dstpitch; 61676a85281Smacallan#endif 61776a85281Smacallan write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7)); 61876a85281Smacallan saddr += srcpitch; 61976a85281Smacallan daddr += dstpitch; 62076a85281Smacallan h--; 62176a85281Smacallan } 62276a85281Smacallan} 62376a85281Smacallan 624f71acd79Smacallanstatic void 625f71acd79SmacallanCG14Copy8(PixmapPtr pDstPixmap, 626f71acd79Smacallan int srcX, int srcY, int dstX, int dstY, int w, int h) 627f71acd79Smacallan{ 628f71acd79Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 629f71acd79Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 630f71acd79Smacallan int dstpitch, dstoff, srcpitch, srcoff; 631f71acd79Smacallan int srcstart, dststart, xinc, srcinc, dstinc; 632f71acd79Smacallan int line, count, s, d, num; 633f71acd79Smacallan 634f71acd79Smacallan ENTER; 635f71acd79Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 636f71acd79Smacallan dstoff = exaGetPixmapOffset(pDstPixmap); 637f71acd79Smacallan srcpitch = p->srcpitch; 638f71acd79Smacallan srcoff = p->srcoff; 639f71acd79Smacallan /* 640f71acd79Smacallan * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 641f71acd79Smacallan * actually wrote anything and only sync if it did 642f71acd79Smacallan */ 643f71acd79Smacallan srcstart = srcX + (srcpitch * srcY) + srcoff; 644f71acd79Smacallan dststart = dstX + (dstpitch * dstY) + dstoff; 645f71acd79Smacallan 646f71acd79Smacallan if (p->ydir < 0) { 
647f71acd79Smacallan srcstart += (h - 1) * srcpitch; 648f71acd79Smacallan dststart += (h - 1) * dstpitch; 649f71acd79Smacallan srcinc = -srcpitch; 650f71acd79Smacallan dstinc = -dstpitch; 651f71acd79Smacallan } else { 652f71acd79Smacallan srcinc = srcpitch; 653f71acd79Smacallan dstinc = dstpitch; 654f71acd79Smacallan } 655f787bc61Smacallan 656f787bc61Smacallan /* 657f787bc61Smacallan * this copies up to 124 pixels wide in one go, so horizontal 658f787bc61Smacallan * direction / overlap don't matter 659f787bc61Smacallan * uses all 32bit accesses and funnel shifter for unaligned copies 660f787bc61Smacallan */ 661f787bc61Smacallan if ((w < 125) && (w > 8)) { 66276a85281Smacallan switch (p->last_rop) { 66376a85281Smacallan case 0xcc: 66476a85281Smacallan CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 66576a85281Smacallan break; 66676a85281Smacallan default: 66776a85281Smacallan CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc); 66876a85281Smacallan } 669f787bc61Smacallan return; 670f787bc61Smacallan } 671f787bc61Smacallan 672f787bc61Smacallan /* 673f787bc61Smacallan * only invert x direction if absolutely necessary, it's a pain to 674f787bc61Smacallan * go backwards on SX so avoid as much as possible 675f787bc61Smacallan */ 676f787bc61Smacallan if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) { 677f787bc61Smacallan xinc = -32; 678f787bc61Smacallan } else 679f787bc61Smacallan xinc = 32; 680f787bc61Smacallan 681f787bc61Smacallan /* 682f787bc61Smacallan * for aligned copies we can go all 32bit and avoid VRAM reads in the 683f787bc61Smacallan * most common case 684f787bc61Smacallan */ 68581c68cf8Smacallan if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) { 68681c68cf8Smacallan switch (p->last_rop) { 68781c68cf8Smacallan case 0xcc: 68881c68cf8Smacallan CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc); 68981c68cf8Smacallan break; 69081c68cf8Smacallan default: 69181c68cf8Smacallan CG14Copy8_aligned_rop(p, 
srcstart, dststart, w, h, srcinc, dstinc); 69281c68cf8Smacallan } 69381c68cf8Smacallan return; 69481c68cf8Smacallan } 695f787bc61Smacallan 69686527ef6Smacallan /* 69786527ef6Smacallan * if we make it here we either have something large and unaligned, 69886527ef6Smacallan * something we need to do right to left, or something tiny. 69986527ef6Smacallan * we handle the non-tiny cases by breaking them down into chunks that 70086527ef6Smacallan * Copy8_short_*() can handle, making sure the destinations are 32bit 70186527ef6Smacallan * aligned whenever possible 70286527ef6Smacallan * since we copy by block, not by line we need to go backwards even if 70386527ef6Smacallan * we don't copy within the same line 70486527ef6Smacallan */ 70586527ef6Smacallan if (w > 8) { 70686527ef6Smacallan int next, wi, end = dststart + w; 70786527ef6Smacallan DPRINTF(X_ERROR, "%s %08x %08x %d\n", __func__, srcstart, dststart, w); 70886527ef6Smacallan if ((p->xdir < 0) && (srcoff == dstoff)) { 70986527ef6Smacallan srcstart += w; 71086527ef6Smacallan next = max((end - 120) & ~3, dststart); 71186527ef6Smacallan wi = end - next; 71286527ef6Smacallan srcstart -= wi; 71386527ef6Smacallan while (wi > 0) { 71486527ef6Smacallan DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", __func__, srcstart, next, wi); 71586527ef6Smacallan if (p->last_rop == 0xcc) { 71686527ef6Smacallan CG14Copy8_short_norop(p, srcstart, next, wi, h, srcinc, dstinc); 71786527ef6Smacallan } else 71886527ef6Smacallan CG14Copy8_short_rop(p, srcstart, next, wi, h, srcinc, dstinc); 71986527ef6Smacallan end = next; 72086527ef6Smacallan /* 72186527ef6Smacallan * avoid extremely narrow copies so I don't 72286527ef6Smacallan * have to deal with dangling start and end 72386527ef6Smacallan * pixels in the same word 72486527ef6Smacallan */ 72586527ef6Smacallan if ((end - dststart) < 140) { 72686527ef6Smacallan next = max((end - 80) & ~3, dststart); 72786527ef6Smacallan } else { 72886527ef6Smacallan next = max((end - 120) & ~3, dststart); 
72986527ef6Smacallan } 73086527ef6Smacallan wi = end - next; 73186527ef6Smacallan srcstart -= wi; 73286527ef6Smacallan } 73386527ef6Smacallan } else { 73486527ef6Smacallan next = min(end, (dststart + 124) & ~3); 73586527ef6Smacallan wi = next - dststart; 73686527ef6Smacallan while (wi > 0) { 73786527ef6Smacallan DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", __func__, srcstart, next, wi); 73886527ef6Smacallan if (p->last_rop == 0xcc) { 73986527ef6Smacallan CG14Copy8_short_norop(p, srcstart, dststart, wi, h, srcinc, dstinc); 74086527ef6Smacallan } else 74186527ef6Smacallan CG14Copy8_short_rop(p, srcstart, dststart, wi, h, srcinc, dstinc); 74286527ef6Smacallan srcstart += wi; 74386527ef6Smacallan dststart = next; 74486527ef6Smacallan if ((end - dststart) < 140) { 74586527ef6Smacallan next = min(end, (dststart + 84) & ~3); 74686527ef6Smacallan } else { 74786527ef6Smacallan next = min(end, (dststart + 124) & ~3); 74886527ef6Smacallan } 74986527ef6Smacallan wi = next - dststart; 75086527ef6Smacallan } 75186527ef6Smacallan } 75286527ef6Smacallan return; 75386527ef6Smacallan } 75486527ef6Smacallan if (xinc < 0) { 75586527ef6Smacallan srcstart += (w - 32); 75686527ef6Smacallan dststart += (w - 32); 75786527ef6Smacallan } 75886527ef6Smacallan 75986527ef6Smacallan DPRINTF(X_ERROR, "%s fallback to byte-wise %d %d\n", __func__, w, h); 760f71acd79Smacallan if (p->last_rop == 0xcc) { 761f71acd79Smacallan /* plain old copy */ 762f71acd79Smacallan if ( xinc > 0) { 763f71acd79Smacallan /* going left to right */ 764f71acd79Smacallan for (line = 0; line < h; line++) { 765f71acd79Smacallan count = 0; 766f71acd79Smacallan s = srcstart; 767f71acd79Smacallan d = dststart; 768f71acd79Smacallan while ( count < w) { 769f71acd79Smacallan num = min(32, w - count); 770f71acd79Smacallan write_sx_io(p, s, 771f71acd79Smacallan SX_LDB(10, num - 1, s & 7)); 772f71acd79Smacallan write_sx_io(p, d, 773f71acd79Smacallan SX_STBM(10, num - 1, d & 7)); 774f71acd79Smacallan s += xinc; 775f71acd79Smacallan d += 
xinc; 776f71acd79Smacallan count += 32; 777f71acd79Smacallan } 778f71acd79Smacallan srcstart += srcinc; 779f71acd79Smacallan dststart += dstinc; 780f71acd79Smacallan } 781f71acd79Smacallan } else { 782f71acd79Smacallan /* going right to left */ 783f71acd79Smacallan int i, chunks = (w >> 5); 784f71acd79Smacallan for (line = 0; line < h; line++) { 785f71acd79Smacallan s = srcstart; 786f71acd79Smacallan d = dststart; 787f71acd79Smacallan count = w; 788f71acd79Smacallan for (i = 0; i < chunks; i++) { 789f71acd79Smacallan write_sx_io(p, s, 790f71acd79Smacallan SX_LDB(10, 31, s & 7)); 791f71acd79Smacallan write_sx_io(p, d, 792f71acd79Smacallan SX_STBM(10, 31, d & 7)); 793f71acd79Smacallan s -= 32; 794f71acd79Smacallan d -= 32; 795f71acd79Smacallan count -= 32; 796f71acd79Smacallan } 797f71acd79Smacallan /* leftovers, if any */ 798f71acd79Smacallan if (count > 0) { 799f71acd79Smacallan s += (32 - count); 800f71acd79Smacallan d += (32 - count); 801f71acd79Smacallan write_sx_io(p, s, 802f71acd79Smacallan SX_LDB(10, count - 1, s & 7)); 803f71acd79Smacallan write_sx_io(p, d, 804f71acd79Smacallan SX_STBM(10, count - 1, d & 7)); 805f71acd79Smacallan } 806f71acd79Smacallan srcstart += srcinc; 807f71acd79Smacallan dststart += dstinc; 808f71acd79Smacallan } 809f71acd79Smacallan } 810f71acd79Smacallan } else { 811f71acd79Smacallan /* ROPs needed */ 812f71acd79Smacallan if ( xinc > 0) { 813f71acd79Smacallan /* going left to right */ 814f71acd79Smacallan for (line = 0; line < h; line++) { 815f71acd79Smacallan count = 0; 816f71acd79Smacallan s = srcstart; 817f71acd79Smacallan d = dststart; 818f71acd79Smacallan while ( count < w) { 819f71acd79Smacallan num = min(32, w - count); 820f71acd79Smacallan write_sx_io(p, s, 821f71acd79Smacallan SX_LDB(10, num - 1, s & 7)); 822f71acd79Smacallan write_sx_io(p, d, 823f71acd79Smacallan SX_LDB(42, num - 1, d & 7)); 824f71acd79Smacallan if (num > 16) { 825f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 826f71acd79Smacallan SX_ROP(10, 42, 74, 
15)); 827f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 828f71acd79Smacallan SX_ROP(26, 58, 90, num - 17)); 829f71acd79Smacallan } else { 830f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 831f71acd79Smacallan SX_ROP(10, 42, 74, num - 1)); 832f71acd79Smacallan } 833f71acd79Smacallan write_sx_io(p, d, 834f71acd79Smacallan SX_STBM(74, num - 1, d & 7)); 835f71acd79Smacallan s += xinc; 836f71acd79Smacallan d += xinc; 837f71acd79Smacallan count += 32; 838f71acd79Smacallan } 839f71acd79Smacallan srcstart += srcinc; 840f71acd79Smacallan dststart += dstinc; 841f71acd79Smacallan } 842f71acd79Smacallan } else { 843f71acd79Smacallan /* going right to left */ 844f71acd79Smacallan int i, chunks = (w >> 5); 845f71acd79Smacallan for (line = 0; line < h; line++) { 846f71acd79Smacallan s = srcstart; 847f71acd79Smacallan d = dststart; 848f71acd79Smacallan count = w; 849f71acd79Smacallan for (i = 0; i < chunks; i++) { 850f71acd79Smacallan write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 851f71acd79Smacallan write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 852f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 853f71acd79Smacallan SX_ROP(10, 42, 74, 15)); 854f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 855f71acd79Smacallan SX_ROP(26, 58, 90, 15)); 856f71acd79Smacallan write_sx_io(p, d, 857f71acd79Smacallan SX_STBM(74, 31, d & 7)); 858f71acd79Smacallan s -= 128; 859f71acd79Smacallan d -= 128; 860f71acd79Smacallan count -= 32; 861f71acd79Smacallan } 862f71acd79Smacallan /* leftovers, if any */ 863f71acd79Smacallan if (count > 0) { 864f71acd79Smacallan s += (32 - count); 865f71acd79Smacallan d += (32 - count); 866f71acd79Smacallan write_sx_io(p, s, 867f71acd79Smacallan SX_LDB(10, count - 1, s & 7)); 868f71acd79Smacallan write_sx_io(p, d, 869f71acd79Smacallan SX_LDB(42, count - 1, d & 7)); 870f71acd79Smacallan if (count > 16) { 871f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 872f71acd79Smacallan SX_ROP(10, 42, 74, 15)); 873f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 
874f71acd79Smacallan SX_ROP(26, 58, 90, count - 17)); 875f71acd79Smacallan } else { 876f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 877f71acd79Smacallan SX_ROP(10, 42, 74, count - 1)); 878f71acd79Smacallan } 879f71acd79Smacallan 880f71acd79Smacallan write_sx_io(p, d, 881f71acd79Smacallan SX_STBM(74, count - 1, d & 7)); 882f71acd79Smacallan } 883f71acd79Smacallan srcstart += srcinc; 884f71acd79Smacallan dststart += dstinc; 885f71acd79Smacallan } 886f71acd79Smacallan } 887f71acd79Smacallan } 888f71acd79Smacallan exaMarkSync(pDstPixmap->drawable.pScreen); 889f71acd79Smacallan} 890f71acd79Smacallan

/*
 * EXA DoneCopy hook.  The copy routines leave nothing behind that would
 * need to be cleaned up, so this is intentionally empty.
 */
static void
CG14DoneCopy(PixmapPtr pDstPixmap)
{
}

/*
 * EXA PrepareSolid hook: latch fill colour, planemask and raster op for
 * the CG14Solid() calls that follow.
 *
 * pPixmap   - destination pixmap, only its depth and screen are used here
 * alu       - X raster op (GX*), used as an index into sx_rop[]
 * planemask - written to SX_PLANEMASK when it differs from the cached one
 * fg        - fill colour
 *
 * Always returns TRUE.
 */
static Bool
CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
{
	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);

	ENTER;
	DPRINTF(X_ERROR, "bits per pixel: %d %08lx\n",
	    pPixmap->drawable.bitsPerPixel, fg);

	/*
	 * GXset and GXclear are really just special cases of GXcopy with
	 * fixed fill colour
	 */
	switch (alu) {
	case GXclear:
		alu = GXcopy;
		fg = 0;
		break;
	case GXset:
		alu = GXcopy;
		fg = 0xffffffff;
		break;
	default:
		break;
	}

	/* repeat the colour in every sub byte if we're in 8 bit */
	if (pPixmap->drawable.bitsPerPixel == 8) {
		fg |= fg << 8;
		fg |= fg << 16;
	}

	/* the fill routines store from SX register 8 (and 9) */
	write_sx_reg(p, SX_QUEUED(8), fg);
	write_sx_reg(p, SX_QUEUED(9), fg);

	/* only touch planemask / ROP control when the cached value changes */
	if (planemask != p->last_mask) {
		CG14Wait(p);
		write_sx_reg(p, SX_PLANEMASK, planemask);
		p->last_mask = planemask;
	}
	alu = sx_rop[alu];
	if (alu != p->last_rop) {
		CG14Wait(p);
		write_sx_reg(p, SX_ROP_CONTROL, alu);
		p->last_rop = alu;
	}

	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
	return TRUE;
}

/*
 * Fill a w x h rectangle of 32bit pixels.
 *
 * start - byte offset of the top left pixel
 * pitch - byte distance between scanlines
 *
 * Uses the colour left in SX register 8 by CG14PrepareSolid() and the
 * cached raster op in p->last_rop.  Each scanline is processed in runs
 * of up to 32 pixels.
 */
static void
CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
{
	int line, x, num;
	uint32_t ptr;

	ENTER;
	if (p->last_rop == 0xcc) {
		/* simple fill */
		for (line = 0; line < h; line++) {
			x = 0;
			while (x < w) {
				ptr = start + (x << 2);
				num = min(32, w - x);
				write_sx_io(p, ptr,
				    SX_STS(8, num - 1, ptr & 7));
				x += 32;
			}
			start += pitch;
		}
	} else if (p->last_rop == 0xaa) {
		/* nothing to do here */
		return;
	} else {
		/* alright, let's do actual ROP stuff */

		/* first repeat the fill colour into 16 registers */
		write_sx_reg(p, SX_INSTRUCTIONS,
		    SX_SELECT_S(8, 8, 10, 15));

		for (line = 0; line < h; line++) {
			x = 0;
			while (x < w) {
				ptr = start + (x << 2);
				num = min(32, w - x);
				/* now suck fb data into registers */
				write_sx_io(p, ptr,
				    SX_LD(42, num - 1, ptr & 7));
				/*
				 * ROP them with the fill data we left in 10
				 * non-memory ops can only have counts up to 16
				 */
				if (num <= 16) {
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 42, 74, num - 1));
				} else {
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 42, 74, 15));
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 58, 90, num - 17));
				}
				/* and write the result back into memory */
				write_sx_io(p, ptr,
				    SX_ST(74, num - 1, ptr & 7));
				x += 32;
			}
			start += pitch;
		}
	}
}

/*
 * Fill a w x h rectangle of 8bit pixels.
 *
 * start - byte offset of the top left pixel
 * pitch - byte distance between scanlines
 *
 * Every scanline is split into up to three parts: leading bytes up to the
 * next 32bit boundary, a run of whole 32bit words, and trailing bytes.
 * The number of leading bytes is derived once from the first scanline's
 * address, so this relies on pitch being a multiple of 4; the xf86Msg()
 * sanity checks below report it if that ever stops being true.
 */
static void
CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
{
	int line, num, pre, cnt;
	uint32_t ptr;

	ENTER;
	/* bytes needed to reach 32bit alignment */
	pre = start & 3;
	if (pre != 0) {
		pre = 4 - pre;
	}

	if (p->last_rop == 0xcc) {
		/* simple fill */
		for (line = 0; line < h; line++) {
			ptr = start;
			cnt = w;
			pre = min(pre, cnt);
			if (pre) {
				write_sx_io(p, ptr & ~7,
				    SX_STBS(8, pre - 1, ptr & 7));
				ptr += pre;
				cnt -= pre;
				if (cnt == 0) {
					goto next;
				}
			}
			/* now do the aligned pixels in 32bit chunks */
			if (ptr & 3) {
				xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
			}
			while (cnt > 3) {
				/* num counts 32bit words, i.e. 4 pixels each */
				num = min(32, cnt >> 2);
				write_sx_io(p, ptr & ~7,
				    SX_STS(8, num - 1, ptr & 7));
				ptr += num << 2;
				cnt -= num << 2;
			}
			if (cnt > 3) {
				xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
			}
			/* trailing bytes */
			if (cnt > 0) {
				write_sx_io(p, ptr & ~7,
				    SX_STBS(8, cnt - 1, ptr & 7));
			}
			if ((ptr + cnt) != (start + w)) {
				xf86Msg(X_ERROR, "%s %x vs %x\n", __func__,
				    ptr + cnt, start + w);
			}
next:
			start += pitch;
		}
	} else if (p->last_rop == 0xaa) {
		/* nothing to do here */
		return;
	} else {
		/* alright, let's do actual ROP stuff */

		/* first repeat the fill colour into 16 registers */
		write_sx_reg(p, SX_INSTRUCTIONS,
		    SX_SELECT_S(8, 8, 10, 15));

		for (line = 0; line < h; line++) {
			ptr = start;
			cnt = w;
			pre = min(pre, cnt);
			if (pre) {
				/* load, ROP and store the leading bytes */
				write_sx_io(p, ptr & ~7,
				    SX_LDB(26, pre - 1, ptr & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ROP(10, 26, 42, pre - 1));
				write_sx_io(p, ptr & ~7,
				    SX_STB(42, pre - 1, ptr & 7));
				ptr += pre;
				cnt -= pre;
				if (cnt == 0) {
					goto next2;
				}
			}
			/* now do the aligned pixels in 32bit chunks */
			if (ptr & 3) {
				xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
			}
			while (cnt > 3) {
				num = min(32, cnt >> 2);
				write_sx_io(p, ptr & ~7,
				    SX_LD(26, num - 1, ptr & 7));
				/* at most 16 registers per ROP instruction */
				if (num <= 16) {
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 26, 58, num - 1));
				} else {
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 26, 58, 15));
					write_sx_reg(p, SX_INSTRUCTIONS,
					    SX_ROP(10, 42, 74, num - 17));
				}
				write_sx_io(p, ptr & ~7,
				    SX_ST(58, num - 1, ptr & 7));
				ptr += num << 2;
				cnt -= num << 2;
			}
			if (cnt > 3) {
				xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
			}
			/* trailing bytes */
			if (cnt > 0) {
				write_sx_io(p, ptr & ~7,
				    SX_LDB(26, cnt - 1, ptr & 7));
				write_sx_reg(p, SX_INSTRUCTIONS,
				    SX_ROP(10, 26, 42, cnt - 1));
				write_sx_io(p, ptr & ~7,
				    SX_STB(42, cnt - 1, ptr & 7));
			}
			if ((ptr + cnt) != (start + w)) {
				xf86Msg(X_ERROR, "%s %x vs %x\n", __func__,
				    ptr + cnt, start + w);
			}
next2:
			start += pitch;
		}
	}
}

/*
 * EXA Solid hook: fill the rectangle (x1, y1) - (x2, y2), x2 / y2
 * exclusive, with the colour and raster op set up by CG14PrepareSolid().
 * Only 8 and 32 bits per pixel are handled, other depths draw nothing.
 */
static void
CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
{
	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
	/* initialised so the debug output below never sees garbage */
	int start = 0, depth;

	ENTER;
	dstpitch = exaGetPixmapPitch(pPixmap);
	dstoff = exaGetPixmapOffset(pPixmap);

	depth = pPixmap->drawable.bitsPerPixel;
	switch (depth) {
	case 32:
		start = dstoff + (y1 * dstpitch) + (x1 << 2);
		CG14Solid32(p, start, dstpitch, w, h);
		break;
	case 8:
		start = dstoff + (y1 * dstpitch) + x1;
		CG14Solid8(p, start, dstpitch, w, h);
		break;
	default:
		/* unsupported depth, nothing is drawn */
		break;
	}

	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
	    dstpitch, dstoff, start);
	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
	exaMarkSync(pPixmap->drawable.pScreen);
}

/*
 * Memcpy-based UTS.
 *
 * Copies a w x h pixel rectangle from the buffer at src (src_pitch bytes
 * per line) to position (x, y) of pDst.  Waits for SX to finish before
 * the CPU touches video memory.  Always returns TRUE.
 */
static Bool
CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
    char *src, int src_pitch)
{
	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
	char *dst = p->fb + exaGetPixmapOffset(pDst);
	int dst_pitch = exaGetPixmapPitch(pDst);

	int bpp = pDst->drawable.bitsPerPixel;
	int cpp = (bpp + 7) >> 3;	/* bytes per pixel, rounded up */
	int wBytes = w * cpp;

	ENTER;
	DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
	dst += (x * cpp) + (y * dst_pitch);

	CG14Wait(p);

	while (h--) {
		memcpy(dst, src, wBytes);
		src += src_pitch;
		dst += dst_pitch;
	}
	/* SPARC store barrier, issued once all lines have been written */
	__asm("stbar;");
	return TRUE;
}

/*
 * Memcpy-based DFS.
 *
 * Copies a w x h pixel rectangle at position (x, y) of pSrc into the
 * buffer at dst (dst_pitch bytes per line).  Waits for SX to finish
 * before the CPU reads video memory.  Always returns TRUE.
 */
static Bool
CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
    char *dst, int dst_pitch)
{
	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
	char *src = p->fb + exaGetPixmapOffset(pSrc);
	int src_pitch = exaGetPixmapPitch(pSrc);
	int bpp = pSrc->drawable.bitsPerPixel;
	int cpp = (bpp + 7) >> 3;	/* bytes per pixel, rounded up */
	int wBytes = w * cpp;

	ENTER;

	src += (x * cpp) + (y * src_pitch);

	CG14Wait(p);

	while (h--) {
		memcpy(dst, src, wBytes);
		src += src_pitch;
		dst += dst_pitch;
	}

	return TRUE;
}

/*
 * EXA CheckComposite hook.
 *
 * Accepts PictOpOver, PictOpAdd and PictOpSrc when both the source and
 * the destination picture (if given) use one of the formats listed in
 * src_formats[].  The mask picture is only reported in debug builds, it
 * is not checked.  Returns TRUE if the operation should be attempted.
 */
Bool
CG14CheckComposite(int op, PicturePtr pSrcPicture,
    PicturePtr pMaskPicture,
    PicturePtr pDstPicture)
{
	unsigned int i;
	int ok = FALSE;

	ENTER;

	/*
	 * SX is in theory capable of accelerating pretty much all Xrender ops,
	 * even coordinate transformation and gradients. Support will be added
	 * over time and likely have to spill over into its own source file.
	 */

	if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) {
		DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op);
		return FALSE;
	}

	if (pSrcPicture != NULL) {
		for (i = 0; (i < arraysize(src_formats)) && (!ok); i++) {
			ok = (pSrcPicture->format == src_formats[i]);
		}

		if (!ok) {
			DPRINTF(X_ERROR, "%s: unsupported src format %x\n",
			    __func__, pSrcPicture->format);
			return FALSE;
		}
		DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op);
	}

	if (pDstPicture != NULL) {
		/* the destination is checked against the same list */
		ok = FALSE;
		for (i = 0; (i < arraysize(src_formats)) && (!ok); i++) {
			ok = (pDstPicture->format == src_formats[i]);
		}

		if (!ok) {
			DPRINTF(X_ERROR, "%s: unsupported dst format %x\n",
			    __func__, pDstPicture->format);
			return FALSE;
		}
		DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op);
	}

	if (pMaskPicture != NULL) {
		/*
		 * pictures without a drawable (solid fills, gradients) have
		 * no dimensions to report
		 */
		if (pMaskPicture->pDrawable != NULL) {
			DPRINTF(X_ERROR, "mask is %x %d %d\n",
			    pMaskPicture->format,
			    pMaskPicture->pDrawable->width,
			    pMaskPicture->pDrawable->height);
		} else {
			DPRINTF(X_ERROR, "mask is %x (no drawable)\n",
			    pMaskPicture->format);
		}
	}
	return TRUE;
}

/*
 * EXA PrepareComposite hook: record everything CG14Composite() needs in
 * the driver private - operation, source / mask offsets, pitches and
 * formats, and whether the source is a solid colour.
 *
 * A source counts as solid if its picture is a solid fill or its pixmap
 * is 1x1.  In that case the colour is loaded into SX registers 8 - 11,
 * with 9 and 11 swapped for the a8b8g8r8 / x8b8g8r8 formats.
 *
 * Returns FALSE, so that EXA falls back to software, when a pixmap this
 * driver would have to read is missing; TRUE otherwise.
 */
Bool
CG14PrepareComposite(int op, PicturePtr pSrcPicture,
    PicturePtr pMaskPicture,
    PicturePtr pDstPicture,
    PixmapPtr pSrc,
    PixmapPtr pMask,
    PixmapPtr pDst)
{
	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);

	ENTER;

	p->no_source_pixmap = FALSE;
	p->source_is_solid = FALSE;

	if (pSrcPicture->format == PICT_a1) {
		xf86Msg(X_ERROR, "src mono, dst %x, op %d\n",
		    pDstPicture->format, op);
		if (pMaskPicture != NULL) {
			xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format);
		}
	}
	if (pSrcPicture->pSourcePict != NULL) {
		if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) {
			p->fillcolour =
			    pSrcPicture->pSourcePict->solidFill.color;
			DPRINTF(X_ERROR, "%s: solid src %08x\n",
			    __func__, p->fillcolour);
			p->no_source_pixmap = TRUE;
			p->source_is_solid = TRUE;
		}
	}
	if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) {
		if (pMaskPicture->pSourcePict->type ==
		    SourcePictTypeSolidFill) {
			p->fillcolour =
			    pMaskPicture->pSourcePict->solidFill.color;
			xf86Msg(X_ERROR, "%s: solid mask %08x\n",
			    __func__, p->fillcolour);
		}
	}

	/*
	 * A mask picture without a pixmap (e.g. the solid mask reported
	 * above) can't be used: the composite routines read the mask from
	 * video memory and exaGetPixmapOffset() must not be handed NULL.
	 */
	if ((pMaskPicture != NULL) && (pMask == NULL)) {
		return FALSE;
	}
	/*
	 * PictOpSrc is implemented as a plain copy from pSrc.
	 * NOTE(review): CG14PrepareCopy() is assumed to dereference its
	 * source pixmap - confirm.
	 */
	if ((op == PictOpSrc) && (pSrc == NULL)) {
		return FALSE;
	}

	if (pMaskPicture != NULL) {
		p->mskoff = exaGetPixmapOffset(pMask);
		p->mskpitch = exaGetPixmapPitch(pMask);
		p->mskformat = pMaskPicture->format;
	} else {
		p->mskoff = 0;
		p->mskpitch = 0;
		p->mskformat = 0;
	}
	if (pSrc != NULL) {
		/* a 1x1 source pixmap is treated as a solid colour */
		p->source_is_solid =
		    ((pSrc->drawable.width == 1) &&
		     (pSrc->drawable.height == 1));
		p->srcoff = exaGetPixmapOffset(pSrc);
		p->srcpitch = exaGetPixmapPitch(pSrc);
		if (p->source_is_solid) {
			/*
			 * CPU read from video memory - let SX finish first,
			 * like the upload / download paths do
			 */
			CG14Wait(p);
			p->fillcolour = *(uint32_t *)(p->fb + p->srcoff);
		}
	}
	p->srcformat = pSrcPicture->format;
	p->dstformat = pDstPicture->format;

	if (p->source_is_solid) {
		uint32_t temp;

		/* stuff source colour into SX registers, swap as needed */
		temp = p->fillcolour;
		switch (p->srcformat) {
		case PICT_a8r8g8b8:
		case PICT_x8r8g8b8:
			write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
			temp = temp >> 8;
			write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
			temp = temp >> 8;
			write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
			break;
		case PICT_a8b8g8r8:
		case PICT_x8b8g8r8:
			write_sx_reg(p, SX_QUEUED(11), temp & 0xff);
			temp = temp >> 8;
			write_sx_reg(p, SX_QUEUED(10), temp & 0xff);
			temp = temp >> 8;
			write_sx_reg(p, SX_QUEUED(9), temp & 0xff);
			break;
		default:
			/* other formats: only register 8 is set below */
			break;
		}
		write_sx_reg(p, SX_QUEUED(8), 0xff);
	}
	p->op = op;
	if (op == PictOpSrc) {
		CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff);
	}
#ifdef SX_DEBUG
	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
	    *(uint32_t *)(p->fb + p->srcoff));
#endif
	return TRUE;
}

/*
 * EXA Composite hook: run the operation set up by CG14PrepareComposite()
 * on a width x height rectangle.
 *
 * Source, mask and destination addresses are byte offsets built from the
 * offsets / pitches recorded earlier; x coordinates are scaled by 4 for
 * 32bit formats and used as is for a8.  'flip' is set when source and
 * destination differ in PICT_FORMAT_TYPE and is handed to the non-solid
 * Over routines.  Combinations without a matching routine are reported
 * with xf86Msg() and draw nothing.
 */
void
CG14Composite(PixmapPtr pDst, int srcX, int srcY,
    int maskX, int maskY,
    int dstX, int dstY,
    int width, int height)
{
	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
	uint32_t dstoff, dstpitch;
	uint32_t dst, msk, src;
	int flip = 0;

	ENTER;
	dstoff = exaGetPixmapOffset(pDst);
	dstpitch = exaGetPixmapPitch(pDst);

	flip = (PICT_FORMAT_TYPE(p->srcformat) !=
	    PICT_FORMAT_TYPE(p->dstformat));

	switch (p->op) {
	case PictOpOver:
		/* Over always targets a 32bit destination */
		dst = dstoff + (dstY * dstpitch) + (dstX << 2);
		DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
		    p->mskformat, p->dstformat, srcX, srcY);
		if (p->source_is_solid) {
			/* solid colour through an 8 or 32bit mask */
			switch (p->mskformat) {
			case PICT_a8:
				msk = p->mskoff +
				    (maskY * p->mskpitch) +
				    maskX;
				CG14Comp_Over8Solid(p,
				    msk, p->mskpitch,
				    dst, dstpitch,
				    width, height);
				break;
			case PICT_a8r8g8b8:
			case PICT_a8b8g8r8:
				msk = p->mskoff +
				    (maskY * p->mskpitch) +
				    (maskX << 2);
				CG14Comp_Over32Solid(p,
				    msk, p->mskpitch,
				    dst, dstpitch,
				    width, height);
				break;
			default:
				xf86Msg(X_ERROR,
				    "unsupported mask format %08x\n", p->mskformat);
				break;
			}
		} else {
			DPRINTF(X_ERROR, "non-solid over with msk %x\n",
			    p->mskformat);
			switch (p->srcformat) {
			case PICT_a8r8g8b8:
			case PICT_a8b8g8r8:
				/* source carries its own alpha */
				src = p->srcoff +
				    (srcY * p->srcpitch) +
				    (srcX << 2);
				dst = dstoff +
				    (dstY * dstpitch) +
				    (dstX << 2);
				if (p->mskformat == PICT_a8) {
					msk = p->mskoff +
					    (maskY * p->mskpitch) +
					    maskX;
					CG14Comp_Over32Mask(p,
					    src, p->srcpitch,
					    msk, p->mskpitch,
					    dst, dstpitch,
					    width, height, flip);
				} else {
					CG14Comp_Over32(p,
					    src, p->srcpitch,
					    dst, dstpitch,
					    width, height, flip);
				}
				break;
			case PICT_x8r8g8b8:
			case PICT_x8b8g8r8:
				/* no source alpha, a mask is required */
				src = p->srcoff +
				    (srcY * p->srcpitch) +
				    (srcX << 2);
				dst = dstoff +
				    (dstY * dstpitch) +
				    (dstX << 2);
				if (p->mskformat == PICT_a8) {
					msk = p->mskoff +
					    (maskY * p->mskpitch) +
					    maskX;
					CG14Comp_Over32Mask_noalpha(p,
					    src, p->srcpitch,
					    msk, p->mskpitch,
					    dst, dstpitch,
					    width, height, flip);
				} else if ((p->mskformat == PICT_a8r8g8b8) ||
				    (p->mskformat == PICT_a8b8g8r8)) {
					msk = p->mskoff +
					    (maskY * p->mskpitch) +
					    (maskX << 2);
					CG14Comp_Over32Mask32_noalpha(p,
					    src, p->srcpitch,
					    msk, p->mskpitch,
					    dst, dstpitch,
					    width, height, flip);
				} else {
					xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat);
				}
				break;
			default:
				xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n",
				    __func__, p->srcformat);
				break;
			}
		}
		break;
	case PictOpAdd:
		DPRINTF(X_ERROR, "Add %08x %08x\n",
		    p->srcformat, p->dstformat);
		switch (p->srcformat) {
		case PICT_a8:
			src = p->srcoff +
			    (srcY * p->srcpitch) + srcX;
			if (p->dstformat == PICT_a8) {
				dst = dstoff +
				    (dstY * dstpitch) + dstX;
				CG14Comp_Add8(p,
				    src, p->srcpitch,
				    dst, dstpitch,
				    width, height);
			} else {
				/* a8 source added to a 32bit destination */
				dst = dstoff +
				    (dstY * dstpitch) +
				    (dstX << 2);
				CG14Comp_Add8_32(p,
				    src, p->srcpitch,
				    dst, dstpitch,
				    width, height);
			}
			break;
		case PICT_a8r8g8b8:
		case PICT_x8r8g8b8:
			src = p->srcoff +
			    (srcY * p->srcpitch) + (srcX << 2);
			dst = dstoff + (dstY * dstpitch) +
			    (dstX << 2);
			CG14Comp_Add32(p, src, p->srcpitch,
			    dst, dstpitch, width, height);
			break;
		default:
			xf86Msg(X_ERROR,
			    "unsupported src format\n");
			break;
		}
		break;
	case PictOpSrc:
		/* plain copy, prepared via CG14PrepareCopy() */
		DPRINTF(X_ERROR, "Src %08x %08x\n",
		    p->srcformat, p->dstformat);
		if (p->mskformat != 0) {
			xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat);
		}
		if (p->srcformat == PICT_a8) {
			CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height);
		} else {
			/* convert between RGB and BGR? */
			CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height);
		}
		break;
	default:
		xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
		break;
	}
	exaMarkSync(pDst->drawable.pScreen);
}

 15144261fa58Smacallan 15154261fa58Smacallan 15164261fa58Smacallan 15174261fa58SmacallanBool 15184261fa58SmacallanCG14InitAccel(ScreenPtr pScreen) 15194261fa58Smacallan{ 15204261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 15214261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 15224261fa58Smacallan ExaDriverPtr pExa; 15234261fa58Smacallan 15244261fa58Smacallan pExa = exaDriverAlloc(); 15254261fa58Smacallan if (!pExa) 15264261fa58Smacallan return FALSE; 15274261fa58Smacallan 15284261fa58Smacallan p->pExa = pExa; 15294261fa58Smacallan 15304261fa58Smacallan pExa->exa_major = EXA_VERSION_MAJOR; 15314261fa58Smacallan pExa->exa_minor = EXA_VERSION_MINOR; 15324261fa58Smacallan 15334261fa58Smacallan pExa->memoryBase = p->fb; 15344261fa58Smacallan pExa->memorySize = p->memsize; 1535b8ad197aSmacallan pExa->offScreenBase = p->width * p->height * (pScrn->depth >> 3); 15364261fa58Smacallan 15374261fa58Smacallan /* 15384261fa58Smacallan * SX memory instructions are written to 64bit aligned addresses with 15394261fa58Smacallan * a 3 bit displacement. 
Make sure the displacement remains constant 15404261fa58Smacallan * within one column 15414261fa58Smacallan */ 15424261fa58Smacallan 15434261fa58Smacallan pExa->pixmapOffsetAlign = 8; 15444261fa58Smacallan pExa->pixmapPitchAlign = 8; 15454261fa58Smacallan 1546fe97f391Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS 1547f71acd79Smacallan | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1548f71acd79Smacallan /*| EXA_MIXED_PIXMAPS*/; 15494261fa58Smacallan 15504261fa58Smacallan /* 15514261fa58Smacallan * these limits are bogus 15524261fa58Smacallan * SX doesn't deal with coordinates at all, so there is no limit but 15534261fa58Smacallan * we have to put something here 15544261fa58Smacallan */ 15554261fa58Smacallan pExa->maxX = 4096; 15564261fa58Smacallan pExa->maxY = 4096; 15574261fa58Smacallan 15584261fa58Smacallan pExa->WaitMarker = CG14WaitMarker; 15594261fa58Smacallan 15604261fa58Smacallan pExa->PrepareSolid = CG14PrepareSolid; 15614261fa58Smacallan pExa->Solid = CG14Solid; 15624261fa58Smacallan pExa->DoneSolid = CG14DoneCopy; 15634261fa58Smacallan pExa->PrepareCopy = CG14PrepareCopy; 1564f71acd79Smacallan pExa->Copy = CG14Copy32; 15654261fa58Smacallan pExa->DoneCopy = CG14DoneCopy; 15664261fa58Smacallan if (p->use_xrender) { 15674261fa58Smacallan pExa->CheckComposite = CG14CheckComposite; 15684261fa58Smacallan pExa->PrepareComposite = CG14PrepareComposite; 15694261fa58Smacallan pExa->Composite = CG14Composite; 15704261fa58Smacallan pExa->DoneComposite = CG14DoneCopy; 15714261fa58Smacallan } 15724261fa58Smacallan 15734261fa58Smacallan /* EXA hits more optimized paths when it does not have to fallback 15744261fa58Smacallan * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 
15754261fa58Smacallan */ 15764261fa58Smacallan pExa->UploadToScreen = CG14UploadToScreen; 15774261fa58Smacallan pExa->DownloadFromScreen = CG14DownloadFromScreen; 15784261fa58Smacallan 1579c2193d98Smacallan p->queuecount = 0; 15804261fa58Smacallan /* do some hardware init */ 15814261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 15824261fa58Smacallan p->last_mask = 0xffffffff; 15834261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 15844261fa58Smacallan p->last_rop = 0xcc; 15854261fa58Smacallan return exaDriverInit(pScreen, pExa); 15864261fa58Smacallan} 1587