/* cg14_accel.c revision 4261fa58 */
/* $NetBSD: cg14_accel.c,v 1.1 2013/06/19 13:26:01 macallan Exp $ */
/*
 * Copyright (c) 2013 Michael Lorenz
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
294261fa58Smacallan * 304261fa58Smacallan */ 314261fa58Smacallan 324261fa58Smacallan#include <sys/types.h> 334261fa58Smacallan 344261fa58Smacallan/* all driver need this */ 354261fa58Smacallan#include "xf86.h" 364261fa58Smacallan#include "xf86_OSproc.h" 374261fa58Smacallan#include "compiler.h" 384261fa58Smacallan 394261fa58Smacallan#include "cg14.h" 404261fa58Smacallan#include <sparc/sxreg.h> 414261fa58Smacallan 424261fa58Smacallan#define SX_SINGLE 434261fa58Smacallan/*#define SX_DEBUG*/ 444261fa58Smacallan/*#define SX_ADD_SOFTWARE*/ 454261fa58Smacallan 464261fa58Smacallan#ifdef SX_DEBUG 474261fa58Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 484261fa58Smacallan#define DPRINTF xf86Msg 494261fa58Smacallan#else 504261fa58Smacallan#define ENTER 514261fa58Smacallan#define DPRINTF while (0) xf86Msg 524261fa58Smacallan#endif 534261fa58Smacallan 544261fa58Smacallan#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 554261fa58Smacallan 564261fa58Smacallan/* 0xcc is SX's GXcopy equivalent */ 574261fa58Smacallanuint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 584261fa58Smacallan 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 594261fa58Smacallan 604261fa58Smacallanint src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 614261fa58Smacallan PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 624261fa58Smacallanint tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 634261fa58Smacallan 644261fa58Smacallanchar c[8] = " .,:+*oX"; 654261fa58Smacallan 664261fa58Smacallan/* write an SX register */ 674261fa58Smacallanstatic inline void 684261fa58Smacallanwrite_sx_reg(Cg14Ptr p, int reg, uint32_t val) 694261fa58Smacallan{ 704261fa58Smacallan *(volatile uint32_t *)(p->sxreg + reg) = val; 714261fa58Smacallan} 724261fa58Smacallan 734261fa58Smacallan/* read an SX register */ 744261fa58Smacallanstatic inline uint32_t 754261fa58Smacallanread_sx_reg(Cg14Ptr p, int reg) 764261fa58Smacallan{ 774261fa58Smacallan return *(volatile uint32_t *)(p->sxreg + reg); 
784261fa58Smacallan} 794261fa58Smacallan 804261fa58Smacallan/* write a memory referencing instruction */ 814261fa58Smacallanstatic inline void 824261fa58Smacallanwrite_sx_io(Cg14Ptr p, int reg, uint32_t val) 834261fa58Smacallan{ 844261fa58Smacallan *(volatile uint32_t *)(p->sxio + reg) = val; 854261fa58Smacallan} 864261fa58Smacallan 874261fa58Smacallanstatic inline void 884261fa58SmacallanCG14Wait(Cg14Ptr p) 894261fa58Smacallan{ 904261fa58Smacallan /* we just wait until the instruction queue is empty */ 914261fa58Smacallan while ((read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT) != 0) {}; 924261fa58Smacallan} 934261fa58Smacallan 944261fa58Smacallanstatic void 954261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker) 964261fa58Smacallan{ 974261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 984261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 994261fa58Smacallan 1004261fa58Smacallan CG14Wait(p); 1014261fa58Smacallan} 1024261fa58Smacallan 1034261fa58Smacallanstatic Bool 1044261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 1054261fa58Smacallan int xdir, int ydir, int alu, Pixel planemask) 1064261fa58Smacallan{ 1074261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 1084261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1094261fa58Smacallan 1104261fa58Smacallan ENTER; 1114261fa58Smacallan DPRINTF(X_ERROR, "bits per pixel: %d\n", 1124261fa58Smacallan pSrcPixmap->drawable.bitsPerPixel); 1134261fa58Smacallan 1144261fa58Smacallan if (planemask != p->last_mask) { 1154261fa58Smacallan CG14Wait(p); 1164261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, planemask); 1174261fa58Smacallan p->last_mask = planemask; 1184261fa58Smacallan } 1194261fa58Smacallan alu = sx_rop[alu]; 1204261fa58Smacallan if (alu != p->last_rop) { 1214261fa58Smacallan CG14Wait(p); 1224261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, alu); 1234261fa58Smacallan p->last_rop = alu; 1244261fa58Smacallan } 1254261fa58Smacallan 
p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 1264261fa58Smacallan p->srcoff = exaGetPixmapOffset(pSrcPixmap); 1274261fa58Smacallan p->xdir = xdir; 1284261fa58Smacallan p->ydir = ydir; 1294261fa58Smacallan return TRUE; 1304261fa58Smacallan} 1314261fa58Smacallan 1324261fa58Smacallanstatic void 1334261fa58SmacallanCG14Copy(PixmapPtr pDstPixmap, 1344261fa58Smacallan int srcX, int srcY, int dstX, int dstY, int w, int h) 1354261fa58Smacallan{ 1364261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 1374261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1384261fa58Smacallan int dstpitch, dstoff, srcpitch, srcoff; 1394261fa58Smacallan int srcstart, dststart, xinc, srcinc, dstinc; 1404261fa58Smacallan int line, count, s, d, num; 1414261fa58Smacallan 1424261fa58Smacallan ENTER; 1434261fa58Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 1444261fa58Smacallan dstoff = exaGetPixmapOffset(pDstPixmap); 1454261fa58Smacallan srcpitch = p->srcpitch; 1464261fa58Smacallan srcoff = p->srcoff; 1474261fa58Smacallan /* 1484261fa58Smacallan * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 1494261fa58Smacallan * actually wrote anything and only sync if it did 1504261fa58Smacallan */ 1514261fa58Smacallan srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 1524261fa58Smacallan dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 1534261fa58Smacallan 1544261fa58Smacallan /* 1554261fa58Smacallan * we always copy up to 32 pixels at a time so direction doesn't 1564261fa58Smacallan * matter if w<=32 1574261fa58Smacallan */ 1584261fa58Smacallan if (w > 32) { 1594261fa58Smacallan if (p->xdir < 0) { 1604261fa58Smacallan srcstart += (w - 32) << 2; 1614261fa58Smacallan dststart += (w - 32) << 2; 1624261fa58Smacallan xinc = -128; 1634261fa58Smacallan } else 1644261fa58Smacallan xinc = 128; 1654261fa58Smacallan } else 1664261fa58Smacallan xinc = 128; 1674261fa58Smacallan if (p->ydir < 0) { 1684261fa58Smacallan srcstart += (h - 1) * srcpitch; 
1694261fa58Smacallan dststart += (h - 1) * dstpitch; 1704261fa58Smacallan srcinc = -srcpitch; 1714261fa58Smacallan dstinc = -dstpitch; 1724261fa58Smacallan } else { 1734261fa58Smacallan srcinc = srcpitch; 1744261fa58Smacallan dstinc = dstpitch; 1754261fa58Smacallan } 1764261fa58Smacallan if (p->last_rop == 0xcc) { 1774261fa58Smacallan /* plain old copy */ 1784261fa58Smacallan if ( xinc > 0) { 1794261fa58Smacallan /* going left to right */ 1804261fa58Smacallan for (line = 0; line < h; line++) { 1814261fa58Smacallan count = 0; 1824261fa58Smacallan s = srcstart; 1834261fa58Smacallan d = dststart; 1844261fa58Smacallan while ( count < w) { 1854261fa58Smacallan num = min(32, w - count); 1864261fa58Smacallan write_sx_io(p, s, 1874261fa58Smacallan SX_LD(10, num - 1, s & 7)); 1884261fa58Smacallan write_sx_io(p, d, 1894261fa58Smacallan SX_STM(10, num - 1, d & 7)); 1904261fa58Smacallan s += xinc; 1914261fa58Smacallan d += xinc; 1924261fa58Smacallan count += 32; 1934261fa58Smacallan } 1944261fa58Smacallan srcstart += srcinc; 1954261fa58Smacallan dststart += dstinc; 1964261fa58Smacallan } 1974261fa58Smacallan } else { 1984261fa58Smacallan /* going right to left */ 1994261fa58Smacallan int i, chunks = (w >> 5); 2004261fa58Smacallan for (line = 0; line < h; line++) { 2014261fa58Smacallan s = srcstart; 2024261fa58Smacallan d = dststart; 2034261fa58Smacallan count = w; 2044261fa58Smacallan for (i = 0; i < chunks; i++) { 2054261fa58Smacallan write_sx_io(p, s, 2064261fa58Smacallan SX_LD(10, 31, s & 7)); 2074261fa58Smacallan write_sx_io(p, d, 2084261fa58Smacallan SX_STM(10, 31, d & 7)); 2094261fa58Smacallan s -= 128; 2104261fa58Smacallan d -= 128; 2114261fa58Smacallan count -= 32; 2124261fa58Smacallan } 2134261fa58Smacallan /* leftovers, if any */ 2144261fa58Smacallan if (count > 0) { 2154261fa58Smacallan s += (32 - count) << 2; 2164261fa58Smacallan d += (32 - count) << 2; 2174261fa58Smacallan write_sx_io(p, s, 2184261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2194261fa58Smacallan 
write_sx_io(p, d, 2204261fa58Smacallan SX_STM(10, count - 1, d & 7)); 2214261fa58Smacallan } 2224261fa58Smacallan srcstart += srcinc; 2234261fa58Smacallan dststart += dstinc; 2244261fa58Smacallan } 2254261fa58Smacallan } 2264261fa58Smacallan } else { 2274261fa58Smacallan /* ROPs needed */ 2284261fa58Smacallan if ( xinc > 0) { 2294261fa58Smacallan /* going left to right */ 2304261fa58Smacallan for (line = 0; line < h; line++) { 2314261fa58Smacallan count = 0; 2324261fa58Smacallan s = srcstart; 2334261fa58Smacallan d = dststart; 2344261fa58Smacallan while ( count < w) { 2354261fa58Smacallan num = min(32, w - count); 2364261fa58Smacallan write_sx_io(p, s, 2374261fa58Smacallan SX_LD(10, num - 1, s & 7)); 2384261fa58Smacallan write_sx_io(p, d, 2394261fa58Smacallan SX_LD(42, num - 1, d & 7)); 2404261fa58Smacallan if (num > 16) { 2414261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2424261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2434261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2444261fa58Smacallan SX_ROP(26, 58, 90, num - 17)); 2454261fa58Smacallan } else { 2464261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2474261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 2484261fa58Smacallan } 2494261fa58Smacallan write_sx_io(p, d, 2504261fa58Smacallan SX_STM(74, num - 1, d & 7)); 2514261fa58Smacallan s += xinc; 2524261fa58Smacallan d += xinc; 2534261fa58Smacallan count += 32; 2544261fa58Smacallan } 2554261fa58Smacallan srcstart += srcinc; 2564261fa58Smacallan dststart += dstinc; 2574261fa58Smacallan } 2584261fa58Smacallan } else { 2594261fa58Smacallan /* going right to left */ 2604261fa58Smacallan int i, chunks = (w >> 5); 2614261fa58Smacallan for (line = 0; line < h; line++) { 2624261fa58Smacallan s = srcstart; 2634261fa58Smacallan d = dststart; 2644261fa58Smacallan count = w; 2654261fa58Smacallan for (i = 0; i < chunks; i++) { 2664261fa58Smacallan write_sx_io(p, s, SX_LD(10, 31, s & 7)); 2674261fa58Smacallan write_sx_io(p, d, SX_LD(42, 31, d & 7)); 2684261fa58Smacallan 
write_sx_reg(p, SX_INSTRUCTIONS, 2694261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2704261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2714261fa58Smacallan SX_ROP(26, 58, 90, 15)); 2724261fa58Smacallan write_sx_io(p, d, 2734261fa58Smacallan SX_STM(74, 31, d & 7)); 2744261fa58Smacallan s -= 128; 2754261fa58Smacallan d -= 128; 2764261fa58Smacallan count -= 32; 2774261fa58Smacallan } 2784261fa58Smacallan /* leftovers, if any */ 2794261fa58Smacallan if (count > 0) { 2804261fa58Smacallan s += (32 - count) << 2; 2814261fa58Smacallan d += (32 - count) << 2; 2824261fa58Smacallan write_sx_io(p, s, 2834261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2844261fa58Smacallan write_sx_io(p, d, 2854261fa58Smacallan SX_LD(42, count - 1, d & 7)); 2864261fa58Smacallan if (count > 16) { 2874261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2884261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2894261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2904261fa58Smacallan SX_ROP(26, 58, 90, count - 17)); 2914261fa58Smacallan } else { 2924261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2934261fa58Smacallan SX_ROP(10, 42, 74, count - 1)); 2944261fa58Smacallan } 2954261fa58Smacallan 2964261fa58Smacallan write_sx_io(p, d, 2974261fa58Smacallan SX_STM(74, count - 1, d & 7)); 2984261fa58Smacallan } 2994261fa58Smacallan srcstart += srcinc; 3004261fa58Smacallan dststart += dstinc; 3014261fa58Smacallan } 3024261fa58Smacallan } 3034261fa58Smacallan } 3044261fa58Smacallan exaMarkSync(pDstPixmap->drawable.pScreen); 3054261fa58Smacallan} 3064261fa58Smacallan 3074261fa58Smacallanstatic void 3084261fa58SmacallanCG14DoneCopy(PixmapPtr pDstPixmap) 3094261fa58Smacallan{ 3104261fa58Smacallan} 3114261fa58Smacallan 3124261fa58Smacallanstatic Bool 3134261fa58SmacallanCG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 3144261fa58Smacallan{ 3154261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 3164261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 
3174261fa58Smacallan 3184261fa58Smacallan ENTER; 3194261fa58Smacallan DPRINTF(X_ERROR, "bits per pixel: %d\n", pPixmap->drawable.bitsPerPixel); 3204261fa58Smacallan write_sx_reg(p, SX_QUEUED(8), fg); 3214261fa58Smacallan write_sx_reg(p, SX_QUEUED(9), fg); 3224261fa58Smacallan if (planemask != p->last_mask) { 3234261fa58Smacallan CG14Wait(p); 3244261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, planemask); 3254261fa58Smacallan p->last_mask = planemask; 3264261fa58Smacallan } 3274261fa58Smacallan alu = sx_rop[alu]; 3284261fa58Smacallan if (alu != p->last_rop) { 3294261fa58Smacallan CG14Wait(p); 3304261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, alu); 3314261fa58Smacallan p->last_rop = alu; 3324261fa58Smacallan } 3334261fa58Smacallan DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 3344261fa58Smacallan return TRUE; 3354261fa58Smacallan} 3364261fa58Smacallan 3374261fa58Smacallanstatic void 3384261fa58SmacallanCG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 3394261fa58Smacallan{ 3404261fa58Smacallan int line, x, num; 3414261fa58Smacallan uint32_t ptr; 3424261fa58Smacallan 3434261fa58Smacallan ENTER; 3444261fa58Smacallan if (p->last_rop == 0xcc) { 3454261fa58Smacallan /* simple fill */ 3464261fa58Smacallan for (line = 0; line < h; line++) { 3474261fa58Smacallan x = 0; 3484261fa58Smacallan while (x < w) { 3494261fa58Smacallan ptr = start + (x << 2); 3504261fa58Smacallan num = min(32, w - x); 3514261fa58Smacallan write_sx_io(p, ptr, 3524261fa58Smacallan SX_STS(8, num - 1, ptr & 7)); 3534261fa58Smacallan x += 32; 3544261fa58Smacallan } 3554261fa58Smacallan start += pitch; 3564261fa58Smacallan } 3574261fa58Smacallan } else if (p->last_rop == 0xaa) { 3584261fa58Smacallan /* nothing to do here */ 3594261fa58Smacallan return; 3604261fa58Smacallan } else { 3614261fa58Smacallan /* alright, let's do actual ROP stuff */ 3624261fa58Smacallan 3634261fa58Smacallan /* first repeat the fill colour into 16 registers */ 3644261fa58Smacallan write_sx_reg(p, 
SX_INSTRUCTIONS, 3654261fa58Smacallan SX_SELECT_S(8, 8, 10, 15)); 3664261fa58Smacallan 3674261fa58Smacallan for (line = 0; line < h; line++) { 3684261fa58Smacallan x = 0; 3694261fa58Smacallan while (x < w) { 3704261fa58Smacallan ptr = start + (x << 2); 3714261fa58Smacallan num = min(32, w - x); 3724261fa58Smacallan /* now suck fb data into registers */ 3734261fa58Smacallan write_sx_io(p, ptr, 3744261fa58Smacallan SX_LD(42, num - 1, ptr & 7)); 3754261fa58Smacallan /* 3764261fa58Smacallan * ROP them with the fill data we left in 10 3774261fa58Smacallan * non-memory ops can only have counts up to 16 3784261fa58Smacallan */ 3794261fa58Smacallan if (num <= 16) { 3804261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 3814261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 3824261fa58Smacallan } else { 3834261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 3844261fa58Smacallan SX_ROP(10, 42, 74, 15)); 3854261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 3864261fa58Smacallan SX_ROP(10, 58, 90, num - 17)); 3874261fa58Smacallan } 3884261fa58Smacallan /* and write the result back into memory */ 3894261fa58Smacallan write_sx_io(p, ptr, 3904261fa58Smacallan SX_ST(74, num - 1, ptr & 7)); 3914261fa58Smacallan x += 32; 3924261fa58Smacallan } 3934261fa58Smacallan start += pitch; 3944261fa58Smacallan } 3954261fa58Smacallan } 3964261fa58Smacallan} 3974261fa58Smacallan 3984261fa58Smacallanstatic void 3994261fa58SmacallanCG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 4004261fa58Smacallan{ 4014261fa58Smacallan int line, x, num, off; 4024261fa58Smacallan uint32_t ptr; 4034261fa58Smacallan 4044261fa58Smacallan ENTER; 4054261fa58Smacallan off = start & 7; 4064261fa58Smacallan start &= ~7; 4074261fa58Smacallan 4084261fa58Smacallan if (p->last_rop == 0xcc) { 4094261fa58Smacallan /* simple fill */ 4104261fa58Smacallan for (line = 0; line < h; line++) { 4114261fa58Smacallan x = 0; 4124261fa58Smacallan while (x < w) { 4134261fa58Smacallan ptr = start + x; 4144261fa58Smacallan num 
= min(32, w - x); 4154261fa58Smacallan write_sx_io(p, ptr, 4164261fa58Smacallan SX_STBS(8, num - 1, off)); 4174261fa58Smacallan x += 32; 4184261fa58Smacallan } 4194261fa58Smacallan start += pitch; 4204261fa58Smacallan } 4214261fa58Smacallan } else if (p->last_rop == 0xaa) { 4224261fa58Smacallan /* nothing to do here */ 4234261fa58Smacallan return; 4244261fa58Smacallan } else { 4254261fa58Smacallan /* alright, let's do actual ROP stuff */ 4264261fa58Smacallan 4274261fa58Smacallan /* first repeat the fill colour into 16 registers */ 4284261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 4294261fa58Smacallan SX_SELECT_S(8, 8, 10, 15)); 4304261fa58Smacallan 4314261fa58Smacallan for (line = 0; line < h; line++) { 4324261fa58Smacallan x = 0; 4334261fa58Smacallan while (x < w) { 4344261fa58Smacallan ptr = start + x; 4354261fa58Smacallan num = min(32, w - x); 4364261fa58Smacallan /* now suck fb data into registers */ 4374261fa58Smacallan write_sx_io(p, ptr, 4384261fa58Smacallan SX_LDB(42, num - 1, off)); 4394261fa58Smacallan /* 4404261fa58Smacallan * ROP them with the fill data we left in 10 4414261fa58Smacallan * non-memory ops can only have counts up to 16 4424261fa58Smacallan */ 4434261fa58Smacallan if (num <= 16) { 4444261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 4454261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 4464261fa58Smacallan } else { 4474261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 4484261fa58Smacallan SX_ROP(10, 42, 74, 15)); 4494261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 4504261fa58Smacallan SX_ROP(10, 58, 90, num - 17)); 4514261fa58Smacallan } 4524261fa58Smacallan /* and write the result back into memory */ 4534261fa58Smacallan write_sx_io(p, ptr, 4544261fa58Smacallan SX_STB(74, num - 1, off)); 4554261fa58Smacallan x += 32; 4564261fa58Smacallan } 4574261fa58Smacallan start += pitch; 4584261fa58Smacallan } 4594261fa58Smacallan } 4604261fa58Smacallan} 4614261fa58Smacallan 4624261fa58Smacallanstatic void 4634261fa58SmacallanCG14Solid(PixmapPtr 
pPixmap, int x1, int y1, int x2, int y2) 4644261fa58Smacallan{ 4654261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 4664261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 4674261fa58Smacallan int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 4684261fa58Smacallan int start, depth; 4694261fa58Smacallan 4704261fa58Smacallan ENTER; 4714261fa58Smacallan dstpitch = exaGetPixmapPitch(pPixmap); 4724261fa58Smacallan dstoff = exaGetPixmapOffset(pPixmap); 4734261fa58Smacallan 4744261fa58Smacallan depth = pPixmap->drawable.bitsPerPixel; 4754261fa58Smacallan switch (depth) { 4764261fa58Smacallan case 32: 4774261fa58Smacallan start = dstoff + (y1 * dstpitch) + (x1 << 2); 4784261fa58Smacallan CG14Solid32(p, start, dstpitch, w, h); 4794261fa58Smacallan break; 4804261fa58Smacallan case 8: 4814261fa58Smacallan start = dstoff + (y1 * dstpitch) + x1; 4824261fa58Smacallan CG14Solid8(p, start, dstpitch, w, h); 4834261fa58Smacallan break; 4844261fa58Smacallan } 4854261fa58Smacallan 4864261fa58Smacallan DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 4874261fa58Smacallan dstpitch, dstoff, start); 4884261fa58Smacallan DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 4894261fa58Smacallan read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 4904261fa58Smacallan exaMarkSync(pPixmap->drawable.pScreen); 4914261fa58Smacallan} 4924261fa58Smacallan 4934261fa58Smacallan/* 4944261fa58Smacallan * Memcpy-based UTS. 
4954261fa58Smacallan */ 4964261fa58Smacallanstatic Bool 4974261fa58SmacallanCG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 4984261fa58Smacallan char *src, int src_pitch) 4994261fa58Smacallan{ 5004261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 5014261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 5024261fa58Smacallan char *dst = p->fb + exaGetPixmapOffset(pDst); 5034261fa58Smacallan int dst_pitch = exaGetPixmapPitch(pDst); 5044261fa58Smacallan 5054261fa58Smacallan int bpp = pDst->drawable.bitsPerPixel; 5064261fa58Smacallan int cpp = (bpp + 7) >> 3; 5074261fa58Smacallan int wBytes = w * cpp; 5084261fa58Smacallan 5094261fa58Smacallan ENTER; 5104261fa58Smacallan dst += (x * cpp) + (y * dst_pitch); 5114261fa58Smacallan 5124261fa58Smacallan CG14Wait(p); 5134261fa58Smacallan 5144261fa58Smacallan while (h--) { 5154261fa58Smacallan memcpy(dst, src, wBytes); 5164261fa58Smacallan src += src_pitch; 5174261fa58Smacallan dst += dst_pitch; 5184261fa58Smacallan } 5194261fa58Smacallan __asm("stbar;"); 5204261fa58Smacallan return TRUE; 5214261fa58Smacallan} 5224261fa58Smacallan 5234261fa58Smacallan/* 5244261fa58Smacallan * Memcpy-based DFS. 
5254261fa58Smacallan */ 5264261fa58Smacallanstatic Bool 5274261fa58SmacallanCG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 5284261fa58Smacallan char *dst, int dst_pitch) 5294261fa58Smacallan{ 5304261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 5314261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 5324261fa58Smacallan char *src = p->fb + exaGetPixmapOffset(pSrc); 5334261fa58Smacallan int src_pitch = exaGetPixmapPitch(pSrc); 5344261fa58Smacallan 5354261fa58Smacallan ENTER; 5364261fa58Smacallan int bpp = pSrc->drawable.bitsPerPixel; 5374261fa58Smacallan int cpp = (bpp + 7) >> 3; 5384261fa58Smacallan int wBytes = w * cpp; 5394261fa58Smacallan 5404261fa58Smacallan src += (x * cpp) + (y * src_pitch); 5414261fa58Smacallan 5424261fa58Smacallan CG14Wait(p); 5434261fa58Smacallan 5444261fa58Smacallan while (h--) { 5454261fa58Smacallan memcpy(dst, src, wBytes); 5464261fa58Smacallan src += src_pitch; 5474261fa58Smacallan dst += dst_pitch; 5484261fa58Smacallan } 5494261fa58Smacallan 5504261fa58Smacallan return TRUE; 5514261fa58Smacallan} 5524261fa58Smacallan 5534261fa58SmacallanBool 5544261fa58SmacallanCG14CheckComposite(int op, PicturePtr pSrcPicture, 5554261fa58Smacallan PicturePtr pMaskPicture, 5564261fa58Smacallan PicturePtr pDstPicture) 5574261fa58Smacallan{ 5584261fa58Smacallan int i, ok = FALSE; 5594261fa58Smacallan 5604261fa58Smacallan ENTER; 5614261fa58Smacallan 5624261fa58Smacallan /* 5634261fa58Smacallan * SX is in theory capable of accelerating pretty much all Xrender ops, 5644261fa58Smacallan * even coordinate transformation and gradients. Support will be added 5654261fa58Smacallan * over time and likely have to spill over into its own source file. 
5664261fa58Smacallan */ 5674261fa58Smacallan 5684261fa58Smacallan if ((op != PictOpOver) && (op != PictOpAdd)) { 5694261fa58Smacallan xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op); 5704261fa58Smacallan return FALSE; 5714261fa58Smacallan } 5724261fa58Smacallan i = 0; 5734261fa58Smacallan while ((i < arraysize(src_formats)) && (!ok)) { 5744261fa58Smacallan ok = (pSrcPicture->format == src_formats[i]); 5754261fa58Smacallan i++; 5764261fa58Smacallan } 5774261fa58Smacallan 5784261fa58Smacallan if (!ok) { 5794261fa58Smacallan xf86Msg(X_ERROR, "%s: unsupported src format %x\n", 5804261fa58Smacallan __func__, pSrcPicture->format); 5814261fa58Smacallan return FALSE; 5824261fa58Smacallan } 5834261fa58Smacallan 5844261fa58Smacallan DPRINTF(X_ERROR, "src is %x %d %d\n", pSrcPicture->format, 5854261fa58Smacallan pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height); 5864261fa58Smacallan 5874261fa58Smacallan if (pMaskPicture != NULL) { 5884261fa58Smacallan DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 5894261fa58Smacallan pMaskPicture->pDrawable->width, 5904261fa58Smacallan pMaskPicture->pDrawable->height); 5914261fa58Smacallan } 5924261fa58Smacallan return TRUE; 5934261fa58Smacallan} 5944261fa58Smacallan 5954261fa58SmacallanBool 5964261fa58SmacallanCG14PrepareComposite(int op, PicturePtr pSrcPicture, 5974261fa58Smacallan PicturePtr pMaskPicture, 5984261fa58Smacallan PicturePtr pDstPicture, 5994261fa58Smacallan PixmapPtr pSrc, 6004261fa58Smacallan PixmapPtr pMask, 6014261fa58Smacallan PixmapPtr pDst) 6024261fa58Smacallan{ 6034261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 6044261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 6054261fa58Smacallan 6064261fa58Smacallan ENTER; 6074261fa58Smacallan 6084261fa58Smacallan if (pSrcPicture->pSourcePict != NULL) { 6094261fa58Smacallan if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 6104261fa58Smacallan p->fillcolour = 6114261fa58Smacallan 
pSrcPicture->pSourcePict->solidFill.color; 6124261fa58Smacallan DPRINTF(X_ERROR, "%s: solid src %08x\n", 6134261fa58Smacallan __func__, p->fillcolour); 6144261fa58Smacallan } 6154261fa58Smacallan } 6164261fa58Smacallan if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 6174261fa58Smacallan if (pMaskPicture->pSourcePict->type == 6184261fa58Smacallan SourcePictTypeSolidFill) { 6194261fa58Smacallan p->fillcolour = 6204261fa58Smacallan pMaskPicture->pSourcePict->solidFill.color; 6214261fa58Smacallan DPRINTF(X_ERROR, "%s: solid mask %08x\n", 6224261fa58Smacallan __func__, p->fillcolour); 6234261fa58Smacallan } 6244261fa58Smacallan } 6254261fa58Smacallan if (pMaskPicture != NULL) { 6264261fa58Smacallan p->mskoff = exaGetPixmapOffset(pMask); 6274261fa58Smacallan p->mskpitch = exaGetPixmapPitch(pMask); 6284261fa58Smacallan p->mskformat = pMaskPicture->format; 6294261fa58Smacallan } 6304261fa58Smacallan p->srcoff = exaGetPixmapOffset(pSrc); 6314261fa58Smacallan p->srcpitch = exaGetPixmapPitch(pSrc); 6324261fa58Smacallan p->srcformat = pSrcPicture->format; 6334261fa58Smacallan p->dstformat = pDstPicture->format; 6344261fa58Smacallan p->op = op; 6354261fa58Smacallan#ifdef SX_DEBUG 6364261fa58Smacallan DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 6374261fa58Smacallan *(uint32_t *)(p->fb + p->srcoff)); 6384261fa58Smacallan#endif 6394261fa58Smacallan return TRUE; 6404261fa58Smacallan} 6414261fa58Smacallan 6424261fa58Smacallanvoid CG14Comp_Over32(Cg14Ptr p, 6434261fa58Smacallan uint32_t src, uint32_t srcpitch, 6444261fa58Smacallan uint32_t dst, uint32_t dstpitch, 6454261fa58Smacallan int width, int height) 6464261fa58Smacallan{ 6474261fa58Smacallan uint32_t msk = src, mskx, dstx, m; 6484261fa58Smacallan int line, x, i; 6494261fa58Smacallan 6504261fa58Smacallan ENTER; 6514261fa58Smacallan /* first get the source colour */ 6524261fa58Smacallan write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 6534261fa58Smacallan write_sx_reg(p, SX_QUEUED(8), 
0xff); 6544261fa58Smacallan for (line = 0; line < height; line++) { 6554261fa58Smacallan mskx = msk; 6564261fa58Smacallan dstx = dst; 6574261fa58Smacallan#ifdef SX_SINGLE 6584261fa58Smacallan 6594261fa58Smacallan for (x = 0; x < width; x++) { 6604261fa58Smacallan m = *(volatile uint32_t *)(p->fb + mskx); 6614261fa58Smacallan m = m >> 24; 6624261fa58Smacallan if (m == 0) { 6634261fa58Smacallan /* nothing to do - all transparent */ 6644261fa58Smacallan } else if (m == 0xff) { 6654261fa58Smacallan /* all opaque */ 6664261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 6674261fa58Smacallan } else { 6684261fa58Smacallan /* fetch alpha value, stick it into scam */ 6694261fa58Smacallan /* mask is in R[12:15] */ 6704261fa58Smacallan /*write_sx_io(p, mskx, 6714261fa58Smacallan SX_LDUQ0(12, 0, mskx & 7));*/ 6724261fa58Smacallan write_sx_reg(p, SX_QUEUED(12), m); 6734261fa58Smacallan /* fetch dst pixel */ 6744261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 6754261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6764261fa58Smacallan SX_ORV(12, 0, R_SCAM, 0)); 6774261fa58Smacallan /* 6784261fa58Smacallan * src * alpha + R0 6794261fa58Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 6804261fa58Smacallan */ 6814261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6824261fa58Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 6834261fa58Smacallan 6844261fa58Smacallan /* invert SCAM */ 6854261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6864261fa58Smacallan SX_XORV(12, 8, R_SCAM, 0)); 6874261fa58Smacallan#ifdef SX_DEBUG 6884261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6894261fa58Smacallan SX_XORV(12, 8, 13, 0)); 6904261fa58Smacallan#endif 6914261fa58Smacallan /* dst * (1 - alpha) + R[13:15] */ 6924261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6934261fa58Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 6944261fa58Smacallan write_sx_io(p, dstx, 6954261fa58Smacallan SX_STUQ0C(24, 0, dstx & 7)); 6964261fa58Smacallan } 6974261fa58Smacallan dstx += 4; 6984261fa58Smacallan 
mskx += 4; 6994261fa58Smacallan } 7004261fa58Smacallan#else 7014261fa58Smacallan for (x = 0; x < width; x += 4) { 7024261fa58Smacallan /* fetch 4 mask values */ 7034261fa58Smacallan write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7)); 7044261fa58Smacallan /* fetch destination pixels */ 7054261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 7064261fa58Smacallan /* duplicate them for all channels */ 7074261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 7084261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 7094261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 7104261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 7114261fa58Smacallan /* generate inverted alpha */ 7124261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7134261fa58Smacallan SX_XORS(12, 8, 28, 15)); 7144261fa58Smacallan /* multiply source */ 7154261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7164261fa58Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 7174261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7184261fa58Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 7194261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7204261fa58Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 7214261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7224261fa58Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 7234261fa58Smacallan /* multiply dest */ 7244261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7254261fa58Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 7264261fa58Smacallan /* add up */ 7274261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7284261fa58Smacallan SX_ADDV(44, 76, 92, 15)); 7294261fa58Smacallan /* write back */ 7304261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 7314261fa58Smacallan dstx += 16; 7324261fa58Smacallan mskx += 16; 7334261fa58Smacallan } 7344261fa58Smacallan#endif 7354261fa58Smacallan dst += dstpitch; 7364261fa58Smacallan msk += srcpitch; 7374261fa58Smacallan } 7384261fa58Smacallan} 7394261fa58Smacallan 
7404261fa58Smacallanvoid CG14Comp_Over8(Cg14Ptr p, 7414261fa58Smacallan uint32_t src, uint32_t srcpitch, 7424261fa58Smacallan uint32_t dst, uint32_t dstpitch, 7434261fa58Smacallan int width, int height) 7444261fa58Smacallan{ 7454261fa58Smacallan uint32_t msk = src, mskx, dstx, m; 7464261fa58Smacallan int line, x, i; 7474261fa58Smacallan#ifdef SX_DEBUG 7484261fa58Smacallan char buffer[256]; 7494261fa58Smacallan#endif 7504261fa58Smacallan ENTER; 7514261fa58Smacallan 7524261fa58Smacallan /* first get the source colour */ 7534261fa58Smacallan write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7)); 7544261fa58Smacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 7554261fa58Smacallan DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)), 7564261fa58Smacallan read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)), 7574261fa58Smacallan *(uint32_t *)(p->fb + p->srcoff)); 7584261fa58Smacallan for (line = 0; line < height; line++) { 7594261fa58Smacallan mskx = msk; 7604261fa58Smacallan dstx = dst; 7614261fa58Smacallan#ifdef SX_SINGLE 7624261fa58Smacallan 7634261fa58Smacallan for (x = 0; x < width; x++) { 7644261fa58Smacallan m = *(volatile uint8_t *)(p->fb + mskx); 7654261fa58Smacallan#ifdef SX_DEBUG 7664261fa58Smacallan buffer[x] = c[m >> 5]; 7674261fa58Smacallan#endif 7684261fa58Smacallan if (m == 0) { 7694261fa58Smacallan /* nothing to do - all transparent */ 7704261fa58Smacallan } else if (m == 0xff) { 7714261fa58Smacallan /* all opaque */ 7724261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7)); 7734261fa58Smacallan } else { 7744261fa58Smacallan /* fetch alpha value, stick it into scam */ 7754261fa58Smacallan /* mask is in R[12:15] */ 7764261fa58Smacallan /*write_sx_io(p, mskx & ~7, 7774261fa58Smacallan SX_LDB(12, 0, mskx & 7));*/ 7784261fa58Smacallan write_sx_reg(p, SX_QUEUED(12), m); 7794261fa58Smacallan /* fetch dst pixel */ 7804261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); 7814261fa58Smacallan write_sx_reg(p, 
SX_INSTRUCTIONS, 7824261fa58Smacallan SX_ORV(12, 0, R_SCAM, 0)); 7834261fa58Smacallan /* 7844261fa58Smacallan * src * alpha + R0 7854261fa58Smacallan * R[9:11] * SCAM + R0 -> R[17:19] 7864261fa58Smacallan */ 7874261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7884261fa58Smacallan SX_SAXP16X16SR8(9, 0, 17, 2)); 7894261fa58Smacallan 7904261fa58Smacallan /* invert SCAM */ 7914261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7924261fa58Smacallan SX_XORV(12, 8, R_SCAM, 0)); 7934261fa58Smacallan#ifdef SX_DEBUG 7944261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7954261fa58Smacallan SX_XORV(12, 8, 13, 0)); 7964261fa58Smacallan#endif 7974261fa58Smacallan /* dst * (1 - alpha) + R[13:15] */ 7984261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 7994261fa58Smacallan SX_SAXP16X16SR8(21, 17, 25, 2)); 8004261fa58Smacallan write_sx_io(p, dstx, 8014261fa58Smacallan SX_STUQ0C(24, 0, dstx & 7)); 8024261fa58Smacallan } 8034261fa58Smacallan dstx += 4; 8044261fa58Smacallan mskx += 1; 8054261fa58Smacallan } 8064261fa58Smacallan#ifdef SX_DEBUG 8074261fa58Smacallan buffer[x] = 0; 8084261fa58Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 8094261fa58Smacallan#endif 8104261fa58Smacallan#else 8114261fa58Smacallan for (x = 0; x < width; x += 4) { 8124261fa58Smacallan /* fetch 4 mask values */ 8134261fa58Smacallan write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7)); 8144261fa58Smacallan /* fetch destination pixels */ 8154261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7)); 8164261fa58Smacallan /* duplicate them for all channels */ 8174261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2)); 8184261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2)); 8194261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2)); 8204261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2)); 8214261fa58Smacallan /* generate inverted alpha */ 8224261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8234261fa58Smacallan SX_XORS(12, 8, 28, 15)); 
8244261fa58Smacallan /* multiply source */ 8254261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8264261fa58Smacallan SX_MUL16X16SR8(8, 12, 44, 3)); 8274261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8284261fa58Smacallan SX_MUL16X16SR8(8, 16, 48, 3)); 8294261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8304261fa58Smacallan SX_MUL16X16SR8(8, 20, 52, 3)); 8314261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8324261fa58Smacallan SX_MUL16X16SR8(8, 24, 56, 3)); 8334261fa58Smacallan /* multiply dest */ 8344261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8354261fa58Smacallan SX_MUL16X16SR8(28, 60, 76, 15)); 8364261fa58Smacallan /* add up */ 8374261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8384261fa58Smacallan SX_ADDV(44, 76, 92, 15)); 8394261fa58Smacallan /* write back */ 8404261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7)); 8414261fa58Smacallan dstx += 16; 8424261fa58Smacallan mskx += 4; 8434261fa58Smacallan } 8444261fa58Smacallan#endif 8454261fa58Smacallan dst += dstpitch; 8464261fa58Smacallan msk += srcpitch; 8474261fa58Smacallan } 8484261fa58Smacallan} 8494261fa58Smacallan 8504261fa58Smacallanvoid CG14Comp_Add32(Cg14Ptr p, 8514261fa58Smacallan uint32_t src, uint32_t srcpitch, 8524261fa58Smacallan uint32_t dst, uint32_t dstpitch, 8534261fa58Smacallan int width, int height) 8544261fa58Smacallan{ 8554261fa58Smacallan int line; 8564261fa58Smacallan uint32_t srcx, dstx; 8574261fa58Smacallan int full, part, x; 8584261fa58Smacallan 8594261fa58Smacallan ENTER; 8604261fa58Smacallan full = width >> 3; /* chunks of 8 */ 8614261fa58Smacallan part = width & 7; /* leftovers */ 8624261fa58Smacallan /* we do this up to 8 pixels at a time */ 8634261fa58Smacallan for (line = 0; line < height; line++) { 8644261fa58Smacallan srcx = src; 8654261fa58Smacallan dstx = dst; 8664261fa58Smacallan for (x = 0; x < full; x++) { 8674261fa58Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7)); 8684261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7)); 
8694261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8704261fa58Smacallan SX_ADDV(8, 40, 72, 15)); 8714261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8724261fa58Smacallan SX_ADDV(24, 56, 88, 15)); 8734261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7)); 8744261fa58Smacallan srcx += 128; 8754261fa58Smacallan dstx += 128; 8764261fa58Smacallan } 8774261fa58Smacallan 8784261fa58Smacallan /* do leftovers */ 8794261fa58Smacallan write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7)); 8804261fa58Smacallan write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7)); 8814261fa58Smacallan if (part & 16) { 8824261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8834261fa58Smacallan SX_ADDV(8, 40, 72, 15)); 8844261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8854261fa58Smacallan SX_ADDV(24, 56, 88, part - 17)); 8864261fa58Smacallan } else { 8874261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 8884261fa58Smacallan SX_ADDV(8, 40, 72, part - 1)); 8894261fa58Smacallan } 8904261fa58Smacallan write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7)); 8914261fa58Smacallan 8924261fa58Smacallan /* next line */ 8934261fa58Smacallan src += srcpitch; 8944261fa58Smacallan dst += dstpitch; 8954261fa58Smacallan } 8964261fa58Smacallan} 8974261fa58Smacallan 8984261fa58Smacallanvoid CG14Comp_Add8(Cg14Ptr p, 8994261fa58Smacallan uint32_t src, uint32_t srcpitch, 9004261fa58Smacallan uint32_t dst, uint32_t dstpitch, 9014261fa58Smacallan int width, int height) 9024261fa58Smacallan{ 9034261fa58Smacallan int line; 9044261fa58Smacallan uint32_t srcx, dstx, srcoff, dstoff; 9054261fa58Smacallan int pre, full, part, x; 9064261fa58Smacallan uint8_t *d; 9074261fa58Smacallan char buffer[256]; 9084261fa58Smacallan ENTER; 9094261fa58Smacallan 9104261fa58Smacallan srcoff = src & 7; 9114261fa58Smacallan src &= ~7; 9124261fa58Smacallan dstoff = dst & 7; 9134261fa58Smacallan dst &= ~7; 9144261fa58Smacallan full = width >> 5; /* chunks of 32 */ 9154261fa58Smacallan part = width & 31; /* leftovers */ 
9164261fa58Smacallan 9174261fa58Smacallan#ifdef SX_DEBUG 9184261fa58Smacallan xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch, 9194261fa58Smacallan width, height, full, part); 9204261fa58Smacallan#endif 9214261fa58Smacallan /* we do this up to 32 pixels at a time */ 9224261fa58Smacallan for (line = 0; line < height; line++) { 9234261fa58Smacallan srcx = src; 9244261fa58Smacallan dstx = dst; 9254261fa58Smacallan#ifdef SX_ADD_SOFTWARE 9264261fa58Smacallan uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff); 9274261fa58Smacallan d = (uint8_t *)(p->fb + dstx + dstoff); 9284261fa58Smacallan for (x = 0; x < width; x++) { 9294261fa58Smacallan d[x] = min(255, s[x] + d[x]); 9304261fa58Smacallan } 9314261fa58Smacallan#else 9324261fa58Smacallan for (x = 0; x < full; x++) { 9334261fa58Smacallan write_sx_io(p, srcx, SX_LDB(8, 31, srcoff)); 9344261fa58Smacallan write_sx_io(p, dstx, SX_LDB(40, 31, dstoff)); 9354261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9364261fa58Smacallan SX_ADDV(8, 40, 72, 15)); 9374261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9384261fa58Smacallan SX_ADDV(24, 56, 88, 15)); 9394261fa58Smacallan write_sx_io(p, dstx, SX_STBC(72, 31, dstoff)); 9404261fa58Smacallan srcx += 32; 9414261fa58Smacallan dstx += 32; 9424261fa58Smacallan } 9434261fa58Smacallan 9444261fa58Smacallan if (part > 0) { 9454261fa58Smacallan /* do leftovers */ 9464261fa58Smacallan write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff)); 9474261fa58Smacallan write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff)); 9484261fa58Smacallan if (part > 16) { 9494261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9504261fa58Smacallan SX_ADDV(8, 40, 72, 15)); 9514261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9524261fa58Smacallan SX_ADDV(24, 56, 88, part - 17)); 9534261fa58Smacallan } else { 9544261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 9554261fa58Smacallan SX_ADDV(8, 40, 72, part - 1)); 9564261fa58Smacallan } 9574261fa58Smacallan write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff)); 
9584261fa58Smacallan } 9594261fa58Smacallan#endif 9604261fa58Smacallan#ifdef SX_DEBUG 9614261fa58Smacallan d = (uint8_t *)(p->fb + src + srcoff); 9624261fa58Smacallan for (x = 0; x < width; x++) { 9634261fa58Smacallan buffer[x] = c[d[x]>>5]; 9644261fa58Smacallan } 9654261fa58Smacallan buffer[x] = 0; 9664261fa58Smacallan xf86Msg(X_ERROR, "%s\n", buffer); 9674261fa58Smacallan#endif 9684261fa58Smacallan /* next line */ 9694261fa58Smacallan src += srcpitch; 9704261fa58Smacallan dst += dstpitch; 9714261fa58Smacallan } 9724261fa58Smacallan} 9734261fa58Smacallan 9744261fa58Smacallanvoid 9754261fa58SmacallanCG14Composite(PixmapPtr pDst, int srcX, int srcY, 9764261fa58Smacallan int maskX, int maskY, 9774261fa58Smacallan int dstX, int dstY, 9784261fa58Smacallan int width, int height) 9794261fa58Smacallan{ 9804261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 9814261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 9824261fa58Smacallan uint32_t dstoff, dstpitch; 9834261fa58Smacallan uint32_t dst, msk, src; 9844261fa58Smacallan 9854261fa58Smacallan ENTER; 9864261fa58Smacallan dstoff = exaGetPixmapOffset(pDst); 9874261fa58Smacallan dstpitch = exaGetPixmapPitch(pDst); 9884261fa58Smacallan 9894261fa58Smacallan switch (p->op) { 9904261fa58Smacallan case PictOpOver: 9914261fa58Smacallan dst = dstoff + (dstY * dstpitch) + (dstX << 2); 9924261fa58Smacallan DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 9934261fa58Smacallan p->mskformat, p->dstformat, srcX, srcY); 9944261fa58Smacallan switch (p->mskformat) { 9954261fa58Smacallan case PICT_a8: 9964261fa58Smacallan msk = p->mskoff + 9974261fa58Smacallan (maskY * p->mskpitch) + maskX; 9984261fa58Smacallan CG14Comp_Over8(p, msk, p->mskpitch, 9994261fa58Smacallan dst, dstpitch, width, height); 10004261fa58Smacallan break; 10014261fa58Smacallan case PICT_a8r8g8b8: 10024261fa58Smacallan case PICT_a8b8g8r8: 10034261fa58Smacallan msk = p->mskoff + 10044261fa58Smacallan (maskY * p->mskpitch) + 
10054261fa58Smacallan (maskX << 2); 10064261fa58Smacallan CG14Comp_Over32(p, msk, p->mskpitch, 10074261fa58Smacallan dst, dstpitch, width, height); 10084261fa58Smacallan break; 10094261fa58Smacallan default: 10104261fa58Smacallan xf86Msg(X_ERROR, 10114261fa58Smacallan "unsupported mask format\n"); 10124261fa58Smacallan } 10134261fa58Smacallan break; 10144261fa58Smacallan case PictOpAdd: 10154261fa58Smacallan DPRINTF(X_ERROR, "Add %08x %08x\n", 10164261fa58Smacallan p->srcformat, p->dstformat); 10174261fa58Smacallan switch (p->srcformat) { 10184261fa58Smacallan case PICT_a8: 10194261fa58Smacallan src = p->srcoff + 10204261fa58Smacallan (srcY * p->srcpitch) + srcX; 10214261fa58Smacallan dst = dstoff + (dstY * dstpitch) + dstX; 10224261fa58Smacallan CG14Comp_Add8(p, src, p->srcpitch, 10234261fa58Smacallan dst, dstpitch, width, height); 10244261fa58Smacallan break; 10254261fa58Smacallan case PICT_a8r8g8b8: 10264261fa58Smacallan case PICT_x8r8g8b8: 10274261fa58Smacallan src = p->srcoff + 10284261fa58Smacallan (srcY * p->srcpitch) + (srcX << 2); 10294261fa58Smacallan dst = dstoff + (dstY * dstpitch) + 10304261fa58Smacallan (dstX << 2); 10314261fa58Smacallan CG14Comp_Add32(p, src, p->srcpitch, 10324261fa58Smacallan dst, dstpitch, width, height); 10334261fa58Smacallan break; 10344261fa58Smacallan default: 10354261fa58Smacallan xf86Msg(X_ERROR, 10364261fa58Smacallan "unsupported src format\n"); 10374261fa58Smacallan } 10384261fa58Smacallan break; 10394261fa58Smacallan default: 10404261fa58Smacallan xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 10414261fa58Smacallan } 10424261fa58Smacallan exaMarkSync(pDst->drawable.pScreen); 10434261fa58Smacallan} 10444261fa58Smacallan 10454261fa58Smacallan 10464261fa58Smacallan 10474261fa58SmacallanBool 10484261fa58SmacallanCG14InitAccel(ScreenPtr pScreen) 10494261fa58Smacallan{ 10504261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 10514261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 10524261fa58Smacallan 
ExaDriverPtr pExa; 10534261fa58Smacallan 10544261fa58Smacallan pExa = exaDriverAlloc(); 10554261fa58Smacallan if (!pExa) 10564261fa58Smacallan return FALSE; 10574261fa58Smacallan 10584261fa58Smacallan p->pExa = pExa; 10594261fa58Smacallan 10604261fa58Smacallan pExa->exa_major = EXA_VERSION_MAJOR; 10614261fa58Smacallan pExa->exa_minor = EXA_VERSION_MINOR; 10624261fa58Smacallan 10634261fa58Smacallan pExa->memoryBase = p->fb; 10644261fa58Smacallan pExa->memorySize = p->memsize; 10654261fa58Smacallan pExa->offScreenBase = p->width * p->height * 4; 10664261fa58Smacallan 10674261fa58Smacallan /* 10684261fa58Smacallan * SX memory instructions are written to 64bit aligned addresses with 10694261fa58Smacallan * a 3 bit displacement. Make sure the displacement remains constant 10704261fa58Smacallan * within one column 10714261fa58Smacallan */ 10724261fa58Smacallan 10734261fa58Smacallan pExa->pixmapOffsetAlign = 8; 10744261fa58Smacallan pExa->pixmapPitchAlign = 8; 10754261fa58Smacallan 10764261fa58Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS | 10774261fa58Smacallan /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/ 10784261fa58Smacallan EXA_MIXED_PIXMAPS; 10794261fa58Smacallan 10804261fa58Smacallan /* 10814261fa58Smacallan * these limits are bogus 10824261fa58Smacallan * SX doesn't deal with coordinates at all, so there is no limit but 10834261fa58Smacallan * we have to put something here 10844261fa58Smacallan */ 10854261fa58Smacallan pExa->maxX = 4096; 10864261fa58Smacallan pExa->maxY = 4096; 10874261fa58Smacallan 10884261fa58Smacallan pExa->WaitMarker = CG14WaitMarker; 10894261fa58Smacallan 10904261fa58Smacallan pExa->PrepareSolid = CG14PrepareSolid; 10914261fa58Smacallan pExa->Solid = CG14Solid; 10924261fa58Smacallan pExa->DoneSolid = CG14DoneCopy; 10934261fa58Smacallan pExa->PrepareCopy = CG14PrepareCopy; 10944261fa58Smacallan pExa->Copy = CG14Copy; 10954261fa58Smacallan pExa->DoneCopy = CG14DoneCopy; 10964261fa58Smacallan if (p->use_xrender) { 10974261fa58Smacallan 
pExa->CheckComposite = CG14CheckComposite; 10984261fa58Smacallan pExa->PrepareComposite = CG14PrepareComposite; 10994261fa58Smacallan pExa->Composite = CG14Composite; 11004261fa58Smacallan pExa->DoneComposite = CG14DoneCopy; 11014261fa58Smacallan } 11024261fa58Smacallan 11034261fa58Smacallan /* EXA hits more optimized paths when it does not have to fallback 11044261fa58Smacallan * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 11054261fa58Smacallan */ 11064261fa58Smacallan pExa->UploadToScreen = CG14UploadToScreen; 11074261fa58Smacallan pExa->DownloadFromScreen = CG14DownloadFromScreen; 11084261fa58Smacallan 11094261fa58Smacallan /* do some hardware init */ 11104261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 11114261fa58Smacallan p->last_mask = 0xffffffff; 11124261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 11134261fa58Smacallan p->last_rop = 0xcc; 11144261fa58Smacallan return exaDriverInit(pScreen, pExa); 11154261fa58Smacallan} 1116