cg14_accel.c revision fc473876
1fc473876Smacallan/* $NetBSD: cg14_accel.c,v 1.14 2019/03/01 02:22:27 macallan Exp $ */ 24261fa58Smacallan/* 34261fa58Smacallan * Copyright (c) 2013 Michael Lorenz 44261fa58Smacallan * All rights reserved. 54261fa58Smacallan * 64261fa58Smacallan * Redistribution and use in source and binary forms, with or without 74261fa58Smacallan * modification, are permitted provided that the following conditions 84261fa58Smacallan * are met: 94261fa58Smacallan * 104261fa58Smacallan * - Redistributions of source code must retain the above copyright 114261fa58Smacallan * notice, this list of conditions and the following disclaimer. 124261fa58Smacallan * - Redistributions in binary form must reproduce the above 134261fa58Smacallan * copyright notice, this list of conditions and the following 144261fa58Smacallan * disclaimer in the documentation and/or other materials provided 154261fa58Smacallan * with the distribution. 164261fa58Smacallan * 174261fa58Smacallan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 184261fa58Smacallan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 194261fa58Smacallan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 204261fa58Smacallan * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 214261fa58Smacallan * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 224261fa58Smacallan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 234261fa58Smacallan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 244261fa58Smacallan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 254261fa58Smacallan * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 264261fa58Smacallan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 274261fa58Smacallan * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 284261fa58Smacallan * POSSIBILITY OF SUCH DAMAGE. 294261fa58Smacallan * 304261fa58Smacallan */ 31c88c16f8Smacallan 32c88c16f8Smacallan#ifdef HAVE_CONFIG_H 33c88c16f8Smacallan#include "config.h" 34c88c16f8Smacallan#endif 35c88c16f8Smacallan 364261fa58Smacallan#include <sys/types.h> 374261fa58Smacallan 384261fa58Smacallan/* all driver need this */ 394261fa58Smacallan#include "xf86.h" 404261fa58Smacallan#include "xf86_OSproc.h" 414261fa58Smacallan#include "compiler.h" 424261fa58Smacallan 434261fa58Smacallan#include "cg14.h" 444261fa58Smacallan#include <sparc/sxreg.h> 454261fa58Smacallan 464261fa58Smacallan/*#define SX_DEBUG*/ 474261fa58Smacallan 484261fa58Smacallan#ifdef SX_DEBUG 494261fa58Smacallan#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__); 504261fa58Smacallan#define DPRINTF xf86Msg 514261fa58Smacallan#else 524261fa58Smacallan#define ENTER 534261fa58Smacallan#define DPRINTF while (0) xf86Msg 544261fa58Smacallan#endif 554261fa58Smacallan 564261fa58Smacallan#define arraysize(ary) (sizeof(ary) / sizeof(ary[0])) 574261fa58Smacallan 584261fa58Smacallan/* 0xcc is SX's GXcopy equivalent */ 594261fa58Smacallanuint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee, 604261fa58Smacallan 0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff}; 614261fa58Smacallan 624261fa58Smacallanint src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8, 634261fa58Smacallan PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8}; 644261fa58Smacallanint tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8}; 654261fa58Smacallan 66f71acd79Smacallanstatic void CG14Copy32(PixmapPtr, int, int, int, int, int, int); 67f71acd79Smacallanstatic void CG14Copy8(PixmapPtr, int, int, int, int, int, int); 68f71acd79Smacallan 694261fa58Smacallanstatic inline void 704261fa58SmacallanCG14Wait(Cg14Ptr p) 714261fa58Smacallan{ 72fc473876Smacallan int bail = 10000000; 73fc473876Smacallan /* we wait for the busy bit to clear */ 74fc473876Smacallan while (((read_sx_reg(p, SX_CONTROL_STATUS) & SX_BZ) != 0) && 75fc473876Smacallan (bail > 0)) { 76fc473876Smacallan bail--; 77fc473876Smacallan }; 78fc473876Smacallan if (bail == 0) { 79fc473876Smacallan xf86Msg(X_ERROR, "SX wait for idle timed out %08x %08x\n", 80fc473876Smacallan read_sx_reg(p, SX_CONTROL_STATUS), 81fc473876Smacallan read_sx_reg(p, SX_ERROR)); 82fc473876Smacallan } 834261fa58Smacallan} 844261fa58Smacallan 854261fa58Smacallanstatic void 864261fa58SmacallanCG14WaitMarker(ScreenPtr pScreen, int Marker) 874261fa58Smacallan{ 884261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 894261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 904261fa58Smacallan 914261fa58Smacallan CG14Wait(p); 924261fa58Smacallan} 934261fa58Smacallan 944261fa58Smacallanstatic Bool 954261fa58SmacallanCG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, 964261fa58Smacallan int xdir, int ydir, int alu, Pixel planemask) 974261fa58Smacallan{ 984261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 994261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1004261fa58Smacallan 1014261fa58Smacallan ENTER; 1024261fa58Smacallan DPRINTF(X_ERROR, "bits per pixel: %d\n", 1034261fa58Smacallan pSrcPixmap->drawable.bitsPerPixel); 1044261fa58Smacallan 1054261fa58Smacallan if (planemask != p->last_mask) { 1064261fa58Smacallan CG14Wait(p); 1074261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, planemask); 1084261fa58Smacallan p->last_mask = planemask; 1094261fa58Smacallan } 1104261fa58Smacallan alu = sx_rop[alu]; 1114261fa58Smacallan if (alu != p->last_rop) { 1124261fa58Smacallan CG14Wait(p); 1134261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, alu); 1144261fa58Smacallan p->last_rop = alu; 1154261fa58Smacallan } 116f71acd79Smacallan switch (pSrcPixmap->drawable.bitsPerPixel) { 117f71acd79Smacallan case 8: 118f71acd79Smacallan p->pExa->Copy = CG14Copy8; 119f71acd79Smacallan break; 120f71acd79Smacallan case 32: 121f71acd79Smacallan p->pExa->Copy = CG14Copy32; 122f71acd79Smacallan break; 123f71acd79Smacallan default: 124f71acd79Smacallan xf86Msg(X_ERROR, "%s depth %d\n", __func__, 125f71acd79Smacallan pSrcPixmap->drawable.bitsPerPixel); 126f71acd79Smacallan } 1274261fa58Smacallan p->srcpitch = exaGetPixmapPitch(pSrcPixmap); 1284261fa58Smacallan p->srcoff = exaGetPixmapOffset(pSrcPixmap); 1294261fa58Smacallan p->xdir = xdir; 1304261fa58Smacallan p->ydir = ydir; 1314261fa58Smacallan return TRUE; 1324261fa58Smacallan} 1334261fa58Smacallan 1344261fa58Smacallanstatic void 135f71acd79SmacallanCG14Copy32(PixmapPtr pDstPixmap, 1364261fa58Smacallan int srcX, int srcY, int dstX, int dstY, int w, int h) 1374261fa58Smacallan{ 1384261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 1394261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 1404261fa58Smacallan int dstpitch, dstoff, srcpitch, srcoff; 1414261fa58Smacallan int srcstart, dststart, xinc, srcinc, dstinc; 1424261fa58Smacallan int line, count, s, d, num; 1434261fa58Smacallan 1444261fa58Smacallan ENTER; 1454261fa58Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 1464261fa58Smacallan dstoff = exaGetPixmapOffset(pDstPixmap); 1474261fa58Smacallan srcpitch = p->srcpitch; 1484261fa58Smacallan srcoff = p->srcoff; 1494261fa58Smacallan /* 1504261fa58Smacallan * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 1514261fa58Smacallan * actually wrote anything and only sync if it did 1524261fa58Smacallan */ 1534261fa58Smacallan srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff; 1544261fa58Smacallan dststart = (dstX << 2) + (dstpitch * dstY) + dstoff; 1554261fa58Smacallan 1564261fa58Smacallan /* 1574261fa58Smacallan * we always copy up to 32 pixels at a time so direction doesn't 1584261fa58Smacallan * matter if w<=32 1594261fa58Smacallan */ 1604261fa58Smacallan if (w > 32) { 1614261fa58Smacallan if (p->xdir < 0) { 1624261fa58Smacallan srcstart += (w - 32) << 2; 1634261fa58Smacallan dststart += (w - 32) << 2; 1644261fa58Smacallan xinc = -128; 1654261fa58Smacallan } else 1664261fa58Smacallan xinc = 128; 1674261fa58Smacallan } else 1684261fa58Smacallan xinc = 128; 1694261fa58Smacallan if (p->ydir < 0) { 1704261fa58Smacallan srcstart += (h - 1) * srcpitch; 1714261fa58Smacallan dststart += (h - 1) * dstpitch; 1724261fa58Smacallan srcinc = -srcpitch; 1734261fa58Smacallan dstinc = -dstpitch; 1744261fa58Smacallan } else { 1754261fa58Smacallan srcinc = srcpitch; 1764261fa58Smacallan dstinc = dstpitch; 1774261fa58Smacallan } 1784261fa58Smacallan if (p->last_rop == 0xcc) { 1794261fa58Smacallan /* plain old copy */ 1804261fa58Smacallan if ( xinc > 0) { 1814261fa58Smacallan /* going left to right */ 1824261fa58Smacallan for (line = 0; line < h; line++) { 1834261fa58Smacallan count = 0; 1844261fa58Smacallan s = srcstart; 1854261fa58Smacallan d = dststart; 1864261fa58Smacallan while ( count < w) { 1874261fa58Smacallan num = min(32, w - count); 1884261fa58Smacallan write_sx_io(p, s, 1894261fa58Smacallan SX_LD(10, num - 1, s & 7)); 1904261fa58Smacallan write_sx_io(p, d, 1914261fa58Smacallan SX_STM(10, num - 1, d & 7)); 1924261fa58Smacallan s += xinc; 1934261fa58Smacallan d += xinc; 1944261fa58Smacallan count += 32; 1954261fa58Smacallan } 1964261fa58Smacallan srcstart += srcinc; 1974261fa58Smacallan dststart += dstinc; 1984261fa58Smacallan } 1994261fa58Smacallan } else { 2004261fa58Smacallan /* going right to left */ 2014261fa58Smacallan int i, chunks = (w >> 5); 2024261fa58Smacallan for (line = 0; line < h; line++) { 2034261fa58Smacallan s = srcstart; 2044261fa58Smacallan d = dststart; 2054261fa58Smacallan count = w; 2064261fa58Smacallan for (i = 0; i < chunks; i++) { 2074261fa58Smacallan write_sx_io(p, s, 2084261fa58Smacallan SX_LD(10, 31, s & 7)); 2094261fa58Smacallan write_sx_io(p, d, 2104261fa58Smacallan SX_STM(10, 31, d & 7)); 2114261fa58Smacallan s -= 128; 2124261fa58Smacallan d -= 128; 2134261fa58Smacallan count -= 32; 2144261fa58Smacallan } 2154261fa58Smacallan /* leftovers, if any */ 2164261fa58Smacallan if (count > 0) { 2174261fa58Smacallan s += (32 - count) << 2; 2184261fa58Smacallan d += (32 - count) << 2; 2194261fa58Smacallan write_sx_io(p, s, 2204261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2214261fa58Smacallan write_sx_io(p, d, 2224261fa58Smacallan SX_STM(10, count - 1, d & 7)); 2234261fa58Smacallan } 2244261fa58Smacallan srcstart += srcinc; 2254261fa58Smacallan dststart += dstinc; 2264261fa58Smacallan } 2274261fa58Smacallan } 2284261fa58Smacallan } else { 2294261fa58Smacallan /* ROPs needed */ 2304261fa58Smacallan if ( xinc > 0) { 2314261fa58Smacallan /* going left to right */ 2324261fa58Smacallan for (line = 0; line < h; line++) { 2334261fa58Smacallan count = 0; 2344261fa58Smacallan s = srcstart; 2354261fa58Smacallan d = dststart; 2364261fa58Smacallan while ( count < w) { 2374261fa58Smacallan num = min(32, w - count); 2384261fa58Smacallan write_sx_io(p, s, 2394261fa58Smacallan SX_LD(10, num - 1, s & 7)); 2404261fa58Smacallan write_sx_io(p, d, 2414261fa58Smacallan SX_LD(42, num - 1, d & 7)); 2424261fa58Smacallan if (num > 16) { 2434261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2444261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2454261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2464261fa58Smacallan SX_ROP(26, 58, 90, num - 17)); 2474261fa58Smacallan } else { 2484261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2494261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 2504261fa58Smacallan } 2514261fa58Smacallan write_sx_io(p, d, 2524261fa58Smacallan SX_STM(74, num - 1, d & 7)); 2534261fa58Smacallan s += xinc; 2544261fa58Smacallan d += xinc; 2554261fa58Smacallan count += 32; 2564261fa58Smacallan } 2574261fa58Smacallan srcstart += srcinc; 2584261fa58Smacallan dststart += dstinc; 2594261fa58Smacallan } 2604261fa58Smacallan } else { 2614261fa58Smacallan /* going right to left */ 2624261fa58Smacallan int i, chunks = (w >> 5); 2634261fa58Smacallan for (line = 0; line < h; line++) { 2644261fa58Smacallan s = srcstart; 2654261fa58Smacallan d = dststart; 2664261fa58Smacallan count = w; 2674261fa58Smacallan for (i = 0; i < chunks; i++) { 2684261fa58Smacallan write_sx_io(p, s, SX_LD(10, 31, s & 7)); 2694261fa58Smacallan write_sx_io(p, d, SX_LD(42, 31, d & 7)); 2704261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2714261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2724261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2734261fa58Smacallan SX_ROP(26, 58, 90, 15)); 2744261fa58Smacallan write_sx_io(p, d, 2754261fa58Smacallan SX_STM(74, 31, d & 7)); 2764261fa58Smacallan s -= 128; 2774261fa58Smacallan d -= 128; 2784261fa58Smacallan count -= 32; 2794261fa58Smacallan } 2804261fa58Smacallan /* leftovers, if any */ 2814261fa58Smacallan if (count > 0) { 2824261fa58Smacallan s += (32 - count) << 2; 2834261fa58Smacallan d += (32 - count) << 2; 2844261fa58Smacallan write_sx_io(p, s, 2854261fa58Smacallan SX_LD(10, count - 1, s & 7)); 2864261fa58Smacallan write_sx_io(p, d, 2874261fa58Smacallan SX_LD(42, count - 1, d & 7)); 2884261fa58Smacallan if (count > 16) { 2894261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2904261fa58Smacallan SX_ROP(10, 42, 74, 15)); 2914261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2924261fa58Smacallan SX_ROP(26, 58, 90, count - 17)); 2934261fa58Smacallan } else { 2944261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 2954261fa58Smacallan SX_ROP(10, 42, 74, count - 1)); 2964261fa58Smacallan } 2974261fa58Smacallan 2984261fa58Smacallan write_sx_io(p, d, 2994261fa58Smacallan SX_STM(74, count - 1, d & 7)); 3004261fa58Smacallan } 3014261fa58Smacallan srcstart += srcinc; 3024261fa58Smacallan dststart += dstinc; 3034261fa58Smacallan } 3044261fa58Smacallan } 3054261fa58Smacallan } 3064261fa58Smacallan exaMarkSync(pDstPixmap->drawable.pScreen); 3074261fa58Smacallan} 3084261fa58Smacallan 309f71acd79Smacallanstatic void 310f71acd79SmacallanCG14Copy8(PixmapPtr pDstPixmap, 311f71acd79Smacallan int srcX, int srcY, int dstX, int dstY, int w, int h) 312f71acd79Smacallan{ 313f71acd79Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 314f71acd79Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 315f71acd79Smacallan int dstpitch, dstoff, srcpitch, srcoff; 316f71acd79Smacallan int srcstart, dststart, xinc, srcinc, dstinc; 317f71acd79Smacallan int line, count, s, d, num; 318f71acd79Smacallan 319f71acd79Smacallan ENTER; 320f71acd79Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 321f71acd79Smacallan dstoff = exaGetPixmapOffset(pDstPixmap); 322f71acd79Smacallan srcpitch = p->srcpitch; 323f71acd79Smacallan srcoff = p->srcoff; 324f71acd79Smacallan /* 325f71acd79Smacallan * should clear the WO bit in SX_CONTROL_STATUS, then check if SX 326f71acd79Smacallan * actually wrote anything and only sync if it did 327f71acd79Smacallan */ 328f71acd79Smacallan srcstart = srcX + (srcpitch * srcY) + srcoff; 329f71acd79Smacallan dststart = dstX + (dstpitch * dstY) + dstoff; 330f71acd79Smacallan 331f71acd79Smacallan /* 332f71acd79Smacallan * we always copy up to 32 pixels at a time so direction doesn't 333f71acd79Smacallan * matter if w<=32 334f71acd79Smacallan */ 335f71acd79Smacallan if (w > 32) { 336f71acd79Smacallan if (p->xdir < 0) { 337f71acd79Smacallan srcstart += (w - 32); 338f71acd79Smacallan dststart += (w - 32); 339f71acd79Smacallan xinc = -32; 340f71acd79Smacallan } else 341f71acd79Smacallan xinc = 32; 342f71acd79Smacallan } else 343f71acd79Smacallan xinc = 32; 344f71acd79Smacallan if (p->ydir < 0) { 345f71acd79Smacallan srcstart += (h - 1) * srcpitch; 346f71acd79Smacallan dststart += (h - 1) * dstpitch; 347f71acd79Smacallan srcinc = -srcpitch; 348f71acd79Smacallan dstinc = -dstpitch; 349f71acd79Smacallan } else { 350f71acd79Smacallan srcinc = srcpitch; 351f71acd79Smacallan dstinc = dstpitch; 352f71acd79Smacallan } 353f71acd79Smacallan if (p->last_rop == 0xcc) { 354f71acd79Smacallan /* plain old copy */ 355f71acd79Smacallan if ( xinc > 0) { 356f71acd79Smacallan /* going left to right */ 357f71acd79Smacallan for (line = 0; line < h; line++) { 358f71acd79Smacallan count = 0; 359f71acd79Smacallan s = srcstart; 360f71acd79Smacallan d = dststart; 361f71acd79Smacallan while ( count < w) { 362f71acd79Smacallan num = min(32, w - count); 363f71acd79Smacallan write_sx_io(p, s, 364f71acd79Smacallan SX_LDB(10, num - 1, s & 7)); 365f71acd79Smacallan write_sx_io(p, d, 366f71acd79Smacallan SX_STBM(10, num - 1, d & 7)); 367f71acd79Smacallan s += xinc; 368f71acd79Smacallan d += xinc; 369f71acd79Smacallan count += 32; 370f71acd79Smacallan } 371f71acd79Smacallan srcstart += srcinc; 372f71acd79Smacallan dststart += dstinc; 373f71acd79Smacallan } 374f71acd79Smacallan } else { 375f71acd79Smacallan /* going right to left */ 376f71acd79Smacallan int i, chunks = (w >> 5); 377f71acd79Smacallan for (line = 0; line < h; line++) { 378f71acd79Smacallan s = srcstart; 379f71acd79Smacallan d = dststart; 380f71acd79Smacallan count = w; 381f71acd79Smacallan for (i = 0; i < chunks; i++) { 382f71acd79Smacallan write_sx_io(p, s, 383f71acd79Smacallan SX_LDB(10, 31, s & 7)); 384f71acd79Smacallan write_sx_io(p, d, 385f71acd79Smacallan SX_STBM(10, 31, d & 7)); 386f71acd79Smacallan s -= 32; 387f71acd79Smacallan d -= 32; 388f71acd79Smacallan count -= 32; 389f71acd79Smacallan } 390f71acd79Smacallan /* leftovers, if any */ 391f71acd79Smacallan if (count > 0) { 392f71acd79Smacallan s += (32 - count); 393f71acd79Smacallan d += (32 - count); 394f71acd79Smacallan write_sx_io(p, s, 395f71acd79Smacallan SX_LDB(10, count - 1, s & 7)); 396f71acd79Smacallan write_sx_io(p, d, 397f71acd79Smacallan SX_STBM(10, count - 1, d & 7)); 398f71acd79Smacallan } 399f71acd79Smacallan srcstart += srcinc; 400f71acd79Smacallan dststart += dstinc; 401f71acd79Smacallan } 402f71acd79Smacallan } 403f71acd79Smacallan } else { 404f71acd79Smacallan /* ROPs needed */ 405f71acd79Smacallan if ( xinc > 0) { 406f71acd79Smacallan /* going left to right */ 407f71acd79Smacallan for (line = 0; line < h; line++) { 408f71acd79Smacallan count = 0; 409f71acd79Smacallan s = srcstart; 410f71acd79Smacallan d = dststart; 411f71acd79Smacallan while ( count < w) { 412f71acd79Smacallan num = min(32, w - count); 413f71acd79Smacallan write_sx_io(p, s, 414f71acd79Smacallan SX_LDB(10, num - 1, s & 7)); 415f71acd79Smacallan write_sx_io(p, d, 416f71acd79Smacallan SX_LDB(42, num - 1, d & 7)); 417f71acd79Smacallan if (num > 16) { 418f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 419f71acd79Smacallan SX_ROP(10, 42, 74, 15)); 420f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 421f71acd79Smacallan SX_ROP(26, 58, 90, num - 17)); 422f71acd79Smacallan } else { 423f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 424f71acd79Smacallan SX_ROP(10, 42, 74, num - 1)); 425f71acd79Smacallan } 426f71acd79Smacallan write_sx_io(p, d, 427f71acd79Smacallan SX_STBM(74, num - 1, d & 7)); 428f71acd79Smacallan s += xinc; 429f71acd79Smacallan d += xinc; 430f71acd79Smacallan count += 32; 431f71acd79Smacallan } 432f71acd79Smacallan srcstart += srcinc; 433f71acd79Smacallan dststart += dstinc; 434f71acd79Smacallan } 435f71acd79Smacallan } else { 436f71acd79Smacallan /* going right to left */ 437f71acd79Smacallan int i, chunks = (w >> 5); 438f71acd79Smacallan for (line = 0; line < h; line++) { 439f71acd79Smacallan s = srcstart; 440f71acd79Smacallan d = dststart; 441f71acd79Smacallan count = w; 442f71acd79Smacallan for (i = 0; i < chunks; i++) { 443f71acd79Smacallan write_sx_io(p, s, SX_LDB(10, 31, s & 7)); 444f71acd79Smacallan write_sx_io(p, d, SX_LDB(42, 31, d & 7)); 445f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 446f71acd79Smacallan SX_ROP(10, 42, 74, 15)); 447f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 448f71acd79Smacallan SX_ROP(26, 58, 90, 15)); 449f71acd79Smacallan write_sx_io(p, d, 450f71acd79Smacallan SX_STBM(74, 31, d & 7)); 451f71acd79Smacallan s -= 128; 452f71acd79Smacallan d -= 128; 453f71acd79Smacallan count -= 32; 454f71acd79Smacallan } 455f71acd79Smacallan /* leftovers, if any */ 456f71acd79Smacallan if (count > 0) { 457f71acd79Smacallan s += (32 - count); 458f71acd79Smacallan d += (32 - count); 459f71acd79Smacallan write_sx_io(p, s, 460f71acd79Smacallan SX_LDB(10, count - 1, s & 7)); 461f71acd79Smacallan write_sx_io(p, d, 462f71acd79Smacallan SX_LDB(42, count - 1, d & 7)); 463f71acd79Smacallan if (count > 16) { 464f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 465f71acd79Smacallan SX_ROP(10, 42, 74, 15)); 466f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 467f71acd79Smacallan SX_ROP(26, 58, 90, count - 17)); 468f71acd79Smacallan } else { 469f71acd79Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 470f71acd79Smacallan SX_ROP(10, 42, 74, count - 1)); 471f71acd79Smacallan } 472f71acd79Smacallan 473f71acd79Smacallan write_sx_io(p, d, 474f71acd79Smacallan SX_STBM(74, count - 1, d & 7)); 475f71acd79Smacallan } 476f71acd79Smacallan srcstart += srcinc; 477f71acd79Smacallan dststart += dstinc; 478f71acd79Smacallan } 479f71acd79Smacallan } 480f71acd79Smacallan } 481f71acd79Smacallan exaMarkSync(pDstPixmap->drawable.pScreen); 482f71acd79Smacallan} 483f71acd79Smacallan 4844261fa58Smacallanstatic void 4854261fa58SmacallanCG14DoneCopy(PixmapPtr pDstPixmap) 4864261fa58Smacallan{ 4874261fa58Smacallan} 4884261fa58Smacallan 4894261fa58Smacallanstatic Bool 4904261fa58SmacallanCG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) 4914261fa58Smacallan{ 4924261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 4934261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 4944261fa58Smacallan 4954261fa58Smacallan ENTER; 4966bdc2ffdSmacallan DPRINTF(X_ERROR, "bits per pixel: %d\n", 4976bdc2ffdSmacallan pPixmap->drawable.bitsPerPixel); 4984261fa58Smacallan write_sx_reg(p, SX_QUEUED(8), fg); 4994261fa58Smacallan write_sx_reg(p, SX_QUEUED(9), fg); 5004261fa58Smacallan if (planemask != p->last_mask) { 5014261fa58Smacallan CG14Wait(p); 5024261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, planemask); 5034261fa58Smacallan p->last_mask = planemask; 5044261fa58Smacallan } 5054261fa58Smacallan alu = sx_rop[alu]; 5064261fa58Smacallan if (alu != p->last_rop) { 5074261fa58Smacallan CG14Wait(p); 5084261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, alu); 5094261fa58Smacallan p->last_rop = alu; 5104261fa58Smacallan } 5114261fa58Smacallan DPRINTF(X_ERROR, "%s: %x\n", __func__, alu); 5124261fa58Smacallan return TRUE; 5134261fa58Smacallan} 5144261fa58Smacallan 5154261fa58Smacallanstatic void 5164261fa58SmacallanCG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 5174261fa58Smacallan{ 5184261fa58Smacallan int line, x, num; 5194261fa58Smacallan uint32_t ptr; 5204261fa58Smacallan 5214261fa58Smacallan ENTER; 5224261fa58Smacallan if (p->last_rop == 0xcc) { 5234261fa58Smacallan /* simple fill */ 5244261fa58Smacallan for (line = 0; line < h; line++) { 5254261fa58Smacallan x = 0; 5264261fa58Smacallan while (x < w) { 5274261fa58Smacallan ptr = start + (x << 2); 5284261fa58Smacallan num = min(32, w - x); 5294261fa58Smacallan write_sx_io(p, ptr, 5304261fa58Smacallan SX_STS(8, num - 1, ptr & 7)); 5314261fa58Smacallan x += 32; 5324261fa58Smacallan } 5334261fa58Smacallan start += pitch; 5344261fa58Smacallan } 5354261fa58Smacallan } else if (p->last_rop == 0xaa) { 5364261fa58Smacallan /* nothing to do here */ 5374261fa58Smacallan return; 5384261fa58Smacallan } else { 5394261fa58Smacallan /* alright, let's do actual ROP stuff */ 5404261fa58Smacallan 5414261fa58Smacallan /* first repeat the fill colour into 16 registers */ 5424261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 5434261fa58Smacallan SX_SELECT_S(8, 8, 10, 15)); 5444261fa58Smacallan 5454261fa58Smacallan for (line = 0; line < h; line++) { 5464261fa58Smacallan x = 0; 5474261fa58Smacallan while (x < w) { 5484261fa58Smacallan ptr = start + (x << 2); 5494261fa58Smacallan num = min(32, w - x); 5504261fa58Smacallan /* now suck fb data into registers */ 5514261fa58Smacallan write_sx_io(p, ptr, 5524261fa58Smacallan SX_LD(42, num - 1, ptr & 7)); 5534261fa58Smacallan /* 5544261fa58Smacallan * ROP them with the fill data we left in 10 5554261fa58Smacallan * non-memory ops can only have counts up to 16 5564261fa58Smacallan */ 5574261fa58Smacallan if (num <= 16) { 5584261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 5594261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 5604261fa58Smacallan } else { 5614261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 5624261fa58Smacallan SX_ROP(10, 42, 74, 15)); 5634261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 5644261fa58Smacallan SX_ROP(10, 58, 90, num - 17)); 5654261fa58Smacallan } 5664261fa58Smacallan /* and write the result back into memory */ 5674261fa58Smacallan write_sx_io(p, ptr, 5684261fa58Smacallan SX_ST(74, num - 1, ptr & 7)); 5694261fa58Smacallan x += 32; 5704261fa58Smacallan } 5714261fa58Smacallan start += pitch; 5724261fa58Smacallan } 5734261fa58Smacallan } 5744261fa58Smacallan} 5754261fa58Smacallan 5764261fa58Smacallanstatic void 5774261fa58SmacallanCG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h) 5784261fa58Smacallan{ 5794261fa58Smacallan int line, x, num, off; 5804261fa58Smacallan uint32_t ptr; 5814261fa58Smacallan 5824261fa58Smacallan ENTER; 5834261fa58Smacallan off = start & 7; 5844261fa58Smacallan start &= ~7; 5854261fa58Smacallan 5864261fa58Smacallan if (p->last_rop == 0xcc) { 5874261fa58Smacallan /* simple fill */ 5884261fa58Smacallan for (line = 0; line < h; line++) { 5894261fa58Smacallan x = 0; 5904261fa58Smacallan while (x < w) { 5914261fa58Smacallan ptr = start + x; 5924261fa58Smacallan num = min(32, w - x); 5934261fa58Smacallan write_sx_io(p, ptr, 5944261fa58Smacallan SX_STBS(8, num - 1, off)); 5954261fa58Smacallan x += 32; 5964261fa58Smacallan } 5974261fa58Smacallan start += pitch; 5984261fa58Smacallan } 5994261fa58Smacallan } else if (p->last_rop == 0xaa) { 6004261fa58Smacallan /* nothing to do here */ 6014261fa58Smacallan return; 6024261fa58Smacallan } else { 6034261fa58Smacallan /* alright, let's do actual ROP stuff */ 6044261fa58Smacallan 6054261fa58Smacallan /* first repeat the fill colour into 16 registers */ 6064261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6074261fa58Smacallan SX_SELECT_S(8, 8, 10, 15)); 6084261fa58Smacallan 6094261fa58Smacallan for (line = 0; line < h; line++) { 6104261fa58Smacallan x = 0; 6114261fa58Smacallan while (x < w) { 6124261fa58Smacallan ptr = start + x; 6134261fa58Smacallan num = min(32, w - x); 6144261fa58Smacallan /* now suck fb data into registers */ 6154261fa58Smacallan write_sx_io(p, ptr, 6164261fa58Smacallan SX_LDB(42, num - 1, off)); 6174261fa58Smacallan /* 6184261fa58Smacallan * ROP them with the fill data we left in 10 6194261fa58Smacallan * non-memory ops can only have counts up to 16 6204261fa58Smacallan */ 6214261fa58Smacallan if (num <= 16) { 6224261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6234261fa58Smacallan SX_ROP(10, 42, 74, num - 1)); 6244261fa58Smacallan } else { 6254261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6264261fa58Smacallan SX_ROP(10, 42, 74, 15)); 6274261fa58Smacallan write_sx_reg(p, SX_INSTRUCTIONS, 6284261fa58Smacallan SX_ROP(10, 58, 90, num - 17)); 6294261fa58Smacallan } 6304261fa58Smacallan /* and write the result back into memory */ 6314261fa58Smacallan write_sx_io(p, ptr, 6324261fa58Smacallan SX_STB(74, num - 1, off)); 6334261fa58Smacallan x += 32; 6344261fa58Smacallan } 6354261fa58Smacallan start += pitch; 6364261fa58Smacallan } 6374261fa58Smacallan } 6384261fa58Smacallan} 6394261fa58Smacallan 6404261fa58Smacallanstatic void 6414261fa58SmacallanCG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2) 6424261fa58Smacallan{ 6434261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 6444261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 6454261fa58Smacallan int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 6464261fa58Smacallan int start, depth; 6474261fa58Smacallan 6484261fa58Smacallan ENTER; 6494261fa58Smacallan dstpitch = exaGetPixmapPitch(pPixmap); 6504261fa58Smacallan dstoff = exaGetPixmapOffset(pPixmap); 6514261fa58Smacallan 6524261fa58Smacallan depth = pPixmap->drawable.bitsPerPixel; 6534261fa58Smacallan switch (depth) { 6544261fa58Smacallan case 32: 6554261fa58Smacallan start = dstoff + (y1 * dstpitch) + (x1 << 2); 6564261fa58Smacallan CG14Solid32(p, start, dstpitch, w, h); 6574261fa58Smacallan break; 6584261fa58Smacallan case 8: 6594261fa58Smacallan start = dstoff + (y1 * dstpitch) + x1; 6604261fa58Smacallan CG14Solid8(p, start, dstpitch, w, h); 6614261fa58Smacallan break; 6624261fa58Smacallan } 6634261fa58Smacallan 6644261fa58Smacallan DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2, 6654261fa58Smacallan dstpitch, dstoff, start); 6664261fa58Smacallan DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop, 6674261fa58Smacallan read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9))); 6684261fa58Smacallan exaMarkSync(pPixmap->drawable.pScreen); 6694261fa58Smacallan} 6704261fa58Smacallan 6714261fa58Smacallan/* 6724261fa58Smacallan * Memcpy-based UTS. 6734261fa58Smacallan */ 6744261fa58Smacallanstatic Bool 6754261fa58SmacallanCG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 6764261fa58Smacallan char *src, int src_pitch) 6774261fa58Smacallan{ 6784261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 6794261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 6804261fa58Smacallan char *dst = p->fb + exaGetPixmapOffset(pDst); 6814261fa58Smacallan int dst_pitch = exaGetPixmapPitch(pDst); 6824261fa58Smacallan 6834261fa58Smacallan int bpp = pDst->drawable.bitsPerPixel; 6844261fa58Smacallan int cpp = (bpp + 7) >> 3; 6854261fa58Smacallan int wBytes = w * cpp; 6864261fa58Smacallan 6874261fa58Smacallan ENTER; 688f71acd79Smacallan DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp); 6894261fa58Smacallan dst += (x * cpp) + (y * dst_pitch); 6904261fa58Smacallan 6914261fa58Smacallan CG14Wait(p); 6924261fa58Smacallan 6934261fa58Smacallan while (h--) { 6944261fa58Smacallan memcpy(dst, src, wBytes); 6954261fa58Smacallan src += src_pitch; 6964261fa58Smacallan dst += dst_pitch; 6974261fa58Smacallan } 6984261fa58Smacallan __asm("stbar;"); 6994261fa58Smacallan return TRUE; 7004261fa58Smacallan} 7014261fa58Smacallan 7024261fa58Smacallan/* 7034261fa58Smacallan * Memcpy-based DFS. 7044261fa58Smacallan */ 7054261fa58Smacallanstatic Bool 7064261fa58SmacallanCG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 7074261fa58Smacallan char *dst, int dst_pitch) 7084261fa58Smacallan{ 7094261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 7104261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 7114261fa58Smacallan char *src = p->fb + exaGetPixmapOffset(pSrc); 7124261fa58Smacallan int src_pitch = exaGetPixmapPitch(pSrc); 7134261fa58Smacallan 7144261fa58Smacallan ENTER; 7154261fa58Smacallan int bpp = pSrc->drawable.bitsPerPixel; 7164261fa58Smacallan int cpp = (bpp + 7) >> 3; 7174261fa58Smacallan int wBytes = w * cpp; 7184261fa58Smacallan 7194261fa58Smacallan src += (x * cpp) + (y * src_pitch); 7204261fa58Smacallan 7214261fa58Smacallan CG14Wait(p); 7224261fa58Smacallan 7234261fa58Smacallan while (h--) { 7244261fa58Smacallan memcpy(dst, src, wBytes); 7254261fa58Smacallan src += src_pitch; 7264261fa58Smacallan dst += dst_pitch; 7274261fa58Smacallan } 7284261fa58Smacallan 7294261fa58Smacallan return TRUE; 7304261fa58Smacallan} 7314261fa58Smacallan 7324261fa58SmacallanBool 7334261fa58SmacallanCG14CheckComposite(int op, PicturePtr pSrcPicture, 7344261fa58Smacallan PicturePtr pMaskPicture, 7354261fa58Smacallan PicturePtr pDstPicture) 7364261fa58Smacallan{ 7374261fa58Smacallan int i, ok = FALSE; 7384261fa58Smacallan 7394261fa58Smacallan ENTER; 7404261fa58Smacallan 7414261fa58Smacallan /* 7424261fa58Smacallan * SX is in theory capable of accelerating pretty much all Xrender ops, 7434261fa58Smacallan * even coordinate transformation and gradients. Support will be added 7444261fa58Smacallan * over time and likely have to spill over into its own source file. 7454261fa58Smacallan */ 7464261fa58Smacallan 747a3a2ba44Smacallan if ((op != PictOpOver) && (op != PictOpAdd) && (op != PictOpSrc)) { 748fe97f391Smacallan DPRINTF(X_ERROR, "%s: rejecting %d\n", __func__, op); 7494261fa58Smacallan return FALSE; 7504261fa58Smacallan } 7514261fa58Smacallan 7524bd47ccfSmacallan if (pSrcPicture != NULL) { 7534bd47ccfSmacallan i = 0; 7544bd47ccfSmacallan while ((i < arraysize(src_formats)) && (!ok)) { 7554bd47ccfSmacallan ok = (pSrcPicture->format == src_formats[i]); 7564bd47ccfSmacallan i++; 7574bd47ccfSmacallan } 7584bd47ccfSmacallan 7594bd47ccfSmacallan if (!ok) { 7604bd47ccfSmacallan DPRINTF(X_ERROR, "%s: unsupported src format %x\n", 7614bd47ccfSmacallan __func__, pSrcPicture->format); 7624bd47ccfSmacallan return FALSE; 7634bd47ccfSmacallan } 7644bd47ccfSmacallan DPRINTF(X_ERROR, "src is %x, %d\n", pSrcPicture->format, op); 7654261fa58Smacallan } 7664261fa58Smacallan 7674bd47ccfSmacallan if (pDstPicture != NULL) { 7684bd47ccfSmacallan i = 0; 7694bd47ccfSmacallan ok = FALSE; 7704bd47ccfSmacallan while ((i < arraysize(src_formats)) && (!ok)) { 7714bd47ccfSmacallan ok = (pDstPicture->format == src_formats[i]); 7724bd47ccfSmacallan i++; 7734bd47ccfSmacallan } 7744bd47ccfSmacallan 7754bd47ccfSmacallan if (!ok) { 7764bd47ccfSmacallan DPRINTF(X_ERROR, "%s: unsupported dst format %x\n", 7774bd47ccfSmacallan __func__, pDstPicture->format); 7784bd47ccfSmacallan return FALSE; 7794bd47ccfSmacallan } 7804bd47ccfSmacallan DPRINTF(X_ERROR, "dst is %x, %d\n", pDstPicture->format, op); 7814bd47ccfSmacallan } 7824261fa58Smacallan 7834261fa58Smacallan if (pMaskPicture != NULL) { 7844261fa58Smacallan DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format, 7854261fa58Smacallan pMaskPicture->pDrawable->width, 7864261fa58Smacallan pMaskPicture->pDrawable->height); 7874261fa58Smacallan } 7884261fa58Smacallan return TRUE; 7894261fa58Smacallan} 7904261fa58Smacallan 7914261fa58SmacallanBool 7924261fa58SmacallanCG14PrepareComposite(int op, PicturePtr pSrcPicture, 7934261fa58Smacallan PicturePtr pMaskPicture, 7944261fa58Smacallan PicturePtr pDstPicture, 7954261fa58Smacallan PixmapPtr pSrc, 7964261fa58Smacallan PixmapPtr pMask, 7974261fa58Smacallan PixmapPtr pDst) 7984261fa58Smacallan{ 7994261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 8004261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 8014261fa58Smacallan 8024261fa58Smacallan ENTER; 8034261fa58Smacallan 804f7cb851fSmacallan p->no_source_pixmap = FALSE; 805f7cb851fSmacallan p->source_is_solid = FALSE; 806f7cb851fSmacallan 807a3a2ba44Smacallan if (pSrcPicture->format == PICT_a1) { 8086bdc2ffdSmacallan xf86Msg(X_ERROR, "src mono, dst %x, op %d\n", 8096bdc2ffdSmacallan pDstPicture->format, op); 810a3a2ba44Smacallan if (pMaskPicture != NULL) { 811a3a2ba44Smacallan xf86Msg(X_ERROR, "msk %x\n", pMaskPicture->format); 812a3a2ba44Smacallan } 813f7cb851fSmacallan } 8144261fa58Smacallan if (pSrcPicture->pSourcePict != NULL) { 8154261fa58Smacallan if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { 8164261fa58Smacallan p->fillcolour = 8174261fa58Smacallan pSrcPicture->pSourcePict->solidFill.color; 818f7cb851fSmacallan DPRINTF(X_ERROR, "%s: solid src %08x\n", 8194261fa58Smacallan __func__, p->fillcolour); 820f7cb851fSmacallan p->no_source_pixmap = TRUE; 821f7cb851fSmacallan p->source_is_solid = TRUE; 8224261fa58Smacallan } 8234261fa58Smacallan } 8244261fa58Smacallan if ((pMaskPicture != NULL) && (pMaskPicture->pSourcePict != NULL)) { 8254261fa58Smacallan if (pMaskPicture->pSourcePict->type == 8264261fa58Smacallan SourcePictTypeSolidFill) { 8274261fa58Smacallan p->fillcolour = 8284261fa58Smacallan pMaskPicture->pSourcePict->solidFill.color; 829a3a2ba44Smacallan xf86Msg(X_ERROR, "%s: solid mask %08x\n", 8304261fa58Smacallan __func__, p->fillcolour); 8314261fa58Smacallan } 8324261fa58Smacallan } 8334261fa58Smacallan if (pMaskPicture != NULL) { 834239808baSmacallan p->mskoff = exaGetPixmapOffset(pMask); 8354261fa58Smacallan p->mskpitch = exaGetPixmapPitch(pMask); 8364261fa58Smacallan p->mskformat = pMaskPicture->format; 837a3a2ba44Smacallan } else { 838239808baSmacallan p->mskoff = 0; 839a3a2ba44Smacallan p->mskpitch = 0; 840a3a2ba44Smacallan p->mskformat = 0; 8414261fa58Smacallan } 842f7cb851fSmacallan if (pSrc != NULL) { 843f7cb851fSmacallan p->source_is_solid = 844f7cb851fSmacallan ((pSrc->drawable.width == 1) && (pSrc->drawable.height == 1)); 845f7cb851fSmacallan p->srcoff = exaGetPixmapOffset(pSrc); 846f7cb851fSmacallan p->srcpitch = exaGetPixmapPitch(pSrc); 847f7cb851fSmacallan if (p->source_is_solid) { 848f7cb851fSmacallan p->fillcolour = *(uint32_t *)(p->fb + p->srcoff); 849f7cb851fSmacallan } 850f7cb851fSmacallan } 8514261fa58Smacallan p->srcformat = pSrcPicture->format; 8524261fa58Smacallan p->dstformat = pDstPicture->format; 853f7cb851fSmacallan 854f7cb851fSmacallan if (p->source_is_solid) { 855f7cb851fSmacallan uint32_t temp; 856f7cb851fSmacallan 857f7cb851fSmacallan /* stuff source colour into SX registers, swap as needed */ 858f7cb851fSmacallan temp = p->fillcolour; 859f7cb851fSmacallan switch (p->srcformat) { 860f7cb851fSmacallan case PICT_a8r8g8b8: 861f7cb851fSmacallan case PICT_x8r8g8b8: 862f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 863f7cb851fSmacallan temp = temp >> 8; 864f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 865f7cb851fSmacallan temp = temp >> 8; 866f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 867f7cb851fSmacallan break; 868f7cb851fSmacallan case PICT_a8b8g8r8: 869f7cb851fSmacallan case PICT_x8b8g8r8: 870f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(11), temp & 0xff); 871f7cb851fSmacallan temp = temp >> 8; 872f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(10), temp & 0xff); 873f7cb851fSmacallan temp = temp >> 8; 874f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(9), temp & 0xff); 875f7cb851fSmacallan break; 876f7cb851fSmacallan } 877f7cb851fSmacallan write_sx_reg(p, SX_QUEUED(8), 0xff); 878f7cb851fSmacallan } 8794261fa58Smacallan p->op = op; 880a3a2ba44Smacallan if (op == PictOpSrc) { 881a3a2ba44Smacallan CG14PrepareCopy(pSrc, pDst, 1, 1, GXcopy, 0xffffffff); 882a3a2ba44Smacallan } 8834261fa58Smacallan#ifdef SX_DEBUG 8844261fa58Smacallan DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff, 8854261fa58Smacallan *(uint32_t *)(p->fb + p->srcoff)); 8864261fa58Smacallan#endif 8874261fa58Smacallan return TRUE; 8884261fa58Smacallan} 8894261fa58Smacallan 8904261fa58Smacallanvoid 8914261fa58SmacallanCG14Composite(PixmapPtr pDst, int srcX, int srcY, 8924261fa58Smacallan int maskX, int maskY, 8934261fa58Smacallan int dstX, int dstY, 8944261fa58Smacallan int width, int height) 8954261fa58Smacallan{ 8964261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 8974261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 8984261fa58Smacallan uint32_t dstoff, dstpitch; 8994261fa58Smacallan uint32_t dst, msk, src; 900e311bbeeSmacallan int flip = 0; 9014261fa58Smacallan 9024261fa58Smacallan ENTER; 9034261fa58Smacallan dstoff = exaGetPixmapOffset(pDst); 9044261fa58Smacallan dstpitch = exaGetPixmapPitch(pDst); 9054261fa58Smacallan 906e311bbeeSmacallan flip = (PICT_FORMAT_TYPE(p->srcformat) != 907e311bbeeSmacallan PICT_FORMAT_TYPE(p->dstformat)); 908e311bbeeSmacallan 9094261fa58Smacallan switch (p->op) { 9104261fa58Smacallan case PictOpOver: 9114261fa58Smacallan dst = dstoff + (dstY * dstpitch) + (dstX << 2); 9124261fa58Smacallan DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n", 9134261fa58Smacallan p->mskformat, p->dstformat, srcX, srcY); 914a3a2ba44Smacallan if (p->source_is_solid) { 915a3a2ba44Smacallan switch (p->mskformat) { 916a3a2ba44Smacallan case PICT_a8: 917a3a2ba44Smacallan msk = p->mskoff + 918a3a2ba44Smacallan (maskY * p->mskpitch) + 919a3a2ba44Smacallan maskX; 920a3a2ba44Smacallan CG14Comp_Over8Solid(p, 921a3a2ba44Smacallan msk, p->mskpitch, 922a3a2ba44Smacallan dst, dstpitch, 923a3a2ba44Smacallan width, height); 924a3a2ba44Smacallan break; 925a3a2ba44Smacallan case PICT_a8r8g8b8: 926a3a2ba44Smacallan case PICT_a8b8g8r8: 927a3a2ba44Smacallan msk = p->mskoff + 928a3a2ba44Smacallan (maskY * p->mskpitch) + 929a3a2ba44Smacallan (maskX << 2); 930a3a2ba44Smacallan CG14Comp_Over32Solid(p, 931a3a2ba44Smacallan msk, p->mskpitch, 932a3a2ba44Smacallan dst, dstpitch, 933a3a2ba44Smacallan width, height); 934a3a2ba44Smacallan break; 935a3a2ba44Smacallan default: 936a3a2ba44Smacallan xf86Msg(X_ERROR, 937f71acd79Smacallan "unsupported mask format %08x\n", p->mskformat); 938a3a2ba44Smacallan } 939a3a2ba44Smacallan } else { 9406bdc2ffdSmacallan DPRINTF(X_ERROR, "non-solid over with msk %x\n", 9416bdc2ffdSmacallan p->mskformat); 942a3a2ba44Smacallan switch (p->srcformat) { 943a3a2ba44Smacallan case PICT_a8r8g8b8: 944a3a2ba44Smacallan case PICT_a8b8g8r8: 945a3a2ba44Smacallan src = p->srcoff + 946a3a2ba44Smacallan (srcY * p->srcpitch) + 947a3a2ba44Smacallan (srcX << 2); 948a3a2ba44Smacallan dst = dstoff + 949a3a2ba44Smacallan (dstY * dstpitch) + 950a3a2ba44Smacallan (dstX << 2); 951a3a2ba44Smacallan if (p->mskformat == PICT_a8) { 952a3a2ba44Smacallan msk = p->mskoff + 953a3a2ba44Smacallan (maskY * p->mskpitch) + 954a3a2ba44Smacallan maskX; 955a3a2ba44Smacallan CG14Comp_Over32Mask(p, 956a3a2ba44Smacallan src, p->srcpitch, 957a3a2ba44Smacallan msk, p->mskpitch, 958a3a2ba44Smacallan dst, dstpitch, 959e311bbeeSmacallan width, height, flip); 960a3a2ba44Smacallan } else { 961a3a2ba44Smacallan CG14Comp_Over32(p, 962a3a2ba44Smacallan src, p->srcpitch, 963a3a2ba44Smacallan dst, dstpitch, 964e311bbeeSmacallan width, height, flip); 965a3a2ba44Smacallan } 966a3a2ba44Smacallan break; 967a3a2ba44Smacallan case PICT_x8r8g8b8: 968a3a2ba44Smacallan case PICT_x8b8g8r8: 9696bdc2ffdSmacallan src = p->srcoff + 9706bdc2ffdSmacallan (srcY * p->srcpitch) + 9716bdc2ffdSmacallan (srcX << 2); 9726bdc2ffdSmacallan dst = dstoff + 9736bdc2ffdSmacallan (dstY * dstpitch) + 9746bdc2ffdSmacallan (dstX << 2); 9756bdc2ffdSmacallan if (p->mskformat == PICT_a8) { 9766bdc2ffdSmacallan msk = p->mskoff + 9776bdc2ffdSmacallan (maskY * p->mskpitch) + 9786bdc2ffdSmacallan maskX; 9796bdc2ffdSmacallan CG14Comp_Over32Mask_noalpha(p, 9806bdc2ffdSmacallan src, p->srcpitch, 9816bdc2ffdSmacallan msk, p->mskpitch, 982fa158432Smacallan dst, dstpitch, 983e311bbeeSmacallan width, height, flip); 984fa158432Smacallan } else if ((p->mskformat == PICT_a8r8g8b8) || 985fa158432Smacallan (p->mskformat == PICT_a8b8g8r8)) { 986fa158432Smacallan msk = p->mskoff + 987fa158432Smacallan (maskY * p->mskpitch) + 988fa158432Smacallan (maskX << 2); 989fa158432Smacallan CG14Comp_Over32Mask32_noalpha(p, 990fa158432Smacallan src, p->srcpitch, 991fa158432Smacallan msk, p->mskpitch, 9926bdc2ffdSmacallan dst, dstpitch, 993e311bbeeSmacallan width, height, flip); 9946bdc2ffdSmacallan } else { 9956bdc2ffdSmacallan xf86Msg(X_ERROR, "no src alpha, mask is %x\n", p->mskformat); 9966bdc2ffdSmacallan } 997a3a2ba44Smacallan break; 998a3a2ba44Smacallan default: 999a3a2ba44Smacallan xf86Msg(X_ERROR, "%s: format %x in non-solid Over op\n", 1000a3a2ba44Smacallan __func__, p->srcformat); 1001a3a2ba44Smacallan } 1002a3a2ba44Smacallan } 10034261fa58Smacallan break; 10044261fa58Smacallan case PictOpAdd: 10054261fa58Smacallan DPRINTF(X_ERROR, "Add %08x %08x\n", 10064261fa58Smacallan p->srcformat, p->dstformat); 10074261fa58Smacallan switch (p->srcformat) { 10084261fa58Smacallan case PICT_a8: 10094261fa58Smacallan src = p->srcoff + 10104261fa58Smacallan (srcY * p->srcpitch) + srcX; 1011d71cb32dSmacallan if (p->dstformat == PICT_a8) { 1012d71cb32dSmacallan dst = dstoff + 1013d71cb32dSmacallan (dstY * dstpitch) + dstX; 1014d71cb32dSmacallan CG14Comp_Add8(p, 1015d71cb32dSmacallan src, p->srcpitch, 1016d71cb32dSmacallan dst, dstpitch, 1017d71cb32dSmacallan width, height); 1018d71cb32dSmacallan } else { 1019d71cb32dSmacallan dst = dstoff + 1020d71cb32dSmacallan (dstY * dstpitch) + 1021d71cb32dSmacallan (dstX << 2); 1022d71cb32dSmacallan CG14Comp_Add8_32(p, 1023d71cb32dSmacallan src, p->srcpitch, 1024d71cb32dSmacallan dst, dstpitch, 1025d71cb32dSmacallan width, height); 1026d71cb32dSmacallan } 10274261fa58Smacallan break; 10284261fa58Smacallan case PICT_a8r8g8b8: 10294261fa58Smacallan case PICT_x8r8g8b8: 10304261fa58Smacallan src = p->srcoff + 10314261fa58Smacallan (srcY * p->srcpitch) + (srcX << 2); 10324261fa58Smacallan dst = dstoff + (dstY * dstpitch) + 10334261fa58Smacallan (dstX << 2); 10344261fa58Smacallan CG14Comp_Add32(p, src, p->srcpitch, 10354261fa58Smacallan dst, dstpitch, width, height); 10364261fa58Smacallan break; 10374261fa58Smacallan default: 10384261fa58Smacallan xf86Msg(X_ERROR, 10394261fa58Smacallan "unsupported src format\n"); 10404261fa58Smacallan } 10414261fa58Smacallan break; 1042a3a2ba44Smacallan case PictOpSrc: 1043a3a2ba44Smacallan DPRINTF(X_ERROR, "Src %08x %08x\n", 1044a3a2ba44Smacallan p->srcformat, p->dstformat); 1045239808baSmacallan if (p->mskformat != 0) 1046239808baSmacallan xf86Msg(X_ERROR, "Src mask %08x\n", p->mskformat); 1047f71acd79Smacallan if (p->srcformat == PICT_a8) { 1048f71acd79Smacallan CG14Copy8(pDst, srcX, srcY, dstX, dstY, width, height); 1049f71acd79Smacallan } else { 1050f71acd79Smacallan /* convert between RGB and BGR? */ 1051f71acd79Smacallan CG14Copy32(pDst, srcX, srcY, dstX, dstY, width, height); 1052f71acd79Smacallan } 1053a3a2ba44Smacallan break; 10544261fa58Smacallan default: 10554261fa58Smacallan xf86Msg(X_ERROR, "unsupported op %d\n", p->op); 10564261fa58Smacallan } 10574261fa58Smacallan exaMarkSync(pDst->drawable.pScreen); 10584261fa58Smacallan} 10594261fa58Smacallan 10604261fa58Smacallan 10614261fa58Smacallan 10624261fa58SmacallanBool 10634261fa58SmacallanCG14InitAccel(ScreenPtr pScreen) 10644261fa58Smacallan{ 10654261fa58Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 10664261fa58Smacallan Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn); 10674261fa58Smacallan ExaDriverPtr pExa; 10684261fa58Smacallan 10694261fa58Smacallan pExa = exaDriverAlloc(); 10704261fa58Smacallan if (!pExa) 10714261fa58Smacallan return FALSE; 10724261fa58Smacallan 10734261fa58Smacallan p->pExa = pExa; 10744261fa58Smacallan 10754261fa58Smacallan pExa->exa_major = EXA_VERSION_MAJOR; 10764261fa58Smacallan pExa->exa_minor = EXA_VERSION_MINOR; 10774261fa58Smacallan 10784261fa58Smacallan pExa->memoryBase = p->fb; 10794261fa58Smacallan pExa->memorySize = p->memsize; 10804261fa58Smacallan pExa->offScreenBase = p->width * p->height * 4; 10814261fa58Smacallan 10824261fa58Smacallan /* 10834261fa58Smacallan * SX memory instructions are written to 64bit aligned addresses with 10844261fa58Smacallan * a 3 bit displacement. Make sure the displacement remains constant 10854261fa58Smacallan * within one column 10864261fa58Smacallan */ 10874261fa58Smacallan 10884261fa58Smacallan pExa->pixmapOffsetAlign = 8; 10894261fa58Smacallan pExa->pixmapPitchAlign = 8; 10904261fa58Smacallan 1091fe97f391Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS 1092f71acd79Smacallan | EXA_SUPPORTS_OFFSCREEN_OVERLAPS 1093f71acd79Smacallan /*| EXA_MIXED_PIXMAPS*/; 10944261fa58Smacallan 10954261fa58Smacallan /* 10964261fa58Smacallan * these limits are bogus 10974261fa58Smacallan * SX doesn't deal with coordinates at all, so there is no limit but 10984261fa58Smacallan * we have to put something here 10994261fa58Smacallan */ 11004261fa58Smacallan pExa->maxX = 4096; 11014261fa58Smacallan pExa->maxY = 4096; 11024261fa58Smacallan 11034261fa58Smacallan pExa->WaitMarker = CG14WaitMarker; 11044261fa58Smacallan 11054261fa58Smacallan pExa->PrepareSolid = CG14PrepareSolid; 11064261fa58Smacallan pExa->Solid = CG14Solid; 11074261fa58Smacallan pExa->DoneSolid = CG14DoneCopy; 11084261fa58Smacallan pExa->PrepareCopy = CG14PrepareCopy; 1109f71acd79Smacallan pExa->Copy = CG14Copy32; 11104261fa58Smacallan pExa->DoneCopy = CG14DoneCopy; 11114261fa58Smacallan if (p->use_xrender) { 11124261fa58Smacallan pExa->CheckComposite = CG14CheckComposite; 11134261fa58Smacallan pExa->PrepareComposite = CG14PrepareComposite; 11144261fa58Smacallan pExa->Composite = CG14Composite; 11154261fa58Smacallan pExa->DoneComposite = CG14DoneCopy; 11164261fa58Smacallan } 11174261fa58Smacallan 11184261fa58Smacallan /* EXA hits more optimized paths when it does not have to fallback 11194261fa58Smacallan * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 11204261fa58Smacallan */ 11214261fa58Smacallan pExa->UploadToScreen = CG14UploadToScreen; 11224261fa58Smacallan pExa->DownloadFromScreen = CG14DownloadFromScreen; 11234261fa58Smacallan 11244261fa58Smacallan /* do some hardware init */ 11254261fa58Smacallan write_sx_reg(p, SX_PLANEMASK, 0xffffffff); 11264261fa58Smacallan p->last_mask = 0xffffffff; 11274261fa58Smacallan write_sx_reg(p, SX_ROP_CONTROL, 0xcc); 11284261fa58Smacallan p->last_rop = 0xcc; 11294261fa58Smacallan return exaDriverInit(pScreen, pExa); 11304261fa58Smacallan} 1131