tcx_accel.c revision 091cc113
16eb72584Smrg/* 26eb72584Smrg * TCX framebuffer - hardware acceleration. 36eb72584Smrg * 46eb72584Smrg * Copyright (C) 2009 Michael Lorenz 56eb72584Smrg * 66eb72584Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy 76eb72584Smrg * of this software and associated documentation files (the "Software"), to deal 86eb72584Smrg * in the Software without restriction, including without limitation the rights 96eb72584Smrg * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 106eb72584Smrg * copies of the Software, and to permit persons to whom the Software is 116eb72584Smrg * furnished to do so, subject to the following conditions: 126eb72584Smrg * 136eb72584Smrg * The above copyright notice and this permission notice shall be included in 146eb72584Smrg * all copies or substantial portions of the Software. 156eb72584Smrg * 166eb72584Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 176eb72584Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 186eb72584Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 196eb72584Smrg * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 206eb72584Smrg * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 216eb72584Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 226eb72584Smrg */ 236eb72584Smrg 24091cc113Smacallan/* $NetBSD: tcx_accel.c,v 1.10 2016/09/23 20:50:54 macallan Exp $ */ 25091cc113Smacallan 26091cc113Smacallan#ifdef HAVE_CONFIG_H 27091cc113Smacallan#include "config.h" 28091cc113Smacallan#endif 296eb72584Smrg 306eb72584Smrg#include <sys/types.h> 316eb72584Smrg 326eb72584Smrg#include "tcx.h" 336eb72584Smrg 346eb72584Smrg#ifdef DEBUG 356eb72584Smrg#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 366eb72584Smrg#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 376eb72584Smrg#else 386eb72584Smrg#define ENTER 396eb72584Smrg#define LEAVE 406eb72584Smrg#endif 416eb72584Smrg 426eb72584Smrgstatic void 436eb72584SmrgTcxWaitMarker(ScreenPtr pScreenInfo, int Marker) 446eb72584Smrg{ 456eb72584Smrg ENTER; 466eb72584Smrg /* do nothing */ 476eb72584Smrg} 486eb72584Smrg 496eb72584Smrgstatic int 506eb72584SmrgTcxMarkSync(ScreenPtr pScreenInfo) 516eb72584Smrg{ 526eb72584Smrg ENTER; 536eb72584Smrg return 0; 546eb72584Smrg} 556eb72584Smrg 566eb72584Smrgstatic Bool 576eb72584SmrgTcxPrepareCopy 586eb72584Smrg( 596eb72584Smrg PixmapPtr pSrcPixmap, 606eb72584Smrg PixmapPtr pDstPixmap, 616eb72584Smrg int xdir, 626eb72584Smrg int ydir, 636eb72584Smrg int alu, 646eb72584Smrg Pixel planemask 656eb72584Smrg) 666eb72584Smrg{ 676eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 686eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 696eb72584Smrg 706eb72584Smrg ENTER; 716eb72584Smrg /* weed out the cases we can't accelerate */ 726eb72584Smrg#ifdef DEBUG 736eb72584Smrg xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask); 746eb72584Smrg#endif 756eb72584Smrg if (alu != GXcopy) 766eb72584Smrg return FALSE; 776eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 786eb72584Smrg return FALSE; 796eb72584Smrg 806eb72584Smrg pTcx->xdir = xdir; 816eb72584Smrg pTcx->ydir = ydir; 826eb72584Smrg pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift; 836eb72584Smrg pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift; 846eb72584Smrg LEAVE; 856eb72584Smrg return TRUE; 866eb72584Smrg} 876eb72584Smrg 886eb72584Smrgstatic void 896eb72584SmrgTcxCopy 906eb72584Smrg( 916eb72584Smrg PixmapPtr pDstPixmap, 926eb72584Smrg int srcX, 936eb72584Smrg int srcY, 946eb72584Smrg int dstX, 956eb72584Smrg int dstY, 966eb72584Smrg int w, 976eb72584Smrg int h 986eb72584Smrg) 996eb72584Smrg{ 1006eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 1016eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 102091cc113Smacallan volatile uint64_t cmd, lcmd; 1036eb72584Smrg int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff; 1046eb72584Smrg int doff; 1056eb72584Smrg 1066eb72584Smrg ENTER; 1076eb72584Smrg leftover = w & 0x1f; 1086eb72584Smrg if (leftover > 0) 1096eb72584Smrg lcmd = 0x3000000000000000LL | (leftover - 1) << 24; 1106eb72584Smrg 1116eb72584Smrg 1126eb72584Smrg doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift; 1136eb72584Smrg dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift; 1146eb72584Smrg src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff; 1156eb72584Smrg dst = dstX + dstY * dpitch + doff; 1166eb72584Smrg 1176eb72584Smrg if (pTcx->ydir < 0) { 1186eb72584Smrg src += (h - 1) * pTcx->srcpitch; 1196eb72584Smrg dst += (h - 1) * dpitch; 1206eb72584Smrg sstep = 0 - pTcx->srcpitch; 1216eb72584Smrg dstep = 0 - dpitch; 1226eb72584Smrg } else { 1236eb72584Smrg sstep = pTcx->srcpitch; 1246eb72584Smrg dstep = dpitch; 1256eb72584Smrg } 1266eb72584Smrg 1276eb72584Smrg xsteps = w >> 5; 1286eb72584Smrg 1296eb72584Smrg if ((pTcx->xdir > 0) || (w < 33)) { 1306eb72584Smrg for (line = 0; line < h; line++) { 1316eb72584Smrg x = xsteps; 1326eb72584Smrg xoff = 0; 1336eb72584Smrg while (x > 0) { 1346eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1356eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1366eb72584Smrg xoff += 32; 1376eb72584Smrg x--; 1386eb72584Smrg } 1396eb72584Smrg if (leftover > 0) { 1406eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1416eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1426eb72584Smrg } 1436eb72584Smrg src += sstep; 1446eb72584Smrg dst += dstep; 1456eb72584Smrg } 1466eb72584Smrg } else { 1476eb72584Smrg /* same thing but right to left */ 1486eb72584Smrg for (line = 0; line < h; line++) { 1496eb72584Smrg x = xsteps; 1506eb72584Smrg xoff = xsteps << 5; 1516eb72584Smrg if (leftover > 0) { 1526eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1536eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1546eb72584Smrg } 1556eb72584Smrg xoff -= 32; 1566eb72584Smrg while (x > 0) { 1576eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1586eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1596eb72584Smrg xoff -= 32; 1606eb72584Smrg x--; 1616eb72584Smrg } 1626eb72584Smrg src += sstep; 1636eb72584Smrg dst += dstep; 1646eb72584Smrg } 1656eb72584Smrg } 1666eb72584Smrg LEAVE; 1676eb72584Smrg} 1686eb72584Smrg 1696eb72584Smrgstatic void 1706eb72584SmrgTcxDoneCopy(PixmapPtr pDstPixmap) 1716eb72584Smrg{ 1726eb72584Smrg ENTER; 1736eb72584Smrg LEAVE; 1746eb72584Smrg} 1756eb72584Smrg 1766eb72584Smrgstatic Bool 1776eb72584SmrgTcxPrepareSolid( 1786eb72584Smrg PixmapPtr pPixmap, 1796eb72584Smrg int alu, 1806eb72584Smrg Pixel planemask, 1816eb72584Smrg Pixel fg) 1826eb72584Smrg{ 1836eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1846eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 1854525cf0bSmacallan uint32_t hwfg; 1866eb72584Smrg 1876eb72584Smrg ENTER; 1884525cf0bSmacallan 1896eb72584Smrg /* weed out the cases we can't accelerate */ 1904525cf0bSmacallan if (pTcx->HasStipROP) { 1914525cf0bSmacallan hwfg = alu << 28; 1924525cf0bSmacallan } else if (alu == GXcopy) { 1934525cf0bSmacallan hwfg = 0x30000000; 1944525cf0bSmacallan } else 1956eb72584Smrg return FALSE; 1964525cf0bSmacallan 1976eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 1986eb72584Smrg return FALSE; 1996eb72584Smrg if (exaGetPixmapOffset(pPixmap) != 0) 2006eb72584Smrg return FALSE; 2016eb72584Smrg pTcx->fg = (fg & 0x00ffffff); 2024525cf0bSmacallan /* set colour space ID if we're in 24bit mode */ 2034525cf0bSmacallan if (pTcx->pitchshift != 0) 2044525cf0bSmacallan hwfg |= 0x03000000; 2054525cf0bSmacallan pTcx->fg |= hwfg; 2066eb72584Smrg#ifdef DEBUG 2074525cf0bSmacallan xf86Msg(X_ERROR, "fg: %08x\n", hwfg); 2086eb72584Smrg#endif 2096eb72584Smrg LEAVE; 2106eb72584Smrg return TRUE; 2116eb72584Smrg} 2126eb72584Smrg 2136eb72584Smrgstatic void 2146eb72584SmrgTcxSolid( 2156eb72584Smrg PixmapPtr pPixmap, 2166eb72584Smrg int x1, 2176eb72584Smrg int y1, 2186eb72584Smrg int x2, 2196eb72584Smrg int y2) 2206eb72584Smrg{ 2216eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 2226eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 2236eb72584Smrg int dpitch, dst, line, fullsteps, i; 224091cc113Smacallan volatile uint64_t cmd, rcmd, lcmd, tmpl; 2256eb72584Smrg uint32_t pmask; 2266eb72584Smrg 2276eb72584Smrg dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift; 2286eb72584Smrg dst = x1 + y1 * dpitch; 2296eb72584Smrg 2306eb72584Smrg tmpl = ((uint64_t)pTcx->fg) << 32; 2316eb72584Smrg 2326eb72584Smrg /* 2336eb72584Smrg * thanks to the funky architecture of the tcx's stipple 'engine' we have 2346eb72584Smrg * to deal with two different cases: 2356eb72584Smrg * - the whole width of the rectangle fits into a single 32 pixel aligned 2366eb72584Smrg * unit of 32 pixels 2376eb72584Smrg * - the first and the last 32bit unit may or may not contain less than 2386eb72584Smrg * 32 pixels 2396eb72584Smrg */ 2406eb72584Smrg x2 -= 1; 2416eb72584Smrg if ((x1 & 0xffe0) == (x2 & 0xffe0)) { 2426eb72584Smrg /* the whole width fits in one 32 pixel write */ 2436eb72584Smrg 2446eb72584Smrg /* first zero out pixels on the right */ 2456eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2466eb72584Smrg /* then mask out pixels on the left */ 2476eb72584Smrg pmask &= (0xffffffff >> (x1 & 0x1f)); 2486eb72584Smrg#ifdef DEBUG 2496eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2506eb72584Smrg#endif 2516eb72584Smrg cmd = tmpl | (uint64_t)pmask; 2526eb72584Smrg dst &= 0xffffffe0; 2536eb72584Smrg for (line = y1; line < y2; line++) { 2546eb72584Smrg pTcx->rstip[dst] = cmd; 2556eb72584Smrg dst += dpitch; 2566eb72584Smrg } 2576eb72584Smrg } else { 2586eb72584Smrg /* at least two writes per line */ 2596eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2606eb72584Smrg rcmd = tmpl | (uint64_t)pmask; 2616eb72584Smrg pmask = 0xffffffff >> (x1 & 0x1f); 2626eb72584Smrg lcmd = tmpl | (uint64_t)pmask; 2636eb72584Smrg cmd = tmpl | 0xffffffffLL; 2646eb72584Smrg dst &= 0xffffffe0; 2656eb72584Smrg fullsteps = ((x2 >> 5) - (x1 >> 5)); 2666eb72584Smrg#ifdef DEBUG 2676eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2686eb72584Smrg xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps); 2696eb72584Smrg#endif 2706eb72584Smrg fullsteps = fullsteps << 5; 2716eb72584Smrg for (line = y1; line < y2; line++) { 2726eb72584Smrg pTcx->rstip[dst] = lcmd; 2736eb72584Smrg for (i = 32; i < fullsteps; i+= 32) 2746eb72584Smrg pTcx->rstip[dst + i] = cmd; 2756eb72584Smrg pTcx->rstip[dst + i] = rcmd; 2766eb72584Smrg dst += dpitch; 2776eb72584Smrg } 2786eb72584Smrg } 2796eb72584Smrg} 2806eb72584Smrg 2816eb72584Smrg/* 2826eb72584Smrg * Memcpy-based UTS. 2836eb72584Smrg */ 2846eb72584Smrgstatic Bool 2856eb72584SmrgTcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 2866eb72584Smrg char *src, int src_pitch) 2876eb72584Smrg{ 2886eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 2896eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 2906eb72584Smrg char *dst = pTcx->fb + exaGetPixmapOffset(pDst); 2916eb72584Smrg int dst_pitch = exaGetPixmapPitch(pDst); 2926eb72584Smrg 2936eb72584Smrg int bpp = pDst->drawable.bitsPerPixel; 2946eb72584Smrg int cpp = (bpp + 7) / 8; 2956eb72584Smrg int wBytes = w * cpp; 2966eb72584Smrg 2976eb72584Smrg ENTER; 2986eb72584Smrg dst += (x * cpp) + (y * dst_pitch); 2996eb72584Smrg 3006eb72584Smrg while (h--) { 3016eb72584Smrg memcpy(dst, src, wBytes); 3026eb72584Smrg src += src_pitch; 3036eb72584Smrg dst += dst_pitch; 3046eb72584Smrg } 3056eb72584Smrg LEAVE; 3066eb72584Smrg return TRUE; 3076eb72584Smrg} 3086eb72584Smrg 3096eb72584Smrg/* 3106eb72584Smrg * Memcpy-based DFS. 3116eb72584Smrg */ 3126eb72584Smrgstatic Bool 3136eb72584SmrgTcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 3146eb72584Smrg char *dst, int dst_pitch) 3156eb72584Smrg{ 3166eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 3176eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3186eb72584Smrg char *src = pTcx->fb + exaGetPixmapOffset(pSrc); 3196eb72584Smrg int src_pitch = exaGetPixmapPitch(pSrc); 3206eb72584Smrg 3216eb72584Smrg int bpp = pSrc->drawable.bitsPerPixel; 3226eb72584Smrg int cpp = (bpp + 7) / 8; 3236eb72584Smrg int wBytes = w * cpp; 3246eb72584Smrg 3256eb72584Smrg ENTER; 3266eb72584Smrg src += (x * cpp) + (y * src_pitch); 3276eb72584Smrg 3286eb72584Smrg while (h--) { 3296eb72584Smrg memcpy(dst, src, wBytes); 3306eb72584Smrg src += src_pitch; 3316eb72584Smrg dst += dst_pitch; 3326eb72584Smrg } 3336eb72584Smrg LEAVE; 3346eb72584Smrg return TRUE; 3356eb72584Smrg} 3366eb72584Smrg 3376eb72584SmrgBool 3386eb72584SmrgTcxInitAccel(ScreenPtr pScreen) 3396eb72584Smrg{ 3406eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 3416eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3426eb72584Smrg ExaDriverPtr pExa; 3436eb72584Smrg 3446eb72584Smrg pExa = exaDriverAlloc(); 3456eb72584Smrg if (!pExa) 3466eb72584Smrg return FALSE; 3476eb72584Smrg 3486eb72584Smrg pTcx->pExa = pExa; 3496eb72584Smrg 3506eb72584Smrg pExa->exa_major = EXA_VERSION_MAJOR; 3516eb72584Smrg pExa->exa_minor = EXA_VERSION_MINOR; 3526eb72584Smrg 3536eb72584Smrg /* 3546eb72584Smrg * The S24 can display both 8 and 24bit data at the same time, and in 35559d6bc2bSmacallan * 24bit we can choose between gamma corrected and direct. No idea how that 3566eb72584Smrg * would map to EXA - we'd have to pick the right framebuffer to draw into 3576eb72584Smrg * and Solid() would need to know what kind of pixels to write 3586eb72584Smrg */ 3596eb72584Smrg pExa->memoryBase = pTcx->fb; 3606eb72584Smrg if (pScrn->depth == 8) { 36159d6bc2bSmacallan pExa->memorySize = pTcx->vramsize; 3626eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height; 3636eb72584Smrg pExa->pixmapOffsetAlign = 1; 3646eb72584Smrg pExa->pixmapPitchAlign = 1; 3656eb72584Smrg } else { 3666eb72584Smrg pExa->memorySize = 1024 * 1024 * 4; 3676eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4; 3686eb72584Smrg pExa->pixmapOffsetAlign = 4; 3696eb72584Smrg pExa->pixmapPitchAlign = 4; 3706eb72584Smrg } 3716eb72584Smrg 3726eb72584Smrg pExa->flags = EXA_OFFSCREEN_PIXMAPS; 3736eb72584Smrg 3746eb72584Smrg pExa->maxX = 2048; 3756eb72584Smrg pExa->maxY = 2048; /* dummy, available VRAM is the limit */ 3766eb72584Smrg 3776eb72584Smrg pExa->MarkSync = TcxMarkSync; 3786eb72584Smrg pExa->WaitMarker = TcxWaitMarker; 3796eb72584Smrg 3806eb72584Smrg pExa->PrepareSolid = TcxPrepareSolid; 3816eb72584Smrg pExa->Solid = TcxSolid; 3826eb72584Smrg pExa->DoneSolid = TcxDoneCopy; 3836eb72584Smrg 3846eb72584Smrg pExa->PrepareCopy = TcxPrepareCopy; 3856eb72584Smrg pExa->Copy = TcxCopy; 3866eb72584Smrg pExa->DoneCopy = TcxDoneCopy; 3876eb72584Smrg 3886eb72584Smrg /* EXA hits more optimized paths when it does not have to fallback because 3896eb72584Smrg * of missing UTS/DFS, hook memcpy-based UTS/DFS. 3906eb72584Smrg */ 3916eb72584Smrg pExa->UploadToScreen = TcxUploadToScreen; 3926eb72584Smrg pExa->DownloadFromScreen = TcxDownloadFromScreen; 3936eb72584Smrg 3946eb72584Smrg return exaDriverInit(pScreen, pExa); 3956eb72584Smrg} 396