/* tcx_accel.c revision 6eb72584 */
16eb72584Smrg/* 26eb72584Smrg * TCX framebuffer - hardware acceleration. 36eb72584Smrg * 46eb72584Smrg * Copyright (C) 2009 Michael Lorenz 56eb72584Smrg * 66eb72584Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy 76eb72584Smrg * of this software and associated documentation files (the "Software"), to deal 86eb72584Smrg * in the Software without restriction, including without limitation the rights 96eb72584Smrg * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 106eb72584Smrg * copies of the Software, and to permit persons to whom the Software is 116eb72584Smrg * furnished to do so, subject to the following conditions: 126eb72584Smrg * 136eb72584Smrg * The above copyright notice and this permission notice shall be included in 146eb72584Smrg * all copies or substantial portions of the Software. 156eb72584Smrg * 166eb72584Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 176eb72584Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 186eb72584Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 196eb72584Smrg * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 206eb72584Smrg * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 216eb72584Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
226eb72584Smrg */ 236eb72584Smrg 246eb72584Smrg/* $NetBSD: tcx_accel.c,v 1.7 2013/06/04 22:58:31 mrg Exp $ */ 256eb72584Smrg 266eb72584Smrg#include <sys/types.h> 276eb72584Smrg 286eb72584Smrg#include "tcx.h" 296eb72584Smrg 306eb72584Smrg#ifdef DEBUG 316eb72584Smrg#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 326eb72584Smrg#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 336eb72584Smrg#else 346eb72584Smrg#define ENTER 356eb72584Smrg#define LEAVE 366eb72584Smrg#endif 376eb72584Smrg 386eb72584Smrgstatic void 396eb72584SmrgTcxWaitMarker(ScreenPtr pScreenInfo, int Marker) 406eb72584Smrg{ 416eb72584Smrg ENTER; 426eb72584Smrg /* do nothing */ 436eb72584Smrg} 446eb72584Smrg 456eb72584Smrgstatic int 466eb72584SmrgTcxMarkSync(ScreenPtr pScreenInfo) 476eb72584Smrg{ 486eb72584Smrg ENTER; 496eb72584Smrg return 0; 506eb72584Smrg} 516eb72584Smrg 526eb72584Smrgstatic Bool 536eb72584SmrgTcxPrepareCopy 546eb72584Smrg( 556eb72584Smrg PixmapPtr pSrcPixmap, 566eb72584Smrg PixmapPtr pDstPixmap, 576eb72584Smrg int xdir, 586eb72584Smrg int ydir, 596eb72584Smrg int alu, 606eb72584Smrg Pixel planemask 616eb72584Smrg) 626eb72584Smrg{ 636eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 646eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 656eb72584Smrg 666eb72584Smrg ENTER; 676eb72584Smrg /* weed out the cases we can't accelerate */ 686eb72584Smrg#ifdef DEBUG 696eb72584Smrg xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask); 706eb72584Smrg#endif 716eb72584Smrg if (alu != GXcopy) 726eb72584Smrg return FALSE; 736eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 746eb72584Smrg return FALSE; 756eb72584Smrg 766eb72584Smrg pTcx->xdir = xdir; 776eb72584Smrg pTcx->ydir = ydir; 786eb72584Smrg pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift; 796eb72584Smrg pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift; 806eb72584Smrg LEAVE; 816eb72584Smrg return TRUE; 826eb72584Smrg} 836eb72584Smrg 
846eb72584Smrgstatic void 856eb72584SmrgTcxCopy 866eb72584Smrg( 876eb72584Smrg PixmapPtr pDstPixmap, 886eb72584Smrg int srcX, 896eb72584Smrg int srcY, 906eb72584Smrg int dstX, 916eb72584Smrg int dstY, 926eb72584Smrg int w, 936eb72584Smrg int h 946eb72584Smrg) 956eb72584Smrg{ 966eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 976eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 986eb72584Smrg uint64_t cmd, lcmd; 996eb72584Smrg int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff; 1006eb72584Smrg int doff; 1016eb72584Smrg 1026eb72584Smrg ENTER; 1036eb72584Smrg leftover = w & 0x1f; 1046eb72584Smrg if (leftover > 0) 1056eb72584Smrg lcmd = 0x3000000000000000LL | (leftover - 1) << 24; 1066eb72584Smrg 1076eb72584Smrg 1086eb72584Smrg doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift; 1096eb72584Smrg dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift; 1106eb72584Smrg src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff; 1116eb72584Smrg dst = dstX + dstY * dpitch + doff; 1126eb72584Smrg 1136eb72584Smrg if (pTcx->ydir < 0) { 1146eb72584Smrg src += (h - 1) * pTcx->srcpitch; 1156eb72584Smrg dst += (h - 1) * dpitch; 1166eb72584Smrg sstep = 0 - pTcx->srcpitch; 1176eb72584Smrg dstep = 0 - dpitch; 1186eb72584Smrg } else { 1196eb72584Smrg sstep = pTcx->srcpitch; 1206eb72584Smrg dstep = dpitch; 1216eb72584Smrg } 1226eb72584Smrg 1236eb72584Smrg xsteps = w >> 5; 1246eb72584Smrg 1256eb72584Smrg if ((pTcx->xdir > 0) || (w < 33)) { 1266eb72584Smrg for (line = 0; line < h; line++) { 1276eb72584Smrg x = xsteps; 1286eb72584Smrg xoff = 0; 1296eb72584Smrg while (x > 0) { 1306eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1316eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1326eb72584Smrg xoff += 32; 1336eb72584Smrg x--; 1346eb72584Smrg } 1356eb72584Smrg if (leftover > 0) { 1366eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1376eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1386eb72584Smrg } 
1396eb72584Smrg src += sstep; 1406eb72584Smrg dst += dstep; 1416eb72584Smrg } 1426eb72584Smrg } else { 1436eb72584Smrg /* same thing but right to left */ 1446eb72584Smrg for (line = 0; line < h; line++) { 1456eb72584Smrg x = xsteps; 1466eb72584Smrg xoff = xsteps << 5; 1476eb72584Smrg if (leftover > 0) { 1486eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1496eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1506eb72584Smrg } 1516eb72584Smrg xoff -= 32; 1526eb72584Smrg while (x > 0) { 1536eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1546eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1556eb72584Smrg xoff -= 32; 1566eb72584Smrg x--; 1576eb72584Smrg } 1586eb72584Smrg src += sstep; 1596eb72584Smrg dst += dstep; 1606eb72584Smrg } 1616eb72584Smrg } 1626eb72584Smrg LEAVE; 1636eb72584Smrg} 1646eb72584Smrg 1656eb72584Smrgstatic void 1666eb72584SmrgTcxDoneCopy(PixmapPtr pDstPixmap) 1676eb72584Smrg{ 1686eb72584Smrg ENTER; 1696eb72584Smrg LEAVE; 1706eb72584Smrg} 1716eb72584Smrg 1726eb72584Smrgstatic Bool 1736eb72584SmrgTcxPrepareSolid( 1746eb72584Smrg PixmapPtr pPixmap, 1756eb72584Smrg int alu, 1766eb72584Smrg Pixel planemask, 1776eb72584Smrg Pixel fg) 1786eb72584Smrg{ 1796eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1806eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 1816eb72584Smrg 1826eb72584Smrg ENTER; 1836eb72584Smrg /* weed out the cases we can't accelerate */ 1846eb72584Smrg if (alu != GXcopy) 1856eb72584Smrg return FALSE; 1866eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 1876eb72584Smrg return FALSE; 1886eb72584Smrg if (exaGetPixmapOffset(pPixmap) != 0) 1896eb72584Smrg return FALSE; 1906eb72584Smrg pTcx->fg = (fg & 0x00ffffff); 1916eb72584Smrg if (pTcx->pitchshift == 0) { 1926eb72584Smrg pTcx->fg |= 0x30000000; 1936eb72584Smrg } else 1946eb72584Smrg pTcx->fg |= 0x33000000; 1956eb72584Smrg#ifdef DEBUG 1966eb72584Smrg xf86Msg(X_ERROR, "fg: %08x\n", fg); 1976eb72584Smrg#endif 
1986eb72584Smrg LEAVE; 1996eb72584Smrg return TRUE; 2006eb72584Smrg} 2016eb72584Smrg 2026eb72584Smrgstatic void 2036eb72584SmrgTcxSolid( 2046eb72584Smrg PixmapPtr pPixmap, 2056eb72584Smrg int x1, 2066eb72584Smrg int y1, 2076eb72584Smrg int x2, 2086eb72584Smrg int y2) 2096eb72584Smrg{ 2106eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 2116eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 2126eb72584Smrg int dpitch, dst, line, fullsteps, i; 2136eb72584Smrg uint64_t cmd, rcmd, lcmd, tmpl; 2146eb72584Smrg uint32_t pmask; 2156eb72584Smrg 2166eb72584Smrg dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift; 2176eb72584Smrg dst = x1 + y1 * dpitch; 2186eb72584Smrg 2196eb72584Smrg tmpl = ((uint64_t)pTcx->fg) << 32; 2206eb72584Smrg 2216eb72584Smrg /* 2226eb72584Smrg * thanks to the funky architecture of the tcx's stipple 'engine' we have 2236eb72584Smrg * to deal with two different cases: 2246eb72584Smrg * - the whole width of the rectangle fits into a single 32 pixel aligned 2256eb72584Smrg * unit of 32 pixels 2266eb72584Smrg * - the first and the last 32bit unit may or may not contain less than 2276eb72584Smrg * 32 pixels 2286eb72584Smrg */ 2296eb72584Smrg x2 -= 1; 2306eb72584Smrg if ((x1 & 0xffe0) == (x2 & 0xffe0)) { 2316eb72584Smrg /* the whole width fits in one 32 pixel write */ 2326eb72584Smrg 2336eb72584Smrg /* first zero out pixels on the right */ 2346eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2356eb72584Smrg /* then mask out pixels on the left */ 2366eb72584Smrg pmask &= (0xffffffff >> (x1 & 0x1f)); 2376eb72584Smrg#ifdef DEBUG 2386eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2396eb72584Smrg#endif 2406eb72584Smrg cmd = tmpl | (uint64_t)pmask; 2416eb72584Smrg dst &= 0xffffffe0; 2426eb72584Smrg for (line = y1; line < y2; line++) { 2436eb72584Smrg pTcx->rstip[dst] = cmd; 2446eb72584Smrg dst += dpitch; 2456eb72584Smrg } 2466eb72584Smrg } else { 2476eb72584Smrg /* at least two writes 
per line */ 2486eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2496eb72584Smrg rcmd = tmpl | (uint64_t)pmask; 2506eb72584Smrg pmask = 0xffffffff >> (x1 & 0x1f); 2516eb72584Smrg lcmd = tmpl | (uint64_t)pmask; 2526eb72584Smrg cmd = tmpl | 0xffffffffLL; 2536eb72584Smrg dst &= 0xffffffe0; 2546eb72584Smrg fullsteps = ((x2 >> 5) - (x1 >> 5)); 2556eb72584Smrg#ifdef DEBUG 2566eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2576eb72584Smrg xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps); 2586eb72584Smrg#endif 2596eb72584Smrg fullsteps = fullsteps << 5; 2606eb72584Smrg for (line = y1; line < y2; line++) { 2616eb72584Smrg pTcx->rstip[dst] = lcmd; 2626eb72584Smrg for (i = 32; i < fullsteps; i+= 32) 2636eb72584Smrg pTcx->rstip[dst + i] = cmd; 2646eb72584Smrg pTcx->rstip[dst + i] = rcmd; 2656eb72584Smrg dst += dpitch; 2666eb72584Smrg } 2676eb72584Smrg } 2686eb72584Smrg} 2696eb72584Smrg 2706eb72584Smrg/* 2716eb72584Smrg * Memcpy-based UTS. 2726eb72584Smrg */ 2736eb72584Smrgstatic Bool 2746eb72584SmrgTcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 2756eb72584Smrg char *src, int src_pitch) 2766eb72584Smrg{ 2776eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 2786eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 2796eb72584Smrg char *dst = pTcx->fb + exaGetPixmapOffset(pDst); 2806eb72584Smrg int dst_pitch = exaGetPixmapPitch(pDst); 2816eb72584Smrg 2826eb72584Smrg int bpp = pDst->drawable.bitsPerPixel; 2836eb72584Smrg int cpp = (bpp + 7) / 8; 2846eb72584Smrg int wBytes = w * cpp; 2856eb72584Smrg 2866eb72584Smrg ENTER; 2876eb72584Smrg dst += (x * cpp) + (y * dst_pitch); 2886eb72584Smrg 2896eb72584Smrg while (h--) { 2906eb72584Smrg memcpy(dst, src, wBytes); 2916eb72584Smrg src += src_pitch; 2926eb72584Smrg dst += dst_pitch; 2936eb72584Smrg } 2946eb72584Smrg LEAVE; 2956eb72584Smrg return TRUE; 2966eb72584Smrg} 2976eb72584Smrg 2986eb72584Smrg/* 2996eb72584Smrg * Memcpy-based DFS. 
3006eb72584Smrg */ 3016eb72584Smrgstatic Bool 3026eb72584SmrgTcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 3036eb72584Smrg char *dst, int dst_pitch) 3046eb72584Smrg{ 3056eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 3066eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3076eb72584Smrg char *src = pTcx->fb + exaGetPixmapOffset(pSrc); 3086eb72584Smrg int src_pitch = exaGetPixmapPitch(pSrc); 3096eb72584Smrg 3106eb72584Smrg int bpp = pSrc->drawable.bitsPerPixel; 3116eb72584Smrg int cpp = (bpp + 7) / 8; 3126eb72584Smrg int wBytes = w * cpp; 3136eb72584Smrg 3146eb72584Smrg ENTER; 3156eb72584Smrg src += (x * cpp) + (y * src_pitch); 3166eb72584Smrg 3176eb72584Smrg while (h--) { 3186eb72584Smrg memcpy(dst, src, wBytes); 3196eb72584Smrg src += src_pitch; 3206eb72584Smrg dst += dst_pitch; 3216eb72584Smrg } 3226eb72584Smrg LEAVE; 3236eb72584Smrg return TRUE; 3246eb72584Smrg} 3256eb72584Smrg 3266eb72584SmrgBool 3276eb72584SmrgTcxInitAccel(ScreenPtr pScreen) 3286eb72584Smrg{ 3296eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 3306eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3316eb72584Smrg ExaDriverPtr pExa; 3326eb72584Smrg 3336eb72584Smrg pExa = exaDriverAlloc(); 3346eb72584Smrg if (!pExa) 3356eb72584Smrg return FALSE; 3366eb72584Smrg 3376eb72584Smrg pTcx->pExa = pExa; 3386eb72584Smrg 3396eb72584Smrg pExa->exa_major = EXA_VERSION_MAJOR; 3406eb72584Smrg pExa->exa_minor = EXA_VERSION_MINOR; 3416eb72584Smrg 3426eb72584Smrg /* 3436eb72584Smrg * The S24 can display both 8 and 24bit data at the same time, and in 3446eb72584Smrg * 24bit we can choose between gamma corrected ad direct. 
No idea how that 3456eb72584Smrg * would map to EXA - we'd have to pick the right framebuffer to draw into 3466eb72584Smrg * and Solid() would need to know what kind of pixels to write 3476eb72584Smrg */ 3486eb72584Smrg pExa->memoryBase = pTcx->fb; 3496eb72584Smrg if (pScrn->depth == 8) { 3506eb72584Smrg pExa->memorySize = 1024 * 1024; 3516eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height; 3526eb72584Smrg pExa->pixmapOffsetAlign = 1; 3536eb72584Smrg pExa->pixmapPitchAlign = 1; 3546eb72584Smrg } else { 3556eb72584Smrg pExa->memorySize = 1024 * 1024 * 4; 3566eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4; 3576eb72584Smrg pExa->pixmapOffsetAlign = 4; 3586eb72584Smrg pExa->pixmapPitchAlign = 4; 3596eb72584Smrg } 3606eb72584Smrg 3616eb72584Smrg pExa->flags = EXA_OFFSCREEN_PIXMAPS; 3626eb72584Smrg 3636eb72584Smrg pExa->maxX = 2048; 3646eb72584Smrg pExa->maxY = 2048; /* dummy, available VRAM is the limit */ 3656eb72584Smrg 3666eb72584Smrg pExa->MarkSync = TcxMarkSync; 3676eb72584Smrg pExa->WaitMarker = TcxWaitMarker; 3686eb72584Smrg 3696eb72584Smrg pExa->PrepareSolid = TcxPrepareSolid; 3706eb72584Smrg pExa->Solid = TcxSolid; 3716eb72584Smrg pExa->DoneSolid = TcxDoneCopy; 3726eb72584Smrg 3736eb72584Smrg pExa->PrepareCopy = TcxPrepareCopy; 3746eb72584Smrg pExa->Copy = TcxCopy; 3756eb72584Smrg pExa->DoneCopy = TcxDoneCopy; 3766eb72584Smrg 3776eb72584Smrg /* EXA hits more optimized paths when it does not have to fallback because 3786eb72584Smrg * of missing UTS/DFS, hook memcpy-based UTS/DFS. 3796eb72584Smrg */ 3806eb72584Smrg pExa->UploadToScreen = TcxUploadToScreen; 3816eb72584Smrg pExa->DownloadFromScreen = TcxDownloadFromScreen; 3826eb72584Smrg 3836eb72584Smrg return exaDriverInit(pScreen, pExa); 3846eb72584Smrg} 385