tcx_accel.c revision 4525cf0b
16eb72584Smrg/* 26eb72584Smrg * TCX framebuffer - hardware acceleration. 36eb72584Smrg * 46eb72584Smrg * Copyright (C) 2009 Michael Lorenz 56eb72584Smrg * 66eb72584Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy 76eb72584Smrg * of this software and associated documentation files (the "Software"), to deal 86eb72584Smrg * in the Software without restriction, including without limitation the rights 96eb72584Smrg * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 106eb72584Smrg * copies of the Software, and to permit persons to whom the Software is 116eb72584Smrg * furnished to do so, subject to the following conditions: 126eb72584Smrg * 136eb72584Smrg * The above copyright notice and this permission notice shall be included in 146eb72584Smrg * all copies or substantial portions of the Software. 156eb72584Smrg * 166eb72584Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 176eb72584Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 186eb72584Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 196eb72584Smrg * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 206eb72584Smrg * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 216eb72584Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 226eb72584Smrg */ 236eb72584Smrg 244525cf0bSmacallan/* $NetBSD: tcx_accel.c,v 1.9 2014/07/08 17:05:26 macallan Exp $ */ 256eb72584Smrg 266eb72584Smrg#include <sys/types.h> 276eb72584Smrg 286eb72584Smrg#include "tcx.h" 296eb72584Smrg 306eb72584Smrg#ifdef DEBUG 316eb72584Smrg#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 326eb72584Smrg#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 336eb72584Smrg#else 346eb72584Smrg#define ENTER 356eb72584Smrg#define LEAVE 366eb72584Smrg#endif 376eb72584Smrg 386eb72584Smrgstatic void 396eb72584SmrgTcxWaitMarker(ScreenPtr pScreenInfo, int Marker) 406eb72584Smrg{ 416eb72584Smrg ENTER; 426eb72584Smrg /* do nothing */ 436eb72584Smrg} 446eb72584Smrg 456eb72584Smrgstatic int 466eb72584SmrgTcxMarkSync(ScreenPtr pScreenInfo) 476eb72584Smrg{ 486eb72584Smrg ENTER; 496eb72584Smrg return 0; 506eb72584Smrg} 516eb72584Smrg 526eb72584Smrgstatic Bool 536eb72584SmrgTcxPrepareCopy 546eb72584Smrg( 556eb72584Smrg PixmapPtr pSrcPixmap, 566eb72584Smrg PixmapPtr pDstPixmap, 576eb72584Smrg int xdir, 586eb72584Smrg int ydir, 596eb72584Smrg int alu, 606eb72584Smrg Pixel planemask 616eb72584Smrg) 626eb72584Smrg{ 636eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 646eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 656eb72584Smrg 666eb72584Smrg ENTER; 676eb72584Smrg /* weed out the cases we can't accelerate */ 686eb72584Smrg#ifdef DEBUG 696eb72584Smrg xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask); 706eb72584Smrg#endif 716eb72584Smrg if (alu != GXcopy) 726eb72584Smrg return FALSE; 736eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 746eb72584Smrg return FALSE; 756eb72584Smrg 766eb72584Smrg pTcx->xdir = xdir; 776eb72584Smrg pTcx->ydir = ydir; 786eb72584Smrg pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift; 796eb72584Smrg pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift; 806eb72584Smrg LEAVE; 816eb72584Smrg return TRUE; 826eb72584Smrg} 836eb72584Smrg 846eb72584Smrgstatic void 856eb72584SmrgTcxCopy 866eb72584Smrg( 876eb72584Smrg PixmapPtr pDstPixmap, 886eb72584Smrg int srcX, 896eb72584Smrg int srcY, 906eb72584Smrg int dstX, 916eb72584Smrg int dstY, 926eb72584Smrg int w, 936eb72584Smrg int h 946eb72584Smrg) 956eb72584Smrg{ 966eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 976eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 986eb72584Smrg uint64_t cmd, lcmd; 996eb72584Smrg int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff; 1006eb72584Smrg int doff; 1016eb72584Smrg 1026eb72584Smrg ENTER; 1036eb72584Smrg leftover = w & 0x1f; 1046eb72584Smrg if (leftover > 0) 1056eb72584Smrg lcmd = 0x3000000000000000LL | (leftover - 1) << 24; 1066eb72584Smrg 1076eb72584Smrg 1086eb72584Smrg doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift; 1096eb72584Smrg dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift; 1106eb72584Smrg src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff; 1116eb72584Smrg dst = dstX + dstY * dpitch + doff; 1126eb72584Smrg 1136eb72584Smrg if (pTcx->ydir < 0) { 1146eb72584Smrg src += (h - 1) * pTcx->srcpitch; 1156eb72584Smrg dst += (h - 1) * dpitch; 1166eb72584Smrg sstep = 0 - pTcx->srcpitch; 1176eb72584Smrg dstep = 0 - dpitch; 1186eb72584Smrg } else { 1196eb72584Smrg sstep = pTcx->srcpitch; 1206eb72584Smrg dstep = dpitch; 1216eb72584Smrg } 1226eb72584Smrg 1236eb72584Smrg xsteps = w >> 5; 1246eb72584Smrg 1256eb72584Smrg if ((pTcx->xdir > 0) || (w < 33)) { 1266eb72584Smrg for (line = 0; line < h; line++) { 1276eb72584Smrg x = xsteps; 1286eb72584Smrg xoff = 0; 1296eb72584Smrg while (x > 0) { 1306eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1316eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1326eb72584Smrg xoff += 32; 1336eb72584Smrg x--; 1346eb72584Smrg } 1356eb72584Smrg if (leftover > 0) { 1366eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1376eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1386eb72584Smrg } 1396eb72584Smrg src += sstep; 1406eb72584Smrg dst += dstep; 1416eb72584Smrg } 1426eb72584Smrg } else { 1436eb72584Smrg /* same thing but right to left */ 1446eb72584Smrg for (line = 0; line < h; line++) { 1456eb72584Smrg x = xsteps; 1466eb72584Smrg xoff = xsteps << 5; 1476eb72584Smrg if (leftover > 0) { 1486eb72584Smrg cmd = lcmd | (uint64_t)(src + xoff); 1496eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1506eb72584Smrg } 1516eb72584Smrg xoff -= 32; 1526eb72584Smrg while (x > 0) { 1536eb72584Smrg cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 1546eb72584Smrg pTcx->rblit[dst + xoff] = cmd; 1556eb72584Smrg xoff -= 32; 1566eb72584Smrg x--; 1576eb72584Smrg } 1586eb72584Smrg src += sstep; 1596eb72584Smrg dst += dstep; 1606eb72584Smrg } 1616eb72584Smrg } 1626eb72584Smrg LEAVE; 1636eb72584Smrg} 1646eb72584Smrg 1656eb72584Smrgstatic void 1666eb72584SmrgTcxDoneCopy(PixmapPtr pDstPixmap) 1676eb72584Smrg{ 1686eb72584Smrg ENTER; 1696eb72584Smrg LEAVE; 1706eb72584Smrg} 1716eb72584Smrg 1726eb72584Smrgstatic Bool 1736eb72584SmrgTcxPrepareSolid( 1746eb72584Smrg PixmapPtr pPixmap, 1756eb72584Smrg int alu, 1766eb72584Smrg Pixel planemask, 1776eb72584Smrg Pixel fg) 1786eb72584Smrg{ 1796eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 1806eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 1814525cf0bSmacallan uint32_t hwfg; 1826eb72584Smrg 1836eb72584Smrg ENTER; 1844525cf0bSmacallan 1856eb72584Smrg /* weed out the cases we can't accelerate */ 1864525cf0bSmacallan if (pTcx->HasStipROP) { 1874525cf0bSmacallan hwfg = alu << 28; 1884525cf0bSmacallan } else if (alu == GXcopy) { 1894525cf0bSmacallan hwfg = 0x30000000; 1904525cf0bSmacallan } else 1916eb72584Smrg return FALSE; 1924525cf0bSmacallan 1936eb72584Smrg if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 1946eb72584Smrg return FALSE; 1956eb72584Smrg if (exaGetPixmapOffset(pPixmap) != 0) 1966eb72584Smrg return FALSE; 1976eb72584Smrg pTcx->fg = (fg & 0x00ffffff); 1984525cf0bSmacallan /* set colour space ID if we're in 24bit mode */ 1994525cf0bSmacallan if (pTcx->pitchshift != 0) 2004525cf0bSmacallan hwfg |= 0x03000000; 2014525cf0bSmacallan pTcx->fg |= hwfg; 2026eb72584Smrg#ifdef DEBUG 2034525cf0bSmacallan xf86Msg(X_ERROR, "fg: %08x\n", hwfg); 2046eb72584Smrg#endif 2056eb72584Smrg LEAVE; 2066eb72584Smrg return TRUE; 2076eb72584Smrg} 2086eb72584Smrg 2096eb72584Smrgstatic void 2106eb72584SmrgTcxSolid( 2116eb72584Smrg PixmapPtr pPixmap, 2126eb72584Smrg int x1, 2136eb72584Smrg int y1, 2146eb72584Smrg int x2, 2156eb72584Smrg int y2) 2166eb72584Smrg{ 2176eb72584Smrg ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 2186eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 2196eb72584Smrg int dpitch, dst, line, fullsteps, i; 2206eb72584Smrg uint64_t cmd, rcmd, lcmd, tmpl; 2216eb72584Smrg uint32_t pmask; 2226eb72584Smrg 2236eb72584Smrg dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift; 2246eb72584Smrg dst = x1 + y1 * dpitch; 2256eb72584Smrg 2266eb72584Smrg tmpl = ((uint64_t)pTcx->fg) << 32; 2276eb72584Smrg 2286eb72584Smrg /* 2296eb72584Smrg * thanks to the funky architecture of the tcx's stipple 'engine' we have 2306eb72584Smrg * to deal with two different cases: 2316eb72584Smrg * - the whole width of the rectangle fits into a single 32 pixel aligned 2326eb72584Smrg * unit of 32 pixels 2336eb72584Smrg * - the first and the last 32bit unit may or may not contain less than 2346eb72584Smrg * 32 pixels 2356eb72584Smrg */ 2366eb72584Smrg x2 -= 1; 2376eb72584Smrg if ((x1 & 0xffe0) == (x2 & 0xffe0)) { 2386eb72584Smrg /* the whole width fits in one 32 pixel write */ 2396eb72584Smrg 2406eb72584Smrg /* first zero out pixels on the right */ 2416eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2426eb72584Smrg /* then mask out pixels on the left */ 2436eb72584Smrg pmask &= (0xffffffff >> (x1 & 0x1f)); 2446eb72584Smrg#ifdef DEBUG 2456eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2466eb72584Smrg#endif 2476eb72584Smrg cmd = tmpl | (uint64_t)pmask; 2486eb72584Smrg dst &= 0xffffffe0; 2496eb72584Smrg for (line = y1; line < y2; line++) { 2506eb72584Smrg pTcx->rstip[dst] = cmd; 2516eb72584Smrg dst += dpitch; 2526eb72584Smrg } 2536eb72584Smrg } else { 2546eb72584Smrg /* at least two writes per line */ 2556eb72584Smrg pmask = 0xffffffff << (31 - (x2 & 0x1f)); 2566eb72584Smrg rcmd = tmpl | (uint64_t)pmask; 2576eb72584Smrg pmask = 0xffffffff >> (x1 & 0x1f); 2586eb72584Smrg lcmd = tmpl | (uint64_t)pmask; 2596eb72584Smrg cmd = tmpl | 0xffffffffLL; 2606eb72584Smrg dst &= 0xffffffe0; 2616eb72584Smrg fullsteps = ((x2 >> 5) - (x1 >> 5)); 2626eb72584Smrg#ifdef DEBUG 2636eb72584Smrg xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 2646eb72584Smrg xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps); 2656eb72584Smrg#endif 2666eb72584Smrg fullsteps = fullsteps << 5; 2676eb72584Smrg for (line = y1; line < y2; line++) { 2686eb72584Smrg pTcx->rstip[dst] = lcmd; 2696eb72584Smrg for (i = 32; i < fullsteps; i+= 32) 2706eb72584Smrg pTcx->rstip[dst + i] = cmd; 2716eb72584Smrg pTcx->rstip[dst + i] = rcmd; 2726eb72584Smrg dst += dpitch; 2736eb72584Smrg } 2746eb72584Smrg } 2756eb72584Smrg} 2766eb72584Smrg 2776eb72584Smrg/* 2786eb72584Smrg * Memcpy-based UTS. 2796eb72584Smrg */ 2806eb72584Smrgstatic Bool 2816eb72584SmrgTcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 2826eb72584Smrg char *src, int src_pitch) 2836eb72584Smrg{ 2846eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 2856eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 2866eb72584Smrg char *dst = pTcx->fb + exaGetPixmapOffset(pDst); 2876eb72584Smrg int dst_pitch = exaGetPixmapPitch(pDst); 2886eb72584Smrg 2896eb72584Smrg int bpp = pDst->drawable.bitsPerPixel; 2906eb72584Smrg int cpp = (bpp + 7) / 8; 2916eb72584Smrg int wBytes = w * cpp; 2926eb72584Smrg 2936eb72584Smrg ENTER; 2946eb72584Smrg dst += (x * cpp) + (y * dst_pitch); 2956eb72584Smrg 2966eb72584Smrg while (h--) { 2976eb72584Smrg memcpy(dst, src, wBytes); 2986eb72584Smrg src += src_pitch; 2996eb72584Smrg dst += dst_pitch; 3006eb72584Smrg } 3016eb72584Smrg LEAVE; 3026eb72584Smrg return TRUE; 3036eb72584Smrg} 3046eb72584Smrg 3056eb72584Smrg/* 3066eb72584Smrg * Memcpy-based DFS. 3076eb72584Smrg */ 3086eb72584Smrgstatic Bool 3096eb72584SmrgTcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 3106eb72584Smrg char *dst, int dst_pitch) 3116eb72584Smrg{ 3126eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 3136eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3146eb72584Smrg char *src = pTcx->fb + exaGetPixmapOffset(pSrc); 3156eb72584Smrg int src_pitch = exaGetPixmapPitch(pSrc); 3166eb72584Smrg 3176eb72584Smrg int bpp = pSrc->drawable.bitsPerPixel; 3186eb72584Smrg int cpp = (bpp + 7) / 8; 3196eb72584Smrg int wBytes = w * cpp; 3206eb72584Smrg 3216eb72584Smrg ENTER; 3226eb72584Smrg src += (x * cpp) + (y * src_pitch); 3236eb72584Smrg 3246eb72584Smrg while (h--) { 3256eb72584Smrg memcpy(dst, src, wBytes); 3266eb72584Smrg src += src_pitch; 3276eb72584Smrg dst += dst_pitch; 3286eb72584Smrg } 3296eb72584Smrg LEAVE; 3306eb72584Smrg return TRUE; 3316eb72584Smrg} 3326eb72584Smrg 3336eb72584SmrgBool 3346eb72584SmrgTcxInitAccel(ScreenPtr pScreen) 3356eb72584Smrg{ 3366eb72584Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 3376eb72584Smrg TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 3386eb72584Smrg ExaDriverPtr pExa; 3396eb72584Smrg 3406eb72584Smrg pExa = exaDriverAlloc(); 3416eb72584Smrg if (!pExa) 3426eb72584Smrg return FALSE; 3436eb72584Smrg 3446eb72584Smrg pTcx->pExa = pExa; 3456eb72584Smrg 3466eb72584Smrg pExa->exa_major = EXA_VERSION_MAJOR; 3476eb72584Smrg pExa->exa_minor = EXA_VERSION_MINOR; 3486eb72584Smrg 3496eb72584Smrg /* 3506eb72584Smrg * The S24 can display both 8 and 24bit data at the same time, and in 35159d6bc2bSmacallan * 24bit we can choose between gamma corrected and direct. No idea how that 3526eb72584Smrg * would map to EXA - we'd have to pick the right framebuffer to draw into 3536eb72584Smrg * and Solid() would need to know what kind of pixels to write 3546eb72584Smrg */ 3556eb72584Smrg pExa->memoryBase = pTcx->fb; 3566eb72584Smrg if (pScrn->depth == 8) { 35759d6bc2bSmacallan pExa->memorySize = pTcx->vramsize; 3586eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height; 3596eb72584Smrg pExa->pixmapOffsetAlign = 1; 3606eb72584Smrg pExa->pixmapPitchAlign = 1; 3616eb72584Smrg } else { 3626eb72584Smrg pExa->memorySize = 1024 * 1024 * 4; 3636eb72584Smrg pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4; 3646eb72584Smrg pExa->pixmapOffsetAlign = 4; 3656eb72584Smrg pExa->pixmapPitchAlign = 4; 3666eb72584Smrg } 3676eb72584Smrg 3686eb72584Smrg pExa->flags = EXA_OFFSCREEN_PIXMAPS; 3696eb72584Smrg 3706eb72584Smrg pExa->maxX = 2048; 3716eb72584Smrg pExa->maxY = 2048; /* dummy, available VRAM is the limit */ 3726eb72584Smrg 3736eb72584Smrg pExa->MarkSync = TcxMarkSync; 3746eb72584Smrg pExa->WaitMarker = TcxWaitMarker; 3756eb72584Smrg 3766eb72584Smrg pExa->PrepareSolid = TcxPrepareSolid; 3776eb72584Smrg pExa->Solid = TcxSolid; 3786eb72584Smrg pExa->DoneSolid = TcxDoneCopy; 3796eb72584Smrg 3806eb72584Smrg pExa->PrepareCopy = TcxPrepareCopy; 3816eb72584Smrg pExa->Copy = TcxCopy; 3826eb72584Smrg pExa->DoneCopy = TcxDoneCopy; 3836eb72584Smrg 3846eb72584Smrg /* EXA hits more optimized paths when it does not have to fallback because 3856eb72584Smrg * of missing UTS/DFS, hook memcpy-based UTS/DFS. 3866eb72584Smrg */ 3876eb72584Smrg pExa->UploadToScreen = TcxUploadToScreen; 3886eb72584Smrg pExa->DownloadFromScreen = TcxDownloadFromScreen; 3896eb72584Smrg 3906eb72584Smrg return exaDriverInit(pScreen, pExa); 3916eb72584Smrg} 392