tcx_accel.c revision 6eb72584
16eb72584Smrg/*
26eb72584Smrg * TCX framebuffer - hardware acceleration.
36eb72584Smrg *
46eb72584Smrg * Copyright (C) 2009 Michael Lorenz
56eb72584Smrg *
66eb72584Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy
76eb72584Smrg * of this software and associated documentation files (the "Software"), to deal
86eb72584Smrg * in the Software without restriction, including without limitation the rights
96eb72584Smrg * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
106eb72584Smrg * copies of the Software, and to permit persons to whom the Software is
116eb72584Smrg * furnished to do so, subject to the following conditions:
126eb72584Smrg *
136eb72584Smrg * The above copyright notice and this permission notice shall be included in
146eb72584Smrg * all copies or substantial portions of the Software.
156eb72584Smrg *
166eb72584Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
176eb72584Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
186eb72584Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
196eb72584Smrg * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
206eb72584Smrg * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
216eb72584Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
226eb72584Smrg */
236eb72584Smrg
246eb72584Smrg/* $NetBSD: tcx_accel.c,v 1.7 2013/06/04 22:58:31 mrg Exp $ */
256eb72584Smrg
#include <sys/types.h>

#include <stdlib.h>

#include "tcx.h"
296eb72584Smrg
/*
 * Function entry/exit tracing.
 * Without DEBUG both macros expand to nothing; with DEBUG they log the
 * name of the enclosing function through xf86Msg().
 */
#ifndef DEBUG
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
376eb72584Smrg
386eb72584Smrgstatic void
396eb72584SmrgTcxWaitMarker(ScreenPtr pScreenInfo, int Marker)
406eb72584Smrg{
416eb72584Smrg    ENTER;
426eb72584Smrg    /* do nothing */
436eb72584Smrg}
446eb72584Smrg
456eb72584Smrgstatic int
466eb72584SmrgTcxMarkSync(ScreenPtr pScreenInfo)
476eb72584Smrg{
486eb72584Smrg    ENTER;
496eb72584Smrg    return 0;
506eb72584Smrg}
516eb72584Smrg
526eb72584Smrgstatic Bool
536eb72584SmrgTcxPrepareCopy
546eb72584Smrg(
556eb72584Smrg    PixmapPtr pSrcPixmap,
566eb72584Smrg    PixmapPtr pDstPixmap,
576eb72584Smrg    int       xdir,
586eb72584Smrg    int       ydir,
596eb72584Smrg    int       alu,
606eb72584Smrg    Pixel     planemask
616eb72584Smrg)
626eb72584Smrg{
636eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
646eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
656eb72584Smrg
666eb72584Smrg    ENTER;
676eb72584Smrg    /* weed out the cases we can't accelerate */
686eb72584Smrg#ifdef DEBUG
696eb72584Smrg    xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask);
706eb72584Smrg#endif
716eb72584Smrg    if (alu != GXcopy)
726eb72584Smrg    	return FALSE;
736eb72584Smrg    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
746eb72584Smrg	return FALSE;
756eb72584Smrg
766eb72584Smrg    pTcx->xdir = xdir;
776eb72584Smrg    pTcx->ydir = ydir;
786eb72584Smrg    pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift;
796eb72584Smrg    pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift;
806eb72584Smrg    LEAVE;
816eb72584Smrg    return TRUE;
826eb72584Smrg}
836eb72584Smrg
846eb72584Smrgstatic void
856eb72584SmrgTcxCopy
866eb72584Smrg(
876eb72584Smrg    PixmapPtr pDstPixmap,
886eb72584Smrg    int       srcX,
896eb72584Smrg    int       srcY,
906eb72584Smrg    int       dstX,
916eb72584Smrg    int       dstY,
926eb72584Smrg    int       w,
936eb72584Smrg    int       h
946eb72584Smrg)
956eb72584Smrg{
966eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
976eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
986eb72584Smrg    uint64_t cmd, lcmd;
996eb72584Smrg    int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff;
1006eb72584Smrg    int doff;
1016eb72584Smrg
1026eb72584Smrg    ENTER;
1036eb72584Smrg    leftover = w & 0x1f;
1046eb72584Smrg    if (leftover > 0)
1056eb72584Smrg	    lcmd = 0x3000000000000000LL | (leftover - 1) << 24;
1066eb72584Smrg
1076eb72584Smrg
1086eb72584Smrg    doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift;
1096eb72584Smrg    dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift;
1106eb72584Smrg    src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff;
1116eb72584Smrg    dst = dstX + dstY * dpitch + doff;
1126eb72584Smrg
1136eb72584Smrg    if (pTcx->ydir < 0) {
1146eb72584Smrg	src += (h - 1) * pTcx->srcpitch;
1156eb72584Smrg	dst += (h - 1) * dpitch;
1166eb72584Smrg	sstep = 0 - pTcx->srcpitch;
1176eb72584Smrg	dstep = 0 - dpitch;
1186eb72584Smrg    } else {
1196eb72584Smrg	sstep = pTcx->srcpitch;
1206eb72584Smrg	dstep = dpitch;
1216eb72584Smrg    }
1226eb72584Smrg
1236eb72584Smrg    xsteps = w >> 5;
1246eb72584Smrg
1256eb72584Smrg    if ((pTcx->xdir > 0) || (w < 33)) {
1266eb72584Smrg	for (line = 0; line < h; line++) {
1276eb72584Smrg	    x = xsteps;
1286eb72584Smrg	    xoff = 0;
1296eb72584Smrg	    while (x > 0) {
1306eb72584Smrg		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
1316eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1326eb72584Smrg		xoff += 32;
1336eb72584Smrg		x--;
1346eb72584Smrg	    }
1356eb72584Smrg	    if (leftover > 0) {
1366eb72584Smrg		cmd = lcmd | (uint64_t)(src + xoff);
1376eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1386eb72584Smrg	    }
1396eb72584Smrg	    src += sstep;
1406eb72584Smrg	    dst += dstep;
1416eb72584Smrg	}
1426eb72584Smrg    } else {
1436eb72584Smrg	/* same thing but right to left */
1446eb72584Smrg	for (line = 0; line < h; line++) {
1456eb72584Smrg	    x = xsteps;
1466eb72584Smrg	    xoff = xsteps << 5;
1476eb72584Smrg	    if (leftover > 0) {
1486eb72584Smrg		cmd = lcmd | (uint64_t)(src + xoff);
1496eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1506eb72584Smrg	    }
1516eb72584Smrg	    xoff -= 32;
1526eb72584Smrg	    while (x > 0) {
1536eb72584Smrg		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
1546eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1556eb72584Smrg		xoff -= 32;
1566eb72584Smrg		x--;
1576eb72584Smrg	    }
1586eb72584Smrg	    src += sstep;
1596eb72584Smrg	    dst += dstep;
1606eb72584Smrg	}
1616eb72584Smrg    }
1626eb72584Smrg    LEAVE;
1636eb72584Smrg}
1646eb72584Smrg
1656eb72584Smrgstatic void
1666eb72584SmrgTcxDoneCopy(PixmapPtr pDstPixmap)
1676eb72584Smrg{
1686eb72584Smrg    ENTER;
1696eb72584Smrg    LEAVE;
1706eb72584Smrg}
1716eb72584Smrg
1726eb72584Smrgstatic Bool
1736eb72584SmrgTcxPrepareSolid(
1746eb72584Smrg    PixmapPtr pPixmap,
1756eb72584Smrg    int alu,
1766eb72584Smrg    Pixel planemask,
1776eb72584Smrg    Pixel fg)
1786eb72584Smrg{
1796eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
1806eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
1816eb72584Smrg
1826eb72584Smrg    ENTER;
1836eb72584Smrg    /* weed out the cases we can't accelerate */
1846eb72584Smrg    if (alu != GXcopy)
1856eb72584Smrg    	return FALSE;
1866eb72584Smrg    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
1876eb72584Smrg	return FALSE;
1886eb72584Smrg    if (exaGetPixmapOffset(pPixmap) != 0)
1896eb72584Smrg	return FALSE;
1906eb72584Smrg    pTcx->fg = (fg & 0x00ffffff);
1916eb72584Smrg    if (pTcx->pitchshift == 0) {
1926eb72584Smrg    	pTcx->fg |= 0x30000000;
1936eb72584Smrg    } else
1946eb72584Smrg	pTcx->fg |= 0x33000000;
1956eb72584Smrg#ifdef DEBUG
1966eb72584Smrg    xf86Msg(X_ERROR, "fg: %08x\n", fg);
1976eb72584Smrg#endif
1986eb72584Smrg    LEAVE;
1996eb72584Smrg    return TRUE;
2006eb72584Smrg}
2016eb72584Smrg
2026eb72584Smrgstatic void
2036eb72584SmrgTcxSolid(
2046eb72584Smrg    PixmapPtr pPixmap,
2056eb72584Smrg    int x1,
2066eb72584Smrg    int y1,
2076eb72584Smrg    int x2,
2086eb72584Smrg    int y2)
2096eb72584Smrg{
2106eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
2116eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
2126eb72584Smrg    int dpitch, dst, line, fullsteps, i;
2136eb72584Smrg    uint64_t cmd, rcmd, lcmd, tmpl;
2146eb72584Smrg    uint32_t pmask;
2156eb72584Smrg
2166eb72584Smrg    dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift;
2176eb72584Smrg    dst = x1 + y1 * dpitch;
2186eb72584Smrg
2196eb72584Smrg    tmpl = ((uint64_t)pTcx->fg) << 32;
2206eb72584Smrg
2216eb72584Smrg    /*
2226eb72584Smrg     * thanks to the funky architecture of the tcx's stipple 'engine' we have
2236eb72584Smrg     * to deal with two different cases:
2246eb72584Smrg     * - the whole width of the rectangle fits into a single 32 pixel aligned
2256eb72584Smrg     *   unit of 32 pixels
2266eb72584Smrg     * - the first and the last 32bit unit may or may not contain less than
2276eb72584Smrg     *   32 pixels
2286eb72584Smrg     */
2296eb72584Smrg    x2 -= 1;
2306eb72584Smrg    if ((x1 & 0xffe0) == (x2 & 0xffe0)) {
2316eb72584Smrg	/* the whole width fits in one 32 pixel write */
2326eb72584Smrg
2336eb72584Smrg	/* first zero out pixels on the right */
2346eb72584Smrg	pmask = 0xffffffff << (31 - (x2 & 0x1f));
2356eb72584Smrg	/* then mask out pixels on the left */
2366eb72584Smrg	pmask &= (0xffffffff >> (x1 & 0x1f));
2376eb72584Smrg#ifdef DEBUG
2386eb72584Smrg	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
2396eb72584Smrg#endif
2406eb72584Smrg	cmd = tmpl | (uint64_t)pmask;
2416eb72584Smrg	dst &= 0xffffffe0;
2426eb72584Smrg	for (line = y1; line < y2; line++) {
2436eb72584Smrg	    pTcx->rstip[dst] = cmd;
2446eb72584Smrg	    dst += dpitch;
2456eb72584Smrg	}
2466eb72584Smrg    } else {
2476eb72584Smrg	/* at least two writes per line */
2486eb72584Smrg	pmask = 0xffffffff << (31 - (x2 & 0x1f));
2496eb72584Smrg	rcmd = tmpl | (uint64_t)pmask;
2506eb72584Smrg	pmask = 0xffffffff >> (x1 & 0x1f);
2516eb72584Smrg	lcmd = tmpl | (uint64_t)pmask;
2526eb72584Smrg	cmd = tmpl | 0xffffffffLL;
2536eb72584Smrg	dst &= 0xffffffe0;
2546eb72584Smrg	fullsteps = ((x2 >> 5) - (x1 >> 5));
2556eb72584Smrg#ifdef DEBUG
2566eb72584Smrg	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
2576eb72584Smrg	xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps);
2586eb72584Smrg#endif
2596eb72584Smrg	fullsteps = fullsteps << 5;
2606eb72584Smrg	for (line = y1; line < y2; line++) {
2616eb72584Smrg	    pTcx->rstip[dst] = lcmd;
2626eb72584Smrg	    for (i = 32; i < fullsteps; i+= 32)
2636eb72584Smrg		pTcx->rstip[dst + i] = cmd;
2646eb72584Smrg	    pTcx->rstip[dst + i] = rcmd;
2656eb72584Smrg	    dst += dpitch;
2666eb72584Smrg	}
2676eb72584Smrg    }
2686eb72584Smrg}
2696eb72584Smrg
2706eb72584Smrg/*
2716eb72584Smrg * Memcpy-based UTS.
2726eb72584Smrg */
2736eb72584Smrgstatic Bool
2746eb72584SmrgTcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
2756eb72584Smrg    char *src, int src_pitch)
2766eb72584Smrg{
2776eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
2786eb72584Smrg    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
2796eb72584Smrg    char  *dst        = pTcx->fb + exaGetPixmapOffset(pDst);
2806eb72584Smrg    int    dst_pitch  = exaGetPixmapPitch(pDst);
2816eb72584Smrg
2826eb72584Smrg    int bpp    = pDst->drawable.bitsPerPixel;
2836eb72584Smrg    int cpp    = (bpp + 7) / 8;
2846eb72584Smrg    int wBytes = w * cpp;
2856eb72584Smrg
2866eb72584Smrg    ENTER;
2876eb72584Smrg    dst += (x * cpp) + (y * dst_pitch);
2886eb72584Smrg
2896eb72584Smrg    while (h--) {
2906eb72584Smrg        memcpy(dst, src, wBytes);
2916eb72584Smrg        src += src_pitch;
2926eb72584Smrg        dst += dst_pitch;
2936eb72584Smrg    }
2946eb72584Smrg    LEAVE;
2956eb72584Smrg    return TRUE;
2966eb72584Smrg}
2976eb72584Smrg
2986eb72584Smrg/*
2996eb72584Smrg * Memcpy-based DFS.
3006eb72584Smrg */
3016eb72584Smrgstatic Bool
3026eb72584SmrgTcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
3036eb72584Smrg    char *dst, int dst_pitch)
3046eb72584Smrg{
3056eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
3066eb72584Smrg    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
3076eb72584Smrg    char  *src        = pTcx->fb + exaGetPixmapOffset(pSrc);
3086eb72584Smrg    int    src_pitch  = exaGetPixmapPitch(pSrc);
3096eb72584Smrg
3106eb72584Smrg    int bpp    = pSrc->drawable.bitsPerPixel;
3116eb72584Smrg    int cpp    = (bpp + 7) / 8;
3126eb72584Smrg    int wBytes = w * cpp;
3136eb72584Smrg
3146eb72584Smrg    ENTER;
3156eb72584Smrg    src += (x * cpp) + (y * src_pitch);
3166eb72584Smrg
3176eb72584Smrg    while (h--) {
3186eb72584Smrg        memcpy(dst, src, wBytes);
3196eb72584Smrg        src += src_pitch;
3206eb72584Smrg        dst += dst_pitch;
3216eb72584Smrg    }
3226eb72584Smrg    LEAVE;
3236eb72584Smrg    return TRUE;
3246eb72584Smrg}
3256eb72584Smrg
3266eb72584SmrgBool
3276eb72584SmrgTcxInitAccel(ScreenPtr pScreen)
3286eb72584Smrg{
3296eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
3306eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn);
3316eb72584Smrg    ExaDriverPtr pExa;
3326eb72584Smrg
3336eb72584Smrg    pExa = exaDriverAlloc();
3346eb72584Smrg    if (!pExa)
3356eb72584Smrg        return FALSE;
3366eb72584Smrg
3376eb72584Smrg    pTcx->pExa = pExa;
3386eb72584Smrg
3396eb72584Smrg    pExa->exa_major = EXA_VERSION_MAJOR;
3406eb72584Smrg    pExa->exa_minor = EXA_VERSION_MINOR;
3416eb72584Smrg
3426eb72584Smrg    /*
3436eb72584Smrg     * The S24 can display both 8 and 24bit data at the same time, and in
3446eb72584Smrg     * 24bit we can choose between gamma corrected ad direct. No idea how that
3456eb72584Smrg     * would map to EXA - we'd have to pick the right framebuffer to draw into
3466eb72584Smrg     * and Solid() would need to know what kind of pixels to write
3476eb72584Smrg     */
3486eb72584Smrg    pExa->memoryBase = pTcx->fb;
3496eb72584Smrg    if (pScrn->depth == 8) {
3506eb72584Smrg	pExa->memorySize = 1024 * 1024;
3516eb72584Smrg	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height;
3526eb72584Smrg	pExa->pixmapOffsetAlign = 1;
3536eb72584Smrg	pExa->pixmapPitchAlign = 1;
3546eb72584Smrg    } else {
3556eb72584Smrg	pExa->memorySize = 1024 * 1024 * 4;
3566eb72584Smrg	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4;
3576eb72584Smrg	pExa->pixmapOffsetAlign = 4;
3586eb72584Smrg	pExa->pixmapPitchAlign = 4;
3596eb72584Smrg    }
3606eb72584Smrg
3616eb72584Smrg    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
3626eb72584Smrg
3636eb72584Smrg    pExa->maxX = 2048;
3646eb72584Smrg    pExa->maxY = 2048;	/* dummy, available VRAM is the limit */
3656eb72584Smrg
3666eb72584Smrg    pExa->MarkSync = TcxMarkSync;
3676eb72584Smrg    pExa->WaitMarker = TcxWaitMarker;
3686eb72584Smrg
3696eb72584Smrg    pExa->PrepareSolid = TcxPrepareSolid;
3706eb72584Smrg    pExa->Solid = TcxSolid;
3716eb72584Smrg    pExa->DoneSolid = TcxDoneCopy;
3726eb72584Smrg
3736eb72584Smrg    pExa->PrepareCopy = TcxPrepareCopy;
3746eb72584Smrg    pExa->Copy = TcxCopy;
3756eb72584Smrg    pExa->DoneCopy = TcxDoneCopy;
3766eb72584Smrg
3776eb72584Smrg    /* EXA hits more optimized paths when it does not have to fallback because
3786eb72584Smrg     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
3796eb72584Smrg     */
3806eb72584Smrg    pExa->UploadToScreen = TcxUploadToScreen;
3816eb72584Smrg    pExa->DownloadFromScreen = TcxDownloadFromScreen;
3826eb72584Smrg
3836eb72584Smrg    return exaDriverInit(pScreen, pExa);
3846eb72584Smrg}
385