/*
 * TCX framebuffer - hardware acceleration.
 *
 * Copyright (C) 2009 Michael Lorenz
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* $NetBSD: tcx_accel.c,v 1.10 2016/09/23 20:50:54 macallan Exp $ */

26091cc113Smacallan#ifdef HAVE_CONFIG_H
27091cc113Smacallan#include "config.h"
28091cc113Smacallan#endif
296eb72584Smrg
306eb72584Smrg#include <sys/types.h>
316eb72584Smrg
326eb72584Smrg#include "tcx.h"
336eb72584Smrg
/*
 * Function entry/exit tracing.  Without DEBUG both macros expand to nothing;
 * with DEBUG they log the enclosing function's name through xf86Msg().
 */
#ifndef DEBUG
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
416eb72584Smrg
426eb72584Smrgstatic void
436eb72584SmrgTcxWaitMarker(ScreenPtr pScreenInfo, int Marker)
446eb72584Smrg{
456eb72584Smrg    ENTER;
466eb72584Smrg    /* do nothing */
476eb72584Smrg}
486eb72584Smrg
496eb72584Smrgstatic int
506eb72584SmrgTcxMarkSync(ScreenPtr pScreenInfo)
516eb72584Smrg{
526eb72584Smrg    ENTER;
536eb72584Smrg    return 0;
546eb72584Smrg}
556eb72584Smrg
566eb72584Smrgstatic Bool
576eb72584SmrgTcxPrepareCopy
586eb72584Smrg(
596eb72584Smrg    PixmapPtr pSrcPixmap,
606eb72584Smrg    PixmapPtr pDstPixmap,
616eb72584Smrg    int       xdir,
626eb72584Smrg    int       ydir,
636eb72584Smrg    int       alu,
646eb72584Smrg    Pixel     planemask
656eb72584Smrg)
666eb72584Smrg{
676eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
686eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
696eb72584Smrg
706eb72584Smrg    ENTER;
716eb72584Smrg    /* weed out the cases we can't accelerate */
726eb72584Smrg#ifdef DEBUG
736eb72584Smrg    xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask);
746eb72584Smrg#endif
756eb72584Smrg    if (alu != GXcopy)
766eb72584Smrg    	return FALSE;
776eb72584Smrg    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
786eb72584Smrg	return FALSE;
796eb72584Smrg
806eb72584Smrg    pTcx->xdir = xdir;
816eb72584Smrg    pTcx->ydir = ydir;
826eb72584Smrg    pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift;
836eb72584Smrg    pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift;
846eb72584Smrg    LEAVE;
856eb72584Smrg    return TRUE;
866eb72584Smrg}
876eb72584Smrg
886eb72584Smrgstatic void
896eb72584SmrgTcxCopy
906eb72584Smrg(
916eb72584Smrg    PixmapPtr pDstPixmap,
926eb72584Smrg    int       srcX,
936eb72584Smrg    int       srcY,
946eb72584Smrg    int       dstX,
956eb72584Smrg    int       dstY,
966eb72584Smrg    int       w,
976eb72584Smrg    int       h
986eb72584Smrg)
996eb72584Smrg{
1006eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
1016eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
102091cc113Smacallan    volatile uint64_t cmd, lcmd;
1036eb72584Smrg    int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff;
1046eb72584Smrg    int doff;
1056eb72584Smrg
1066eb72584Smrg    ENTER;
1076eb72584Smrg    leftover = w & 0x1f;
1086eb72584Smrg    if (leftover > 0)
1096eb72584Smrg	    lcmd = 0x3000000000000000LL | (leftover - 1) << 24;
1106eb72584Smrg
1116eb72584Smrg
1126eb72584Smrg    doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift;
1136eb72584Smrg    dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift;
1146eb72584Smrg    src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff;
1156eb72584Smrg    dst = dstX + dstY * dpitch + doff;
1166eb72584Smrg
1176eb72584Smrg    if (pTcx->ydir < 0) {
1186eb72584Smrg	src += (h - 1) * pTcx->srcpitch;
1196eb72584Smrg	dst += (h - 1) * dpitch;
1206eb72584Smrg	sstep = 0 - pTcx->srcpitch;
1216eb72584Smrg	dstep = 0 - dpitch;
1226eb72584Smrg    } else {
1236eb72584Smrg	sstep = pTcx->srcpitch;
1246eb72584Smrg	dstep = dpitch;
1256eb72584Smrg    }
1266eb72584Smrg
1276eb72584Smrg    xsteps = w >> 5;
1286eb72584Smrg
1296eb72584Smrg    if ((pTcx->xdir > 0) || (w < 33)) {
1306eb72584Smrg	for (line = 0; line < h; line++) {
1316eb72584Smrg	    x = xsteps;
1326eb72584Smrg	    xoff = 0;
1336eb72584Smrg	    while (x > 0) {
1346eb72584Smrg		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
1356eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1366eb72584Smrg		xoff += 32;
1376eb72584Smrg		x--;
1386eb72584Smrg	    }
1396eb72584Smrg	    if (leftover > 0) {
1406eb72584Smrg		cmd = lcmd | (uint64_t)(src + xoff);
1416eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1426eb72584Smrg	    }
1436eb72584Smrg	    src += sstep;
1446eb72584Smrg	    dst += dstep;
1456eb72584Smrg	}
1466eb72584Smrg    } else {
1476eb72584Smrg	/* same thing but right to left */
1486eb72584Smrg	for (line = 0; line < h; line++) {
1496eb72584Smrg	    x = xsteps;
1506eb72584Smrg	    xoff = xsteps << 5;
1516eb72584Smrg	    if (leftover > 0) {
1526eb72584Smrg		cmd = lcmd | (uint64_t)(src + xoff);
1536eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1546eb72584Smrg	    }
1556eb72584Smrg	    xoff -= 32;
1566eb72584Smrg	    while (x > 0) {
1576eb72584Smrg		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
1586eb72584Smrg		pTcx->rblit[dst + xoff] = cmd;
1596eb72584Smrg		xoff -= 32;
1606eb72584Smrg		x--;
1616eb72584Smrg	    }
1626eb72584Smrg	    src += sstep;
1636eb72584Smrg	    dst += dstep;
1646eb72584Smrg	}
1656eb72584Smrg    }
1666eb72584Smrg    LEAVE;
1676eb72584Smrg}
1686eb72584Smrg
1696eb72584Smrgstatic void
1706eb72584SmrgTcxDoneCopy(PixmapPtr pDstPixmap)
1716eb72584Smrg{
1726eb72584Smrg    ENTER;
1736eb72584Smrg    LEAVE;
1746eb72584Smrg}
1756eb72584Smrg
1766eb72584Smrgstatic Bool
1776eb72584SmrgTcxPrepareSolid(
1786eb72584Smrg    PixmapPtr pPixmap,
1796eb72584Smrg    int alu,
1806eb72584Smrg    Pixel planemask,
1816eb72584Smrg    Pixel fg)
1826eb72584Smrg{
1836eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
1846eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
1854525cf0bSmacallan    uint32_t hwfg;
1866eb72584Smrg
1876eb72584Smrg    ENTER;
1884525cf0bSmacallan
1896eb72584Smrg    /* weed out the cases we can't accelerate */
1904525cf0bSmacallan    if (pTcx->HasStipROP) {
1914525cf0bSmacallan    	hwfg = alu << 28;
1924525cf0bSmacallan    } else if (alu == GXcopy) {
1934525cf0bSmacallan        hwfg = 0x30000000;
1944525cf0bSmacallan    } else
1956eb72584Smrg    	return FALSE;
1964525cf0bSmacallan
1976eb72584Smrg    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
1986eb72584Smrg	return FALSE;
1996eb72584Smrg    if (exaGetPixmapOffset(pPixmap) != 0)
2006eb72584Smrg	return FALSE;
2016eb72584Smrg    pTcx->fg = (fg & 0x00ffffff);
2024525cf0bSmacallan    /* set colour space ID if we're in 24bit mode */
2034525cf0bSmacallan    if (pTcx->pitchshift != 0)
2044525cf0bSmacallan    	hwfg |= 0x03000000;
2054525cf0bSmacallan    pTcx->fg |= hwfg;
2066eb72584Smrg#ifdef DEBUG
2074525cf0bSmacallan    xf86Msg(X_ERROR, "fg: %08x\n", hwfg);
2086eb72584Smrg#endif
2096eb72584Smrg    LEAVE;
2106eb72584Smrg    return TRUE;
2116eb72584Smrg}
2126eb72584Smrg
2136eb72584Smrgstatic void
2146eb72584SmrgTcxSolid(
2156eb72584Smrg    PixmapPtr pPixmap,
2166eb72584Smrg    int x1,
2176eb72584Smrg    int y1,
2186eb72584Smrg    int x2,
2196eb72584Smrg    int y2)
2206eb72584Smrg{
2216eb72584Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
2226eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
2236eb72584Smrg    int dpitch, dst, line, fullsteps, i;
224091cc113Smacallan    volatile uint64_t cmd, rcmd, lcmd, tmpl;
2256eb72584Smrg    uint32_t pmask;
2266eb72584Smrg
2276eb72584Smrg    dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift;
2286eb72584Smrg    dst = x1 + y1 * dpitch;
2296eb72584Smrg
2306eb72584Smrg    tmpl = ((uint64_t)pTcx->fg) << 32;
2316eb72584Smrg
2326eb72584Smrg    /*
2336eb72584Smrg     * thanks to the funky architecture of the tcx's stipple 'engine' we have
2346eb72584Smrg     * to deal with two different cases:
2356eb72584Smrg     * - the whole width of the rectangle fits into a single 32 pixel aligned
2366eb72584Smrg     *   unit of 32 pixels
2376eb72584Smrg     * - the first and the last 32bit unit may or may not contain less than
2386eb72584Smrg     *   32 pixels
2396eb72584Smrg     */
2406eb72584Smrg    x2 -= 1;
2416eb72584Smrg    if ((x1 & 0xffe0) == (x2 & 0xffe0)) {
2426eb72584Smrg	/* the whole width fits in one 32 pixel write */
2436eb72584Smrg
2446eb72584Smrg	/* first zero out pixels on the right */
2456eb72584Smrg	pmask = 0xffffffff << (31 - (x2 & 0x1f));
2466eb72584Smrg	/* then mask out pixels on the left */
2476eb72584Smrg	pmask &= (0xffffffff >> (x1 & 0x1f));
2486eb72584Smrg#ifdef DEBUG
2496eb72584Smrg	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
2506eb72584Smrg#endif
2516eb72584Smrg	cmd = tmpl | (uint64_t)pmask;
2526eb72584Smrg	dst &= 0xffffffe0;
2536eb72584Smrg	for (line = y1; line < y2; line++) {
2546eb72584Smrg	    pTcx->rstip[dst] = cmd;
2556eb72584Smrg	    dst += dpitch;
2566eb72584Smrg	}
2576eb72584Smrg    } else {
2586eb72584Smrg	/* at least two writes per line */
2596eb72584Smrg	pmask = 0xffffffff << (31 - (x2 & 0x1f));
2606eb72584Smrg	rcmd = tmpl | (uint64_t)pmask;
2616eb72584Smrg	pmask = 0xffffffff >> (x1 & 0x1f);
2626eb72584Smrg	lcmd = tmpl | (uint64_t)pmask;
2636eb72584Smrg	cmd = tmpl | 0xffffffffLL;
2646eb72584Smrg	dst &= 0xffffffe0;
2656eb72584Smrg	fullsteps = ((x2 >> 5) - (x1 >> 5));
2666eb72584Smrg#ifdef DEBUG
2676eb72584Smrg	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
2686eb72584Smrg	xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps);
2696eb72584Smrg#endif
2706eb72584Smrg	fullsteps = fullsteps << 5;
2716eb72584Smrg	for (line = y1; line < y2; line++) {
2726eb72584Smrg	    pTcx->rstip[dst] = lcmd;
2736eb72584Smrg	    for (i = 32; i < fullsteps; i+= 32)
2746eb72584Smrg		pTcx->rstip[dst + i] = cmd;
2756eb72584Smrg	    pTcx->rstip[dst + i] = rcmd;
2766eb72584Smrg	    dst += dpitch;
2776eb72584Smrg	}
2786eb72584Smrg    }
2796eb72584Smrg}
2806eb72584Smrg
2816eb72584Smrg/*
2826eb72584Smrg * Memcpy-based UTS.
2836eb72584Smrg */
2846eb72584Smrgstatic Bool
2856eb72584SmrgTcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
2866eb72584Smrg    char *src, int src_pitch)
2876eb72584Smrg{
2886eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
2896eb72584Smrg    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
2906eb72584Smrg    char  *dst        = pTcx->fb + exaGetPixmapOffset(pDst);
2916eb72584Smrg    int    dst_pitch  = exaGetPixmapPitch(pDst);
2926eb72584Smrg
2936eb72584Smrg    int bpp    = pDst->drawable.bitsPerPixel;
2946eb72584Smrg    int cpp    = (bpp + 7) / 8;
2956eb72584Smrg    int wBytes = w * cpp;
2966eb72584Smrg
2976eb72584Smrg    ENTER;
2986eb72584Smrg    dst += (x * cpp) + (y * dst_pitch);
2996eb72584Smrg
3006eb72584Smrg    while (h--) {
3016eb72584Smrg        memcpy(dst, src, wBytes);
3026eb72584Smrg        src += src_pitch;
3036eb72584Smrg        dst += dst_pitch;
3046eb72584Smrg    }
3056eb72584Smrg    LEAVE;
3066eb72584Smrg    return TRUE;
3076eb72584Smrg}
3086eb72584Smrg
3096eb72584Smrg/*
3106eb72584Smrg * Memcpy-based DFS.
3116eb72584Smrg */
3126eb72584Smrgstatic Bool
3136eb72584SmrgTcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
3146eb72584Smrg    char *dst, int dst_pitch)
3156eb72584Smrg{
3166eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
3176eb72584Smrg    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
3186eb72584Smrg    char  *src        = pTcx->fb + exaGetPixmapOffset(pSrc);
3196eb72584Smrg    int    src_pitch  = exaGetPixmapPitch(pSrc);
3206eb72584Smrg
3216eb72584Smrg    int bpp    = pSrc->drawable.bitsPerPixel;
3226eb72584Smrg    int cpp    = (bpp + 7) / 8;
3236eb72584Smrg    int wBytes = w * cpp;
3246eb72584Smrg
3256eb72584Smrg    ENTER;
3266eb72584Smrg    src += (x * cpp) + (y * src_pitch);
3276eb72584Smrg
3286eb72584Smrg    while (h--) {
3296eb72584Smrg        memcpy(dst, src, wBytes);
3306eb72584Smrg        src += src_pitch;
3316eb72584Smrg        dst += dst_pitch;
3326eb72584Smrg    }
3336eb72584Smrg    LEAVE;
3346eb72584Smrg    return TRUE;
3356eb72584Smrg}
3366eb72584Smrg
3376eb72584SmrgBool
3386eb72584SmrgTcxInitAccel(ScreenPtr pScreen)
3396eb72584Smrg{
3406eb72584Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
3416eb72584Smrg    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn);
3426eb72584Smrg    ExaDriverPtr pExa;
3436eb72584Smrg
3446eb72584Smrg    pExa = exaDriverAlloc();
3456eb72584Smrg    if (!pExa)
3466eb72584Smrg        return FALSE;
3476eb72584Smrg
3486eb72584Smrg    pTcx->pExa = pExa;
3496eb72584Smrg
3506eb72584Smrg    pExa->exa_major = EXA_VERSION_MAJOR;
3516eb72584Smrg    pExa->exa_minor = EXA_VERSION_MINOR;
3526eb72584Smrg
3536eb72584Smrg    /*
3546eb72584Smrg     * The S24 can display both 8 and 24bit data at the same time, and in
35559d6bc2bSmacallan     * 24bit we can choose between gamma corrected and direct. No idea how that
3566eb72584Smrg     * would map to EXA - we'd have to pick the right framebuffer to draw into
3576eb72584Smrg     * and Solid() would need to know what kind of pixels to write
3586eb72584Smrg     */
3596eb72584Smrg    pExa->memoryBase = pTcx->fb;
3606eb72584Smrg    if (pScrn->depth == 8) {
36159d6bc2bSmacallan	pExa->memorySize = pTcx->vramsize;
3626eb72584Smrg	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height;
3636eb72584Smrg	pExa->pixmapOffsetAlign = 1;
3646eb72584Smrg	pExa->pixmapPitchAlign = 1;
3656eb72584Smrg    } else {
3666eb72584Smrg	pExa->memorySize = 1024 * 1024 * 4;
3676eb72584Smrg	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4;
3686eb72584Smrg	pExa->pixmapOffsetAlign = 4;
3696eb72584Smrg	pExa->pixmapPitchAlign = 4;
3706eb72584Smrg    }
3716eb72584Smrg
3726eb72584Smrg    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
3736eb72584Smrg
3746eb72584Smrg    pExa->maxX = 2048;
3756eb72584Smrg    pExa->maxY = 2048;	/* dummy, available VRAM is the limit */
3766eb72584Smrg
3776eb72584Smrg    pExa->MarkSync = TcxMarkSync;
3786eb72584Smrg    pExa->WaitMarker = TcxWaitMarker;
3796eb72584Smrg
3806eb72584Smrg    pExa->PrepareSolid = TcxPrepareSolid;
3816eb72584Smrg    pExa->Solid = TcxSolid;
3826eb72584Smrg    pExa->DoneSolid = TcxDoneCopy;
3836eb72584Smrg
3846eb72584Smrg    pExa->PrepareCopy = TcxPrepareCopy;
3856eb72584Smrg    pExa->Copy = TcxCopy;
3866eb72584Smrg    pExa->DoneCopy = TcxDoneCopy;
3876eb72584Smrg
3886eb72584Smrg    /* EXA hits more optimized paths when it does not have to fallback because
3896eb72584Smrg     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
3906eb72584Smrg     */
3916eb72584Smrg    pExa->UploadToScreen = TcxUploadToScreen;
3926eb72584Smrg    pExa->DownloadFromScreen = TcxDownloadFromScreen;
3936eb72584Smrg
3946eb72584Smrg    return exaDriverInit(pScreen, pExa);
3956eb72584Smrg}
396