tcx_accel.c revision ecc0bcc7
1/*
2 * TCX framebuffer - hardware acceleration.
3 *
4 * Copyright (C) 2009 Michael Lorenz
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24/* $NetBSD: tcx_accel.c,v 1.1 2009/08/26 22:24:34 macallan Exp $ */
25
26#include <sys/types.h>
27
28#include "tcx.h"
29
/*
 * Function entry/exit tracing.
 *
 * With DEBUG defined, ENTER and LEAVE log the name of the enclosing function
 * through xf86Msg(); without it both expand to nothing, so "ENTER;" is just
 * an empty statement.
 */
#if !defined(DEBUG)
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
37
38static void
39TcxWaitMarker(ScreenPtr pScreenInfo, int Marker)
40{
41    ENTER;
42    /* do nothing */
43}
44
45static int
46TcxMarkSync(ScreenPtr pScreenInfo)
47{
48    ENTER;
49    return 0;
50}
51
52static Bool
53TcxPrepareCopy
54(
55    PixmapPtr pSrcPixmap,
56    PixmapPtr pDstPixmap,
57    int       xdir,
58    int       ydir,
59    int       alu,
60    Pixel     planemask
61)
62{
63    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
64    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
65
66    ENTER;
67    /* weed out the cases we can't accelerate */
68#ifdef DEBUG
69    xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask);
70#endif
71    if (alu != GXcopy)
72    	return FALSE;
73    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
74	return FALSE;
75
76    pTcx->xdir = xdir;
77    pTcx->ydir = ydir;
78    pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> 2;
79    pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> 2;
80    LEAVE;
81    return TRUE;
82}
83
84static void
85TcxCopy
86(
87    PixmapPtr pDstPixmap,
88    int       srcX,
89    int       srcY,
90    int       dstX,
91    int       dstY,
92    int       w,
93    int       h
94)
95{
96    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
97    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
98    uint64_t cmd, lcmd;
99    int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff;
100    int doff;
101
102    ENTER;
103    leftover = w & 0x1f;
104    if (leftover > 0)
105	    lcmd = 0x3000000000000000LL | (leftover - 1) << 24;
106
107
108    doff = exaGetPixmapOffset(pDstPixmap) >> 2;
109    dpitch = exaGetPixmapPitch(pDstPixmap) >> 2;
110    src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff;
111    dst = dstX + dstY * dpitch + doff;
112
113    if (pTcx->ydir < 0) {
114	src += (h - 1) * pTcx->srcpitch;
115	dst += (h - 1) * dpitch;
116	sstep = 0 - pTcx->srcpitch;
117	dstep = 0 - dpitch;
118    } else {
119	sstep = pTcx->srcpitch;
120	dstep = dpitch;
121    }
122
123    xsteps = w >> 5;
124
125    if ((pTcx->xdir > 0) || (w < 33)) {
126	for (line = 0; line < h; line++) {
127	    x = xsteps;
128	    xoff = 0;
129	    while (x > 0) {
130		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
131		pTcx->rblit[dst + xoff] = cmd;
132		xoff += 32;
133		x--;
134	    }
135	    if (leftover > 0) {
136		cmd = lcmd | (uint64_t)(src + xoff);
137		pTcx->rblit[dst + xoff] = cmd;
138	    }
139	    src += sstep;
140	    dst += dstep;
141	}
142    } else {
143	/* same thing but right to left */
144	for (line = 0; line < h; line++) {
145	    x = xsteps;
146	    xoff = xsteps << 5;
147	    if (leftover > 0) {
148		cmd = lcmd | (uint64_t)(src + xoff);
149		pTcx->rblit[dst + xoff] = cmd;
150		xoff -= 32;
151	    }
152	    while (x > 0) {
153		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
154		pTcx->rblit[dst + xoff] = cmd;
155		xoff -= 32;
156		x--;
157	    }
158	    src += sstep;
159	    dst += dstep;
160	}
161    }
162    LEAVE;
163}
164
165static void
166TcxDoneCopy(PixmapPtr pDstPixmap)
167{
168    ENTER;
169    LEAVE;
170}
171
172static Bool
173TcxPrepareSolid(
174    PixmapPtr pPixmap,
175    int alu,
176    Pixel planemask,
177    Pixel fg)
178{
179    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
180    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
181
182    ENTER;
183    /* weed out the cases we can't accelerate */
184    if (alu != GXcopy)
185    	return FALSE;
186    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
187	return FALSE;
188    if (exaGetPixmapOffset(pPixmap) != 0)
189	return FALSE;
190    pTcx->fg = (fg & 0x00ffffff) | 0x33000000;
191#ifdef DEBUG
192    xf86Msg(X_ERROR, "fg: %08x\n", fg);
193#endif
194    LEAVE;
195    return TRUE;
196}
197
198static void
199TcxSolid(
200    PixmapPtr pPixmap,
201    int x1,
202    int y1,
203    int x2,
204    int y2)
205{
206    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
207    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
208    int dpitch, dst, line, fullsteps, i;
209    uint64_t cmd, rcmd, lcmd, tmpl;
210    uint32_t pmask;
211
212    dpitch = exaGetPixmapPitch(pPixmap) >> 2;
213    dst = x1 + y1 * dpitch;
214
215    tmpl = ((uint64_t)pTcx->fg) << 32;
216
217    /*
218     * thanks to the funky architecture of the tcx's stipple 'engine' we have
219     * to deal with two different cases:
220     * - the whole width of the rectangle fits into a single 32 pixel aligned
221     *   unit of 32 pixels
222     * - the first and the last 32bit unit may or may not contain less than
223     *   32 pixels
224     */
225    x2 -= 1;
226    if ((x1 & 0xffe0) == (x2 & 0xffe0)) {
227	/* the whole width fits in one 32 pixel write */
228
229	/* first zero out pixels on the right */
230	pmask = 0xffffffff << (31 - (x2 & 0x1f));
231	/* then mask out pixels on the left */
232	pmask &= (0xffffffff >> (x1 & 0x1f));
233#ifdef DEBUG
234	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
235#endif
236	cmd = tmpl | (uint64_t)pmask;
237	dst &= 0xffffffe0;
238	for (line = y1; line < y2; line++) {
239	    pTcx->rstip[dst] = cmd;
240	    dst += dpitch;
241	}
242    } else {
243	/* at least two writes per line */
244	pmask = 0xffffffff << (31 - (x2 & 0x1f));
245	rcmd = tmpl | (uint64_t)pmask;
246	pmask = 0xffffffff >> (x1 & 0x1f);
247	lcmd = tmpl | (uint64_t)pmask;
248	cmd = tmpl | 0xffffffffLL;
249	dst &= 0xffffffe0;
250	fullsteps = ((x2 >> 5) - (x1 >> 5));
251#ifdef DEBUG
252	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
253	xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps);
254#endif
255	fullsteps = fullsteps << 5;
256	for (line = y1; line < y2; line++) {
257	    pTcx->rstip[dst] = lcmd;
258	    for (i = 32; i < fullsteps; i+= 32)
259		pTcx->rstip[dst + i] = cmd;
260	    pTcx->rstip[dst + i] = rcmd;
261	    dst += dpitch;
262	}
263    }
264}
265
266/*
267 * Memcpy-based UTS.
268 */
269static Bool
270TcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
271    char *src, int src_pitch)
272{
273    char  *dst        = pDst->devPrivate.ptr;
274    int    dst_pitch  = exaGetPixmapPitch(pDst);
275
276    int bpp    = pDst->drawable.bitsPerPixel;
277    int cpp    = (bpp + 7) / 8;
278    int wBytes = w * cpp;
279
280    ENTER;
281    dst += (x * cpp) + (y * dst_pitch);
282
283    while (h--) {
284        memcpy(dst, src, wBytes);
285        src += src_pitch;
286        dst += dst_pitch;
287    }
288    LEAVE;
289    return TRUE;
290}
291
292/*
293 * Memcpy-based DFS.
294 */
295static Bool
296TcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
297    char *dst, int dst_pitch)
298{
299    char  *src        = pSrc->devPrivate.ptr;
300    int    src_pitch  = exaGetPixmapPitch(pSrc);
301
302    int bpp    = pSrc->drawable.bitsPerPixel;
303    int cpp    = (bpp + 7) / 8;
304    int wBytes = w * cpp;
305
306    ENTER;
307    src += (x * cpp) + (y * src_pitch);
308
309    while (h--) {
310        memcpy(dst, src, wBytes);
311        src += src_pitch;
312        dst += dst_pitch;
313    }
314    LEAVE;
315    return TRUE;
316}
317
318Bool
319TcxInitAccel(ScreenPtr pScreen)
320{
321    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
322    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn);
323    ExaDriverPtr pExa;
324
325    pExa = exaDriverAlloc();
326    if (!pExa)
327        return FALSE;
328
329    pTcx->pExa = pExa;
330
331    pExa->exa_major = EXA_VERSION_MAJOR;
332    pExa->exa_minor = EXA_VERSION_MINOR;
333
334    pExa->memoryBase = pTcx->fb;
335    if (pScrn->depth == 8) {
336	pExa->memorySize = 1024 * 1024;
337	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height;
338	pExa->pixmapOffsetAlign = 1;
339	pExa->pixmapPitchAlign = 1;
340    } else {
341	pExa->memorySize = 1024 * 1024 * 4;
342	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4;
343	pExa->pixmapOffsetAlign = 4;
344	pExa->pixmapPitchAlign = 4;
345    }
346
347    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
348
349    pExa->maxX = 2048;
350    pExa->maxY = 2048;	/* dummy, available VRAM is the limit */
351
352    pExa->MarkSync = TcxMarkSync;
353    pExa->WaitMarker = TcxWaitMarker;
354
355    pExa->PrepareSolid = TcxPrepareSolid;
356    pExa->Solid = TcxSolid;
357    pExa->DoneSolid = TcxDoneCopy;
358
359    pExa->PrepareCopy = TcxPrepareCopy;
360    pExa->Copy = TcxCopy;
361    pExa->DoneCopy = TcxDoneCopy;
362
363    /* EXA hits more optimized paths when it does not have to fallback because
364     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
365     */
366if (0) {
367    pExa->UploadToScreen = TcxUploadToScreen;
368    pExa->DownloadFromScreen = TcxDownloadFromScreen;
369}
370    return exaDriverInit(pScreen, pExa);
371}
372