tcx_accel.c revision 02566b6a
1/*
2 * TCX framebuffer - hardware acceleration.
3 *
4 * Copyright (C) 2009 Michael Lorenz
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24/* $NetBSD: tcx_accel.c,v 1.5 2009/11/25 05:24:44 macallan Exp $ */
25
#include <sys/types.h>
#include <stdlib.h>

#include "tcx.h"
29
/*
 * Call tracing.  In DEBUG builds ENTER and LEAVE log the name of the
 * enclosing function through xf86Msg(); in all other builds they expand to
 * a no-op.  Both are meant to be used as statements, i.e. followed by a
 * semicolon.
 */
#ifndef DEBUG
#define ENTER ((void)0)
#define LEAVE ((void)0)
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
37
38static void
39TcxWaitMarker(ScreenPtr pScreenInfo, int Marker)
40{
41    ENTER;
42    /* do nothing */
43}
44
45static int
46TcxMarkSync(ScreenPtr pScreenInfo)
47{
48    ENTER;
49    return 0;
50}
51
52static Bool
53TcxPrepareCopy
54(
55    PixmapPtr pSrcPixmap,
56    PixmapPtr pDstPixmap,
57    int       xdir,
58    int       ydir,
59    int       alu,
60    Pixel     planemask
61)
62{
63    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
64    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
65
66    ENTER;
67    /* weed out the cases we can't accelerate */
68#ifdef DEBUG
69    xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask);
70#endif
71    if (alu != GXcopy)
72    	return FALSE;
73    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
74	return FALSE;
75
76    pTcx->xdir = xdir;
77    pTcx->ydir = ydir;
78    pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift;
79    pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift;
80    LEAVE;
81    return TRUE;
82}
83
84static void
85TcxCopy
86(
87    PixmapPtr pDstPixmap,
88    int       srcX,
89    int       srcY,
90    int       dstX,
91    int       dstY,
92    int       w,
93    int       h
94)
95{
96    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
97    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
98    uint64_t cmd, lcmd;
99    int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff;
100    int doff;
101
102    ENTER;
103    leftover = w & 0x1f;
104    if (leftover > 0)
105	    lcmd = 0x3000000000000000LL | (leftover - 1) << 24;
106
107
108    doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift;
109    dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift;
110    src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff;
111    dst = dstX + dstY * dpitch + doff;
112
113    if (pTcx->ydir < 0) {
114	src += (h - 1) * pTcx->srcpitch;
115	dst += (h - 1) * dpitch;
116	sstep = 0 - pTcx->srcpitch;
117	dstep = 0 - dpitch;
118    } else {
119	sstep = pTcx->srcpitch;
120	dstep = dpitch;
121    }
122
123    xsteps = w >> 5;
124
125    if ((pTcx->xdir > 0) || (w < 33)) {
126	for (line = 0; line < h; line++) {
127	    x = xsteps;
128	    xoff = 0;
129	    while (x > 0) {
130		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
131		pTcx->rblit[dst + xoff] = cmd;
132		xoff += 32;
133		x--;
134	    }
135	    if (leftover > 0) {
136		cmd = lcmd | (uint64_t)(src + xoff);
137		pTcx->rblit[dst + xoff] = cmd;
138	    }
139	    src += sstep;
140	    dst += dstep;
141	}
142    } else {
143	/* same thing but right to left */
144	for (line = 0; line < h; line++) {
145	    x = xsteps;
146	    xoff = xsteps << 5;
147	    if (leftover > 0) {
148		cmd = lcmd | (uint64_t)(src + xoff);
149		pTcx->rblit[dst + xoff] = cmd;
150	    }
151	    xoff -= 32;
152	    while (x > 0) {
153		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
154		pTcx->rblit[dst + xoff] = cmd;
155		xoff -= 32;
156		x--;
157	    }
158	    src += sstep;
159	    dst += dstep;
160	}
161    }
162    LEAVE;
163}
164
165static void
166TcxDoneCopy(PixmapPtr pDstPixmap)
167{
168    ENTER;
169    LEAVE;
170}
171
172static Bool
173TcxPrepareSolid(
174    PixmapPtr pPixmap,
175    int alu,
176    Pixel planemask,
177    Pixel fg)
178{
179    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
180    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
181
182    ENTER;
183    /* weed out the cases we can't accelerate */
184    if (alu != GXcopy)
185    	return FALSE;
186    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
187	return FALSE;
188    if (exaGetPixmapOffset(pPixmap) != 0)
189	return FALSE;
190    pTcx->fg = (fg & 0x00ffffff);
191    if (pTcx->pitchshift == 0) {
192    	pTcx->fg |= 0x30000000;
193    } else
194	pTcx->fg |= 0x33000000;
195#ifdef DEBUG
196    xf86Msg(X_ERROR, "fg: %08x\n", fg);
197#endif
198    LEAVE;
199    return TRUE;
200}
201
202static void
203TcxSolid(
204    PixmapPtr pPixmap,
205    int x1,
206    int y1,
207    int x2,
208    int y2)
209{
210    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
211    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
212    int dpitch, dst, line, fullsteps, i;
213    uint64_t cmd, rcmd, lcmd, tmpl;
214    uint32_t pmask;
215
216    dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift;
217    dst = x1 + y1 * dpitch;
218
219    tmpl = ((uint64_t)pTcx->fg) << 32;
220
221    /*
222     * thanks to the funky architecture of the tcx's stipple 'engine' we have
223     * to deal with two different cases:
224     * - the whole width of the rectangle fits into a single 32 pixel aligned
225     *   unit of 32 pixels
226     * - the first and the last 32bit unit may or may not contain less than
227     *   32 pixels
228     */
229    x2 -= 1;
230    if ((x1 & 0xffe0) == (x2 & 0xffe0)) {
231	/* the whole width fits in one 32 pixel write */
232
233	/* first zero out pixels on the right */
234	pmask = 0xffffffff << (31 - (x2 & 0x1f));
235	/* then mask out pixels on the left */
236	pmask &= (0xffffffff >> (x1 & 0x1f));
237#ifdef DEBUG
238	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
239#endif
240	cmd = tmpl | (uint64_t)pmask;
241	dst &= 0xffffffe0;
242	for (line = y1; line < y2; line++) {
243	    pTcx->rstip[dst] = cmd;
244	    dst += dpitch;
245	}
246    } else {
247	/* at least two writes per line */
248	pmask = 0xffffffff << (31 - (x2 & 0x1f));
249	rcmd = tmpl | (uint64_t)pmask;
250	pmask = 0xffffffff >> (x1 & 0x1f);
251	lcmd = tmpl | (uint64_t)pmask;
252	cmd = tmpl | 0xffffffffLL;
253	dst &= 0xffffffe0;
254	fullsteps = ((x2 >> 5) - (x1 >> 5));
255#ifdef DEBUG
256	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
257	xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps);
258#endif
259	fullsteps = fullsteps << 5;
260	for (line = y1; line < y2; line++) {
261	    pTcx->rstip[dst] = lcmd;
262	    for (i = 32; i < fullsteps; i+= 32)
263		pTcx->rstip[dst + i] = cmd;
264	    pTcx->rstip[dst + i] = rcmd;
265	    dst += dpitch;
266	}
267    }
268}
269
270/*
271 * Memcpy-based UTS.
272 */
273static Bool
274TcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
275    char *src, int src_pitch)
276{
277    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
278    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
279    char  *dst        = pTcx->fb + exaGetPixmapOffset(pDst);
280    int    dst_pitch  = exaGetPixmapPitch(pDst);
281
282    int bpp    = pDst->drawable.bitsPerPixel;
283    int cpp    = (bpp + 7) / 8;
284    int wBytes = w * cpp;
285
286    ENTER;
287    dst += (x * cpp) + (y * dst_pitch);
288
289    while (h--) {
290        memcpy(dst, src, wBytes);
291        src += src_pitch;
292        dst += dst_pitch;
293    }
294    LEAVE;
295    return TRUE;
296}
297
298/*
299 * Memcpy-based DFS.
300 */
301static Bool
302TcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
303    char *dst, int dst_pitch)
304{
305    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
306    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
307    char  *src        = pTcx->fb + exaGetPixmapOffset(pSrc);
308    int    src_pitch  = exaGetPixmapPitch(pSrc);
309
310    int bpp    = pSrc->drawable.bitsPerPixel;
311    int cpp    = (bpp + 7) / 8;
312    int wBytes = w * cpp;
313
314    ENTER;
315    src += (x * cpp) + (y * src_pitch);
316
317    while (h--) {
318        memcpy(dst, src, wBytes);
319        src += src_pitch;
320        dst += dst_pitch;
321    }
322    LEAVE;
323    return TRUE;
324}
325
326Bool
327TcxInitAccel(ScreenPtr pScreen)
328{
329    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
330    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn);
331    ExaDriverPtr pExa;
332
333    pExa = exaDriverAlloc();
334    if (!pExa)
335        return FALSE;
336
337    pTcx->pExa = pExa;
338
339    pExa->exa_major = EXA_VERSION_MAJOR;
340    pExa->exa_minor = EXA_VERSION_MINOR;
341
342    /*
343     * The S24 can display both 8 and 24bit data at the same time, and in
344     * 24bit we can choose between gamma corrected ad direct. No idea how that
345     * would map to EXA - we'd have to pick the right framebuffer to draw into
346     * and Solid() would need to know what kind of pixels to write
347     */
348    pExa->memoryBase = pTcx->fb;
349    if (pScrn->depth == 8) {
350	pExa->memorySize = 1024 * 1024;
351	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height;
352	pExa->pixmapOffsetAlign = 1;
353	pExa->pixmapPitchAlign = 1;
354    } else {
355	pExa->memorySize = 1024 * 1024 * 4;
356	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4;
357	pExa->pixmapOffsetAlign = 4;
358	pExa->pixmapPitchAlign = 4;
359    }
360
361    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
362
363    pExa->maxX = 2048;
364    pExa->maxY = 2048;	/* dummy, available VRAM is the limit */
365
366    pExa->MarkSync = TcxMarkSync;
367    pExa->WaitMarker = TcxWaitMarker;
368
369    pExa->PrepareSolid = TcxPrepareSolid;
370    pExa->Solid = TcxSolid;
371    pExa->DoneSolid = TcxDoneCopy;
372
373    pExa->PrepareCopy = TcxPrepareCopy;
374    pExa->Copy = TcxCopy;
375    pExa->DoneCopy = TcxDoneCopy;
376
377    /* EXA hits more optimized paths when it does not have to fallback because
378     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
379     */
380    pExa->UploadToScreen = TcxUploadToScreen;
381    pExa->DownloadFromScreen = TcxDownloadFromScreen;
382
383    return exaDriverInit(pScreen, pExa);
384}
385