tcx_accel.c revision 4525cf0b
1/*
2 * TCX framebuffer - hardware acceleration.
3 *
4 * Copyright (C) 2009 Michael Lorenz
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24/* $NetBSD: tcx_accel.c,v 1.9 2014/07/08 17:05:26 macallan Exp $ */
25
26#include <sys/types.h>
27
28#include "tcx.h"
29
/*
 * Function entry/exit tracing.  In DEBUG builds ENTER and LEAVE log the name
 * of the enclosing function through xf86Msg(); in all other builds they
 * expand to nothing.
 */
#ifndef DEBUG
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
37
38static void
39TcxWaitMarker(ScreenPtr pScreenInfo, int Marker)
40{
41    ENTER;
42    /* do nothing */
43}
44
45static int
46TcxMarkSync(ScreenPtr pScreenInfo)
47{
48    ENTER;
49    return 0;
50}
51
52static Bool
53TcxPrepareCopy
54(
55    PixmapPtr pSrcPixmap,
56    PixmapPtr pDstPixmap,
57    int       xdir,
58    int       ydir,
59    int       alu,
60    Pixel     planemask
61)
62{
63    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
64    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
65
66    ENTER;
67    /* weed out the cases we can't accelerate */
68#ifdef DEBUG
69    xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask);
70#endif
71    if (alu != GXcopy)
72    	return FALSE;
73    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
74	return FALSE;
75
76    pTcx->xdir = xdir;
77    pTcx->ydir = ydir;
78    pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift;
79    pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift;
80    LEAVE;
81    return TRUE;
82}
83
84static void
85TcxCopy
86(
87    PixmapPtr pDstPixmap,
88    int       srcX,
89    int       srcY,
90    int       dstX,
91    int       dstY,
92    int       w,
93    int       h
94)
95{
96    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
97    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
98    uint64_t cmd, lcmd;
99    int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff;
100    int doff;
101
102    ENTER;
103    leftover = w & 0x1f;
104    if (leftover > 0)
105	    lcmd = 0x3000000000000000LL | (leftover - 1) << 24;
106
107
108    doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift;
109    dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift;
110    src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff;
111    dst = dstX + dstY * dpitch + doff;
112
113    if (pTcx->ydir < 0) {
114	src += (h - 1) * pTcx->srcpitch;
115	dst += (h - 1) * dpitch;
116	sstep = 0 - pTcx->srcpitch;
117	dstep = 0 - dpitch;
118    } else {
119	sstep = pTcx->srcpitch;
120	dstep = dpitch;
121    }
122
123    xsteps = w >> 5;
124
125    if ((pTcx->xdir > 0) || (w < 33)) {
126	for (line = 0; line < h; line++) {
127	    x = xsteps;
128	    xoff = 0;
129	    while (x > 0) {
130		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
131		pTcx->rblit[dst + xoff] = cmd;
132		xoff += 32;
133		x--;
134	    }
135	    if (leftover > 0) {
136		cmd = lcmd | (uint64_t)(src + xoff);
137		pTcx->rblit[dst + xoff] = cmd;
138	    }
139	    src += sstep;
140	    dst += dstep;
141	}
142    } else {
143	/* same thing but right to left */
144	for (line = 0; line < h; line++) {
145	    x = xsteps;
146	    xoff = xsteps << 5;
147	    if (leftover > 0) {
148		cmd = lcmd | (uint64_t)(src + xoff);
149		pTcx->rblit[dst + xoff] = cmd;
150	    }
151	    xoff -= 32;
152	    while (x > 0) {
153		cmd = 0x300000001f000000LL | (uint64_t)(src + xoff);
154		pTcx->rblit[dst + xoff] = cmd;
155		xoff -= 32;
156		x--;
157	    }
158	    src += sstep;
159	    dst += dstep;
160	}
161    }
162    LEAVE;
163}
164
165static void
166TcxDoneCopy(PixmapPtr pDstPixmap)
167{
168    ENTER;
169    LEAVE;
170}
171
172static Bool
173TcxPrepareSolid(
174    PixmapPtr pPixmap,
175    int alu,
176    Pixel planemask,
177    Pixel fg)
178{
179    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
180    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
181    uint32_t hwfg;
182
183    ENTER;
184
185    /* weed out the cases we can't accelerate */
186    if (pTcx->HasStipROP) {
187    	hwfg = alu << 28;
188    } else if (alu == GXcopy) {
189        hwfg = 0x30000000;
190    } else
191    	return FALSE;
192
193    if ((planemask != 0xffffffff) && (planemask != 0x00ffffff))
194	return FALSE;
195    if (exaGetPixmapOffset(pPixmap) != 0)
196	return FALSE;
197    pTcx->fg = (fg & 0x00ffffff);
198    /* set colour space ID if we're in 24bit mode */
199    if (pTcx->pitchshift != 0)
200    	hwfg |= 0x03000000;
201    pTcx->fg |= hwfg;
202#ifdef DEBUG
203    xf86Msg(X_ERROR, "fg: %08x\n", hwfg);
204#endif
205    LEAVE;
206    return TRUE;
207}
208
209static void
210TcxSolid(
211    PixmapPtr pPixmap,
212    int x1,
213    int y1,
214    int x2,
215    int y2)
216{
217    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
218    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo);
219    int dpitch, dst, line, fullsteps, i;
220    uint64_t cmd, rcmd, lcmd, tmpl;
221    uint32_t pmask;
222
223    dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift;
224    dst = x1 + y1 * dpitch;
225
226    tmpl = ((uint64_t)pTcx->fg) << 32;
227
228    /*
229     * thanks to the funky architecture of the tcx's stipple 'engine' we have
230     * to deal with two different cases:
231     * - the whole width of the rectangle fits into a single 32 pixel aligned
232     *   unit of 32 pixels
233     * - the first and the last 32bit unit may or may not contain less than
234     *   32 pixels
235     */
236    x2 -= 1;
237    if ((x1 & 0xffe0) == (x2 & 0xffe0)) {
238	/* the whole width fits in one 32 pixel write */
239
240	/* first zero out pixels on the right */
241	pmask = 0xffffffff << (31 - (x2 & 0x1f));
242	/* then mask out pixels on the left */
243	pmask &= (0xffffffff >> (x1 & 0x1f));
244#ifdef DEBUG
245	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
246#endif
247	cmd = tmpl | (uint64_t)pmask;
248	dst &= 0xffffffe0;
249	for (line = y1; line < y2; line++) {
250	    pTcx->rstip[dst] = cmd;
251	    dst += dpitch;
252	}
253    } else {
254	/* at least two writes per line */
255	pmask = 0xffffffff << (31 - (x2 & 0x1f));
256	rcmd = tmpl | (uint64_t)pmask;
257	pmask = 0xffffffff >> (x1 & 0x1f);
258	lcmd = tmpl | (uint64_t)pmask;
259	cmd = tmpl | 0xffffffffLL;
260	dst &= 0xffffffe0;
261	fullsteps = ((x2 >> 5) - (x1 >> 5));
262#ifdef DEBUG
263	xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2);
264	xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps);
265#endif
266	fullsteps = fullsteps << 5;
267	for (line = y1; line < y2; line++) {
268	    pTcx->rstip[dst] = lcmd;
269	    for (i = 32; i < fullsteps; i+= 32)
270		pTcx->rstip[dst + i] = cmd;
271	    pTcx->rstip[dst + i] = rcmd;
272	    dst += dpitch;
273	}
274    }
275}
276
277/*
278 * Memcpy-based UTS.
279 */
280static Bool
281TcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
282    char *src, int src_pitch)
283{
284    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
285    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
286    char  *dst        = pTcx->fb + exaGetPixmapOffset(pDst);
287    int    dst_pitch  = exaGetPixmapPitch(pDst);
288
289    int bpp    = pDst->drawable.bitsPerPixel;
290    int cpp    = (bpp + 7) / 8;
291    int wBytes = w * cpp;
292
293    ENTER;
294    dst += (x * cpp) + (y * dst_pitch);
295
296    while (h--) {
297        memcpy(dst, src, wBytes);
298        src += src_pitch;
299        dst += dst_pitch;
300    }
301    LEAVE;
302    return TRUE;
303}
304
305/*
306 * Memcpy-based DFS.
307 */
308static Bool
309TcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
310    char *dst, int dst_pitch)
311{
312    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
313    TcxPtr pTcx       = GET_TCX_FROM_SCRN(pScrn);
314    char  *src        = pTcx->fb + exaGetPixmapOffset(pSrc);
315    int    src_pitch  = exaGetPixmapPitch(pSrc);
316
317    int bpp    = pSrc->drawable.bitsPerPixel;
318    int cpp    = (bpp + 7) / 8;
319    int wBytes = w * cpp;
320
321    ENTER;
322    src += (x * cpp) + (y * src_pitch);
323
324    while (h--) {
325        memcpy(dst, src, wBytes);
326        src += src_pitch;
327        dst += dst_pitch;
328    }
329    LEAVE;
330    return TRUE;
331}
332
333Bool
334TcxInitAccel(ScreenPtr pScreen)
335{
336    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
337    TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn);
338    ExaDriverPtr pExa;
339
340    pExa = exaDriverAlloc();
341    if (!pExa)
342        return FALSE;
343
344    pTcx->pExa = pExa;
345
346    pExa->exa_major = EXA_VERSION_MAJOR;
347    pExa->exa_minor = EXA_VERSION_MINOR;
348
349    /*
350     * The S24 can display both 8 and 24bit data at the same time, and in
351     * 24bit we can choose between gamma corrected and direct. No idea how that
352     * would map to EXA - we'd have to pick the right framebuffer to draw into
353     * and Solid() would need to know what kind of pixels to write
354     */
355    pExa->memoryBase = pTcx->fb;
356    if (pScrn->depth == 8) {
357	pExa->memorySize = pTcx->vramsize;
358	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height;
359	pExa->pixmapOffsetAlign = 1;
360	pExa->pixmapPitchAlign = 1;
361    } else {
362	pExa->memorySize = 1024 * 1024 * 4;
363	pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4;
364	pExa->pixmapOffsetAlign = 4;
365	pExa->pixmapPitchAlign = 4;
366    }
367
368    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
369
370    pExa->maxX = 2048;
371    pExa->maxY = 2048;	/* dummy, available VRAM is the limit */
372
373    pExa->MarkSync = TcxMarkSync;
374    pExa->WaitMarker = TcxWaitMarker;
375
376    pExa->PrepareSolid = TcxPrepareSolid;
377    pExa->Solid = TcxSolid;
378    pExa->DoneSolid = TcxDoneCopy;
379
380    pExa->PrepareCopy = TcxPrepareCopy;
381    pExa->Copy = TcxCopy;
382    pExa->DoneCopy = TcxDoneCopy;
383
384    /* EXA hits more optimized paths when it does not have to fallback because
385     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
386     */
387    pExa->UploadToScreen = TcxUploadToScreen;
388    pExa->DownloadFromScreen = TcxDownloadFromScreen;
389
390    return exaDriverInit(pScreen, pExa);
391}
392