/* tcx_accel.c revision 02566b6a */
1/* 2 * TCX framebuffer - hardware acceleration. 3 * 4 * Copyright (C) 2009 Michael Lorenz 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 */ 23 24/* $NetBSD: tcx_accel.c,v 1.5 2009/11/25 05:24:44 macallan Exp $ */ 25 26#include <sys/types.h> 27 28#include "tcx.h" 29 30#ifdef DEBUG 31#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 32#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 33#else 34#define ENTER 35#define LEAVE 36#endif 37 38static void 39TcxWaitMarker(ScreenPtr pScreenInfo, int Marker) 40{ 41 ENTER; 42 /* do nothing */ 43} 44 45static int 46TcxMarkSync(ScreenPtr pScreenInfo) 47{ 48 ENTER; 49 return 0; 50} 51 52static Bool 53TcxPrepareCopy 54( 55 PixmapPtr pSrcPixmap, 56 PixmapPtr pDstPixmap, 57 int xdir, 58 int ydir, 59 int alu, 60 Pixel planemask 61) 62{ 63 ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 64 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 65 66 ENTER; 67 /* weed out the cases we can't accelerate */ 68#ifdef DEBUG 69 xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask); 70#endif 71 if (alu != GXcopy) 72 return FALSE; 73 if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 74 return FALSE; 75 76 pTcx->xdir = xdir; 77 pTcx->ydir = ydir; 78 pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift; 79 pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift; 80 LEAVE; 81 return TRUE; 82} 83 84static void 85TcxCopy 86( 87 PixmapPtr pDstPixmap, 88 int srcX, 89 int srcY, 90 int dstX, 91 int dstY, 92 int w, 93 int h 94) 95{ 96 ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 97 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 98 uint64_t cmd, lcmd; 99 int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff; 100 int doff; 101 102 ENTER; 103 leftover = w & 0x1f; 104 if (leftover > 0) 105 lcmd = 0x3000000000000000LL | (leftover - 1) << 24; 106 107 108 doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift; 109 dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift; 110 src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff; 111 dst = dstX + dstY * dpitch + doff; 112 113 if 
(pTcx->ydir < 0) { 114 src += (h - 1) * pTcx->srcpitch; 115 dst += (h - 1) * dpitch; 116 sstep = 0 - pTcx->srcpitch; 117 dstep = 0 - dpitch; 118 } else { 119 sstep = pTcx->srcpitch; 120 dstep = dpitch; 121 } 122 123 xsteps = w >> 5; 124 125 if ((pTcx->xdir > 0) || (w < 33)) { 126 for (line = 0; line < h; line++) { 127 x = xsteps; 128 xoff = 0; 129 while (x > 0) { 130 cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 131 pTcx->rblit[dst + xoff] = cmd; 132 xoff += 32; 133 x--; 134 } 135 if (leftover > 0) { 136 cmd = lcmd | (uint64_t)(src + xoff); 137 pTcx->rblit[dst + xoff] = cmd; 138 } 139 src += sstep; 140 dst += dstep; 141 } 142 } else { 143 /* same thing but right to left */ 144 for (line = 0; line < h; line++) { 145 x = xsteps; 146 xoff = xsteps << 5; 147 if (leftover > 0) { 148 cmd = lcmd | (uint64_t)(src + xoff); 149 pTcx->rblit[dst + xoff] = cmd; 150 } 151 xoff -= 32; 152 while (x > 0) { 153 cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); 154 pTcx->rblit[dst + xoff] = cmd; 155 xoff -= 32; 156 x--; 157 } 158 src += sstep; 159 dst += dstep; 160 } 161 } 162 LEAVE; 163} 164 165static void 166TcxDoneCopy(PixmapPtr pDstPixmap) 167{ 168 ENTER; 169 LEAVE; 170} 171 172static Bool 173TcxPrepareSolid( 174 PixmapPtr pPixmap, 175 int alu, 176 Pixel planemask, 177 Pixel fg) 178{ 179 ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 180 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 181 182 ENTER; 183 /* weed out the cases we can't accelerate */ 184 if (alu != GXcopy) 185 return FALSE; 186 if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) 187 return FALSE; 188 if (exaGetPixmapOffset(pPixmap) != 0) 189 return FALSE; 190 pTcx->fg = (fg & 0x00ffffff); 191 if (pTcx->pitchshift == 0) { 192 pTcx->fg |= 0x30000000; 193 } else 194 pTcx->fg |= 0x33000000; 195#ifdef DEBUG 196 xf86Msg(X_ERROR, "fg: %08x\n", fg); 197#endif 198 LEAVE; 199 return TRUE; 200} 201 202static void 203TcxSolid( 204 PixmapPtr pPixmap, 205 int x1, 206 int y1, 207 int x2, 
208 int y2) 209{ 210 ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; 211 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); 212 int dpitch, dst, line, fullsteps, i; 213 uint64_t cmd, rcmd, lcmd, tmpl; 214 uint32_t pmask; 215 216 dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift; 217 dst = x1 + y1 * dpitch; 218 219 tmpl = ((uint64_t)pTcx->fg) << 32; 220 221 /* 222 * thanks to the funky architecture of the tcx's stipple 'engine' we have 223 * to deal with two different cases: 224 * - the whole width of the rectangle fits into a single 32 pixel aligned 225 * unit of 32 pixels 226 * - the first and the last 32bit unit may or may not contain less than 227 * 32 pixels 228 */ 229 x2 -= 1; 230 if ((x1 & 0xffe0) == (x2 & 0xffe0)) { 231 /* the whole width fits in one 32 pixel write */ 232 233 /* first zero out pixels on the right */ 234 pmask = 0xffffffff << (31 - (x2 & 0x1f)); 235 /* then mask out pixels on the left */ 236 pmask &= (0xffffffff >> (x1 & 0x1f)); 237#ifdef DEBUG 238 xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 239#endif 240 cmd = tmpl | (uint64_t)pmask; 241 dst &= 0xffffffe0; 242 for (line = y1; line < y2; line++) { 243 pTcx->rstip[dst] = cmd; 244 dst += dpitch; 245 } 246 } else { 247 /* at least two writes per line */ 248 pmask = 0xffffffff << (31 - (x2 & 0x1f)); 249 rcmd = tmpl | (uint64_t)pmask; 250 pmask = 0xffffffff >> (x1 & 0x1f); 251 lcmd = tmpl | (uint64_t)pmask; 252 cmd = tmpl | 0xffffffffLL; 253 dst &= 0xffffffe0; 254 fullsteps = ((x2 >> 5) - (x1 >> 5)); 255#ifdef DEBUG 256 xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); 257 xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps); 258#endif 259 fullsteps = fullsteps << 5; 260 for (line = y1; line < y2; line++) { 261 pTcx->rstip[dst] = lcmd; 262 for (i = 32; i < fullsteps; i+= 32) 263 pTcx->rstip[dst + i] = cmd; 264 pTcx->rstip[dst + i] = rcmd; 265 dst += dpitch; 266 } 267 } 268} 269 270/* 271 * Memcpy-based UTS. 
272 */ 273static Bool 274TcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 275 char *src, int src_pitch) 276{ 277 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 278 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 279 char *dst = pTcx->fb + exaGetPixmapOffset(pDst); 280 int dst_pitch = exaGetPixmapPitch(pDst); 281 282 int bpp = pDst->drawable.bitsPerPixel; 283 int cpp = (bpp + 7) / 8; 284 int wBytes = w * cpp; 285 286 ENTER; 287 dst += (x * cpp) + (y * dst_pitch); 288 289 while (h--) { 290 memcpy(dst, src, wBytes); 291 src += src_pitch; 292 dst += dst_pitch; 293 } 294 LEAVE; 295 return TRUE; 296} 297 298/* 299 * Memcpy-based DFS. 300 */ 301static Bool 302TcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 303 char *dst, int dst_pitch) 304{ 305 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 306 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 307 char *src = pTcx->fb + exaGetPixmapOffset(pSrc); 308 int src_pitch = exaGetPixmapPitch(pSrc); 309 310 int bpp = pSrc->drawable.bitsPerPixel; 311 int cpp = (bpp + 7) / 8; 312 int wBytes = w * cpp; 313 314 ENTER; 315 src += (x * cpp) + (y * src_pitch); 316 317 while (h--) { 318 memcpy(dst, src, wBytes); 319 src += src_pitch; 320 dst += dst_pitch; 321 } 322 LEAVE; 323 return TRUE; 324} 325 326Bool 327TcxInitAccel(ScreenPtr pScreen) 328{ 329 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 330 TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); 331 ExaDriverPtr pExa; 332 333 pExa = exaDriverAlloc(); 334 if (!pExa) 335 return FALSE; 336 337 pTcx->pExa = pExa; 338 339 pExa->exa_major = EXA_VERSION_MAJOR; 340 pExa->exa_minor = EXA_VERSION_MINOR; 341 342 /* 343 * The S24 can display both 8 and 24bit data at the same time, and in 344 * 24bit we can choose between gamma corrected ad direct. 
No idea how that 345 * would map to EXA - we'd have to pick the right framebuffer to draw into 346 * and Solid() would need to know what kind of pixels to write 347 */ 348 pExa->memoryBase = pTcx->fb; 349 if (pScrn->depth == 8) { 350 pExa->memorySize = 1024 * 1024; 351 pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height; 352 pExa->pixmapOffsetAlign = 1; 353 pExa->pixmapPitchAlign = 1; 354 } else { 355 pExa->memorySize = 1024 * 1024 * 4; 356 pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4; 357 pExa->pixmapOffsetAlign = 4; 358 pExa->pixmapPitchAlign = 4; 359 } 360 361 pExa->flags = EXA_OFFSCREEN_PIXMAPS; 362 363 pExa->maxX = 2048; 364 pExa->maxY = 2048; /* dummy, available VRAM is the limit */ 365 366 pExa->MarkSync = TcxMarkSync; 367 pExa->WaitMarker = TcxWaitMarker; 368 369 pExa->PrepareSolid = TcxPrepareSolid; 370 pExa->Solid = TcxSolid; 371 pExa->DoneSolid = TcxDoneCopy; 372 373 pExa->PrepareCopy = TcxPrepareCopy; 374 pExa->Copy = TcxCopy; 375 pExa->DoneCopy = TcxDoneCopy; 376 377 /* EXA hits more optimized paths when it does not have to fallback because 378 * of missing UTS/DFS, hook memcpy-based UTS/DFS. 379 */ 380 pExa->UploadToScreen = TcxUploadToScreen; 381 pExa->DownloadFromScreen = TcxDownloadFromScreen; 382 383 return exaDriverInit(pScreen, pExa); 384} 385