igs_accel.c revision be1ef3d3
1be1ef3d3Smacallan/* 2be1ef3d3Smacallan * IGS CyberPro - hardware acceleration. 3be1ef3d3Smacallan * 4be1ef3d3Smacallan * Copyright (C) 2009 Michael Lorenz 5be1ef3d3Smacallan * 6be1ef3d3Smacallan * Permission is hereby granted, free of charge, to any person obtaining a copy 7be1ef3d3Smacallan * of this software and associated documentation files (the "Software"), to deal 8be1ef3d3Smacallan * in the Software without restriction, including without limitation the rights 9be1ef3d3Smacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10be1ef3d3Smacallan * copies of the Software, and to permit persons to whom the Software is 11be1ef3d3Smacallan * furnished to do so, subject to the following conditions: 12be1ef3d3Smacallan * 13be1ef3d3Smacallan * The above copyright notice and this permission notice shall be included in 14be1ef3d3Smacallan * all copies or substantial portions of the Software. 15be1ef3d3Smacallan * 16be1ef3d3Smacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17be1ef3d3Smacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18be1ef3d3Smacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19be1ef3d3Smacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20be1ef3d3Smacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21be1ef3d3Smacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22be1ef3d3Smacallan */ 23be1ef3d3Smacallan 24be1ef3d3Smacallan/* $NetBSD: igs_accel.c,v 1.1 2009/11/10 21:39:45 macallan Exp $ */ 25be1ef3d3Smacallan 26be1ef3d3Smacallan#include <sys/types.h> 27be1ef3d3Smacallan 28be1ef3d3Smacallan#include "igs.h" 29be1ef3d3Smacallan 30be1ef3d3Smacallan/*#define DEBUG*/ 31be1ef3d3Smacallan 32be1ef3d3Smacallan#ifdef DEBUG 33be1ef3d3Smacallan#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 34be1ef3d3Smacallan#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 35be1ef3d3Smacallan#else 36be1ef3d3Smacallan#define ENTER 37be1ef3d3Smacallan#define LEAVE 38be1ef3d3Smacallan#endif 39be1ef3d3Smacallan 40be1ef3d3Smacallanstatic inline void IgsWrite1(IgsPtr fPtr, int offset, uint8_t val) 41be1ef3d3Smacallan{ 42be1ef3d3Smacallan *(fPtr->reg + offset) = val; 43be1ef3d3Smacallan} 44be1ef3d3Smacallan 45be1ef3d3Smacallan 46be1ef3d3Smacallanstatic inline void IgsWrite2(IgsPtr fPtr, int offset, uint16_t val) 47be1ef3d3Smacallan{ 48be1ef3d3Smacallan *(uint16_t *)(fPtr->reg + offset) = val; 49be1ef3d3Smacallan} 50be1ef3d3Smacallan 51be1ef3d3Smacallanstatic inline void IgsWrite4(IgsPtr fPtr, int offset, uint32_t val) 52be1ef3d3Smacallan{ 53be1ef3d3Smacallan *(uint32_t *)(fPtr->reg + offset) = val; 54be1ef3d3Smacallan} 55be1ef3d3Smacallan 56be1ef3d3Smacallanstatic inline uint8_t IgsRead1(IgsPtr fPtr, int offset) 57be1ef3d3Smacallan{ 58be1ef3d3Smacallan return *(fPtr->reg + offset); 59be1ef3d3Smacallan} 60be1ef3d3Smacallan 61be1ef3d3Smacallanstatic inline uint16_t IgsRead2(IgsPtr fPtr, int offset) 62be1ef3d3Smacallan{ 63be1ef3d3Smacallan return *(uint16_t *)(fPtr->reg + offset); 64be1ef3d3Smacallan} 65be1ef3d3Smacallan 66be1ef3d3Smacallanstatic void 67be1ef3d3SmacallanIgsWaitMarker(ScreenPtr pScreen, int Marker) 68be1ef3d3Smacallan{ 69be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 70be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 71be1ef3d3Smacallan int bail = 0x0fffffff; 72be1ef3d3Smacallan ENTER; 73be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_MAP_FMT_REG, (fPtr->info.depth >> 3) - 1); 74be1ef3d3Smacallan while ((IgsRead1(fPtr, 75be1ef3d3Smacallan IGS_COP_CTL_REG) & (IGS_COP_CTL_BUSY | IGS_COP_CTL_HFEMPTZ) != 0) 76be1ef3d3Smacallan && (bail > 0)) { 77be1ef3d3Smacallan bail--; 78be1ef3d3Smacallan usleep(1); 79be1ef3d3Smacallan } 80be1ef3d3Smacallan 81be1ef3d3Smacallan /* reset the coprocessor if we run into a timeout */ 82be1ef3d3Smacallan if (bail == 0) { 83be1ef3d3Smacallan xf86Msg(X_ERROR, "%s: timeout\n", __func__); 84be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_CTL_REG, 0); 85be1ef3d3Smacallan } 86be1ef3d3Smacallan LEAVE; 87be1ef3d3Smacallan} 88be1ef3d3Smacallan 89be1ef3d3Smacallanstatic int 90be1ef3d3SmacallanIgsMarkSync(ScreenPtr pScreenInfo) 91be1ef3d3Smacallan{ 92be1ef3d3Smacallan ENTER; 93be1ef3d3Smacallan return 0; 94be1ef3d3Smacallan} 95be1ef3d3Smacallan 96be1ef3d3Smacallanstatic void 97be1ef3d3SmacallanIgsWaitReady(IgsPtr fPtr) 98be1ef3d3Smacallan{ 99be1ef3d3Smacallan int bail = 0x0fffffff; 100be1ef3d3Smacallan ENTER; 101be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_MAP_FMT_REG, (fPtr->info.depth >> 3) - 1); 102be1ef3d3Smacallan while (((IgsRead1(fPtr, 103be1ef3d3Smacallan IGS_COP_CTL_REG) & (IGS_COP_CTL_BUSY | IGS_COP_CTL_HFEMPTZ)) != 0) 104be1ef3d3Smacallan && (bail > 0)) { 105be1ef3d3Smacallan bail--; 106be1ef3d3Smacallan usleep(1); 107be1ef3d3Smacallan } 108be1ef3d3Smacallan 109be1ef3d3Smacallan /* reset the coprocessor if we run into a timeout */ 110be1ef3d3Smacallan if (bail == 0) { 111be1ef3d3Smacallan xf86Msg(X_ERROR, "%s: timeout\n", __func__); 112be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_CTL_REG, 0); 113be1ef3d3Smacallan } 114be1ef3d3Smacallan LEAVE; 115be1ef3d3Smacallan} 116be1ef3d3Smacallan 117be1ef3d3Smacallanstatic Bool 118be1ef3d3SmacallanIgsPrepareCopy 119be1ef3d3Smacallan( 120be1ef3d3Smacallan PixmapPtr pSrcPixmap, 121be1ef3d3Smacallan PixmapPtr pDstPixmap, 122be1ef3d3Smacallan int xdir, 123be1ef3d3Smacallan int ydir, 124be1ef3d3Smacallan int alu, 125be1ef3d3Smacallan Pixel planemask 126be1ef3d3Smacallan) 127be1ef3d3Smacallan{ 128be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 129be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 130be1ef3d3Smacallan 131be1ef3d3Smacallan ENTER; 132be1ef3d3Smacallan fPtr->cmd = IGS_COP_OP_PXBLT | IGS_COP_OP_FG_FROM_SRC | 133be1ef3d3Smacallan IGS_COP_PPM_FIXED_FG; 134be1ef3d3Smacallan if (xdir < 0) 135be1ef3d3Smacallan fPtr->cmd |= IGS_COP_OCTANT_X_NEG; 136be1ef3d3Smacallan if (ydir < 0) 137be1ef3d3Smacallan fPtr->cmd |= IGS_COP_OCTANT_Y_NEG; 138be1ef3d3Smacallan 139be1ef3d3Smacallan IgsWaitReady(fPtr); 140be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_CTL_REG, 0); 141be1ef3d3Smacallan fPtr->srcoff = exaGetPixmapOffset(pSrcPixmap) >> fPtr->shift; 142be1ef3d3Smacallan fPtr->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> fPtr->shift; 143be1ef3d3Smacallan 144be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_SRC_MAP_WIDTH_REG, fPtr->srcpitch - 1); 145be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_DST_MAP_WIDTH_REG, 146be1ef3d3Smacallan (exaGetPixmapPitch(pDstPixmap) >> fPtr->shift) - 1); 147be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_FG_MIX_REG, alu); 148be1ef3d3Smacallan IgsWrite4(fPtr, IGS_PLANE_MASK_REG, planemask); 149be1ef3d3Smacallan LEAVE; 150be1ef3d3Smacallan return TRUE; 151be1ef3d3Smacallan} 152be1ef3d3Smacallan 153be1ef3d3Smacallanstatic void 154be1ef3d3SmacallanIgsCopy 155be1ef3d3Smacallan( 156be1ef3d3Smacallan PixmapPtr pDstPixmap, 157be1ef3d3Smacallan int srcX, 158be1ef3d3Smacallan int srcY, 159be1ef3d3Smacallan int dstX, 160be1ef3d3Smacallan int dstY, 161be1ef3d3Smacallan int w, 162be1ef3d3Smacallan int h 163be1ef3d3Smacallan) 164be1ef3d3Smacallan{ 165be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 166be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 167be1ef3d3Smacallan int dstpitch, dstoff; 168be1ef3d3Smacallan 169be1ef3d3Smacallan if (fPtr->cmd & IGS_COP_OCTANT_X_NEG) { 170be1ef3d3Smacallan srcX += (w - 1); 171be1ef3d3Smacallan dstX += (w - 1); 172be1ef3d3Smacallan } 173be1ef3d3Smacallan 174be1ef3d3Smacallan if (fPtr->cmd & IGS_COP_OCTANT_Y_NEG) { 175be1ef3d3Smacallan srcY += (h - 1); 176be1ef3d3Smacallan dstY += (h - 1); 177be1ef3d3Smacallan } 178be1ef3d3Smacallan IgsWaitReady(fPtr); 179be1ef3d3Smacallan IgsWrite4(fPtr, IGS_COP_SRC_START_REG, fPtr->srcoff + srcX + 180be1ef3d3Smacallan fPtr->srcpitch * srcY); 181be1ef3d3Smacallan dstpitch = exaGetPixmapPitch(pDstPixmap) >> fPtr->shift; 182be1ef3d3Smacallan dstoff = exaGetPixmapOffset(pDstPixmap) >> fPtr->shift; 183be1ef3d3Smacallan IgsWrite4(fPtr, IGS_COP_DST_START_REG, dstoff + dstX + 184be1ef3d3Smacallan dstpitch * dstY); 185be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_WIDTH_REG, w - 1); 186be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_HEIGHT_REG, h - 1); 187be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_PIXEL_OP_REG, fPtr->cmd & 0xffff); 188be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_PIXEL_OP_REG + 2, (fPtr->cmd >> 16) & 0xffff); 189be1ef3d3Smacallan LEAVE; 190be1ef3d3Smacallan} 191be1ef3d3Smacallan 192be1ef3d3Smacallanstatic void 193be1ef3d3SmacallanIgsDoneCopy(PixmapPtr pDstPixmap) 194be1ef3d3Smacallan{ 195be1ef3d3Smacallan ENTER; 196be1ef3d3Smacallan LEAVE; 197be1ef3d3Smacallan} 198be1ef3d3Smacallan 199be1ef3d3Smacallanstatic Bool 200be1ef3d3SmacallanIgsPrepareSolid( 201be1ef3d3Smacallan PixmapPtr pPixmap, 202be1ef3d3Smacallan int alu, 203be1ef3d3Smacallan Pixel planemask, 204be1ef3d3Smacallan Pixel fg) 205be1ef3d3Smacallan{ 206be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 207be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 208be1ef3d3Smacallan 209be1ef3d3Smacallan ENTER; 210be1ef3d3Smacallan fPtr->cmd = IGS_COP_OP_PXBLT | IGS_COP_PPM_FIXED_FG; 211be1ef3d3Smacallan 212be1ef3d3Smacallan IgsWaitReady(fPtr); 213be1ef3d3Smacallan 214be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_CTL_REG, 0); 215be1ef3d3Smacallan 216be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_DST_MAP_WIDTH_REG, 217be1ef3d3Smacallan (exaGetPixmapPitch(pPixmap) >> fPtr->shift) - 1); 218be1ef3d3Smacallan IgsWrite1(fPtr, IGS_COP_FG_MIX_REG, alu); 219be1ef3d3Smacallan IgsWrite4(fPtr, IGS_PLANE_MASK_REG, planemask); 220be1ef3d3Smacallan IgsWrite4(fPtr, IGS_COP_FG_REG, fg); 221be1ef3d3Smacallan LEAVE; 222be1ef3d3Smacallan return TRUE; 223be1ef3d3Smacallan} 224be1ef3d3Smacallan 225be1ef3d3Smacallanstatic void 226be1ef3d3SmacallanIgsSolid( 227be1ef3d3Smacallan PixmapPtr pPixmap, 228be1ef3d3Smacallan int x1, 229be1ef3d3Smacallan int y1, 230be1ef3d3Smacallan int x2, 231be1ef3d3Smacallan int y2) 232be1ef3d3Smacallan{ 233be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 234be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 235be1ef3d3Smacallan int w = x2 - x1, h = y2 - y1, dstoff, dstpitch; 236be1ef3d3Smacallan 237be1ef3d3Smacallan ENTER; 238be1ef3d3Smacallan IgsWaitReady(fPtr); 239be1ef3d3Smacallan dstpitch = exaGetPixmapPitch(pPixmap) >> fPtr->shift; 240be1ef3d3Smacallan dstoff = exaGetPixmapOffset(pPixmap) >> fPtr->shift; 241be1ef3d3Smacallan IgsWrite4(fPtr, IGS_COP_DST_START_REG, dstoff + x1 + 242be1ef3d3Smacallan dstpitch * y1); 243be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_WIDTH_REG, w - 1); 244be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_HEIGHT_REG, h - 1); 245be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_PIXEL_OP_REG, fPtr->cmd & 0xffff); 246be1ef3d3Smacallan IgsWrite2(fPtr, IGS_COP_PIXEL_OP_REG + 2, (fPtr->cmd >> 16) & 0xffff); 247be1ef3d3Smacallan} 248be1ef3d3Smacallan 249be1ef3d3Smacallan/* 250be1ef3d3Smacallan * Memcpy-based UTS. 251be1ef3d3Smacallan */ 252be1ef3d3Smacallanstatic Bool 253be1ef3d3SmacallanIgsUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 254be1ef3d3Smacallan char *src, int src_pitch) 255be1ef3d3Smacallan{ 256be1ef3d3Smacallan char *dst = pDst->devPrivate.ptr; 257be1ef3d3Smacallan int dst_pitch = exaGetPixmapPitch(pDst); 258be1ef3d3Smacallan 259be1ef3d3Smacallan int bpp = pDst->drawable.bitsPerPixel; 260be1ef3d3Smacallan int cpp = (bpp + 7) / 8; 261be1ef3d3Smacallan int wBytes = w * cpp; 262be1ef3d3Smacallan 263be1ef3d3Smacallan ENTER; 264be1ef3d3Smacallan dst += (x * cpp) + (y * dst_pitch); 265be1ef3d3Smacallan 266be1ef3d3Smacallan while (h--) { 267be1ef3d3Smacallan memcpy(dst, src, wBytes); 268be1ef3d3Smacallan src += src_pitch; 269be1ef3d3Smacallan dst += dst_pitch; 270be1ef3d3Smacallan } 271be1ef3d3Smacallan LEAVE; 272be1ef3d3Smacallan return TRUE; 273be1ef3d3Smacallan} 274be1ef3d3Smacallan 275be1ef3d3Smacallan/* 276be1ef3d3Smacallan * Memcpy-based DFS. 277be1ef3d3Smacallan */ 278be1ef3d3Smacallanstatic Bool 279be1ef3d3SmacallanIgsDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 280be1ef3d3Smacallan char *dst, int dst_pitch) 281be1ef3d3Smacallan{ 282be1ef3d3Smacallan char *src = pSrc->devPrivate.ptr; 283be1ef3d3Smacallan int src_pitch = exaGetPixmapPitch(pSrc); 284be1ef3d3Smacallan 285be1ef3d3Smacallan int bpp = pSrc->drawable.bitsPerPixel; 286be1ef3d3Smacallan int cpp = (bpp + 7) / 8; 287be1ef3d3Smacallan int wBytes = w * cpp; 288be1ef3d3Smacallan 289be1ef3d3Smacallan ENTER; 290be1ef3d3Smacallan src += (x * cpp) + (y * src_pitch); 291be1ef3d3Smacallan 292be1ef3d3Smacallan while (h--) { 293be1ef3d3Smacallan memcpy(dst, src, wBytes); 294be1ef3d3Smacallan src += src_pitch; 295be1ef3d3Smacallan dst += dst_pitch; 296be1ef3d3Smacallan } 297be1ef3d3Smacallan LEAVE; 298be1ef3d3Smacallan return TRUE; 299be1ef3d3Smacallan} 300be1ef3d3Smacallan 301be1ef3d3SmacallanBool 302be1ef3d3SmacallanIgsInitAccel(ScreenPtr pScreen) 303be1ef3d3Smacallan{ 304be1ef3d3Smacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 305be1ef3d3Smacallan IgsPtr fPtr = IGSPTR(pScrn); 306be1ef3d3Smacallan ExaDriverPtr pExa; 307be1ef3d3Smacallan 308be1ef3d3Smacallan pExa = exaDriverAlloc(); 309be1ef3d3Smacallan if (!pExa) 310be1ef3d3Smacallan return FALSE; 311be1ef3d3Smacallan 312be1ef3d3Smacallan fPtr->pExa = pExa; 313be1ef3d3Smacallan 314be1ef3d3Smacallan pExa->exa_major = EXA_VERSION_MAJOR; 315be1ef3d3Smacallan pExa->exa_minor = EXA_VERSION_MINOR; 316be1ef3d3Smacallan 317be1ef3d3Smacallan /* 318be1ef3d3Smacallan * The S24 can display both 8 and 24bit data at the same time, and in 319be1ef3d3Smacallan * 24bit we can choose between gamma corrected ad direct. No idea how that 320be1ef3d3Smacallan * would map to EXA - we'd have to pick the right framebuffer to draw into 321be1ef3d3Smacallan * and Solid() would need to know what kind of pixels to write 322be1ef3d3Smacallan */ 323be1ef3d3Smacallan pExa->memoryBase = fPtr->fbmem; 324be1ef3d3Smacallan pExa->memorySize = fPtr->fbmem_len; 325be1ef3d3Smacallan pExa->offScreenBase = fPtr->linebytes * fPtr->info.height; 326be1ef3d3Smacallan pExa->pixmapOffsetAlign = 4; 327be1ef3d3Smacallan pExa->pixmapPitchAlign = 4; 328be1ef3d3Smacallan 329be1ef3d3Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS; 330be1ef3d3Smacallan 331be1ef3d3Smacallan pExa->maxX = 2048; 332be1ef3d3Smacallan pExa->maxY = 2048; /* dummy, available VRAM is the limit */ 333be1ef3d3Smacallan 334be1ef3d3Smacallan pExa->MarkSync = IgsMarkSync; 335be1ef3d3Smacallan pExa->WaitMarker = IgsWaitMarker; 336be1ef3d3Smacallan pExa->PrepareSolid = IgsPrepareSolid; 337be1ef3d3Smacallan pExa->Solid = IgsSolid; 338be1ef3d3Smacallan pExa->DoneSolid = IgsDoneCopy; 339be1ef3d3Smacallan pExa->PrepareCopy = IgsPrepareCopy; 340be1ef3d3Smacallan pExa->Copy = IgsCopy; 341be1ef3d3Smacallan pExa->DoneCopy = IgsDoneCopy; 342be1ef3d3Smacallan 343be1ef3d3Smacallan switch(fPtr->info.depth) { 344be1ef3d3Smacallan case 8: 345be1ef3d3Smacallan fPtr->shift = 0; 346be1ef3d3Smacallan break; 347be1ef3d3Smacallan case 16: 348be1ef3d3Smacallan fPtr->shift = 1; 349be1ef3d3Smacallan break; 350be1ef3d3Smacallan case 32: 351be1ef3d3Smacallan fPtr->shift = 2; 352be1ef3d3Smacallan break; 353be1ef3d3Smacallan } 354be1ef3d3Smacallan /* EXA hits more optimized paths when it does not have to fallback because 355be1ef3d3Smacallan * of missing UTS/DFS, hook memcpy-based UTS/DFS. 356be1ef3d3Smacallan */ 357be1ef3d3Smacallanif (0) { 358be1ef3d3Smacallan pExa->UploadToScreen = IgsUploadToScreen; 359be1ef3d3Smacallan pExa->DownloadFromScreen = IgsDownloadFromScreen; 360be1ef3d3Smacallan} 361be1ef3d3Smacallan return exaDriverInit(pScreen, pExa); 362be1ef3d3Smacallan} 363