/* nv_exa.c revision c20046b8 */
105c1003fSmacallan/* 205c1003fSmacallan * crude EXA support for geforce chips 305c1003fSmacallan * 405c1003fSmacallan * Copyright (C) 2018 Michael Lorenz 505c1003fSmacallan * 605c1003fSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy 705c1003fSmacallan * of this software and associated documentation files (the "Software"), to deal 805c1003fSmacallan * in the Software without restriction, including without limitation the rights 905c1003fSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1005c1003fSmacallan * copies of the Software, and to permit persons to whom the Software is 1105c1003fSmacallan * furnished to do so, subject to the following conditions: 1205c1003fSmacallan * 1305c1003fSmacallan * The above copyright notice and this permission notice shall be included in 1405c1003fSmacallan * all copies or substantial portions of the Software. 1505c1003fSmacallan * 1605c1003fSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1705c1003fSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1805c1003fSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1905c1003fSmacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 2005c1003fSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2105c1003fSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
2205c1003fSmacallan */ 2305c1003fSmacallan 24c20046b8Smacallan/* $NetBSD: nv_exa.c,v 1.5 2018/10/05 01:53:54 macallan Exp $ */ 2505c1003fSmacallan 2605c1003fSmacallan#ifdef HAVE_CONFIG_H 2705c1003fSmacallan#include "config.h" 2805c1003fSmacallan#endif 2905c1003fSmacallan 3005c1003fSmacallan#include "nv_include.h" 3105c1003fSmacallan#include "miline.h" 3205c1003fSmacallan#include "nv_dma.h" 3305c1003fSmacallan#include "exa.h" 3405c1003fSmacallan 3505c1003fSmacallan//#define DEBUG 3605c1003fSmacallan 3705c1003fSmacallan#ifdef DEBUG 3805c1003fSmacallan#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 3905c1003fSmacallan#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 4005c1003fSmacallan#else 4105c1003fSmacallan#define ENTER 4205c1003fSmacallan#define LEAVE 4305c1003fSmacallan#endif 4405c1003fSmacallan 4505c1003fSmacallanstatic void 4605c1003fSmacallanNvWaitMarker(ScreenPtr pScreen, int Marker) 4705c1003fSmacallan{ 4805c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 4905c1003fSmacallan 5005c1003fSmacallan ENTER; 5105c1003fSmacallan NVSync(pScrn); 5205c1003fSmacallan LEAVE; 5305c1003fSmacallan} 5405c1003fSmacallan 5505c1003fSmacallanstatic Bool 5605c1003fSmacallanNvPrepareCopy 5705c1003fSmacallan( 5805c1003fSmacallan PixmapPtr pSrcPixmap, 5905c1003fSmacallan PixmapPtr pDstPixmap, 6005c1003fSmacallan int xdir, 6105c1003fSmacallan int ydir, 6205c1003fSmacallan int rop, 6305c1003fSmacallan Pixel planemask 6405c1003fSmacallan) 6505c1003fSmacallan{ 6605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 6705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 68092d2b73Smacallan uint32_t dstpitch, dstoff, srcpitch, srcoff; 6905c1003fSmacallan 7005c1003fSmacallan ENTER; 7105c1003fSmacallan if (pSrcPixmap->drawable.bitsPerPixel != 32) 7205c1003fSmacallan xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pSrcPixmap->drawable.bitsPerPixel); 7305c1003fSmacallan planemask |= ~0 << pNv->CurrentLayout.depth; 7405c1003fSmacallan 
NVSetRopSolid(pScrn, rop, planemask); 75092d2b73Smacallan 7605c1003fSmacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 7705c1003fSmacallan dstoff = exaGetPixmapOffset(pDstPixmap); 7805c1003fSmacallan srcpitch = exaGetPixmapPitch(pSrcPixmap); 7905c1003fSmacallan srcoff = exaGetPixmapOffset(pSrcPixmap); 8005c1003fSmacallan 81092d2b73Smacallan 82092d2b73Smacallan NVDmaStart(pNv, SURFACE_FORMAT, 4); 83092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 8405c1003fSmacallan NVDmaNext (pNv, srcpitch | (dstpitch << 16)); 8505c1003fSmacallan NVDmaNext (pNv, srcoff); 8605c1003fSmacallan NVDmaNext (pNv, dstoff); 8705c1003fSmacallan 88092d2b73Smacallan pNv->DMAKickoffCallback = NVDMAKickoffCallback; 89092d2b73Smacallan 9005c1003fSmacallan LEAVE; 9105c1003fSmacallan return TRUE; 9205c1003fSmacallan} 9305c1003fSmacallan 9405c1003fSmacallanstatic void 9505c1003fSmacallanNvCopy 9605c1003fSmacallan( 9705c1003fSmacallan PixmapPtr pDstPixmap, 9805c1003fSmacallan int srcX, 9905c1003fSmacallan int srcY, 10005c1003fSmacallan int dstX, 10105c1003fSmacallan int dstY, 10205c1003fSmacallan int w, 10305c1003fSmacallan int h 10405c1003fSmacallan) 10505c1003fSmacallan{ 10605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 10705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 10805c1003fSmacallan 109092d2b73Smacallan ENTER; 110092d2b73Smacallan 11105c1003fSmacallan NVDmaStart(pNv, BLIT_POINT_SRC, 3); 11205c1003fSmacallan NVDmaNext (pNv, (srcY << 16) | srcX); 11305c1003fSmacallan NVDmaNext (pNv, (dstY << 16) | dstX); 11405c1003fSmacallan NVDmaNext (pNv, (h << 16) | w); 11505c1003fSmacallan 11605c1003fSmacallan if((w * h) >= 512) 117c20046b8Smacallan NVDmaKickoff(pNv); 11805c1003fSmacallan 11905c1003fSmacallan LEAVE; 12005c1003fSmacallan} 12105c1003fSmacallan 12205c1003fSmacallanstatic void 12305c1003fSmacallanNvDoneCopy(PixmapPtr pDstPixmap) 12405c1003fSmacallan{ 12505c1003fSmacallan ENTER; 12605c1003fSmacallan LEAVE; 12705c1003fSmacallan} 12805c1003fSmacallan 
12905c1003fSmacallanstatic Bool 13005c1003fSmacallanNvPrepareSolid( 13105c1003fSmacallan PixmapPtr pPixmap, 13205c1003fSmacallan int rop, 13305c1003fSmacallan Pixel planemask, 13405c1003fSmacallan Pixel color) 13505c1003fSmacallan{ 13605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 13705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 138092d2b73Smacallan uint32_t pitch, off; 13905c1003fSmacallan 14005c1003fSmacallan ENTER; 14105c1003fSmacallan if (pPixmap->drawable.bitsPerPixel != 32) 14205c1003fSmacallan xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pPixmap->drawable.bitsPerPixel); 14305c1003fSmacallan planemask |= ~0 << pNv->CurrentLayout.depth; 144c20046b8Smacallan off = exaGetPixmapOffset(pPixmap); 145c20046b8Smacallan 146c20046b8Smacallan /* 147c20046b8Smacallan * XXX 148c20046b8Smacallan * on my 6800 Ultra the drawing engine stalls when drawing at least 149c20046b8Smacallan * rectangles into off-screen memory. Draw them by software until I figure out 150c20046b8Smacallan * what's going on 151c20046b8Smacallan */ 152c20046b8Smacallan if (off != 0) return FALSE; 153c20046b8Smacallan 154c20046b8Smacallan NVSetRopSolid(pScrn, rop, planemask); 15505c1003fSmacallan 15605c1003fSmacallan pitch = exaGetPixmapPitch(pPixmap); 15705c1003fSmacallan 158092d2b73Smacallan NVDmaStart(pNv, SURFACE_FORMAT, 4); 159092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 16005c1003fSmacallan NVDmaNext (pNv, pitch | (pitch << 16)); 16105c1003fSmacallan NVDmaNext (pNv, off); 16205c1003fSmacallan NVDmaNext (pNv, off); 16305c1003fSmacallan 164092d2b73Smacallan NVDmaStart(pNv, RECT_FORMAT, 1); 165092d2b73Smacallan NVDmaNext (pNv, pNv->rectFormat); 166092d2b73Smacallan 16705c1003fSmacallan NVDmaStart(pNv, RECT_SOLID_COLOR, 1); 16805c1003fSmacallan NVDmaNext (pNv, color); 16905c1003fSmacallan 170092d2b73Smacallan pNv->DMAKickoffCallback = NVDMAKickoffCallback; 171092d2b73Smacallan 17205c1003fSmacallan LEAVE; 17305c1003fSmacallan return TRUE; 
17405c1003fSmacallan} 17505c1003fSmacallan 17605c1003fSmacallanstatic void 17705c1003fSmacallanNvSolid( 17805c1003fSmacallan PixmapPtr pPixmap, 17905c1003fSmacallan int x1, 18005c1003fSmacallan int y1, 18105c1003fSmacallan int x2, 18205c1003fSmacallan int y2) 18305c1003fSmacallan{ 18405c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 18505c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 18605c1003fSmacallan int w = x2 - x1, h = y2 - y1; 18705c1003fSmacallan 18805c1003fSmacallan ENTER; 189092d2b73Smacallan 19005c1003fSmacallan NVDmaStart(pNv, RECT_SOLID_RECTS(0), 2); 19105c1003fSmacallan NVDmaNext (pNv, (x1 << 16) | y1); 19205c1003fSmacallan NVDmaNext (pNv, (w << 16) | h); 19305c1003fSmacallan 19405c1003fSmacallan if((w * h) >= 512) 19505c1003fSmacallan NVDmaKickoff(pNv); 19605c1003fSmacallan 19705c1003fSmacallan LEAVE; 19805c1003fSmacallan} 19905c1003fSmacallan 20005c1003fSmacallan/* 20105c1003fSmacallan * Memcpy-based UTS. 20205c1003fSmacallan */ 20305c1003fSmacallanstatic Bool 20405c1003fSmacallanNvUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 20505c1003fSmacallan char *src, int src_pitch) 20605c1003fSmacallan{ 20705c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 20805c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 20905c1003fSmacallan unsigned char *dst = pNv->FbStart + exaGetPixmapOffset(pDst); 21005c1003fSmacallan int dst_pitch = exaGetPixmapPitch(pDst); 21105c1003fSmacallan 21205c1003fSmacallan int bpp = pDst->drawable.bitsPerPixel; 21305c1003fSmacallan int cpp = (bpp + 7) >> 3; 21405c1003fSmacallan int wBytes = w * cpp; 21505c1003fSmacallan 21605c1003fSmacallan ENTER; 21705c1003fSmacallan dst += (x * cpp) + (y * dst_pitch); 21805c1003fSmacallan 21905c1003fSmacallan NVSync(pScrn); 22005c1003fSmacallan 22105c1003fSmacallan while (h--) { 22205c1003fSmacallan memcpy(dst, src, wBytes); 22305c1003fSmacallan src += src_pitch; 22405c1003fSmacallan dst += dst_pitch; 22505c1003fSmacallan } 
226092d2b73Smacallan 22705c1003fSmacallan LEAVE; 22805c1003fSmacallan return TRUE; 22905c1003fSmacallan} 23005c1003fSmacallan 23105c1003fSmacallan/* 23205c1003fSmacallan * Memcpy-based DFS. 23305c1003fSmacallan */ 23405c1003fSmacallanstatic Bool 23505c1003fSmacallanNvDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 23605c1003fSmacallan char *dst, int dst_pitch) 23705c1003fSmacallan{ 23805c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 23905c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 24005c1003fSmacallan unsigned char *src = pNv->FbStart + exaGetPixmapOffset(pSrc); 24105c1003fSmacallan int src_pitch = exaGetPixmapPitch(pSrc); 24205c1003fSmacallan 24305c1003fSmacallan int bpp = pSrc->drawable.bitsPerPixel; 24405c1003fSmacallan int cpp = (bpp + 7) >> 3; 24505c1003fSmacallan int wBytes = w * cpp; 24605c1003fSmacallan 24705c1003fSmacallan ENTER; 24805c1003fSmacallan src += (x * cpp) + (y * src_pitch); 24905c1003fSmacallan 25005c1003fSmacallan NVSync(pScrn); 25105c1003fSmacallan 25205c1003fSmacallan while (h--) { 25305c1003fSmacallan memcpy(dst, src, wBytes); 25405c1003fSmacallan src += src_pitch; 25505c1003fSmacallan dst += dst_pitch; 25605c1003fSmacallan } 25705c1003fSmacallan LEAVE; 25805c1003fSmacallan return TRUE; 25905c1003fSmacallan} 26005c1003fSmacallan 2612d0d2c2bSmacallanstatic Bool 2622d0d2c2bSmacallanNvPrepareAccess(PixmapPtr pPix, int index) 2632d0d2c2bSmacallan{ 2642d0d2c2bSmacallan ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2652d0d2c2bSmacallan 2662d0d2c2bSmacallan NVSync(pScrn); 2672d0d2c2bSmacallan return TRUE; 2682d0d2c2bSmacallan} 2692d0d2c2bSmacallan 2702d0d2c2bSmacallanstatic void 2712d0d2c2bSmacallanNvFinishAccess(PixmapPtr pPix, int index) 2722d0d2c2bSmacallan{ 2732d0d2c2bSmacallan} 2742d0d2c2bSmacallan 27505c1003fSmacallanBool 27605c1003fSmacallanNvInitExa(ScreenPtr pScreen) 27705c1003fSmacallan{ 27805c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 
27905c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 28005c1003fSmacallan ExaDriverPtr pExa; 28105c1003fSmacallan int surfaceFormat, rectFormat; 28205c1003fSmacallan 28305c1003fSmacallan pExa = exaDriverAlloc(); 28405c1003fSmacallan if (!pExa) 28505c1003fSmacallan return FALSE; 28605c1003fSmacallan 28705c1003fSmacallan pNv->pExa = pExa; 28805c1003fSmacallan 28905c1003fSmacallan NVResetGraphics(pScrn); 29005c1003fSmacallan 29105c1003fSmacallan pExa->exa_major = EXA_VERSION_MAJOR; 29205c1003fSmacallan pExa->exa_minor = EXA_VERSION_MINOR; 29305c1003fSmacallan 29405c1003fSmacallan pExa->memoryBase = pNv->FbStart; 295092d2b73Smacallan pExa->memorySize = pNv->ScratchBufferStart & (~255); 296092d2b73Smacallan pExa->offScreenBase = (((pScrn->virtualY * pScrn->displayWidth * 297092d2b73Smacallan pScrn->bitsPerPixel >> 3) + 255) & (~255)); 298092d2b73Smacallan pExa->pixmapOffsetAlign = 256; 299092d2b73Smacallan pExa->pixmapPitchAlign = 256; 30005c1003fSmacallan 301092d2b73Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS/* | 302092d2b73Smacallan EXA_MIXED_PIXMAPS*/; 30305c1003fSmacallan 304092d2b73Smacallan pExa->maxX = 4096; 305092d2b73Smacallan pExa->maxY = 4096; 30605c1003fSmacallan 30705c1003fSmacallan pExa->WaitMarker = NvWaitMarker; 30805c1003fSmacallan pExa->PrepareSolid = NvPrepareSolid; 30905c1003fSmacallan pExa->Solid = NvSolid; 31005c1003fSmacallan pExa->DoneSolid = NvDoneCopy; 31105c1003fSmacallan pExa->PrepareCopy = NvPrepareCopy; 31205c1003fSmacallan pExa->Copy = NvCopy; 31305c1003fSmacallan pExa->DoneCopy = NvDoneCopy; 31405c1003fSmacallan 31505c1003fSmacallan switch(pNv->CurrentLayout.depth) { 31605c1003fSmacallan case 24: 317092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH24; 318092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH24; 31905c1003fSmacallan break; 32005c1003fSmacallan case 16: 32105c1003fSmacallan case 15: 322092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH16; 323092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH16; 
32405c1003fSmacallan break; 32505c1003fSmacallan default: 326092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH8; 327092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH8; 32805c1003fSmacallan break; 32905c1003fSmacallan } 33005c1003fSmacallan NVDmaStart(pNv, SURFACE_FORMAT, 1); 331092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 33205c1003fSmacallan NVDmaStart(pNv, RECT_FORMAT, 1); 333092d2b73Smacallan NVDmaNext (pNv, pNv->rectFormat); 33405c1003fSmacallan 335c20046b8Smacallan NVDmaStart(pNv, PATTERN_COLOR_0, 4); 336c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 337c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 338c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 339c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 340c20046b8Smacallan 34105c1003fSmacallan pNv->currentRop = ~0; /* set to something invalid */ 34205c1003fSmacallan NVSetRopSolid(pScrn, GXcopy, ~0); 34305c1003fSmacallan 34405c1003fSmacallan NVDmaKickoff(pNv); 34505c1003fSmacallan 34605c1003fSmacallan /* EXA hits more optimized paths when it does not have to fallback 34705c1003fSmacallan * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 34805c1003fSmacallan */ 34905c1003fSmacallan pExa->UploadToScreen = NvUploadToScreen; 35005c1003fSmacallan pExa->DownloadFromScreen = NvDownloadFromScreen; 3512d0d2c2bSmacallan pExa->PrepareAccess = NvPrepareAccess; 3522d0d2c2bSmacallan pExa->FinishAccess = NvFinishAccess; 3532d0d2c2bSmacallan 35405c1003fSmacallan return exaDriverInit(pScreen, pExa); 35505c1003fSmacallan} 356