105c1003fSmacallan/* 205c1003fSmacallan * crude EXA support for geforce chips 305c1003fSmacallan * 405c1003fSmacallan * Copyright (C) 2018 Michael Lorenz 505c1003fSmacallan * 605c1003fSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy 705c1003fSmacallan * of this software and associated documentation files (the "Software"), to deal 805c1003fSmacallan * in the Software without restriction, including without limitation the rights 905c1003fSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1005c1003fSmacallan * copies of the Software, and to permit persons to whom the Software is 1105c1003fSmacallan * furnished to do so, subject to the following conditions: 1205c1003fSmacallan * 1305c1003fSmacallan * The above copyright notice and this permission notice shall be included in 1405c1003fSmacallan * all copies or substantial portions of the Software. 1505c1003fSmacallan * 1605c1003fSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1705c1003fSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1805c1003fSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1905c1003fSmacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 2005c1003fSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2105c1003fSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 2205c1003fSmacallan */ 2305c1003fSmacallan 249ea41ceaSmacallan/* $NetBSD: nv_exa.c,v 1.7 2022/08/15 09:44:19 macallan Exp $ */ 2505c1003fSmacallan 2605c1003fSmacallan#ifdef HAVE_CONFIG_H 2705c1003fSmacallan#include "config.h" 2805c1003fSmacallan#endif 2905c1003fSmacallan 3005c1003fSmacallan#include "nv_include.h" 3105c1003fSmacallan#include "miline.h" 3205c1003fSmacallan#include "nv_dma.h" 3305c1003fSmacallan#include "exa.h" 3405c1003fSmacallan 3505c1003fSmacallan//#define DEBUG 3605c1003fSmacallan 3705c1003fSmacallan#ifdef DEBUG 3805c1003fSmacallan#define ENTER xf86Msg(X_ERROR, "%s\n", __func__) 3905c1003fSmacallan#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) 4005c1003fSmacallan#else 4105c1003fSmacallan#define ENTER 4205c1003fSmacallan#define LEAVE 4305c1003fSmacallan#endif 4405c1003fSmacallan 4505c1003fSmacallanstatic void 4605c1003fSmacallanNvWaitMarker(ScreenPtr pScreen, int Marker) 4705c1003fSmacallan{ 4805c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 4905c1003fSmacallan 5005c1003fSmacallan ENTER; 5105c1003fSmacallan NVSync(pScrn); 5205c1003fSmacallan LEAVE; 5305c1003fSmacallan} 5405c1003fSmacallan 5505c1003fSmacallanstatic Bool 5605c1003fSmacallanNvPrepareCopy 5705c1003fSmacallan( 5805c1003fSmacallan PixmapPtr pSrcPixmap, 5905c1003fSmacallan PixmapPtr pDstPixmap, 6005c1003fSmacallan int xdir, 6105c1003fSmacallan int ydir, 6205c1003fSmacallan int rop, 6305c1003fSmacallan Pixel planemask 6405c1003fSmacallan) 6505c1003fSmacallan{ 6605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 6705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 68092d2b73Smacallan uint32_t dstpitch, dstoff, srcpitch, srcoff; 6905c1003fSmacallan 7005c1003fSmacallan ENTER; 7105c1003fSmacallan if (pSrcPixmap->drawable.bitsPerPixel != 32) 7205c1003fSmacallan xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pSrcPixmap->drawable.bitsPerPixel); 7305c1003fSmacallan planemask |= ~0 << pNv->CurrentLayout.depth; 7405c1003fSmacallan NVSetRopSolid(pScrn, rop, planemask); 75092d2b73Smacallan 7605c1003fSmacallan dstpitch = exaGetPixmapPitch(pDstPixmap); 7705c1003fSmacallan dstoff = exaGetPixmapOffset(pDstPixmap); 7805c1003fSmacallan srcpitch = exaGetPixmapPitch(pSrcPixmap); 7905c1003fSmacallan srcoff = exaGetPixmapOffset(pSrcPixmap); 8005c1003fSmacallan 81092d2b73Smacallan 82092d2b73Smacallan NVDmaStart(pNv, SURFACE_FORMAT, 4); 83092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 8405c1003fSmacallan NVDmaNext (pNv, srcpitch | (dstpitch << 16)); 8505c1003fSmacallan NVDmaNext (pNv, srcoff); 8605c1003fSmacallan NVDmaNext (pNv, dstoff); 8705c1003fSmacallan 88092d2b73Smacallan pNv->DMAKickoffCallback = NVDMAKickoffCallback; 89092d2b73Smacallan 9005c1003fSmacallan LEAVE; 9105c1003fSmacallan return TRUE; 9205c1003fSmacallan} 9305c1003fSmacallan 9405c1003fSmacallanstatic void 9505c1003fSmacallanNvCopy 9605c1003fSmacallan( 9705c1003fSmacallan PixmapPtr pDstPixmap, 9805c1003fSmacallan int srcX, 9905c1003fSmacallan int srcY, 10005c1003fSmacallan int dstX, 10105c1003fSmacallan int dstY, 10205c1003fSmacallan int w, 10305c1003fSmacallan int h 10405c1003fSmacallan) 10505c1003fSmacallan{ 10605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; 10705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 10805c1003fSmacallan 109092d2b73Smacallan ENTER; 110092d2b73Smacallan 11105c1003fSmacallan NVDmaStart(pNv, BLIT_POINT_SRC, 3); 11205c1003fSmacallan NVDmaNext (pNv, (srcY << 16) | srcX); 11305c1003fSmacallan NVDmaNext (pNv, (dstY << 16) | dstX); 11405c1003fSmacallan NVDmaNext (pNv, (h << 16) | w); 11505c1003fSmacallan 11605c1003fSmacallan if((w * h) >= 512) 117c20046b8Smacallan NVDmaKickoff(pNv); 11805c1003fSmacallan 11905c1003fSmacallan LEAVE; 12005c1003fSmacallan} 12105c1003fSmacallan 12205c1003fSmacallanstatic void 12305c1003fSmacallanNvDoneCopy(PixmapPtr pDstPixmap) 12405c1003fSmacallan{ 12505c1003fSmacallan ENTER; 12605c1003fSmacallan LEAVE; 12705c1003fSmacallan} 12805c1003fSmacallan 12905c1003fSmacallanstatic Bool 13005c1003fSmacallanNvPrepareSolid( 13105c1003fSmacallan PixmapPtr pPixmap, 13205c1003fSmacallan int rop, 13305c1003fSmacallan Pixel planemask, 13405c1003fSmacallan Pixel color) 13505c1003fSmacallan{ 13605c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 13705c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 138092d2b73Smacallan uint32_t pitch, off; 13905c1003fSmacallan 14005c1003fSmacallan ENTER; 14114032a88Smacallan 14214032a88Smacallan if (pPixmap->drawable.bitsPerPixel != 32) { 14314032a88Smacallan#ifdef DEBUG 14405c1003fSmacallan xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pPixmap->drawable.bitsPerPixel); 14514032a88Smacallan#endif 14614032a88Smacallan return FALSE; 14714032a88Smacallan } 14805c1003fSmacallan planemask |= ~0 << pNv->CurrentLayout.depth; 149c20046b8Smacallan off = exaGetPixmapOffset(pPixmap); 150c20046b8Smacallan 151c20046b8Smacallan /* 152c20046b8Smacallan * XXX 153c20046b8Smacallan * on my 6800 Ultra the drawing engine stalls when drawing at least 1549ea41ceaSmacallan * some rectangles into off-screen memory. Draw them by software until 1559ea41ceaSmacallan * I figure out what's going on 156c20046b8Smacallan */ 1579ea41ceaSmacallan if (pNv->Architecture >= NV_ARCH_40) { 1589ea41ceaSmacallan if (off != 0) return FALSE; 1599ea41ceaSmacallan } 1609ea41ceaSmacallan 161c20046b8Smacallan NVSetRopSolid(pScrn, rop, planemask); 16205c1003fSmacallan 16305c1003fSmacallan pitch = exaGetPixmapPitch(pPixmap); 16405c1003fSmacallan 165092d2b73Smacallan NVDmaStart(pNv, SURFACE_FORMAT, 4); 166092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 16705c1003fSmacallan NVDmaNext (pNv, pitch | (pitch << 16)); 16805c1003fSmacallan NVDmaNext (pNv, off); 16905c1003fSmacallan NVDmaNext (pNv, off); 17005c1003fSmacallan 171092d2b73Smacallan NVDmaStart(pNv, RECT_FORMAT, 1); 172092d2b73Smacallan NVDmaNext (pNv, pNv->rectFormat); 173092d2b73Smacallan 17405c1003fSmacallan NVDmaStart(pNv, RECT_SOLID_COLOR, 1); 17505c1003fSmacallan NVDmaNext (pNv, color); 17605c1003fSmacallan 177092d2b73Smacallan pNv->DMAKickoffCallback = NVDMAKickoffCallback; 178092d2b73Smacallan 17905c1003fSmacallan LEAVE; 18005c1003fSmacallan return TRUE; 18105c1003fSmacallan} 18205c1003fSmacallan 18305c1003fSmacallanstatic void 18405c1003fSmacallanNvSolid( 18505c1003fSmacallan PixmapPtr pPixmap, 18605c1003fSmacallan int x1, 18705c1003fSmacallan int y1, 18805c1003fSmacallan int x2, 18905c1003fSmacallan int y2) 19005c1003fSmacallan{ 19105c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; 19205c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 19305c1003fSmacallan int w = x2 - x1, h = y2 - y1; 19405c1003fSmacallan 19505c1003fSmacallan ENTER; 196092d2b73Smacallan 19705c1003fSmacallan NVDmaStart(pNv, RECT_SOLID_RECTS(0), 2); 19805c1003fSmacallan NVDmaNext (pNv, (x1 << 16) | y1); 19905c1003fSmacallan NVDmaNext (pNv, (w << 16) | h); 20005c1003fSmacallan 20105c1003fSmacallan if((w * h) >= 512) 20205c1003fSmacallan NVDmaKickoff(pNv); 20305c1003fSmacallan 20405c1003fSmacallan LEAVE; 20505c1003fSmacallan} 20605c1003fSmacallan 20705c1003fSmacallan/* 20805c1003fSmacallan * Memcpy-based UTS. 20905c1003fSmacallan */ 21005c1003fSmacallanstatic Bool 21105c1003fSmacallanNvUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 21205c1003fSmacallan char *src, int src_pitch) 21305c1003fSmacallan{ 21405c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 21505c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 21605c1003fSmacallan unsigned char *dst = pNv->FbStart + exaGetPixmapOffset(pDst); 21705c1003fSmacallan int dst_pitch = exaGetPixmapPitch(pDst); 21805c1003fSmacallan 21905c1003fSmacallan int bpp = pDst->drawable.bitsPerPixel; 22005c1003fSmacallan int cpp = (bpp + 7) >> 3; 22105c1003fSmacallan int wBytes = w * cpp; 22205c1003fSmacallan 22305c1003fSmacallan ENTER; 22405c1003fSmacallan dst += (x * cpp) + (y * dst_pitch); 22505c1003fSmacallan 22605c1003fSmacallan NVSync(pScrn); 22705c1003fSmacallan 22805c1003fSmacallan while (h--) { 22905c1003fSmacallan memcpy(dst, src, wBytes); 23005c1003fSmacallan src += src_pitch; 23105c1003fSmacallan dst += dst_pitch; 23205c1003fSmacallan } 233092d2b73Smacallan 23405c1003fSmacallan LEAVE; 23505c1003fSmacallan return TRUE; 23605c1003fSmacallan} 23705c1003fSmacallan 23805c1003fSmacallan/* 23905c1003fSmacallan * Memcpy-based DFS. 24005c1003fSmacallan */ 24105c1003fSmacallanstatic Bool 24205c1003fSmacallanNvDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 24305c1003fSmacallan char *dst, int dst_pitch) 24405c1003fSmacallan{ 24505c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 24605c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 24705c1003fSmacallan unsigned char *src = pNv->FbStart + exaGetPixmapOffset(pSrc); 24805c1003fSmacallan int src_pitch = exaGetPixmapPitch(pSrc); 24905c1003fSmacallan 25005c1003fSmacallan int bpp = pSrc->drawable.bitsPerPixel; 25105c1003fSmacallan int cpp = (bpp + 7) >> 3; 25205c1003fSmacallan int wBytes = w * cpp; 25305c1003fSmacallan 25405c1003fSmacallan ENTER; 25505c1003fSmacallan src += (x * cpp) + (y * src_pitch); 25605c1003fSmacallan 25705c1003fSmacallan NVSync(pScrn); 25805c1003fSmacallan 25905c1003fSmacallan while (h--) { 26005c1003fSmacallan memcpy(dst, src, wBytes); 26105c1003fSmacallan src += src_pitch; 26205c1003fSmacallan dst += dst_pitch; 26305c1003fSmacallan } 26405c1003fSmacallan LEAVE; 26505c1003fSmacallan return TRUE; 26605c1003fSmacallan} 26705c1003fSmacallan 2682d0d2c2bSmacallanstatic Bool 2692d0d2c2bSmacallanNvPrepareAccess(PixmapPtr pPix, int index) 2702d0d2c2bSmacallan{ 2712d0d2c2bSmacallan ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2722d0d2c2bSmacallan 2732d0d2c2bSmacallan NVSync(pScrn); 2742d0d2c2bSmacallan return TRUE; 2752d0d2c2bSmacallan} 2762d0d2c2bSmacallan 2772d0d2c2bSmacallanstatic void 2782d0d2c2bSmacallanNvFinishAccess(PixmapPtr pPix, int index) 2792d0d2c2bSmacallan{ 2802d0d2c2bSmacallan} 2812d0d2c2bSmacallan 28205c1003fSmacallanBool 28305c1003fSmacallanNvInitExa(ScreenPtr pScreen) 28405c1003fSmacallan{ 28505c1003fSmacallan ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 28605c1003fSmacallan NVPtr pNv = NVPTR(pScrn); 28705c1003fSmacallan ExaDriverPtr pExa; 28805c1003fSmacallan 28905c1003fSmacallan pExa = exaDriverAlloc(); 29005c1003fSmacallan if (!pExa) 29105c1003fSmacallan return FALSE; 29205c1003fSmacallan 29305c1003fSmacallan pNv->pExa = pExa; 29405c1003fSmacallan 29505c1003fSmacallan NVResetGraphics(pScrn); 29605c1003fSmacallan 29705c1003fSmacallan pExa->exa_major = EXA_VERSION_MAJOR; 29805c1003fSmacallan pExa->exa_minor = EXA_VERSION_MINOR; 29905c1003fSmacallan 30005c1003fSmacallan pExa->memoryBase = pNv->FbStart; 301092d2b73Smacallan pExa->memorySize = pNv->ScratchBufferStart & (~255); 302092d2b73Smacallan pExa->offScreenBase = (((pScrn->virtualY * pScrn->displayWidth * 303092d2b73Smacallan pScrn->bitsPerPixel >> 3) + 255) & (~255)); 304092d2b73Smacallan pExa->pixmapOffsetAlign = 256; 305092d2b73Smacallan pExa->pixmapPitchAlign = 256; 30605c1003fSmacallan 30714032a88Smacallan pExa->flags = EXA_OFFSCREEN_PIXMAPS | 30814032a88Smacallan EXA_MIXED_PIXMAPS; 30905c1003fSmacallan 310092d2b73Smacallan pExa->maxX = 4096; 311092d2b73Smacallan pExa->maxY = 4096; 31205c1003fSmacallan 31305c1003fSmacallan pExa->WaitMarker = NvWaitMarker; 31405c1003fSmacallan pExa->PrepareSolid = NvPrepareSolid; 31505c1003fSmacallan pExa->Solid = NvSolid; 31605c1003fSmacallan pExa->DoneSolid = NvDoneCopy; 31705c1003fSmacallan pExa->PrepareCopy = NvPrepareCopy; 31805c1003fSmacallan pExa->Copy = NvCopy; 31905c1003fSmacallan pExa->DoneCopy = NvDoneCopy; 32005c1003fSmacallan 32105c1003fSmacallan switch(pNv->CurrentLayout.depth) { 32205c1003fSmacallan case 24: 323092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH24; 324092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH24; 32505c1003fSmacallan break; 32605c1003fSmacallan case 16: 32705c1003fSmacallan case 15: 328092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH16; 329092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH16; 33005c1003fSmacallan break; 33105c1003fSmacallan default: 332092d2b73Smacallan pNv->surfaceFormat = SURFACE_FORMAT_DEPTH8; 333092d2b73Smacallan pNv->rectFormat = RECT_FORMAT_DEPTH8; 33405c1003fSmacallan break; 33505c1003fSmacallan } 33605c1003fSmacallan NVDmaStart(pNv, SURFACE_FORMAT, 1); 337092d2b73Smacallan NVDmaNext (pNv, pNv->surfaceFormat); 33805c1003fSmacallan NVDmaStart(pNv, RECT_FORMAT, 1); 339092d2b73Smacallan NVDmaNext (pNv, pNv->rectFormat); 34005c1003fSmacallan 341c20046b8Smacallan NVDmaStart(pNv, PATTERN_COLOR_0, 4); 342c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 343c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 344c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 345c20046b8Smacallan NVDmaNext (pNv, 0xffffffff); 346c20046b8Smacallan 34705c1003fSmacallan pNv->currentRop = ~0; /* set to something invalid */ 34805c1003fSmacallan NVSetRopSolid(pScrn, GXcopy, ~0); 34905c1003fSmacallan 35005c1003fSmacallan NVDmaKickoff(pNv); 35105c1003fSmacallan 35205c1003fSmacallan /* EXA hits more optimized paths when it does not have to fallback 35305c1003fSmacallan * because of missing UTS/DFS, hook memcpy-based UTS/DFS. 35405c1003fSmacallan */ 35505c1003fSmacallan pExa->UploadToScreen = NvUploadToScreen; 35605c1003fSmacallan pExa->DownloadFromScreen = NvDownloadFromScreen; 3572d0d2c2bSmacallan pExa->PrepareAccess = NvPrepareAccess; 3582d0d2c2bSmacallan pExa->FinishAccess = NvFinishAccess; 3592d0d2c2bSmacallan 36005c1003fSmacallan return exaDriverInit(pScreen, pExa); 36105c1003fSmacallan} 362