105c1003fSmacallan/*
205c1003fSmacallan * crude EXA support for geforce chips
305c1003fSmacallan *
405c1003fSmacallan * Copyright (C) 2018 Michael Lorenz
505c1003fSmacallan *
605c1003fSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy
705c1003fSmacallan * of this software and associated documentation files (the "Software"), to deal
805c1003fSmacallan * in the Software without restriction, including without limitation the rights
905c1003fSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1005c1003fSmacallan * copies of the Software, and to permit persons to whom the Software is
1105c1003fSmacallan * furnished to do so, subject to the following conditions:
1205c1003fSmacallan *
1305c1003fSmacallan * The above copyright notice and this permission notice shall be included in
1405c1003fSmacallan * all copies or substantial portions of the Software.
1505c1003fSmacallan *
1605c1003fSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1705c1003fSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1805c1003fSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1905c1003fSmacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
2005c1003fSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2105c1003fSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2205c1003fSmacallan */
2305c1003fSmacallan
249ea41ceaSmacallan/* $NetBSD: nv_exa.c,v 1.7 2022/08/15 09:44:19 macallan Exp $ */
2505c1003fSmacallan
2605c1003fSmacallan#ifdef HAVE_CONFIG_H
2705c1003fSmacallan#include "config.h"
2805c1003fSmacallan#endif
2905c1003fSmacallan
3005c1003fSmacallan#include "nv_include.h"
3105c1003fSmacallan#include "miline.h"
3205c1003fSmacallan#include "nv_dma.h"
3305c1003fSmacallan#include "exa.h"
3405c1003fSmacallan
/*
 * Function entry/exit tracing.
 *
 * Define DEBUG (uncomment the line below) to make ENTER and LEAVE log the
 * name of the enclosing function through xf86Msg(); without it both macros
 * expand to nothing.
 */
/* #define DEBUG */

#ifndef DEBUG
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
4405c1003fSmacallan
4505c1003fSmacallanstatic void
4605c1003fSmacallanNvWaitMarker(ScreenPtr pScreen, int Marker)
4705c1003fSmacallan{
4805c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
4905c1003fSmacallan
5005c1003fSmacallan	ENTER;
5105c1003fSmacallan	NVSync(pScrn);
5205c1003fSmacallan	LEAVE;
5305c1003fSmacallan}
5405c1003fSmacallan
5505c1003fSmacallanstatic Bool
5605c1003fSmacallanNvPrepareCopy
5705c1003fSmacallan(
5805c1003fSmacallan    PixmapPtr pSrcPixmap,
5905c1003fSmacallan    PixmapPtr pDstPixmap,
6005c1003fSmacallan    int       xdir,
6105c1003fSmacallan    int       ydir,
6205c1003fSmacallan    int       rop,
6305c1003fSmacallan    Pixel     planemask
6405c1003fSmacallan)
6505c1003fSmacallan{
6605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
6705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
68092d2b73Smacallan	uint32_t dstpitch, dstoff, srcpitch, srcoff;
6905c1003fSmacallan
7005c1003fSmacallan	ENTER;
7105c1003fSmacallan	if (pSrcPixmap->drawable.bitsPerPixel != 32)
7205c1003fSmacallan		xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pSrcPixmap->drawable.bitsPerPixel);
7305c1003fSmacallan	planemask |= ~0 << pNv->CurrentLayout.depth;
7405c1003fSmacallan	NVSetRopSolid(pScrn, rop, planemask);
75092d2b73Smacallan
7605c1003fSmacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
7705c1003fSmacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
7805c1003fSmacallan	srcpitch = exaGetPixmapPitch(pSrcPixmap);
7905c1003fSmacallan	srcoff = exaGetPixmapOffset(pSrcPixmap);
8005c1003fSmacallan
81092d2b73Smacallan
82092d2b73Smacallan	NVDmaStart(pNv, SURFACE_FORMAT, 4);
83092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
8405c1003fSmacallan	NVDmaNext (pNv, srcpitch | (dstpitch << 16));
8505c1003fSmacallan	NVDmaNext (pNv, srcoff);
8605c1003fSmacallan	NVDmaNext (pNv, dstoff);
8705c1003fSmacallan
88092d2b73Smacallan	pNv->DMAKickoffCallback = NVDMAKickoffCallback;
89092d2b73Smacallan
9005c1003fSmacallan	LEAVE;
9105c1003fSmacallan	return TRUE;
9205c1003fSmacallan}
9305c1003fSmacallan
9405c1003fSmacallanstatic void
9505c1003fSmacallanNvCopy
9605c1003fSmacallan(
9705c1003fSmacallan    PixmapPtr pDstPixmap,
9805c1003fSmacallan    int       srcX,
9905c1003fSmacallan    int       srcY,
10005c1003fSmacallan    int       dstX,
10105c1003fSmacallan    int       dstY,
10205c1003fSmacallan    int       w,
10305c1003fSmacallan    int       h
10405c1003fSmacallan)
10505c1003fSmacallan{
10605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
10705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
10805c1003fSmacallan
109092d2b73Smacallan	ENTER;
110092d2b73Smacallan
11105c1003fSmacallan	NVDmaStart(pNv, BLIT_POINT_SRC, 3);
11205c1003fSmacallan	NVDmaNext (pNv, (srcY << 16) | srcX);
11305c1003fSmacallan	NVDmaNext (pNv, (dstY << 16) | dstX);
11405c1003fSmacallan	NVDmaNext (pNv, (h  << 16) | w);
11505c1003fSmacallan
11605c1003fSmacallan	if((w * h) >= 512)
117c20046b8Smacallan		NVDmaKickoff(pNv);
11805c1003fSmacallan
11905c1003fSmacallan	LEAVE;
12005c1003fSmacallan}
12105c1003fSmacallan
12205c1003fSmacallanstatic void
12305c1003fSmacallanNvDoneCopy(PixmapPtr pDstPixmap)
12405c1003fSmacallan{
12505c1003fSmacallan    ENTER;
12605c1003fSmacallan    LEAVE;
12705c1003fSmacallan}
12805c1003fSmacallan
12905c1003fSmacallanstatic Bool
13005c1003fSmacallanNvPrepareSolid(
13105c1003fSmacallan    PixmapPtr pPixmap,
13205c1003fSmacallan    int rop,
13305c1003fSmacallan    Pixel planemask,
13405c1003fSmacallan    Pixel color)
13505c1003fSmacallan{
13605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
13705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
138092d2b73Smacallan	uint32_t pitch, off;
13905c1003fSmacallan
14005c1003fSmacallan	ENTER;
14114032a88Smacallan
14214032a88Smacallan	if (pPixmap->drawable.bitsPerPixel != 32) {
14314032a88Smacallan#ifdef DEBUG
14405c1003fSmacallan		xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pPixmap->drawable.bitsPerPixel);
14514032a88Smacallan#endif
14614032a88Smacallan		return FALSE;
14714032a88Smacallan	}
14805c1003fSmacallan	planemask |= ~0 << pNv->CurrentLayout.depth;
149c20046b8Smacallan	off = exaGetPixmapOffset(pPixmap);
150c20046b8Smacallan
151c20046b8Smacallan	/*
152c20046b8Smacallan	 * XXX
153c20046b8Smacallan	 * on my 6800 Ultra the drawing engine stalls when drawing at least
1549ea41ceaSmacallan	 * some rectangles into off-screen memory. Draw them by software until
1559ea41ceaSmacallan	 * I figure out what's going on
156c20046b8Smacallan	 */
1579ea41ceaSmacallan	if (pNv->Architecture >= NV_ARCH_40) {
1589ea41ceaSmacallan		if (off != 0) return FALSE;
1599ea41ceaSmacallan	}
1609ea41ceaSmacallan
161c20046b8Smacallan	NVSetRopSolid(pScrn, rop, planemask);
16205c1003fSmacallan
16305c1003fSmacallan	pitch = exaGetPixmapPitch(pPixmap);
16405c1003fSmacallan
165092d2b73Smacallan	NVDmaStart(pNv, SURFACE_FORMAT, 4);
166092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
16705c1003fSmacallan	NVDmaNext (pNv, pitch | (pitch << 16));
16805c1003fSmacallan	NVDmaNext (pNv, off);
16905c1003fSmacallan	NVDmaNext (pNv, off);
17005c1003fSmacallan
171092d2b73Smacallan	NVDmaStart(pNv, RECT_FORMAT, 1);
172092d2b73Smacallan	NVDmaNext (pNv, pNv->rectFormat);
173092d2b73Smacallan
17405c1003fSmacallan	NVDmaStart(pNv, RECT_SOLID_COLOR, 1);
17505c1003fSmacallan	NVDmaNext (pNv, color);
17605c1003fSmacallan
177092d2b73Smacallan	pNv->DMAKickoffCallback = NVDMAKickoffCallback;
178092d2b73Smacallan
17905c1003fSmacallan	LEAVE;
18005c1003fSmacallan	return TRUE;
18105c1003fSmacallan}
18205c1003fSmacallan
18305c1003fSmacallanstatic void
18405c1003fSmacallanNvSolid(
18505c1003fSmacallan    PixmapPtr pPixmap,
18605c1003fSmacallan    int x1,
18705c1003fSmacallan    int y1,
18805c1003fSmacallan    int x2,
18905c1003fSmacallan    int y2)
19005c1003fSmacallan{
19105c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
19205c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
19305c1003fSmacallan	int w = x2 - x1, h = y2 - y1;
19405c1003fSmacallan
19505c1003fSmacallan	ENTER;
196092d2b73Smacallan
19705c1003fSmacallan	NVDmaStart(pNv, RECT_SOLID_RECTS(0), 2);
19805c1003fSmacallan	NVDmaNext (pNv, (x1 << 16) | y1);
19905c1003fSmacallan	NVDmaNext (pNv, (w << 16) | h);
20005c1003fSmacallan
20105c1003fSmacallan	if((w * h) >= 512)
20205c1003fSmacallan		NVDmaKickoff(pNv);
20305c1003fSmacallan
20405c1003fSmacallan	LEAVE;
20505c1003fSmacallan}
20605c1003fSmacallan
20705c1003fSmacallan/*
20805c1003fSmacallan * Memcpy-based UTS.
20905c1003fSmacallan */
21005c1003fSmacallanstatic Bool
21105c1003fSmacallanNvUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
21205c1003fSmacallan    char *src, int src_pitch)
21305c1003fSmacallan{
21405c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
21505c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
21605c1003fSmacallan	unsigned char *dst = pNv->FbStart + exaGetPixmapOffset(pDst);
21705c1003fSmacallan	int dst_pitch = exaGetPixmapPitch(pDst);
21805c1003fSmacallan
21905c1003fSmacallan	int bpp    = pDst->drawable.bitsPerPixel;
22005c1003fSmacallan	int cpp    = (bpp + 7) >> 3;
22105c1003fSmacallan	int wBytes = w * cpp;
22205c1003fSmacallan
22305c1003fSmacallan	ENTER;
22405c1003fSmacallan	dst += (x * cpp) + (y * dst_pitch);
22505c1003fSmacallan
22605c1003fSmacallan	NVSync(pScrn);
22705c1003fSmacallan
22805c1003fSmacallan	while (h--) {
22905c1003fSmacallan		memcpy(dst, src, wBytes);
23005c1003fSmacallan		src += src_pitch;
23105c1003fSmacallan		dst += dst_pitch;
23205c1003fSmacallan	}
233092d2b73Smacallan
23405c1003fSmacallan	LEAVE;
23505c1003fSmacallan	return TRUE;
23605c1003fSmacallan}
23705c1003fSmacallan
23805c1003fSmacallan/*
23905c1003fSmacallan * Memcpy-based DFS.
24005c1003fSmacallan */
24105c1003fSmacallanstatic Bool
24205c1003fSmacallanNvDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
24305c1003fSmacallan    char *dst, int dst_pitch)
24405c1003fSmacallan{
24505c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
24605c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
24705c1003fSmacallan	unsigned char *src = pNv->FbStart + exaGetPixmapOffset(pSrc);
24805c1003fSmacallan	int src_pitch = exaGetPixmapPitch(pSrc);
24905c1003fSmacallan
25005c1003fSmacallan	int bpp    = pSrc->drawable.bitsPerPixel;
25105c1003fSmacallan	int cpp    = (bpp + 7) >> 3;
25205c1003fSmacallan	int wBytes = w * cpp;
25305c1003fSmacallan
25405c1003fSmacallan	ENTER;
25505c1003fSmacallan	src += (x * cpp) + (y * src_pitch);
25605c1003fSmacallan
25705c1003fSmacallan	NVSync(pScrn);
25805c1003fSmacallan
25905c1003fSmacallan	while (h--) {
26005c1003fSmacallan		memcpy(dst, src, wBytes);
26105c1003fSmacallan		src += src_pitch;
26205c1003fSmacallan		dst += dst_pitch;
26305c1003fSmacallan	}
26405c1003fSmacallan	LEAVE;
26505c1003fSmacallan	return TRUE;
26605c1003fSmacallan}
26705c1003fSmacallan
2682d0d2c2bSmacallanstatic Bool
2692d0d2c2bSmacallanNvPrepareAccess(PixmapPtr pPix, int index)
2702d0d2c2bSmacallan{
2712d0d2c2bSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2722d0d2c2bSmacallan
2732d0d2c2bSmacallan	NVSync(pScrn);
2742d0d2c2bSmacallan	return TRUE;
2752d0d2c2bSmacallan}
2762d0d2c2bSmacallan
2772d0d2c2bSmacallanstatic void
2782d0d2c2bSmacallanNvFinishAccess(PixmapPtr pPix, int index)
2792d0d2c2bSmacallan{
2802d0d2c2bSmacallan}
2812d0d2c2bSmacallan
28205c1003fSmacallanBool
28305c1003fSmacallanNvInitExa(ScreenPtr pScreen)
28405c1003fSmacallan{
28505c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
28605c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
28705c1003fSmacallan	ExaDriverPtr pExa;
28805c1003fSmacallan
28905c1003fSmacallan	pExa = exaDriverAlloc();
29005c1003fSmacallan	if (!pExa)
29105c1003fSmacallan		return FALSE;
29205c1003fSmacallan
29305c1003fSmacallan	pNv->pExa = pExa;
29405c1003fSmacallan
29505c1003fSmacallan	NVResetGraphics(pScrn);
29605c1003fSmacallan
29705c1003fSmacallan	pExa->exa_major = EXA_VERSION_MAJOR;
29805c1003fSmacallan	pExa->exa_minor = EXA_VERSION_MINOR;
29905c1003fSmacallan
30005c1003fSmacallan	pExa->memoryBase = pNv->FbStart;
301092d2b73Smacallan	pExa->memorySize = pNv->ScratchBufferStart & (~255);
302092d2b73Smacallan	pExa->offScreenBase = (((pScrn->virtualY * pScrn->displayWidth *
303092d2b73Smacallan			       pScrn->bitsPerPixel >> 3) + 255) & (~255));
304092d2b73Smacallan	pExa->pixmapOffsetAlign = 256;
305092d2b73Smacallan	pExa->pixmapPitchAlign = 256;
30605c1003fSmacallan
30714032a88Smacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS |
30814032a88Smacallan		      EXA_MIXED_PIXMAPS;
30905c1003fSmacallan
310092d2b73Smacallan	pExa->maxX = 4096;
311092d2b73Smacallan	pExa->maxY = 4096;
31205c1003fSmacallan
31305c1003fSmacallan	pExa->WaitMarker = NvWaitMarker;
31405c1003fSmacallan	pExa->PrepareSolid = NvPrepareSolid;
31505c1003fSmacallan	pExa->Solid = NvSolid;
31605c1003fSmacallan	pExa->DoneSolid = NvDoneCopy;
31705c1003fSmacallan	pExa->PrepareCopy = NvPrepareCopy;
31805c1003fSmacallan	pExa->Copy = NvCopy;
31905c1003fSmacallan	pExa->DoneCopy = NvDoneCopy;
32005c1003fSmacallan
32105c1003fSmacallan	switch(pNv->CurrentLayout.depth) {
32205c1003fSmacallan	case 24:
323092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH24;
324092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH24;
32505c1003fSmacallan		break;
32605c1003fSmacallan	case 16:
32705c1003fSmacallan	case 15:
328092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH16;
329092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH16;
33005c1003fSmacallan		break;
33105c1003fSmacallan	default:
332092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH8;
333092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH8;
33405c1003fSmacallan		break;
33505c1003fSmacallan	}
33605c1003fSmacallan	NVDmaStart(pNv, SURFACE_FORMAT, 1);
337092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
33805c1003fSmacallan	NVDmaStart(pNv, RECT_FORMAT, 1);
339092d2b73Smacallan	NVDmaNext (pNv, pNv->rectFormat);
34005c1003fSmacallan
341c20046b8Smacallan	NVDmaStart(pNv, PATTERN_COLOR_0, 4);
342c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
343c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
344c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
345c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
346c20046b8Smacallan
34705c1003fSmacallan	pNv->currentRop = ~0;  /* set to something invalid */
34805c1003fSmacallan	NVSetRopSolid(pScrn, GXcopy, ~0);
34905c1003fSmacallan
35005c1003fSmacallan	NVDmaKickoff(pNv);
35105c1003fSmacallan
35205c1003fSmacallan	/* EXA hits more optimized paths when it does not have to fallback
35305c1003fSmacallan	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
35405c1003fSmacallan	 */
35505c1003fSmacallan	pExa->UploadToScreen = NvUploadToScreen;
35605c1003fSmacallan	pExa->DownloadFromScreen = NvDownloadFromScreen;
3572d0d2c2bSmacallan	pExa->PrepareAccess = NvPrepareAccess;
3582d0d2c2bSmacallan	pExa->FinishAccess = NvFinishAccess;
3592d0d2c2bSmacallan
36005c1003fSmacallan	return exaDriverInit(pScreen, pExa);
36105c1003fSmacallan}
362