nv_exa.c revision c20046b8
105c1003fSmacallan/*
205c1003fSmacallan * crude EXA support for geforce chips
305c1003fSmacallan *
405c1003fSmacallan * Copyright (C) 2018 Michael Lorenz
505c1003fSmacallan *
605c1003fSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy
705c1003fSmacallan * of this software and associated documentation files (the "Software"), to deal
805c1003fSmacallan * in the Software without restriction, including without limitation the rights
905c1003fSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1005c1003fSmacallan * copies of the Software, and to permit persons to whom the Software is
1105c1003fSmacallan * furnished to do so, subject to the following conditions:
1205c1003fSmacallan *
1305c1003fSmacallan * The above copyright notice and this permission notice shall be included in
1405c1003fSmacallan * all copies or substantial portions of the Software.
1505c1003fSmacallan *
1605c1003fSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1705c1003fSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1805c1003fSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1905c1003fSmacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
2005c1003fSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2105c1003fSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2205c1003fSmacallan */
2305c1003fSmacallan
24c20046b8Smacallan/* $NetBSD: nv_exa.c,v 1.5 2018/10/05 01:53:54 macallan Exp $ */
2505c1003fSmacallan
2605c1003fSmacallan#ifdef HAVE_CONFIG_H
2705c1003fSmacallan#include "config.h"
2805c1003fSmacallan#endif
2905c1003fSmacallan
3005c1003fSmacallan#include "nv_include.h"
3105c1003fSmacallan#include "miline.h"
3205c1003fSmacallan#include "nv_dma.h"
3305c1003fSmacallan#include "exa.h"
3405c1003fSmacallan
3505c1003fSmacallan//#define DEBUG
3605c1003fSmacallan
/*
 * Function entry/exit tracing.  Unless DEBUG is defined both macros expand
 * to nothing; with DEBUG they log the enclosing function's name through
 * xf86Msg().
 */
#ifndef DEBUG
#define ENTER
#define LEAVE
#else
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#endif
4405c1003fSmacallan
4505c1003fSmacallanstatic void
4605c1003fSmacallanNvWaitMarker(ScreenPtr pScreen, int Marker)
4705c1003fSmacallan{
4805c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
4905c1003fSmacallan
5005c1003fSmacallan	ENTER;
5105c1003fSmacallan	NVSync(pScrn);
5205c1003fSmacallan	LEAVE;
5305c1003fSmacallan}
5405c1003fSmacallan
5505c1003fSmacallanstatic Bool
5605c1003fSmacallanNvPrepareCopy
5705c1003fSmacallan(
5805c1003fSmacallan    PixmapPtr pSrcPixmap,
5905c1003fSmacallan    PixmapPtr pDstPixmap,
6005c1003fSmacallan    int       xdir,
6105c1003fSmacallan    int       ydir,
6205c1003fSmacallan    int       rop,
6305c1003fSmacallan    Pixel     planemask
6405c1003fSmacallan)
6505c1003fSmacallan{
6605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
6705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
68092d2b73Smacallan	uint32_t dstpitch, dstoff, srcpitch, srcoff;
6905c1003fSmacallan
7005c1003fSmacallan	ENTER;
7105c1003fSmacallan	if (pSrcPixmap->drawable.bitsPerPixel != 32)
7205c1003fSmacallan		xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pSrcPixmap->drawable.bitsPerPixel);
7305c1003fSmacallan	planemask |= ~0 << pNv->CurrentLayout.depth;
7405c1003fSmacallan	NVSetRopSolid(pScrn, rop, planemask);
75092d2b73Smacallan
7605c1003fSmacallan	dstpitch = exaGetPixmapPitch(pDstPixmap);
7705c1003fSmacallan	dstoff = exaGetPixmapOffset(pDstPixmap);
7805c1003fSmacallan	srcpitch = exaGetPixmapPitch(pSrcPixmap);
7905c1003fSmacallan	srcoff = exaGetPixmapOffset(pSrcPixmap);
8005c1003fSmacallan
81092d2b73Smacallan
82092d2b73Smacallan	NVDmaStart(pNv, SURFACE_FORMAT, 4);
83092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
8405c1003fSmacallan	NVDmaNext (pNv, srcpitch | (dstpitch << 16));
8505c1003fSmacallan	NVDmaNext (pNv, srcoff);
8605c1003fSmacallan	NVDmaNext (pNv, dstoff);
8705c1003fSmacallan
88092d2b73Smacallan	pNv->DMAKickoffCallback = NVDMAKickoffCallback;
89092d2b73Smacallan
9005c1003fSmacallan	LEAVE;
9105c1003fSmacallan	return TRUE;
9205c1003fSmacallan}
9305c1003fSmacallan
9405c1003fSmacallanstatic void
9505c1003fSmacallanNvCopy
9605c1003fSmacallan(
9705c1003fSmacallan    PixmapPtr pDstPixmap,
9805c1003fSmacallan    int       srcX,
9905c1003fSmacallan    int       srcY,
10005c1003fSmacallan    int       dstX,
10105c1003fSmacallan    int       dstY,
10205c1003fSmacallan    int       w,
10305c1003fSmacallan    int       h
10405c1003fSmacallan)
10505c1003fSmacallan{
10605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
10705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
10805c1003fSmacallan
109092d2b73Smacallan	ENTER;
110092d2b73Smacallan
11105c1003fSmacallan	NVDmaStart(pNv, BLIT_POINT_SRC, 3);
11205c1003fSmacallan	NVDmaNext (pNv, (srcY << 16) | srcX);
11305c1003fSmacallan	NVDmaNext (pNv, (dstY << 16) | dstX);
11405c1003fSmacallan	NVDmaNext (pNv, (h  << 16) | w);
11505c1003fSmacallan
11605c1003fSmacallan	if((w * h) >= 512)
117c20046b8Smacallan		NVDmaKickoff(pNv);
11805c1003fSmacallan
11905c1003fSmacallan	LEAVE;
12005c1003fSmacallan}
12105c1003fSmacallan
12205c1003fSmacallanstatic void
12305c1003fSmacallanNvDoneCopy(PixmapPtr pDstPixmap)
12405c1003fSmacallan{
12505c1003fSmacallan    ENTER;
12605c1003fSmacallan    LEAVE;
12705c1003fSmacallan}
12805c1003fSmacallan
12905c1003fSmacallanstatic Bool
13005c1003fSmacallanNvPrepareSolid(
13105c1003fSmacallan    PixmapPtr pPixmap,
13205c1003fSmacallan    int rop,
13305c1003fSmacallan    Pixel planemask,
13405c1003fSmacallan    Pixel color)
13505c1003fSmacallan{
13605c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
13705c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
138092d2b73Smacallan	uint32_t pitch, off;
13905c1003fSmacallan
14005c1003fSmacallan	ENTER;
14105c1003fSmacallan	if (pPixmap->drawable.bitsPerPixel != 32)
14205c1003fSmacallan		xf86Msg(X_ERROR, "%s %d bpp\n", __func__, pPixmap->drawable.bitsPerPixel);
14305c1003fSmacallan	planemask |= ~0 << pNv->CurrentLayout.depth;
144c20046b8Smacallan	off = exaGetPixmapOffset(pPixmap);
145c20046b8Smacallan
146c20046b8Smacallan	/*
147c20046b8Smacallan	 * XXX
148c20046b8Smacallan	 * on my 6800 Ultra the drawing engine stalls when drawing at least
149c20046b8Smacallan	 * rectangles into off-screen memory. Draw them by software until I figure out
150c20046b8Smacallan	 * what's going on
151c20046b8Smacallan	 */
152c20046b8Smacallan	if (off != 0) return FALSE;
153c20046b8Smacallan
154c20046b8Smacallan	NVSetRopSolid(pScrn, rop, planemask);
15505c1003fSmacallan
15605c1003fSmacallan	pitch = exaGetPixmapPitch(pPixmap);
15705c1003fSmacallan
158092d2b73Smacallan	NVDmaStart(pNv, SURFACE_FORMAT, 4);
159092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
16005c1003fSmacallan	NVDmaNext (pNv, pitch | (pitch << 16));
16105c1003fSmacallan	NVDmaNext (pNv, off);
16205c1003fSmacallan	NVDmaNext (pNv, off);
16305c1003fSmacallan
164092d2b73Smacallan	NVDmaStart(pNv, RECT_FORMAT, 1);
165092d2b73Smacallan	NVDmaNext (pNv, pNv->rectFormat);
166092d2b73Smacallan
16705c1003fSmacallan	NVDmaStart(pNv, RECT_SOLID_COLOR, 1);
16805c1003fSmacallan	NVDmaNext (pNv, color);
16905c1003fSmacallan
170092d2b73Smacallan	pNv->DMAKickoffCallback = NVDMAKickoffCallback;
171092d2b73Smacallan
17205c1003fSmacallan	LEAVE;
17305c1003fSmacallan	return TRUE;
17405c1003fSmacallan}
17505c1003fSmacallan
17605c1003fSmacallanstatic void
17705c1003fSmacallanNvSolid(
17805c1003fSmacallan    PixmapPtr pPixmap,
17905c1003fSmacallan    int x1,
18005c1003fSmacallan    int y1,
18105c1003fSmacallan    int x2,
18205c1003fSmacallan    int y2)
18305c1003fSmacallan{
18405c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
18505c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
18605c1003fSmacallan	int w = x2 - x1, h = y2 - y1;
18705c1003fSmacallan
18805c1003fSmacallan	ENTER;
189092d2b73Smacallan
19005c1003fSmacallan	NVDmaStart(pNv, RECT_SOLID_RECTS(0), 2);
19105c1003fSmacallan	NVDmaNext (pNv, (x1 << 16) | y1);
19205c1003fSmacallan	NVDmaNext (pNv, (w << 16) | h);
19305c1003fSmacallan
19405c1003fSmacallan	if((w * h) >= 512)
19505c1003fSmacallan		NVDmaKickoff(pNv);
19605c1003fSmacallan
19705c1003fSmacallan	LEAVE;
19805c1003fSmacallan}
19905c1003fSmacallan
20005c1003fSmacallan/*
20105c1003fSmacallan * Memcpy-based UTS.
20205c1003fSmacallan */
20305c1003fSmacallanstatic Bool
20405c1003fSmacallanNvUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
20505c1003fSmacallan    char *src, int src_pitch)
20605c1003fSmacallan{
20705c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
20805c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
20905c1003fSmacallan	unsigned char *dst = pNv->FbStart + exaGetPixmapOffset(pDst);
21005c1003fSmacallan	int dst_pitch = exaGetPixmapPitch(pDst);
21105c1003fSmacallan
21205c1003fSmacallan	int bpp    = pDst->drawable.bitsPerPixel;
21305c1003fSmacallan	int cpp    = (bpp + 7) >> 3;
21405c1003fSmacallan	int wBytes = w * cpp;
21505c1003fSmacallan
21605c1003fSmacallan	ENTER;
21705c1003fSmacallan	dst += (x * cpp) + (y * dst_pitch);
21805c1003fSmacallan
21905c1003fSmacallan	NVSync(pScrn);
22005c1003fSmacallan
22105c1003fSmacallan	while (h--) {
22205c1003fSmacallan		memcpy(dst, src, wBytes);
22305c1003fSmacallan		src += src_pitch;
22405c1003fSmacallan		dst += dst_pitch;
22505c1003fSmacallan	}
226092d2b73Smacallan
22705c1003fSmacallan	LEAVE;
22805c1003fSmacallan	return TRUE;
22905c1003fSmacallan}
23005c1003fSmacallan
23105c1003fSmacallan/*
23205c1003fSmacallan * Memcpy-based DFS.
23305c1003fSmacallan */
23405c1003fSmacallanstatic Bool
23505c1003fSmacallanNvDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
23605c1003fSmacallan    char *dst, int dst_pitch)
23705c1003fSmacallan{
23805c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
23905c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
24005c1003fSmacallan	unsigned char *src = pNv->FbStart + exaGetPixmapOffset(pSrc);
24105c1003fSmacallan	int src_pitch = exaGetPixmapPitch(pSrc);
24205c1003fSmacallan
24305c1003fSmacallan	int bpp    = pSrc->drawable.bitsPerPixel;
24405c1003fSmacallan	int cpp    = (bpp + 7) >> 3;
24505c1003fSmacallan	int wBytes = w * cpp;
24605c1003fSmacallan
24705c1003fSmacallan	ENTER;
24805c1003fSmacallan	src += (x * cpp) + (y * src_pitch);
24905c1003fSmacallan
25005c1003fSmacallan	NVSync(pScrn);
25105c1003fSmacallan
25205c1003fSmacallan	while (h--) {
25305c1003fSmacallan		memcpy(dst, src, wBytes);
25405c1003fSmacallan		src += src_pitch;
25505c1003fSmacallan		dst += dst_pitch;
25605c1003fSmacallan	}
25705c1003fSmacallan	LEAVE;
25805c1003fSmacallan	return TRUE;
25905c1003fSmacallan}
26005c1003fSmacallan
2612d0d2c2bSmacallanstatic Bool
2622d0d2c2bSmacallanNvPrepareAccess(PixmapPtr pPix, int index)
2632d0d2c2bSmacallan{
2642d0d2c2bSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2652d0d2c2bSmacallan
2662d0d2c2bSmacallan	NVSync(pScrn);
2672d0d2c2bSmacallan	return TRUE;
2682d0d2c2bSmacallan}
2692d0d2c2bSmacallan
2702d0d2c2bSmacallanstatic void
2712d0d2c2bSmacallanNvFinishAccess(PixmapPtr pPix, int index)
2722d0d2c2bSmacallan{
2732d0d2c2bSmacallan}
2742d0d2c2bSmacallan
27505c1003fSmacallanBool
27605c1003fSmacallanNvInitExa(ScreenPtr pScreen)
27705c1003fSmacallan{
27805c1003fSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
27905c1003fSmacallan	NVPtr pNv = NVPTR(pScrn);
28005c1003fSmacallan	ExaDriverPtr pExa;
28105c1003fSmacallan	int surfaceFormat, rectFormat;
28205c1003fSmacallan
28305c1003fSmacallan	pExa = exaDriverAlloc();
28405c1003fSmacallan	if (!pExa)
28505c1003fSmacallan		return FALSE;
28605c1003fSmacallan
28705c1003fSmacallan	pNv->pExa = pExa;
28805c1003fSmacallan
28905c1003fSmacallan	NVResetGraphics(pScrn);
29005c1003fSmacallan
29105c1003fSmacallan	pExa->exa_major = EXA_VERSION_MAJOR;
29205c1003fSmacallan	pExa->exa_minor = EXA_VERSION_MINOR;
29305c1003fSmacallan
29405c1003fSmacallan	pExa->memoryBase = pNv->FbStart;
295092d2b73Smacallan	pExa->memorySize = pNv->ScratchBufferStart & (~255);
296092d2b73Smacallan	pExa->offScreenBase = (((pScrn->virtualY * pScrn->displayWidth *
297092d2b73Smacallan			       pScrn->bitsPerPixel >> 3) + 255) & (~255));
298092d2b73Smacallan	pExa->pixmapOffsetAlign = 256;
299092d2b73Smacallan	pExa->pixmapPitchAlign = 256;
30005c1003fSmacallan
301092d2b73Smacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS/* |
302092d2b73Smacallan		      EXA_MIXED_PIXMAPS*/;
30305c1003fSmacallan
304092d2b73Smacallan	pExa->maxX = 4096;
305092d2b73Smacallan	pExa->maxY = 4096;
30605c1003fSmacallan
30705c1003fSmacallan	pExa->WaitMarker = NvWaitMarker;
30805c1003fSmacallan	pExa->PrepareSolid = NvPrepareSolid;
30905c1003fSmacallan	pExa->Solid = NvSolid;
31005c1003fSmacallan	pExa->DoneSolid = NvDoneCopy;
31105c1003fSmacallan	pExa->PrepareCopy = NvPrepareCopy;
31205c1003fSmacallan	pExa->Copy = NvCopy;
31305c1003fSmacallan	pExa->DoneCopy = NvDoneCopy;
31405c1003fSmacallan
31505c1003fSmacallan	switch(pNv->CurrentLayout.depth) {
31605c1003fSmacallan	case 24:
317092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH24;
318092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH24;
31905c1003fSmacallan		break;
32005c1003fSmacallan	case 16:
32105c1003fSmacallan	case 15:
322092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH16;
323092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH16;
32405c1003fSmacallan		break;
32505c1003fSmacallan	default:
326092d2b73Smacallan		pNv->surfaceFormat = SURFACE_FORMAT_DEPTH8;
327092d2b73Smacallan		pNv->rectFormat = RECT_FORMAT_DEPTH8;
32805c1003fSmacallan		break;
32905c1003fSmacallan	}
33005c1003fSmacallan	NVDmaStart(pNv, SURFACE_FORMAT, 1);
331092d2b73Smacallan	NVDmaNext (pNv, pNv->surfaceFormat);
33205c1003fSmacallan	NVDmaStart(pNv, RECT_FORMAT, 1);
333092d2b73Smacallan	NVDmaNext (pNv, pNv->rectFormat);
33405c1003fSmacallan
335c20046b8Smacallan	NVDmaStart(pNv, PATTERN_COLOR_0, 4);
336c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
337c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
338c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
339c20046b8Smacallan	NVDmaNext (pNv, 0xffffffff);
340c20046b8Smacallan
34105c1003fSmacallan	pNv->currentRop = ~0;  /* set to something invalid */
34205c1003fSmacallan	NVSetRopSolid(pScrn, GXcopy, ~0);
34305c1003fSmacallan
34405c1003fSmacallan	NVDmaKickoff(pNv);
34505c1003fSmacallan
34605c1003fSmacallan	/* EXA hits more optimized paths when it does not have to fallback
34705c1003fSmacallan	 * because of missing UTS/DFS, hook memcpy-based UTS/DFS.
34805c1003fSmacallan	 */
34905c1003fSmacallan	pExa->UploadToScreen = NvUploadToScreen;
35005c1003fSmacallan	pExa->DownloadFromScreen = NvDownloadFromScreen;
3512d0d2c2bSmacallan	pExa->PrepareAccess = NvPrepareAccess;
3522d0d2c2bSmacallan	pExa->FinishAccess = NvFinishAccess;
3532d0d2c2bSmacallan
35405c1003fSmacallan	return exaDriverInit(pScreen, pExa);
35505c1003fSmacallan}
356