1898d24fdSmacallan/*
2898d24fdSmacallan * Southland Media MGX - hardware acceleration.
3898d24fdSmacallan *
4898d24fdSmacallan * Copyright (C) 2021 Michael Lorenz
5898d24fdSmacallan *
6898d24fdSmacallan * Permission is hereby granted, free of charge, to any person obtaining a copy
7898d24fdSmacallan * of this software and associated documentation files (the "Software"), to deal
8898d24fdSmacallan * in the Software without restriction, including without limitation the rights
9898d24fdSmacallan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10898d24fdSmacallan * copies of the Software, and to permit persons to whom the Software is
11898d24fdSmacallan * furnished to do so, subject to the following conditions:
12898d24fdSmacallan *
13898d24fdSmacallan * The above copyright notice and this permission notice shall be included in
14898d24fdSmacallan * all copies or substantial portions of the Software.
15898d24fdSmacallan *
16898d24fdSmacallan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17898d24fdSmacallan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18898d24fdSmacallan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19898d24fdSmacallan * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20898d24fdSmacallan * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21898d24fdSmacallan * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22898d24fdSmacallan */
23898d24fdSmacallan
24898d24fdSmacallan/* $NetBSD: mgx_accel.c,v 1.1 2021/11/12 18:58:14 macallan Exp $ */
25898d24fdSmacallan
26898d24fdSmacallan#include <sys/types.h>
27898d24fdSmacallan
28898d24fdSmacallan#include "mgx.h"
29898d24fdSmacallan#include <dev/sbus/mgxreg.h>
30898d24fdSmacallan
/*
 * Debug tracing helpers.
 *
 * With DEBUG defined, ENTER/LEAVE log the name of the current function and
 * DBGMSG forwards its arguments to xf86Msg().  Without DEBUG all three
 * compile to nothing at run time; DBGMSG still names xf86Msg() inside a
 * dead branch so its arguments stay referenced.
 *
 * Every macro expands to a single statement-safe construct, so it can be
 * used as the body of an unbraced if/else without capturing the else.
 */
//#define DEBUG

#ifdef DEBUG
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
#define DBGMSG(...) xf86Msg(__VA_ARGS__)
#else
#define ENTER ((void)0)
#define LEAVE ((void)0)
#define DBGMSG(...) do { if (0) xf86Msg(__VA_ARGS__); } while (0)
#endif
42898d24fdSmacallan
/*
 * Translation from X ROP's to APM ROP's.
 *
 * Indexed by the X11 raster operation code (GXclear = 0 ... GXset = 15);
 * the value is what gets written to ATR_ROP.  The table is read-only.
 */
static const unsigned char apmROP[] = {
  0x00,	/* GXclear */
  0x88,	/* GXand */
  0x44,	/* GXandReverse */
  0xCC,	/* GXcopy */
  0x22,	/* GXandInverted */
  0xAA,	/* GXnoop */
  0x66,	/* GXxor */
  0xEE,	/* GXor */
  0x11,	/* GXnor */
  0x99,	/* GXequiv */
  0x55,	/* GXinvert */
  0xDD,	/* GXorReverse */
  0x33,	/* GXcopyInverted */
  0xBB,	/* GXorInverted */
  0x77,	/* GXnand */
  0xFF	/* GXset */
};
62898d24fdSmacallan
63898d24fdSmacallanstatic void
64898d24fdSmacallanMgxWaitMarker(ScreenPtr pScreen, int Marker)
65898d24fdSmacallan{
66898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
67898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
68898d24fdSmacallan	int bail = 0x0fffffff;
69898d24fdSmacallan	uint8_t stat;
70898d24fdSmacallan
71898d24fdSmacallan	ENTER;
72898d24fdSmacallan	do {
73898d24fdSmacallan		stat = MgxRead1(pMgx, ATR_BLT_STATUS);
74898d24fdSmacallan		if ((stat & (BLT_HOST_BUSY | BLT_ENGINE_BUSY)) == 0)
75898d24fdSmacallan			break;
76898d24fdSmacallan		bail--;
77898d24fdSmacallan	} while (bail < 0);
78898d24fdSmacallan	if (bail == 0) DBGMSG(X_ERROR, "%s timed out\n", __func__);
79898d24fdSmacallan	LEAVE;
80898d24fdSmacallan}
81898d24fdSmacallan
82898d24fdSmacallanstatic void
83898d24fdSmacallanMgxWait(MgxPtr pMgx)
84898d24fdSmacallan{
85898d24fdSmacallan	int bail = 10000;
86898d24fdSmacallan	uint8_t stat;
87898d24fdSmacallan
88898d24fdSmacallan	ENTER;
89898d24fdSmacallan	do {
90898d24fdSmacallan		stat = MgxRead1(pMgx, ATR_BLT_STATUS);
91898d24fdSmacallan		if ((stat & (BLT_HOST_BUSY | BLT_ENGINE_BUSY)) == 0)
92898d24fdSmacallan			break;
93898d24fdSmacallan		bail--;
94898d24fdSmacallan	} while (bail < 0);
95898d24fdSmacallan	if (bail == 0) DBGMSG(X_ERROR, "%s timed out\n", __func__);
96898d24fdSmacallan	LEAVE;
97898d24fdSmacallan}
98898d24fdSmacallan
99898d24fdSmacallanstatic void
100898d24fdSmacallanMgxWaitFifo(MgxPtr pMgx, int depth)
101898d24fdSmacallan{
102898d24fdSmacallan	unsigned int i;
103898d24fdSmacallan	uint8_t stat;
104898d24fdSmacallan
105898d24fdSmacallan	ENTER;
106898d24fdSmacallan
107898d24fdSmacallan	for (i = 100000; i != 0; i--) {
108898d24fdSmacallan		stat = MgxRead1(pMgx, ATR_FIFO_STATUS);
109898d24fdSmacallan		stat = (stat & FIFO_MASK) >> FIFO_SHIFT;
110898d24fdSmacallan		DBGMSG(X_ERROR, "%s %x\n", __func__, stat);
111898d24fdSmacallan		if (stat >= depth)
112898d24fdSmacallan			break;
113898d24fdSmacallan		MgxWrite1(pMgx, ATR_FIFO_STATUS, 0);
114898d24fdSmacallan	}
115898d24fdSmacallan	if (i == 0) xf86Msg(X_ERROR, "%s timed out\n", __func__);
116898d24fdSmacallan	LEAVE;
117898d24fdSmacallan}
118898d24fdSmacallan
119898d24fdSmacallanstatic Bool
120898d24fdSmacallanMgxPrepareCopy
121898d24fdSmacallan(
122898d24fdSmacallan    PixmapPtr pSrcPixmap,
123898d24fdSmacallan    PixmapPtr pDstPixmap,
124898d24fdSmacallan    int       xdir,
125898d24fdSmacallan    int       ydir,
126898d24fdSmacallan    int       alu,
127898d24fdSmacallan    Pixel     planemask
128898d24fdSmacallan)
129898d24fdSmacallan{
130898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
131898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
132898d24fdSmacallan	int srcpitch = exaGetPixmapPitch(pSrcPixmap);
133898d24fdSmacallan	int srcoff = exaGetPixmapOffset(pSrcPixmap);
134898d24fdSmacallan
135898d24fdSmacallan	ENTER;
136898d24fdSmacallan
137898d24fdSmacallan	DBGMSG(X_ERROR, "%s %d %d\n", __func__, srcoff, srcpitch);
138898d24fdSmacallan	pMgx->offset = srcoff / srcpitch;
139898d24fdSmacallan
140898d24fdSmacallan	MgxWaitFifo(pMgx, 1);
141898d24fdSmacallan	MgxWrite1(pMgx, ATR_ROP, apmROP[alu]);
142898d24fdSmacallan	LEAVE;
143898d24fdSmacallan	return TRUE;
144898d24fdSmacallan}
145898d24fdSmacallan
146898d24fdSmacallanstatic void
147898d24fdSmacallanMgxCopy
148898d24fdSmacallan(
149898d24fdSmacallan    PixmapPtr pDstPixmap,
150898d24fdSmacallan    int       xs,
151898d24fdSmacallan    int       ys,
152898d24fdSmacallan    int       xd,
153898d24fdSmacallan    int       yd,
154898d24fdSmacallan    int       wi,
155898d24fdSmacallan    int       he
156898d24fdSmacallan)
157898d24fdSmacallan{
158898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
159898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
160898d24fdSmacallan	int dstpitch = exaGetPixmapPitch(pDstPixmap);
161898d24fdSmacallan	int dstoff = exaGetPixmapOffset(pDstPixmap);
162898d24fdSmacallan	uint32_t dec = pMgx->dec;
163898d24fdSmacallan
164898d24fdSmacallan	ENTER;
165898d24fdSmacallan
166898d24fdSmacallan	if (dstoff > 0 || 1) {
167898d24fdSmacallan		DBGMSG(X_ERROR, "%s %d %d\n", __func__, dstoff, dstpitch);
168898d24fdSmacallan		yd += dstoff / dstpitch;
169898d24fdSmacallan	}
170898d24fdSmacallan	ys += pMgx->offset;
171898d24fdSmacallan
172898d24fdSmacallan        dec |= (DEC_COMMAND_BLT << DEC_COMMAND_SHIFT) |
173898d24fdSmacallan	       (DEC_START_DIMX << DEC_START_SHIFT);
174898d24fdSmacallan
175898d24fdSmacallan	if ((xs < xd) && (ys == yd) && ((xd - xs) < wi)) {
176898d24fdSmacallan		xs += wi - 1;
177898d24fdSmacallan		xd += wi - 1;
178898d24fdSmacallan		dec |= DEC_DIR_X_REVERSE;
179898d24fdSmacallan	}
180898d24fdSmacallan	if (ys < yd) {
181898d24fdSmacallan		ys += he - 1;
182898d24fdSmacallan		yd += he - 1;
183898d24fdSmacallan		dec |= DEC_DIR_Y_REVERSE;
184898d24fdSmacallan	}
185898d24fdSmacallan
186898d24fdSmacallan	DBGMSG(X_ERROR, "%s %d %d %d %d -> %d %d\n", __func__, xs, ys, wi, he, xd, yd);
187898d24fdSmacallan	MgxWaitFifo(pMgx, 4);
188898d24fdSmacallan	MgxWrite4(pMgx, ATR_DEC, dec);
189898d24fdSmacallan	MgxWrite4(pMgx, ATR_SRC_XY, (ys << 16) | xs);
190898d24fdSmacallan	MgxWrite4(pMgx, ATR_DST_XY, (yd << 16) | xd);
191898d24fdSmacallan	MgxWrite4(pMgx, ATR_WH, (he << 16) | wi);
192898d24fdSmacallan
193898d24fdSmacallan	exaMarkSync(pDstPixmap->drawable.pScreen);
194898d24fdSmacallan	LEAVE;
195898d24fdSmacallan}
196898d24fdSmacallan
197898d24fdSmacallanstatic void
198898d24fdSmacallanMgxDoneCopy(PixmapPtr pDstPixmap)
199898d24fdSmacallan{
200898d24fdSmacallan    ENTER;
201898d24fdSmacallan    LEAVE;
202898d24fdSmacallan}
203898d24fdSmacallan
204898d24fdSmacallanstatic Bool
205898d24fdSmacallanMgxPrepareSolid(
206898d24fdSmacallan    PixmapPtr pPixmap,
207898d24fdSmacallan    int alu,
208898d24fdSmacallan    Pixel planemask,
209898d24fdSmacallan    Pixel fg)
210898d24fdSmacallan{
211898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
212898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
213898d24fdSmacallan	uint32_t dec;
214898d24fdSmacallan
215898d24fdSmacallan	ENTER;
216898d24fdSmacallan	dec = pMgx->dec;
217898d24fdSmacallan	dec |= (DEC_COMMAND_RECT << DEC_COMMAND_SHIFT) |
218898d24fdSmacallan	       (DEC_START_DIMX << DEC_START_SHIFT);
219898d24fdSmacallan	MgxWaitFifo(pMgx, 3);
220898d24fdSmacallan	MgxWrite1(pMgx, ATR_ROP, apmROP[alu]);
221898d24fdSmacallan	MgxWrite4(pMgx, ATR_FG, /*bswap32*/(fg));
222898d24fdSmacallan	MgxWrite4(pMgx, ATR_DEC, dec);
223898d24fdSmacallan	LEAVE;
224898d24fdSmacallan	return TRUE;
225898d24fdSmacallan}
226898d24fdSmacallan
227898d24fdSmacallanstatic void
228898d24fdSmacallanMgxSolid(
229898d24fdSmacallan    PixmapPtr pPixmap,
230898d24fdSmacallan    int x1,
231898d24fdSmacallan    int y1,
232898d24fdSmacallan    int x2,
233898d24fdSmacallan    int y2)
234898d24fdSmacallan{
235898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
236898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
237898d24fdSmacallan	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
238898d24fdSmacallan	int pitch = exaGetPixmapPitch(pPixmap);
239898d24fdSmacallan	int offset = exaGetPixmapOffset(pPixmap);
240898d24fdSmacallan
241898d24fdSmacallan	ENTER;
242898d24fdSmacallan	if (offset > 0) {
243898d24fdSmacallan		DBGMSG(X_ERROR, "%s %d %d\n", __func__, offset, pitch);
244898d24fdSmacallan		y1 += offset / pitch;
245898d24fdSmacallan	}
246898d24fdSmacallan	DBGMSG(X_ERROR, "%s %d %d %d %d\n", __func__, x1, y1, w, h);
247898d24fdSmacallan
248898d24fdSmacallan	MgxWaitFifo(pMgx, 2);
249898d24fdSmacallan	MgxWrite4(pMgx, ATR_DST_XY, (y1 << 16) | x1);
250898d24fdSmacallan	MgxWrite4(pMgx, ATR_WH, (h << 16) | w);
251898d24fdSmacallan	exaMarkSync(pPixmap->drawable.pScreen);
252898d24fdSmacallan	LEAVE;
253898d24fdSmacallan}
254898d24fdSmacallan
255898d24fdSmacallan/*
256898d24fdSmacallan * Memcpy-based UTS.
257898d24fdSmacallan */
258898d24fdSmacallanstatic Bool
259898d24fdSmacallanMgxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
260898d24fdSmacallan    char *srcc, int src_pitch)
261898d24fdSmacallan{
262898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
263898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
264898d24fdSmacallan	uint32_t *dst = (uint32_t *)(pMgx->fb + exaGetPixmapOffset(pDst));
265898d24fdSmacallan	uint32_t *src = (uint32_t *)srcc;
266898d24fdSmacallan	int i, dst_pitch = exaGetPixmapPitch(pDst) >> 2;
267898d24fdSmacallan
268898d24fdSmacallan	ENTER;
269898d24fdSmacallan	dst += x + (y * dst_pitch);
270898d24fdSmacallan
271898d24fdSmacallan	MgxWait(pMgx);
272898d24fdSmacallan
273898d24fdSmacallan	while (h--) {
274898d24fdSmacallan		for (i = 0; i < w; i++) dst[i] = /*bswap32*/(src[i]);
275898d24fdSmacallan		src += src_pitch >> 2;
276898d24fdSmacallan		dst += dst_pitch;
277898d24fdSmacallan	LEAVE;
278898d24fdSmacallan	}
279898d24fdSmacallan	return TRUE;
280898d24fdSmacallan}
281898d24fdSmacallan
282898d24fdSmacallan/*
283898d24fdSmacallan * Memcpy-based DFS.
284898d24fdSmacallan */
285898d24fdSmacallanstatic Bool
286898d24fdSmacallanMgxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
287898d24fdSmacallan    char *dstt, int dst_pitch)
288898d24fdSmacallan{
289898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
290898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
291898d24fdSmacallan	uint32_t *src = (uint32_t *)(pMgx->fb + exaGetPixmapOffset(pSrc));
292898d24fdSmacallan	uint32_t *dst = (uint32_t *)dstt;
293898d24fdSmacallan	int i, src_pitch = exaGetPixmapPitch(pSrc) >> 2;
294898d24fdSmacallan
295898d24fdSmacallan	ENTER;
296898d24fdSmacallan	src += x + (y * src_pitch);
297898d24fdSmacallan
298898d24fdSmacallan	MgxWait(pMgx);
299898d24fdSmacallan
300898d24fdSmacallan	while (h--) {
301898d24fdSmacallan		for (i = 0; i < w; i++) dst[i] = /*bswap32*/(src[i]);
302898d24fdSmacallan		src += src_pitch;
303898d24fdSmacallan		dst += dst_pitch >> 2;
304898d24fdSmacallan	}
305898d24fdSmacallan	LEAVE;
306898d24fdSmacallan	return TRUE;
307898d24fdSmacallan}
308898d24fdSmacallan
309898d24fdSmacallanBool
310898d24fdSmacallanMgxInitAccel(ScreenPtr pScreen)
311898d24fdSmacallan{
312898d24fdSmacallan	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
313898d24fdSmacallan	MgxPtr pMgx = GET_MGX_FROM_SCRN(pScrn);
314898d24fdSmacallan	ExaDriverPtr pExa;
315898d24fdSmacallan	int lines;
316898d24fdSmacallan	uint32_t ap;
317898d24fdSmacallan	uint8_t reg;
318898d24fdSmacallan
319898d24fdSmacallan	pExa = exaDriverAlloc();
320898d24fdSmacallan	if (!pExa)
321898d24fdSmacallan		return FALSE;
322898d24fdSmacallan
323898d24fdSmacallan	pMgx->pExa = pExa;
324898d24fdSmacallan
325898d24fdSmacallan	pExa->exa_major = EXA_VERSION_MAJOR;
326898d24fdSmacallan	pExa->exa_minor = EXA_VERSION_MINOR;
327898d24fdSmacallan
328898d24fdSmacallan	pExa->memoryBase = pMgx->fb;
329898d24fdSmacallan	lines = (pMgx->vramsize - 0x1000) / (pScrn->displayWidth * 4);
330898d24fdSmacallan	DBGMSG(X_ERROR, "lines %d\n", lines);
331898d24fdSmacallan	pExa->memorySize = lines * pScrn->displayWidth * 4;
332898d24fdSmacallan	pExa->offScreenBase = pScrn->displayWidth * pScrn->virtualY * 4;
333898d24fdSmacallan	pExa->pixmapOffsetAlign = pScrn->displayWidth * 4;
334898d24fdSmacallan	pExa->pixmapPitchAlign = pScrn->displayWidth * 4;
335898d24fdSmacallan
336898d24fdSmacallan	pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_MIXED_PIXMAPS;
337898d24fdSmacallan
338898d24fdSmacallan	pExa->maxX = 2048;
339898d24fdSmacallan	pExa->maxY = 2048;
340898d24fdSmacallan
341898d24fdSmacallan	pExa->WaitMarker = MgxWaitMarker;
342898d24fdSmacallan	pExa->PrepareSolid = MgxPrepareSolid;
343898d24fdSmacallan	pExa->Solid = MgxSolid;
344898d24fdSmacallan	pExa->DoneSolid = MgxDoneCopy;
345898d24fdSmacallan	pExa->PrepareCopy = MgxPrepareCopy;
346898d24fdSmacallan	pExa->Copy = MgxCopy;
347898d24fdSmacallan	pExa->DoneCopy = MgxDoneCopy;
348898d24fdSmacallan
349898d24fdSmacallan	MgxWait(pMgx);
350898d24fdSmacallan
351898d24fdSmacallan	/* XXX support other colour depths */
352898d24fdSmacallan	reg = MgxRead1(pMgx, ATR_PIXEL);
353898d24fdSmacallan	DBGMSG(X_ERROR, "pixel %x\n", reg);
354898d24fdSmacallan	reg &= ~PIXEL_DEPTH_MASK;
355898d24fdSmacallan	reg |= PIXEL_32;
356898d24fdSmacallan	MgxWrite1(pMgx, ATR_PIXEL, reg);
357898d24fdSmacallan	pMgx->dec = DEC_DEPTH_32 << DEC_DEPTH_SHIFT;
358898d24fdSmacallan
359898d24fdSmacallan	ap = MgxRead4(pMgx, ATR_APERTURE);
360898d24fdSmacallan	MgxWrite2(pMgx, ATR_APERTURE, 0xffff);
361898d24fdSmacallan	ap = MgxRead4(pMgx, ATR_APERTURE);
362898d24fdSmacallan
363898d24fdSmacallan	switch (pScrn->displayWidth) {
364898d24fdSmacallan		case 640:
365898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_640 << DEC_WIDTH_SHIFT;
366898d24fdSmacallan			break;
367898d24fdSmacallan		case 800:
368898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_800 << DEC_WIDTH_SHIFT;
369898d24fdSmacallan			break;
370898d24fdSmacallan		case 1024:
371898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_1024 << DEC_WIDTH_SHIFT;
372898d24fdSmacallan			break;
373898d24fdSmacallan		case 1152:
374898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_1152 << DEC_WIDTH_SHIFT;
375898d24fdSmacallan			break;
376898d24fdSmacallan		case 1280:
377898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_1280 << DEC_WIDTH_SHIFT;
378898d24fdSmacallan			break;
379898d24fdSmacallan		case 1600:
380898d24fdSmacallan			pMgx->dec |= DEC_WIDTH_1600 << DEC_WIDTH_SHIFT;
381898d24fdSmacallan			break;
382898d24fdSmacallan		default:
383898d24fdSmacallan			return FALSE; /* not supported */
384898d24fdSmacallan	}
385898d24fdSmacallan
386898d24fdSmacallan	/*
387898d24fdSmacallan	 * the fb is endian-twiddled and we don't know how to turn it off,
388898d24fdSmacallan	 * so we convert data when copying stuff in and out
389898d24fdSmacallan	 */
390898d24fdSmacallan	pExa->UploadToScreen = MgxUploadToScreen;
391898d24fdSmacallan	pExa->DownloadFromScreen = MgxDownloadFromScreen;
392898d24fdSmacallan	return exaDriverInit(pScreen, pExa);
393898d24fdSmacallan}
394