/*****************************************************************************
 * VIA Unichrome XvMC extension client lib.
 *
 * Copyright (c) 2004 Thomas Hellström. All rights reserved.
 * Copyright (c) 2003 Andreas Robinson. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHOR(S) OR COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * Low-level functions that deal directly with the hardware. In the future,
 * these functions might be implemented in a kernel module. Also, some of them
 * would benefit from DMA.
 *
 * Authors: Andreas Robinson 2003. Thomas Hellström 2004.
 */

#include "viaXvMCPriv.h"
#include "viaLowLevel.h"
#include <time.h>
#include <sys/time.h>
#include <stdio.h>

typedef struct
{
    CARD32 agp_buffer[LL_AGP_CMDBUF_SIZE];
    CARD32 pci_buffer[LL_PCI_CMDBUF_SIZE];
    unsigned agp_pos;
    unsigned pci_pos;
    unsigned flip_pos;
    int use_agp;
    int agp_mode;
    int agp_header_start;
    int agp_index;
    int fd;
    drm_context_t *drmcontext;
    drmLockPtr hwLock;
    drmAddress mmioAddress;
    drmAddress fbAddress;
    unsigned fbStride;
    unsigned fbDepth;
    unsigned width;
    unsigned height;
    unsigned curWaitFlags;
    int performLocking;
    unsigned errors;
    drm_via_mem_t tsMem;
    CARD32 tsOffset;
    volatile CARD32 *tsP;
    CARD32 curTimeStamp;
    CARD32 lastReadTimeStamp;
    int agpSync;
    CARD32 agpSyncTimeStamp;
    unsigned chipId;
} XvMCLowLevel;

/*
 * For architectures other than i386, these register accessors might have to
 * be modified, e.g. for big-endian hosts.
 */

#define MPEGIN(xl,reg)							\
    *((volatile CARD32 *)(((CARD8 *)(xl)->mmioAddress) + 0xc00 + (reg)))

#define VIDIN(ctx,reg)							\
    *((volatile CARD32 *)(((CARD8 *)(ctx)->mmioAddress) + (reg)))

#define REGIN(ctx,reg)							\
    *((volatile CARD32 *)(((CARD8 *)(ctx)->mmioAddress) + 0x0000 + (reg)))
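
/*
 * A minimal sketch of what a byte-order-safe read accessor could look like
 * on a big-endian host, assuming le32toh() is available (<sys/endian.h> on
 * the BSDs, <endian.h> with glibc). REGIN_LE is a hypothetical name and is
 * not used elsewhere in this file.
 */
#define REGIN_LE(ctx,reg)						\
    le32toh(*((volatile CARD32 *)(((CARD8 *)(ctx)->mmioAddress) + 0x0000 + (reg))))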

#define HQV_CONTROL             0x3D0
#define HQV_SRC_STARTADDR_Y     0x3D4
#define HQV_SRC_STARTADDR_U     0x3D8
#define HQV_SRC_STARTADDR_V     0x3DC
#define HQV_MINIFY_DEBLOCK      0x3E8

#define HQV_SW_FLIP         0x00000010
#define HQV_FLIP_STATUS     0x00000001
#define HQV_SUBPIC_FLIP     0x00008000
#define HQV_FLIP_ODD        0x00000020
#define HQV_DEINTERLACE     0x00010000
#define HQV_FIELD_2_FRAME   0x00020000
#define HQV_FRAME_2_FIELD   0x00040000
#define HQV_FIELD_UV        0x00100000
#define HQV_DEBLOCK_HOR     0x00008000
#define HQV_DEBLOCK_VER     0x80000000

#define V_COMPOSE_MODE          0x298
#define V1_COMMAND_FIRE         0x80000000
#define V3_COMMAND_FIRE         0x40000000

/* SUBPICTURE Registers */
#define SUBP_CONTROL_STRIDE     0x3C0
#define SUBP_STARTADDR          0x3C4
#define RAM_TABLE_CONTROL       0x3C8
#define RAM_TABLE_READ          0x3CC

/* SUBP_CONTROL_STRIDE              0x3c0 */
#define SUBP_HQV_ENABLE             0x00010000
#define SUBP_IA44                   0x00020000
#define SUBP_AI44                   0x00000000
#define SUBP_STRIDE_MASK            0x00001fff
#define SUBP_CONTROL_MASK           0x00070000

/* RAM_TABLE_CONTROL                0x3c8 */
#define RAM_TABLE_RGB_ENABLE        0x00000007

#define VIA_REG_STATUS          0x400
#define VIA_REG_GEMODE          0x004
#define VIA_REG_SRCBASE         0x030
#define VIA_REG_DSTBASE         0x034
#define VIA_REG_PITCH           0x038
#define VIA_REG_SRCCOLORKEY     0x01C
#define VIA_REG_KEYCONTROL      0x02C
#define VIA_REG_SRCPOS          0x008
#define VIA_REG_DSTPOS          0x00C
#define VIA_REG_GECMD           0x000
#define VIA_REG_DIMENSION       0x010  /* width and height */
#define VIA_REG_FGCOLOR         0x018

#define VIA_VR_QUEUE_BUSY       0x00020000	/* Virtual Queue is busy */
#define VIA_CMD_RGTR_BUSY       0x00000080	/* Command Regulator is busy */
#define VIA_2D_ENG_BUSY         0x00000002	/* 2D Engine is busy */
#define VIA_3D_ENG_BUSY         0x00000001	/* 3D Engine is busy */
#define VIA_GEM_8bpp            0x00000000
#define VIA_GEM_16bpp           0x00000100
#define VIA_GEM_32bpp           0x00000300
#define VIA_GEC_BLT             0x00000001
#define VIA_PITCH_ENABLE        0x80000000
#define VIA_GEC_INCX            0x00000000
#define VIA_GEC_DECY            0x00004000
#define VIA_GEC_INCY            0x00000000
#define VIA_GEC_DECX            0x00008000
#define VIA_GEC_FIXCOLOR_PAT    0x00002000

#define VIA_BLIT_CLEAR 0x00
#define VIA_BLIT_COPY 0xCC
#define VIA_BLIT_FILL 0xF0
#define VIA_BLIT_SET 0xFF

#define VIA_SYNCWAITTIMEOUT 50000      /* Might be a bit conservative */
#define VIA_DMAWAITTIMEOUT 150000
#define VIA_VIDWAITTIMEOUT 50000
#define VIA_XVMC_DECODERTIMEOUT 50000  /*(microseconds) */

#define H1_ADDR(val) (((val) >> 2) | 0xF0000000)
#define WAITFLAGS(xl, flags)			\
    (xl)->curWaitFlags |= (flags)
#define BEGIN_RING_AGP(xl,size)						\
    do {								\
	if ((xl)->agp_pos > (LL_AGP_CMDBUF_SIZE-(size))) {		\
	    agpFlush(xl);						\
	}								\
    } while(0)
#define OUT_RING_AGP(xl, val)			\
    (xl)->agp_buffer[(xl)->agp_pos++] = (val)
#define OUT_RING_QW_AGP(xl, val1, val2)			\
    do {						\
	(xl)->agp_buffer[(xl)->agp_pos++] = (val1);	\
	(xl)->agp_buffer[(xl)->agp_pos++] = (val2);	\
    } while (0)
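
/*
 * Typical usage of the ring macros, as in viaMpegSetSurfaceStride() below:
 * reserve space, emit H1_ADDR address/value pairs, and record which engine
 * the eventual flush has to wait for. "packedStrides" here is just a
 * placeholder for the value written.
 *
 *     BEGIN_RING_AGP(xl, 2);
 *     OUT_RING_QW_AGP(xl, H1_ADDR(0xc50), packedStrides);
 *     WAITFLAGS(xl, LL_MODE_DECODER_IDLE);
 */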

#define LL_HW_LOCK(xl)							\
    do {								\
	DRM_LOCK((xl)->fd,(xl)->hwLock,*(xl)->drmcontext,0);		\
    } while(0);
#define LL_HW_UNLOCK(xl)					\
    do {							\
	DRM_UNLOCK((xl)->fd,(xl)->hwLock,*(xl)->drmcontext);	\
    } while(0);

/*
 * We want two types of thread to be able to take the hardware lock
 * concurrently. One is the video-out thread, which needs immediate access
 * to flip an image. The other is everything else, which may hold the lock
 * for quite some time. The distinction exists only so that the video-out
 * thread can sneak in and display an image while other resources are busy.
 */
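
/*
 * Note that in this implementation both callers end up taking the same DRM
 * heavyweight lock; the videoLock argument of hwlLock() / hwlUnlock() below
 * is currently unused.
 */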

void
hwlLock(void *xlp, int videoLock)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    LL_HW_LOCK(xl);
}

void
hwlUnlock(void *xlp, int videoLock)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    LL_HW_UNLOCK(xl);
}

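/*
 * Microsecond difference between two gettimeofday() samples. Only the
 * tv_usec fields are compared, so this assumes intervals shorter than one
 * second, which holds for the timeout values used in this file.
 */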
static unsigned
timeDiff(struct timeval *now, struct timeval *then)
{
    return (now->tv_usec >= then->tv_usec) ?
	now->tv_usec - then->tv_usec :
	1000000 - (then->tv_usec - now->tv_usec);
}

void
setAGPSyncLowLevel(void *xlp, int val, CARD32 timeStamp)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    xl->agpSync = val;
    xl->agpSyncTimeStamp = timeStamp;
}

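/*
 * Emit a new timestamp: a 1x1 fill blit that writes curTimeStamp to the
 * tsOffset scratch area in video memory. The CPU later polls that location
 * through tsP in viaDMAWaitTimeStamp() to tell when the AGP command stream
 * has advanced past this point.
 */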
CARD32
viaDMATimeStampLowLevel(void *xlp)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (xl->use_agp) {
	viaBlit(xl, 32, xl->tsOffset, 1, xl->tsOffset, 1, 1, 1, 0, 0,
	    VIABLIT_FILL, xl->curTimeStamp);
	return xl->curTimeStamp++;
    }
    return 0;
}

static void
viaDMAWaitTimeStamp(XvMCLowLevel * xl, CARD32 timeStamp, int doSleep)
{
    struct timeval now, then;
    struct timezone here;
    struct timespec sleep, rem;

    if (xl->use_agp && (timeStamp > xl->lastReadTimeStamp)) {
	sleep.tv_nsec = 1;
	sleep.tv_sec = 0;
	here.tz_minuteswest = 0;
	here.tz_dsttime = 0;
	gettimeofday(&then, &here);

	while (timeStamp > (xl->lastReadTimeStamp = *xl->tsP)) {
	    gettimeofday(&now, &here);
	    if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
		if ((timeStamp > (xl->lastReadTimeStamp = *xl->tsP))) {
		    xl->errors |= LL_DMA_TIMEDOUT;
		    break;
		}
	    }
	    if (doSleep)
		nanosleep(&sleep, &rem);
	}
    }
}

static int
viaDMAInitTimeStamp(XvMCLowLevel * xl)
{
    int ret = 0;

    if (xl->use_agp) {
	xl->tsMem.context = *(xl->drmcontext);
	xl->tsMem.size = 64;
	xl->tsMem.type = VIA_MEM_VIDEO;
	if ((ret = drmCommandWriteRead(xl->fd, DRM_VIA_ALLOCMEM, &xl->tsMem,
		sizeof(xl->tsMem))) < 0)
	    return ret;
	if (xl->tsMem.size != 64)
	    return -1;
	xl->tsOffset = (xl->tsMem.offset + 31) & ~31;
	xl->tsP = (CARD32 *) xl->fbAddress + (xl->tsOffset >> 2);
	xl->curTimeStamp = 1;
	*xl->tsP = 0;
    }
    return 0;
}

static int
viaDMACleanupTimeStamp(XvMCLowLevel * xl)
{

    if (!(xl->tsMem.size) || !xl->use_agp)
	return 0;
    return drmCommandWrite(xl->fd, DRM_VIA_FREEMEM, &xl->tsMem,
	sizeof(xl->tsMem));
}

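/*
 * Read the MPEG decoder status register (MMIO offset 0xc54). Bits 0x70 flag
 * decoder errors; syncMpeg() below shifts them into the error mask.
 */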
static CARD32
viaMpegGetStatus(XvMCLowLevel * xl)
{
    return MPEGIN(xl, 0x54);
}

static int
viaMpegIsBusy(XvMCLowLevel * xl, CARD32 mask, CARD32 idle)
{
    CARD32 tmp = viaMpegGetStatus(xl);

    /*
     * Error detected.
     * FIXME: Are errors really shown when error concealment is on?
     */

    if (tmp & 0x70)
	return 0;

    return (tmp & mask) != idle;
}

static void
syncDMA(XvMCLowLevel * xl, unsigned int doSleep)
{

    /*
     * Ideally we would wait for an interrupt here, but according to
     * second-hand information the hardware does not support this (earlier
     * S3 chips do). It is therefore not implemented in the DRM, and we do a
     * user-space busy wait here instead.
     */

    struct timeval now, then;
    struct timezone here;
    struct timespec sleep, rem;

    sleep.tv_nsec = 1;
    sleep.tv_sec = 0;
    here.tz_minuteswest = 0;
    here.tz_dsttime = 0;
    gettimeofday(&then, &here);
    while (!(REGIN(xl, VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY)) {
	gettimeofday(&now, &here);
	if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
	    if (!(REGIN(xl, VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY)) {
		xl->errors |= LL_DMA_TIMEDOUT;
		break;
	    }
	}
	if (doSleep)
	    nanosleep(&sleep, &rem);
    }
    while (REGIN(xl, VIA_REG_STATUS) & VIA_CMD_RGTR_BUSY) {
	gettimeofday(&now, &here);
	if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
	    if (REGIN(xl, VIA_REG_STATUS) & VIA_CMD_RGTR_BUSY) {
		xl->errors |= LL_DMA_TIMEDOUT;
		break;
	    }
	}
	if (doSleep)
	    nanosleep(&sleep, &rem);
    }
}

static void
syncVideo(XvMCLowLevel * xl, unsigned int doSleep)
{
    /*
     * Wait for HQV completion. Nothing strange here. We assume that the HQV
     * handles syncing to the V1 / V3 engines by itself. It should be safe to
     * always wait for SUBPIC_FLIP completion, even though subpictures are not
     * always used.
     */

    struct timeval now, then;
    struct timezone here;
    struct timespec sleep, rem;

    sleep.tv_nsec = 1;
    sleep.tv_sec = 0;
    here.tz_minuteswest = 0;
    here.tz_dsttime = 0;
    gettimeofday(&then, &here);
    while (VIDIN(xl, HQV_CONTROL) & (HQV_SW_FLIP | HQV_SUBPIC_FLIP)) {
	gettimeofday(&now, &here);
	if (timeDiff(&now, &then) > VIA_SYNCWAITTIMEOUT) {
	    if (VIDIN(xl, HQV_CONTROL) & (HQV_SW_FLIP | HQV_SUBPIC_FLIP)) {
		xl->errors |= LL_VIDEO_TIMEDOUT;
		break;
	    }
	}
	if (doSleep)
	    nanosleep(&sleep, &rem);
    }
}

static void
syncAccel(XvMCLowLevel * xl, unsigned int mode, unsigned int doSleep)
{
    struct timeval now, then;
    struct timezone here;
    struct timespec sleep, rem;
    CARD32 mask = ((mode & LL_MODE_2D) ? VIA_2D_ENG_BUSY : 0) |
	((mode & LL_MODE_3D) ? VIA_3D_ENG_BUSY : 0);

    sleep.tv_nsec = 1;
    sleep.tv_sec = 0;
    here.tz_minuteswest = 0;
    here.tz_dsttime = 0;
    gettimeofday(&then, &here);
    while (REGIN(xl, VIA_REG_STATUS) & mask) {
	gettimeofday(&now, &here);
	if (timeDiff(&now, &then) > VIA_SYNCWAITTIMEOUT) {
	    if (REGIN(xl, VIA_REG_STATUS) & mask) {
		xl->errors |= LL_ACCEL_TIMEDOUT;
		break;
	    }
	}
	if (doSleep)
	    nanosleep(&sleep, &rem);
    }
}

static void
syncMpeg(XvMCLowLevel * xl, unsigned int mode, unsigned int doSleep)
{
    /*
     * Ideally we would wait for an interrupt here, but according to
     * information from VIA, the MPEG completion interrupt is broken on at
     * least the CLE266; this was discovered during validation of the chip.
     */

    struct timeval now, then;
    struct timezone here;
    struct timespec sleep, rem;
    CARD32 busyMask = 0;
    CARD32 idleVal = 0;
    CARD32 ret;

    sleep.tv_nsec = 1;
    sleep.tv_sec = 0;
    here.tz_minuteswest = 0;
    here.tz_dsttime = 0;
    gettimeofday(&then, &here);
    if (mode & LL_MODE_DECODER_SLICE) {
	busyMask = VIA_SLICEBUSYMASK;
	idleVal = VIA_SLICEIDLEVAL;
    }
    if (mode & LL_MODE_DECODER_IDLE) {
	busyMask |= VIA_BUSYMASK;
	idleVal = VIA_IDLEVAL;
    }
    while (viaMpegIsBusy(xl, busyMask, idleVal)) {
	gettimeofday(&now, &here);
	if (timeDiff(&now, &then) > VIA_XVMC_DECODERTIMEOUT) {
	    if (viaMpegIsBusy(xl, busyMask, idleVal)) {
		xl->errors |= LL_DECODER_TIMEDOUT;
	    }
	    break;
	}
	if (doSleep)
	    nanosleep(&sleep, &rem);
    }

    ret = viaMpegGetStatus(xl);
    if (ret & 0x70) {
	xl->errors |= ((ret & 0x70) >> 3);
    }
    return;
}

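/*
 * Submit the accumulated PCI command buffer through the DRM_VIA_PCICMD
 * ioctl. Before submitting, wait for whatever engines the queued register
 * writes depend on, as recorded in curWaitFlags by WAITFLAGS() and
 * pciCommand().
 */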
static void
pciFlush(XvMCLowLevel * xl)
{
    int ret;
    drm_via_cmdbuffer_t b;
    unsigned mode = xl->curWaitFlags;

    b.buf = (char *)xl->pci_buffer;
    b.size = xl->pci_pos * sizeof(CARD32);
    if (xl->performLocking)
	hwlLock(xl, 0);
    if ((mode != LL_MODE_VIDEO) && (mode != 0))
	syncDMA(xl, 0);
    if ((mode & LL_MODE_2D) || (mode & LL_MODE_3D))
	syncAccel(xl, mode, 0);
    if (mode & LL_MODE_VIDEO)
	syncVideo(xl, 0);
    if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE))
	syncMpeg(xl, mode, 0);
    ret = drmCommandWrite(xl->fd, DRM_VIA_PCICMD, &b, sizeof(b));
    if (xl->performLocking)
	hwlUnlock(xl, 0);
    if (ret) {
	xl->errors |= LL_PCI_COMMAND_ERR;
    }
    xl->pci_pos = 0;
    xl->curWaitFlags = 0;
}

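/*
 * Submit the accumulated AGP command buffer. With AGP enabled the buffer is
 * handed to the DRM via DRM_VIA_CMDBUFFER (retried on EAGAIN), after
 * optionally waiting for a pending decoder timestamp; without AGP the same
 * buffer is pushed through the PCI path with the same engine syncs as
 * pciFlush().
 */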
static void
agpFlush(XvMCLowLevel * xl)
{
    drm_via_cmdbuffer_t b;
    int ret;

    if (xl->use_agp) {
	b.buf = (char *)xl->agp_buffer;
	b.size = xl->agp_pos * sizeof(CARD32);
	if (xl->agpSync) {
	    syncXvMCLowLevel(xl, LL_MODE_DECODER_IDLE, 1,
		xl->agpSyncTimeStamp);
	    xl->agpSync = 0;
	}
	if (xl->performLocking)
	    hwlLock(xl, 0);
	do {
	    ret = drmCommandWrite(xl->fd, DRM_VIA_CMDBUFFER, &b, sizeof(b));
	} while (-EAGAIN == ret);
	if (xl->performLocking)
	    hwlUnlock(xl, 0);

	if (ret) {
	    xl->errors |= LL_AGP_COMMAND_ERR;
	} else {
	    xl->agp_pos = 0;
	}
	xl->curWaitFlags &= LL_MODE_VIDEO;
    } else {
	unsigned mode = xl->curWaitFlags;

	b.buf = (char *)xl->agp_buffer;
	b.size = xl->agp_pos * sizeof(CARD32);
	if (xl->performLocking)
	    hwlLock(xl, 0);
	if ((mode != LL_MODE_VIDEO) && (mode != 0))
	    syncDMA(xl, 0);
	if ((mode & LL_MODE_2D) || (mode & LL_MODE_3D))
	    syncAccel(xl, mode, 0);
	if (mode & LL_MODE_VIDEO)
	    syncVideo(xl, 0);
	if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE))
	    syncMpeg(xl, mode, 0);
	ret = drmCommandWrite(xl->fd, DRM_VIA_PCICMD, &b, sizeof(b));
	if (xl->performLocking)
	    hwlUnlock(xl, 0);
	if (ret) {
	    xl->errors |= LL_PCI_COMMAND_ERR;
	}
	xl->agp_pos = 0;
	xl->curWaitFlags = 0;
    }
}

unsigned
flushXvMCLowLevel(void *xlp)
{
    unsigned errors;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (xl->pci_pos)
	pciFlush(xl);
    if (xl->agp_pos)
	agpFlush(xl);
    errors = xl->errors;
    xl->errors = 0;
    return errors;
}

void
flushPCIXvMCLowLevel(void *xlp)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (xl->pci_pos)
	pciFlush(xl);
    if (!xl->use_agp && xl->agp_pos)
	agpFlush(xl);
}

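/*
 * Queue a single register write (H1_ADDR-style address/value pair) into the
 * PCI command buffer, flushing first if the buffer is full, and accumulate
 * the wait flags that the eventual flush has to honour.
 */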
__inline static void
pciCommand(XvMCLowLevel * xl, unsigned offset, unsigned value, unsigned flags)
{
    if (xl->pci_pos > (LL_PCI_CMDBUF_SIZE - 2))
	pciFlush(xl);
    if (flags)
	xl->curWaitFlags |= flags;
    xl->pci_buffer[xl->pci_pos++] = (offset >> 2) | 0xF0000000;
    xl->pci_buffer[xl->pci_pos++] = value;
}

void
viaMpegSetSurfaceStride(void *xlp, ViaXvMCContext * ctx)
{
    CARD32 y_stride = ctx->yStride;
    CARD32 uv_stride = y_stride >> 1;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    BEGIN_RING_AGP(xl, 2);
    OUT_RING_QW_AGP(xl, H1_ADDR(0xc50),
	(y_stride >> 3) | ((uv_stride >> 3) << 16));
    WAITFLAGS(xl, LL_MODE_DECODER_IDLE);
}

void
viaVideoSetSWFLipLocked(void *xlp, unsigned yOffs, unsigned uOffs,
    unsigned vOffs, unsigned yStride, unsigned uvStride)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    pciCommand(xl, HQV_SRC_STARTADDR_Y, yOffs, LL_MODE_VIDEO);
    pciCommand(xl, HQV_SRC_STARTADDR_U, uOffs, 0);
    pciCommand(xl, HQV_SRC_STARTADDR_V, vOffs, 0);
}

void
viaVideoSWFlipLocked(void *xlp, unsigned flags, int progressiveSequence)
{
    CARD32 andWd, orWd;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    andWd = 0;
    orWd = 0;

    if ((flags & XVMC_FRAME_PICTURE) == XVMC_BOTTOM_FIELD) {
	andWd = 0xFFFFFFFFU;
	orWd = HQV_FIELD_UV |
	    HQV_DEINTERLACE |
	    HQV_FIELD_2_FRAME |
	    HQV_FRAME_2_FIELD |
	    HQV_SW_FLIP | HQV_FLIP_ODD | HQV_FLIP_STATUS | HQV_SUBPIC_FLIP;
    } else if ((flags & XVMC_FRAME_PICTURE) == XVMC_TOP_FIELD) {
	andWd = ~HQV_FLIP_ODD;
	orWd = HQV_FIELD_UV |
	    HQV_DEINTERLACE |
	    HQV_FIELD_2_FRAME |
	    HQV_FRAME_2_FIELD |
	    HQV_SW_FLIP | HQV_FLIP_STATUS | HQV_SUBPIC_FLIP;
    } else if ((flags & XVMC_FRAME_PICTURE) == XVMC_FRAME_PICTURE) {
	andWd = ~(HQV_DEINTERLACE |
	    HQV_FRAME_2_FIELD | HQV_FIELD_2_FRAME | HQV_FIELD_UV);
	orWd = HQV_SW_FLIP | HQV_FLIP_STATUS | HQV_SUBPIC_FLIP;
    }
    if (progressiveSequence) {
	andWd &= ~HQV_FIELD_UV;
	orWd &= ~HQV_FIELD_UV;
    }

    pciCommand(xl, HQV_CONTROL, (VIDIN(xl,
		HQV_CONTROL) & andWd) | orWd, 0);
}

void
viaMpegSetFB(void *xlp, unsigned i,
    unsigned yOffs, unsigned uOffs, unsigned vOffs)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    i *= 12;
    BEGIN_RING_AGP(xl, 6);
    OUT_RING_QW_AGP(xl, H1_ADDR(0xc20 + i), yOffs >> 3);
    OUT_RING_QW_AGP(xl, H1_ADDR(0xc24 + i), uOffs >> 3);
    OUT_RING_QW_AGP(xl, H1_ADDR(0xc28 + i), vOffs >> 3);
    WAITFLAGS(xl, LL_MODE_DECODER_IDLE);
}

void
viaMpegBeginPicture(void *xlp, ViaXvMCContext * ctx,
    unsigned width, unsigned height, const XvMCMpegControl * control)
{

    unsigned j, mb_width, mb_height;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    mb_width = (width + 15) >> 4;

    mb_height =
	((control->mpeg_coding == XVMC_MPEG_2) &&
	(control->flags & XVMC_PROGRESSIVE_SEQUENCE)) ?
	2 * ((height + 31) >> 5) : (((height + 15) >> 4));

    BEGIN_RING_AGP(xl, 144);
    WAITFLAGS(xl, LL_MODE_DECODER_IDLE);

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc00),
	((control->picture_structure & XVMC_FRAME_PICTURE) << 2) |
	((control->picture_coding_type & 3) << 4) |
	((control->flags & XVMC_ALTERNATE_SCAN) ? (1 << 6) : 0));

    if (!(ctx->intraLoaded)) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc5c), 0);
	for (j = 0; j < 64; j += 4) {
	    OUT_RING_QW_AGP(xl, H1_ADDR(0xc60),
		ctx->intra_quantiser_matrix[j] |
		(ctx->intra_quantiser_matrix[j + 1] << 8) |
		(ctx->intra_quantiser_matrix[j + 2] << 16) |
		(ctx->intra_quantiser_matrix[j + 3] << 24));
	}
	ctx->intraLoaded = 1;
    }

    if (!(ctx->nonIntraLoaded)) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc5c), 1);
	for (j = 0; j < 64; j += 4) {
	    OUT_RING_QW_AGP(xl, H1_ADDR(0xc60),
		ctx->non_intra_quantiser_matrix[j] |
		(ctx->non_intra_quantiser_matrix[j + 1] << 8) |
		(ctx->non_intra_quantiser_matrix[j + 2] << 16) |
		(ctx->non_intra_quantiser_matrix[j + 3] << 24));
	}
	ctx->nonIntraLoaded = 1;
    }

    if (!(ctx->chromaIntraLoaded)) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc5c), 2);
	for (j = 0; j < 64; j += 4) {
	    OUT_RING_QW_AGP(xl, H1_ADDR(0xc60),
		ctx->chroma_intra_quantiser_matrix[j] |
		(ctx->chroma_intra_quantiser_matrix[j + 1] << 8) |
		(ctx->chroma_intra_quantiser_matrix[j + 2] << 16) |
		(ctx->chroma_intra_quantiser_matrix[j + 3] << 24));
	}
	ctx->chromaIntraLoaded = 1;
    }

    if (!(ctx->chromaNonIntraLoaded)) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc5c), 3);
	for (j = 0; j < 64; j += 4) {
	    OUT_RING_QW_AGP(xl, H1_ADDR(0xc60),
		ctx->chroma_non_intra_quantiser_matrix[j] |
		(ctx->chroma_non_intra_quantiser_matrix[j + 1] << 8) |
		(ctx->chroma_non_intra_quantiser_matrix[j + 2] << 16) |
		(ctx->chroma_non_intra_quantiser_matrix[j + 3] << 24));
	}
	ctx->chromaNonIntraLoaded = 1;
    }

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc90),
	((mb_width * mb_height) & 0x3fff) |
	((control->flags & XVMC_PRED_DCT_FRAME) ? (1 << 14) : 0) |
	((control->flags & XVMC_TOP_FIELD_FIRST) ? (1 << 15) : 0) |
	((control->mpeg_coding == XVMC_MPEG_2) ? (1 << 16) : 0) |
	((mb_width & 0xff) << 18));

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc94),
	((control->flags & XVMC_CONCEALMENT_MOTION_VECTORS) ? 1 : 0) |
	((control->flags & XVMC_Q_SCALE_TYPE) ? 2 : 0) |
	((control->intra_dc_precision & 3) << 2) |
	(((1 + 0x100000 / mb_width) & 0xfffff) << 4) |
	((control->flags & XVMC_INTRA_VLC_FORMAT) ? (1 << 24) : 0));

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc98),
	(((control->FHMV_range) & 0xf) << 0) |
	(((control->FVMV_range) & 0xf) << 4) |
	(((control->BHMV_range) & 0xf) << 8) |
	(((control->BVMV_range) & 0xf) << 12) |
	((control->flags & XVMC_SECOND_FIELD) ? (1 << 20) : 0) |
	(0x0a6 << 16));

}

void
viaMpegReset(void *xlp)
{
    int i, j;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    BEGIN_RING_AGP(xl, 100);
    WAITFLAGS(xl, LL_MODE_DECODER_IDLE);

    for (i = 0; i < 14; i++)
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc08), 0);

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc98), 0x400000);

    for (i = 0; i < 6; i++) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xc0c), 0x43 | 0x20);
	for (j = 0xc10; j < 0xc20; j += 4)
	    OUT_RING_QW_AGP(xl, H1_ADDR(j), 0);
    }

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc0c), 0xc3 | 0x20);
    for (j = 0xc10; j < 0xc20; j += 4)
	OUT_RING_QW_AGP(xl, H1_ADDR(j), 0);

}

void
viaMpegWriteSlice(void *xlp, CARD8 * slice, int nBytes, CARD32 sCode)
{
    int i, n, r;
    CARD32 *buf;
    int count;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (xl->errors & (LL_DECODER_TIMEDOUT |
	    LL_IDCT_FIFO_ERROR | LL_SLICE_FIFO_ERROR | LL_SLICE_FAULT))
	return;

    n = nBytes >> 2;
    if (sCode)
	nBytes += 4;
    r = nBytes & 3;
    buf = (CARD32 *) slice;

    if (r)
	nBytes += 4 - r;

    nBytes += 8;

    BEGIN_RING_AGP(xl, 4);
    WAITFLAGS(xl, LL_MODE_DECODER_IDLE);

    OUT_RING_QW_AGP(xl, H1_ADDR(0xc9c), nBytes);

    if (sCode)
	OUT_RING_QW_AGP(xl, H1_ADDR(0xca0), sCode);

    i = 0;
    count = 0;

    do {
	count += (LL_AGP_CMDBUF_SIZE - 20) >> 1;
	count = (count > n) ? n : count;
	BEGIN_RING_AGP(xl, (count - i) << 1);

	for (; i < count; i++) {
	    OUT_RING_QW_AGP(xl, H1_ADDR(0xca0), *buf++);
	}
    } while (i < n);

    BEGIN_RING_AGP(xl, 6);

    if (r) {
	OUT_RING_QW_AGP(xl, H1_ADDR(0xca0), *buf & ((1 << (r << 3)) - 1));
    }
    OUT_RING_QW_AGP(xl, H1_ADDR(0xca0), 0);
    OUT_RING_QW_AGP(xl, H1_ADDR(0xca0), 0);

}

void
viaVideoSubPictureOffLocked(void *xlp)
{

    CARD32 stride;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    stride = VIDIN(xl, SUBP_CONTROL_STRIDE);

    pciCommand(xl, SUBP_CONTROL_STRIDE, stride & ~SUBP_HQV_ENABLE,
	LL_MODE_VIDEO);
}

void
viaVideoSubPictureLocked(void *xlp, ViaXvMCSubPicture * pViaSubPic)
{

    unsigned i;
    CARD32 cWord;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    for (i = 0; i < VIA_SUBPIC_PALETTE_SIZE; ++i) {
	pciCommand(xl, RAM_TABLE_CONTROL, pViaSubPic->palette[i],
	    LL_MODE_VIDEO);
    }

    pciCommand(xl, SUBP_STARTADDR, pViaSubPic->offset, 0);
    cWord = (pViaSubPic->stride & SUBP_STRIDE_MASK) | SUBP_HQV_ENABLE;
    cWord |= (pViaSubPic->ia44) ? SUBP_IA44 : SUBP_AI44;
    pciCommand(xl, SUBP_CONTROL_STRIDE, cWord, 0);
}

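/*
 * Queue a 2D engine blit through the AGP ring. blitMode selects between
 * ordinary copy, transparent copy and solid fill (the VIABLIT_* modes
 * handled in the switch below); viaDMATimeStampLowLevel() above uses the
 * fill mode to write timestamps.
 */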
void
viaBlit(void *xlp, unsigned bpp, unsigned srcBase,
    unsigned srcPitch, unsigned dstBase, unsigned dstPitch,
    unsigned w, unsigned h, int xdir, int ydir, unsigned blitMode,
    unsigned color)
{

    CARD32 dwGEMode = 0, srcY = 0, srcX, dstY = 0, dstX;
    CARD32 cmd;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (!w || !h)
	return;

    switch (bpp) {
    case 16:
	dwGEMode |= VIA_GEM_16bpp;
	break;
    case 32:
	dwGEMode |= VIA_GEM_32bpp;
	break;
    default:
	dwGEMode |= VIA_GEM_8bpp;
	break;
    }

    srcX = srcBase & 31;
    dstX = dstBase & 31;
    switch (bpp) {
    case 16:
	dwGEMode |= VIA_GEM_16bpp;
	srcX >>= 2;
	dstX >>= 2;
	break;
    case 32:
	dwGEMode |= VIA_GEM_32bpp;
	srcX >>= 4;
	dstX >>= 4;
	break;
    default:
	dwGEMode |= VIA_GEM_8bpp;
	break;
    }

    BEGIN_RING_AGP(xl, 20);
    WAITFLAGS(xl, LL_MODE_2D);

    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_GEMODE), dwGEMode);
    cmd = 0;

    if (xdir < 0) {
	cmd |= VIA_GEC_DECX;
	srcX += (w - 1);
	dstX += (w - 1);
    }
    if (ydir < 0) {
	cmd |= VIA_GEC_DECY;
	srcY += (h - 1);
	dstY += (h - 1);
    }

    switch (blitMode) {
    case VIABLIT_TRANSCOPY:
	OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_SRCCOLORKEY), color);
	OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_KEYCONTROL), 0x4000);
	cmd |= VIA_GEC_BLT | (VIA_BLIT_COPY << 24);
	break;
    case VIABLIT_FILL:
	OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_FGCOLOR), color);
	cmd |= VIA_GEC_BLT | VIA_GEC_FIXCOLOR_PAT | (VIA_BLIT_FILL << 24);
	break;
    default:
	OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_KEYCONTROL), 0x0);
	cmd |= VIA_GEC_BLT | (VIA_BLIT_COPY << 24);
    }

    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_SRCBASE), (srcBase & ~31) >> 3);
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_DSTBASE), (dstBase & ~31) >> 3);
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_PITCH), VIA_PITCH_ENABLE |
	(srcPitch >> 3) | (((dstPitch) >> 3) << 16));
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_SRCPOS), ((srcY << 16) | srcX));
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_DSTPOS), ((dstY << 16) | dstX));
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_DIMENSION),
	(((h - 1) << 16) | (w - 1)));
    OUT_RING_QW_AGP(xl, H1_ADDR(VIA_REG_GECMD), cmd);
}

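/*
 * Wait until the engines selected by the LL_MODE_* bits in "mode" are idle.
 * With AGP enabled and no video / 3D wait requested, this reduces to waiting
 * for the given DMA timestamp. Returns and clears the accumulated error
 * flags.
 */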
unsigned
syncXvMCLowLevel(void *xlp, unsigned int mode, unsigned int doSleep,
    CARD32 timeStamp)
{
    unsigned errors;
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    if (mode == 0) {
	errors = xl->errors;
	xl->errors = 0;
	return errors;
    }

    if ((mode & (LL_MODE_VIDEO | LL_MODE_3D)) || !xl->use_agp) {
	if (xl->performLocking)
	    hwlLock(xl, 0);
	if ((mode != LL_MODE_VIDEO))
	    syncDMA(xl, doSleep);
	if (mode & LL_MODE_3D)
	    syncAccel(xl, mode, doSleep);
	if (mode & LL_MODE_VIDEO)
	    syncVideo(xl, doSleep);
	if (xl->performLocking)
	    hwlUnlock(xl, 0);
    } else {
	viaDMAWaitTimeStamp(xl, timeStamp, doSleep);
    }

    if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE))
	syncMpeg(xl, mode, doSleep);

    errors = xl->errors;
    xl->errors = 0;

    return errors;
}

extern void *
initXvMCLowLevel(int fd, drm_context_t * ctx,
    drmLockPtr hwLock, drmAddress mmioAddress,
    drmAddress fbAddress, unsigned fbStride, unsigned fbDepth,
    unsigned width, unsigned height, int useAgp, unsigned chipId)
{
    int ret;
    XvMCLowLevel *xl;

    if (chipId == PCI_CHIP_VT3259 || chipId == PCI_CHIP_VT3364) {
	fprintf(stderr, "You are using an XvMC driver for the wrong chip.\n");
	fprintf(stderr, "Chipid is 0x%04x.\n", chipId);
	return NULL;
    }

    xl = (XvMCLowLevel *) malloc(sizeof(XvMCLowLevel));

    if (!xl)
	return NULL;

    xl->agp_pos = 0;
    xl->pci_pos = 0;
    xl->use_agp = useAgp;
    xl->fd = fd;
    xl->drmcontext = ctx;
    xl->hwLock = hwLock;
    xl->mmioAddress = mmioAddress;
    xl->fbAddress = fbAddress;
    xl->curWaitFlags = 0;
    xl->performLocking = 1;
    xl->errors = 0;
    xl->agpSync = 0;
    ret = viaDMAInitTimeStamp(xl);
    if (ret) {
	free(xl);
	return NULL;
    }
    return xl;
}

void
setLowLevelLocking(void *xlp, int performLocking)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    xl->performLocking = performLocking;
}

void
closeXvMCLowLevel(void *xlp)
{
    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;

    viaDMACleanupTimeStamp(xl);
    free(xl);
}