1/*****************************************************************************
2 * VIA Unichrome XvMC extension client lib.
3 *
4 * Copyright (c) 2004 Thomas Hellström. All rights reserved.
5 * Copyright (c) 2003 Andreas Robinson. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHOR(S) OR COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * Low-level functions that deal directly with the hardware. In the future,
28 * these functions might be implemented in a kernel module. Also, some of them
29 * would benefit from DMA.
30 *
31 * Authors:
32 *  Andreas Robinson 2003. (Initial decoder interface functions).
33 *  Thomas Hellstrom 2004, 2005 (Blitting functions, AGP and locking, Unichrome Pro Video AGP).
34 *  Ivor Hewitt 2005 (Unichrome Pro modifications and merging).
35 */
36
/* IH
 * I've left the proReg OR-ing in place in case we need or want to implement
 * the V1/V3 register toggle too, which also moves the register locations.
 * The CN400 has dual MPEG decoders; it is not yet clear whether these are
 * operated through independent registers as well.
 */
43
44#undef VIDEO_DMA
45#define HQV_USE_IRQ
46#define UNICHROME_PRO
47
48#include "viaXvMCPriv.h"
49#include "viaLowLevel.h"
50#include "driDrawable.h"
51#include <time.h>
52#include <sys/time.h>
53#include <stdio.h>
54
/*
 * State tag stored in XvMCLowLevel.state. The transitions between the
 * values are not visible in this part of the file.
 */
typedef enum
{
    ll_init,
    ll_agpBuf,
    ll_pciBuf,
    ll_timeStamp,
    ll_llBuf
} LLState;
58
59typedef struct
60{
61    drm_via_mem_t mem;
62    unsigned offset;
63    unsigned stride;
64    unsigned height;
65} LowLevelBuffer;
66
67struct _XvMCLowLevel;
68
69typedef struct _ViaCommandBuffer
70{
71    CARD32 *buf;
72    CARD32 waitFlags;
73    unsigned pos;
74    unsigned bufSize;
75    int mode;
76    int header_start;
77    int rindex;
78    void (*flushFunc) (struct _ViaCommandBuffer * cb,
79	struct _XvMCLowLevel * xl);
80} ViaCommandBuffer;
81
82typedef struct _XvMCLowLevel
83{
84    ViaCommandBuffer agpBuf, pciBuf, *videoBuf;
85    int use_agp;
86    int fd;
87    drm_context_t *drmcontext;
88    drmLockPtr hwLock;
89    drmAddress mmioAddress;
90    drmAddress fbAddress;
91    unsigned fbStride;
92    unsigned fbDepth;
93    unsigned width;
94    unsigned height;
95    int performLocking;
96    unsigned errors;
97    drm_via_mem_t tsMem;
98    CARD32 tsOffset;
99    volatile CARD32 *tsP;
100    CARD32 curTimeStamp;
101    CARD32 lastReadTimeStamp;
102    int agpSync;
103    CARD32 agpSyncTimeStamp;
104    unsigned chipId;
105
106    /*
107     * Data for video-engine less display
108     */
109
110    XvMCRegion sRegion;
111    XvMCRegion dRegion;
112    LowLevelBuffer scale;
113    LowLevelBuffer back;
114    Bool downScaling;
115    CARD32 downScaleW;
116    CARD32 downScaleH;
117    CARD32 upScaleW;
118    CARD32 upScaleH;
119    unsigned fetch;
120    unsigned line;
121    LLState state;
122} XvMCLowLevel;
123
/*
 * On architectures other than i386 these accessors may have to be modified,
 * e.g. for big-endian byte order.
 */
128
/*
 * 32-bit volatile accessors for memory-mapped registers. Each expands to an
 * lvalue at a byte offset from the mapped mmioAddress of the given context.
 */

/* Register at byte offset 0xc00 + reg. */
#define MPEGIN(xl,reg) \
    *((volatile CARD32 *)(((CARD8 *)(xl)->mmioAddress) + 0xc00 + (reg)))

/* Register at byte offset reg. */
#define VIDIN(ctx,reg) \
    *((volatile CARD32 *)(((CARD8 *)(ctx)->mmioAddress) + (reg)))

/* Register at byte offset reg (same location as VIDIN). */
#define REGIN(ctx,reg) \
    *((volatile CARD32 *)(((CARD8 *)(ctx)->mmioAddress) + 0x0000 + (reg)))
137
/* HQV register offsets */
#define HQV_SRC_OFFSET          0x3CC
#define HQV_CONTROL             0x3D0
#define HQV_SRC_STARTADDR_Y     0x3D4
#define HQV_SRC_STARTADDR_U     0x3D8
#define HQV_SRC_STARTADDR_V     0x3DC
#define HQV_MINIFY_DEBLOCK      0x3E8

/* OR-ed into an HQV register offset (see syncVideo / uploadHQVShadow callers) */
#define REG_HQV1_INDEX      0x00001000

/*
 * HQV bit definitions. Note that some names share a value
 * (HQV_SUBPIC_FLIP / HQV_DEBLOCK_HOR, HQV_DEBLOCK_VER / HQV_YUV422).
 */
#define HQV_FLIP_STATUS     0x00000001
#define HQV_SW_FLIP         0x00000010
#define HQV_FLIP_ODD        0x00000020
#define HQV_GEN_IRQ         0x00000080
#define HQV_SUBPIC_FLIP     0x00008000
#define HQV_DEBLOCK_HOR     0x00008000
#define HQV_DEINTERLACE     0x00010000
#define HQV_FIELD_2_FRAME   0x00020000
#define HQV_FRAME_2_FIELD   0x00040000
#define HQV_FIELD_UV        0x00100000
#define HQV_ENABLE          0x08000000
#define HQV_DEBLOCK_VER     0x80000000
#define HQV_YUV422          0x80000000
#define HQV_YUV420          0xC0000000

#define HQV_SCALE_ENABLE    0x00000800
#define HQV_SCALE_DOWN      0x00001000

#define V_COMPOSE_MODE          0x298
#define V3_COMMAND_FIRE         0x40000000
#define V1_COMMAND_FIRE         0x80000000

/* SUBPICTURE Registers */
#define SUBP_CONTROL_STRIDE     0x3C0
#define SUBP_STARTADDR          0x3C4
#define RAM_TABLE_CONTROL       0x3C8
#define RAM_TABLE_READ          0x3CC

/* SUBP_CONTROL_STRIDE              0x3c0 */
#define SUBP_AI44                   0x00000000
#define SUBP_STRIDE_MASK            0x00001fff
#define SUBP_HQV_ENABLE             0x00010000
#define SUBP_IA44                   0x00020000
#define SUBP_CONTROL_MASK           0x00070000

/* RAM_TABLE_CONTROL                0x3c8 */
#define RAM_TABLE_RGB_ENABLE        0x00000007

/* 2D engine and status register offsets */
#define VIA_REG_GECMD           0x000
#define VIA_REG_GEMODE          0x004
#define VIA_REG_SRCPOS          0x008
#define VIA_REG_DSTPOS          0x00C
#define VIA_REG_DIMENSION       0x010  /* width and height */
#define VIA_REG_FGCOLOR         0x018
#define VIA_REG_SRCCOLORKEY     0x01C
#define VIA_REG_KEYCONTROL      0x02C
#define VIA_REG_SRCBASE         0x030
#define VIA_REG_DSTBASE         0x034
#define VIA_REG_PITCH           0x038
#define VIA_REG_STATUS          0x400

/* VIA_REG_STATUS bits */
#define VIA_3D_ENG_BUSY         0x00000001      /* 3D Engine is busy */
#define VIA_2D_ENG_BUSY         0x00000002      /* 2D Engine is busy */
#define VIA_CMD_RGTR_BUSY       0x00000080      /* Command Regulator is busy */
#define VIA_VR_QUEUE_BUSY       0x00020000      /* Virtual Queue is busy */

/* 2D engine mode / command bits */
#define VIA_GEM_8bpp            0x00000000
#define VIA_GEM_16bpp           0x00000100
#define VIA_GEM_32bpp           0x00000300
#define VIA_GEC_BLT             0x00000001
#define VIA_GEC_INCX            0x00000000
#define VIA_GEC_INCY            0x00000000
#define VIA_GEC_FIXCOLOR_PAT    0x00002000
#define VIA_GEC_DECY            0x00004000
#define VIA_GEC_DECX            0x00008000
#define VIA_PITCH_ENABLE        0x80000000

/* Blit raster operation codes */
#define VIA_BLIT_CLEAR 0x00
#define VIA_BLIT_COPY 0xCC
#define VIA_BLIT_FILL 0xF0
#define VIA_BLIT_SET 0xFF

/* Timeouts, compared against the microsecond result of timeDiff() */
#define VIA_SYNCWAITTIMEOUT 50000      /* Might be a bit conservative */
#define VIA_DMAWAITTIMEOUT 150000
#define VIA_VIDWAITTIMEOUT 50000
#define VIA_XVMC_DECODERTIMEOUT 50000  /*(microseconds) */

/* First dword of the two command header formats written by finish_header_agp() */
#define VIA_AGP_HEADER5 0xFE040000
#define VIA_AGP_HEADER6 0xFE050000
225
226typedef struct
227{
228    CARD32 data;
229    Bool set;
230} HQVRegister;
231
/* Register byte offset -> dword index with the top nibble set to 0xF. */
#define H1_ADDR(val) (((val) >> 2) | 0xF0000000)

/* Accumulate LL_MODE_* wait bits on a command buffer. */
#define WAITFLAGS(cb, flags)                                            \
    ((cb)->waitFlags |= (flags))

/* Flush the buffer first if fewer than `size` dwords are known to fit. */
#define BEGIN_RING_AGP(cb, xl, size)                                    \
    do {                                                                \
        if ((cb)->pos > ((cb)->bufSize - (size))) {                     \
            (cb)->flushFunc((cb), (xl));                                \
        }                                                               \
    } while (0)

/*
 * Append one dword. No trailing semicolon after while (0), so the macro
 * behaves like a single statement inside unbraced if/else.
 */
#define OUT_RING_AGP(cb, val)                                           \
    do {                                                                \
        (cb)->buf[(cb)->pos++] = (val);                                 \
    } while (0)

/* Append two dwords (typically a register offset followed by its value). */
#define OUT_RING_QW_AGP(cb, val1, val2)                                 \
    do {                                                                \
        (cb)->buf[(cb)->pos++] = (val1);                                \
        (cb)->buf[(cb)->pos++] = (val2);                                \
    } while (0)

/*
 * Open a HEADER5 block: remember where it starts and reserve four dwords
 * that finish_header_agp() fills in later.
 */
#define BEGIN_HEADER5_AGP(cb, xl, index)                                \
    do {                                                                \
        BEGIN_RING_AGP((cb), (xl), 8);                                  \
        (cb)->mode = VIA_AGP_HEADER5;                                   \
        (cb)->rindex = (index);                                         \
        (cb)->header_start = (cb)->pos;                                 \
        (cb)->pos += 4;                                                 \
    } while (0)

/* Open a HEADER6 block; same four-dword reservation as HEADER5. */
#define BEGIN_HEADER6_AGP(cb, xl)                                       \
    do {                                                                \
        BEGIN_RING_AGP((cb), (xl), 8);                                  \
        (cb)->mode = VIA_AGP_HEADER6;                                   \
        (cb)->header_start = (cb)->pos;                                 \
        (cb)->pos += 4;                                                 \
    } while (0)

/*
 * Make sure a HEADER5 block for `index` is open with room for `size` dwords:
 * flush when the buffer is too full, close a block of another type or index,
 * and open a new block when none matches. Arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define BEGIN_HEADER5_DATA(cb, xl, size, index)                         \
    do {                                                                \
        if ((cb)->pos > ((cb)->bufSize - ((size) + 16))) {              \
            (cb)->flushFunc((cb), (xl));                                \
            BEGIN_HEADER5_AGP((cb), (xl), (index));                     \
        } else if ((cb)->mode && (((cb)->mode != VIA_AGP_HEADER5) ||    \
                                  ((cb)->rindex != (index)))) {         \
            finish_header_agp((cb));                                    \
            BEGIN_HEADER5_AGP((cb), (xl), (index));                     \
        } else if ((cb)->mode != VIA_AGP_HEADER5) {                     \
            BEGIN_HEADER5_AGP((cb), (xl), (index));                     \
        }                                                               \
    } while (0)

/*
 * HEADER6 counterpart of BEGIN_HEADER5_DATA; `size` counts register/value
 * pairs, hence the doubling. Arguments are evaluated more than once.
 */
#define BEGIN_HEADER6_DATA(cb, xl, size)                                \
    do {                                                                \
        if ((cb)->pos > ((cb)->bufSize - (((size) << 1) + 16))) {       \
            (cb)->flushFunc((cb), (xl));                                \
            BEGIN_HEADER6_AGP((cb), (xl));                              \
        } else if ((cb)->mode && ((cb)->mode != VIA_AGP_HEADER6)) {     \
            finish_header_agp((cb));                                    \
            BEGIN_HEADER6_AGP((cb), (xl));                              \
        } else if ((cb)->mode != VIA_AGP_HEADER6) {                     \
            BEGIN_HEADER6_AGP((cb), (xl));                              \
        }                                                               \
    } while (0)
295
/*
 * The HQV shadow mirrors HQV_SHADOW_SIZE consecutive 32-bit registers that
 * start at byte offset HQV_SHADOW_BASE; slot index = (offset - base) / 4.
 */
#define HQV_SHADOW_BASE 0x3CC
#define HQV_SHADOW_SIZE 13

/*
 * Store `value` in the shadow slot for register `offset` and mark it as
 * needing upload. The temporary has an unusual name so it cannot capture an
 * identifier used in the arguments.
 */
#define SETHQVSHADOW(shadow, offset, value)                             \
    do {                                                                \
        HQVRegister *hqvSlot_ =                                         \
            (shadow) + (((offset) - HQV_SHADOW_BASE) >> 2);             \
        hqvSlot_->data = (value);                                       \
        hqvSlot_->set = TRUE;                                           \
    } while (0)

/* Read the shadowed value for register `offset` (argument fully parenthesized). */
#define GETHQVSHADOW(shadow, offset)                                    \
    ((shadow)[((offset) - HQV_SHADOW_BASE) >> 2].data)
307
/*
 * Take / release the DRM hardware lock of a low-level context. Written
 * without a trailing semicolon after while (0) so that each macro acts as a
 * single statement, also inside unbraced if/else.
 */
#define LL_HW_LOCK(xl)                                                  \
    do {                                                                \
        DRM_LOCK((xl)->fd, (xl)->hwLock, *(xl)->drmcontext, 0);         \
    } while (0)
#define LL_HW_UNLOCK(xl)                                                \
    do {                                                                \
        DRM_UNLOCK((xl)->fd, (xl)->hwLock, *(xl)->drmcontext);          \
    } while (0)
316
317static HQVRegister hqvShadow[HQV_SHADOW_SIZE];
318
319static void
320initHQVShadow(HQVRegister * r)
321{
322    int i;
323
324    for (i = 0; i < HQV_SHADOW_SIZE; ++i) {
325	r->data = 0;
326	r++->set = FALSE;
327    }
328}
329
#if 0
/*
 * Disabled code. Programs the shadow of register 0x3E4 (plus 0x3DC and
 * 0x3D0) for hardware deinterlacing. All bit masks are built from unsigned
 * constants: shifting a signed 1 into bit 31 is undefined behaviour.
 */
static void
setHQVHWDeinterlacing(HQVRegister * shadow, Bool on, Bool motionDetect,
    CARD32 stride, CARD32 height)
{
    CARD32 tmp = GETHQVSHADOW(shadow, 0x3E4);

    if (!on) {
        /* Switching off only clears bits 0, 12, 27 and 31. */
        tmp &= ~((1U << 0) | (1U << 12) | (1U << 27) | (1U << 31));
        SETHQVSHADOW(shadow, 0x3E4, tmp);
        return;
    }

    tmp = (1U << 31) |
        (4U << 28) |
        (1U << 27) |
        (3U << 25) | (1U << 18) | (2U << 14) | (8U << 8) | (8U << 1) |
        (1U << 0);

    if (motionDetect)
        tmp |= (1U << 12);

    SETHQVSHADOW(shadow, 0x3E4, tmp);

    tmp = GETHQVSHADOW(shadow, 0x3DC);
    tmp |= ((stride * height * 1536) / 1024) & 0x7ff;

    SETHQVSHADOW(shadow, 0x3DC, tmp);

    tmp = GETHQVSHADOW(shadow, 0x3D0);
    tmp |= (1U << 23);

    SETHQVSHADOW(shadow, 0x3D0, tmp);
}

#endif
365
366static void
367setHQVDeblocking(HQVRegister * shadow, Bool on, Bool lowPass)
368{
369    CARD32 tmp = GETHQVSHADOW(shadow, 0x3DC);
370
371    if (!on) {
372	tmp &= ~(1 << 27);
373	SETHQVSHADOW(shadow, 0x3DC, tmp);
374	return;
375    }
376
377    tmp |= (8 << 16) | (1 << 27);
378    if (lowPass)
379	tmp |= (1 << 26);
380    SETHQVSHADOW(shadow, 0x3DC, tmp);
381
382    tmp = GETHQVSHADOW(shadow, 0x3D4);
383    tmp |= (6 << 27);
384    SETHQVSHADOW(shadow, 0x3D4, tmp);
385
386    tmp = GETHQVSHADOW(shadow, 0x3D8);
387    tmp |= (19 << 27);
388    SETHQVSHADOW(shadow, 0x3D8, tmp);
389}
390
391static void
392setHQVStartAddress(HQVRegister * shadow, unsigned yOffs, unsigned uOffs,
393    unsigned stride, unsigned format)
394{
395    CARD32 tmp = GETHQVSHADOW(shadow, 0x3D4);
396
397    tmp |= yOffs & 0x03FFFFF0;
398    SETHQVSHADOW(shadow, 0x3D4, tmp);
399    tmp = GETHQVSHADOW(shadow, 0x3D8);
400    tmp |= uOffs & 0x03FFFFF0;
401    SETHQVSHADOW(shadow, 0x3D8, tmp);
402    tmp = GETHQVSHADOW(shadow, 0x3F8);
403    tmp |= (stride & 0x1FF8);
404    SETHQVSHADOW(shadow, 0x3F8, tmp);
405    tmp = GETHQVSHADOW(shadow, 0x3D0);
406
407    if (format == 0) {
408	/*
409	 * NV12
410	 */
411	tmp |= (0x0C << 28);
412    } else if (format == 1) {
413	/*
414	 * RGB16
415	 */
416	tmp |= (0x02 << 28);
417    } else if (format == 2) {
418	/*
419	 * RGB32
420	 */
421	;
422    }
423    SETHQVSHADOW(shadow, 0x3D0, tmp);
424}
425
#if 0

/*
 * Disabled code. Toggles bit 28 of shadowed register 0x3DC; when enabling,
 * bit 29 is also set for depth 32 and bit 15 is cleared.
 */
static void
setHQVColorSpaceConversion(HQVRegister * shadow, unsigned depth, Bool on)
{
    CARD32 reg = GETHQVSHADOW(shadow, 0x3DC);

    if (on) {
        if (depth == 32)
            reg |= (1 << 29);
        reg |= (1 << 28);
        reg &= ~(1 << 15);
    } else {
        reg &= ~(1 << 28);
    }
    SETHQVSHADOW(shadow, 0x3DC, reg);
}

/*
 * Disabled code. Packs (lines - 1) into the low 11 bits and (fetch - 1)
 * into 13 bits starting at bit 16 of shadowed register 0x3E0.
 */
static void
setHQVFetchLine(HQVRegister * shadow, unsigned fetch, unsigned lines)
{
    const unsigned lineBits = (lines - 1) & 0x7FF;
    const unsigned fetchBits = ((fetch - 1) & 0x1FFF) << 16;

    SETHQVSHADOW(shadow, 0x3E0, lineBits | fetchBits);
}

/*
 * Disabled code. Packs two 16-bit scale values (horizontal low, vertical
 * high) into shadowed register 0x3E8.
 */
static void
setHQVScale(HQVRegister * shadow, unsigned horizontal, unsigned vertical)
{
    const unsigned low = horizontal & 0xFFFF;
    const unsigned high = (vertical & 0xFFFF) << 16;

    SETHQVSHADOW(shadow, 0x3E8, low | high);
}

/*
 * Disabled code. Sets bit 6 of shadowed register 0x3D0 and stores the
 * masked destination offset (0x3EC) and stride (0x3F4).
 */
static void
setHQVSingleDestination(HQVRegister * shadow, unsigned offset,
    unsigned stride)
{
    CARD32 ctrl = GETHQVSHADOW(shadow, 0x3D0);

    ctrl |= (1 << 6);
    SETHQVSHADOW(shadow, 0x3D0, ctrl);

    SETHQVSHADOW(shadow, 0x3EC, offset & 0x03FFFFF8);
    SETHQVSHADOW(shadow, 0x3F4, stride & 0x1FF8);
}
#endif
473
474static void
475setHQVDeinterlacing(HQVRegister * shadow, CARD32 frameType)
476{
477    CARD32 tmp = GETHQVSHADOW(shadow, 0x3D0);
478
479    if ((frameType & XVMC_FRAME_PICTURE) == XVMC_TOP_FIELD) {
480	tmp |= HQV_FIELD_UV |
481	    HQV_DEINTERLACE | HQV_FIELD_2_FRAME | HQV_FRAME_2_FIELD;
482    } else if ((frameType & XVMC_FRAME_PICTURE) == XVMC_BOTTOM_FIELD) {
483	tmp |= HQV_FIELD_UV |
484	    HQV_DEINTERLACE |
485	    HQV_FIELD_2_FRAME | HQV_FRAME_2_FIELD | HQV_FLIP_ODD;
486    }
487    SETHQVSHADOW(shadow, 0x3D0, tmp);
488}
489
490static void
491setHQVTripleBuffer(HQVRegister * shadow, Bool on)
492{
493    CARD32 tmp = GETHQVSHADOW(shadow, 0x3D0);
494
495    if (on)
496	tmp |= (1 << 26);
497    else
498	tmp &= ~(1 << 26);
499    SETHQVSHADOW(shadow, 0x3D0, tmp);
500}
501
502static void
503finish_header_agp(ViaCommandBuffer * cb)
504{
505    int numDWords, i;
506
507    CARD32 *hb;
508
509    if (!cb->mode)
510	return;
511    numDWords = cb->pos - cb->header_start - 4;
512    hb = cb->buf + cb->header_start;
513    switch (cb->mode) {
514    case VIA_AGP_HEADER5:
515	hb[0] = VIA_AGP_HEADER5 | cb->rindex;
516	hb[1] = numDWords;
517	hb[2] = 0x00F50000;	       /* SW debug flag. (?) */
518	break;
519    default:
520	hb[0] = VIA_AGP_HEADER6;
521	hb[1] = numDWords >> 1;
522	hb[2] = 0x00F60000;	       /* SW debug flag. (?) */
523	break;
524    }
525    hb[3] = 0;
526    if (numDWords & 3) {
527	for (i = 0; i < (4 - (numDWords & 3)); ++i)
528	    OUT_RING_AGP(cb, 0x00000000);
529    }
530    cb->mode = 0;
531}
532
533void
534hwlLock(void *xlp, int videoLock)
535{
536    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
537
538    LL_HW_LOCK(xl);
539}
540
541void
542hwlUnlock(void *xlp, int videoLock)
543{
544    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
545
546    LL_HW_UNLOCK(xl);
547}
548
/*
 * Elapsed time from *then to *now in microseconds.
 *
 * Both the seconds and the microseconds fields are taken into account, so
 * intervals of a second or more are reported correctly instead of wrapping
 * modulo one second. The result saturates at the largest unsigned value.
 * If *now lies before *then (the wall clock was stepped back) the largest
 * unsigned value is returned as well, so that callers polling against a
 * timeout give up instead of spinning until the clock catches up.
 */
static unsigned
timeDiff(struct timeval *now, struct timeval *then)
{
    const long long seconds =
        (long long)now->tv_sec - (long long)then->tv_sec;
    const long long elapsed =
        seconds * 1000000LL +
        ((long long)now->tv_usec - (long long)then->tv_usec);

    if (elapsed < 0 || elapsed > (long long)(unsigned)-1)
        return (unsigned)-1;
    return (unsigned)elapsed;
}
556
557void
558setAGPSyncLowLevel(void *xlp, int val, CARD32 timeStamp)
559{
560    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
561
562    xl->agpSync = val;
563    xl->agpSyncTimeStamp = timeStamp;
564}
565
566CARD32
567viaDMATimeStampLowLevel(void *xlp)
568{
569    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
570
571    if (xl->use_agp) {
572	viaBlit(xl, 32, xl->tsOffset, 1, xl->tsOffset, 1, 1, 1, 0, 0,
573	    VIABLIT_FILL, xl->curTimeStamp);
574	return xl->curTimeStamp++;
575    }
576    return 0;
577}
578
579static void
580viaDMAWaitTimeStamp(XvMCLowLevel * xl, CARD32 timeStamp, int doSleep)
581{
582    struct timeval now, then;
583    struct timezone here;
584    struct timespec sleep, rem;
585
586    if (xl->use_agp && (xl->lastReadTimeStamp - timeStamp > (1 << 23))) {
587	sleep.tv_nsec = 1;
588	sleep.tv_sec = 0;
589	here.tz_minuteswest = 0;
590	here.tz_dsttime = 0;
591	gettimeofday(&then, &here);
592
593	while (((xl->lastReadTimeStamp = *xl->tsP) - timeStamp) > (1 << 23)) {
594	    gettimeofday(&now, &here);
595	    if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
596		if (((xl->lastReadTimeStamp =
597			    *xl->tsP) - timeStamp) > (1 << 23)) {
598		    xl->errors |= LL_DMA_TIMEDOUT;
599		    break;
600		}
601	    }
602	    if (doSleep)
603		nanosleep(&sleep, &rem);
604	}
605    }
606}
607
608static int
609viaDMAInitTimeStamp(XvMCLowLevel * xl)
610{
611    int ret = 0;
612
613    if (xl->use_agp) {
614	xl->tsMem.context = *(xl->drmcontext);
615	xl->tsMem.size = 64;
616	xl->tsMem.type = VIA_MEM_VIDEO;
617	if ((ret = drmCommandWriteRead(xl->fd, DRM_VIA_ALLOCMEM,
618		    &xl->tsMem, sizeof(xl->tsMem))) < 0)
619	    return ret;
620	if (xl->tsMem.size != 64)
621	    return -1;
622	xl->tsOffset = (xl->tsMem.offset + 31) & ~31;
623	xl->tsP = (CARD32 *) xl->fbAddress + (xl->tsOffset >> 2);
624	xl->curTimeStamp = 1;
625	*xl->tsP = 0;
626    }
627    return 0;
628}
629
630static int
631viaDMACleanupTimeStamp(XvMCLowLevel * xl)
632{
633
634    if (!(xl->tsMem.size) || !xl->use_agp)
635	return 0;
636    return drmCommandWrite(xl->fd, DRM_VIA_FREEMEM, &xl->tsMem,
637	sizeof(xl->tsMem));
638}
639
640static CARD32
641viaMpegGetStatus(XvMCLowLevel * xl)
642{
643    return MPEGIN(xl, 0x54);
644}
645
646static int
647viaMpegIsBusy(XvMCLowLevel * xl, CARD32 mask, CARD32 idle)
648{
649    CARD32 tmp = viaMpegGetStatus(xl);
650
651    /*
652     * Error detected.
653     * FIXME: Are errors really shown when error concealment is on?
654     */
655
656    if (tmp & 0x70)
657	return 0;
658
659    return (tmp & mask) != idle;
660}
661
662static void
663syncDMA(XvMCLowLevel * xl, unsigned int doSleep)
664{
665
666    /*
667     * Ideally, we'd like to have an interrupt wait here, but, according to second hand
668     * information, the hardware does not support this, although earlier S3 chips do that.
669     * It is therefore not implemented into the DRM, and we'll do a user space wait here.
670     */
671
672    struct timeval now, then;
673    struct timezone here;
674    struct timespec sleep, rem;
675
676    sleep.tv_nsec = 1;
677    sleep.tv_sec = 0;
678    here.tz_minuteswest = 0;
679    here.tz_dsttime = 0;
680    gettimeofday(&then, &here);
681    while (!(REGIN(xl, VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY)) {
682	gettimeofday(&now, &here);
683	if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
684	    if (!(REGIN(xl, VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY)) {
685		xl->errors |= LL_DMA_TIMEDOUT;
686		break;
687	    }
688	}
689	if (doSleep)
690	    nanosleep(&sleep, &rem);
691    }
692    while (REGIN(xl, VIA_REG_STATUS) & VIA_CMD_RGTR_BUSY) {
693	gettimeofday(&now, &here);
694	if (timeDiff(&now, &then) > VIA_DMAWAITTIMEOUT) {
695	    if (REGIN(xl, VIA_REG_STATUS) & VIA_CMD_RGTR_BUSY) {
696		xl->errors |= LL_DMA_TIMEDOUT;
697		break;
698	    }
699	}
700	if (doSleep)
701	    nanosleep(&sleep, &rem);
702    }
703}
704
705#ifdef HQV_USE_IRQ
706static void
707syncVideo(XvMCLowLevel * xl, unsigned int doSleep)
708{
709    int proReg = REG_HQV1_INDEX;
710
711    /*
712     * Wait for HQV completion using completion interrupt. Nothing strange here.
713     * Note that the interrupt handler clears the HQV_FLIP_STATUS bit, so we
714     * can't wait on that one.
715     */
716
717    if ((VIDIN(xl, HQV_CONTROL | proReg) & (HQV_SW_FLIP | HQV_SUBPIC_FLIP))) {
718	drm_via_irqwait_t irqw;
719
720	irqw.request.irq = 1;
721	irqw.request.type = VIA_IRQ_ABSOLUTE;
722	if (drmCommandWriteRead(xl->fd, DRM_VIA_WAIT_IRQ, &irqw,
723		sizeof(irqw)) < 0)
724	    xl->errors |= LL_VIDEO_TIMEDOUT;
725    }
726}
727#else
728static void
729syncVideo(XvMCLowLevel * xl, unsigned int doSleep)
730{
731    /*
732     * Wait for HQV completion. Nothing strange here. We assume that the HQV
733     * Handles syncing to the V1 / V3 engines by itself. It should be safe to
734     * always wait for SUBPIC_FLIP completion although subpictures are not
735     * always used.
736     */
737
738    struct timeval now, then;
739    struct timezone here;
740    struct timespec sleep, rem;
741
742    int proReg = REG_HQV1_INDEX;
743
744    sleep.tv_nsec = 1;
745    sleep.tv_sec = 0;
746    here.tz_minuteswest = 0;
747    here.tz_dsttime = 0;
748    gettimeofday(&then, &here);
749    while ((VIDIN(xl,
750		HQV_CONTROL | proReg) & (HQV_SW_FLIP | HQV_SUBPIC_FLIP))) {
751	gettimeofday(&now, &here);
752	if (timeDiff(&now, &then) > VIA_SYNCWAITTIMEOUT) {
753	    if ((VIDIN(xl,
754			HQV_CONTROL | proReg) & (HQV_SW_FLIP |
755			HQV_SUBPIC_FLIP))) {
756		xl->errors |= LL_VIDEO_TIMEDOUT;
757		break;
758	    }
759	}
760	if (doSleep)
761	    nanosleep(&sleep, &rem);
762    }
763}
764#endif
765
766static void
767syncAccel(XvMCLowLevel * xl, unsigned int mode, unsigned int doSleep)
768{
769    struct timeval now, then;
770    struct timezone here;
771    struct timespec sleep, rem;
772    CARD32 mask = ((mode & LL_MODE_2D) ? VIA_2D_ENG_BUSY : 0) |
773	((mode & LL_MODE_3D) ? VIA_3D_ENG_BUSY : 0);
774
775    sleep.tv_nsec = 1;
776    sleep.tv_sec = 0;
777    here.tz_minuteswest = 0;
778    here.tz_dsttime = 0;
779    gettimeofday(&then, &here);
780    while (REGIN(xl, VIA_REG_STATUS) & mask) {
781	gettimeofday(&now, &here);
782	if (timeDiff(&now, &then) > VIA_SYNCWAITTIMEOUT) {
783	    if (REGIN(xl, VIA_REG_STATUS) & mask) {
784		xl->errors |= LL_ACCEL_TIMEDOUT;
785		break;
786	    }
787	}
788	if (doSleep)
789	    nanosleep(&sleep, &rem);
790    }
791}
792
793static void
794syncMpeg(XvMCLowLevel * xl, unsigned int mode, unsigned int doSleep)
795{
796    /*
797     * Ideally, we'd like to have an interrupt wait here, but from information from VIA
798     * at least the MPEG completion interrupt is broken on the CLE266, which was
799     * discovered during validation of the chip.
800     */
801
802    struct timeval now, then;
803    struct timezone here;
804    struct timespec sleep, rem;
805    CARD32 busyMask = 0;
806    CARD32 idleVal = 0;
807    CARD32 ret;
808
809    sleep.tv_nsec = 1;
810    sleep.tv_sec = 0;
811    here.tz_minuteswest = 0;
812    here.tz_dsttime = 0;
813    gettimeofday(&then, &here);
814    if (mode & LL_MODE_DECODER_SLICE) {
815	busyMask = VIA_SLICEBUSYMASK;
816	idleVal = VIA_SLICEIDLEVAL;
817    }
818    if (mode & LL_MODE_DECODER_IDLE) {
819	busyMask |= VIA_BUSYMASK;
820	idleVal = VIA_IDLEVAL;
821    }
822    while (viaMpegIsBusy(xl, busyMask, idleVal)) {
823	gettimeofday(&now, &here);
824	if (timeDiff(&now, &then) > VIA_XVMC_DECODERTIMEOUT) {
825	    if (viaMpegIsBusy(xl, busyMask, idleVal)) {
826		xl->errors |= LL_DECODER_TIMEDOUT;
827	    }
828	    break;
829	}
830	if (doSleep)
831	    nanosleep(&sleep, &rem);
832    }
833
834    ret = viaMpegGetStatus(xl);
835    if (ret & 0x70) {
836	xl->errors |= ((ret & 0x70) >> 3);
837    }
838    return;
839}
840
841static void
842pciFlush(ViaCommandBuffer * cb, XvMCLowLevel * xl)
843{
844    int ret;
845    drm_via_cmdbuffer_t b;
846    unsigned mode = cb->waitFlags;
847
848    finish_header_agp(cb);
849    b.buf = (char *)cb->buf;
850    b.size = cb->pos * sizeof(CARD32);
851    if (xl->performLocking)
852	hwlLock(xl, 0);
853    if (((mode == LL_MODE_VIDEO) && (xl->videoBuf == &xl->agpBuf)) ||
854	((mode != LL_MODE_VIDEO) && (mode != 0)))
855	syncDMA(xl, 0);
856    if ((mode & LL_MODE_2D) || (mode & LL_MODE_3D)) {
857	syncAccel(xl, mode, 0);
858    }
859    if (mode & LL_MODE_VIDEO) {
860	syncVideo(xl, 1);
861    }
862    if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE)) {
863	syncMpeg(xl, mode, 0);
864    }
865    ret = drmCommandWrite(xl->fd, DRM_VIA_PCICMD, &b, sizeof(b));
866    if (xl->performLocking)
867	hwlUnlock(xl, 0);
868    if (ret) {
869	xl->errors |= LL_PCI_COMMAND_ERR;
870    }
871    cb->pos = 0;
872    cb->waitFlags = 0;
873}
874
875static void
876agpFlush(ViaCommandBuffer * cb, XvMCLowLevel * xl)
877{
878    drm_via_cmdbuffer_t b;
879    int ret;
880    int i;
881
882    finish_header_agp(cb);
883    if (xl->use_agp) {
884	b.buf = (char *)cb->buf;
885	b.size = cb->pos * sizeof(CARD32);
886	if (xl->agpSync) {
887	    syncXvMCLowLevel(xl, LL_MODE_DECODER_IDLE, 1,
888		xl->agpSyncTimeStamp);
889	    xl->agpSync = 0;
890	}
891	if (xl->performLocking)
892	    hwlLock(xl, 0);
893	do {
894	    ret = drmCommandWrite(xl->fd, DRM_VIA_CMDBUFFER, &b, sizeof(b));
895	} while (-EAGAIN == ret);
896	if (xl->performLocking)
897	    hwlUnlock(xl, 0);
898
899	if (ret) {
900	    xl->errors |= LL_AGP_COMMAND_ERR;
901	    for (i = 0; i < cb->pos; i += 2) {
902		printf("0x%x, 0x%x\n", (unsigned)cb->buf[i],
903		    (unsigned)cb->buf[i + 1]);
904	    }
905	    exit(-1);
906	} else {
907	    cb->pos = 0;
908	}
909	cb->waitFlags &= LL_MODE_VIDEO;	/* FIXME: Check this! */
910    } else {
911	unsigned mode = cb->waitFlags;
912
913	b.buf = (char *)cb->buf;
914	b.size = cb->pos * sizeof(CARD32);
915	if (xl->performLocking)
916	    hwlLock(xl, 0);
917	if (((mode == LL_MODE_VIDEO) && (cb == &xl->agpBuf)) ||
918	    ((mode != LL_MODE_VIDEO) && (mode != 0)))
919	    syncDMA(xl, 0);
920	if ((mode & LL_MODE_2D) || (mode & LL_MODE_3D))
921	    syncAccel(xl, mode, 0);
922	if (mode & LL_MODE_VIDEO)
923	    syncVideo(xl, 1);
924	if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE))
925	    syncMpeg(xl, mode, 0);
926	ret = drmCommandWrite(xl->fd, DRM_VIA_PCICMD, &b, sizeof(b));
927	if (xl->performLocking)
928	    hwlUnlock(xl, 0);
929	if (ret) {
930	    xl->errors |= LL_PCI_COMMAND_ERR;
931	}
932	cb->pos = 0;
933	cb->waitFlags = 0;
934    }
935}
936
#if 0                                  /* Needs debugging */
/*
 * Disabled code. Queues three groups of register writes (0x3F8, 0x3D4,
 * 0x3D8) on the AGP buffer; the top two bits of 0x3F8 take the values 1, 2
 * and 3 in turn, and the addresses are derived from prev_offset and
 * cur_offset. The two-bit field is built from unsigned constants because
 * 2 << 30 and 3 << 30 do not fit in a signed int (undefined behaviour).
 */
static void
uploadHQVDeinterlace(XvMCLowLevel * xl, unsigned offset, HQVRegister * shadow,
    CARD32 cur_offset, CARD32 prev_offset, CARD32 stride,
    Bool top_field_first, CARD32 height)
{
    CARD32 tmp;
    ViaCommandBuffer *cb = &xl->agpBuf;

    BEGIN_HEADER6_DATA(cb, xl, 9);
    tmp = GETHQVSHADOW(shadow, 0x3F8);
    tmp &= ~(3U << 30);
    tmp |= (1U << 30);
    OUT_RING_QW_AGP(cb, 0x3F8 + offset, tmp);
    OUT_RING_QW_AGP(cb, 0x3D4 + offset, prev_offset +
        ((top_field_first) ? stride : 0));
    OUT_RING_QW_AGP(cb, 0x3D8 + offset, prev_offset + stride * height);
    tmp &= ~(3U << 30);
    tmp |= (2U << 30);
    OUT_RING_QW_AGP(cb, 0x3F8 + offset, tmp);
    OUT_RING_QW_AGP(cb, 0x3D4 + offset, cur_offset +
        ((top_field_first) ? 0 : stride));
    OUT_RING_QW_AGP(cb, 0x3D8 + offset, cur_offset + stride * height);
    tmp |= (3U << 30);
    OUT_RING_QW_AGP(cb, 0x3F8 + offset, tmp);
    OUT_RING_QW_AGP(cb, 0x3D4 + offset, cur_offset +
        ((top_field_first) ? stride : 0));
    OUT_RING_QW_AGP(cb, 0x3D8 + offset, cur_offset + stride * height);
}

#endif
968
969static void
970uploadHQVShadow(XvMCLowLevel * xl, unsigned offset, HQVRegister * shadow,
971    Bool flip)
972{
973    int i;
974    CARD32 tmp;
975    ViaCommandBuffer *cb = xl->videoBuf;
976
977    BEGIN_HEADER6_DATA(cb, xl, HQV_SHADOW_SIZE);
978    WAITFLAGS(cb, LL_MODE_VIDEO);
979
980    if (shadow[0].set)
981	OUT_RING_QW_AGP(cb, 0x3CC + offset, 0);
982
983    for (i = 2; i < HQV_SHADOW_SIZE; ++i) {
984	if (shadow[i].set) {
985	    OUT_RING_QW_AGP(cb, offset + HQV_SHADOW_BASE + (i << 2),
986		shadow[i].data);
987	    shadow[i].set = FALSE;
988	}
989    }
990
991    /*
992     * Finally the control register for flip.
993     */
994
995    if (flip) {
996	tmp = GETHQVSHADOW(shadow, 0x3D0);
997	OUT_RING_QW_AGP(cb, offset + HQV_CONTROL,
998	    HQV_ENABLE | HQV_GEN_IRQ | HQV_SUBPIC_FLIP | HQV_SW_FLIP | tmp);
999    }
1000    shadow[0].set = FALSE;
1001    shadow[1].set = FALSE;
1002}
1003
1004unsigned
1005flushXvMCLowLevel(void *xlp)
1006{
1007    unsigned errors;
1008    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1009
1010    if (xl->pciBuf.pos)
1011	pciFlush(&xl->pciBuf, xl);
1012    if (xl->agpBuf.pos)
1013	agpFlush(&xl->agpBuf, xl);
1014    errors = xl->errors;
1015    if (errors)
1016	printf("Error 0x%x\n", errors);
1017    xl->errors = 0;
1018    return errors;
1019}
1020
1021void
1022flushPCIXvMCLowLevel(void *xlp)
1023{
1024    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1025
1026    if (xl->pciBuf.pos)
1027	pciFlush(&xl->pciBuf, xl);
1028    if ((!xl->use_agp && xl->agpBuf.pos))
1029	agpFlush(&xl->agpBuf, xl);
1030}
1031
1032void
1033viaMpegSetSurfaceStride(void *xlp, ViaXvMCContext * ctx)
1034{
1035    CARD32 y_stride = ctx->yStride;
1036    CARD32 uv_stride = y_stride >> 1;
1037    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1038    ViaCommandBuffer *cb = &xl->agpBuf;
1039
1040    BEGIN_HEADER6_DATA(cb, xl, 1);
1041    OUT_RING_QW_AGP(cb, 0xc50, (y_stride >> 3) | ((uv_stride >> 3) << 16));
1042    WAITFLAGS(cb, LL_MODE_DECODER_IDLE);
1043}
1044
1045void
1046viaVideoSetSWFLipLocked(void *xlp, unsigned yOffs, unsigned uOffs,
1047    unsigned vOffs, unsigned yStride, unsigned uvStride)
1048{
1049    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1050
1051    initHQVShadow(hqvShadow);
1052    setHQVStartAddress(hqvShadow, yOffs, vOffs, yStride, 0);
1053    if (xl->videoBuf == &xl->agpBuf)
1054	syncDMA(xl, 1);
1055    syncVideo(xl, 1);
1056    uploadHQVShadow(xl, REG_HQV1_INDEX, hqvShadow, FALSE);
1057    xl->videoBuf->flushFunc(xl->videoBuf, xl);
1058}
1059
1060void
1061viaVideoSWFlipLocked(void *xlp, unsigned flags, Bool progressiveSequence)
1062{
1063    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1064
1065    setHQVDeinterlacing(hqvShadow, flags);
1066    setHQVDeblocking(hqvShadow,
1067	((flags & XVMC_FRAME_PICTURE) == XVMC_FRAME_PICTURE), TRUE);
1068    setHQVTripleBuffer(hqvShadow, TRUE);
1069    if (xl->videoBuf == &xl->agpBuf)
1070	syncDMA(xl, 1);
1071    syncVideo(xl, 1);
1072    uploadHQVShadow(xl, REG_HQV1_INDEX, hqvShadow, TRUE);
1073    xl->videoBuf->flushFunc(xl->videoBuf, xl);
1074}
1075
1076void
1077viaMpegSetFB(void *xlp, unsigned i,
1078    unsigned yOffs, unsigned uOffs, unsigned vOffs)
1079{
1080    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1081    ViaCommandBuffer *cb = &xl->agpBuf;
1082
1083    i *= (4 * 2);
1084    BEGIN_HEADER6_DATA(cb, xl, 2);
1085    OUT_RING_QW_AGP(cb, 0xc28 + i, yOffs >> 3);
1086    OUT_RING_QW_AGP(cb, 0xc2c + i, vOffs >> 3);
1087
1088    WAITFLAGS(cb, LL_MODE_DECODER_IDLE);
1089}
1090
1091void
1092viaMpegBeginPicture(void *xlp, ViaXvMCContext * ctx,
1093    unsigned width, unsigned height, const XvMCMpegControl * control)
1094{
1095
1096    unsigned j, mb_width, mb_height;
1097    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1098    ViaCommandBuffer *cb = &xl->agpBuf;
1099
1100    mb_width = (width + 15) >> 4;
1101
1102    mb_height =
1103	((control->mpeg_coding == XVMC_MPEG_2) &&
1104	(control->flags & XVMC_PROGRESSIVE_SEQUENCE)) ?
1105	2 * ((height + 31) >> 5) : (((height + 15) >> 4));
1106
1107    BEGIN_HEADER6_DATA(cb, xl, 72);
1108    WAITFLAGS(cb, LL_MODE_DECODER_IDLE);
1109
1110    OUT_RING_QW_AGP(cb, 0xc00,
1111	((control->picture_structure & XVMC_FRAME_PICTURE) << 2) |
1112	((control->picture_coding_type & 3) << 4) |
1113	((control->flags & XVMC_ALTERNATE_SCAN) ? (1 << 6) : 0));
1114
1115    if (!(ctx->intraLoaded)) {
1116	OUT_RING_QW_AGP(cb, 0xc5c, 0);
1117	for (j = 0; j < 64; j += 4) {
1118	    OUT_RING_QW_AGP(cb, 0xc60,
1119		ctx->intra_quantiser_matrix[j] |
1120		(ctx->intra_quantiser_matrix[j + 1] << 8) |
1121		(ctx->intra_quantiser_matrix[j + 2] << 16) |
1122		(ctx->intra_quantiser_matrix[j + 3] << 24));
1123	}
1124	ctx->intraLoaded = 1;
1125    }
1126
1127    if (!(ctx->nonIntraLoaded)) {
1128	OUT_RING_QW_AGP(cb, 0xc5c, 1);
1129	for (j = 0; j < 64; j += 4) {
1130	    OUT_RING_QW_AGP(cb, 0xc60,
1131		ctx->non_intra_quantiser_matrix[j] |
1132		(ctx->non_intra_quantiser_matrix[j + 1] << 8) |
1133		(ctx->non_intra_quantiser_matrix[j + 2] << 16) |
1134		(ctx->non_intra_quantiser_matrix[j + 3] << 24));
1135	}
1136	ctx->nonIntraLoaded = 1;
1137    }
1138
1139    if (!(ctx->chromaIntraLoaded)) {
1140	OUT_RING_QW_AGP(cb, 0xc5c, 2);
1141	for (j = 0; j < 64; j += 4) {
1142	    OUT_RING_QW_AGP(cb, 0xc60,
1143		ctx->chroma_intra_quantiser_matrix[j] |
1144		(ctx->chroma_intra_quantiser_matrix[j + 1] << 8) |
1145		(ctx->chroma_intra_quantiser_matrix[j + 2] << 16) |
1146		(ctx->chroma_intra_quantiser_matrix[j + 3] << 24));
1147	}
1148	ctx->chromaIntraLoaded = 1;
1149    }
1150
1151    if (!(ctx->chromaNonIntraLoaded)) {
1152	OUT_RING_QW_AGP(cb, 0xc5c, 3);
1153	for (j = 0; j < 64; j += 4) {
1154	    OUT_RING_QW_AGP(cb, 0xc60,
1155		ctx->chroma_non_intra_quantiser_matrix[j] |
1156		(ctx->chroma_non_intra_quantiser_matrix[j + 1] << 8) |
1157		(ctx->chroma_non_intra_quantiser_matrix[j + 2] << 16) |
1158		(ctx->chroma_non_intra_quantiser_matrix[j + 3] << 24));
1159	}
1160	ctx->chromaNonIntraLoaded = 1;
1161    }
1162
1163    OUT_RING_QW_AGP(cb, 0xc90,
1164	((mb_width * mb_height) & 0x3fff) |
1165	((control->flags & XVMC_PRED_DCT_FRAME) ? (1 << 14) : 0) |
1166	((control->flags & XVMC_TOP_FIELD_FIRST) ? (1 << 15) : 0) |
1167	((control->mpeg_coding == XVMC_MPEG_2) ? (1 << 16) : 0) |
1168	((mb_width & 0xff) << 18));
1169
1170    OUT_RING_QW_AGP(cb, 0xc94,
1171	((control->flags & XVMC_CONCEALMENT_MOTION_VECTORS) ? 1 : 0) |
1172	((control->flags & XVMC_Q_SCALE_TYPE) ? 2 : 0) |
1173	((control->intra_dc_precision & 3) << 2) |
1174	(((1 + 0x100000 / mb_width) & 0xfffff) << 4) |
1175	((control->flags & XVMC_INTRA_VLC_FORMAT) ? (1 << 24) : 0));
1176
1177    OUT_RING_QW_AGP(cb, 0xc98,
1178	(((control->FHMV_range) & 0xf) << 0) |
1179	(((control->FVMV_range) & 0xf) << 4) |
1180	(((control->BHMV_range) & 0xf) << 8) |
1181	(((control->BVMV_range) & 0xf) << 12) |
1182	((control->flags & XVMC_SECOND_FIELD) ? (1 << 20) : 0) |
1183	(0x0a6 << 16));
1184
1185}
1186
1187void
1188viaMpegReset(void *xlp)
1189{
1190    int i, j;
1191    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1192    ViaCommandBuffer *cb = &xl->agpBuf;
1193
1194    BEGIN_HEADER6_DATA(cb, xl, 99);
1195    WAITFLAGS(cb, LL_MODE_DECODER_IDLE);
1196
1197    OUT_RING_QW_AGP(cb, 0xcf0, 0);
1198
1199    for (i = 0; i < 6; i++) {
1200	OUT_RING_QW_AGP(cb, 0xcc0, 0);
1201	OUT_RING_QW_AGP(cb, 0xc0c, 0x43 | 0x20);
1202	for (j = 0xc10; j < 0xc20; j += 4)
1203	    OUT_RING_QW_AGP(cb, j, 0);
1204    }
1205
1206    OUT_RING_QW_AGP(cb, 0xc0c, 0x1c3);
1207    for (j = 0xc10; j < 0xc20; j += 4)
1208	OUT_RING_QW_AGP(cb, j, 0);
1209
1210    for (i = 0; i < 19; i++)
1211	OUT_RING_QW_AGP(cb, 0xc08, 0);
1212
1213    OUT_RING_QW_AGP(cb, 0xc98, 0x400000);
1214
1215    for (i = 0; i < 6; i++) {
1216	OUT_RING_QW_AGP(cb, 0xcc0, 0);
1217	OUT_RING_QW_AGP(cb, 0xc0c, 0x1c3 | 0x20);
1218	for (j = 0xc10; j < 0xc20; j += 4)
1219	    OUT_RING_QW_AGP(cb, j, 0);
1220    }
1221    OUT_RING_QW_AGP(cb, 0xcf0, 0);
1222
1223}
1224
1225void
1226viaMpegWriteSlice(void *xlp, CARD8 * slice, int nBytes, CARD32 sCode)
1227{
1228    int i, n, r;
1229    CARD32 *buf;
1230    int count;
1231    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1232    ViaCommandBuffer *cb = &xl->agpBuf;
1233
1234    if (xl->errors & (LL_DECODER_TIMEDOUT |
1235	    LL_IDCT_FIFO_ERROR | LL_SLICE_FIFO_ERROR | LL_SLICE_FAULT))
1236	return;
1237
1238    n = nBytes >> 2;
1239    if (sCode)
1240	nBytes += 4;
1241    r = nBytes & 3;
1242    buf = (CARD32 *) slice;
1243
1244    if (r)
1245	nBytes += 4 - r;
1246
1247    nBytes += 8;
1248
1249    BEGIN_HEADER6_DATA(cb, xl, 2);
1250    WAITFLAGS(cb, LL_MODE_DECODER_IDLE);
1251    OUT_RING_QW_AGP(cb, 0xc9c, nBytes);
1252
1253    if (sCode)
1254	OUT_RING_QW_AGP(cb, 0xca0, sCode);
1255
1256    i = 0;
1257    count = 0;
1258
1259    do {
1260	count += (LL_AGP_CMDBUF_SIZE - 20);
1261	count = (count > n) ? n : count;
1262	BEGIN_HEADER5_DATA(cb, xl, (count - i), 0xca0);
1263
1264	for (; i < count; i++) {
1265	    OUT_RING_AGP(cb, *buf++);
1266	}
1267	finish_header_agp(cb);
1268    } while (i < n);
1269
1270    BEGIN_HEADER5_DATA(cb, xl, 3, 0xca0);
1271
1272    if (r) {
1273	OUT_RING_AGP(cb, *buf & ((1 << (r << 3)) - 1));
1274    }
1275    OUT_RING_AGP(cb, 0);
1276    OUT_RING_AGP(cb, 0);
1277    finish_header_agp(cb);
1278}
1279
1280void
1281viaVideoSubPictureOffLocked(void *xlp)
1282{
1283
1284    CARD32 stride;
1285    int proReg = REG_HQV1_INDEX;
1286    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1287    ViaCommandBuffer *cb = xl->videoBuf;
1288
1289    if (xl->videoBuf == &xl->agpBuf)
1290	syncDMA(xl, 1);
1291    stride = VIDIN(xl, proReg | SUBP_CONTROL_STRIDE);
1292    WAITFLAGS(cb, LL_MODE_VIDEO);
1293    BEGIN_HEADER6_DATA(cb, xl, 1);
1294    OUT_RING_QW_AGP(cb, proReg | SUBP_CONTROL_STRIDE,
1295	stride & ~SUBP_HQV_ENABLE);
1296}
1297
1298void
1299viaVideoSubPictureLocked(void *xlp, ViaXvMCSubPicture * pViaSubPic)
1300{
1301
1302    unsigned i;
1303    CARD32 cWord;
1304    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1305    int proReg = REG_HQV1_INDEX;
1306    ViaCommandBuffer *cb = xl->videoBuf;
1307
1308    if (xl->videoBuf == &xl->agpBuf)
1309	syncDMA(xl, 1);
1310    WAITFLAGS(cb, LL_MODE_VIDEO);
1311    BEGIN_HEADER6_DATA(cb, xl, VIA_SUBPIC_PALETTE_SIZE + 2);
1312    for (i = 0; i < VIA_SUBPIC_PALETTE_SIZE; ++i) {
1313	OUT_RING_QW_AGP(cb, proReg | RAM_TABLE_CONTROL,
1314	    pViaSubPic->palette[i]);
1315    }
1316
1317    cWord = (pViaSubPic->stride & SUBP_STRIDE_MASK) | SUBP_HQV_ENABLE;
1318    cWord |= (pViaSubPic->ia44) ? SUBP_IA44 : SUBP_AI44;
1319    OUT_RING_QW_AGP(cb, proReg | SUBP_STARTADDR, pViaSubPic->offset);
1320    OUT_RING_QW_AGP(cb, proReg | SUBP_CONTROL_STRIDE, cWord);
1321}
1322
1323void
1324viaBlit(void *xlp, unsigned bpp, unsigned srcBase,
1325    unsigned srcPitch, unsigned dstBase, unsigned dstPitch,
1326    unsigned w, unsigned h, int xdir, int ydir, unsigned blitMode,
1327    unsigned color)
1328{
1329
1330    CARD32 dwGEMode = 0, srcY = 0, srcX, dstY = 0, dstX;
1331    CARD32 cmd;
1332    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1333    ViaCommandBuffer *cb = &xl->agpBuf;
1334
1335    if (!w || !h)
1336	return;
1337
1338    finish_header_agp(cb);
1339
1340    switch (bpp) {
1341    case 16:
1342	dwGEMode |= VIA_GEM_16bpp;
1343	break;
1344    case 32:
1345	dwGEMode |= VIA_GEM_32bpp;
1346	break;
1347    default:
1348	dwGEMode |= VIA_GEM_8bpp;
1349	break;
1350    }
1351
1352    srcX = srcBase & 31;
1353    dstX = dstBase & 31;
1354    switch (bpp) {
1355    case 16:
1356	dwGEMode |= VIA_GEM_16bpp;
1357	srcX >>= 2;
1358	dstX >>= 2;
1359	break;
1360    case 32:
1361	dwGEMode |= VIA_GEM_32bpp;
1362	srcX >>= 4;
1363	dstX >>= 4;
1364	break;
1365    default:
1366	dwGEMode |= VIA_GEM_8bpp;
1367	break;
1368    }
1369
1370    BEGIN_RING_AGP(cb, xl, 20);
1371    WAITFLAGS(cb, LL_MODE_2D);
1372
1373    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_GEMODE), dwGEMode);
1374    cmd = 0;
1375
1376    if (xdir < 0) {
1377	cmd |= VIA_GEC_DECX;
1378	srcX += (w - 1);
1379	dstX += (w - 1);
1380    }
1381    if (ydir < 0) {
1382	cmd |= VIA_GEC_DECY;
1383	srcY += (h - 1);
1384	dstY += (h - 1);
1385    }
1386
1387    switch (blitMode) {
1388    case VIABLIT_TRANSCOPY:
1389	OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_SRCCOLORKEY), color);
1390	OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_KEYCONTROL), 0x4000);
1391	cmd |= VIA_GEC_BLT | (VIA_BLIT_COPY << 24);
1392	break;
1393    case VIABLIT_FILL:
1394	OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_FGCOLOR), color);
1395	cmd |= VIA_GEC_BLT | VIA_GEC_FIXCOLOR_PAT | (VIA_BLIT_FILL << 24);
1396	break;
1397    default:
1398	OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_KEYCONTROL), 0x0);
1399	cmd |= VIA_GEC_BLT | (VIA_BLIT_COPY << 24);
1400    }
1401
1402    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_SRCBASE), (srcBase & ~31) >> 3);
1403    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_DSTBASE), (dstBase & ~31) >> 3);
1404    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_PITCH), VIA_PITCH_ENABLE |
1405	(srcPitch >> 3) | (((dstPitch) >> 3) << 16));
1406    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_SRCPOS), ((srcY << 16) | srcX));
1407    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_DSTPOS), ((dstY << 16) | dstX));
1408    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_DIMENSION),
1409	(((h - 1) << 16) | (w - 1)));
1410    OUT_RING_QW_AGP(cb, H1_ADDR(VIA_REG_GECMD), cmd);
1411}
1412
1413unsigned
1414syncXvMCLowLevel(void *xlp, unsigned int mode, unsigned int doSleep,
1415    CARD32 timeStamp)
1416{
1417    unsigned errors;
1418    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1419
1420    if (mode == 0) {
1421	errors = xl->errors;
1422	xl->errors = 0;
1423	return errors;
1424    }
1425
1426    if ((mode & (LL_MODE_VIDEO | LL_MODE_3D)) || !xl->use_agp) {
1427	if (xl->performLocking)
1428	    hwlLock(xl, 0);
1429	if ((xl->videoBuf == &xl->agpBuf) || (mode != LL_MODE_VIDEO))
1430	    syncDMA(xl, doSleep);
1431	if (mode & LL_MODE_3D)
1432	    syncAccel(xl, mode, doSleep);
1433	if (mode & LL_MODE_VIDEO)
1434	    syncVideo(xl, doSleep);
1435	if (xl->performLocking)
1436	    hwlUnlock(xl, 0);
1437    } else {
1438	viaDMAWaitTimeStamp(xl, timeStamp, doSleep);
1439    }
1440
1441    if (mode & (LL_MODE_DECODER_SLICE | LL_MODE_DECODER_IDLE))
1442	syncMpeg(xl, mode, doSleep);
1443
1444    errors = xl->errors;
1445    xl->errors = 0;
1446
1447    return errors;
1448}
1449
1450static int
1451updateLowLevelBuf(XvMCLowLevel * xl, LowLevelBuffer * buf,
1452    unsigned width, unsigned height)
1453{
1454    unsigned stride, size;
1455    drm_via_mem_t *mem = &buf->mem;
1456    int ret;
1457
1458    stride = (width + 31) & ~31;
1459    size = stride * height + (xl->fbDepth >> 3);
1460
1461    if (size != mem->size) {
1462	if (mem->size)
1463	    drmCommandWrite(xl->fd, DRM_VIA_FREEMEM, mem, sizeof(*mem));
1464	mem->context = *(xl->drmcontext);
1465	mem->size = size;
1466	mem->type = VIA_MEM_VIDEO;
1467
1468	if (((ret = drmCommandWriteRead(xl->fd, DRM_VIA_ALLOCMEM, mem,
1469			sizeof(*mem))) < 0) || mem->size != size) {
1470	    mem->size = 0;
1471	    return -1;
1472	}
1473    }
1474
1475    buf->offset = (mem->offset + 31) & ~31;
1476    buf->stride = stride;
1477    buf->height = height;
1478    return 0;
1479}
1480
1481static void
1482cleanupLowLevelBuf(XvMCLowLevel * xl, LowLevelBuffer * buf)
1483{
1484    drm_via_mem_t *mem = &buf->mem;
1485
1486    if (mem->size)
1487	drmCommandWrite(xl->fd, DRM_VIA_FREEMEM, mem, sizeof(*mem));
1488    mem->size = 0;
1489}
1490
1491static void *
1492releaseXvMCLowLevel(XvMCLowLevel * xl)
1493{
1494    switch (xl->state) {
1495    case ll_llBuf:
1496	cleanupLowLevelBuf(xl, &xl->scale);
1497    case ll_timeStamp:
1498	viaDMACleanupTimeStamp(xl);
1499    case ll_pciBuf:
1500	free(xl->pciBuf.buf);
1501    case ll_agpBuf:
1502	free(xl->agpBuf.buf);
1503    case ll_init:
1504	free(xl);
1505    default:
1506	;
1507    }
1508    return NULL;
1509}
1510
1511void *
1512initXvMCLowLevel(int fd, drm_context_t * ctx,
1513    drmLockPtr hwLock, drmAddress mmioAddress,
1514    drmAddress fbAddress, unsigned fbStride, unsigned fbDepth,
1515    unsigned width, unsigned height, int useAgp, unsigned chipId)
1516{
1517    XvMCLowLevel *xl;
1518
1519    if (chipId != PCI_CHIP_VT3259 && chipId != PCI_CHIP_VT3364) {
1520	fprintf(stderr, "You are using an XvMC driver for the wrong chip.\n");
1521	fprintf(stderr, "Chipid is 0x%04x.\n", chipId);
1522	return NULL;
1523    }
1524
1525    xl = (XvMCLowLevel *) malloc(sizeof(XvMCLowLevel));
1526    if (!xl)
1527	return NULL;
1528    xl->state = ll_init;
1529
1530    xl->agpBuf.buf = (CARD32 *) malloc(LL_AGP_CMDBUF_SIZE * sizeof(CARD32));
1531    if (!xl->agpBuf.buf)
1532	return releaseXvMCLowLevel(xl);
1533    xl->state = ll_agpBuf;
1534    xl->agpBuf.bufSize = LL_AGP_CMDBUF_SIZE;
1535    xl->agpBuf.flushFunc = &agpFlush;
1536    xl->agpBuf.pos = 0;
1537    xl->agpBuf.mode = 0;
1538    xl->agpBuf.waitFlags = 0;
1539
1540    xl->pciBuf.buf = (CARD32 *) malloc(LL_PCI_CMDBUF_SIZE * sizeof(CARD32));
1541    if (!xl->pciBuf.buf)
1542	return releaseXvMCLowLevel(xl);
1543    xl->state = ll_pciBuf;
1544    xl->pciBuf.bufSize = LL_PCI_CMDBUF_SIZE;
1545    xl->pciBuf.flushFunc = &pciFlush;
1546    xl->pciBuf.pos = 0;
1547    xl->pciBuf.mode = 0;
1548    xl->pciBuf.waitFlags = 0;
1549
1550    xl->use_agp = useAgp;
1551    xl->fd = fd;
1552    xl->drmcontext = ctx;
1553    xl->hwLock = hwLock;
1554    xl->mmioAddress = mmioAddress;
1555    xl->fbAddress = fbAddress;
1556    xl->fbDepth = fbDepth;
1557    xl->fbStride = fbStride;
1558    xl->width = width;
1559    xl->height = height;
1560    xl->performLocking = 1;
1561    xl->errors = 0;
1562    xl->agpSync = 0;
1563    xl->chipId = chipId;
1564
1565    if (viaDMAInitTimeStamp(xl))
1566	return releaseXvMCLowLevel(xl);
1567    xl->state = ll_timeStamp;
1568
1569    xl->scale.mem.size = 0;
1570    xl->back.mem.size = 0;
1571
1572    if (updateLowLevelBuf(xl, &xl->scale, width, height))
1573	return releaseXvMCLowLevel(xl);
1574    xl->state = ll_llBuf;
1575
1576#ifdef VIDEO_DMA
1577    xl->videoBuf = &xl->agpBuf;
1578#else
1579    xl->videoBuf = &xl->pciBuf;
1580#endif
1581
1582    return xl;
1583}
1584
1585void
1586setLowLevelLocking(void *xlp, int performLocking)
1587{
1588    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1589
1590    xl->performLocking = performLocking;
1591}
1592
1593void
1594closeXvMCLowLevel(void *xlp)
1595{
1596    XvMCLowLevel *xl = (XvMCLowLevel *) xlp;
1597
1598    releaseXvMCLowLevel(xl);
1599}
1600
1601#if 0				       /* Under development */
1602static CARD32
1603computeDownScaling(int dst, int *src)
1604{
1605    CARD32 value = 0x800;
1606
1607    while (*src > dst) {
1608	*src >>= 1;
1609	value--;
1610    }
1611    return value;
1612}
1613
1614static void
1615computeHQVScaleAndFilter(XvMCLowLevel * xl)
1616{
1617    int srcW, srcH;
1618    const XvMCRegion *src = &xl->sRegion, *back = &xl->dRegion;
1619
1620    xl->downScaling = FALSE;
1621
1622    if (back->w < src->w || back->h < src->h) {
1623
1624	xl->downScaling = TRUE;
1625	srcW = src->w;
1626	srcH = src->h;
1627
1628	xl->downScaleW = (back->w >= srcW) ? 0 :
1629	    HQV_SCALE_ENABLE | HQV_SCALE_DOWN |
1630	    (computeDownScaling(back->w, &srcW));
1631
1632	xl->downScaleH = (back->h >= srcH) ? 0 :
1633	    HQV_SCALE_ENABLE | HQV_SCALE_DOWN |
1634	    (computeDownScaling(back->h, &srcH));
1635
1636    }
1637
1638    xl->upScaleW =
1639	(back->w == srcW) ? 0 : (0x800 * srcW / back->w) | HQV_SCALE_ENABLE;
1640    xl->upScaleH =
1641	(back->h == srcH) ? 0 : (0x800 * srcH / back->h) | HQV_SCALE_ENABLE;
1642}
1643
1644static int
1645setupBackBuffer(XvMCLowLevel * xl)
1646{
1647    return updateLowLevelBuf(xl, &xl->back, xl->dRegion.w, xl->dRegion.h);
1648}
1649
1650#endif
1651