1#ifdef HAVE_CONFIG_H
2#include "config.h"
3#endif
4
5#include "xf86.h"
6#include "xf86_OSproc.h"
7#include "compiler.h"
8#include "xf86Pci.h"
9#include "xf86fbman.h"
10#include "regionstr.h"
11
12#include "mga_reg.h"
13#include "mga.h"
14#include "mga_macros.h"
15#include "xf86xv.h"
16#include <X11/extensions/Xv.h>
17
18#ifdef USE_XAA
19#include "xaa.h"
20#include "xaalocal.h"
21#endif
22
23#include "dixstruct.h"
24#include "fourcc.h"
25
/*
 * Delays, in milliseconds, used by the video timer callback: OFF_DELAY is
 * how long a stopped overlay stays enabled before it is switched off,
 * FREE_DELAY how long the offscreen video memory is kept afterwards.
 */
#define OFF_DELAY		250
#define FREE_DELAY		15000

/* Bits stored in the port private's videoStatus field. */
#define OFF_TIMER		0x01	/* waiting for OFF_DELAY to expire */
#define FREE_TIMER		0x02	/* waiting for FREE_DELAY to expire */
#define CLIENT_VIDEO_ON		0x04	/* the overlay is currently displaying */

/* Either of the two timer bits. */
#define TIMER_MASK		(OFF_TIMER | FREE_TIMER)

/* Number of ports exposed by the textured adaptor. */
#define MGA_MAX_PORTS		32
36
37static void MGAInitOffscreenImages(ScreenPtr);
38
39static XF86VideoAdaptorPtr MGASetupImageVideoOverlay(ScreenPtr);
40static int  MGASetPortAttributeOverlay(ScrnInfoPtr, Atom, INT32, pointer);
41static int  MGAGetPortAttributeOverlay(ScrnInfoPtr, Atom ,INT32 *, pointer);
42
43static XF86VideoAdaptorPtr MGASetupImageVideoTexture(ScreenPtr);
44static int  MGASetPortAttributeTexture(ScrnInfoPtr, Atom, INT32, pointer);
45static int  MGAGetPortAttributeTexture(ScrnInfoPtr, Atom ,INT32 *, pointer);
46
47static void MGAStopVideo(ScrnInfoPtr, pointer, Bool);
48static void MGAQueryBestSize(ScrnInfoPtr, Bool, short, short, short, short,
49			unsigned int *, unsigned int *, pointer);
50static int  MGAPutImage(ScrnInfoPtr, short, short, short, short, short,
51			short, short, short, int, unsigned char*, short,
52			short, Bool, RegionPtr, pointer, DrawablePtr);
53static int  MGAQueryImageAttributes(ScrnInfoPtr, int, unsigned short *,
54			unsigned short *,  int *, int *);
55static void MGAFreeMemory(ScrnInfoPtr pScrn, void *mem_struct);
56
57static void MGAResetVideoOverlay(ScrnInfoPtr);
58
59static void MGAVideoTimerCallback(ScrnInfoPtr pScrn, Time time);
60
61static XF86VideoAdaptorPtr MGASetupImageVideoILOAD(ScreenPtr);
62static int MGAPutImageILOAD(ScrnInfoPtr, short, short, short, short, short,
63			    short, short, short, int, unsigned char*, short,
64			    short, Bool, RegionPtr, pointer, DrawablePtr);
65
66#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
67
68static Atom xvBrightness, xvContrast, xvColorKey, xvDoubleBuffer;
69
70#ifdef USE_EXA
71static void
72MGAVideoSave(ScreenPtr pScreen, ExaOffscreenArea *area)
73{
74    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
75    MGAPtr pMga = MGAPTR(pScrn);
76    MGAPortPrivPtr pPriv = pMga->portPrivate;
77
78    if (pPriv->video_memory == area)
79        pPriv->video_memory = NULL;
80}
81#endif /* USE_EXA */
82
83void MGAInitVideo(ScreenPtr pScreen)
84{
85    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
86    XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
87    XF86VideoAdaptorPtr newAdaptor = NULL;
88    MGAPtr pMga = MGAPTR(pScrn);
89    int num_adaptors;
90
91    if ((pScrn->bitsPerPixel != 8) && !pMga->NoAccel &&
92	(pMga->SecondCrtc == FALSE) &&
93	((pMga->Chipset == PCI_CHIP_MGA2164) ||
94	 (pMga->Chipset == PCI_CHIP_MGA2164_AGP) ||
95/*	 (pMga->Chipset == PCI_CHIP_MGA2064) ||     */
96	 (pMga->Chipset == PCI_CHIP_MGAG200) ||
97	 (pMga->Chipset == PCI_CHIP_MGAG200_PCI) ||
98	 (pMga->Chipset == PCI_CHIP_MGAG400) ||
99	 (pMga->Chipset == PCI_CHIP_MGAG550))) {
100	if ((pMga->Chipset == PCI_CHIP_MGA2164) ||
101/*	    (pMga->Chipset == PCI_CHIP_MGA2064) ||   */
102	    (pMga->Chipset == PCI_CHIP_MGA2164_AGP)) {
103	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using MGA 2164W ILOAD video\n");
104	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
105		       "This is an experimental driver and may not work on your machine.\n");
106
107	    newAdaptor = MGASetupImageVideoILOAD(pScreen);
108	    pMga->TexturedVideo = TRUE;
109	    /* ^^^ this is not really true but the ILOAD scaler shares
110	     * much more code with the textured video than the overlay
111	     */
112	} else if (pMga->TexturedVideo && (pScrn->bitsPerPixel != 24)) {
113	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using texture video\n");
114	    newAdaptor = MGASetupImageVideoTexture(pScreen);
115	    pMga->TexturedVideo = TRUE;
116	} else {
117	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using overlay video\n");
118	    newAdaptor = MGASetupImageVideoOverlay(pScreen);
119	    pMga->TexturedVideo = FALSE;
120	}
121
122	MGAInitOffscreenImages(pScreen);
123    }
124
125    num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
126
127    if(newAdaptor) {
128	if(!num_adaptors) {
129	    num_adaptors = 1;
130	    adaptors = &newAdaptor;
131	} else {
132	    /* need to free this someplace */
133	    newAdaptors = malloc((num_adaptors + 1) * sizeof(XF86VideoAdaptorPtr *));
134	    if(newAdaptors) {
135		memcpy(newAdaptors, adaptors, num_adaptors *
136					sizeof(XF86VideoAdaptorPtr));
137		newAdaptors[num_adaptors] = newAdaptor;
138		adaptors = newAdaptors;
139		num_adaptors++;
140	    }
141	}
142    }
143
144    if(num_adaptors)
145        xf86XVScreenInit(pScreen, adaptors, num_adaptors);
146
147    free(newAdaptors);
148}
149
150/* client libraries expect an encoding */
151static XF86VideoEncodingRec DummyEncoding[2] =
152{
153 {   /* overlay limit */
154   0,
155   "XV_IMAGE",
156   1024, 1024,
157   {1, 1}
158 },
159 {  /* texture limit */
160   0,
161   "XV_IMAGE",
162   2046, 2046,
163   {1, 1}
164 }
165};
166
167#define NUM_FORMATS 6
168
169static XF86VideoFormatRec Formats[NUM_FORMATS] =
170{
171   {15, TrueColor}, {16, TrueColor}, {24, TrueColor},
172   {15, DirectColor}, {16, DirectColor}, {24, DirectColor}
173};
174
175#define NUM_ATTRIBUTES_OVERLAY 4
176
177static XF86AttributeRec Attributes[NUM_ATTRIBUTES_OVERLAY] =
178{
179   {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
180   {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"},
181   {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"},
182   {XvSettable | XvGettable, 0, 1, "XV_DOUBLE_BUFFER"}
183};
184
185#define NUM_IMAGES 4
186
187static XF86ImageRec Images[NUM_IMAGES] =
188{
189	XVIMAGE_YUY2,
190	XVIMAGE_YV12,
191	XVIMAGE_I420,
192	XVIMAGE_UYVY
193};
194
195static void
196MGAResetVideoOverlay(ScrnInfoPtr pScrn)
197{
198    MGAPtr pMga = MGAPTR(pScrn);
199    MGAPortPrivPtr pPriv = pMga->portPrivate;
200
201    CHECK_DMA_QUIESCENT(pMga, pScrn);
202
203    outMGAdac(0x51, 0x01); /* keying on */
204    outMGAdac(0x52, 0xff); /* full mask */
205    outMGAdac(0x53, 0xff);
206    outMGAdac(0x54, 0xff);
207
208    outMGAdac(0x55, (pPriv->colorKey & pScrn->mask.red) >>
209		    pScrn->offset.red);
210    outMGAdac(0x56, (pPriv->colorKey & pScrn->mask.green) >>
211		    pScrn->offset.green);
212    outMGAdac(0x57, (pPriv->colorKey & pScrn->mask.blue) >>
213		    pScrn->offset.blue);
214
215    OUTREG(MGAREG_BESLUMACTL, ((pPriv->brightness & 0xff) << 16) |
216			       (pPriv->contrast & 0xff));
217}
218
219
220static XF86VideoAdaptorPtr
221MGAAllocAdaptor(ScrnInfoPtr pScrn, Bool doublebuffer)
222{
223    XF86VideoAdaptorPtr adapt;
224    MGAPtr pMga = MGAPTR(pScrn);
225    MGAPortPrivPtr pPriv;
226    int i;
227
228    if(!(adapt = xf86XVAllocateVideoAdaptorRec(pScrn)))
229	return NULL;
230
231    if(!(pPriv = calloc(1, sizeof(MGAPortPrivRec) +
232			(sizeof(DevUnion) * MGA_MAX_PORTS))))
233    {
234	free(adapt);
235	return NULL;
236    }
237
238    adapt->pPortPrivates = (DevUnion*)(&pPriv[1]);
239
240    for(i = 0; i < MGA_MAX_PORTS; i++)
241	adapt->pPortPrivates[i].val = i;
242
243    xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
244    xvContrast   = MAKE_ATOM("XV_CONTRAST");
245    xvColorKey   = MAKE_ATOM("XV_COLORKEY");
246    xvDoubleBuffer = MAKE_ATOM("XV_DOUBLE_BUFFER");
247
248    pPriv->colorKey = pMga->videoKey;
249    pPriv->videoStatus = 0;
250    pPriv->brightness = 0;
251    pPriv->contrast = 128;
252    pPriv->lastPort = -1;
253    pPriv->doubleBuffer = doublebuffer;
254    pPriv->currentBuffer = 0;
255
256    pMga->adaptor = adapt;
257    pMga->portPrivate = pPriv;
258
259    return adapt;
260}
261
262static XF86VideoAdaptorPtr
263MGASetupImageVideoOverlay(ScreenPtr pScreen)
264{
265    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
266    MGAPtr pMga = MGAPTR(pScrn);
267    XF86VideoAdaptorPtr adapt;
268
269    adapt = MGAAllocAdaptor(pScrn, TRUE);
270    if (adapt == NULL)
271	return NULL;
272
273    adapt->type = XvWindowMask | XvInputMask | XvImageMask;
274    adapt->flags = VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT;
275    adapt->name = "Matrox G-Series Backend Scaler";
276    adapt->nEncodings = 1;
277    adapt->pEncodings = &DummyEncoding[0];
278    adapt->nFormats = NUM_FORMATS;
279    adapt->pFormats = Formats;
280    adapt->nPorts = 1;
281    adapt->pAttributes = Attributes;
282    if (pMga->Chipset == PCI_CHIP_MGAG400 ||
283	pMga->Chipset == PCI_CHIP_MGAG550) {
284	adapt->nImages = 4;
285	adapt->nAttributes = 4;
286    } else {
287	adapt->nImages = 3;
288	adapt->nAttributes = 1;
289    }
290    adapt->pImages = Images;
291    adapt->PutVideo = NULL;
292    adapt->PutStill = NULL;
293    adapt->GetVideo = NULL;
294    adapt->GetStill = NULL;
295    adapt->StopVideo = MGAStopVideo;
296    adapt->SetPortAttribute = MGASetPortAttributeOverlay;
297    adapt->GetPortAttribute = MGAGetPortAttributeOverlay;
298    adapt->QueryBestSize = MGAQueryBestSize;
299    adapt->PutImage = MGAPutImage;
300    adapt->QueryImageAttributes = MGAQueryImageAttributes;
301
302    /* gotta uninit this someplace */
303    REGION_NULL(pScreen, &(pMga->portPrivate->clip));
304
305    MGAResetVideoOverlay(pScrn);
306
307    return adapt;
308}
309
310
311static XF86VideoAdaptorPtr
312MGASetupImageVideoTexture(ScreenPtr pScreen)
313{
314    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
315    XF86VideoAdaptorPtr adapt;
316    MGAPtr pMga = MGAPTR(pScrn);
317
318    adapt = MGAAllocAdaptor(pScrn, FALSE);
319    if (adapt == NULL)
320	return NULL;
321
322    adapt->type = XvWindowMask | XvInputMask | XvImageMask;
323    adapt->flags = 0;
324    adapt->name = "Matrox G-Series Texture Engine";
325    adapt->nEncodings = 1;
326    adapt->pEncodings = &DummyEncoding[1];
327    adapt->nFormats = NUM_FORMATS;
328    adapt->pFormats = Formats;
329    adapt->nPorts = MGA_MAX_PORTS;
330    adapt->pAttributes = NULL;
331    adapt->nAttributes = 0;
332    adapt->pImages = Images;
333    if (pMga->Chipset == PCI_CHIP_MGAG400 ||
334	pMga->Chipset == PCI_CHIP_MGAG550)
335	adapt->nImages = 4;
336    else
337	adapt->nImages = 3;
338    adapt->PutVideo = NULL;
339    adapt->PutStill = NULL;
340    adapt->GetVideo = NULL;
341    adapt->GetStill = NULL;
342    adapt->StopVideo = MGAStopVideo;
343    adapt->SetPortAttribute = MGASetPortAttributeTexture;
344    adapt->GetPortAttribute = MGAGetPortAttributeTexture;
345    adapt->QueryBestSize = MGAQueryBestSize;
346    adapt->PutImage = MGAPutImage;
347    adapt->QueryImageAttributes = MGAQueryImageAttributes;
348
349    return adapt;
350}
351
352
353static void
354MGAStopVideo(ScrnInfoPtr pScrn, pointer data, Bool shutdown)
355{
356  MGAPtr pMga = MGAPTR(pScrn);
357  MGAPortPrivPtr pPriv = pMga->portPrivate;
358
359  if(pMga->TexturedVideo) return;
360
361  REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
362
363  if(shutdown) {
364     if(pPriv->videoStatus & CLIENT_VIDEO_ON)
365	OUTREG(MGAREG_BESCTL, 0);
366     if (pPriv->video_memory) {
367         MGAFreeMemory(pScrn, pPriv->video_memory);
368         pPriv->video_memory = NULL;
369     }
370     pPriv->videoStatus = 0;
371  } else {
372     if(pPriv->videoStatus & CLIENT_VIDEO_ON) {
373	pPriv->videoStatus |= OFF_TIMER;
374	pPriv->offTime = currentTime.milliseconds + OFF_DELAY;
375     }
376  }
377}
378
379static int
380MGASetPortAttributeOverlay(
381  ScrnInfoPtr pScrn,
382  Atom attribute,
383  INT32 value,
384  pointer data
385){
386  MGAPtr pMga = MGAPTR(pScrn);
387  MGAPortPrivPtr pPriv = pMga->portPrivate;
388
389  CHECK_DMA_QUIESCENT(pMga, pScrn);
390
391  if(attribute == xvBrightness) {
392	if((value < -128) || (value > 127))
393	   return BadValue;
394	pPriv->brightness = value;
395	OUTREG(MGAREG_BESLUMACTL, ((pPriv->brightness & 0xff) << 16) |
396			           (pPriv->contrast & 0xff));
397  } else
398  if(attribute == xvContrast) {
399	if((value < 0) || (value > 255))
400	   return BadValue;
401	pPriv->contrast = value;
402	OUTREG(MGAREG_BESLUMACTL, ((pPriv->brightness & 0xff) << 16) |
403			           (pPriv->contrast & 0xff));
404  } else
405  if(attribute == xvColorKey) {
406	pPriv->colorKey = value;
407	outMGAdac(0x55, (pPriv->colorKey & pScrn->mask.red) >>
408		    pScrn->offset.red);
409	outMGAdac(0x56, (pPriv->colorKey & pScrn->mask.green) >>
410		    pScrn->offset.green);
411	outMGAdac(0x57, (pPriv->colorKey & pScrn->mask.blue) >>
412		    pScrn->offset.blue);
413	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
414  } else
415  if(attribute == xvDoubleBuffer) {
416	if((value < 0) || (value > 1))
417          return BadValue;
418	pPriv->doubleBuffer = value;
419  } else return BadMatch;
420
421  return Success;
422}
423
424static int
425MGAGetPortAttributeOverlay(
426  ScrnInfoPtr pScrn,
427  Atom attribute,
428  INT32 *value,
429  pointer data
430){
431  MGAPtr pMga = MGAPTR(pScrn);
432  MGAPortPrivPtr pPriv = pMga->portPrivate;
433
434  if(attribute == xvBrightness) {
435	*value = pPriv->brightness;
436  } else
437  if(attribute == xvContrast) {
438	*value = pPriv->contrast;
439  } else
440  if(attribute == xvDoubleBuffer) {
441        *value = pPriv->doubleBuffer ? 1 : 0;
442  } else
443  if(attribute == xvColorKey) {
444	*value = pPriv->colorKey;
445  } else return BadMatch;
446
447  return Success;
448}
449
450
451static int
452MGASetPortAttributeTexture(
453  ScrnInfoPtr pScrn,
454  Atom attribute,
455  INT32 value,
456  pointer data
457) {
458  return BadMatch;
459}
460
461
462static int
463MGAGetPortAttributeTexture(
464  ScrnInfoPtr pScrn,
465  Atom attribute,
466  INT32 *value,
467  pointer data
468){
469  return BadMatch;
470}
471
472static void
473MGAQueryBestSize(
474  ScrnInfoPtr pScrn,
475  Bool motion,
476  short vid_w, short vid_h,
477  short drw_w, short drw_h,
478  unsigned int *p_w, unsigned int *p_h,
479  pointer data
480){
481  *p_w = drw_w;
482  *p_h = drw_h;
483}
484
485
/*
 * Copy a rectangle of packed 2-bytes-per-pixel image data.
 *
 *   src, dst   first byte of the source / destination rectangle
 *   srcPitch   bytes between successive source rows
 *   dstPitch   bytes between successive destination rows
 *   h          number of rows to copy
 *   w          number of pixels per row (2 * w bytes are copied per row)
 *
 * Nothing is copied when h or w is not positive; previously a negative
 * row count kept the loop running and a negative width became a huge
 * memcpy() length.
 */
static void
MGACopyData(
  unsigned char *src,
  unsigned char *dst,
  int srcPitch,
  int dstPitch,
  int h,
  int w
){
    size_t rowBytes;
    int row;

    if ((h <= 0) || (w <= 0))
	return;

    rowBytes = (size_t)w << 1;

    for (row = 0; row < h; row++) {
	/* XXX Maybe this one needs big-endian fixes, too? -ReneR */
	memcpy(dst, src, rowBytes);
	src += srcPitch;
	dst += dstPitch;
    }
}
503
504static void
505MGACopyMungedData(
506   unsigned char *src1,
507   unsigned char *src2,
508   unsigned char *src3,
509   unsigned char *dst1,
510   int srcPitch,
511   int srcPitch2,
512   int dstPitch,
513   int h,
514   int w
515){
516   CARD32 *dst;
517   CARD8 *s1, *s2, *s3;
518   int i, j;
519
520   w >>= 1;
521
522   for(j = 0; j < h; j++) {
523        dst = (CARD32*)dst1;
524        s1 = src1;  s2 = src2;  s3 = src3;
525        i = w;
526        while(i > 4) {
527#if X_BYTE_ORDER == X_LITTLE_ENDIAN
528           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
529           dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
530           dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
531           dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
532#else
533           dst[0] = (s1[0] << 16) | s1[1] | (s3[0] << 24) | (s2[0] << 8);
534           dst[1] = (s1[2] << 16) | s1[3] | (s3[1] << 24) | (s2[1] << 8);
535           dst[2] = (s1[4] << 16) | s1[5] | (s3[2] << 24) | (s2[2] << 8);
536           dst[3] = (s1[6] << 16) | s1[7] | (s3[3] << 24) | (s2[3] << 8);
537#endif
538           dst += 4; s2 += 4; s3 += 4; s1 += 8;
539           i -= 4;
540        }
541
542        while(i--) {
543#if X_BYTE_ORDER == X_LITTLE_ENDIAN
544           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
545#else
546           dst[0] = (s1[0] << 16) | s1[1] | (s3[0] << 24) | (s2[0] << 8);
547#endif
548           dst++; s2++; s3++;
549           s1 += 2;
550        }
551
552        dst1 += dstPitch;
553        src1 += srcPitch;
554        if(j & 1) {
555            src2 += srcPitch2;
556            src3 += srcPitch2;
557        }
558   }
559}
560
561
/*
 * Allocate (or reuse) offscreen memory for video data.
 *
 *   pScrn       screen the memory belongs to
 *   mem_struct  in/out: the allocator's handle for a previous allocation
 *               (NULL if none); updated to the handle of the returned
 *               allocation, or to NULL when a new allocation fails
 *   size        requested size; handed to EXA unchanged and converted from
 *               bytes to pixels for XAA
 *
 * Returns the offset of the allocation (for XAA the linear offset times
 * the bytes per pixel), or 0 on failure.  An existing allocation that is
 * already large enough is returned as is.
 *
 * The handle is an ExaOffscreenArea when pMga->Exa is set and an
 * FBLinearPtr otherwise.
 */
static CARD32
MGAAllocateMemory(
   ScrnInfoPtr pScrn,
   void **mem_struct,
   int size
){
#if defined(USE_XAA) || defined(USE_EXA)
   MGAPtr pMga = MGAPTR(pScrn);
#endif /* defined(USE_XAA) || defined(USE_EXA) */
#ifdef USE_XAA
   ScreenPtr pScreen = xf86ScrnToScreen(pScrn);
#endif /* USE_XAA */
   int offset = 0;

#ifdef USE_EXA
   if (pMga->Exa) {
       ExaOffscreenArea *area = *mem_struct;

	if (area) {
	    /* Existing area is big enough: reuse it. */
	    if (area->size >= size)
		return area->offset;

	    /* Too small: release it and allocate a fresh one below. */
	    exaOffscreenFree(pScrn->pScreen, area);
	}

	/* MGAVideoSave is registered as the area's save callback. */
	area = exaOffscreenAlloc(pScrn->pScreen, size, 64, TRUE, MGAVideoSave,
				 NULL);
	*mem_struct = area;

	if (!area)
	    return 0;

	offset = area->offset;
   }
#endif /* USE_EXA */
#ifdef USE_XAA
   /*
    * NOTE(review): when both USE_EXA and USE_XAA are defined and EXA is
    * active, the two statements below still run (reading the EXA handle as
    * an FBLinearPtr and rescaling size), but their results are unused
    * because the body is guarded by !pMga->Exa.
    */
   FBLinearPtr linear = *mem_struct;
   int cpp = pMga->CurrentLayout.bitsPerPixel / 8;

   /* XAA allocates in units of pixels at the screen bpp, so adjust size
    * appropriately.
    */
   size = (size + cpp - 1) / cpp;

   if (!pMga->Exa) {
       if (linear) {
           /* Existing area is big enough: reuse it. */
           if (linear->size >= size)
               return linear->offset * cpp;

           /* Try growing it in place before giving it up. */
           if (xf86ResizeOffscreenLinear(linear, size))
               return linear->offset * cpp;

           xf86FreeOffscreenLinear(linear);
       }


       linear = xf86AllocateOffscreenLinear(pScreen, size, 16,
                                            NULL, NULL, NULL);
       *mem_struct = linear;

       if (!linear) {
           int max_size;

           /*
            * First attempt failed: if the largest obtainable block would
            * be big enough, purge unlocked areas and try once more.
            */
           xf86QueryLargestOffscreenLinear(pScreen, &max_size, 16,
                                           PRIORITY_EXTREME);

           if (max_size < size)
               return 0;

           xf86PurgeUnlockedOffscreenAreas(pScreen);

           linear = xf86AllocateOffscreenLinear(pScreen, size, 16,
                                                NULL, NULL, NULL);
           *mem_struct = linear;

           if (!linear)
               return 0;
       }

       /* Convert the pixel offset back to bytes. */
       offset = linear->offset * cpp;
   }
#endif /* USE_XAA */

   return offset;
}
647
648static void
649MGAFreeMemory(ScrnInfoPtr pScrn, void *mem_struct)
650{
651#if defined(USE_XAA) || defined(USE_EXA)
652    MGAPtr pMga = MGAPTR(pScrn);
653#endif /* defined(USE_XAA) || defined(USE_EXA) */
654
655#ifdef USE_EXA
656    if (pMga->Exa) {
657	ExaOffscreenArea *area = mem_struct;
658
659	if (area)
660	    exaOffscreenFree(pScrn->pScreen, area);
661    }
662#endif /* USE_EXA */
663#ifdef USE_XAA
664    if (!pMga->Exa) {
665	FBLinearPtr linear = mem_struct;
666
667	if (linear)
668	    xf86FreeOffscreenLinear(linear);
669    }
670#endif /* USE_XAA */
671}
672
/*
 * Program the backend scaler (BES) registers to show a packed image that
 * has already been copied to offscreen memory.
 *
 *   id              FOURCC of the image; UYVY selects a different
 *                   BESGLOBCTL value from every other format
 *   offset          value written to BESA1ORG (start of the image data)
 *   width, height   size of the source image
 *   pitch           bytes per row of the image; written halved to BESPITCH
 *   x1, y1, x2, y2  clipped source rectangle; the code uses y1 >> 16 as a
 *                   line number, so these are presumably 16.16 fixed point
 *   dstBox          destination rectangle written to BESHCOORD/BESVCOORD
 *   src_w, src_h    unclipped source size, used for the scale factors
 *   drw_w, drw_h    unclipped destination size, used for the scale factors
 */
static void
MGADisplayVideoOverlay(
    ScrnInfoPtr pScrn,
    int id,
    int offset,
    short width, short height,
    int pitch,
    int x1, int y1, int x2, int y2,
    BoxPtr dstBox,
    short src_w, short src_h,
    short drw_w, short drw_h
){
    MGAPtr pMga = MGAPTR(pScrn);
    int tmp, hzoom, intrep;
    int maxOverlayClock;

    CHECK_DMA_QUIESCENT(pMga, pScrn);

    /* got 48 scanlines to do it in */
    tmp = INREG(MGAREG_VCOUNT) + 48;
    /* FIXME always change it in vertical retrace use CrtcV ?*/
    if(tmp > pScrn->currentMode->CrtcVTotal)
	tmp -= 49; /* too bad */
    else
        tmp = pScrn->currentMode->CrtcVTotal -1;

    /*
     * NOTE(review): the value computed above is discarded; tmp is
     * unconditionally replaced here, and this is the value that ends up in
     * bits 16+ of BESGLOBCTL below.
     */
    tmp = pScrn->currentMode->VDisplay +1;
    /* enable accelerated 2x horizontal zoom when pixelclock >135MHz */

    if ((pMga->ChipRev >= 0x80) || (pMga->Chipset == PCI_CHIP_MGAG550)) {
	/* G450, G550 */
	maxOverlayClock = 234000;
    } else {
	maxOverlayClock = 135000;
    }

    hzoom = (pScrn->currentMode->Clock > maxOverlayClock) ? 1 : 0;

    switch(id) {
    case FOURCC_UYVY:
	OUTREG(MGAREG_BESGLOBCTL, 0x000000c0 | (3 * hzoom) | (tmp << 16));
	break;
    case FOURCC_YUY2:
    default:
	OUTREG(MGAREG_BESGLOBCTL, 0x00000080 | (3 * hzoom) | (tmp << 16));
	break;
    }

    OUTREG(MGAREG_BESA1ORG, offset);

    /* A different control word is used when the start line is odd. */
    if(y1 & 0x00010000)
	OUTREG(MGAREG_BESCTL, 0x00040c41);
    else
	OUTREG(MGAREG_BESCTL, 0x00040c01);

    OUTREG(MGAREG_BESHCOORD, (dstBox->x1 << 16) | (dstBox->x2 - 1));
    OUTREG(MGAREG_BESVCOORD, (dstBox->y1 << 16) | (dstBox->y2 - 1));

    OUTREG(MGAREG_BESHSRCST, x1 & 0x03fffffc);
    OUTREG(MGAREG_BESHSRCEND, (x2 - 0x00010000) & 0x03fffffc);
    OUTREG(MGAREG_BESHSRCLST, (width - 1) << 16);

    OUTREG(MGAREG_BESPITCH, pitch >> 1);

    OUTREG(MGAREG_BESV1WGHT, y1 & 0x0000fffc);
    OUTREG(MGAREG_BESV1SRCLST, height - 1 - (y1 >> 16));

    /*
     * Vertical scale factor: source/destination ratio shifted left by 16.
     * When the sizes differ (and the destination is at least 2 lines) one
     * is subtracted from both before dividing.  The result is capped just
     * below 32 << 16.
     */
    intrep = ((drw_h == src_h) || (drw_h < 2)) ? 0 : 1;
    tmp = ((src_h - intrep) << 16)/(drw_h - intrep);
    if(tmp >= (32 << 16))
	tmp = (32 << 16) - 1;
    OUTREG(MGAREG_BESVISCAL, tmp & 0x001ffffc);

    /* Horizontal scale factor, computed the same way; doubled when hzoom is set. */
    intrep = ((drw_w == src_w) || (drw_w < 2)) ? 0 : 1;
    tmp = (((src_w - intrep) << 16)/(drw_w - intrep)) << hzoom;
    if(tmp >= (32 << 16))
	tmp = (32 << 16) - 1;
    OUTREG(MGAREG_BESHISCAL, tmp & 0x001ffffc);

}
753
754
/**
 * Draw a packed image through the texture engine, one textured rectangle
 * per clip box.
 *
 * \param pScrn        screen to draw on
 * \param id           FOURCC of the image; UYVY selects a different TEXCTL
 *                     value from every other format
 * \param offset       value written to TEXORG (start of the image data)
 * \param nbox         number of entries in \p pbox
 * \param pbox         destination rectangles to fill
 * \param width        image width in pixels
 * \param height       image height in lines
 * \param pitch        bytes per image row; halved before being used
 * \param src_x,src_y  origin of the source rectangle
 * \param src_w,src_h  size of the source rectangle
 * \param drw_x,drw_y  origin of the destination rectangle
 * \param drw_w,drw_h  size of the destination rectangle
 *
 * \todo
 * Starting with at least the G200, the chip can handle non-mipmapped
 * non-power-of-two textures.  However, the code in this routine forces the
 * texture dimensions to be powers of two.  Dropping that restriction should
 * simplify the code and may improve performance slightly.
 */
static void
MGADisplayVideoTexture(
    ScrnInfoPtr pScrn,
    int id, int offset,
    int nbox, BoxPtr pbox,
    int width, int height, int pitch,
    short src_x, short src_y,
    short src_w, short src_h,
    short drw_x, short drw_y,
    short drw_w, short drw_h
){
    MGAPtr pMga = MGAPTR(pScrn);
    int log2w = 0, log2h = 0, i, incx, incy, padw, padh;

    pitch >>= 1;

    /*
     * log2w = log2 of width rounded up to a power of two: find the highest
     * set bit among bits 11..1 and add one if any lower bit is also set.
     * Stays 0 when none of those bits is set.
     */
    i = 12;
    while(--i) {
	if(width & (1 << i)) {
	    log2w = i;
	    if(width & ((1 << i) - 1))
		log2w++;
	    break;
	}
    }

    /* Same for the height. */
    i = 12;
    while(--i) {
	if(height & (1 << i)) {
	    log2h = i;
	    if(height & ((1 << i) - 1))
		log2h++;
	    break;
	}
    }

    /*
     * Padded (power-of-two) texture size, and the texture coordinate step
     * per destination pixel: source/destination ratio divided by the padded
     * size, shifted left by 20.
     */
    padw = 1 << log2w;
    padh = 1 << log2h;
    incx = (src_w << 20)/(drw_w * padw);
    incy = (src_h << 20)/(drw_h * padh);

    CHECK_DMA_QUIESCENT(pMga, pScrn);

    WAITFIFO(15);
    OUTREG(MGAREG_TMR0, incx);  /* sx inc */
    OUTREG(MGAREG_TMR1, 0);  /* sy inc */
    OUTREG(MGAREG_TMR2, 0);  /* tx inc */
    OUTREG(MGAREG_TMR3, incy);  /* ty inc */
    OUTREG(MGAREG_TMR4, 0x00000000);
    OUTREG(MGAREG_TMR5, 0x00000000);
    OUTREG(MGAREG_TMR8, 0x00010000);
    OUTREG(MGAREG_TEXORG, offset);
    OUTREG(MGAREG_TEXWIDTH,  log2w | (((8 - log2w) & 63) << 9) |
				((width - 1) << 18));
    OUTREG(MGAREG_TEXHEIGHT, log2h | (((8 - log2h) & 63) << 9) |
				((height - 1) << 18));
    if(id == FOURCC_UYVY)
	OUTREG(MGAREG_TEXCTL, 0x1A00010b | ((pitch & 0x07FF) << 9));
    else
	OUTREG(MGAREG_TEXCTL, 0x1A00010a | ((pitch & 0x07FF) << 9));
    OUTREG(MGAREG_TEXCTL2, 0x00000014);
    OUTREG(MGAREG_DWGCTL, 0x000c7076);
    OUTREG(MGAREG_TEXFILTER, 0x01e00020);
    OUTREG(MGAREG_ALPHACTRL, 0x00000001);

    /* padw/padh are reused as the texture coordinate of the source origin. */
    padw = (src_x << 20)/padw;
    padh = (src_y << 20)/padh;

    while(nbox--) {
	WAITFIFO(4);
	/* start coordinates for this box, relative to the drawable origin */
	OUTREG(MGAREG_TMR6, (incx * (pbox->x1 - drw_x)) + padw);
	OUTREG(MGAREG_TMR7, (incy * (pbox->y1 - drw_y)) + padh);
	OUTREG(MGAREG_FXBNDRY, (pbox->x2 << 16) | (pbox->x1 & 0xffff));
	OUTREG(MGAREG_YDSTLEN + MGAREG_EXEC,
				(pbox->y1 << 16) | (pbox->y2 - pbox->y1));
	pbox++;
    }

    MGA_MARK_SYNC(pMga, pScrn);
}
842
/*
 * PutImage entry point shared by the overlay and texture adaptors.
 *
 * Clips the request, makes sure offscreen video memory is available,
 * copies the visible part of the client image into it (converting the
 * planar YV12/I420 formats with MGACopyMungedData) and then displays it
 * through either the texture engine or the backend scaler.
 *
 *   src_x..src_h   source rectangle within the image
 *   drw_x..drw_h   destination rectangle
 *   id             FOURCC of the image
 *   buf            client image data
 *   width, height  full size of the image in buf
 *   clipBoxes      clip region of the destination
 *   data           port private value; compared with lastPort for the
 *                  textured path
 *
 * Returns Success (also when nothing is visible after clipping) or
 * BadAlloc when no video memory could be allocated.
 */
static int
MGAPutImage(
  ScrnInfoPtr pScrn,
  short src_x, short src_y,
  short drw_x, short drw_y,
  short src_w, short src_h,
  short drw_w, short drw_h,
  int id, unsigned char* buf,
  short width, short height,
  Bool Sync,
  RegionPtr clipBoxes, pointer data,
  DrawablePtr pDraw
){
   MGAPtr pMga = MGAPTR(pScrn);
   MGAPortPrivPtr pPriv = pMga->portPrivate;
   INT32 x1, x2, y1, y2;
   unsigned char *dst_start;
   int new_size, offset, offset2 = 0, offset3 = 0;
   int srcPitch, srcPitch2 = 0, dstPitch;
   int top, left, npixels, nlines;
   BoxRec dstBox;
   CARD32 tmp;

   /* Clip */
   x1 = src_x;
   x2 = src_x + src_w;
   y1 = src_y;
   y2 = src_y + src_h;

   dstBox.x1 = drw_x;
   dstBox.x2 = drw_x + drw_w;
   dstBox.y1 = drw_y;
   dstBox.y2 = drw_y + drw_h;

   /* Nothing visible: done.  The code below shifts x1..y2 right by 16
    * after this call, so they are presumably 16.16 fixed point. */
   if(!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2,
			     clipBoxes, width, height))
	return Success;

   /* The overlay works relative to the visible frame origin. */
   if(!pMga->TexturedVideo) {
	dstBox.x1 -= pScrn->frameX0;
	dstBox.x2 -= pScrn->frameX0;
	dstBox.y1 -= pScrn->frameY0;
	dstBox.y2 -= pScrn->frameY0;
   }

   /* Destination is always packed, 2 bytes per pixel, rows 16-byte aligned. */
   dstPitch = ((width << 1) + 15) & ~15;
   new_size = dstPitch * height;

   switch(id) {
   case FOURCC_YV12:
   case FOURCC_I420:
	/* planar source: offsets of the second and third plane in buf */
	srcPitch = (width + 3) & ~3;
	offset2 = srcPitch * height;
	srcPitch2 = ((width >> 1) + 3) & ~3;
	offset3 = (srcPitch2 * (height >> 1)) + offset2;
	break;
   case FOURCC_UYVY:
   case FOURCC_YUY2:
   default:
	srcPitch = (width << 1);
	break;
   }

   /* Room for two buffers when double buffering. */
   pPriv->video_offset = MGAAllocateMemory(pScrn, &pPriv->video_memory,
					   pPriv->doubleBuffer ?
                                           (new_size << 1) : new_size);
   if (!pPriv->video_offset)
	return BadAlloc;

   pPriv->currentBuffer ^= 1;

    /* copy data */
   top = y1 >> 16;
   left = (x1 >> 16) & ~1;
   npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left;
   left <<= 1;	/* from here on left is a byte offset in the packed row */

   offset = pPriv->video_offset;
   if(pPriv->doubleBuffer)
        offset += pPriv->currentBuffer * new_size;
   dst_start = pMga->FbStart + offset + left + (top * dstPitch);

   /* Textured video on a different port than last time: sync first. */
   if (pMga->TexturedVideo && ((long)data != pPriv->lastPort))
       MGA_SYNC(pMga, pScrn);

   switch(id) {
    case FOURCC_YV12:
    case FOURCC_I420:
	top &= ~1;
	tmp = ((top >> 1) * srcPitch2) + (left >> 2);
	offset2 += tmp;
	offset3 += tmp;
	/* I420 stores its second and third plane in the opposite order. */
	if(id == FOURCC_I420) {
	   tmp = offset2;
	   offset2 = offset3;
	   offset3 = tmp;
	}
	nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
	MGACopyMungedData(buf + (top * srcPitch) + (left >> 1),
			  buf + offset2, buf + offset3, dst_start,
			  srcPitch, srcPitch2, dstPitch, nlines, npixels);
	break;
    case FOURCC_UYVY:
    case FOURCC_YUY2:
    default:
	buf += (top * srcPitch) + left;
	nlines = ((y2 + 0xffff) >> 16) - top;
	MGACopyData(buf, dst_start, srcPitch, dstPitch, nlines, npixels);
        break;
    }

    if(pMga->TexturedVideo) {
	pPriv->lastPort = (long)data;
	MGADisplayVideoTexture(pScrn, id, offset,
		REGION_NUM_RECTS(clipBoxes), REGION_RECTS(clipBoxes),
		width, height, dstPitch, src_x, src_y, src_w, src_h,
		drw_x, drw_y, drw_w, drw_h);
	/* The image has been drawn; only the memory release timer remains. */
	pPriv->videoStatus = FREE_TIMER;
	pPriv->freeTime = currentTime.milliseconds + FREE_DELAY;
    } else {
    /* update cliplist */
	if(!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes)) {
	    REGION_COPY(pScrn->pScreen, &pPriv->clip, clipBoxes);
	    /* draw these */
	    xf86XVFillKeyHelper(pScrn->pScreen, pPriv->colorKey, clipBoxes);
	}

	offset += top * dstPitch;
	MGADisplayVideoOverlay(pScrn, id, offset, width, height, dstPitch,
	     x1, y1, x2, y2, &dstBox, src_w, src_h, drw_w, drw_h);

	pPriv->videoStatus = CLIENT_VIDEO_ON;
    }
    pMga->VideoTimerCallback = MGAVideoTimerCallback;

    return Success;
}
980
981
982static int
983MGAQueryImageAttributes(
984    ScrnInfoPtr pScrn,
985    int id,
986    unsigned short *w, unsigned short *h,
987    int *pitches, int *offsets
988){
989    MGAPtr pMga = MGAPTR(pScrn);
990    int size, tmp;
991
992    if(pMga->TexturedVideo) {
993	if(*w > 2046) *w = 2046;
994	if(*h > 2046) *h = 2046;
995    } else {
996	if(*w > 1024) *w = 1024;
997	if(*h > 1024) *h = 1024;
998    }
999
1000    *w = (*w + 1) & ~1;
1001    if(offsets) offsets[0] = 0;
1002
1003    switch(id) {
1004    case FOURCC_YV12:
1005    case FOURCC_I420:
1006	*h = (*h + 1) & ~1;
1007	size = (*w + 3) & ~3;
1008	if(pitches) pitches[0] = size;
1009	size *= *h;
1010	if(offsets) offsets[1] = size;
1011	tmp = ((*w >> 1) + 3) & ~3;
1012	if(pitches) pitches[1] = pitches[2] = tmp;
1013	tmp *= (*h >> 1);
1014	size += tmp;
1015	if(offsets) offsets[2] = size;
1016	size += tmp;
1017	break;
1018    case FOURCC_UYVY:
1019    case FOURCC_YUY2:
1020    default:
1021	size = *w << 1;
1022	if(pitches) pitches[0] = size;
1023	size *= *h;
1024	break;
1025    }
1026
1027    return size;
1028}
1029
1030static void
1031MGAVideoTimerCallback(ScrnInfoPtr pScrn, Time time)
1032{
1033    MGAPtr pMga = MGAPTR(pScrn);
1034    MGAPortPrivPtr pPriv = pMga->portPrivate;
1035
1036    if(pPriv->videoStatus & TIMER_MASK) {
1037	if(pPriv->videoStatus & OFF_TIMER) {
1038	    if(pPriv->offTime < time) {
1039		OUTREG(MGAREG_BESCTL, 0);
1040		pPriv->videoStatus = FREE_TIMER;
1041		pPriv->freeTime = time + FREE_DELAY;
1042	    }
1043	} else {  /* FREE_TIMER */
1044	    if(pPriv->freeTime < time) {
1045		if (pPriv->video_memory) {
1046                   MGAFreeMemory(pScrn, pPriv->video_memory);
1047		   pPriv->video_memory = NULL;
1048		}
1049		pPriv->videoStatus = 0;
1050	        pMga->VideoTimerCallback = NULL;
1051	    }
1052        }
1053    } else  /* shouldn't get here */
1054	pMga->VideoTimerCallback = NULL;
1055}
1056
1057
1058/****************** Offscreen stuff ***************/
1059
1060typedef struct {
1061  void *surface_memory;
1062  Bool isOn;
1063} OffscreenPrivRec, * OffscreenPrivPtr;
1064
1065static int
1066MGAAllocateSurface(
1067    ScrnInfoPtr pScrn,
1068    int id,
1069    unsigned short w,
1070    unsigned short h,
1071    XF86SurfacePtr surface
1072){
1073    void *surface_memory = NULL;
1074    int pitch, size, bpp, offset;
1075    OffscreenPrivPtr pPriv;
1076
1077    if((w > 1024) || (h > 1024))
1078	return BadAlloc;
1079
1080    w = (w + 1) & ~1;
1081    pitch = ((w << 1) + 15) & ~15;
1082    bpp = pScrn->bitsPerPixel >> 3;
1083    size = ((pitch * h) + bpp - 1) / bpp;
1084
1085    offset = MGAAllocateMemory(pScrn, &surface_memory, size);
1086    if (!offset)
1087	return BadAlloc;
1088
1089    surface->width = w;
1090    surface->height = h;
1091
1092    if(!(surface->pitches = malloc(sizeof(int)))) {
1093        MGAFreeMemory(pScrn, surface_memory);
1094	return BadAlloc;
1095    }
1096    if(!(surface->offsets = malloc(sizeof(int)))) {
1097	free(surface->pitches);
1098        MGAFreeMemory(pScrn, surface_memory);
1099	return BadAlloc;
1100    }
1101    if(!(pPriv = malloc(sizeof(OffscreenPrivRec)))) {
1102	free(surface->pitches);
1103	free(surface->offsets);
1104        MGAFreeMemory(pScrn, surface_memory);
1105	return BadAlloc;
1106    }
1107
1108    pPriv->surface_memory = surface_memory;
1109    pPriv->isOn = FALSE;
1110
1111    surface->pScrn = pScrn;
1112    surface->id = id;
1113    surface->pitches[0] = pitch;
1114    surface->offsets[0] = offset;
1115    surface->devPrivate.ptr = (pointer)pPriv;
1116
1117    return Success;
1118}
1119
1120static int
1121MGAStopSurface(
1122    XF86SurfacePtr surface
1123){
1124    OffscreenPrivPtr pPriv = (OffscreenPrivPtr)surface->devPrivate.ptr;
1125
1126    if(pPriv->isOn) {
1127	ScrnInfoPtr pScrn = surface->pScrn;
1128	MGAPtr pMga = MGAPTR(pScrn);
1129	OUTREG(MGAREG_BESCTL, 0);
1130	pPriv->isOn = FALSE;
1131    }
1132
1133    return Success;
1134}
1135
1136
1137static int
1138MGAFreeSurface(
1139    XF86SurfacePtr surface
1140){
1141    ScrnInfoPtr pScrn = surface->pScrn;
1142    OffscreenPrivPtr pPriv = (OffscreenPrivPtr)surface->devPrivate.ptr;
1143
1144    if(pPriv->isOn)
1145	MGAStopSurface(surface);
1146    MGAFreeMemory(pScrn, pPriv->surface_memory);
1147    free(surface->pitches);
1148    free(surface->offsets);
1149    free(surface->devPrivate.ptr);
1150
1151    return Success;
1152}
1153
1154static int
1155MGAGetSurfaceAttribute(
1156    ScrnInfoPtr pScrn,
1157    Atom attribute,
1158    INT32 *value
1159){
1160    return MGAGetPortAttributeOverlay(pScrn, attribute, value, 0);
1161}
1162
1163static int
1164MGASetSurfaceAttribute(
1165    ScrnInfoPtr pScrn,
1166    Atom attribute,
1167    INT32 value
1168){
1169    return MGASetPortAttributeOverlay(pScrn, attribute, value, 0);
1170}
1171
1172
1173static int
1174MGADisplaySurface(
1175    XF86SurfacePtr surface,
1176    short src_x, short src_y,
1177    short drw_x, short drw_y,
1178    short src_w, short src_h,
1179    short drw_w, short drw_h,
1180    RegionPtr clipBoxes
1181){
1182    OffscreenPrivPtr pPriv = (OffscreenPrivPtr)surface->devPrivate.ptr;
1183    ScrnInfoPtr pScrn = surface->pScrn;
1184    MGAPtr pMga = MGAPTR(pScrn);
1185    MGAPortPrivPtr portPriv = pMga->portPrivate;
1186    INT32 x1, y1, x2, y2;
1187    BoxRec dstBox;
1188
1189    x1 = src_x;
1190    x2 = src_x + src_w;
1191    y1 = src_y;
1192    y2 = src_y + src_h;
1193
1194    dstBox.x1 = drw_x;
1195    dstBox.x2 = drw_x + drw_w;
1196    dstBox.y1 = drw_y;
1197    dstBox.y2 = drw_y + drw_h;
1198
1199    if(!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2, clipBoxes,
1200			      surface->width, surface->height))
1201    {
1202	return Success;
1203    }
1204
1205    dstBox.x1 -= pScrn->frameX0;
1206    dstBox.x2 -= pScrn->frameX0;
1207    dstBox.y1 -= pScrn->frameY0;
1208    dstBox.y2 -= pScrn->frameY0;
1209
1210    MGAResetVideoOverlay(pScrn);
1211
1212    MGADisplayVideoOverlay(pScrn, surface->id, surface->offsets[0],
1213	     surface->width, surface->height, surface->pitches[0],
1214	     x1, y1, x2, y2, &dstBox, src_w, src_h, drw_w, drw_h);
1215
1216    xf86XVFillKeyHelper(pScrn->pScreen, portPriv->colorKey, clipBoxes);
1217
1218    pPriv->isOn = TRUE;
1219    /* we've prempted the XvImage stream so set its free timer */
1220    if(portPriv->videoStatus & CLIENT_VIDEO_ON) {
1221	REGION_EMPTY(pScrn->pScreen, &portPriv->clip);
1222	UpdateCurrentTime();
1223	portPriv->videoStatus = FREE_TIMER;
1224	portPriv->freeTime = currentTime.milliseconds + FREE_DELAY;
1225	pMga->VideoTimerCallback = MGAVideoTimerCallback;
1226    }
1227
1228    return Success;
1229}
1230
1231
1232static void
1233MGAInitOffscreenImages(ScreenPtr pScreen)
1234{
1235    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1236    MGAPtr pMga = MGAPTR(pScrn);
1237    int num = (pMga->Chipset == PCI_CHIP_MGAG400 || pMga->Chipset == PCI_CHIP_MGAG550) ? 2 : 1;
1238    XF86OffscreenImagePtr offscreenImages;
1239
1240    /* need to free this someplace */
1241    if(!(offscreenImages = malloc(num * sizeof(XF86OffscreenImageRec))))
1242	return;
1243
1244    offscreenImages[0].image = &Images[0];
1245    offscreenImages[0].flags = VIDEO_OVERLAID_IMAGES |
1246			       VIDEO_CLIP_TO_VIEWPORT;
1247    offscreenImages[0].alloc_surface = MGAAllocateSurface;
1248    offscreenImages[0].free_surface = MGAFreeSurface;
1249    offscreenImages[0].display = MGADisplaySurface;
1250    offscreenImages[0].stop = MGAStopSurface;
1251    offscreenImages[0].setAttribute = MGASetSurfaceAttribute;
1252    offscreenImages[0].getAttribute = MGAGetSurfaceAttribute;
1253    offscreenImages[0].max_width = 1024;
1254    offscreenImages[0].max_height = 1024;
1255    offscreenImages[0].num_attributes = (num == 1) ? 1 : 4;
1256    offscreenImages[0].attributes = Attributes;
1257
1258    if(num == 2) {
1259	offscreenImages[1].image = &Images[3];
1260	offscreenImages[1].flags = VIDEO_OVERLAID_IMAGES |
1261				   VIDEO_CLIP_TO_VIEWPORT;
1262	offscreenImages[1].alloc_surface = MGAAllocateSurface;
1263	offscreenImages[1].free_surface = MGAFreeSurface;
1264	offscreenImages[1].display = MGADisplaySurface;
1265	offscreenImages[1].stop = MGAStopSurface;
1266	offscreenImages[1].setAttribute = MGASetSurfaceAttribute;
1267	offscreenImages[1].getAttribute = MGAGetSurfaceAttribute;
1268	offscreenImages[1].max_width = 1024;
1269	offscreenImages[1].max_height = 1024;
1270	offscreenImages[1].num_attributes = 4;
1271	offscreenImages[1].attributes = Attributes;
1272    }
1273
1274    xf86XVRegisterOffscreenImages(pScreen, offscreenImages, num);
1275}
1276
1277
1278/* Matrox MGA 2164W Xv extension support.
1279*  The extension is implemented as a HOST->FB image load in YUV format.
1280*  I decided not to use real hardware overlay since on the Millennium II
1281*  it would limit the size of the frame buffer to 4Mb (even on a 16Mb
*  card) due to a hardware limitation.
1283*  Author: Gabriele Gorla (gorlik@yahoo.com)
1284*  Based on the MGA-Gxxx Xv extension by: Mark Vojkovich
1285   */
1286
1287/* This code is still in alpha stage. Only YUV->RGB conversion
1288   and horizontal scaling are hardware accelerated.
1289   All 4 FOURCC formats supported by X should be supported.
1290   It has been tested only on my DEC XP1000 at 1024x768x32 under
1291   linux 2.6.18 with X.org 7.1.1 (debian alpha)
1292
1293   Bug reports and success/failure stories are greatly appreciated.
1294*/
1295
1296/* #define DEBUG_MGA2164 */
1297/* #define CUSTOM_MEMCOPY */
1298#define MGA2164_SWFILTER
1299
1300
1301static XF86VideoAdaptorPtr
1302MGASetupImageVideoILOAD(ScreenPtr pScreen)
1303{
1304    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1305    XF86VideoAdaptorPtr adapt;
1306    MGAPtr pMga = MGAPTR(pScrn);
1307
1308    adapt = MGAAllocAdaptor(pScrn, FALSE);
1309    if (adapt == NULL)
1310	return NULL;
1311
1312    adapt->type = XvWindowMask | XvInputMask | XvImageMask;
1313    adapt->flags = 0;
1314    adapt->name = "Matrox Millennium II ILOAD Video Engine";
1315    adapt->nEncodings = 1;
1316    adapt->pEncodings = &DummyEncoding[1];
1317    adapt->nFormats = NUM_FORMATS;
1318    adapt->pFormats = Formats;
1319    adapt->nPorts = MGA_MAX_PORTS;
1320    adapt->pAttributes = NULL;
1321    adapt->nAttributes = 0;
1322
1323    /* number of supported color formats */
1324    adapt->pImages = Images;
1325    adapt->nImages = 4;
1326
1327    adapt->PutVideo = NULL;
1328    adapt->PutStill = NULL;
1329    adapt->GetVideo = NULL;
1330    adapt->GetStill = NULL;
1331    adapt->StopVideo = MGAStopVideo;
1332
1333    adapt->SetPortAttribute = MGASetPortAttributeTexture;
1334    adapt->GetPortAttribute = MGAGetPortAttributeTexture;
1335    adapt->QueryBestSize = MGAQueryBestSize;
1336    adapt->PutImage = MGAPutImageILOAD;
1337    adapt->QueryImageAttributes = MGAQueryImageAttributes;
1338
1339    REGION_INIT(pScreen, &(pMga->portPrivate->clip), NullBox, 0);
1340
1341    return adapt;
1342}
1343
1344/* this function is optimized for alpha. It might be better also for
1345other load/store risc architectures but I never tested on anything else
1346than my ev56 */
1347static void CopyMungedScanline_AXP(CARD32 *fb_ptr, short src_w,
1348				   CARD32 *tsp, CARD32 *tpu, CARD32 *tpv)
1349{
1350    CARD32 k,y0,y1,u,v;
1351
1352    for(k=src_w/8;k;k--) {
1353	y0=*tsp;
1354	y1=*(tsp+1);
1355	u=*tpu;
1356	v=*tpv;
1357
1358	*(fb_ptr)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) |
1359	    (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8;
1360	*(fb_ptr+1)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) |
1361	    (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8;
1362
1363	*(fb_ptr+2)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) |
1364	    (v&0x000000ff)<<24 | (u&0x000000ff)<<8;
1365	*(fb_ptr+3)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) |
1366	    (v&0x000000ff)<<24 | (u&0x000000ff)<<8;
1367
1368	/*correct below*/
1369	/*    *(fb_ptr)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) |
1370	      (v&0x000000ff)<<24 | (u&0x000000ff)<<8;
1371	      *(fb_ptr+1)=((y0&0x00ff0000)>>16)|((y0&0xff000000)>>8) |
1372	      (v&0x0000ff00)<<16 | (u&0x0000ff00);
1373	      *(fb_ptr+2)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) |
1374	      (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8;
1375	      *(fb_ptr+3)=((y1&0x00ff0000)>>16)|((y1&0xff000000)>>8) |
1376	      (v&0xff000000) | (u&0xff000000)>>16; */
1377
1378	tsp+=2; tpu++; tpv++;
1379	fb_ptr+=4;
1380    }
1381}
1382
#if 0
/*
 * Byte-wise variant of CopyMungedScanline_AXP() (currently compiled out).
 * Emits four words per 8 luma bytes, each laid out as
 *     Y(even) | U << 8 | Y(odd) << 16 | V << 24.
 * Bytes are widened to CARD32 before shifting so that a chroma value
 * >= 0x80 shifted by 24 does not overflow a signed int.
 */
static void CopyMungedScanline_AXP2(CARD32 *fb_ptr, short src_w,
				    CARD32 *tsp, CARD32 *tpu, CARD32 *tpv)
{
    CARD8 *y, *u, *v;
    int k, i;
    y=(CARD8 *)tsp;
    u=(CARD8 *)tpu;
    v=(CARD8 *)tpv;

    for(k=src_w/8;k;k--) {
	for(i=0;i<4;i++) {
	    fb_ptr[i]=(CARD32)y[2*i] | (CARD32)y[2*i+1]<<16 |
		(CARD32)v[i]<<24 | (CARD32)u[i]<<8;
	}

	y+=8; u+=4; v+=4;
	fb_ptr+=4;
    }
}
#endif
1404
1405
1406static void CopyMungedScanlineFilter_AXP(CARD32 *fb_ptr, short src_w,
1407					 CARD32 *tsp1, CARD32 *tpu1, CARD32 *tpv1,
1408					 CARD32 *tsp2, CARD32 *tpu2, CARD32 *tpv2,
1409					 int beta, int xds )
1410{
1411    unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v;
1412    int yf[8], uf[4], vf[4];
1413    int oneminbeta = 0xff - beta;
1414
1415    for(k=xds*src_w/8;k;k--) {
1416	y0_1=*tsp1;
1417	y1_1=*(tsp1+1);
1418	y0_2=*tsp2;
1419	y1_2=*(tsp2+1);
1420	u=*tpu1;
1421	v=*tpv1;
1422
1423	tsp1+=2; tsp2+=2; tpu1++; tpv1++;
1424	yf[0] = ((y0_1&0x000000ff)*oneminbeta + (y0_2&0x000000ff)*beta )>>8;
1425	yf[1] = (((y0_1&0x0000ff00)>>8)*oneminbeta + ((y0_2&0x0000ff00)>>8)*beta )>>8;
1426	yf[2] = (((y0_1&0x00ff0000)>>16)*oneminbeta + ((y0_2&0x00ff0000)>>16)*beta )>>8;
1427	yf[3] = (((y0_1&0xff000000)>>24)*oneminbeta + ((y0_2&0xff000000)>>24)*beta )>>8;
1428	yf[4] = ((y1_1&0x000000ff)*oneminbeta + (y1_2&0x000000ff)*beta )>>8;
1429	yf[5] = (((y1_1&0x0000ff00)>>8)*oneminbeta + ((y1_2&0x0000ff00)>>8)*beta )>>8;
1430	yf[6] = (((y1_1&0x00ff0000)>>16)*oneminbeta + ((y1_2&0x00ff0000)>>16)*beta )>>8;
1431	yf[7] = (((y1_1&0xff000000)>>24)*oneminbeta + ((y1_2&0xff000000)>>24)*beta )>>8;
1432
1433	/* FIXME: there is still no filtering on u and v */
1434	uf[0]=(u&0x000000ff);
1435	uf[1]=(u&0x0000ff00)>>8;
1436	uf[2]=(u&0x00ff0000)>>16;
1437	uf[3]=(u&0xff000000)>>24;
1438
1439	vf[0]=(v&0x000000ff);
1440	vf[1]=(v&0x0000ff00)>>8;
1441	vf[2]=(v&0x00ff0000)>>16;
1442	vf[3]=(v&0xff000000)>>24;
1443
1444	switch(xds) {
1445	case 1:
1446	    *(fb_ptr)=(yf[0]) | (yf[1]<<16) |
1447		vf[0]<<24 | uf[0]<<8;
1448	    *(fb_ptr+1)=(yf[2]) | (yf[3]<<16) |
1449		vf[1]<<24 | uf[1]<<8;
1450	    *(fb_ptr+2)=(yf[4]) | (yf[5]<<16) |
1451		vf[2]<<24 | uf[2]<<8;
1452	    *(fb_ptr+3)=(yf[6]) | (yf[7]<<16) |
1453		vf[3]<<24 | uf[3]<<8;
1454	    fb_ptr+=4;
1455	    break;
1456
1457	case 2:
1458	    *(fb_ptr)=(yf[0]+yf[1])/2 | (((yf[2]+yf[3])/2)<<16) |
1459		((vf[0]+vf[1])/2 )<<24 | ((uf[0]+uf[1])/2)<<8;
1460	    *(fb_ptr+1)=(yf[4]+yf[5])/2 | ( ((yf[6]+yf[7])/2) <<16) |
1461		((vf[2]+vf[3])/2 )<<24 | ((uf[2]+uf[3])/2)<<8;
1462	    fb_ptr+=2;
1463	    break;
1464
1465	case 4:
1466	    *(fb_ptr)=(yf[0]+yf[1]+yf[2]+yf[3])/4 | (((yf[4]+yf[5]+yf[6]+yf[7])/4)<<16) |
1467		((vf[0]+vf[1]+vf[2]+vf[3])/4 )<<24 | ((uf[0]+uf[1]+uf[2]+uf[3])/4)<<8;
1468	    fb_ptr+=1;
1469	    break;
1470
1471	default:
1472	    break;
1473	}
1474    }
1475}
1476
#if 0
/*
 * Vertical down-scale variant (currently compiled out): averages two luma
 * lines sample by sample and interleaves the result with the chroma of the
 * first line.  beta and xds are accepted but not used.
 *
 * Fixed relative to the earlier draft:
 *  - byte 3 of each luma word was extracted with mask 0x000000ff and a
 *    shift of 24, which always yields 0; the mask is now 0xff000000;
 *  - the sum of two 8-bit samples was shifted right by 8 (giving 0 or 1);
 *    it is now halved.
 */
static void CopyMungedScanlineFilterDown_AXP(CARD32 *fb_ptr, short src_w,
					     CARD32 *tsp1, CARD32 *tpu1, CARD32 *tpv1,
					     CARD32 *tsp2, CARD32 *tpu2, CARD32 *tpv2,
					     int beta , int xds)
{
    unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v;
    unsigned int yf[8];

    for(k=src_w/8;k;k--) {
	y0_1=*tsp1;
	y1_1=*(tsp1+1);
	y0_2=*tsp2;
	y1_2=*(tsp2+1);
	u=*tpu1;
	v=*tpv1;

	tsp1+=2; tsp2+=2; tpu1++; tpv1++;
	yf[0] = ((y0_1&0x000000ff) + (y0_2&0x000000ff))>>1;
	yf[1] = (((y0_1&0x0000ff00)>>8) + ((y0_2&0x0000ff00)>>8))>>1;
	yf[2] = (((y0_1&0x00ff0000)>>16) + ((y0_2&0x00ff0000)>>16))>>1;
	yf[3] = (((y0_1&0xff000000)>>24) + ((y0_2&0xff000000)>>24))>>1;
	yf[4] = ((y1_1&0x000000ff) + (y1_2&0x000000ff))>>1;
	yf[5] = (((y1_1&0x0000ff00)>>8) + ((y1_2&0x0000ff00)>>8))>>1;
	yf[6] = (((y1_1&0x00ff0000)>>16) + ((y1_2&0x00ff0000)>>16))>>1;
	yf[7] = (((y1_1&0xff000000)>>24) + ((y1_2&0xff000000)>>24))>>1;

	*(fb_ptr)=(yf[0]) | (yf[1]<<16) |
	    (v&0x000000ff)<<24 | (u&0x000000ff)<<8;
	*(fb_ptr+1)=(yf[2]) | (yf[3]<<16) |
	    (v&0x0000ff00)<<16 | (u&0x0000ff00);
	*(fb_ptr+2)=(yf[4]) | (yf[5]<<16) |
	    (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8;
	*(fb_ptr+3)=(yf[6]) | (yf[7]<<16) |
	    (v&0xff000000) | (u&0xff000000)>>16;

	fb_ptr+=4;
    }
}
#endif
1517
1518static void MGACopyScaledILOAD(
1519			       ScrnInfoPtr pScrn,
1520			       int id, unsigned char *buf,
1521			       BoxPtr pbox,
1522			       int width, int height, int pitch,
1523			       short src_x, short src_y,
1524			       short src_w, short src_h,
1525			       short drw_x, short drw_y,
1526			       short drw_w, short drw_h
1527			       )
1528{
1529    MGAPtr pMga = MGAPTR(pScrn);
1530    CARD32 *fb_ptr;
1531    unsigned char *ubuf, *vbuf, *tbuf;
1532    CARD32 *pu, *pv;
1533    int k,l;
1534#ifdef MGA2164_BLIT_DUP
1535    int pl;
1536#endif /* MGA2164_BLIT_DUP */
1537    int dl;
1538    int xds, yds;
1539    short box_h;
1540    short scr_pitch = ( pScrn->virtualX + 15) & ~15;
1541
1542#ifdef DEBUG_MGA2164
1543    char sbuf[255];
1544
1545    sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
1546	    pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
1547	    pbox->x2,pbox->y2);
1548    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1549
1550    sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
1551	    src_x,src_y,src_w,src_h);
1552    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1553    sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
1554	    drw_x,drw_y,drw_w,drw_h);
1555    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1556#endif
1557
1558    /* scaling yuv->rgb */
1559
1560    /* hack to force width and src image to be 8 pixel aligned */
1561    src_x&=~0x7;
1562    src_w&=~0x7;
1563
1564    box_h=pbox->y2-pbox->y1;
1565
1566    /* compute X down scaling factor */
1567    if(src_w>drw_w) {
1568	if(src_w/2<drw_w) {
1569	    xds=2;
1570	} else if(src_w/4<drw_w) {
1571	    xds=4;
1572	} else { xds=8; }
1573    } else xds = 1;
1574
1575    /* prevent crashing when dragging window outside left boundary of screen */
1576    /* FIXME: need to implement per pixel left start to avoid undesired
1577       effects when dragging window outside left screen boundary */
1578
1579    if(drw_x<0) {
1580	src_x=( -(drw_x*src_w)/drw_w + 0x7)&~0x7;
1581	src_w-=src_x;
1582	drw_w+=drw_x;
1583	drw_x=0;
1584    }
1585
1586    src_w/=xds;
1587
1588    /* compute X down scaling factor */
1589    if(src_h>drw_h) {
1590	if(src_h/2<drw_h) {
1591	    yds=2;
1592	} else if(src_h/4<drw_h) {
1593	    yds=4;
1594	} else { yds=8; }
1595    } else yds = 1;
1596
1597
1598#ifdef DEBUG_MGA2164
1599    char sbuf[255];
1600
1601    sprintf(sbuf,"---- xds = %d\n",
1602	    xds);
1603    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1604#endif
1605
1606
1607#ifdef DEBUG_MGA2164
1608    sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
1609	    src_x,src_y,src_w,src_h);
1610    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1611    sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
1612	    drw_x,drw_y,drw_w,drw_h);
1613    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1614#endif
1615
1616    CHECK_DMA_QUIESCENT(pMga, pScrn);
1617
1618    /* scaling ILOAD */
1619
1620    vbuf=buf+width*height;
1621    ubuf=vbuf+width*height/4;
1622    pu = (CARD32 *)(ubuf+(src_y/2)*(width/2));
1623    pv = (CARD32 *)(vbuf+(src_y/2)*(width/2));
1624#ifdef MGA2164_BLIT_DUP
1625    pl = -1;
1626#endif /* MGA2164_BLIT_DUP */
1627    for(dl=0;dl<box_h;dl++) {
1628	int beta;
1629	l=(dl+(pbox->y1-drw_y))*src_h/drw_h;
1630	/* FIXME: check the math */
1631	beta = ((dl+(pbox->y1-drw_y))*src_h*0xff/drw_h) - ((dl+(pbox->y1-drw_y))*src_h/drw_h*0xff);
1632
1633#ifdef MGA2164_BLIT_DUP
1634	if(l!=pl)
1635#else
1636	    if(1)
1637#endif
1638		{
1639
1640		    /*
1641		      #ifdef DEBUG_MGA2164
1642		      sprintf(sbuf,"new line: scr_dst %d   img_src %d   prev %d\n",
1643		      dl,l,pl);
1644		      xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1645		      #endif
1646		    */
1647
1648		    OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD_HIQH | MGADWG_BUYUV | MGADWG_SHIFTZERO
1649			   | MGADWG_SGNZERO | 0xc0000);
1650
1651		    OUTREG(MGAREG_AR0, pbox->x1 + drw_w -1);    /* SRC LINE END   why -1 ? */
1652		    OUTREG(MGAREG_AR2, ( ( (src_w-1)<<16) / (drw_w-1)) + 1 ); /* ((SRC_X_DIM -1)<<16) / (DST_X_DIM-1) +1 */
1653		    OUTREG(MGAREG_AR3, pbox->x1 );                            /* SRC LINE START*/
1654		    OUTREG(MGAREG_AR5, scr_pitch);                            /* DST_Y_INCR = PITCH? */
1655		    OUTREG(MGAREG_AR6, ((src_w-drw_w)<<16) / (drw_w-1) );     /* */
1656		    OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16) );     /* why -1 ? */
1657		    OUTREG(MGAREG_CXBNDRY, pbox->x1 | ((pbox->x2-1)<<16 ) );
1658		    OUTREG(MGAREG_YDST , pbox->y1+dl );                       /* Y_START_POS */
1659		    OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                     /* # of LINES */
1660
1661		    /* xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Data finished\n"); */
1662
1663		    fb_ptr=(CARD32 *)pMga->ILOADBase;
1664
1665		    switch(id) {
1666		    case FOURCC_YV12:
1667		    case FOURCC_I420:
1668			tbuf=buf+(l+src_y)*width;
1669			{
1670			    CARD32 *tpu=pu+src_x/8+l/2*width/8;
1671			    CARD32 *tpv=pv+src_x/8+l/2*width/8;
1672			    CARD32 *tsp=(CARD32 *)(tbuf+src_x), *tsp2;
1673
1674			    if((l+src_y)<(src_h-1))
1675				tsp2=(CARD32 *)(tbuf+src_x+width);
1676			    else
1677				tsp2=(CARD32 *)(tbuf+src_x);
1678
1679			    /* it is not clear if waiting is actually good for performance */
1680			    /*	 WAITFIFO(pMga->FifoSize);*/
1681			    /* should try to get MGACopyMunged data to work here */
1682			    /*		CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv); */
1683
1684			    /* Filter does not work yet */
1685			    CopyMungedScanlineFilter_AXP(fb_ptr,src_w,tsp,tpu,tpv,tsp2,tpu,tpv, beta, xds);
1686			    /*	if(l&1) {
1687				pu+=width/8;
1688				pv+=width/8;
1689				} */
1690			}
1691			break;
1692		    case FOURCC_UYVY:
1693		    case FOURCC_YUY2:
1694			tbuf=buf+(l+src_y)*width*2;
1695
1696#ifndef MGA2164_SWFILTER
1697			WAITFIFO(pMga->FifoSize/2);
1698			memcpy(fb_ptr, tbuf+src_x*2, src_w*2);
1699			fb_ptr+=src_w*2;   /* pointer in the pseudo dma window */
1700#else
1701			{
1702			    CARD32 *tsp=(CARD32 *)(tbuf+src_x*2), *tsp2;
1703
1704			    if((l+src_y)<(src_h-1))
1705				tsp2=(CARD32 *)(tbuf+src_x*2+width*2);
1706			    else
1707				tsp2=(CARD32 *)(tbuf+src_x*2);
1708			    /*	  {
1709				  char sbuf [256];
1710				  sprintf(sbuf,"dst line: %d   src_line: %d    beta: %x\n",
1711				  dl, l, beta );
1712				  xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1713				  }  */
1714
1715			    WAITFIFO(pMga->FifoSize/4);
1716			    for(k=xds*src_w/8;k;k--) {
1717				int oneminbeta = 0xff-beta;
1718				int y[8], u[4], v[4], ya[4], ua[2], va[2], p;
1719
1720				switch(yds) {
1721				case 1:
1722				    /* upscale y filter */
1723				    for(p=0;p<4;p++) {
1724					y[2*p]=(((*(tsp+p)&0x000000ff))*oneminbeta+((*(tsp2+p)&0x000000ff))*beta)>>8;
1725					y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16)*oneminbeta+((*(tsp2+p)&0x00ff0000)>>16)*beta)>>8;
1726					u[p]=(((*(tsp+p)&0x0000ff00)>>8)*oneminbeta+((*(tsp2+p)&0x0000ff00)>>8)*beta)>>8;
1727					v[p]=(((*(tsp+p)&0xff000000)>>24)*oneminbeta+((*(tsp2+p)&0xff000000)>>24)*beta)>>8;
1728				    }
1729				    break;
1730				    /* downscale y filter */
1731				case 2:
1732				case 3:
1733				case 4:
1734				default:
1735				    for(p=0;p<4;p++) {
1736					y[2*p]=(((*(tsp+p)&0x000000ff)));
1737					y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16));
1738					u[p]=(((*(tsp+p)&0x0000ff00)>>8));
1739					v[p]=(((*(tsp+p)&0xff000000)>>24));
1740				    }
1741				    break;
1742				}
1743
1744				switch (xds) {
1745				case 1: /* simple copy */
1746				    *(fb_ptr++)=y[0]|y[1]<<16|u[0]<<8|v[0]<<24;
1747				    *(fb_ptr++)=y[2]|y[3]<<16|u[1]<<8|v[1]<<24;
1748				    *(fb_ptr++)=y[4]|y[5]<<16|u[2]<<8|v[2]<<24;
1749				    *(fb_ptr++)=y[6]|y[7]<<16|u[3]<<8|v[3]<<24;
1750				    break;
1751				case 2: /* dowscale by 2 */
1752				    ya[0]=(y[0]+y[1])>>1;
1753				    ya[1]=(y[2]+y[3])>>1;
1754				    ya[2]=(y[4]+y[5])>>1;
1755				    ya[3]=(y[6]+y[7])>>1;
1756				    ua[0]=(u[0]+u[1])>>1;
1757				    ua[1]=(u[2]+u[3])>>1;
1758				    va[0]=(v[0]+v[1])>>1;
1759				    va[1]=(v[2]+v[3])>>1;
1760				    *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
1761				    *(fb_ptr++)=ya[2]|ya[3]<<16|ua[1]<<8|va[1]<<24;
1762				    break;
1763				case 4: /* downscale by 4 */
1764				    ya[0]=(y[0]+y[1]+y[2]+y[3])>>2;
1765				    ya[1]=(y[4]+y[5]+y[6]+y[7])>>2;
1766				    ua[0]=(u[0]+u[1]+u[2]+u[3])>>2;
1767				    va[0]=(v[0]+v[1]+v[2]+v[3])>>2;
1768				    *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
1769				    break;
1770				case 8:
1771				default:
1772				    break;
1773				}
1774
1775				/* fb_ptr+=4; */
1776				tsp+=4; tsp2+=4;
1777			    }
1778			}
1779#endif /* MGA2164_SWFILTER */
1780			break;
1781		    default:
1782			break;
1783		    }
1784#ifdef MGA2164_BLIT_DUP
1785		    pl=l;
1786#endif /* MGA2164_BLIT_DUP */
1787		} else {
1788		    /* dup lines */
1789
1790#ifdef DEBUG_MGA2164
1791		    sprintf(sbuf,"dup line: scr_src %d   scr_dst %d\n",
1792			    dl-1,dl);
1793		    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1794#endif
1795
1796		    OUTREG(MGAREG_DWGCTL, 0x040C6008);
1797		    OUTREG(MGAREG_FXBNDRY, pbox->x1|((pbox->x2-1)<<16) );      /* why -1 ? */
1798		    OUTREG(MGAREG_AR3, (pbox->y1+dl-1)*scr_pitch+pbox->x1 );   /* SRC LINE START*/
1799		    OUTREG(MGAREG_AR0, (pbox->y1+dl-1)*scr_pitch+pbox->x2 -1); /* SRC LINE END   why -1 ? */
1800		    OUTREG(MGAREG_AR5, scr_pitch);                             /* DST_Y_INCR = PITCH? */
1801		    OUTREG(MGAREG_YDST , pbox->y1+dl);                         /* Y_START_POS */
1802		    OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                      /* # of LINES */
1803		}
1804    }
1805    OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);
1806}
1807
1808static void MGACopyILOAD(
1809			 ScrnInfoPtr pScrn,
1810			 int id, unsigned char *buf,
1811			 BoxPtr pbox,
1812			 int width, int height, int pitch,
1813			 short src_x, short src_y,
1814			 short src_w, short src_h,
1815			 short drw_x, short drw_y,
1816			 short drw_w, short drw_h
1817			 )
1818{
1819    MGAPtr pMga = MGAPTR(pScrn);
1820    CARD32 *fb_ptr;
1821    CARD8  *ubuf, *vbuf;
1822    CARD32 *pu, *pv;
1823#ifdef CUSTOM_MEMCOPY
1824    int k;
1825#endif /* CUSTOM_MEMCOPY */
1826    int l;
1827    short clip_x1, clip_x2, tmp_w;
1828
1829#ifdef DEBUG_MGA2164
1830    char sbuf[255];
1831
1832    sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
1833	    pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
1834	    pbox->x2,pbox->y2);
1835    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1836
1837    sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
1838	    src_x,src_y,src_w,src_h);
1839    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1840    sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
1841	    drw_x,drw_y,drw_w,drw_h);
1842    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1843#endif
1844
1845    /* non-scaling yuv->rgb */
1846
1847    /* hack to force width and src image to be 8 pixel aligned */
1848    src_x&=~0x7;
1849    src_w&=~0x7;
1850    drw_w&=~0x7;
1851    tmp_w=drw_w;
1852    clip_x1=drw_x;
1853    clip_x2=drw_x+drw_w;
1854
1855    /* hack for clipping in non scaling version */
1856    /* this works only if no scaling */
1857    if(pbox->x1 > drw_x) {              /* left side X clipping*/
1858	src_x+=((pbox->x1-drw_x)&~0x7);
1859	src_w-=((pbox->x1-drw_x)&~0x7);
1860	clip_x1=pbox->x1;
1861	drw_x+=src_x;
1862	drw_w=src_w;
1863    }
1864
1865    if( (pbox->x2) < (drw_x+drw_w) ) {     /* right side X clipping */
1866	tmp_w=( (pbox->x2) - drw_x );
1867	drw_w= tmp_w & (~0x7);
1868	if(drw_w!=tmp_w) drw_w+=8;
1869	clip_x2=drw_x+tmp_w-1; /* not sure why needs -1 */
1870	src_w=drw_w;
1871    }
1872
1873    if(pbox->y1 > drw_y) {             /* top side Y clipping */
1874	src_y+=(pbox->y1-drw_y);
1875	src_h-=(pbox->y1-drw_y);
1876	drw_y+=src_y;
1877	drw_h=src_h;
1878    }
1879    if((pbox->y2)<(drw_y+drw_h)) {     /* bottom side Y clipping */
1880	drw_h=(pbox->y2)-drw_y;
1881	src_h=drw_h;
1882    }
1883
1884    if(drw_x<0) drw_x=0;
1885
1886#ifdef DEBUG_MGA2164
1887    sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
1888	    src_x,src_y,src_w,src_h);
1889    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1890    sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
1891	    drw_x,drw_y,drw_w,drw_h);
1892    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
1893#endif
1894
1895    /* ready to draw */
1896    if(drw_w==0||drw_h==0) return;
1897
1898    if(drw_w<0||drw_h<0) {
1899	/* actually until scaling is working this might happen
1900	   during normal operation */
1901	/*  sprintf(sbuf,"drw_w or drw_h are negative (this should never
1902	    happen)\n");
1903	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); */
1904	return;
1905    }
1906
1907    CHECK_DMA_QUIESCENT(pMga, pScrn);
1908
1909    /* non scaling ILOAD */
1910    WAITFIFO(6);
1911    OUTREG(MGAREG_AR5, 0);
1912    OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD | MGADWG_BUYUV | MGADWG_SHIFTZERO
1913	   | MGADWG_SGNZERO | 0xc0000);
1914    OUTREG(MGAREG_AR0, (drw_w)-1 );
1915    OUTREG(MGAREG_AR3, 0);
1916    OUTREG(MGAREG_CXBNDRY, clip_x1|(clip_x2<<16));
1917    OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16));
1918    OUTREG(MGAREG_YDSTLEN + MGAREG_EXEC , (drw_y<<16)|drw_h);
1919
1920    fb_ptr=(CARD32 *)pMga->ILOADBase;
1921    vbuf=buf+width*height;
1922    ubuf=vbuf+width*height/4;
1923
1924    switch(id) {
1925    case FOURCC_YV12:
1926    case FOURCC_I420:
1927	pu = (CARD32 *)(ubuf+(src_y/2)*(width/2));
1928	pv = (CARD32 *)(vbuf+(src_y/2)*(width/2));
1929	buf+=src_y*width;
1930
1931	for(l=0;l<drw_h;l++) {
1932	    CARD32 *tpu=pu+src_x/8;
1933	    CARD32 *tpv=pv+src_x/8;
1934	    CARD32 *tsp=(CARD32 *)(buf+src_x);
1935
1936	    /* it is not clear if waiting is actually good for performance */
1937	    /*	WAITFIFO(pMga->FifoSize);*/
1938	    /* should try to get MGACopyMunged data to work here */
1939	    CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv);
1940	    buf+=width;
1941	    if(l&1) {
1942		pu+=width/8;
1943		pv+=width/8;
1944	    }
1945	}
1946	break;
1947    case FOURCC_UYVY:
1948    case FOURCC_YUY2:
1949	buf+=src_y*width*2;
1950	for(l=0;l<drw_h;l++) {
1951
1952#ifndef CUSTOM_MEMCOPY
1953	    WAITFIFO(pMga->FifoSize/2); /* not sure what's the value for best performance */
1954	    memcpy(fb_ptr, buf+src_x*2, src_w*2);
1955	    fb_ptr+=src_w*2;
1956#else
1957	    CARD32 *tsp=(CARD32 *)(buf+src_x*2);
1958	    WAITFIFO(pMga->FifoSize/4);
1959	    for(k=src_w/8;k;k--) {
1960		*(fb_ptr)=*(tsp);
1961		*(fb_ptr+1)=*(tsp+1);
1962		*(fb_ptr+2)=*(tsp+2);
1963		*(fb_ptr+3)=*(tsp+3);
1964		fb_ptr+=4; tsp+=4;
1965	    }
1966#endif /* CUSTOM_MEMCOPY */
1967	    buf+=width*2;
1968	}
1969	break;
1970    default:
1971	break;
1972    }
1973    OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);    /* put clipping back to normal */
1974}
1975
1976static int
1977MGAPutImageILOAD(
1978		 ScrnInfoPtr pScrn,
1979		 short src_x, short src_y,
1980		 short drw_x, short drw_y,
1981		 short src_w, short src_h,
1982		 short drw_w, short drw_h,
1983		 int id, unsigned char* buf,
1984		 short width, short height,
1985		 Bool Sync,
1986		 RegionPtr clipBoxes, pointer data,
1987		 DrawablePtr pDraw
1988		 ){
1989    MGAPtr pMga = MGAPTR(pScrn);
1990    MGAPortPrivPtr pPriv = pMga->portPrivate;
1991    INT32 x1, x2, y1, y2;
1992    int dstPitch = 0;
1993    BoxRec dstBox;
1994    int nbox;
1995    BoxPtr pbox;
1996
1997    /* Clip */
1998    x1 = src_x; x2 = src_x + src_w;
1999    y1 = src_y; y2 = src_y + src_h;
2000
2001    dstBox.x1 = drw_x; dstBox.x2 = drw_x + drw_w;
2002    dstBox.y1 = drw_y; dstBox.y2 = drw_y + drw_h;
2003
2004    if(!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2,
2005			      clipBoxes, width, height))
2006	return Success;
2007
2008#ifdef USE_XAA
2009    if( pMga->AccelInfoRec->NeedToSync && ((long)data != pPriv->lastPort) ) {
2010	MGAStormSync(pScrn);
2011    }
2012#endif
2013
2014    pPriv->lastPort = (long)data;
2015    nbox=REGION_NUM_RECTS(clipBoxes);
2016    pbox=REGION_RECTS(clipBoxes);
2017
2018    while(nbox--) {
2019
2020	if ( (drw_w==src_w) && (drw_h==src_h) && (drw_x >= 0 ) ) {
2021	    /* special case 1: non scaling optimization */
2022	    MGACopyILOAD(pScrn,id,buf,pbox,
2023			 width, height, dstPitch, src_x, src_y, src_w, src_h,
2024			 drw_x, drw_y, drw_w, drw_h);
2025#if 0
2026	    } else if ( (drw_w>src_w) && (drw_h>src_h) && (drw_x >= 0 ) ) {
2027		/* special case 2: upscaling for full screen apps */
2028		/* FIXME: to do */
2029		MGACopyScaledILOAD(pScrn,id,buf,pbox,
2030				   width, height, dstPitch, src_x, src_y, src_w, src_h,
2031				   drw_x, drw_y, drw_w, drw_h);
2032
2033#endif
2034	    } else /* generic fallback case */
2035		MGACopyScaledILOAD(pScrn,id,buf,pbox,
2036				   width, height, dstPitch, src_x, src_y, src_w, src_h,
2037				   drw_x, drw_y, drw_w, drw_h);
2038	/* FIXME: when the generic is perfect I will enable the optimizations */
2039	pbox++;
2040    }
2041
2042#ifdef USE_XAA
2043    pMga->AccelInfoRec->NeedToSync = TRUE;
2044#endif
2045    pPriv->videoStatus = FREE_TIMER;
2046    pPriv->freeTime = currentTime.milliseconds + FREE_DELAY;
2047    pMga->VideoTimerCallback = MGAVideoTimerCallback;
2048
2049    return Success;
2050}
2051