/* nouveau_xv.c revision a5560a61 */
1/*
2 * Copyright 2007 Arthur Huillet
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#ifdef __SSE2__
29#include <immintrin.h>
30#endif
31
32#include "xorg-config.h"
33#include "xf86xv.h"
34#include <X11/extensions/Xv.h>
35#include "exa.h"
36#include "damage.h"
37#include "dixstruct.h"
38#include "fourcc.h"
39
40#include "nv_include.h"
41#include "nv_dma.h"
42
43#include "vl_hwmc.h"
44
45#include "hwdefs/nv_m2mf.xml.h"
46
/* Maximum image size advertised through DummyEncoding. */
#define IMAGE_MAX_W		2046
#define IMAGE_MAX_H		2046

/* Maximum image size advertised through DummyEncodingTex. */
#define TEX_IMAGE_MAX_W		4096
#define TEX_IMAGE_MAX_H		4096

/* Delay before a stopped overlay is actually switched off (milliseconds). */
#define OFF_DELAY		500
/* Delay added to the timer once the overlay is off, before memory is freed. */
#define FREE_DELAY		5000

#define NUM_BLIT_PORTS		16
#define NUM_TEXTURE_PORTS	32

/*
 * Stop the overlay with the routine matching the card architecture.
 * The expansion reads a variable named "pNv", which therefore has to be in
 * scope at every place this macro is used.
 */
#define NVStopOverlay(X)						\
	(((pNv->Architecture == NV_ARCH_04) ? NV04StopOverlay(X)	\
					     : NV10StopOverlay(X)))

/*
 * Value taken by pPriv->currentHostBuffer when we failed to allocate the two
 * private buffers in TT memory, so that we can catch this case and attempt no
 * other allocation afterwards (performance reasons).
 */
#define NO_PRIV_HOST_BUFFER_AVAILABLE	9999
65
/*
 * NVPutImage action flags.
 * Each flag occupies its own bit so that several can be OR-ed together.
 */
enum {
	IS_YV12		= 1 << 0,
	IS_YUY2		= 1 << 1,
	CONVERT_TO_YUY2	= 1 << 2,
	USE_OVERLAY	= 1 << 3,
	USE_TEXTURE	= 1 << 4,
	SWAP_UV		= 1 << 5,
	IS_RGB		= 1 << 6, /* I am not sure how long we will support it */
};
76
77#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
78
79Atom xvBrightness, xvContrast, xvColorKey, xvSaturation;
80Atom xvHue, xvAutopaintColorKey, xvSetDefaults, xvDoubleBuffer;
81Atom xvITURBT709, xvSyncToVBlank, xvOnCRTCNb;
82
83/* client libraries expect an encoding */
84static XF86VideoEncodingRec DummyEncoding =
85{
86	0,
87	"XV_IMAGE",
88	IMAGE_MAX_W, IMAGE_MAX_H,
89	{1, 1}
90};
91
92static XF86VideoEncodingRec DummyEncodingTex =
93{
94	0,
95	"XV_IMAGE",
96	TEX_IMAGE_MAX_W, TEX_IMAGE_MAX_H,
97	{1, 1}
98};
99
100static XF86VideoEncodingRec DummyEncodingNV50 =
101{
102	0,
103	"XV_IMAGE",
104	8192, 8192,
105	{1, 1}
106};
107
108#define NUM_FORMATS_ALL 6
109
110XF86VideoFormatRec NVFormats[NUM_FORMATS_ALL] =
111{
112	{15, TrueColor}, {16, TrueColor}, {24, TrueColor},
113	{15, DirectColor}, {16, DirectColor}, {24, DirectColor}
114};
115
116#define NUM_FORMATS_NV50 8
117XF86VideoFormatRec NV50Formats[NUM_FORMATS_NV50] =
118{
119	{15, TrueColor}, {16, TrueColor}, {24, TrueColor}, {30, TrueColor},
120	{15, DirectColor}, {16, DirectColor}, {24, DirectColor}, {30, DirectColor}
121};
122
123#define NUM_NV04_OVERLAY_ATTRIBUTES 4
124XF86AttributeRec NV04OverlayAttributes[NUM_NV04_OVERLAY_ATTRIBUTES] =
125{
126	    {XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
127	    {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
128	    {XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
129	    {XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
130};
131
132
133#define NUM_NV10_OVERLAY_ATTRIBUTES 10
134XF86AttributeRec NV10OverlayAttributes[NUM_NV10_OVERLAY_ATTRIBUTES] =
135{
136	{XvSettable | XvGettable, 0, 1, "XV_DOUBLE_BUFFER"},
137	{XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
138	{XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
139	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
140	{XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
141	{XvSettable | XvGettable, 0, 8191, "XV_CONTRAST"},
142	{XvSettable | XvGettable, 0, 8191, "XV_SATURATION"},
143	{XvSettable | XvGettable, 0, 360, "XV_HUE"},
144	{XvSettable | XvGettable, 0, 1, "XV_ITURBT_709"},
145	{XvSettable | XvGettable, 0, 1, "XV_ON_CRTC_NB"},
146};
147
148#define NUM_BLIT_ATTRIBUTES 2
149XF86AttributeRec NVBlitAttributes[NUM_BLIT_ATTRIBUTES] =
150{
151	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
152	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
153};
154
155#define NUM_TEXTURED_ATTRIBUTES 2
156XF86AttributeRec NVTexturedAttributes[NUM_TEXTURED_ATTRIBUTES] =
157{
158	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
159	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
160};
161
162#define NUM_TEXTURED_ATTRIBUTES_NV50 7
163XF86AttributeRec NVTexturedAttributesNV50[NUM_TEXTURED_ATTRIBUTES_NV50] =
164{
165	{ XvSettable             , 0, 0, "XV_SET_DEFAULTS" },
166	{ XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK" },
167	{ XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS" },
168	{ XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST" },
169	{ XvSettable | XvGettable, -1000, 1000, "XV_SATURATION" },
170	{ XvSettable | XvGettable, -1000, 1000, "XV_HUE" },
171	{ XvSettable | XvGettable, 0, 1, "XV_ITURBT_709" }
172};
173
174#define NUM_IMAGES_YUV 4
175#define NUM_IMAGES_ALL 5
176
177#define FOURCC_RGB 0x0000003
178#define XVIMAGE_RGB \
179   { \
180        FOURCC_RGB, \
181        XvRGB, \
182        LSBFirst, \
183        { 0x03, 0x00, 0x00, 0x00, \
184          0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \
185        32, \
186        XvPacked, \
187        1, \
188        24, 0x00ff0000, 0x0000ff00, 0x000000ff, \
189        0, 0, 0, \
190        0, 0, 0, \
191        0, 0, 0, \
192        {'B','G','R','X',\
193          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \
194        XvTopToBottom \
195   }
196
197static XF86ImageRec NVImages[NUM_IMAGES_ALL] =
198{
199	XVIMAGE_YUY2,
200	XVIMAGE_YV12,
201	XVIMAGE_UYVY,
202	XVIMAGE_I420,
203	XVIMAGE_RGB
204};
205
206static void
207box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
208{
209	dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
210	dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
211	if (dest->x1 >= dest->x2) {
212		dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
213		return;
214	}
215
216	dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
217	dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
218	if (dest->y1 >= dest->y2)
219		dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
220}
221
222static int
223box_area(BoxPtr box)
224{
225    return (int) (box->x2 - box->x1) * (int) (box->y2 - box->y1);
226}
227
228static void
229rr_crtc_box(RRCrtcPtr crtc, BoxPtr crtc_box)
230{
231	if (crtc->mode) {
232		crtc_box->x1 = crtc->x;
233		crtc_box->y1 = crtc->y;
234		switch (crtc->rotation) {
235		case RR_Rotate_0:
236		case RR_Rotate_180:
237		default:
238			crtc_box->x2 = crtc->x + crtc->mode->mode.width;
239			crtc_box->y2 = crtc->y + crtc->mode->mode.height;
240			break;
241		case RR_Rotate_90:
242		case RR_Rotate_270:
243			crtc_box->x2 = crtc->x + crtc->mode->mode.height;
244			crtc_box->y2 = crtc->y + crtc->mode->mode.width;
245			break;
246		}
247	} else
248		crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0;
249}
250
251static Bool
252rr_crtc_on(RRCrtcPtr crtc, Bool crtc_is_xf86_hint)
253{
254	if (!crtc) {
255		return FALSE;
256	}
257	if (crtc_is_xf86_hint && crtc->devPrivate) {
258		return xf86_crtc_on(crtc->devPrivate);
259	} else {
260		return !!crtc->mode;
261	}
262}
263
264/*
265 * Return the crtc covering 'box'. If two crtcs cover a portion of
266 * 'box', then prefer the crtc with greater coverage.
267 */
268static RRCrtcPtr
269rr_crtc_covering_box(ScreenPtr pScreen, BoxPtr box, Bool screen_is_xf86_hint)
270{
271	rrScrPrivPtr pScrPriv;
272	RRCrtcPtr crtc, best_crtc, primary_crtc;
273	int coverage, best_coverage;
274	int c;
275	BoxRec crtc_box, cover_box;
276	RROutputPtr primary_output;
277
278	best_crtc = NULL;
279	best_coverage = 0;
280	primary_crtc = NULL;
281	primary_output = NULL;
282
283	if (!dixPrivateKeyRegistered(rrPrivKey))
284		return NULL;
285
286	pScrPriv = rrGetScrPriv(pScreen);
287
288	if (!pScrPriv)
289		return NULL;
290
291	primary_output = RRFirstOutput(pScreen);
292	if (primary_output && primary_output->crtc)
293		primary_crtc = primary_output->crtc->devPrivate;
294
295	for (c = 0; c < pScrPriv->numCrtcs; c++) {
296		crtc = pScrPriv->crtcs[c];
297
298		/* If the CRTC is off, treat it as not covering */
299		if (!rr_crtc_on(crtc, screen_is_xf86_hint))
300			continue;
301
302		rr_crtc_box(crtc, &crtc_box);
303		box_intersect(&cover_box, &crtc_box, box);
304		coverage = box_area(&cover_box);
305		if (coverage > best_coverage ||
306		   (crtc == primary_crtc && coverage == best_coverage)) {
307			best_crtc = crtc;
308			best_coverage = coverage;
309		}
310	}
311
312	return best_crtc;
313}
314
315#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(23, 0)
316static RRCrtcPtr
317rr_crtc_covering_box_on_secondary(ScreenPtr pScreen, BoxPtr box)
318{
319	if (!pScreen->isGPU) {
320		ScreenPtr secondary;
321		RRCrtcPtr crtc = NULL;
322
323		xorg_list_for_each_entry(secondary, &pScreen->secondary_list, secondary_head) {
324			if (!secondary->is_output_secondary)
325				continue;
326
327			crtc = rr_crtc_covering_box(secondary, box, FALSE);
328			if (crtc)
329				return crtc;
330		}
331	}
332
333	return NULL;
334}
335#endif
336
337RRCrtcPtr
338randr_crtc_covering_drawable(DrawablePtr pDraw)
339{
340	ScreenPtr pScreen = pDraw->pScreen;
341	RRCrtcPtr crtc = NULL;
342	BoxRec box;
343
344	box.x1 = pDraw->x;
345	box.y1 = pDraw->y;
346	box.x2 = box.x1 + pDraw->width;
347	box.y2 = box.y1 + pDraw->height;
348
349	crtc = rr_crtc_covering_box(pScreen, &box, TRUE);
350#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(23, 0)
351	if (!crtc) {
352		crtc = rr_crtc_covering_box_on_secondary(pScreen, &box);
353	}
354#endif
355	return crtc;
356}
357
358xf86CrtcPtr
359nouveau_pick_best_crtc(ScrnInfoPtr pScrn,
360                       int x, int y, int w, int h)
361{
362    ScreenPtr pScreen = pScrn->pScreen;
363    RRCrtcPtr crtc = NULL;
364    BoxRec box;
365
366    if (!pScrn->vtSema)
367	return NULL;
368
369    box.x1 = x;
370    box.x2 = x + w;
371    box.y1 = y;
372    box.y2 = y + h;
373
374    crtc = rr_crtc_covering_box(pScreen, &box, TRUE);
375    if (crtc) {
376	return crtc->devPrivate;
377    }
378    return NULL;
379}
380
381unsigned int
382nv_window_belongs_to_crtc(ScrnInfoPtr pScrn, int x, int y, int w, int h)
383{
384	xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
385	unsigned int mask = 0;
386	int i;
387
388	for (i = 0; i < xf86_config->num_crtc; i++) {
389		xf86CrtcPtr crtc = xf86_config->crtc[i];
390
391		if (!xf86_crtc_on(crtc))
392			continue;
393
394		if ((x < (crtc->x + crtc->mode.HDisplay)) &&
395		    (y < (crtc->y + crtc->mode.VDisplay)) &&
396		    ((x + w) > crtc->x) &&
397		    ((y + h) > crtc->y))
398		    mask |= 1 << i;
399	}
400
401	return mask;
402}
403
404/**
405 * NVSetPortDefaults
406 * set attributes of port "pPriv" to compiled-in (except for colorKey) defaults
407 * this function does not care about the kind of adapter the port is for
408 *
409 * @param pScrn screen to get the default colorKey from
410 * @param pPriv port to reset to defaults
411 */
412void
413NVSetPortDefaults (ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
414{
415	NVPtr pNv = NVPTR(pScrn);
416
417	pPriv->brightness		= 0;
418	pPriv->contrast			= 4096;
419	pPriv->saturation		= 4096;
420	pPriv->hue			= 0;
421	pPriv->colorKey			= pNv->videoKey;
422	pPriv->autopaintColorKey	= TRUE;
423	pPriv->doubleBuffer		= pNv->Architecture != NV_ARCH_04;
424	pPriv->iturbt_709		= FALSE;
425	pPriv->currentHostBuffer	= 0;
426}
427
428static int
429nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
430		      struct nouveau_bo **pbo)
431{
432	union nouveau_bo_config config = {};
433	NVPtr pNv = NVPTR(pScrn);
434
435	if (*pbo) {
436		if ((*pbo)->size >= size)
437			return 0;
438		nouveau_bo_ref(NULL, pbo);
439	}
440
441	if (flags & NOUVEAU_BO_VRAM) {
442		if (pNv->Architecture == NV_TESLA)
443			config.nv50.memtype = 0x70;
444		else
445		if (pNv->Architecture >= NV_FERMI)
446			config.nvc0.memtype = 0xfe;
447	}
448	flags |= NOUVEAU_BO_MAP;
449
450	return nouveau_bo_new(pNv->dev, flags, 0, size, &config, pbo);
451}
452
453/**
454 * NVFreePortMemory
455 * frees memory held by a given port
456 *
457 * @param pScrn screen whose port wants to free memory
458 * @param pPriv port to free memory of
459 */
460static void
461NVFreePortMemory(ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
462{
463	nouveau_bo_ref(NULL, &pPriv->video_mem);
464	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
465	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[1]);
466}
467
468/**
469 * NVFreeOverlayMemory
470 * frees memory held by the overlay port
471 *
472 * @param pScrn screen whose overlay port wants to free memory
473 */
474static void
475NVFreeOverlayMemory(ScrnInfoPtr pScrn)
476{
477	NVPtr	pNv = NVPTR(pScrn);
478	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
479
480	NVFreePortMemory(pScrn, pPriv);
481#if NVOVL_SUPPORT
482	/* "power cycle" the overlay */
483	nvWriteMC(pNv, NV_PMC_ENABLE,
484		  (nvReadMC(pNv, NV_PMC_ENABLE) & 0xEFFFFFFF));
485	nvWriteMC(pNv, NV_PMC_ENABLE,
486		  (nvReadMC(pNv, NV_PMC_ENABLE) | 0x10000000));
487#endif
488}
489
490/**
491 * NVFreeBlitMemory
492 * frees memory held by the blit port
493 *
494 * @param pScrn screen whose blit port wants to free memory
495 */
496static void
497NVFreeBlitMemory(ScrnInfoPtr pScrn)
498{
499	NVPtr	pNv = NVPTR(pScrn);
500	NVPortPrivPtr pPriv = GET_BLIT_PRIVATE(pNv);
501
502	NVFreePortMemory(pScrn, pPriv);
503}
504
505/**
506 * NVVideoTimerCallback
507 * callback function which perform cleanup tasks (stop overlay, free memory).
508 * within the driver
509 * purpose and use is unknown
510 */
511void
512NVVideoTimerCallback(ScrnInfoPtr pScrn, Time currentTime)
513{
514	NVPtr         pNv = NVPTR(pScrn);
515	NVPortPrivPtr pOverPriv = NULL;
516	NVPortPrivPtr pBlitPriv = NULL;
517	Bool needCallback = FALSE;
518
519	if (!pScrn->vtSema)
520		return;
521
522	if (pNv->overlayAdaptor) {
523		pOverPriv = GET_OVERLAY_PRIVATE(pNv);
524		if (!pOverPriv->videoStatus)
525			pOverPriv = NULL;
526	}
527
528	if (pNv->blitAdaptor) {
529		pBlitPriv = GET_BLIT_PRIVATE(pNv);
530		if (!pBlitPriv->videoStatus)
531			pBlitPriv = NULL;
532	}
533
534	if (pOverPriv) {
535		if (pOverPriv->videoTime < currentTime) {
536			if (pOverPriv->videoStatus & OFF_TIMER) {
537				NVStopOverlay(pScrn);
538				pOverPriv->videoStatus = FREE_TIMER;
539				pOverPriv->videoTime = currentTime + FREE_DELAY;
540				needCallback = TRUE;
541			} else
542			if (pOverPriv->videoStatus & FREE_TIMER) {
543				NVFreeOverlayMemory(pScrn);
544				pOverPriv->videoStatus = 0;
545			}
546		} else {
547			needCallback = TRUE;
548		}
549	}
550
551	if (pBlitPriv) {
552		if (pBlitPriv->videoTime < currentTime) {
553			NVFreeBlitMemory(pScrn);
554			pBlitPriv->videoStatus = 0;
555		} else {
556			needCallback = TRUE;
557		}
558	}
559
560	pNv->VideoTimerCallback = needCallback ? NVVideoTimerCallback : NULL;
561}
562
563#ifndef ExaOffscreenMarkUsed
564extern void ExaOffscreenMarkUsed(PixmapPtr);
565#endif
566
567/*
568 * StopVideo
569 */
570static void
571NVStopOverlayVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
572{
573	NVPtr         pNv   = NVPTR(pScrn);
574	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
575
576	if (pPriv->grabbedByV4L)
577		return;
578
579	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
580
581	if(Exit) {
582		if (pPriv->videoStatus & CLIENT_VIDEO_ON)
583			NVStopOverlay(pScrn);
584		NVFreeOverlayMemory(pScrn);
585		pPriv->videoStatus = 0;
586	} else {
587		if (pPriv->videoStatus & CLIENT_VIDEO_ON) {
588			pPriv->videoStatus = OFF_TIMER | CLIENT_VIDEO_ON;
589			pPriv->videoTime = currentTime.milliseconds + OFF_DELAY;
590			pNv->VideoTimerCallback = NVVideoTimerCallback;
591		}
592	}
593}
594
595/**
596 * QueryBestSize
597 * used by client applications to ask the driver:
598 * how would you actually scale a video of dimensions
599 * vid_w, vid_h, if i wanted you to scale it to dimensions
600 * drw_w, drw_h?
601 * function stores actual scaling size in pointers p_w, p_h.
602 *
603 *
604 * @param pScrn unused
605 * @param motion unused
606 * @param vid_w width of source video
607 * @param vid_h height of source video
608 * @param drw_w desired scaled width as requested by client
609 * @param drw_h desired scaled height as requested by client
610 * @param p_w actual scaled width as the driver is capable of
611 * @param p_h actual scaled height as the driver is capable of
612 * @param data unused
613 */
614static void
615NVQueryBestSize(ScrnInfoPtr pScrn, Bool motion,
616		short vid_w, short vid_h,
617		short drw_w, short drw_h,
618		unsigned int *p_w, unsigned int *p_h,
619		pointer data)
620{
621	if(vid_w > (drw_w << 3))
622		drw_w = vid_w >> 3;
623	if(vid_h > (drw_h << 3))
624		drw_h = vid_h >> 3;
625
626	*p_w = drw_w;
627	*p_h = drw_h;
628}
629
630/**
631 * NVCopyData420
632 * used to convert YV12 to YUY2 for the blitter and NV04 overlay.
633 * The U and V samples generated are linearly interpolated on the vertical
634 * axis for better quality
635 *
636 * @param src1 source buffer of luma
637 * @param src2 source buffer of chroma1
638 * @param src3 source buffer of chroma2
639 * @param dst1 destination buffer
640 * @param srcPitch pitch of src1
641 * @param srcPitch2 pitch of src2, src3
642 * @param dstPitch pitch of dst1
643 * @param h number of lines to copy
644 * @param w length of lines to copy
645 */
646static inline void
647NVCopyData420(unsigned char *src1, unsigned char *src2, unsigned char *src3,
648	      unsigned char *dst1, int srcPitch, int srcPitch2, int dstPitch,
649	      int h, int w)
650{
651	CARD32 *dst;
652	CARD8 *s1, *s2, *s3;
653	int i, j;
654
655#define su(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s2[X] +        \
656		(signed int)(s2 + srcPitch2)[X]) / 2) : (s2[X]))
657#define sv(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s3[X] +        \
658		(signed int)(s3 + srcPitch2)[X]) / 2) : (s3[X]))
659
660	w >>= 1;
661
662	for (j = 0; j < h; j++) {
663		dst = (CARD32*)dst1;
664		s1 = src1;  s2 = src2;  s3 = src3;
665		i = w;
666
667		while (i > 4) {
668#if X_BYTE_ORDER == X_BIG_ENDIAN
669		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
670		dst[1] = (s1[2] << 24) | (s1[3] << 8) | (sv(1) << 16) | su(1);
671		dst[2] = (s1[4] << 24) | (s1[5] << 8) | (sv(2) << 16) | su(2);
672		dst[3] = (s1[6] << 24) | (s1[7] << 8) | (sv(3) << 16) | su(3);
673#else
674		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
675		dst[1] = s1[2] | (s1[3] << 16) | (sv(1) << 8) | (su(1) << 24);
676		dst[2] = s1[4] | (s1[5] << 16) | (sv(2) << 8) | (su(2) << 24);
677		dst[3] = s1[6] | (s1[7] << 16) | (sv(3) << 8) | (su(3) << 24);
678#endif
679		dst += 4; s2 += 4; s3 += 4; s1 += 8;
680		i -= 4;
681		}
682
683		while (i--) {
684#if X_BYTE_ORDER == X_BIG_ENDIAN
685		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
686#else
687		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
688#endif
689		dst++; s2++; s3++;
690		s1 += 2;
691		}
692
693		dst1 += dstPitch;
694		src1 += srcPitch;
695		if (j & 1) {
696			src2 += srcPitch2;
697			src3 += srcPitch2;
698		}
699	}
700}
701
/**
 * NVCopyNV12ColorPlanes
 * Used to convert YV12 color planes to NV12 (interleaved UV) for the overlay.
 * The two source planes are interleaved byte by byte into the destination,
 * the src2 byte first, then the src1 byte.
 *
 * @param src1 first source chroma plane (stored second in each output pair)
 * @param src2 second source chroma plane (stored first in each output pair)
 * @param dst destination buffer; written through 32-bit and 16-bit stores
 * @param dstPitch pitch of dst
 * @param srcPitch2 pitch of src1 and src2
 * @param h number of lines of the image (h / 2 chroma lines are copied)
 * @param w length of the image lines (w / 2 chroma samples are copied per
 *          plane and line)
 */
static inline void
NVCopyNV12ColorPlanes(unsigned char *src1, unsigned char *src2,
		      unsigned char *dst, int dstPitch, int srcPitch2,
		      int h, int w)
{
	int i, j, l, e;

	w >>= 1;
	h >>= 1;
	/* l: number of full-width iterations, e: leftover sample pairs */
#ifdef __SSE2__
	l = w >> 3;
	e = w & 7;
#else
	l = w >> 1;
	e = w & 1;
#endif

	for (j = 0; j < h; j++) {
		unsigned char *us = src1;
		unsigned char *vs = src2;
		unsigned int *vuvud = (unsigned int *) dst;
		unsigned short *vud;

		for (i = 0; i < l; i++) {
#ifdef __SSE2__
			/* interleave 8 bytes of each plane into 16 bytes */
			_mm_storeu_si128(
				(void*)vuvud,
				_mm_unpacklo_epi8(
					_mm_loadl_epi64((void*)vs),
					_mm_loadl_epi64((void*)us)));
			vuvud+=4;
			us+=8;
			vs+=8;
#else /* __SSE2__ */
			/*
			 * The byte that lands in bits 24..31 is widened to
			 * unsigned first: shifting an int-promoted byte
			 * >= 0x80 left by 24 overflows a signed int, which
			 * is undefined behaviour.
			 */
#  if X_BYTE_ORDER == X_BIG_ENDIAN
			*vuvud++ = ((unsigned int)vs[0]<<24) | (us[0]<<16) |
				   (vs[1]<<8) | us[1];
#  else
			*vuvud++ = vs[0] | (us[0]<<8) | (vs[1]<<16) |
				   ((unsigned int)us[1]<<24);
#  endif
			us+=2;
			vs+=2;
#endif /* __SSE2__ */
		}

		/* leftover pairs, one 16-bit store each */
		vud = (unsigned short *)vuvud;
		for (i = 0; i < e; i++) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
			vud[i] = us[i] | (vs[i]<<8);
#else
			vud[i] = vs[i] | (us[i]<<8);
#endif
		}

		dst += dstPitch;
		src1 += srcPitch2;
		src2 += srcPitch2;
	}

}
771
772
773static int
774NV_set_dimensions(ScrnInfoPtr pScrn, int action_flags, INT32 *xa, INT32 *xb,
775		  INT32 *ya, INT32 *yb, short *src_x, short *src_y,
776		  short *src_w, short *src_h, short *drw_x, short *drw_y,
777		  short *drw_w, short *drw_h, int *left, int *top, int *right,
778		  int *bottom, BoxRec *dstBox, int *npixels, int *nlines,
779		  RegionPtr clipBoxes, short width, short height)
780{
781	NVPtr pNv = NVPTR(pScrn);
782
783	if (action_flags & USE_OVERLAY) {
784		switch (pNv->Architecture) {
785		case NV_ARCH_04:
786			/* NV0x overlay can't scale down. at all. */
787			if (*drw_w < *src_w)
788				*drw_w = *src_w;
789			if (*drw_h < *src_h)
790				*drw_h = *src_h;
791			break;
792		case NV_ARCH_30:
793			/* According to DirectFB, NV3x can't scale down by
794			 * a ratio > 2
795			 */
796			if (*drw_w < (*src_w) >> 1)
797				*drw_w = *src_w;
798			if (*drw_h < (*src_h) >> 1)
799				*drw_h = *src_h;
800			break;
801		default: /*NV10, NV20*/
802			/* NV1x overlay can't scale down by a ratio > 8 */
803			if (*drw_w < (*src_w) >> 3)
804				*drw_w = *src_w >> 3;
805			if (*drw_h < (*src_h >> 3))
806				*drw_h = *src_h >> 3;
807		}
808	}
809
810	/* Clip */
811	*xa = *src_x;
812	*xb = *src_x + *src_w;
813	*ya = *src_y;
814	*yb = *src_y + *src_h;
815
816	dstBox->x1 = *drw_x;
817	dstBox->x2 = *drw_x + *drw_w;
818	dstBox->y1 = *drw_y;
819	dstBox->y2 = *drw_y + *drw_h;
820
821	/* In randr 1.2 mode VIDEO_CLIP_TO_VIEWPORT is broken (hence it is not
822	 * set in the overlay adapter flags) since pScrn->frame{X,Y}1 do not get
823	 * updated. Hence manual clipping against the CRTC dimensions
824	 */
825	if (action_flags & USE_OVERLAY) {
826		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
827		unsigned id = pPriv->overlayCRTC;
828		xf86CrtcPtr crtc = XF86_CRTC_CONFIG_PTR(pScrn)->crtc[id];
829		RegionRec VPReg;
830		BoxRec VPBox;
831
832		VPBox.x1 = crtc->x;
833		VPBox.y1 = crtc->y;
834		VPBox.x2 = crtc->x + crtc->mode.HDisplay;
835		VPBox.y2 = crtc->y + crtc->mode.VDisplay;
836
837		REGION_INIT(pScreen, &VPReg, &VPBox, 1);
838		REGION_INTERSECT(pScreen, clipBoxes, clipBoxes, &VPReg);
839		REGION_UNINIT(pScreen, &VPReg);
840	}
841
842	if (!xf86XVClipVideoHelper(dstBox, xa, xb, ya, yb, clipBoxes,
843				   width, height))
844		return -1;
845
846	if (action_flags & USE_OVERLAY)	{
847		xf86CrtcConfigPtr xf86_config =
848			XF86_CRTC_CONFIG_PTR(pScrn);
849		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
850
851		dstBox->x1 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
852		dstBox->x2 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
853		dstBox->y1 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
854		dstBox->y2 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
855	}
856
857	/* Convert fixed point to integer, as xf86XVClipVideoHelper probably
858	 * turns its parameter into fixed point values
859	 */
860	*left = (*xa) >> 16;
861	if (*left < 0)
862		*left = 0;
863
864	*top = (*ya) >> 16;
865	if (*top < 0)
866		*top = 0;
867
868	*right = (*xb) >> 16;
869	if (*right > width)
870		*right = width;
871
872	*bottom = (*yb) >> 16;
873	if (*bottom > height)
874		*bottom = height;
875
876	if (action_flags & IS_YV12) {
877		/* even "left", even "top", even number of pixels per line
878		 * and even number of lines
879		 */
880		*left &= ~1;
881		*npixels = ((*right + 1) & ~1) - *left;
882		*top &= ~1;
883		*nlines = ((*bottom + 1) & ~1) - *top;
884	} else
885	if (action_flags & IS_YUY2) {
886		/* even "left" */
887		*left &= ~1;
888		/* even number of pixels per line */
889		*npixels = ((*right + 1) & ~1) - *left;
890		*nlines = *bottom - *top;
891		/* 16bpp */
892		*left <<= 1;
893	} else
894	if (action_flags & IS_RGB) {
895		*npixels = *right - *left;
896		*nlines = *bottom - *top;
897		/* 32bpp */
898		*left <<= 2;
899	}
900
901	return 0;
902}
903
904static int
905NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
906				  int *srcPitch2, int *dstPitch, int *s2offset,
907				  int *s3offset, int *uv_offset,
908				  int *newFBSize, int *newTTSize,
909				  int *line_len, int npixels, int nlines,
910				  int width, int height)
911{
912	int tmp;
913
914	if (pNv->Architecture >= NV_TESLA) {
915		npixels = (npixels + 7) & ~7;
916		nlines = (nlines + 7) & ~7;
917	}
918
919	if (action_flags & IS_YV12) {
920		*srcPitch = (width + 3) & ~3;	/* of luma */
921		*s2offset = *srcPitch * height;
922		*srcPitch2 = ((width >> 1) + 3) & ~3; /*of chroma*/
923		*s3offset = (*srcPitch2 * (height >> 1)) + *s2offset;
924		*dstPitch = (npixels + 63) & ~63; /*luma and chroma pitch*/
925		*line_len = npixels;
926		*uv_offset = nlines * *dstPitch;
927		*newFBSize = *uv_offset + (nlines >> 1) * *dstPitch;
928		*newTTSize = *uv_offset + (nlines >> 1) * *dstPitch;
929	} else
930	if (action_flags & IS_YUY2) {
931		*srcPitch = width << 1; /* one luma, one chroma per pixel */
932		*dstPitch = ((npixels << 1) + 63) & ~63;
933		*line_len = npixels << 1;
934		*newFBSize = nlines * *dstPitch;
935		*newTTSize = nlines * *line_len;
936	} else
937	if (action_flags & IS_RGB) {
938		/* one R, one G, one B, one X per pixel */
939		*srcPitch = width << 2;
940		*dstPitch = ((npixels << 2) + 63) & ~63;
941		*line_len = npixels << 2;
942		*newFBSize = nlines * *dstPitch;
943		*newTTSize = nlines * *dstPitch;
944	}
945
946	if (action_flags & CONVERT_TO_YUY2) {
947		*dstPitch = ((npixels << 1) + 63) & ~63;
948		*line_len = npixels << 1;
949		*newFBSize = nlines * *dstPitch;
950		*newTTSize = nlines * *line_len;
951		*uv_offset = 0;
952	}
953
954	if (action_flags & SWAP_UV)  {
955		/* I420 swaps U and V */
956		tmp = *s2offset;
957		*s2offset = *s3offset;
958		*s3offset = tmp;
959	}
960
961	/* Overlay double buffering... */
962	if (action_flags & USE_OVERLAY)
963                (*newFBSize) <<= 1;
964
965	return 0;
966}
967
968
969/**
970 * NV_set_action_flags
971 * This function computes the action flags from the input image,
972 * that is, it decides what NVPutImage and its helpers must do.
973 * This eases readability by avoiding lots of switch-case statements in the
974 * core NVPutImage
975 */
976static void
977NV_set_action_flags(ScrnInfoPtr pScrn, DrawablePtr pDraw, NVPortPrivPtr pPriv,
978		    int id, short drw_x, short drw_y, short drw_w, short drw_h,
979		    int *action_flags)
980{
981	NVPtr pNv = NVPTR(pScrn);
982
983#define USING_OVERLAY (*action_flags & USE_OVERLAY)
984#define USING_TEXTURE (*action_flags & USE_TEXTURE)
985#define USING_BLITTER ((!(*action_flags & USE_OVERLAY)) &&                     \
986		       (!(*action_flags & USE_TEXTURE)))
987
988	*action_flags = 0;
989
990	/* Pixel format-related bits */
991	if (id == FOURCC_YUY2 || id == FOURCC_UYVY)
992		*action_flags |= IS_YUY2;
993
994	if (id == FOURCC_YV12 || id == FOURCC_I420)
995		*action_flags |= IS_YV12;
996
997	if (id == FOURCC_RGB) /*How long will we support it?*/
998		*action_flags |= IS_RGB;
999
1000	if (id == FOURCC_I420) /* I420 is YV12 with swapped UV */
1001		*action_flags |= SWAP_UV;
1002
1003	/* Desired adapter */
1004	if (!pPriv->blitter && !pPriv->texture)
1005		*action_flags |= USE_OVERLAY;
1006
1007	if (!pPriv->blitter && pPriv->texture)
1008		*action_flags |= USE_TEXTURE;
1009
1010	/* Adapter fallbacks (when the desired one can't be used)*/
1011#ifdef COMPOSITE
1012	{
1013		PixmapPtr ppix = NVGetDrawablePixmap(pDraw);
1014
1015		/* this is whether ppix is in the viewable fb, not related to
1016		   the EXA "offscreen" stuff */
1017		if (!nouveau_exa_pixmap_is_onscreen(ppix))
1018			*action_flags &= ~USE_OVERLAY;
1019	}
1020#endif
1021
1022#ifdef NVOVL_SUPPORT
1023	if (USING_OVERLAY) {
1024		char crtc = nv_window_belongs_to_crtc(pScrn, drw_x, drw_y,
1025						      drw_w, drw_h);
1026
1027		if ((crtc & (1 << 0)) && (crtc & (1 << 1))) {
1028			/* The overlay cannot be used on two CRTCs at a time,
1029			 * so we need to fallback on the blitter
1030			 */
1031			*action_flags &= ~USE_OVERLAY;
1032		} else
1033		if ((crtc & (1 << 0))) {
1034			/* We need to put the overlay on CRTC0 - if it's not
1035			 * already here
1036			 */
1037			if (pPriv->overlayCRTC == 1) {
1038				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
1039					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) |
1040					    NV_CRTC_FSEL_OVERLAY);
1041				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
1042					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) &
1043					    ~NV_CRTC_FSEL_OVERLAY);
1044				pPriv->overlayCRTC = 0;
1045			}
1046		} else
1047		if ((crtc & (1 << 1))) {
1048			if (pPriv->overlayCRTC == 0) {
1049				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
1050					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) |
1051					    NV_CRTC_FSEL_OVERLAY);
1052				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
1053					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) &
1054					    ~NV_CRTC_FSEL_OVERLAY);
1055				pPriv->overlayCRTC = 1;
1056			}
1057		}
1058
1059		if (XF86_CRTC_CONFIG_PTR(pScrn)->crtc[pPriv->overlayCRTC]
1060						 ->rotation != RR_Rotate_0)
1061			*action_flags &= ~USE_OVERLAY;
1062	}
1063#endif
1064
1065	/* At this point the adapter we're going to use is _known_.
1066	 * You cannot change it now.
1067	 */
1068
1069	/* Card/adapter format restrictions */
1070	if (USING_BLITTER) {
1071		/* The blitter does not handle YV12 natively */
1072		if (id == FOURCC_YV12 || id == FOURCC_I420)
1073			*action_flags |= CONVERT_TO_YUY2;
1074	}
1075
1076	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_04)) {
1077		/* NV04-05 don't support YV12, only YUY2 and ITU-R BT.601 */
1078		if (*action_flags & IS_YV12)
1079			*action_flags |= CONVERT_TO_YUY2;
1080	}
1081
1082	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_10 ||
1083			      pNv->Architecture == NV_ARCH_20)) {
1084		/* No YV12 overlay on NV10, 11, 15, 20, NFORCE */
1085		switch (pNv->dev->chipset) {
1086		case 0x10:
1087		case 0x11:
1088		case 0x15:
1089		case 0x1a: /*XXX: unsure about nforce */
1090		case 0x20:
1091			*action_flags |= CONVERT_TO_YUY2;
1092			break;
1093		default:
1094			break;
1095		}
1096	}
1097}
1098
1099
1100/**
1101 * NVPutImage
1102 * PutImage is "the" important function of the Xv extension.
1103 * a client (e.g. video player) calls this function for every
1104 * image (of the video) to be displayed. this function then
1105 * scales and displays the image.
1106 *
1107 * @param pScrn screen which hold the port where the image is put
1108 * @param src_x source point in the source image to start displaying from
1109 * @param src_y see above
1110 * @param src_w width of the source image to display
1111 * @param src_h see above
1112 * @param drw_x  screen point to display to
1113 * @param drw_y
1114 * @param drw_w width of the screen drawable
1115 * @param drw_h
1116 * @param id pixel format of image
1117 * @param buf pointer to buffer containing the source image
1118 * @param width total width of the source image we are passed
1119 * @param height
1120 * @param Sync unused
1121 * @param clipBoxes ??
1122 * @param data pointer to port
1123 * @param pDraw drawable pointer
1124 */
1125static int
1126NVPutImage(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x,
1127	   short drw_y, short src_w, short src_h, short drw_w, short drw_h,
1128	   int id, unsigned char *buf, short width, short height,
1129	   Bool Sync, RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
1130{
1131	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
1132	NVPtr pNv = NVPTR(pScrn);
1133	PixmapPtr ppix;
1134	/* source box */
1135	INT32 xa = 0, xb = 0, ya = 0, yb = 0;
1136	/* size to allocate in VRAM and in GART respectively */
1137	int newFBSize = 0, newTTSize = 0;
1138	/* card VRAM offsets, source offsets for U and V planes */
1139	int offset = 0, uv_offset = 0, s2offset = 0, s3offset = 0;
1140	/* source pitch, source pitch of U and V planes in case of YV12,
1141	 * VRAM destination pitch
1142	 */
1143	int srcPitch = 0, srcPitch2 = 0, dstPitch = 0;
1144	/* position of the given source data (using src_*), number of pixels
1145	 * and lines we are interested in
1146	 */
1147	int top = 0, left = 0, right = 0, bottom = 0, npixels = 0, nlines = 0;
1148	Bool skip = FALSE;
1149	BoxRec dstBox;
1150	CARD32 tmp = 0;
1151	int line_len = 0; /* length of a line, like npixels, but in bytes */
1152	struct nouveau_bo *destination_buffer = NULL;
1153	int action_flags; /* what shall we do? */
1154	unsigned char *map;
1155	int ret, i;
1156
1157	if (pPriv->grabbedByV4L)
1158		return Success;
1159
1160	if (width > pPriv->max_image_dim || height > pPriv->max_image_dim)
1161		return BadMatch;
1162
1163	NV_set_action_flags(pScrn, pDraw, pPriv, id, drw_x, drw_y, drw_w,
1164			    drw_h, &action_flags);
1165
1166	if (NV_set_dimensions(pScrn, action_flags, &xa, &xb, &ya, &yb,
1167			      &src_x,  &src_y, &src_w, &src_h,
1168			      &drw_x, &drw_y, &drw_w, &drw_h,
1169			      &left, &top, &right, &bottom, &dstBox,
1170			      &npixels, &nlines, clipBoxes, width, height))
1171		return Success;
1172
1173	if (NV_calculate_pitches_and_mem_size(pNv, action_flags, &srcPitch,
1174					      &srcPitch2, &dstPitch, &s2offset,
1175					      &s3offset, &uv_offset,
1176					      &newFBSize, &newTTSize,
1177					      &line_len, npixels, nlines,
1178					      width, height))
1179		return BadImplementation;
1180
1181	/* There are some cases (tvtime with overscan for example) where the
1182	 * input image is larger (width/height) than the source rectangle for
1183	 * the overlay (src_w, src_h). In those cases, we try to do something
1184	 * optimal by uploading only the necessary data.
1185	 */
1186	if (action_flags & IS_YUY2 || action_flags & IS_RGB)
1187		buf += (top * srcPitch) + left;
1188
1189	if (action_flags & IS_YV12) {
1190		tmp = ((top >> 1) * srcPitch2) + (left >> 1);
1191		s2offset += tmp;
1192		s3offset += tmp;
1193	}
1194
1195	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, newFBSize,
1196				    &pPriv->video_mem);
1197	if (ret)
1198		return BadAlloc;
1199
1200#ifdef NVOVL_SUPPORT
1201	if (action_flags & USE_OVERLAY) {
1202		ret = nouveau_bo_pin(pPriv->video_mem, NOUVEAU_BO_VRAM);
1203		if (ret) {
1204			nouveau_bo_ref(NULL, &pPriv->video_mem);
1205			return BadAlloc;
1206		}
1207	}
1208#endif
1209
1210	/* The overlay supports hardware double buffering. We handle this here*/
1211	offset = 0;
1212#ifdef NVOVL_SUPPORT
1213	if (pPriv->doubleBuffer) {
1214		int mask = 1 << (pPriv->currentBuffer << 2);
1215
1216		/* overwrite the newest buffer if there's not one free */
1217		if (nvReadVIDEO(pNv, NV_PVIDEO_BUFFER) & mask) {
1218			if (!pPriv->currentBuffer)
1219				offset += newFBSize >> 1;
1220			skip = TRUE;
1221		} else {
1222			if (pPriv->currentBuffer)
1223				offset += newFBSize >> 1;
1224		}
1225	}
1226#endif
1227
1228	/* Now we take a decision regarding the way we send the data to the
1229	 * card.
1230	 *
1231	 * Either we use double buffering of "private" TT memory
1232	 * Either we rely on X's GARTScratch
1233	 * Either we fallback on CPU copy
1234	 */
1235
1236	/* Try to allocate host-side double buffers, unless we have already
1237	 * failed
1238	 */
1239
1240	/* We take only nlines * line_len bytes - that is, only the pixel
1241	 * data we are interested in - because the stuff in the GART is
1242	 * written contiguously
1243	 */
1244	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1245		ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART, newTTSize,
1246					    &pPriv->TT_mem_chunk[0]);
1247		if (ret == 0) {
1248			ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART,
1249						    newTTSize,
1250						    &pPriv->TT_mem_chunk[1]);
1251			if (ret) {
1252				nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
1253				pPriv->currentHostBuffer =
1254					NO_PRIV_HOST_BUFFER_AVAILABLE;
1255			}
1256		} else {
1257			pPriv->currentHostBuffer =
1258				NO_PRIV_HOST_BUFFER_AVAILABLE;
1259		}
1260	}
1261
1262	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1263		destination_buffer =
1264			pPriv->TT_mem_chunk[pPriv->currentHostBuffer];
1265	}
1266	if (!destination_buffer) {
1267		if (pNv->Architecture >= NV_TESLA) {
1268			NOUVEAU_ERR("No scratch buffer for tiled upload\n");
1269			return BadAlloc;
1270		}
1271
1272		goto CPU_copy;
1273	}
1274
1275	if (newTTSize <= destination_buffer->size) {
1276		unsigned char *dst;
1277
1278		/* Upload to GART */
1279		nouveau_bo_map(destination_buffer, NOUVEAU_BO_WR, pNv->client);
1280		dst = destination_buffer->map;
1281
1282		if (action_flags & IS_YV12) {
1283			if (action_flags & CONVERT_TO_YUY2) {
1284				NVCopyData420(buf + (top * srcPitch) + left,
1285					      buf + s2offset, buf + s3offset,
1286					      dst, srcPitch, srcPitch2,
1287					      line_len, nlines, npixels);
1288			} else {
1289				/* Native YV12 */
1290				unsigned char *tbuf = buf + top *
1291						      srcPitch + left;
1292				unsigned char *tdst = dst;
1293
1294				/* luma upload */
1295				for (i = 0; i < nlines; i++) {
1296					memcpy(tdst, tbuf, line_len);
1297					tdst += line_len;
1298					tbuf += srcPitch;
1299				}
1300				dst += line_len * nlines;
1301
1302				NVCopyNV12ColorPlanes(buf + s2offset,
1303						      buf + s3offset, dst,
1304						      line_len, srcPitch2,
1305						      nlines, npixels);
1306			}
1307		} else {
1308			for (i = 0; i < nlines; i++) {
1309				memcpy(dst, buf, line_len);
1310				dst += line_len;
1311				buf += srcPitch;
1312			}
1313		}
1314
1315		if (uv_offset) {
1316			NVAccelM2MF(pNv, line_len, nlines / 2, 1,
1317				    line_len * nlines, uv_offset,
1318				    destination_buffer, NOUVEAU_BO_GART,
1319				    line_len, nlines >> 1, 0, 0,
1320				    pPriv->video_mem, NOUVEAU_BO_VRAM,
1321				    dstPitch, nlines >> 1, 0, 0);
1322		}
1323
1324		NVAccelM2MF(pNv, line_len, nlines, 1, 0, 0,
1325			    destination_buffer, NOUVEAU_BO_GART,
1326			    line_len, nlines, 0, 0,
1327			    pPriv->video_mem, NOUVEAU_BO_VRAM,
1328			    dstPitch, nlines, 0, 0);
1329
1330	} else {
1331CPU_copy:
1332		nouveau_bo_map(pPriv->video_mem, NOUVEAU_BO_WR, pNv->client);
1333		map = pPriv->video_mem->map + offset;
1334
1335		if (action_flags & IS_YV12) {
1336			if (action_flags & CONVERT_TO_YUY2) {
1337				NVCopyData420(buf + (top * srcPitch) + left,
1338					      buf + s2offset, buf + s3offset,
1339					      map, srcPitch, srcPitch2,
1340					      dstPitch, nlines, npixels);
1341			} else {
1342				unsigned char *tbuf =
1343					buf + left + top * srcPitch;
1344
1345				for (i = 0; i < nlines; i++) {
1346					int dwords = npixels << 1;
1347
1348					while (dwords & ~0x03) {
1349						*map = *tbuf;
1350						*(map + 1) = *(tbuf + 1);
1351						*(map + 2) = *(tbuf + 2);
1352						*(map + 3) = *(tbuf + 3);
1353						map += 4;
1354						tbuf += 4;
1355						dwords -= 4;
1356					}
1357
1358					switch (dwords) {
1359					case 3: *(map + 2) = *(tbuf + 2);
1360					case 2: *(map + 1) = *(tbuf + 1);
1361					case 1: *map = *tbuf;
1362					}
1363
1364					map += dstPitch - (npixels << 1);
1365					tbuf += srcPitch - (npixels << 1);
1366				}
1367
1368				NVCopyNV12ColorPlanes(buf + s2offset,
1369						      buf + s3offset,
1370						      map, dstPitch, srcPitch2,
1371						      nlines, npixels);
1372			}
1373		} else {
1374			/* YUY2 and RGB */
1375			for (i = 0; i < nlines; i++) {
1376				int dwords = npixels << 1;
1377
1378				while (dwords & ~0x03) {
1379					*map = *buf;
1380					*(map + 1) = *(buf + 1);
1381					*(map + 2) = *(buf + 2);
1382					*(map + 3) = *(buf + 3);
1383					map += 4;
1384					buf += 4;
1385					dwords -= 4;
1386				}
1387
1388				switch (dwords) {
1389				case 3: *(map + 2) = *(buf + 2);
1390				case 2: *(map + 1) = *(buf + 1);
1391				case 1: *map = *buf;
1392				}
1393
1394				map += dstPitch - (npixels << 1);
1395				buf += srcPitch - (npixels << 1);
1396			}
1397		}
1398	}
1399
1400	if (skip)
1401		return Success;
1402
1403	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE)
1404		pPriv->currentHostBuffer ^= 1;
1405
1406	/* If we're not using the hw overlay, we're rendering into a pixmap
1407	 * and need to take a couple of additional steps...
1408	 */
1409	if (!(action_flags & USE_OVERLAY)) {
1410		ppix = NVGetDrawablePixmap(pDraw);
1411
1412		/* Ensure pixmap is in offscreen memory */
1413		pNv->exa_force_cp = TRUE;
1414		exaMoveInPixmap(ppix);
1415		pNv->exa_force_cp = FALSE;
1416
1417		if (!exaGetPixmapDriverPrivate(ppix))
1418			return BadAlloc;
1419
1420#ifdef COMPOSITE
1421		/* Convert screen coords to pixmap coords */
1422		if (ppix->screen_x || ppix->screen_y) {
1423			REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
1424					 -ppix->screen_x, -ppix->screen_y);
1425			dstBox.x1 -= ppix->screen_x;
1426			dstBox.x2 -= ppix->screen_x;
1427			dstBox.y1 -= ppix->screen_y;
1428			dstBox.y2 -= ppix->screen_y;
1429		}
1430#endif
1431	}
1432
1433	if (action_flags & USE_OVERLAY) {
1434		if (pNv->Architecture == NV_ARCH_04) {
1435			NV04PutOverlayImage(pScrn, pPriv->video_mem, offset,
1436					    id, dstPitch, &dstBox, 0, 0,
1437					    xb, yb, npixels, nlines,
1438					    src_w, src_h, drw_w, drw_h,
1439					    clipBoxes);
1440		} else {
1441			NV10PutOverlayImage(pScrn, pPriv->video_mem, offset,
1442					    uv_offset, id, dstPitch, &dstBox,
1443					    0, 0, xb, yb,
1444					    npixels, nlines, src_w, src_h,
1445					    drw_w, drw_h, clipBoxes);
1446		}
1447
1448		pPriv->currentBuffer ^= 1;
1449	} else
1450	if (action_flags & USE_TEXTURE) {
1451		ret = BadImplementation;
1452
1453		if (pNv->Architecture == NV_ARCH_30) {
1454			ret = NV30PutTextureImage(pScrn, pPriv->video_mem,
1455						  offset, uv_offset,
1456						  id, dstPitch, &dstBox, 0, 0,
1457						  xb, yb, npixels, nlines,
1458						  src_w, src_h, drw_w, drw_h,
1459						  clipBoxes, ppix, pPriv);
1460		} else
1461		if (pNv->Architecture == NV_ARCH_40) {
1462			ret = NV40PutTextureImage(pScrn, pPriv->video_mem,
1463						  offset, uv_offset,
1464						  id, dstPitch, &dstBox, 0, 0,
1465						  xb, yb, npixels, nlines,
1466						  src_w, src_h, drw_w, drw_h,
1467						  clipBoxes, ppix, pPriv);
1468		} else
1469		if (pNv->Architecture == NV_TESLA) {
1470			ret = nv50_xv_image_put(pScrn, pPriv->video_mem,
1471						offset, uv_offset,
1472						id, dstPitch, &dstBox, 0, 0,
1473						xb, yb, npixels, nlines,
1474						src_w, src_h, drw_w, drw_h,
1475						clipBoxes, ppix, pPriv);
1476		} else {
1477			ret = nvc0_xv_image_put(pScrn, pPriv->video_mem,
1478						offset, uv_offset,
1479						id, dstPitch, &dstBox, 0, 0,
1480						xb, yb, npixels, nlines,
1481						src_w, src_h, drw_w, drw_h,
1482						clipBoxes, ppix, pPriv);
1483		}
1484
1485		if (ret != Success)
1486			return ret;
1487	} else {
1488		ret = NVPutBlitImage(pScrn, pPriv->video_mem, offset, id,
1489				     dstPitch, &dstBox, 0, 0, xb, yb, npixels,
1490				     nlines, src_w, src_h, drw_w, drw_h,
1491				     clipBoxes, ppix);
1492		if (ret != Success)
1493			return ret;
1494	}
1495
1496#ifdef COMPOSITE
1497	/* Damage tracking */
1498	if (!(action_flags & USE_OVERLAY))
1499		DamageDamageRegion(&ppix->drawable, clipBoxes);
1500#endif
1501
1502	return Success;
1503}
1504
1505/**
1506 * QueryImageAttributes
1507 *
1508 * calculates
1509 * - size (memory required to store image),
1510 * - pitches,
1511 * - offsets
1512 * of image
1513 * depending on colorspace (id) and dimensions (w,h) of image
1514 * values of
1515 * - w,
1516 * - h
1517 * may be adjusted as needed
1518 *
1519 * @param pScrn unused
1520 * @param id colorspace of image
1521 * @param w pointer to width of image
1522 * @param h pointer to height of image
1523 * @param pitches pitches[i] = length of a scanline in plane[i]
1524 * @param offsets offsets[i] = offset of plane i from the beginning of the image
1525 * @return size of the memory required for the XvImage queried
1526 */
1527static int
1528NVQueryImageAttributes(ScrnInfoPtr pScrn, int id,
1529		       unsigned short *w, unsigned short *h,
1530		       int *pitches, int *offsets)
1531{
1532	int size, tmp;
1533
1534	*w = (*w + 1) & ~1; // width rounded up to an even number
1535	if (offsets)
1536		offsets[0] = 0;
1537
1538	switch (id) {
1539	case FOURCC_YV12:
1540	case FOURCC_I420:
1541		*h = (*h + 1) & ~1; // height rounded up to an even number
1542		size = (*w + 3) & ~3; // width rounded up to a multiple of 4
1543		if (pitches)
1544			pitches[0] = size; // width rounded up to a multiple of 4
1545		size *= *h;
1546		if (offsets)
1547			offsets[1] = size; // number of pixels in "rounded up" image
1548		tmp = ((*w >> 1) + 3) & ~3; // width/2 rounded up to a multiple of 4
1549		if (pitches)
1550			pitches[1] = pitches[2] = tmp; // width/2 rounded up to a multiple of 4
1551		tmp *= (*h >> 1); // 1/4*number of pixels in "rounded up" image
1552		size += tmp; // 5/4*number of pixels in "rounded up" image
1553		if (offsets)
1554			offsets[2] = size; // 5/4*number of pixels in "rounded up" image
1555		size += tmp; // = 3/2*number of pixels in "rounded up" image
1556		break;
1557	case FOURCC_UYVY:
1558	case FOURCC_YUY2:
1559		size = *w << 1; // 2*width
1560		if (pitches)
1561			pitches[0] = size; // 2*width
1562		size *= *h; // 2*width*height
1563		break;
1564	case FOURCC_RGB:
1565		size = *w << 2; // 4*width (32 bit per pixel)
1566		if (pitches)
1567			pitches[0] = size; // 4*width
1568		size *= *h; // 4*width*height
1569		break;
1570	case FOURCC_AI44:
1571	case FOURCC_IA44:
1572		size = *w; // width
1573		if (pitches)
1574			pitches[0] = size; // width
1575		size *= *h; // width*height
1576		break;
1577	default:
1578		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Unknown colorspace: %x\n", id);
1579		*w = *h = size = 0;
1580		break;
1581	}
1582
1583	return size;
1584}
1585
1586/***** Exported offscreen surface stuff ****/
1587
1588
1589static int
1590NVAllocSurface(ScrnInfoPtr pScrn, int id,
1591	       unsigned short w, unsigned short h,
1592	       XF86SurfacePtr surface)
1593{
1594	NVPtr pNv = NVPTR(pScrn);
1595	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1596	int size, bpp, ret;
1597
1598	bpp = pScrn->bitsPerPixel >> 3;
1599
1600	if (pPriv->grabbedByV4L)
1601		return BadAlloc;
1602
1603	if ((w > IMAGE_MAX_W) || (h > IMAGE_MAX_H))
1604		return BadValue;
1605
1606	w = (w + 1) & ~1;
1607	pPriv->pitch = ((w << 1) + 63) & ~63;
1608	size = h * pPriv->pitch / bpp;
1609
1610	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, size,
1611				    &pPriv->video_mem);
1612	if (ret)
1613		return BadAlloc;
1614	pPriv->offset = 0;
1615
1616	surface->width = w;
1617	surface->height = h;
1618	surface->pScrn = pScrn;
1619	surface->pitches = &pPriv->pitch;
1620	surface->offsets = &pPriv->offset;
1621	surface->devPrivate.ptr = (pointer)pPriv;
1622	surface->id = id;
1623
1624	/* grab the video */
1625	NVStopOverlay(pScrn);
1626	pPriv->videoStatus = 0;
1627	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
1628	pPriv->grabbedByV4L = TRUE;
1629
1630	return Success;
1631}
1632
1633static int
1634NVStopSurface(XF86SurfacePtr surface)
1635{
1636	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1637
1638	if (pPriv->grabbedByV4L && pPriv->videoStatus) {
1639		NV10StopOverlay(surface->pScrn);
1640		pPriv->videoStatus = 0;
1641	}
1642
1643	return Success;
1644}
1645
1646static int
1647NVFreeSurface(XF86SurfacePtr surface)
1648{
1649	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1650
1651	if (pPriv->grabbedByV4L) {
1652		NVStopSurface(surface);
1653		NVFreeOverlayMemory(surface->pScrn);
1654		pPriv->grabbedByV4L = FALSE;
1655	}
1656
1657	return Success;
1658}
1659
1660static int
1661NVGetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 *value)
1662{
1663	NVPtr pNv = NVPTR(pScrn);
1664	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1665
1666	return NV10GetOverlayPortAttribute(pScrn, attribute,
1667					 value, (pointer)pPriv);
1668}
1669
1670static int
1671NVSetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 value)
1672{
1673	NVPtr pNv = NVPTR(pScrn);
1674	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1675
1676	return NV10SetOverlayPortAttribute(pScrn, attribute,
1677					 value, (pointer)pPriv);
1678}
1679
1680static int
1681NVDisplaySurface(XF86SurfacePtr surface,
1682		 short src_x, short src_y,
1683		 short drw_x, short drw_y,
1684		 short src_w, short src_h,
1685		 short drw_w, short drw_h,
1686		 RegionPtr clipBoxes)
1687{
1688	ScrnInfoPtr pScrn = surface->pScrn;
1689	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1690	INT32 xa, xb, ya, yb;
1691	BoxRec dstBox;
1692
1693	if (!pPriv->grabbedByV4L)
1694		return Success;
1695
1696	if (src_w > (drw_w << 3))
1697		drw_w = src_w >> 3;
1698	if (src_h > (drw_h << 3))
1699		drw_h = src_h >> 3;
1700
1701	/* Clip */
1702	xa = src_x;
1703	xb = src_x + src_w;
1704	ya = src_y;
1705	yb = src_y + src_h;
1706
1707	dstBox.x1 = drw_x;
1708	dstBox.x2 = drw_x + drw_w;
1709	dstBox.y1 = drw_y;
1710	dstBox.y2 = drw_y + drw_h;
1711
1712	if(!xf86XVClipVideoHelper(&dstBox, &xa, &xb, &ya, &yb, clipBoxes,
1713				  surface->width, surface->height))
1714		return Success;
1715
1716	dstBox.x1 -= pScrn->frameX0;
1717	dstBox.x2 -= pScrn->frameX0;
1718	dstBox.y1 -= pScrn->frameY0;
1719	dstBox.y2 -= pScrn->frameY0;
1720
1721	pPriv->currentBuffer = 0;
1722
1723	NV10PutOverlayImage(pScrn, pPriv->video_mem, surface->offsets[0],
1724			    0, surface->id, surface->pitches[0], &dstBox,
1725			    xa, ya, xb, yb, surface->width, surface->height,
1726			    src_w, src_h, drw_w, drw_h, clipBoxes);
1727
1728	return Success;
1729}
1730
1731/**
1732 * NVSetupBlitVideo
1733 * this function does all the work setting up a blit port
1734 *
1735 * @return blit port
1736 */
1737static XF86VideoAdaptorPtr
1738NVSetupBlitVideo (ScreenPtr pScreen)
1739{
1740	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1741	NVPtr               pNv       = NVPTR(pScrn);
1742	XF86VideoAdaptorPtr adapt;
1743	NVPortPrivPtr       pPriv;
1744	int i;
1745
1746	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1747					sizeof(NVPortPrivRec) +
1748					(sizeof(DevUnion) * NUM_BLIT_PORTS)))) {
1749		return NULL;
1750	}
1751
1752	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1753	adapt->flags		= 0;
1754	adapt->name		= "NV Video Blitter";
1755	adapt->nEncodings	= 1;
1756	adapt->pEncodings	= &DummyEncoding;
1757	adapt->nFormats		= NUM_FORMATS_ALL;
1758	adapt->pFormats		= NVFormats;
1759	adapt->nPorts		= NUM_BLIT_PORTS;
1760	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1761
1762	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_BLIT_PORTS]);
1763	for(i = 0; i < NUM_BLIT_PORTS; i++)
1764		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1765
1766	if (pNv->dev->chipset >= 0x11) {
1767		adapt->pAttributes = NVBlitAttributes;
1768		adapt->nAttributes = NUM_BLIT_ATTRIBUTES;
1769	} else {
1770		adapt->pAttributes = NULL;
1771		adapt->nAttributes = 0;
1772	}
1773
1774	adapt->pImages			= NVImages;
1775	adapt->nImages			= NUM_IMAGES_ALL;
1776	adapt->PutVideo			= NULL;
1777	adapt->PutStill			= NULL;
1778	adapt->GetVideo			= NULL;
1779	adapt->GetStill			= NULL;
1780	adapt->StopVideo		= NVStopBlitVideo;
1781	adapt->SetPortAttribute		= NVSetBlitPortAttribute;
1782	adapt->GetPortAttribute		= NVGetBlitPortAttribute;
1783	adapt->QueryBestSize		= NVQueryBestSize;
1784	adapt->PutImage			= NVPutImage;
1785	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1786
1787	pPriv->videoStatus		= 0;
1788	pPriv->grabbedByV4L		= FALSE;
1789	pPriv->blitter			= TRUE;
1790	pPriv->texture			= FALSE;
1791	pPriv->bicubic			= FALSE;
1792	pPriv->doubleBuffer		= FALSE;
1793	pPriv->SyncToVBlank		= (pNv->dev->chipset >= 0x11);
1794	pPriv->max_image_dim            = 2048;
1795
1796	pNv->blitAdaptor		= adapt;
1797
1798	return adapt;
1799}
1800
1801/**
1802 * NVSetupOverlayVideo
1803 * this function does all the work setting up an overlay port
1804 *
1805 * @return overlay port
1806 */
1807static XF86VideoAdaptorPtr
1808NVSetupOverlayVideoAdapter(ScreenPtr pScreen)
1809{
1810	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1811	NVPtr               pNv       = NVPTR(pScrn);
1812	XF86VideoAdaptorPtr adapt;
1813	NVPortPrivPtr       pPriv;
1814
1815	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1816					sizeof(NVPortPrivRec) +
1817					sizeof(DevUnion)))) {
1818		return NULL;
1819	}
1820
1821	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1822	adapt->flags		= VIDEO_OVERLAID_IMAGES;
1823	adapt->name		= "NV Video Overlay";
1824	adapt->nEncodings	= 1;
1825	adapt->pEncodings	= &DummyEncoding;
1826	adapt->nFormats		= NUM_FORMATS_ALL;
1827	adapt->pFormats		= NVFormats;
1828	adapt->nPorts		= 1;
1829	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1830
1831	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[1]);
1832	adapt->pPortPrivates[0].ptr	= (pointer)(pPriv);
1833
1834	adapt->pAttributes		= (pNv->Architecture != NV_ARCH_04) ? NV10OverlayAttributes : NV04OverlayAttributes;
1835	adapt->nAttributes		= (pNv->Architecture != NV_ARCH_04) ? NUM_NV10_OVERLAY_ATTRIBUTES : NUM_NV04_OVERLAY_ATTRIBUTES;
1836	adapt->pImages			= NVImages;
1837	adapt->nImages			= NUM_IMAGES_YUV;
1838	adapt->PutVideo			= NULL;
1839	adapt->PutStill			= NULL;
1840	adapt->GetVideo			= NULL;
1841	adapt->GetStill			= NULL;
1842	adapt->StopVideo		= NVStopOverlayVideo;
1843	adapt->SetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10SetOverlayPortAttribute : NV04SetOverlayPortAttribute;
1844	adapt->GetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10GetOverlayPortAttribute : NV04GetOverlayPortAttribute;
1845	adapt->QueryBestSize		= NVQueryBestSize;
1846	adapt->PutImage			= NVPutImage;
1847	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1848
1849	pPriv->videoStatus		= 0;
1850	pPriv->currentBuffer		= 0;
1851	pPriv->grabbedByV4L		= FALSE;
1852	pPriv->blitter			= FALSE;
1853	pPriv->texture			= FALSE;
1854	pPriv->bicubic			= FALSE;
1855	pPriv->max_image_dim            = 2048;
1856
1857	NVSetPortDefaults (pScrn, pPriv);
1858
1859	/* gotta uninit this someplace */
1860	REGION_NULL(pScreen, &pPriv->clip);
1861
1862	pNv->overlayAdaptor	= adapt;
1863
1864	xvBrightness		= MAKE_ATOM("XV_BRIGHTNESS");
1865	xvColorKey		= MAKE_ATOM("XV_COLORKEY");
1866	xvAutopaintColorKey     = MAKE_ATOM("XV_AUTOPAINT_COLORKEY");
1867	xvSetDefaults           = MAKE_ATOM("XV_SET_DEFAULTS");
1868
1869	if ( pNv->Architecture != NV_ARCH_04 )
1870		{
1871		xvDoubleBuffer		= MAKE_ATOM("XV_DOUBLE_BUFFER");
1872		xvContrast		= MAKE_ATOM("XV_CONTRAST");
1873		xvSaturation		= MAKE_ATOM("XV_SATURATION");
1874		xvHue			= MAKE_ATOM("XV_HUE");
1875		xvITURBT709		= MAKE_ATOM("XV_ITURBT_709");
1876		xvOnCRTCNb		= MAKE_ATOM("XV_ON_CRTC_NB");
1877		NV10WriteOverlayParameters(pScrn);
1878		}
1879
1880	return adapt;
1881}
1882
1883
1884XF86OffscreenImageRec NVOffscreenImages[2] = {
1885	{
1886		&NVImages[0],
1887		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1888		NVAllocSurface,
1889		NVFreeSurface,
1890		NVDisplaySurface,
1891		NVStopSurface,
1892		NVGetSurfaceAttribute,
1893		NVSetSurfaceAttribute,
1894		IMAGE_MAX_W, IMAGE_MAX_H,
1895		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1896		&NV10OverlayAttributes[1]
1897	},
1898	{
1899		&NVImages[2],
1900		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1901		NVAllocSurface,
1902		NVFreeSurface,
1903		NVDisplaySurface,
1904		NVStopSurface,
1905		NVGetSurfaceAttribute,
1906		NVSetSurfaceAttribute,
1907		IMAGE_MAX_W, IMAGE_MAX_H,
1908		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1909		&NV10OverlayAttributes[1]
1910	}
1911};
1912
1913static void
1914NVInitOffscreenImages (ScreenPtr pScreen)
1915{
1916	xf86XVRegisterOffscreenImages(pScreen, NVOffscreenImages, 2);
1917}
1918
1919/**
1920 * NVChipsetHasOverlay
1921 *
1922 * newer chips don't support overlay anymore.
1923 * overlay feature is emulated via textures.
1924 *
1925 * @param pNv
1926 * @return true, if chipset supports overlay
1927 */
1928static Bool
1929NVChipsetHasOverlay(NVPtr pNv)
1930{
1931	switch (pNv->Architecture) {
1932	case NV_ARCH_04: /*NV04 has a different overlay than NV10+*/
1933	case NV_ARCH_10:
1934	case NV_ARCH_20:
1935	case NV_ARCH_30:
1936		return TRUE;
1937	case NV_ARCH_40:
1938		if (pNv->dev->chipset == 0x40)
1939			return TRUE;
1940		break;
1941	default:
1942		break;
1943	}
1944
1945	return FALSE;
1946}
1947
1948/**
1949 * NVSetupOverlayVideo
1950 * check if chipset supports Overla
1951 * if so, setup overlay port
1952 *
1953 * @return overlay port
1954 * @see NVChipsetHasOverlay(NVPtr pNv)
1955 * @see NV10SetupOverlayVideo(ScreenPtr pScreen)
1956 * @see NVInitOffscreenImages(ScreenPtr pScreen)
1957 */
1958static XF86VideoAdaptorPtr
1959NVSetupOverlayVideo(ScreenPtr pScreen)
1960{
1961	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
1962	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
1963	NVPtr                pNv   = NVPTR(pScrn);
1964
1965	if (1 /*pNv->kms_enable*/ || !NVChipsetHasOverlay(pNv))
1966		return NULL;
1967
1968	overlayAdaptor = NVSetupOverlayVideoAdapter(pScreen);
1969	/* I am not sure what this call does. */
1970	if (overlayAdaptor && pNv->Architecture != NV_ARCH_04 )
1971		NVInitOffscreenImages(pScreen);
1972
1973	#ifdef COMPOSITE
1974	if (!noCompositeExtension) {
1975		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1976			   "Xv: Composite is enabled, enabling overlay with "
1977			   "smart blitter fallback\n");
1978		overlayAdaptor->name = "NV Video Overlay with Composite";
1979	}
1980	#endif
1981
1982	return overlayAdaptor;
1983}
1984
1985/**
1986 * NV30 texture adapter.
1987 */
1988
1989#define NUM_FORMAT_TEXTURED 2
1990
1991static XF86ImageRec NV30TexturedImages[NUM_FORMAT_TEXTURED] =
1992{
1993	XVIMAGE_YV12,
1994	XVIMAGE_I420,
1995};
1996
1997/**
1998 * NV30SetupTexturedVideo
1999 * this function does all the work setting up textured video port
2000 *
2001 * @return texture port
2002 */
2003static XF86VideoAdaptorPtr
2004NV30SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
2005{
2006	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2007	NVPtr pNv = NVPTR(pScrn);
2008	XF86VideoAdaptorPtr adapt;
2009	NVPortPrivPtr pPriv;
2010	int i;
2011
2012	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2013				 sizeof(NVPortPrivRec) +
2014				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2015		return NULL;
2016	}
2017
2018	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2019	adapt->flags		= 0;
2020	if (bicubic)
2021		adapt->name		= "NV30 high quality adapter";
2022	else
2023		adapt->name		= "NV30 texture adapter";
2024	adapt->nEncodings	= 1;
2025	adapt->pEncodings	= &DummyEncodingTex;
2026	adapt->nFormats		= NUM_FORMATS_ALL;
2027	adapt->pFormats		= NVFormats;
2028	adapt->nPorts		= NUM_TEXTURE_PORTS;
2029	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2030
2031	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2032	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2033		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2034
2035	adapt->pAttributes		= NVTexturedAttributes;
2036	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
2037	adapt->pImages			= NV30TexturedImages;
2038	adapt->nImages			= NUM_FORMAT_TEXTURED;
2039	adapt->PutVideo			= NULL;
2040	adapt->PutStill			= NULL;
2041	adapt->GetVideo			= NULL;
2042	adapt->GetStill			= NULL;
2043	adapt->StopVideo		= NV30StopTexturedVideo;
2044	adapt->SetPortAttribute		= NV30SetTexturePortAttribute;
2045	adapt->GetPortAttribute		= NV30GetTexturePortAttribute;
2046	adapt->QueryBestSize		= NVQueryBestSize;
2047	adapt->PutImage			= NVPutImage;
2048	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2049
2050	pPriv->videoStatus		= 0;
2051	pPriv->grabbedByV4L		= FALSE;
2052	pPriv->blitter			= FALSE;
2053	pPriv->texture			= TRUE;
2054	pPriv->bicubic			= bicubic;
2055	pPriv->doubleBuffer		= FALSE;
2056	pPriv->SyncToVBlank		= TRUE;
2057	pPriv->max_image_dim            = 4096;
2058
2059	if (bicubic)
2060		pNv->textureAdaptor[1]	= adapt;
2061	else
2062		pNv->textureAdaptor[0]	= adapt;
2063
2064	return adapt;
2065}
2066
2067/**
2068 * NV40 texture adapter.
2069 */
2070
2071#define NUM_FORMAT_TEXTURED 2
2072
2073static XF86ImageRec NV40TexturedImages[NUM_FORMAT_TEXTURED] =
2074{
2075	XVIMAGE_YV12,
2076	XVIMAGE_I420,
2077};
2078
2079/**
2080 * NV40SetupTexturedVideo
2081 * this function does all the work setting up textured video port
2082 *
2083 * @return texture port
2084 */
2085static XF86VideoAdaptorPtr
2086NV40SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
2087{
2088	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2089	NVPtr pNv = NVPTR(pScrn);
2090	XF86VideoAdaptorPtr adapt;
2091	NVPortPrivPtr pPriv;
2092	int i;
2093
2094	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2095				 sizeof(NVPortPrivRec) +
2096				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2097		return NULL;
2098	}
2099
2100	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2101	adapt->flags		= 0;
2102	if (bicubic)
2103		adapt->name		= "NV40 high quality adapter";
2104	else
2105		adapt->name		= "NV40 texture adapter";
2106	adapt->nEncodings	= 1;
2107	adapt->pEncodings	= &DummyEncodingTex;
2108	adapt->nFormats		= NUM_FORMATS_ALL;
2109	adapt->pFormats		= NVFormats;
2110	adapt->nPorts		= NUM_TEXTURE_PORTS;
2111	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2112
2113	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2114	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2115		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2116
2117	adapt->pAttributes		= NVTexturedAttributes;
2118	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
2119	adapt->pImages			= NV40TexturedImages;
2120	adapt->nImages			= NUM_FORMAT_TEXTURED;
2121	adapt->PutVideo			= NULL;
2122	adapt->PutStill			= NULL;
2123	adapt->GetVideo			= NULL;
2124	adapt->GetStill			= NULL;
2125	adapt->StopVideo		= NV40StopTexturedVideo;
2126	adapt->SetPortAttribute		= NV40SetTexturePortAttribute;
2127	adapt->GetPortAttribute		= NV40GetTexturePortAttribute;
2128	adapt->QueryBestSize		= NVQueryBestSize;
2129	adapt->PutImage			= NVPutImage;
2130	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2131
2132	pPriv->videoStatus		= 0;
2133	pPriv->grabbedByV4L		= FALSE;
2134	pPriv->blitter			= FALSE;
2135	pPriv->texture			= TRUE;
2136	pPriv->bicubic			= bicubic;
2137	pPriv->doubleBuffer		= FALSE;
2138	pPriv->SyncToVBlank		= TRUE;
2139	pPriv->max_image_dim            = 4096;
2140
2141	if (bicubic)
2142		pNv->textureAdaptor[1]	= adapt;
2143	else
2144		pNv->textureAdaptor[0]	= adapt;
2145
2146	return adapt;
2147}
2148
2149static XF86ImageRec
2150NV50TexturedImages[] =
2151{
2152	XVIMAGE_YV12,
2153	XVIMAGE_I420,
2154	XVIMAGE_YUY2,
2155	XVIMAGE_UYVY
2156};
2157
2158static XF86VideoAdaptorPtr
2159NV50SetupTexturedVideo (ScreenPtr pScreen)
2160{
2161	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2162	NVPtr pNv = NVPTR(pScrn);
2163	XF86VideoAdaptorPtr adapt;
2164	NVPortPrivPtr pPriv;
2165	int i;
2166
2167	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2168				 sizeof(NVPortPrivRec) +
2169				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2170		return NULL;
2171	}
2172
2173	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2174	adapt->flags		= 0;
2175	adapt->name		= "Nouveau GeForce 8/9 Textured Video";
2176	adapt->nEncodings	= 1;
2177	adapt->pEncodings	= &DummyEncodingNV50;
2178	adapt->nFormats		= NUM_FORMATS_NV50;
2179	adapt->pFormats		= NV50Formats;
2180	adapt->nPorts		= NUM_TEXTURE_PORTS;
2181	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2182
2183	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2184	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2185		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2186
2187	adapt->pAttributes		= NVTexturedAttributesNV50;
2188	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES_NV50;
2189	adapt->pImages			= NV50TexturedImages;
2190	adapt->nImages			= sizeof(NV50TexturedImages) /
2191					  sizeof(NV50TexturedImages[0]);
2192	adapt->PutVideo			= NULL;
2193	adapt->PutStill			= NULL;
2194	adapt->GetVideo			= NULL;
2195	adapt->GetStill			= NULL;
2196	adapt->StopVideo		= nv50_xv_video_stop;
2197	adapt->SetPortAttribute		= nv50_xv_port_attribute_set;
2198	adapt->GetPortAttribute		= nv50_xv_port_attribute_get;
2199	adapt->QueryBestSize		= NVQueryBestSize;
2200	adapt->PutImage			= NVPutImage;
2201	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2202
2203	pNv->textureAdaptor[0]		= adapt;
2204
2205	nv50_xv_set_port_defaults(pScrn, pPriv);
2206	nv50_xv_csc_update(pScrn, pPriv);
2207
2208	xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
2209	xvContrast   = MAKE_ATOM("XV_CONTRAST");
2210	xvSaturation = MAKE_ATOM("XV_SATURATION");
2211	xvHue        = MAKE_ATOM("XV_HUE");
2212	xvITURBT709  = MAKE_ATOM("XV_ITURBT_709");
2213	return adapt;
2214}
2215
2216static void
2217NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor)
2218{
2219	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2220	NVPtr                pNv = NVPTR(pScrn);
2221
2222	if (!pNv->Nv3D)
2223		return;
2224
2225	if (pNv->Architecture == NV_ARCH_30) {
2226		textureAdaptor[0] = NV30SetupTexturedVideo(pScreen, FALSE);
2227		textureAdaptor[1] = NV30SetupTexturedVideo(pScreen, TRUE);
2228	} else
2229	if (pNv->Architecture == NV_ARCH_40) {
2230		textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE);
2231		textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE);
2232	} else
2233	if (pNv->Architecture >= NV_TESLA) {
2234		textureAdaptor[0] = NV50SetupTexturedVideo(pScreen);
2235	}
2236}
2237
2238/**
2239 * NVInitVideo
2240 * tries to initialize the various supported adapters
2241 * and add them to the list of ports on screen "pScreen".
2242 *
2243 * @param pScreen
2244 * @see NVSetupOverlayVideo(ScreenPtr pScreen)
2245 * @see NVSetupBlitVideo(ScreenPtr pScreen)
2246 */
2247void
2248NVInitVideo(ScreenPtr pScreen)
2249{
2250	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2251	NVPtr                pNv = NVPTR(pScrn);
2252	XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
2253	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
2254	XF86VideoAdaptorPtr  blitAdaptor = NULL;
2255	XF86VideoAdaptorPtr  textureAdaptor[2] = {NULL, NULL};
2256	int                  num_adaptors;
2257
2258	/*
2259	 * Driving the blitter requires the DMA FIFO. Using the FIFO
2260	 * without accel causes DMA errors. While the overlay might
2261	 * might work without accel, we also disable it for now when
2262	 * acceleration is disabled:
2263	 */
2264	if (pScrn->bitsPerPixel != 8 && pNv->AccelMethod == EXA) {
2265		xvSyncToVBlank = MAKE_ATOM("XV_SYNC_TO_VBLANK");
2266
2267		if (pNv->Architecture < NV_TESLA) {
2268			overlayAdaptor = NVSetupOverlayVideo(pScreen);
2269			blitAdaptor    = NVSetupBlitVideo(pScreen);
2270		}
2271
2272		NVSetupTexturedVideo(pScreen, textureAdaptor);
2273	}
2274
2275	num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
2276	if (blitAdaptor || overlayAdaptor || textureAdaptor[0]) {
2277		int size = num_adaptors;
2278
2279		if(overlayAdaptor) size++;
2280		if(blitAdaptor)    size++;
2281		if(textureAdaptor[0]) size++;
2282		if(textureAdaptor[1]) size++;
2283
2284		newAdaptors = malloc(size * sizeof(XF86VideoAdaptorPtr *));
2285		if(newAdaptors) {
2286			if(num_adaptors) {
2287				memcpy(newAdaptors, adaptors, num_adaptors *
2288						sizeof(XF86VideoAdaptorPtr));
2289			}
2290
2291			if(overlayAdaptor) {
2292				newAdaptors[num_adaptors] = overlayAdaptor;
2293				num_adaptors++;
2294			}
2295
2296			if (textureAdaptor[0]) { /* bilinear */
2297				newAdaptors[num_adaptors] = textureAdaptor[0];
2298				num_adaptors++;
2299			}
2300
2301			if (textureAdaptor[1]) { /* bicubic */
2302				newAdaptors[num_adaptors] = textureAdaptor[1];
2303				num_adaptors++;
2304			}
2305
2306			if(blitAdaptor) {
2307				newAdaptors[num_adaptors] = blitAdaptor;
2308				num_adaptors++;
2309			}
2310
2311			adaptors = newAdaptors;
2312		}
2313	}
2314
2315	if (num_adaptors)
2316		xf86XVScreenInit(pScreen, adaptors, num_adaptors);
2317	if (newAdaptors)
2318		free(newAdaptors);
2319
2320	/*
2321	 * For now we associate with the plain texture adapter since it is logical, but we can
2322	 * associate with any/all adapters since VL doesn't depend on Xv for color conversion.
2323	 */
2324	if (textureAdaptor[0]) {
2325		XF86MCAdaptorPtr *adaptorsXvMC = malloc(sizeof(XF86MCAdaptorPtr));
2326
2327		if (adaptorsXvMC) {
2328			adaptorsXvMC[0] = vlCreateAdaptorXvMC(pScreen, (char *)textureAdaptor[0]->name);
2329
2330			if (adaptorsXvMC[0]) {
2331				vlInitXvMC(pScreen, 1, adaptorsXvMC);
2332				vlDestroyAdaptorXvMC(adaptorsXvMC[0]);
2333			}
2334
2335			free(adaptorsXvMC);
2336		}
2337	}
2338}
2339
2340void
2341NVTakedownVideo(ScrnInfoPtr pScrn)
2342{
2343	NVPtr pNv = NVPTR(pScrn);
2344
2345	if (pNv->blitAdaptor)
2346		NVFreePortMemory(pScrn, GET_BLIT_PRIVATE(pNv));
2347	if (pNv->textureAdaptor[0]) {
2348		NVFreePortMemory(pScrn,
2349				 pNv->textureAdaptor[0]->pPortPrivates[0].ptr);
2350	}
2351	if (pNv->textureAdaptor[1]) {
2352		NVFreePortMemory(pScrn,
2353				 pNv->textureAdaptor[1]->pPortPrivates[0].ptr);
2354	}
2355}
2356
/* Cubic reconstruction filter used for video scaling, as defined in
 * "Reconstruction Filters in Computer Graphics", Mitchell & Netravali,
 * SIGGRAPH '88, with B = 0.75 and C = (1 - B) / 2.
 *
 * The inner polynomial is evaluated for |x| < 1 and the outer one for every
 * other x; there is no cut-off to zero beyond |x| = 2.
 */
static float filter_func(float x)
{
	const double B = 0.75;
	const double C = (1.0 - B) / 2.0;
	const double ax  = fabs(x);
	const double ax2 = ax * ax;
	const double ax3 = ax * ax2;
	double poly;

	if (ax < 1.0) {
		poly = (12.0-9.0*B-6.0*C)*ax3+(-18.0+12.0*B+6.0*C)*ax2+(6.0-2.0*B);
	} else {
		poly = (-B-6.0*C)*ax3+(6.0*B+30.0*C)*ax2+(-12.0*B-48.0*C)*ax+(8.0*B+24.0*C);
	}

	return poly / 6.0;
}
2374
/*
 * Convert a float to a signed byte by scaling it by 127 and truncating
 * toward zero, so 1.0 maps to 127 and -1.0 to -127.
 *
 * Converting a floating value whose integral part does not fit the target
 * integer type is undefined behaviour in C, so out-of-range results saturate
 * to 127 / -128 and NaN maps to 0. Every input that was already well defined
 * yields the same value as a plain cast.
 */
static int8_t f32tosb8(float v)
{
	const double scaled = v * 127.0;

	if (scaled != scaled)	/* NaN */
		return 0;
	if (scaled >= 127.0)
		return 127;
	if (scaled <= -128.0)
		return -128;

	return (int8_t)scaled;
}
2379
2380void
2381NVXVComputeBicubicFilter(struct nouveau_bo *bo, unsigned offset, unsigned size)
2382{
2383	int8_t *t = (int8_t *)(bo->map + offset);
2384	int i;
2385
2386	for(i = 0; i < size; i++) {
2387		float  x = (i + 0.5) / size;
2388		float w0 = filter_func(x+1.0);
2389		float w1 = filter_func(x);
2390		float w2 = filter_func(x-1.0);
2391		float w3 = filter_func(x-2.0);
2392
2393		t[4*i+2]=f32tosb8(1.0+x-w1/(w0+w1));
2394		t[4*i+1]=f32tosb8(1.0-x+w3/(w2+w3));
2395		t[4*i+0]=f32tosb8(w0+w1);
2396		t[4*i+3]=f32tosb8(0.0);
2397	}
2398}
2399