nouveau_xv.c revision 22d74663
1/*
2 * Copyright 2007 Arthur Huillet
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#ifdef __SSE2__
29#include <immintrin.h>
30#endif
31
32#include "xorg-config.h"
33#include "xf86xv.h"
34#include <X11/extensions/Xv.h>
35#include "exa.h"
36#include "damage.h"
37#include "dixstruct.h"
38#include "fourcc.h"
39
40#include "nv_include.h"
41#include "nv_dma.h"
42
43#include "vl_hwmc.h"
44
45#include "hwdefs/nv_m2mf.xml.h"
46
/* Maximum image dimensions advertised by DummyEncoding. */
#define IMAGE_MAX_W 2046
#define IMAGE_MAX_H 2046

/* Maximum image dimensions advertised by DummyEncodingTex. */
#define TEX_IMAGE_MAX_W 4096
#define TEX_IMAGE_MAX_H 4096

/* Added to a millisecond timestamp when a port enters the OFF_TIMER state. */
#define OFF_DELAY 	500  /* milliseconds */
/* Added to the current time when a port enters the FREE_TIMER state. */
#define FREE_DELAY 	5000

#define NUM_BLIT_PORTS 16
#define NUM_TEXTURE_PORTS 32


/* Dispatch to the NV04 or NV10 overlay stop routine.
 * NOTE: expands to an expression that reads a variable named pNv, which must
 * be in scope at every call site. */
#define NVStopOverlay(X) (((pNv->Architecture == NV_ARCH_04) ? NV04StopOverlay(X) : NV10StopOverlay(X)))

/* Value taken by pPriv->currentHostBuffer when we failed to allocate the two
 * private buffers in TT memory, so that we can catch this case and attempt no
 * other allocation afterwards (performance reasons). */
#define NO_PRIV_HOST_BUFFER_AVAILABLE 9999
65
/* NVPutImage action flags: single-bit values OR-ed together into the
 * action_flags word computed by NV_set_action_flags. */
enum {
	/* source is FOURCC_YV12 or FOURCC_I420 */
	IS_YV12 = 1,
	/* source is FOURCC_YUY2 or FOURCC_UYVY */
	IS_YUY2 = 2,
	/* the chosen adapter cannot take the data as is; output uses YUY2 pitches */
	CONVERT_TO_YUY2=4,
	/* port is neither a blitter nor a texture port */
	USE_OVERLAY=8,
	/* port is a texture port */
	USE_TEXTURE=16,
	/* source is FOURCC_I420: the two chroma plane offsets are exchanged */
	SWAP_UV=32,
	/* source is FOURCC_RGB */
	IS_RGB=64, //I am not sure how long we will support it
};
76
/* Call MakeAtom on the string literal a; the length passed excludes the
 * terminating NUL (sizeof(a) - 1). Only valid for arrays/literals, not
 * pointers. */
#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)

/* File-global atoms; presumably one per Xv port attribute name listed in the
 * attribute tables below. They are not assigned in this part of the file. */
Atom xvBrightness, xvContrast, xvColorKey, xvSaturation;
Atom xvHue, xvAutopaintColorKey, xvSetDefaults, xvDoubleBuffer;
Atom xvITURBT709, xvSyncToVBlank, xvOnCRTCNb;
82
/* client libraries expect an encoding */
/* Encoding limited to IMAGE_MAX_W x IMAGE_MAX_H. */
static XF86VideoEncodingRec DummyEncoding =
{
	0,
	"XV_IMAGE",
	IMAGE_MAX_W, IMAGE_MAX_H,
	{1, 1}
};

/* Encoding limited to TEX_IMAGE_MAX_W x TEX_IMAGE_MAX_H. */
static XF86VideoEncodingRec DummyEncodingTex =
{
	0,
	"XV_IMAGE",
	TEX_IMAGE_MAX_W, TEX_IMAGE_MAX_H,
	{1, 1}
};

/* Encoding with an 8192 x 8192 limit; named for NV50-class hardware. */
static XF86VideoEncodingRec DummyEncodingNV50 =
{
	0,
	"XV_IMAGE",
	8192, 8192,
	{1, 1}
};
107
#define NUM_FORMATS_ALL 6

/* {depth, visual class} pairs: depths 15/16/24 for TrueColor and DirectColor. */
XF86VideoFormatRec NVFormats[NUM_FORMATS_ALL] =
{
	{15, TrueColor}, {16, TrueColor}, {24, TrueColor},
	{15, DirectColor}, {16, DirectColor}, {24, DirectColor}
};

#define NUM_FORMATS_NV50 8
/* Same as NVFormats with depth 30 added for both visual classes. */
XF86VideoFormatRec NV50Formats[NUM_FORMATS_NV50] =
{
	{15, TrueColor}, {16, TrueColor}, {24, TrueColor}, {30, TrueColor},
	{15, DirectColor}, {16, DirectColor}, {24, DirectColor}, {30, DirectColor}
};

/* Attribute tables: each entry is {access flags, min, max, attribute name}.
 * Each NUM_* constant must match the number of entries in its table. */
#define NUM_NV04_OVERLAY_ATTRIBUTES 4
XF86AttributeRec NV04OverlayAttributes[NUM_NV04_OVERLAY_ATTRIBUTES] =
{
	    {XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
	    {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
	    {XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
	    {XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
};


#define NUM_NV10_OVERLAY_ATTRIBUTES 10
XF86AttributeRec NV10OverlayAttributes[NUM_NV10_OVERLAY_ATTRIBUTES] =
{
	{XvSettable | XvGettable, 0, 1, "XV_DOUBLE_BUFFER"},
	{XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
	{XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
	{XvSettable | XvGettable, 0, 8191, "XV_CONTRAST"},
	{XvSettable | XvGettable, 0, 8191, "XV_SATURATION"},
	{XvSettable | XvGettable, 0, 360, "XV_HUE"},
	{XvSettable | XvGettable, 0, 1, "XV_ITURBT_709"},
	{XvSettable | XvGettable, 0, 1, "XV_ON_CRTC_NB"},
};

#define NUM_BLIT_ATTRIBUTES 2
XF86AttributeRec NVBlitAttributes[NUM_BLIT_ATTRIBUTES] =
{
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
};

#define NUM_TEXTURED_ATTRIBUTES 2
XF86AttributeRec NVTexturedAttributes[NUM_TEXTURED_ATTRIBUTES] =
{
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
};

/* NOTE: the picture controls here use a -1000..1000 range, unlike the
 * overlay tables above. */
#define NUM_TEXTURED_ATTRIBUTES_NV50 7
XF86AttributeRec NVTexturedAttributesNV50[NUM_TEXTURED_ATTRIBUTES_NV50] =
{
	{ XvSettable             , 0, 0, "XV_SET_DEFAULTS" },
	{ XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_SATURATION" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_HUE" },
	{ XvSettable | XvGettable, 0, 1, "XV_ITURBT_709" }
};
173
/* NVImages lists the four YUV formats first, so NUM_IMAGES_YUV selects only
 * those and NUM_IMAGES_ALL additionally includes the trailing RGB entry. */
#define NUM_IMAGES_YUV 4
#define NUM_IMAGES_ALL 5

/* Image id used for the driver's 32 bits-per-pixel packed RGB format. */
#define FOURCC_RGB 0x0000003
/* XF86ImageRec initializer for that format: XvRGB, LSBFirst, 32 bpp packed,
 * one plane, depth 24 with masks 0x00ff0000 / 0x0000ff00 / 0x000000ff,
 * component order "BGRX", top-to-bottom scanlines. */
#define XVIMAGE_RGB \
   { \
        FOURCC_RGB, \
        XvRGB, \
        LSBFirst, \
        { 0x03, 0x00, 0x00, 0x00, \
          0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \
        32, \
        XvPacked, \
        1, \
        24, 0x00ff0000, 0x0000ff00, 0x000000ff, \
        0, 0, 0, \
        0, 0, 0, \
        0, 0, 0, \
        {'B','G','R','X',\
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \
        XvTopToBottom \
   }

/* Image formats offered by this file's adapters; order matters (see above). */
static XF86ImageRec NVImages[NUM_IMAGES_ALL] =
{
	XVIMAGE_YUY2,
	XVIMAGE_YV12,
	XVIMAGE_UYVY,
	XVIMAGE_I420,
	XVIMAGE_RGB
};
205
206static void
207nouveau_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
208{
209    dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
210    dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
211    dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
212    dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
213
214    if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
215	dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
216}
217
218static void
219nouveau_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box)
220{
221    if (crtc->enabled) {
222	crtc_box->x1 = crtc->x;
223	crtc_box->x2 = crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation);
224	crtc_box->y1 = crtc->y;
225	crtc_box->y2 = crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation);
226    } else
227	crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0;
228}
229
230static int
231nouveau_box_area(BoxPtr box)
232{
233    return (int) (box->x2 - box->x1) * (int) (box->y2 - box->y1);
234}
235
236xf86CrtcPtr
237nouveau_pick_best_crtc(ScrnInfoPtr pScrn, Bool consider_disabled,
238                       int x, int y, int w, int h)
239{
240    xf86CrtcConfigPtr   xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
241    int                 coverage, best_coverage, c;
242    BoxRec              box, crtc_box, cover_box;
243    RROutputPtr         primary_output = NULL;
244    xf86CrtcPtr         best_crtc = NULL, primary_crtc = NULL;
245
246    if (!pScrn->vtSema)
247	return NULL;
248
249    box.x1 = x;
250    box.x2 = x + w;
251    box.y1 = y;
252    box.y2 = y + h;
253    best_coverage = 0;
254
255    /* Prefer the CRTC of the primary output */
256#ifdef HAS_DIXREGISTERPRIVATEKEY
257    if (dixPrivateKeyRegistered(rrPrivKey))
258#endif
259    {
260	primary_output = RRFirstOutput(pScrn->pScreen);
261    }
262    if (primary_output && primary_output->crtc)
263	primary_crtc = primary_output->crtc->devPrivate;
264
265    /* first consider only enabled CRTCs */
266    for (c = 0; c < xf86_config->num_crtc; c++) {
267	xf86CrtcPtr crtc = xf86_config->crtc[c];
268
269	if (!crtc->enabled)
270	    continue;
271
272	nouveau_crtc_box(crtc, &crtc_box);
273	nouveau_box_intersect(&cover_box, &crtc_box, &box);
274	coverage = nouveau_box_area(&cover_box);
275	if (coverage > best_coverage ||
276	    (coverage == best_coverage && crtc == primary_crtc)) {
277	    best_crtc = crtc;
278	    best_coverage = coverage;
279	}
280    }
281    if (best_crtc || !consider_disabled)
282	return best_crtc;
283
284    /* if we found nothing, repeat the search including disabled CRTCs */
285    for (c = 0; c < xf86_config->num_crtc; c++) {
286	xf86CrtcPtr crtc = xf86_config->crtc[c];
287
288	nouveau_crtc_box(crtc, &crtc_box);
289	nouveau_box_intersect(&cover_box, &crtc_box, &box);
290	coverage = nouveau_box_area(&cover_box);
291	if (coverage > best_coverage ||
292	    (coverage == best_coverage && crtc == primary_crtc)) {
293	    best_crtc = crtc;
294	    best_coverage = coverage;
295	}
296    }
297    return best_crtc;
298}
299
300unsigned int
301nv_window_belongs_to_crtc(ScrnInfoPtr pScrn, int x, int y, int w, int h)
302{
303	xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
304	unsigned int mask = 0;
305	int i;
306
307	for (i = 0; i < xf86_config->num_crtc; i++) {
308		xf86CrtcPtr crtc = xf86_config->crtc[i];
309
310		if (!drmmode_crtc_on(crtc))
311			continue;
312
313		if ((x < (crtc->x + crtc->mode.HDisplay)) &&
314		    (y < (crtc->y + crtc->mode.VDisplay)) &&
315		    ((x + w) > crtc->x) &&
316		    ((y + h) > crtc->y))
317		    mask |= 1 << i;
318	}
319
320	return mask;
321}
322
323/**
324 * NVSetPortDefaults
325 * set attributes of port "pPriv" to compiled-in (except for colorKey) defaults
326 * this function does not care about the kind of adapter the port is for
327 *
328 * @param pScrn screen to get the default colorKey from
329 * @param pPriv port to reset to defaults
330 */
331void
332NVSetPortDefaults (ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
333{
334	NVPtr pNv = NVPTR(pScrn);
335
336	pPriv->brightness		= 0;
337	pPriv->contrast			= 4096;
338	pPriv->saturation		= 4096;
339	pPriv->hue			= 0;
340	pPriv->colorKey			= pNv->videoKey;
341	pPriv->autopaintColorKey	= TRUE;
342	pPriv->doubleBuffer		= pNv->Architecture != NV_ARCH_04;
343	pPriv->iturbt_709		= FALSE;
344	pPriv->currentHostBuffer	= 0;
345}
346
347static int
348nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
349		      struct nouveau_bo **pbo)
350{
351	union nouveau_bo_config config = {};
352	NVPtr pNv = NVPTR(pScrn);
353
354	if (*pbo) {
355		if ((*pbo)->size >= size)
356			return 0;
357		nouveau_bo_ref(NULL, pbo);
358	}
359
360	if (flags & NOUVEAU_BO_VRAM) {
361		if (pNv->Architecture == NV_TESLA)
362			config.nv50.memtype = 0x70;
363		else
364		if (pNv->Architecture >= NV_FERMI)
365			config.nvc0.memtype = 0xfe;
366	}
367	flags |= NOUVEAU_BO_MAP;
368
369	return nouveau_bo_new(pNv->dev, flags, 0, size, &config, pbo);
370}
371
372/**
373 * NVFreePortMemory
374 * frees memory held by a given port
375 *
376 * @param pScrn screen whose port wants to free memory
377 * @param pPriv port to free memory of
378 */
379static void
380NVFreePortMemory(ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
381{
382	nouveau_bo_ref(NULL, &pPriv->video_mem);
383	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
384	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[1]);
385}
386
/**
 * NVFreeOverlayMemory
 * frees memory held by the overlay port and, when NVOVL_SUPPORT evaluates
 * to non-zero, "power cycles" the overlay through NV_PMC_ENABLE
 *
 * @param pScrn screen whose overlay port wants to free memory
 */
static void
NVFreeOverlayMemory(ScrnInfoPtr pScrn)
{
	NVPtr	pNv = NVPTR(pScrn);
	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);

	NVFreePortMemory(pScrn, pPriv);
	/* NOTE(review): this uses #if while other sites in this file test
	 * NVOVL_SUPPORT with #ifdef -- confirm which one is intended. */
#if NVOVL_SUPPORT
	/* "power cycle" the overlay: clear bit 28 (mask 0x10000000) of
	 * NV_PMC_ENABLE, then set it again */
	nvWriteMC(pNv, NV_PMC_ENABLE,
		  (nvReadMC(pNv, NV_PMC_ENABLE) & 0xEFFFFFFF));
	nvWriteMC(pNv, NV_PMC_ENABLE,
		  (nvReadMC(pNv, NV_PMC_ENABLE) | 0x10000000));
#endif
}
408
409/**
410 * NVFreeBlitMemory
411 * frees memory held by the blit port
412 *
413 * @param pScrn screen whose blit port wants to free memory
414 */
415static void
416NVFreeBlitMemory(ScrnInfoPtr pScrn)
417{
418	NVPtr	pNv = NVPTR(pScrn);
419	NVPortPrivPtr pPriv = GET_BLIT_PRIVATE(pNv);
420
421	NVFreePortMemory(pScrn, pPriv);
422}
423
424/**
425 * NVVideoTimerCallback
426 * callback function which perform cleanup tasks (stop overlay, free memory).
427 * within the driver
428 * purpose and use is unknown
429 */
430void
431NVVideoTimerCallback(ScrnInfoPtr pScrn, Time currentTime)
432{
433	NVPtr         pNv = NVPTR(pScrn);
434	NVPortPrivPtr pOverPriv = NULL;
435	NVPortPrivPtr pBlitPriv = NULL;
436	Bool needCallback = FALSE;
437
438	if (!pScrn->vtSema)
439		return;
440
441	if (pNv->overlayAdaptor) {
442		pOverPriv = GET_OVERLAY_PRIVATE(pNv);
443		if (!pOverPriv->videoStatus)
444			pOverPriv = NULL;
445	}
446
447	if (pNv->blitAdaptor) {
448		pBlitPriv = GET_BLIT_PRIVATE(pNv);
449		if (!pBlitPriv->videoStatus)
450			pBlitPriv = NULL;
451	}
452
453	if (pOverPriv) {
454		if (pOverPriv->videoTime < currentTime) {
455			if (pOverPriv->videoStatus & OFF_TIMER) {
456				NVStopOverlay(pScrn);
457				pOverPriv->videoStatus = FREE_TIMER;
458				pOverPriv->videoTime = currentTime + FREE_DELAY;
459				needCallback = TRUE;
460			} else
461			if (pOverPriv->videoStatus & FREE_TIMER) {
462				NVFreeOverlayMemory(pScrn);
463				pOverPriv->videoStatus = 0;
464			}
465		} else {
466			needCallback = TRUE;
467		}
468	}
469
470	if (pBlitPriv) {
471		if (pBlitPriv->videoTime < currentTime) {
472			NVFreeBlitMemory(pScrn);
473			pBlitPriv->videoStatus = 0;
474		} else {
475			needCallback = TRUE;
476		}
477	}
478
479	pNv->VideoTimerCallback = needCallback ? NVVideoTimerCallback : NULL;
480}
481
/* Declare ExaOffscreenMarkUsed ourselves unless a preprocessor macro of that
 * name already exists. */
#ifndef ExaOffscreenMarkUsed
extern void ExaOffscreenMarkUsed(PixmapPtr);
#endif
485
486/*
487 * StopVideo
488 */
489static void
490NVStopOverlayVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
491{
492	NVPtr         pNv   = NVPTR(pScrn);
493	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
494
495	if (pPriv->grabbedByV4L)
496		return;
497
498	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
499
500	if(Exit) {
501		if (pPriv->videoStatus & CLIENT_VIDEO_ON)
502			NVStopOverlay(pScrn);
503		NVFreeOverlayMemory(pScrn);
504		pPriv->videoStatus = 0;
505	} else {
506		if (pPriv->videoStatus & CLIENT_VIDEO_ON) {
507			pPriv->videoStatus = OFF_TIMER | CLIENT_VIDEO_ON;
508			pPriv->videoTime = currentTime.milliseconds + OFF_DELAY;
509			pNv->VideoTimerCallback = NVVideoTimerCallback;
510		}
511	}
512}
513
514/**
515 * QueryBestSize
516 * used by client applications to ask the driver:
517 * how would you actually scale a video of dimensions
518 * vid_w, vid_h, if i wanted you to scale it to dimensions
519 * drw_w, drw_h?
520 * function stores actual scaling size in pointers p_w, p_h.
521 *
522 *
523 * @param pScrn unused
524 * @param motion unused
525 * @param vid_w width of source video
526 * @param vid_h height of source video
527 * @param drw_w desired scaled width as requested by client
528 * @param drw_h desired scaled height as requested by client
529 * @param p_w actual scaled width as the driver is capable of
530 * @param p_h actual scaled height as the driver is capable of
531 * @param data unused
532 */
533static void
534NVQueryBestSize(ScrnInfoPtr pScrn, Bool motion,
535		short vid_w, short vid_h,
536		short drw_w, short drw_h,
537		unsigned int *p_w, unsigned int *p_h,
538		pointer data)
539{
540	if(vid_w > (drw_w << 3))
541		drw_w = vid_w >> 3;
542	if(vid_h > (drw_h << 3))
543		drw_h = vid_h >> 3;
544
545	*p_w = drw_w;
546	*p_h = drw_h;
547}
548
/**
 * NVCopyData420
 * used to convert YV12 to YUY2 for the blitter and NV04 overlay.
 * The U and V samples generated are linearly interpolated on the vertical
 * axis for better quality: on odd output lines (except the last one) each
 * chroma sample is the average of the current and the next chroma line.
 *
 * Each 32-bit word written packs two luma samples with one sample from src2
 * and one from src3; the byte layout depends on X_BYTE_ORDER.
 *
 * @param src1 source buffer of luma
 * @param src2 source buffer of chroma1
 * @param src3 source buffer of chroma2
 * @param dst1 destination buffer
 * @param srcPitch pitch of src1
 * @param srcPitch2 pitch of src2, src3
 * @param dstPitch pitch of dst1
 * @param h number of lines to copy
 * @param w length of lines to copy, in luma samples (halved internally to
 *          count 32-bit output words)
 */
static inline void
NVCopyData420(unsigned char *src1, unsigned char *src2, unsigned char *src3,
	      unsigned char *dst1, int srcPitch, int srcPitch2, int dstPitch,
	      int h, int w)
{
	CARD32 *dst;
	CARD8 *s1, *s2, *s3;
	int i, j;

/* su/sv: chroma sample X of the current line, averaged with the same sample
 * of the next chroma line when j is odd and not the last line. They read the
 * locals j, h, s2, s3 and srcPitch2, and are never #undef'd. */
#define su(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s2[X] +        \
		(signed int)(s2 + srcPitch2)[X]) / 2) : (s2[X]))
#define sv(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s3[X] +        \
		(signed int)(s3 + srcPitch2)[X]) / 2) : (s3[X]))

	/* from here on w counts output words (two luma samples each) */
	w >>= 1;

	for (j = 0; j < h; j++) {
		dst = (CARD32*)dst1;
		s1 = src1;  s2 = src2;  s3 = src3;
		i = w;

		/* unrolled: four words per iteration while more than four
		 * remain; the last (up to four) go through the loop below */
		while (i > 4) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
		dst[1] = (s1[2] << 24) | (s1[3] << 8) | (sv(1) << 16) | su(1);
		dst[2] = (s1[4] << 24) | (s1[5] << 8) | (sv(2) << 16) | su(2);
		dst[3] = (s1[6] << 24) | (s1[7] << 8) | (sv(3) << 16) | su(3);
#else
		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
		dst[1] = s1[2] | (s1[3] << 16) | (sv(1) << 8) | (su(1) << 24);
		dst[2] = s1[4] | (s1[5] << 16) | (sv(2) << 8) | (su(2) << 24);
		dst[3] = s1[6] | (s1[7] << 16) | (sv(3) << 8) | (su(3) << 24);
#endif
		dst += 4; s2 += 4; s3 += 4; s1 += 8;
		i -= 4;
		}

		/* remaining words, one at a time */
		while (i--) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
#else
		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
#endif
		dst++; s2++; s3++;
		s1 += 2;
		}

		dst1 += dstPitch;
		src1 += srcPitch;
		/* one chroma line serves two output lines: advance the chroma
		 * pointers only after an odd line */
		if (j & 1) {
			src2 += srcPitch2;
			src3 += srcPitch2;
		}
	}
}
620
621/**
622 * NVCopyNV12ColorPlanes
623 * Used to convert YV12 color planes to NV12 (interleaved UV) for the overlay
624 *
625 * @param src1 source buffer of chroma1
626 * @param dst1 destination buffer
627 * @param h number of lines to copy
628 * @param w length of lines to copy
629 * @param id source pixel format (YV12 or I420)
630 */
631static inline void
632NVCopyNV12ColorPlanes(unsigned char *src1, unsigned char *src2,
633		      unsigned char *dst, int dstPitch, int srcPitch2,
634		      int h, int w)
635{
636	int i, j, l, e;
637
638	w >>= 1;
639	h >>= 1;
640#ifdef __SSE2__
641	l = w >> 3;
642	e = w & 7;
643#else
644	l = w >> 1;
645	e = w & 1;
646#endif
647
648	for (j = 0; j < h; j++) {
649		unsigned char *us = src1;
650		unsigned char *vs = src2;
651		unsigned int *vuvud = (unsigned int *) dst;
652		unsigned short *vud;
653
654		for (i = 0; i < l; i++) {
655#ifdef __SSE2__
656			_mm_storeu_si128(
657				(void*)vuvud,
658				_mm_unpacklo_epi8(
659					_mm_loadl_epi64((void*)vs),
660					_mm_loadl_epi64((void*)us)));
661			vuvud+=4;
662			us+=8;
663			vs+=8;
664#else /* __SSE2__ */
665#  if X_BYTE_ORDER == X_BIG_ENDIAN
666			*vuvud++ = (vs[0]<<24) | (us[0]<<16) | (vs[1]<<8) | us[1];
667#  else
668			*vuvud++ = vs[0] | (us[0]<<8) | (vs[1]<<16) | (us[1]<<24);
669#  endif
670			us+=2;
671			vs+=2;
672#endif /* __SSE2__ */
673		}
674
675		vud = (unsigned short *)vuvud;
676		for (i = 0; i < e; i++) {
677#if X_BYTE_ORDER == X_BIG_ENDIAN
678			vud[i] = us[i] | (vs[i]<<8);
679#else
680			vud[i] = vs[i] | (us[i]<<8);
681#endif
682		}
683
684		dst += dstPitch;
685		src1 += srcPitch2;
686		src2 += srcPitch2;
687	}
688
689}
690
691
/**
 * NV_set_dimensions
 * Adjusts the requested drawable size to the overlay's scaling limits, clips
 * source and destination against clipBoxes (and, for the overlay, against the
 * overlay CRTC), and derives which part of the source image has to be copied.
 *
 * @param pScrn screen the port belongs to
 * @param action_flags NVPutImage action flags (USE_OVERLAY, IS_YV12, ...)
 * @param xa,xb,ya,yb out: source box handed to xf86XVClipVideoHelper; the
 *        results are shifted right by 16 to obtain integers
 * @param src_x,src_y,src_w,src_h source rectangle (only read here)
 * @param drw_x,drw_y destination origin (only read here)
 * @param drw_w,drw_h destination size; may be enlarged for the overlay
 * @param left,top,right,bottom out: integer source bounds, clamped to
 *        0..width and 0..height. NOTE: for YUY2 and RGB, left is converted to
 *        a byte offset at the end
 * @param dstBox out: destination box; relative to the overlay CRTC origin
 *        when USE_OVERLAY is set
 * @param npixels,nlines out: pixels per line and lines to copy; left
 *        untouched when none of IS_YV12/IS_YUY2/IS_RGB is set
 * @param clipBoxes clip region; intersected in place with the CRTC area when
 *        USE_OVERLAY is set
 * @param width,height full size of the source image
 * @return 0 on success, -1 when xf86XVClipVideoHelper returns zero
 */
static int
NV_set_dimensions(ScrnInfoPtr pScrn, int action_flags, INT32 *xa, INT32 *xb,
		  INT32 *ya, INT32 *yb, short *src_x, short *src_y,
		  short *src_w, short *src_h, short *drw_x, short *drw_y,
		  short *drw_w, short *drw_h, int *left, int *top, int *right,
		  int *bottom, BoxRec *dstBox, int *npixels, int *nlines,
		  RegionPtr clipBoxes, short width, short height)
{
	NVPtr pNv = NVPTR(pScrn);

	if (action_flags & USE_OVERLAY) {
		switch (pNv->Architecture) {
		case NV_ARCH_04:
			/* NV0x overlay can't scale down. at all. */
			if (*drw_w < *src_w)
				*drw_w = *src_w;
			if (*drw_h < *src_h)
				*drw_h = *src_h;
			break;
		case NV_ARCH_30:
			/* According to DirectFB, NV3x can't scale down by
			 * a ratio > 2
			 */
			/* NOTE(review): a request below half size is reset to
			 * the full source size, not to half size as the
			 * default case does with its own limit -- confirm
			 * this is intended.
			 */
			if (*drw_w < (*src_w) >> 1)
				*drw_w = *src_w;
			if (*drw_h < (*src_h) >> 1)
				*drw_h = *src_h;
			break;
		default: /*NV10, NV20*/
			/* NV1x overlay can't scale down by a ratio > 8 */
			if (*drw_w < (*src_w) >> 3)
				*drw_w = *src_w >> 3;
			if (*drw_h < (*src_h >> 3))
				*drw_h = *src_h >> 3;
		}
	}

	/* Clip */
	*xa = *src_x;
	*xb = *src_x + *src_w;
	*ya = *src_y;
	*yb = *src_y + *src_h;

	dstBox->x1 = *drw_x;
	dstBox->x2 = *drw_x + *drw_w;
	dstBox->y1 = *drw_y;
	dstBox->y2 = *drw_y + *drw_h;

	/* In randr 1.2 mode VIDEO_CLIP_TO_VIEWPORT is broken (hence it is not
	 * set in the overlay adapter flags) since pScrn->frame{X,Y}1 do not get
	 * updated. Hence manual clipping against the CRTC dimensions
	 */
	if (action_flags & USE_OVERLAY) {
		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
		unsigned id = pPriv->overlayCRTC;
		xf86CrtcPtr crtc = XF86_CRTC_CONFIG_PTR(pScrn)->crtc[id];
		RegionRec VPReg;
		BoxRec VPBox;

		VPBox.x1 = crtc->x;
		VPBox.y1 = crtc->y;
		VPBox.x2 = crtc->x + crtc->mode.HDisplay;
		VPBox.y2 = crtc->y + crtc->mode.VDisplay;

		/* NOTE: no pScreen is declared in this function; this only
		 * compiles if the REGION_* macros drop their first argument.
		 */
		REGION_INIT(pScreen, &VPReg, &VPBox, 1);
		REGION_INTERSECT(pScreen, clipBoxes, clipBoxes, &VPReg);
		REGION_UNINIT(pScreen, &VPReg);
	}

	if (!xf86XVClipVideoHelper(dstBox, xa, xb, ya, yb, clipBoxes,
				   width, height))
		return -1;

	/* The overlay is positioned relative to its CRTC's origin */
	if (action_flags & USE_OVERLAY)	{
		xf86CrtcConfigPtr xf86_config =
			XF86_CRTC_CONFIG_PTR(pScrn);
		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);

		dstBox->x1 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
		dstBox->x2 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
		dstBox->y1 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
		dstBox->y2 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
	}

	/* Convert fixed point to integer, as xf86XVClipVideoHelper probably
	 * turns its parameter into fixed point values
	 */
	*left = (*xa) >> 16;
	if (*left < 0)
		*left = 0;

	*top = (*ya) >> 16;
	if (*top < 0)
		*top = 0;

	*right = (*xb) >> 16;
	if (*right > width)
		*right = width;

	*bottom = (*yb) >> 16;
	if (*bottom > height)
		*bottom = height;

	if (action_flags & IS_YV12) {
		/* even "left", even "top", even number of pixels per line
		 * and even number of lines
		 */
		*left &= ~1;
		*npixels = ((*right + 1) & ~1) - *left;
		*top &= ~1;
		*nlines = ((*bottom + 1) & ~1) - *top;
	} else
	if (action_flags & IS_YUY2) {
		/* even "left" */
		*left &= ~1;
		/* even number of pixels per line */
		*npixels = ((*right + 1) & ~1) - *left;
		*nlines = *bottom - *top;
		/* 16bpp */
		*left <<= 1;
	} else
	if (action_flags & IS_RGB) {
		*npixels = *right - *left;
		*nlines = *bottom - *top;
		/* 32bpp */
		*left <<= 2;
	}

	return 0;
}
822
823static int
824NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
825				  int *srcPitch2, int *dstPitch, int *s2offset,
826				  int *s3offset, int *uv_offset,
827				  int *newFBSize, int *newTTSize,
828				  int *line_len, int npixels, int nlines,
829				  int width, int height)
830{
831	int tmp;
832
833	if (pNv->Architecture >= NV_TESLA) {
834		npixels = (npixels + 7) & ~7;
835		nlines = (nlines + 7) & ~7;
836	}
837
838	if (action_flags & IS_YV12) {
839		*srcPitch = (width + 3) & ~3;	/* of luma */
840		*s2offset = *srcPitch * height;
841		*srcPitch2 = ((width >> 1) + 3) & ~3; /*of chroma*/
842		*s3offset = (*srcPitch2 * (height >> 1)) + *s2offset;
843		*dstPitch = (npixels + 63) & ~63; /*luma and chroma pitch*/
844		*line_len = npixels;
845		*uv_offset = nlines * *dstPitch;
846		*newFBSize = *uv_offset + (nlines >> 1) * *dstPitch;
847		*newTTSize = *uv_offset + (nlines >> 1) * *dstPitch;
848	} else
849	if (action_flags & IS_YUY2) {
850		*srcPitch = width << 1; /* one luma, one chroma per pixel */
851		*dstPitch = ((npixels << 1) + 63) & ~63;
852		*line_len = npixels << 1;
853		*newFBSize = nlines * *dstPitch;
854		*newTTSize = nlines * *line_len;
855	} else
856	if (action_flags & IS_RGB) {
857		/* one R, one G, one B, one X per pixel */
858		*srcPitch = width << 2;
859		*dstPitch = ((npixels << 2) + 63) & ~63;
860		*line_len = npixels << 2;
861		*newFBSize = nlines * *dstPitch;
862		*newTTSize = nlines * *dstPitch;
863	}
864
865	if (action_flags & CONVERT_TO_YUY2) {
866		*dstPitch = ((npixels << 1) + 63) & ~63;
867		*line_len = npixels << 1;
868		*newFBSize = nlines * *dstPitch;
869		*newTTSize = nlines * *line_len;
870		*uv_offset = 0;
871	}
872
873	if (action_flags & SWAP_UV)  {
874		/* I420 swaps U and V */
875		tmp = *s2offset;
876		*s2offset = *s3offset;
877		*s3offset = tmp;
878	}
879
880	/* Overlay double buffering... */
881	if (action_flags & USE_OVERLAY)
882                (*newFBSize) <<= 1;
883
884	return 0;
885}
886
887
/**
 * NV_set_action_flags
 * This function computes the action flags from the input image,
 * that is, it decides what NVPutImage and its helpers must do.
 * This eases readability by avoiding lots of switch-case statements in the
 * core NVPutImage
 *
 * The adapter is chosen from pPriv->blitter / pPriv->texture: neither set
 * selects the overlay, texture without blitter selects the texture path, and
 * anything else leaves both USE_* bits clear, which means "blitter".
 *
 * @param pScrn screen the port belongs to
 * @param pDraw destination drawable (only used when COMPOSITE is defined)
 * @param pPriv port private; overlayCRTC may be updated when NVOVL_SUPPORT
 *        is defined
 * @param id FOURCC of the source image
 * @param drw_x,drw_y,drw_w,drw_h destination rectangle, used to find the
 *        CRTCs it touches
 * @param action_flags out: resulting flag word (overwritten, not OR-ed)
 */
static void
NV_set_action_flags(ScrnInfoPtr pScrn, DrawablePtr pDraw, NVPortPrivPtr pPriv,
		    int id, short drw_x, short drw_y, short drw_w, short drw_h,
		    int *action_flags)
{
	NVPtr pNv = NVPTR(pScrn);

/* These macros dereference the action_flags pointer parameter and are never
 * #undef'd. */
#define USING_OVERLAY (*action_flags & USE_OVERLAY)
#define USING_TEXTURE (*action_flags & USE_TEXTURE)
#define USING_BLITTER ((!(*action_flags & USE_OVERLAY)) &&                     \
		       (!(*action_flags & USE_TEXTURE)))

	*action_flags = 0;

	/* Pixel format-related bits */
	if (id == FOURCC_YUY2 || id == FOURCC_UYVY)
		*action_flags |= IS_YUY2;

	if (id == FOURCC_YV12 || id == FOURCC_I420)
		*action_flags |= IS_YV12;

	if (id == FOURCC_RGB) /*How long will we support it?*/
		*action_flags |= IS_RGB;

	if (id == FOURCC_I420) /* I420 is YV12 with swapped UV */
		*action_flags |= SWAP_UV;

	/* Desired adapter */
	if (!pPriv->blitter && !pPriv->texture)
		*action_flags |= USE_OVERLAY;

	if (!pPriv->blitter && pPriv->texture)
		*action_flags |= USE_TEXTURE;

	/* Adapter fallbacks (when the desired one can't be used)*/
#ifdef COMPOSITE
	{
		PixmapPtr ppix = NVGetDrawablePixmap(pDraw);

		/* this is whether ppix is in the viewable fb, not related to
		   the EXA "offscreen" stuff */
		if (!nouveau_exa_pixmap_is_onscreen(ppix))
			*action_flags &= ~USE_OVERLAY;
	}
#endif

#ifdef NVOVL_SUPPORT
	if (USING_OVERLAY) {
		/* The unsigned mask is narrowed to char; only bits 0 and 1
		 * are examined below.
		 */
		char crtc = nv_window_belongs_to_crtc(pScrn, drw_x, drw_y,
						      drw_w, drw_h);

		if ((crtc & (1 << 0)) && (crtc & (1 << 1))) {
			/* The overlay cannot be used on two CRTCs at a time,
			 * so we need to fallback on the blitter
			 */
			*action_flags &= ~USE_OVERLAY;
		} else
		if ((crtc & (1 << 0))) {
			/* We need to put the overlay on CRTC0 - if it's not
			 * already here
			 */
			if (pPriv->overlayCRTC == 1) {
				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) |
					    NV_CRTC_FSEL_OVERLAY);
				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) &
					    ~NV_CRTC_FSEL_OVERLAY);
				pPriv->overlayCRTC = 0;
			}
		} else
		if ((crtc & (1 << 1))) {
			/* Same as above, moving the overlay to CRTC1 */
			if (pPriv->overlayCRTC == 0) {
				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) |
					    NV_CRTC_FSEL_OVERLAY);
				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) &
					    ~NV_CRTC_FSEL_OVERLAY);
				pPriv->overlayCRTC = 1;
			}
		}

		/* The overlay is only used on an unrotated CRTC */
		if (XF86_CRTC_CONFIG_PTR(pScrn)->crtc[pPriv->overlayCRTC]
						 ->rotation != RR_Rotate_0)
			*action_flags &= ~USE_OVERLAY;
	}
#endif

	/* At this point the adapter we're going to use is _known_.
	 * You cannot change it now.
	 */

	/* Card/adapter format restrictions */
	if (USING_BLITTER) {
		/* The blitter does not handle YV12 natively */
		if (id == FOURCC_YV12 || id == FOURCC_I420)
			*action_flags |= CONVERT_TO_YUY2;
	}

	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_04)) {
		/* NV04-05 don't support YV12, only YUY2 and ITU-R BT.601 */
		if (*action_flags & IS_YV12)
			*action_flags |= CONVERT_TO_YUY2;
	}

	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_10 ||
			      pNv->Architecture == NV_ARCH_20)) {
		/* No YV12 overlay on NV10, 11, 15, 20, NFORCE */
		/* NOTE(review): unlike the NV04 case above, the flag is set
		 * here without checking IS_YV12 -- confirm that callers only
		 * act on it for YV12 input.
		 */
		switch (pNv->dev->chipset) {
		case 0x10:
		case 0x11:
		case 0x15:
		case 0x1a: /*XXX: unsure about nforce */
		case 0x20:
			*action_flags |= CONVERT_TO_YUY2;
			break;
		default:
			break;
		}
	}
}
1017
1018
1019/**
1020 * NVPutImage
1021 * PutImage is "the" important function of the Xv extension.
1022 * a client (e.g. video player) calls this function for every
1023 * image (of the video) to be displayed. this function then
1024 * scales and displays the image.
1025 *
1026 * @param pScrn screen which hold the port where the image is put
1027 * @param src_x source point in the source image to start displaying from
1028 * @param src_y see above
1029 * @param src_w width of the source image to display
1030 * @param src_h see above
1031 * @param drw_x  screen point to display to
1032 * @param drw_y
1033 * @param drw_w width of the screen drawable
1034 * @param drw_h
1035 * @param id pixel format of image
1036 * @param buf pointer to buffer containing the source image
1037 * @param width total width of the source image we are passed
1038 * @param height
1039 * @param Sync unused
1040 * @param clipBoxes ??
1041 * @param data pointer to port
1042 * @param pDraw drawable pointer
1043 */
1044static int
1045NVPutImage(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x,
1046	   short drw_y, short src_w, short src_h, short drw_w, short drw_h,
1047	   int id, unsigned char *buf, short width, short height,
1048	   Bool Sync, RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
1049{
1050	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
1051	NVPtr pNv = NVPTR(pScrn);
1052	PixmapPtr ppix;
1053	/* source box */
1054	INT32 xa = 0, xb = 0, ya = 0, yb = 0;
1055	/* size to allocate in VRAM and in GART respectively */
1056	int newFBSize = 0, newTTSize = 0;
1057	/* card VRAM offsets, source offsets for U and V planes */
1058	int offset = 0, uv_offset = 0, s2offset = 0, s3offset = 0;
1059	/* source pitch, source pitch of U and V planes in case of YV12,
1060	 * VRAM destination pitch
1061	 */
1062	int srcPitch = 0, srcPitch2 = 0, dstPitch = 0;
1063	/* position of the given source data (using src_*), number of pixels
1064	 * and lines we are interested in
1065	 */
1066	int top = 0, left = 0, right = 0, bottom = 0, npixels = 0, nlines = 0;
1067	Bool skip = FALSE;
1068	BoxRec dstBox;
1069	CARD32 tmp = 0;
1070	int line_len = 0; /* length of a line, like npixels, but in bytes */
1071	struct nouveau_bo *destination_buffer = NULL;
1072	int action_flags; /* what shall we do? */
1073	unsigned char *map;
1074	int ret, i;
1075
1076	if (pPriv->grabbedByV4L)
1077		return Success;
1078
1079	if (width > pPriv->max_image_dim || height > pPriv->max_image_dim)
1080		return BadMatch;
1081
1082	NV_set_action_flags(pScrn, pDraw, pPriv, id, drw_x, drw_y, drw_w,
1083			    drw_h, &action_flags);
1084
1085	if (NV_set_dimensions(pScrn, action_flags, &xa, &xb, &ya, &yb,
1086			      &src_x,  &src_y, &src_w, &src_h,
1087			      &drw_x, &drw_y, &drw_w, &drw_h,
1088			      &left, &top, &right, &bottom, &dstBox,
1089			      &npixels, &nlines, clipBoxes, width, height))
1090		return Success;
1091
1092	if (NV_calculate_pitches_and_mem_size(pNv, action_flags, &srcPitch,
1093					      &srcPitch2, &dstPitch, &s2offset,
1094					      &s3offset, &uv_offset,
1095					      &newFBSize, &newTTSize,
1096					      &line_len, npixels, nlines,
1097					      width, height))
1098		return BadImplementation;
1099
1100	/* There are some cases (tvtime with overscan for example) where the
1101	 * input image is larger (width/height) than the source rectangle for
1102	 * the overlay (src_w, src_h). In those cases, we try to do something
1103	 * optimal by uploading only the necessary data.
1104	 */
1105	if (action_flags & IS_YUY2 || action_flags & IS_RGB)
1106		buf += (top * srcPitch) + left;
1107
1108	if (action_flags & IS_YV12) {
1109		tmp = ((top >> 1) * srcPitch2) + (left >> 1);
1110		s2offset += tmp;
1111		s3offset += tmp;
1112	}
1113
1114	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, newFBSize,
1115				    &pPriv->video_mem);
1116	if (ret)
1117		return BadAlloc;
1118
1119#ifdef NVOVL_SUPPORT
1120	if (action_flags & USE_OVERLAY) {
1121		ret = nouveau_bo_pin(pPriv->video_mem, NOUVEAU_BO_VRAM);
1122		if (ret) {
1123			nouveau_bo_ref(NULL, &pPriv->video_mem);
1124			return BadAlloc;
1125		}
1126	}
1127#endif
1128
1129	/* The overlay supports hardware double buffering. We handle this here*/
1130	offset = 0;
1131#ifdef NVOVL_SUPPORT
1132	if (pPriv->doubleBuffer) {
1133		int mask = 1 << (pPriv->currentBuffer << 2);
1134
1135		/* overwrite the newest buffer if there's not one free */
1136		if (nvReadVIDEO(pNv, NV_PVIDEO_BUFFER) & mask) {
1137			if (!pPriv->currentBuffer)
1138				offset += newFBSize >> 1;
1139			skip = TRUE;
1140		} else {
1141			if (pPriv->currentBuffer)
1142				offset += newFBSize >> 1;
1143		}
1144	}
1145#endif
1146
1147	/* Now we take a decision regarding the way we send the data to the
1148	 * card.
1149	 *
1150	 * Either we use double buffering of "private" TT memory
1151	 * Either we rely on X's GARTScratch
1152	 * Either we fallback on CPU copy
1153	 */
1154
1155	/* Try to allocate host-side double buffers, unless we have already
1156	 * failed
1157	 */
1158
1159	/* We take only nlines * line_len bytes - that is, only the pixel
1160	 * data we are interested in - because the stuff in the GART is
1161	 * written contiguously
1162	 */
1163	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1164		ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART, newTTSize,
1165					    &pPriv->TT_mem_chunk[0]);
1166		if (ret == 0) {
1167			ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART,
1168						    newTTSize,
1169						    &pPriv->TT_mem_chunk[1]);
1170			if (ret) {
1171				nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
1172				pPriv->currentHostBuffer =
1173					NO_PRIV_HOST_BUFFER_AVAILABLE;
1174			}
1175		} else {
1176			pPriv->currentHostBuffer =
1177				NO_PRIV_HOST_BUFFER_AVAILABLE;
1178		}
1179	}
1180
1181	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1182		destination_buffer =
1183			pPriv->TT_mem_chunk[pPriv->currentHostBuffer];
1184	}
1185	if (!destination_buffer) {
1186		if (pNv->Architecture >= NV_TESLA) {
1187			NOUVEAU_ERR("No scratch buffer for tiled upload\n");
1188			return BadAlloc;
1189		}
1190
1191		goto CPU_copy;
1192	}
1193
1194	if (newTTSize <= destination_buffer->size) {
1195		unsigned char *dst;
1196
1197		/* Upload to GART */
1198		nouveau_bo_map(destination_buffer, NOUVEAU_BO_WR, pNv->client);
1199		dst = destination_buffer->map;
1200
1201		if (action_flags & IS_YV12) {
1202			if (action_flags & CONVERT_TO_YUY2) {
1203				NVCopyData420(buf + (top * srcPitch) + left,
1204					      buf + s2offset, buf + s3offset,
1205					      dst, srcPitch, srcPitch2,
1206					      line_len, nlines, npixels);
1207			} else {
1208				/* Native YV12 */
1209				unsigned char *tbuf = buf + top *
1210						      srcPitch + left;
1211				unsigned char *tdst = dst;
1212
1213				/* luma upload */
1214				for (i = 0; i < nlines; i++) {
1215					memcpy(tdst, tbuf, line_len);
1216					tdst += line_len;
1217					tbuf += srcPitch;
1218				}
1219				dst += line_len * nlines;
1220
1221				NVCopyNV12ColorPlanes(buf + s2offset,
1222						      buf + s3offset, dst,
1223						      line_len, srcPitch2,
1224						      nlines, npixels);
1225			}
1226		} else {
1227			for (i = 0; i < nlines; i++) {
1228				memcpy(dst, buf, line_len);
1229				dst += line_len;
1230				buf += srcPitch;
1231			}
1232		}
1233
1234		if (uv_offset) {
1235			NVAccelM2MF(pNv, line_len, nlines / 2, 1,
1236				    line_len * nlines, uv_offset,
1237				    destination_buffer, NOUVEAU_BO_GART,
1238				    line_len, nlines >> 1, 0, 0,
1239				    pPriv->video_mem, NOUVEAU_BO_VRAM,
1240				    dstPitch, nlines >> 1, 0, 0);
1241		}
1242
1243		NVAccelM2MF(pNv, line_len, nlines, 1, 0, 0,
1244			    destination_buffer, NOUVEAU_BO_GART,
1245			    line_len, nlines, 0, 0,
1246			    pPriv->video_mem, NOUVEAU_BO_VRAM,
1247			    dstPitch, nlines, 0, 0);
1248
1249	} else {
1250CPU_copy:
1251		nouveau_bo_map(pPriv->video_mem, NOUVEAU_BO_WR, pNv->client);
1252		map = pPriv->video_mem->map + offset;
1253
1254		if (action_flags & IS_YV12) {
1255			if (action_flags & CONVERT_TO_YUY2) {
1256				NVCopyData420(buf + (top * srcPitch) + left,
1257					      buf + s2offset, buf + s3offset,
1258					      map, srcPitch, srcPitch2,
1259					      dstPitch, nlines, npixels);
1260			} else {
1261				unsigned char *tbuf =
1262					buf + left + top * srcPitch;
1263
1264				for (i = 0; i < nlines; i++) {
1265					int dwords = npixels << 1;
1266
1267					while (dwords & ~0x03) {
1268						*map = *tbuf;
1269						*(map + 1) = *(tbuf + 1);
1270						*(map + 2) = *(tbuf + 2);
1271						*(map + 3) = *(tbuf + 3);
1272						map += 4;
1273						tbuf += 4;
1274						dwords -= 4;
1275					}
1276
1277					switch (dwords) {
1278					case 3: *(map + 2) = *(tbuf + 2);
1279					case 2: *(map + 1) = *(tbuf + 1);
1280					case 1: *map = *tbuf;
1281					}
1282
1283					map += dstPitch - (npixels << 1);
1284					tbuf += srcPitch - (npixels << 1);
1285				}
1286
1287				NVCopyNV12ColorPlanes(buf + s2offset,
1288						      buf + s3offset,
1289						      map, dstPitch, srcPitch2,
1290						      nlines, npixels);
1291			}
1292		} else {
1293			/* YUY2 and RGB */
1294			for (i = 0; i < nlines; i++) {
1295				int dwords = npixels << 1;
1296
1297				while (dwords & ~0x03) {
1298					*map = *buf;
1299					*(map + 1) = *(buf + 1);
1300					*(map + 2) = *(buf + 2);
1301					*(map + 3) = *(buf + 3);
1302					map += 4;
1303					buf += 4;
1304					dwords -= 4;
1305				}
1306
1307				switch (dwords) {
1308				case 3: *(map + 2) = *(buf + 2);
1309				case 2: *(map + 1) = *(buf + 1);
1310				case 1: *map = *buf;
1311				}
1312
1313				map += dstPitch - (npixels << 1);
1314				buf += srcPitch - (npixels << 1);
1315			}
1316		}
1317	}
1318
1319	if (skip)
1320		return Success;
1321
1322	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE)
1323		pPriv->currentHostBuffer ^= 1;
1324
1325	/* If we're not using the hw overlay, we're rendering into a pixmap
1326	 * and need to take a couple of additional steps...
1327	 */
1328	if (!(action_flags & USE_OVERLAY)) {
1329		ppix = NVGetDrawablePixmap(pDraw);
1330
1331		/* Ensure pixmap is in offscreen memory */
1332		pNv->exa_force_cp = TRUE;
1333		exaMoveInPixmap(ppix);
1334		pNv->exa_force_cp = FALSE;
1335
1336		if (!exaGetPixmapDriverPrivate(ppix))
1337			return BadAlloc;
1338
1339#ifdef COMPOSITE
1340		/* Convert screen coords to pixmap coords */
1341		if (ppix->screen_x || ppix->screen_y) {
1342			REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
1343					 -ppix->screen_x, -ppix->screen_y);
1344			dstBox.x1 -= ppix->screen_x;
1345			dstBox.x2 -= ppix->screen_x;
1346			dstBox.y1 -= ppix->screen_y;
1347			dstBox.y2 -= ppix->screen_y;
1348		}
1349#endif
1350	}
1351
1352	if (action_flags & USE_OVERLAY) {
1353		if (pNv->Architecture == NV_ARCH_04) {
1354			NV04PutOverlayImage(pScrn, pPriv->video_mem, offset,
1355					    id, dstPitch, &dstBox, 0, 0,
1356					    xb, yb, npixels, nlines,
1357					    src_w, src_h, drw_w, drw_h,
1358					    clipBoxes);
1359		} else {
1360			NV10PutOverlayImage(pScrn, pPriv->video_mem, offset,
1361					    uv_offset, id, dstPitch, &dstBox,
1362					    0, 0, xb, yb,
1363					    npixels, nlines, src_w, src_h,
1364					    drw_w, drw_h, clipBoxes);
1365		}
1366
1367		pPriv->currentBuffer ^= 1;
1368	} else
1369	if (action_flags & USE_TEXTURE) {
1370		ret = BadImplementation;
1371
1372		if (pNv->Architecture == NV_ARCH_30) {
1373			ret = NV30PutTextureImage(pScrn, pPriv->video_mem,
1374						  offset, uv_offset,
1375						  id, dstPitch, &dstBox, 0, 0,
1376						  xb, yb, npixels, nlines,
1377						  src_w, src_h, drw_w, drw_h,
1378						  clipBoxes, ppix, pPriv);
1379		} else
1380		if (pNv->Architecture == NV_ARCH_40) {
1381			ret = NV40PutTextureImage(pScrn, pPriv->video_mem,
1382						  offset, uv_offset,
1383						  id, dstPitch, &dstBox, 0, 0,
1384						  xb, yb, npixels, nlines,
1385						  src_w, src_h, drw_w, drw_h,
1386						  clipBoxes, ppix, pPriv);
1387		} else
1388		if (pNv->Architecture == NV_TESLA) {
1389			ret = nv50_xv_image_put(pScrn, pPriv->video_mem,
1390						offset, uv_offset,
1391						id, dstPitch, &dstBox, 0, 0,
1392						xb, yb, npixels, nlines,
1393						src_w, src_h, drw_w, drw_h,
1394						clipBoxes, ppix, pPriv);
1395		} else {
1396			ret = nvc0_xv_image_put(pScrn, pPriv->video_mem,
1397						offset, uv_offset,
1398						id, dstPitch, &dstBox, 0, 0,
1399						xb, yb, npixels, nlines,
1400						src_w, src_h, drw_w, drw_h,
1401						clipBoxes, ppix, pPriv);
1402		}
1403
1404		if (ret != Success)
1405			return ret;
1406	} else {
1407		ret = NVPutBlitImage(pScrn, pPriv->video_mem, offset, id,
1408				     dstPitch, &dstBox, 0, 0, xb, yb, npixels,
1409				     nlines, src_w, src_h, drw_w, drw_h,
1410				     clipBoxes, ppix);
1411		if (ret != Success)
1412			return ret;
1413	}
1414
1415#ifdef COMPOSITE
1416	/* Damage tracking */
1417	if (!(action_flags & USE_OVERLAY))
1418		DamageDamageRegion(&ppix->drawable, clipBoxes);
1419#endif
1420
1421	return Success;
1422}
1423
1424/**
1425 * QueryImageAttributes
1426 *
1427 * calculates
1428 * - size (memory required to store image),
1429 * - pitches,
1430 * - offsets
1431 * of image
1432 * depending on colorspace (id) and dimensions (w,h) of image
1433 * values of
1434 * - w,
1435 * - h
1436 * may be adjusted as needed
1437 *
1438 * @param pScrn unused
1439 * @param id colorspace of image
1440 * @param w pointer to width of image
1441 * @param h pointer to height of image
1442 * @param pitches pitches[i] = length of a scanline in plane[i]
1443 * @param offsets offsets[i] = offset of plane i from the beginning of the image
1444 * @return size of the memory required for the XvImage queried
1445 */
1446static int
1447NVQueryImageAttributes(ScrnInfoPtr pScrn, int id,
1448		       unsigned short *w, unsigned short *h,
1449		       int *pitches, int *offsets)
1450{
1451	int size, tmp;
1452
1453	*w = (*w + 1) & ~1; // width rounded up to an even number
1454	if (offsets)
1455		offsets[0] = 0;
1456
1457	switch (id) {
1458	case FOURCC_YV12:
1459	case FOURCC_I420:
1460		*h = (*h + 1) & ~1; // height rounded up to an even number
1461		size = (*w + 3) & ~3; // width rounded up to a multiple of 4
1462		if (pitches)
1463			pitches[0] = size; // width rounded up to a multiple of 4
1464		size *= *h;
1465		if (offsets)
1466			offsets[1] = size; // number of pixels in "rounded up" image
1467		tmp = ((*w >> 1) + 3) & ~3; // width/2 rounded up to a multiple of 4
1468		if (pitches)
1469			pitches[1] = pitches[2] = tmp; // width/2 rounded up to a multiple of 4
1470		tmp *= (*h >> 1); // 1/4*number of pixels in "rounded up" image
1471		size += tmp; // 5/4*number of pixels in "rounded up" image
1472		if (offsets)
1473			offsets[2] = size; // 5/4*number of pixels in "rounded up" image
1474		size += tmp; // = 3/2*number of pixels in "rounded up" image
1475		break;
1476	case FOURCC_UYVY:
1477	case FOURCC_YUY2:
1478		size = *w << 1; // 2*width
1479		if (pitches)
1480			pitches[0] = size; // 2*width
1481		size *= *h; // 2*width*height
1482		break;
1483	case FOURCC_RGB:
1484		size = *w << 2; // 4*width (32 bit per pixel)
1485		if (pitches)
1486			pitches[0] = size; // 4*width
1487		size *= *h; // 4*width*height
1488		break;
1489	case FOURCC_AI44:
1490	case FOURCC_IA44:
1491		size = *w; // width
1492		if (pitches)
1493			pitches[0] = size; // width
1494		size *= *h; // width*height
1495		break;
1496	default:
1497		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Unknown colorspace: %x\n", id);
1498		*w = *h = size = 0;
1499		break;
1500	}
1501
1502	return size;
1503}
1504
1505/***** Exported offscreen surface stuff ****/
1506
1507
1508static int
1509NVAllocSurface(ScrnInfoPtr pScrn, int id,
1510	       unsigned short w, unsigned short h,
1511	       XF86SurfacePtr surface)
1512{
1513	NVPtr pNv = NVPTR(pScrn);
1514	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1515	int size, bpp, ret;
1516
1517	bpp = pScrn->bitsPerPixel >> 3;
1518
1519	if (pPriv->grabbedByV4L)
1520		return BadAlloc;
1521
1522	if ((w > IMAGE_MAX_W) || (h > IMAGE_MAX_H))
1523		return BadValue;
1524
1525	w = (w + 1) & ~1;
1526	pPriv->pitch = ((w << 1) + 63) & ~63;
1527	size = h * pPriv->pitch / bpp;
1528
1529	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, size,
1530				    &pPriv->video_mem);
1531	if (ret)
1532		return BadAlloc;
1533	pPriv->offset = 0;
1534
1535	surface->width = w;
1536	surface->height = h;
1537	surface->pScrn = pScrn;
1538	surface->pitches = &pPriv->pitch;
1539	surface->offsets = &pPriv->offset;
1540	surface->devPrivate.ptr = (pointer)pPriv;
1541	surface->id = id;
1542
1543	/* grab the video */
1544	NVStopOverlay(pScrn);
1545	pPriv->videoStatus = 0;
1546	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
1547	pPriv->grabbedByV4L = TRUE;
1548
1549	return Success;
1550}
1551
1552static int
1553NVStopSurface(XF86SurfacePtr surface)
1554{
1555	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1556
1557	if (pPriv->grabbedByV4L && pPriv->videoStatus) {
1558		NV10StopOverlay(surface->pScrn);
1559		pPriv->videoStatus = 0;
1560	}
1561
1562	return Success;
1563}
1564
1565static int
1566NVFreeSurface(XF86SurfacePtr surface)
1567{
1568	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1569
1570	if (pPriv->grabbedByV4L) {
1571		NVStopSurface(surface);
1572		NVFreeOverlayMemory(surface->pScrn);
1573		pPriv->grabbedByV4L = FALSE;
1574	}
1575
1576	return Success;
1577}
1578
1579static int
1580NVGetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 *value)
1581{
1582	NVPtr pNv = NVPTR(pScrn);
1583	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1584
1585	return NV10GetOverlayPortAttribute(pScrn, attribute,
1586					 value, (pointer)pPriv);
1587}
1588
1589static int
1590NVSetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 value)
1591{
1592	NVPtr pNv = NVPTR(pScrn);
1593	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1594
1595	return NV10SetOverlayPortAttribute(pScrn, attribute,
1596					 value, (pointer)pPriv);
1597}
1598
1599static int
1600NVDisplaySurface(XF86SurfacePtr surface,
1601		 short src_x, short src_y,
1602		 short drw_x, short drw_y,
1603		 short src_w, short src_h,
1604		 short drw_w, short drw_h,
1605		 RegionPtr clipBoxes)
1606{
1607	ScrnInfoPtr pScrn = surface->pScrn;
1608	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1609	INT32 xa, xb, ya, yb;
1610	BoxRec dstBox;
1611
1612	if (!pPriv->grabbedByV4L)
1613		return Success;
1614
1615	if (src_w > (drw_w << 3))
1616		drw_w = src_w >> 3;
1617	if (src_h > (drw_h << 3))
1618		drw_h = src_h >> 3;
1619
1620	/* Clip */
1621	xa = src_x;
1622	xb = src_x + src_w;
1623	ya = src_y;
1624	yb = src_y + src_h;
1625
1626	dstBox.x1 = drw_x;
1627	dstBox.x2 = drw_x + drw_w;
1628	dstBox.y1 = drw_y;
1629	dstBox.y2 = drw_y + drw_h;
1630
1631	if(!xf86XVClipVideoHelper(&dstBox, &xa, &xb, &ya, &yb, clipBoxes,
1632				  surface->width, surface->height))
1633		return Success;
1634
1635	dstBox.x1 -= pScrn->frameX0;
1636	dstBox.x2 -= pScrn->frameX0;
1637	dstBox.y1 -= pScrn->frameY0;
1638	dstBox.y2 -= pScrn->frameY0;
1639
1640	pPriv->currentBuffer = 0;
1641
1642	NV10PutOverlayImage(pScrn, pPriv->video_mem, surface->offsets[0],
1643			    0, surface->id, surface->pitches[0], &dstBox,
1644			    xa, ya, xb, yb, surface->width, surface->height,
1645			    src_w, src_h, drw_w, drw_h, clipBoxes);
1646
1647	return Success;
1648}
1649
1650/**
1651 * NVSetupBlitVideo
1652 * this function does all the work setting up a blit port
1653 *
1654 * @return blit port
1655 */
1656static XF86VideoAdaptorPtr
1657NVSetupBlitVideo (ScreenPtr pScreen)
1658{
1659	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1660	NVPtr               pNv       = NVPTR(pScrn);
1661	XF86VideoAdaptorPtr adapt;
1662	NVPortPrivPtr       pPriv;
1663	int i;
1664
1665	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1666					sizeof(NVPortPrivRec) +
1667					(sizeof(DevUnion) * NUM_BLIT_PORTS)))) {
1668		return NULL;
1669	}
1670
1671	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1672	adapt->flags		= 0;
1673	adapt->name		= "NV Video Blitter";
1674	adapt->nEncodings	= 1;
1675	adapt->pEncodings	= &DummyEncoding;
1676	adapt->nFormats		= NUM_FORMATS_ALL;
1677	adapt->pFormats		= NVFormats;
1678	adapt->nPorts		= NUM_BLIT_PORTS;
1679	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1680
1681	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_BLIT_PORTS]);
1682	for(i = 0; i < NUM_BLIT_PORTS; i++)
1683		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1684
1685	if (pNv->dev->chipset >= 0x11) {
1686		adapt->pAttributes = NVBlitAttributes;
1687		adapt->nAttributes = NUM_BLIT_ATTRIBUTES;
1688	} else {
1689		adapt->pAttributes = NULL;
1690		adapt->nAttributes = 0;
1691	}
1692
1693	adapt->pImages			= NVImages;
1694	adapt->nImages			= NUM_IMAGES_ALL;
1695	adapt->PutVideo			= NULL;
1696	adapt->PutStill			= NULL;
1697	adapt->GetVideo			= NULL;
1698	adapt->GetStill			= NULL;
1699	adapt->StopVideo		= NVStopBlitVideo;
1700	adapt->SetPortAttribute		= NVSetBlitPortAttribute;
1701	adapt->GetPortAttribute		= NVGetBlitPortAttribute;
1702	adapt->QueryBestSize		= NVQueryBestSize;
1703	adapt->PutImage			= NVPutImage;
1704	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1705
1706	pPriv->videoStatus		= 0;
1707	pPriv->grabbedByV4L		= FALSE;
1708	pPriv->blitter			= TRUE;
1709	pPriv->texture			= FALSE;
1710	pPriv->bicubic			= FALSE;
1711	pPriv->doubleBuffer		= FALSE;
1712	pPriv->SyncToVBlank		= (pNv->dev->chipset >= 0x11);
1713	pPriv->max_image_dim            = 2048;
1714
1715	pNv->blitAdaptor		= adapt;
1716
1717	return adapt;
1718}
1719
1720/**
1721 * NVSetupOverlayVideo
1722 * this function does all the work setting up an overlay port
1723 *
1724 * @return overlay port
1725 */
1726static XF86VideoAdaptorPtr
1727NVSetupOverlayVideoAdapter(ScreenPtr pScreen)
1728{
1729	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1730	NVPtr               pNv       = NVPTR(pScrn);
1731	XF86VideoAdaptorPtr adapt;
1732	NVPortPrivPtr       pPriv;
1733
1734	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1735					sizeof(NVPortPrivRec) +
1736					sizeof(DevUnion)))) {
1737		return NULL;
1738	}
1739
1740	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1741	adapt->flags		= VIDEO_OVERLAID_IMAGES;
1742	adapt->name		= "NV Video Overlay";
1743	adapt->nEncodings	= 1;
1744	adapt->pEncodings	= &DummyEncoding;
1745	adapt->nFormats		= NUM_FORMATS_ALL;
1746	adapt->pFormats		= NVFormats;
1747	adapt->nPorts		= 1;
1748	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1749
1750	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[1]);
1751	adapt->pPortPrivates[0].ptr	= (pointer)(pPriv);
1752
1753	adapt->pAttributes		= (pNv->Architecture != NV_ARCH_04) ? NV10OverlayAttributes : NV04OverlayAttributes;
1754	adapt->nAttributes		= (pNv->Architecture != NV_ARCH_04) ? NUM_NV10_OVERLAY_ATTRIBUTES : NUM_NV04_OVERLAY_ATTRIBUTES;
1755	adapt->pImages			= NVImages;
1756	adapt->nImages			= NUM_IMAGES_YUV;
1757	adapt->PutVideo			= NULL;
1758	adapt->PutStill			= NULL;
1759	adapt->GetVideo			= NULL;
1760	adapt->GetStill			= NULL;
1761	adapt->StopVideo		= NVStopOverlayVideo;
1762	adapt->SetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10SetOverlayPortAttribute : NV04SetOverlayPortAttribute;
1763	adapt->GetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10GetOverlayPortAttribute : NV04GetOverlayPortAttribute;
1764	adapt->QueryBestSize		= NVQueryBestSize;
1765	adapt->PutImage			= NVPutImage;
1766	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1767
1768	pPriv->videoStatus		= 0;
1769	pPriv->currentBuffer		= 0;
1770	pPriv->grabbedByV4L		= FALSE;
1771	pPriv->blitter			= FALSE;
1772	pPriv->texture			= FALSE;
1773	pPriv->bicubic			= FALSE;
1774	pPriv->max_image_dim            = 2048;
1775
1776	NVSetPortDefaults (pScrn, pPriv);
1777
1778	/* gotta uninit this someplace */
1779	REGION_NULL(pScreen, &pPriv->clip);
1780
1781	pNv->overlayAdaptor	= adapt;
1782
1783	xvBrightness		= MAKE_ATOM("XV_BRIGHTNESS");
1784	xvColorKey		= MAKE_ATOM("XV_COLORKEY");
1785	xvAutopaintColorKey     = MAKE_ATOM("XV_AUTOPAINT_COLORKEY");
1786	xvSetDefaults           = MAKE_ATOM("XV_SET_DEFAULTS");
1787
1788	if ( pNv->Architecture != NV_ARCH_04 )
1789		{
1790		xvDoubleBuffer		= MAKE_ATOM("XV_DOUBLE_BUFFER");
1791		xvContrast		= MAKE_ATOM("XV_CONTRAST");
1792		xvSaturation		= MAKE_ATOM("XV_SATURATION");
1793		xvHue			= MAKE_ATOM("XV_HUE");
1794		xvITURBT709		= MAKE_ATOM("XV_ITURBT_709");
1795		xvOnCRTCNb		= MAKE_ATOM("XV_ON_CRTC_NB");
1796		NV10WriteOverlayParameters(pScrn);
1797		}
1798
1799	return adapt;
1800}
1801
1802
1803XF86OffscreenImageRec NVOffscreenImages[2] = {
1804	{
1805		&NVImages[0],
1806		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1807		NVAllocSurface,
1808		NVFreeSurface,
1809		NVDisplaySurface,
1810		NVStopSurface,
1811		NVGetSurfaceAttribute,
1812		NVSetSurfaceAttribute,
1813		IMAGE_MAX_W, IMAGE_MAX_H,
1814		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1815		&NV10OverlayAttributes[1]
1816	},
1817	{
1818		&NVImages[2],
1819		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1820		NVAllocSurface,
1821		NVFreeSurface,
1822		NVDisplaySurface,
1823		NVStopSurface,
1824		NVGetSurfaceAttribute,
1825		NVSetSurfaceAttribute,
1826		IMAGE_MAX_W, IMAGE_MAX_H,
1827		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1828		&NV10OverlayAttributes[1]
1829	}
1830};
1831
1832static void
1833NVInitOffscreenImages (ScreenPtr pScreen)
1834{
1835	xf86XVRegisterOffscreenImages(pScreen, NVOffscreenImages, 2);
1836}
1837
1838/**
1839 * NVChipsetHasOverlay
1840 *
1841 * newer chips don't support overlay anymore.
1842 * overlay feature is emulated via textures.
1843 *
1844 * @param pNv
1845 * @return true, if chipset supports overlay
1846 */
1847static Bool
1848NVChipsetHasOverlay(NVPtr pNv)
1849{
1850	switch (pNv->Architecture) {
1851	case NV_ARCH_04: /*NV04 has a different overlay than NV10+*/
1852	case NV_ARCH_10:
1853	case NV_ARCH_20:
1854	case NV_ARCH_30:
1855		return TRUE;
1856	case NV_ARCH_40:
1857		if (pNv->dev->chipset == 0x40)
1858			return TRUE;
1859		break;
1860	default:
1861		break;
1862	}
1863
1864	return FALSE;
1865}
1866
1867/**
1868 * NVSetupOverlayVideo
1869 * check if chipset supports Overla
1870 * if so, setup overlay port
1871 *
1872 * @return overlay port
1873 * @see NVChipsetHasOverlay(NVPtr pNv)
1874 * @see NV10SetupOverlayVideo(ScreenPtr pScreen)
1875 * @see NVInitOffscreenImages(ScreenPtr pScreen)
1876 */
1877static XF86VideoAdaptorPtr
1878NVSetupOverlayVideo(ScreenPtr pScreen)
1879{
1880	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
1881	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
1882	NVPtr                pNv   = NVPTR(pScrn);
1883
1884	if (1 /*pNv->kms_enable*/ || !NVChipsetHasOverlay(pNv))
1885		return NULL;
1886
1887	overlayAdaptor = NVSetupOverlayVideoAdapter(pScreen);
1888	/* I am not sure what this call does. */
1889	if (overlayAdaptor && pNv->Architecture != NV_ARCH_04 )
1890		NVInitOffscreenImages(pScreen);
1891
1892	#ifdef COMPOSITE
1893	if (!noCompositeExtension) {
1894		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1895			   "Xv: Composite is enabled, enabling overlay with "
1896			   "smart blitter fallback\n");
1897		overlayAdaptor->name = "NV Video Overlay with Composite";
1898	}
1899	#endif
1900
1901	return overlayAdaptor;
1902}
1903
1904/**
1905 * NV30 texture adapter.
1906 */
1907
1908#define NUM_FORMAT_TEXTURED 2
1909
1910static XF86ImageRec NV30TexturedImages[NUM_FORMAT_TEXTURED] =
1911{
1912	XVIMAGE_YV12,
1913	XVIMAGE_I420,
1914};
1915
1916/**
1917 * NV30SetupTexturedVideo
1918 * this function does all the work setting up textured video port
1919 *
1920 * @return texture port
1921 */
1922static XF86VideoAdaptorPtr
1923NV30SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
1924{
1925	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1926	NVPtr pNv = NVPTR(pScrn);
1927	XF86VideoAdaptorPtr adapt;
1928	NVPortPrivPtr pPriv;
1929	int i;
1930
1931	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1932				 sizeof(NVPortPrivRec) +
1933				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
1934		return NULL;
1935	}
1936
1937	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1938	adapt->flags		= 0;
1939	if (bicubic)
1940		adapt->name		= "NV30 high quality adapter";
1941	else
1942		adapt->name		= "NV30 texture adapter";
1943	adapt->nEncodings	= 1;
1944	adapt->pEncodings	= &DummyEncodingTex;
1945	adapt->nFormats		= NUM_FORMATS_ALL;
1946	adapt->pFormats		= NVFormats;
1947	adapt->nPorts		= NUM_TEXTURE_PORTS;
1948	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1949
1950	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
1951	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
1952		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1953
1954	adapt->pAttributes		= NVTexturedAttributes;
1955	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
1956	adapt->pImages			= NV30TexturedImages;
1957	adapt->nImages			= NUM_FORMAT_TEXTURED;
1958	adapt->PutVideo			= NULL;
1959	adapt->PutStill			= NULL;
1960	adapt->GetVideo			= NULL;
1961	adapt->GetStill			= NULL;
1962	adapt->StopVideo		= NV30StopTexturedVideo;
1963	adapt->SetPortAttribute		= NV30SetTexturePortAttribute;
1964	adapt->GetPortAttribute		= NV30GetTexturePortAttribute;
1965	adapt->QueryBestSize		= NVQueryBestSize;
1966	adapt->PutImage			= NVPutImage;
1967	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1968
1969	pPriv->videoStatus		= 0;
1970	pPriv->grabbedByV4L		= FALSE;
1971	pPriv->blitter			= FALSE;
1972	pPriv->texture			= TRUE;
1973	pPriv->bicubic			= bicubic;
1974	pPriv->doubleBuffer		= FALSE;
1975	pPriv->SyncToVBlank		= TRUE;
1976	pPriv->max_image_dim            = 4096;
1977
1978	if (bicubic)
1979		pNv->textureAdaptor[1]	= adapt;
1980	else
1981		pNv->textureAdaptor[0]	= adapt;
1982
1983	return adapt;
1984}
1985
1986/**
1987 * NV40 texture adapter.
1988 */
1989
1990#define NUM_FORMAT_TEXTURED 2
1991
1992static XF86ImageRec NV40TexturedImages[NUM_FORMAT_TEXTURED] =
1993{
1994	XVIMAGE_YV12,
1995	XVIMAGE_I420,
1996};
1997
1998/**
1999 * NV40SetupTexturedVideo
2000 * this function does all the work setting up textured video port
2001 *
2002 * @return texture port
2003 */
2004static XF86VideoAdaptorPtr
2005NV40SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
2006{
2007	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2008	NVPtr pNv = NVPTR(pScrn);
2009	XF86VideoAdaptorPtr adapt;
2010	NVPortPrivPtr pPriv;
2011	int i;
2012
2013	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2014				 sizeof(NVPortPrivRec) +
2015				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2016		return NULL;
2017	}
2018
2019	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2020	adapt->flags		= 0;
2021	if (bicubic)
2022		adapt->name		= "NV40 high quality adapter";
2023	else
2024		adapt->name		= "NV40 texture adapter";
2025	adapt->nEncodings	= 1;
2026	adapt->pEncodings	= &DummyEncodingTex;
2027	adapt->nFormats		= NUM_FORMATS_ALL;
2028	adapt->pFormats		= NVFormats;
2029	adapt->nPorts		= NUM_TEXTURE_PORTS;
2030	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2031
2032	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2033	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2034		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2035
2036	adapt->pAttributes		= NVTexturedAttributes;
2037	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
2038	adapt->pImages			= NV40TexturedImages;
2039	adapt->nImages			= NUM_FORMAT_TEXTURED;
2040	adapt->PutVideo			= NULL;
2041	adapt->PutStill			= NULL;
2042	adapt->GetVideo			= NULL;
2043	adapt->GetStill			= NULL;
2044	adapt->StopVideo		= NV40StopTexturedVideo;
2045	adapt->SetPortAttribute		= NV40SetTexturePortAttribute;
2046	adapt->GetPortAttribute		= NV40GetTexturePortAttribute;
2047	adapt->QueryBestSize		= NVQueryBestSize;
2048	adapt->PutImage			= NVPutImage;
2049	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2050
2051	pPriv->videoStatus		= 0;
2052	pPriv->grabbedByV4L		= FALSE;
2053	pPriv->blitter			= FALSE;
2054	pPriv->texture			= TRUE;
2055	pPriv->bicubic			= bicubic;
2056	pPriv->doubleBuffer		= FALSE;
2057	pPriv->SyncToVBlank		= TRUE;
2058	pPriv->max_image_dim            = 4096;
2059
2060	if (bicubic)
2061		pNv->textureAdaptor[1]	= adapt;
2062	else
2063		pNv->textureAdaptor[0]	= adapt;
2064
2065	return adapt;
2066}
2067
2068static XF86ImageRec
2069NV50TexturedImages[] =
2070{
2071	XVIMAGE_YV12,
2072	XVIMAGE_I420,
2073	XVIMAGE_YUY2,
2074	XVIMAGE_UYVY
2075};
2076
2077static XF86VideoAdaptorPtr
2078NV50SetupTexturedVideo (ScreenPtr pScreen)
2079{
2080	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2081	NVPtr pNv = NVPTR(pScrn);
2082	XF86VideoAdaptorPtr adapt;
2083	NVPortPrivPtr pPriv;
2084	int i;
2085
2086	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2087				 sizeof(NVPortPrivRec) +
2088				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2089		return NULL;
2090	}
2091
2092	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2093	adapt->flags		= 0;
2094	adapt->name		= "Nouveau GeForce 8/9 Textured Video";
2095	adapt->nEncodings	= 1;
2096	adapt->pEncodings	= &DummyEncodingNV50;
2097	adapt->nFormats		= NUM_FORMATS_NV50;
2098	adapt->pFormats		= NV50Formats;
2099	adapt->nPorts		= NUM_TEXTURE_PORTS;
2100	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2101
2102	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2103	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2104		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2105
2106	adapt->pAttributes		= NVTexturedAttributesNV50;
2107	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES_NV50;
2108	adapt->pImages			= NV50TexturedImages;
2109	adapt->nImages			= sizeof(NV50TexturedImages) /
2110					  sizeof(NV50TexturedImages[0]);
2111	adapt->PutVideo			= NULL;
2112	adapt->PutStill			= NULL;
2113	adapt->GetVideo			= NULL;
2114	adapt->GetStill			= NULL;
2115	adapt->StopVideo		= nv50_xv_video_stop;
2116	adapt->SetPortAttribute		= nv50_xv_port_attribute_set;
2117	adapt->GetPortAttribute		= nv50_xv_port_attribute_get;
2118	adapt->QueryBestSize		= NVQueryBestSize;
2119	adapt->PutImage			= NVPutImage;
2120	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2121
2122	pNv->textureAdaptor[0]		= adapt;
2123
2124	nv50_xv_set_port_defaults(pScrn, pPriv);
2125	nv50_xv_csc_update(pScrn, pPriv);
2126
2127	xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
2128	xvContrast   = MAKE_ATOM("XV_CONTRAST");
2129	xvSaturation = MAKE_ATOM("XV_SATURATION");
2130	xvHue        = MAKE_ATOM("XV_HUE");
2131	xvITURBT709  = MAKE_ATOM("XV_ITURBT_709");
2132	return adapt;
2133}
2134
2135static void
2136NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor)
2137{
2138	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2139	NVPtr                pNv = NVPTR(pScrn);
2140
2141	if (!pNv->Nv3D)
2142		return;
2143
2144	if (pNv->Architecture == NV_ARCH_30) {
2145		textureAdaptor[0] = NV30SetupTexturedVideo(pScreen, FALSE);
2146		textureAdaptor[1] = NV30SetupTexturedVideo(pScreen, TRUE);
2147	} else
2148	if (pNv->Architecture == NV_ARCH_40) {
2149		textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE);
2150		textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE);
2151	} else
2152	if (pNv->Architecture >= NV_TESLA) {
2153		textureAdaptor[0] = NV50SetupTexturedVideo(pScreen);
2154	}
2155}
2156
2157/**
2158 * NVInitVideo
2159 * tries to initialize the various supported adapters
2160 * and add them to the list of ports on screen "pScreen".
2161 *
2162 * @param pScreen
2163 * @see NVSetupOverlayVideo(ScreenPtr pScreen)
2164 * @see NVSetupBlitVideo(ScreenPtr pScreen)
2165 */
2166void
2167NVInitVideo(ScreenPtr pScreen)
2168{
2169	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2170	NVPtr                pNv = NVPTR(pScrn);
2171	XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
2172	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
2173	XF86VideoAdaptorPtr  blitAdaptor = NULL;
2174	XF86VideoAdaptorPtr  textureAdaptor[2] = {NULL, NULL};
2175	int                  num_adaptors;
2176
2177	/*
2178	 * Driving the blitter requires the DMA FIFO. Using the FIFO
2179	 * without accel causes DMA errors. While the overlay might
2180	 * might work without accel, we also disable it for now when
2181	 * acceleration is disabled:
2182	 */
2183	if (pScrn->bitsPerPixel != 8 && pNv->AccelMethod == EXA) {
2184		xvSyncToVBlank = MAKE_ATOM("XV_SYNC_TO_VBLANK");
2185
2186		if (pNv->Architecture < NV_TESLA) {
2187			overlayAdaptor = NVSetupOverlayVideo(pScreen);
2188			blitAdaptor    = NVSetupBlitVideo(pScreen);
2189		}
2190
2191		NVSetupTexturedVideo(pScreen, textureAdaptor);
2192	}
2193
2194	num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
2195	if (blitAdaptor || overlayAdaptor || textureAdaptor[0]) {
2196		int size = num_adaptors;
2197
2198		if(overlayAdaptor) size++;
2199		if(blitAdaptor)    size++;
2200		if(textureAdaptor[0]) size++;
2201		if(textureAdaptor[1]) size++;
2202
2203		newAdaptors = malloc(size * sizeof(XF86VideoAdaptorPtr *));
2204		if(newAdaptors) {
2205			if(num_adaptors) {
2206				memcpy(newAdaptors, adaptors, num_adaptors *
2207						sizeof(XF86VideoAdaptorPtr));
2208			}
2209
2210			if(overlayAdaptor) {
2211				newAdaptors[num_adaptors] = overlayAdaptor;
2212				num_adaptors++;
2213			}
2214
2215			if (textureAdaptor[0]) { /* bilinear */
2216				newAdaptors[num_adaptors] = textureAdaptor[0];
2217				num_adaptors++;
2218			}
2219
2220			if (textureAdaptor[1]) { /* bicubic */
2221				newAdaptors[num_adaptors] = textureAdaptor[1];
2222				num_adaptors++;
2223			}
2224
2225			if(blitAdaptor) {
2226				newAdaptors[num_adaptors] = blitAdaptor;
2227				num_adaptors++;
2228			}
2229
2230			adaptors = newAdaptors;
2231		}
2232	}
2233
2234	if (num_adaptors)
2235		xf86XVScreenInit(pScreen, adaptors, num_adaptors);
2236	if (newAdaptors)
2237		free(newAdaptors);
2238
2239	/*
2240	 * For now we associate with the plain texture adapter since it is logical, but we can
2241	 * associate with any/all adapters since VL doesn't depend on Xv for color conversion.
2242	 */
2243	if (textureAdaptor[0]) {
2244		XF86MCAdaptorPtr *adaptorsXvMC = malloc(sizeof(XF86MCAdaptorPtr));
2245
2246		if (adaptorsXvMC) {
2247			adaptorsXvMC[0] = vlCreateAdaptorXvMC(pScreen, (char *)textureAdaptor[0]->name);
2248
2249			if (adaptorsXvMC[0]) {
2250				vlInitXvMC(pScreen, 1, adaptorsXvMC);
2251				vlDestroyAdaptorXvMC(adaptorsXvMC[0]);
2252			}
2253
2254			free(adaptorsXvMC);
2255		}
2256	}
2257}
2258
2259void
2260NVTakedownVideo(ScrnInfoPtr pScrn)
2261{
2262	NVPtr pNv = NVPTR(pScrn);
2263
2264	if (pNv->blitAdaptor)
2265		NVFreePortMemory(pScrn, GET_BLIT_PRIVATE(pNv));
2266	if (pNv->textureAdaptor[0]) {
2267		NVFreePortMemory(pScrn,
2268				 pNv->textureAdaptor[0]->pPortPrivates[0].ptr);
2269	}
2270	if (pNv->textureAdaptor[1]) {
2271		NVFreePortMemory(pScrn,
2272				 pNv->textureAdaptor[1]->pPortPrivates[0].ptr);
2273	}
2274}
2275
/* Reconstruction filter used for video scaling: the piecewise cubic from
 * "Reconstruction Filters in Computer Graphics", Mitchell & Netravali,
 * SIGGRAPH '88, evaluated with B = 0.75 and C = (1 - B) / 2.
 *
 * The inner polynomial is used for |x| < 1, the outer one for every other
 * input; the result is not forced to zero beyond |x| = 2.
 */
static float filter_func(float x)
{
	const double B = 0.75;
	const double C = (1.0 - B) / 2.0;

	/* Coefficients of the inner piece (|x| < 1); it has no linear term. */
	const double in3 = 12.0 - 9.0*B - 6.0*C;
	const double in2 = -18.0 + 12.0*B + 6.0*C;
	const double in0 = 6.0 - 2.0*B;

	/* Coefficients of the outer piece. */
	const double out3 = -B - 6.0*C;
	const double out2 = 6.0*B + 30.0*C;
	const double out1 = -12.0*B - 48.0*C;
	const double out0 = 8.0*B + 24.0*C;

	/* Powers of |x|. */
	const double ax  = fabs(x);
	const double ax2 = ax * ax;
	const double ax3 = ax * ax2;

	if (ax < 1.0)
		return ( in3*ax3 + in2*ax2 + in0 ) / 6.0;

	return ( out3*ax3 + out2*ax2 + out1*ax + out0 ) / 6.0;
}
2293
/*
 * Converts a float to a signed 8-bit value by scaling with 127 and
 * truncating toward zero, so 1.0 maps to 127 and -1.0 to -127.
 *
 * Converting a floating-point value whose integral part does not fit the
 * target integer type is undefined behaviour in C, so values outside the
 * representable range saturate to INT8_MAX / INT8_MIN and NaN maps to 0.
 * Results for inputs that were already in range are unchanged.
 */
static int8_t f32tosb8(float v)
{
	const double scaled = v * 127.0;

	if (scaled != scaled)	/* NaN */
		return 0;
	if (scaled >= (double)INT8_MAX)
		return INT8_MAX;
	if (scaled <= (double)INT8_MIN)
		return INT8_MIN;

	return (int8_t)scaled;
}
2298
2299void
2300NVXVComputeBicubicFilter(struct nouveau_bo *bo, unsigned offset, unsigned size)
2301{
2302	int8_t *t = (int8_t *)(bo->map + offset);
2303	int i;
2304
2305	for(i = 0; i < size; i++) {
2306		float  x = (i + 0.5) / size;
2307		float w0 = filter_func(x+1.0);
2308		float w1 = filter_func(x);
2309		float w2 = filter_func(x-1.0);
2310		float w3 = filter_func(x-2.0);
2311
2312		t[4*i+2]=f32tosb8(1.0+x-w1/(w0+w1));
2313		t[4*i+1]=f32tosb8(1.0-x+w3/(w2+w3));
2314		t[4*i+0]=f32tosb8(w0+w1);
2315		t[4*i+3]=f32tosb8(0.0);
2316	}
2317}
2318