nouveau_xv.c revision 16ee1e9a
1/*
2 * Copyright 2007 Arthur Huillet
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#ifdef __SSE2__
29#include <immintrin.h>
30#endif
31
32#include "xf86xv.h"
33#include <X11/extensions/Xv.h>
34#include "exa.h"
35#include "damage.h"
36#include "dixstruct.h"
37#include "fourcc.h"
38
39#include "nv_include.h"
40#include "nv_dma.h"
41
42#include "vl_hwmc.h"
43
44#include "hwdefs/nv_m2mf.xml.h"
45
/* Maximum source image dimensions used by DummyEncoding. */
#define IMAGE_MAX_W 2046
#define IMAGE_MAX_H 2046

/* Maximum source image dimensions used by DummyEncodingTex. */
#define TEX_IMAGE_MAX_W 4096
#define TEX_IMAGE_MAX_H 4096

/* Delays added to the current time when a port enters the OFF_TIMER
 * (OFF_DELAY) or FREE_TIMER (FREE_DELAY) state. */
#define OFF_DELAY 	500  /* milliseconds */
#define FREE_DELAY 	5000

#define NUM_BLIT_PORTS 16
#define NUM_TEXTURE_PORTS 32


/* Stop the overlay using the NV04 or NV10 implementation depending on the
 * chip architecture.  NOTE: the expansion reads a variable named pNv, so one
 * must be in scope wherever this macro is used. */
#define NVStopOverlay(X) (((pNv->Architecture == NV_ARCH_04) ? NV04StopOverlay(X) : NV10StopOverlay(X)))

/* Value taken by pPriv->currentHostBuffer when allocating the two private
 * buffers in TT memory failed, so that the failure can be recognised and no
 * further allocation is attempted afterwards (performance reasons). */
#define NO_PRIV_HOST_BUFFER_AVAILABLE 9999
64
/* NVPutImage action flags: a bit set computed by NV_set_action_flags() that
 * tells NVPutImage and its helpers what to do with the incoming image. */
enum {
	IS_YV12 = 1,		/* input is FOURCC_YV12 or FOURCC_I420 */
	IS_YUY2 = 2,		/* input is FOURCC_YUY2 or FOURCC_UYVY */
	CONVERT_TO_YUY2=4,	/* output must be packed as YUY2 */
	USE_OVERLAY=8,		/* display through the overlay adapter */
	USE_TEXTURE=16,		/* display through the textured adapter */
	SWAP_UV=32,		/* input is I420: U and V plane offsets are swapped */
	IS_RGB=64, /* input is FOURCC_RGB; it is unclear how long this will stay supported */
};
75
/* Intern the atom named by `a`.  The length is taken with sizeof(a) - 1, so
 * `a` must be a string literal (or char array), not a char pointer. */
#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)

/* Atoms for the Xv port attributes; they have external linkage and are not
 * initialised in this part of the file. */
Atom xvBrightness, xvContrast, xvColorKey, xvSaturation;
Atom xvHue, xvAutopaintColorKey, xvSetDefaults, xvDoubleBuffer;
Atom xvITURBT709, xvSyncToVBlank, xvOnCRTCNb;
81
/* Client libraries expect an encoding, so each adapter flavour gets a dummy
 * "XV_IMAGE" one.  This variant is limited to IMAGE_MAX_W x IMAGE_MAX_H. */
static XF86VideoEncodingRec DummyEncoding =
{
	0,
	"XV_IMAGE",
	IMAGE_MAX_W, IMAGE_MAX_H,
	{1, 1}
};

/* Same dummy encoding, limited to TEX_IMAGE_MAX_W x TEX_IMAGE_MAX_H. */
static XF86VideoEncodingRec DummyEncodingTex =
{
	0,
	"XV_IMAGE",
	TEX_IMAGE_MAX_W, TEX_IMAGE_MAX_H,
	{1, 1}
};

/* Same dummy encoding with an 8192 x 8192 limit
 * (presumably for the NV50 textured adapter, going by the name). */
static XF86VideoEncodingRec DummyEncodingNV50 =
{
	0,
	"XV_IMAGE",
	8192, 8192,
	{1, 1}
};
106
/* Number of entries in NVFormats. */
#define NUM_FORMATS_ALL 6

/* Depth / visual class pairs: depths 15, 16 and 24, each as TrueColor and
 * as DirectColor. */
XF86VideoFormatRec NVFormats[NUM_FORMATS_ALL] =
{
	{15, TrueColor}, {16, TrueColor}, {24, TrueColor},
	{15, DirectColor}, {16, DirectColor}, {24, DirectColor}
};
114
/*
 * Port attribute tables.  Each entry is { access flags, minimum value,
 * maximum value, attribute name }.  XV_SET_DEFAULTS is settable only, and
 * has a 0..0 range, in every table.
 */

/* Attributes for the NV04 overlay. */
#define NUM_NV04_OVERLAY_ATTRIBUTES 4
XF86AttributeRec NV04OverlayAttributes[NUM_NV04_OVERLAY_ATTRIBUTES] =
{
	    {XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
	    {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
	    {XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
	    {XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
};


/* Attributes for the NV10 overlay. */
#define NUM_NV10_OVERLAY_ATTRIBUTES 10
XF86AttributeRec NV10OverlayAttributes[NUM_NV10_OVERLAY_ATTRIBUTES] =
{
	{XvSettable | XvGettable, 0, 1, "XV_DOUBLE_BUFFER"},
	{XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
	{XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
	{XvSettable | XvGettable, 0, 8191, "XV_CONTRAST"},
	{XvSettable | XvGettable, 0, 8191, "XV_SATURATION"},
	{XvSettable | XvGettable, 0, 360, "XV_HUE"},
	{XvSettable | XvGettable, 0, 1, "XV_ITURBT_709"},
	{XvSettable | XvGettable, 0, 1, "XV_ON_CRTC_NB"},
};

/* Attributes for the blit adapter. */
#define NUM_BLIT_ATTRIBUTES 2
XF86AttributeRec NVBlitAttributes[NUM_BLIT_ATTRIBUTES] =
{
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
};

/* Attributes for the textured adapter (same set as the blit adapter). */
#define NUM_TEXTURED_ATTRIBUTES 2
XF86AttributeRec NVTexturedAttributes[NUM_TEXTURED_ATTRIBUTES] =
{
	{XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
	{XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
};

/* Attributes for the NV50 textured adapter; the colour controls use a
 * -1000..1000 range here, unlike the NV10 overlay table. */
#define NUM_TEXTURED_ATTRIBUTES_NV50 7
XF86AttributeRec NVTexturedAttributesNV50[NUM_TEXTURED_ATTRIBUTES_NV50] =
{
	{ XvSettable             , 0, 0, "XV_SET_DEFAULTS" },
	{ XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_SATURATION" },
	{ XvSettable | XvGettable, -1000, 1000, "XV_HUE" },
	{ XvSettable | XvGettable, 0, 1, "XV_ITURBT_709" }
};
165
/* NVImages holds NUM_IMAGES_ALL entries; the first NUM_IMAGES_YUV of them
 * are the YUV formats and the last one is the RGB format. */
#define NUM_IMAGES_YUV 4
#define NUM_IMAGES_ALL 5

/* Driver-private image id for the packed RGB format described by
 * XVIMAGE_RGB below: 32 bits per pixel, depth 24, with red/green/blue masks
 * 0x00ff0000 / 0x0000ff00 / 0x000000ff and component order "BGRX". */
#define FOURCC_RGB 0x0000003
#define XVIMAGE_RGB \
   { \
        FOURCC_RGB, \
        XvRGB, \
        LSBFirst, \
        { 0x03, 0x00, 0x00, 0x00, \
          0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \
        32, \
        XvPacked, \
        1, \
        24, 0x00ff0000, 0x0000ff00, 0x000000ff, \
        0, 0, 0, \
        0, 0, 0, \
        0, 0, 0, \
        {'B','G','R','X',\
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \
        XvTopToBottom \
   }

/* Image formats known to this file, YUV formats first. */
static XF86ImageRec NVImages[NUM_IMAGES_ALL] =
{
	XVIMAGE_YUY2,
	XVIMAGE_YV12,
	XVIMAGE_UYVY,
	XVIMAGE_I420,
	XVIMAGE_RGB
};
197
198static void
199nouveau_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
200{
201    dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
202    dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
203    dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
204    dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
205
206    if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
207	dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
208}
209
210static void
211nouveau_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box)
212{
213    if (crtc->enabled) {
214	crtc_box->x1 = crtc->x;
215	crtc_box->x2 = crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation);
216	crtc_box->y1 = crtc->y;
217	crtc_box->y2 = crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation);
218    } else
219	crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0;
220}
221
222static int
223nouveau_box_area(BoxPtr box)
224{
225    return (int) (box->x2 - box->x1) * (int) (box->y2 - box->y1);
226}
227
228xf86CrtcPtr
229nouveau_pick_best_crtc(ScrnInfoPtr pScrn, Bool consider_disabled,
230                       int x, int y, int w, int h)
231{
232    xf86CrtcConfigPtr   xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
233    int                 coverage, best_coverage, c;
234    BoxRec              box, crtc_box, cover_box;
235    RROutputPtr         primary_output = NULL;
236    xf86CrtcPtr         best_crtc = NULL, primary_crtc = NULL;
237
238    if (!pScrn->vtSema)
239	return NULL;
240
241    box.x1 = x;
242    box.x2 = x + w;
243    box.y1 = y;
244    box.y2 = y + h;
245    best_coverage = 0;
246
247    /* Prefer the CRTC of the primary output */
248#ifdef HAS_DIXREGISTERPRIVATEKEY
249    if (dixPrivateKeyRegistered(rrPrivKey))
250#endif
251    {
252	primary_output = RRFirstOutput(pScrn->pScreen);
253    }
254    if (primary_output && primary_output->crtc)
255	primary_crtc = primary_output->crtc->devPrivate;
256
257    /* first consider only enabled CRTCs */
258    for (c = 0; c < xf86_config->num_crtc; c++) {
259	xf86CrtcPtr crtc = xf86_config->crtc[c];
260
261	if (!crtc->enabled)
262	    continue;
263
264	nouveau_crtc_box(crtc, &crtc_box);
265	nouveau_box_intersect(&cover_box, &crtc_box, &box);
266	coverage = nouveau_box_area(&cover_box);
267	if (coverage > best_coverage ||
268	    (coverage == best_coverage && crtc == primary_crtc)) {
269	    best_crtc = crtc;
270	    best_coverage = coverage;
271	}
272    }
273    if (best_crtc || !consider_disabled)
274	return best_crtc;
275
276    /* if we found nothing, repeat the search including disabled CRTCs */
277    for (c = 0; c < xf86_config->num_crtc; c++) {
278	xf86CrtcPtr crtc = xf86_config->crtc[c];
279
280	nouveau_crtc_box(crtc, &crtc_box);
281	nouveau_box_intersect(&cover_box, &crtc_box, &box);
282	coverage = nouveau_box_area(&cover_box);
283	if (coverage > best_coverage ||
284	    (coverage == best_coverage && crtc == primary_crtc)) {
285	    best_crtc = crtc;
286	    best_coverage = coverage;
287	}
288    }
289    return best_crtc;
290}
291
292unsigned int
293nv_window_belongs_to_crtc(ScrnInfoPtr pScrn, int x, int y, int w, int h)
294{
295	xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
296	unsigned int mask = 0;
297	int i;
298
299	for (i = 0; i < xf86_config->num_crtc; i++) {
300		xf86CrtcPtr crtc = xf86_config->crtc[i];
301
302		if (!crtc->enabled)
303			continue;
304
305		if ((x < (crtc->x + crtc->mode.HDisplay)) &&
306		    (y < (crtc->y + crtc->mode.VDisplay)) &&
307		    ((x + w) > crtc->x) &&
308		    ((y + h) > crtc->y))
309		    mask |= 1 << i;
310	}
311
312	return mask;
313}
314
315/**
316 * NVSetPortDefaults
317 * set attributes of port "pPriv" to compiled-in (except for colorKey) defaults
318 * this function does not care about the kind of adapter the port is for
319 *
320 * @param pScrn screen to get the default colorKey from
321 * @param pPriv port to reset to defaults
322 */
323void
324NVSetPortDefaults (ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
325{
326	NVPtr pNv = NVPTR(pScrn);
327
328	pPriv->brightness		= 0;
329	pPriv->contrast			= 4096;
330	pPriv->saturation		= 4096;
331	pPriv->hue			= 0;
332	pPriv->colorKey			= pNv->videoKey;
333	pPriv->autopaintColorKey	= TRUE;
334	pPriv->doubleBuffer		= pNv->Architecture != NV_ARCH_04;
335	pPriv->iturbt_709		= FALSE;
336	pPriv->currentHostBuffer	= 0;
337}
338
339static int
340nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
341		      struct nouveau_bo **pbo)
342{
343	union nouveau_bo_config config = {};
344	NVPtr pNv = NVPTR(pScrn);
345
346	if (*pbo) {
347		if ((*pbo)->size >= size)
348			return 0;
349		nouveau_bo_ref(NULL, pbo);
350	}
351
352	if (flags & NOUVEAU_BO_VRAM) {
353		if (pNv->Architecture == NV_TESLA)
354			config.nv50.memtype = 0x70;
355		else
356		if (pNv->Architecture >= NV_FERMI)
357			config.nvc0.memtype = 0xfe;
358	}
359	flags |= NOUVEAU_BO_MAP;
360
361	return nouveau_bo_new(pNv->dev, flags, 0, size, &config, pbo);
362}
363
364/**
365 * NVFreePortMemory
366 * frees memory held by a given port
367 *
368 * @param pScrn screen whose port wants to free memory
369 * @param pPriv port to free memory of
370 */
371static void
372NVFreePortMemory(ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
373{
374	nouveau_bo_ref(NULL, &pPriv->video_mem);
375	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
376	nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[1]);
377}
378
379/**
380 * NVFreeOverlayMemory
381 * frees memory held by the overlay port
382 *
383 * @param pScrn screen whose overlay port wants to free memory
384 */
385static void
386NVFreeOverlayMemory(ScrnInfoPtr pScrn)
387{
388	NVPtr	pNv = NVPTR(pScrn);
389	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
390
391	NVFreePortMemory(pScrn, pPriv);
392#if NVOVL_SUPPORT
393	/* "power cycle" the overlay */
394	nvWriteMC(pNv, NV_PMC_ENABLE,
395		  (nvReadMC(pNv, NV_PMC_ENABLE) & 0xEFFFFFFF));
396	nvWriteMC(pNv, NV_PMC_ENABLE,
397		  (nvReadMC(pNv, NV_PMC_ENABLE) | 0x10000000));
398#endif
399}
400
401/**
402 * NVFreeBlitMemory
403 * frees memory held by the blit port
404 *
405 * @param pScrn screen whose blit port wants to free memory
406 */
407static void
408NVFreeBlitMemory(ScrnInfoPtr pScrn)
409{
410	NVPtr	pNv = NVPTR(pScrn);
411	NVPortPrivPtr pPriv = GET_BLIT_PRIVATE(pNv);
412
413	NVFreePortMemory(pScrn, pPriv);
414}
415
416/**
417 * NVVideoTimerCallback
418 * callback function which perform cleanup tasks (stop overlay, free memory).
419 * within the driver
420 * purpose and use is unknown
421 */
422void
423NVVideoTimerCallback(ScrnInfoPtr pScrn, Time currentTime)
424{
425	NVPtr         pNv = NVPTR(pScrn);
426	NVPortPrivPtr pOverPriv = NULL;
427	NVPortPrivPtr pBlitPriv = NULL;
428	Bool needCallback = FALSE;
429
430	if (!pScrn->vtSema)
431		return;
432
433	if (pNv->overlayAdaptor) {
434		pOverPriv = GET_OVERLAY_PRIVATE(pNv);
435		if (!pOverPriv->videoStatus)
436			pOverPriv = NULL;
437	}
438
439	if (pNv->blitAdaptor) {
440		pBlitPriv = GET_BLIT_PRIVATE(pNv);
441		if (!pBlitPriv->videoStatus)
442			pBlitPriv = NULL;
443	}
444
445	if (pOverPriv) {
446		if (pOverPriv->videoTime < currentTime) {
447			if (pOverPriv->videoStatus & OFF_TIMER) {
448				NVStopOverlay(pScrn);
449				pOverPriv->videoStatus = FREE_TIMER;
450				pOverPriv->videoTime = currentTime + FREE_DELAY;
451				needCallback = TRUE;
452			} else
453			if (pOverPriv->videoStatus & FREE_TIMER) {
454				NVFreeOverlayMemory(pScrn);
455				pOverPriv->videoStatus = 0;
456			}
457		} else {
458			needCallback = TRUE;
459		}
460	}
461
462	if (pBlitPriv) {
463		if (pBlitPriv->videoTime < currentTime) {
464			NVFreeBlitMemory(pScrn);
465			pBlitPriv->videoStatus = 0;
466		} else {
467			needCallback = TRUE;
468		}
469	}
470
471	pNv->VideoTimerCallback = needCallback ? NVVideoTimerCallback : NULL;
472}
473
474#ifndef ExaOffscreenMarkUsed
475extern void ExaOffscreenMarkUsed(PixmapPtr);
476#endif
477
478/*
479 * StopVideo
480 */
481static void
482NVStopOverlayVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
483{
484	NVPtr         pNv   = NVPTR(pScrn);
485	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
486
487	if (pPriv->grabbedByV4L)
488		return;
489
490	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
491
492	if(Exit) {
493		if (pPriv->videoStatus & CLIENT_VIDEO_ON)
494			NVStopOverlay(pScrn);
495		NVFreeOverlayMemory(pScrn);
496		pPriv->videoStatus = 0;
497	} else {
498		if (pPriv->videoStatus & CLIENT_VIDEO_ON) {
499			pPriv->videoStatus = OFF_TIMER | CLIENT_VIDEO_ON;
500			pPriv->videoTime = currentTime.milliseconds + OFF_DELAY;
501			pNv->VideoTimerCallback = NVVideoTimerCallback;
502		}
503	}
504}
505
506/**
507 * QueryBestSize
508 * used by client applications to ask the driver:
509 * how would you actually scale a video of dimensions
510 * vid_w, vid_h, if i wanted you to scale it to dimensions
511 * drw_w, drw_h?
512 * function stores actual scaling size in pointers p_w, p_h.
513 *
514 *
515 * @param pScrn unused
516 * @param motion unused
517 * @param vid_w width of source video
518 * @param vid_h height of source video
519 * @param drw_w desired scaled width as requested by client
520 * @param drw_h desired scaled height as requested by client
521 * @param p_w actual scaled width as the driver is capable of
522 * @param p_h actual scaled height as the driver is capable of
523 * @param data unused
524 */
525static void
526NVQueryBestSize(ScrnInfoPtr pScrn, Bool motion,
527		short vid_w, short vid_h,
528		short drw_w, short drw_h,
529		unsigned int *p_w, unsigned int *p_h,
530		pointer data)
531{
532	if(vid_w > (drw_w << 3))
533		drw_w = vid_w >> 3;
534	if(vid_h > (drw_h << 3))
535		drw_h = vid_h >> 3;
536
537	*p_w = drw_w;
538	*p_h = drw_h;
539}
540
/**
 * NVCopyData420
 * used to convert YV12 to YUY2 for the blitter and NV04 overlay.
 * The U and V samples generated are linearly interpolated on the vertical
 * axis for better quality.
 *
 * Every 32-bit word written holds two pixels; in memory order the bytes are
 * luma 0, a sample from src3, luma 1, a sample from src2 (the same byte
 * order is produced on big- and little-endian hosts).
 *
 * One chroma line serves two luma lines: the chroma pointers advance after
 * every odd line.  On odd lines other than the last one, each chroma sample
 * is the average of the current chroma line and the next one.
 *
 * @param src1 source buffer of luma
 * @param src2 source buffer of chroma1
 * @param src3 source buffer of chroma2
 * @param dst1 destination buffer
 * @param srcPitch pitch of src1
 * @param srcPitch2 pitch of src2, src3
 * @param dstPitch pitch of dst1
 * @param h number of lines to copy
 * @param w length of lines to copy, in pixels (halved internally: one
 *          output word per pixel pair)
 */
static inline void
NVCopyData420(unsigned char *src1, unsigned char *src2, unsigned char *src3,
	      unsigned char *dst1, int srcPitch, int srcPitch2, int dstPitch,
	      int h, int w)
{
	CARD32 *dst;
	CARD8 *s1, *s2, *s3;
	int i, j;

/* su(X)/sv(X): chroma sample X of the current line from s2/s3, averaged
 * with the sample one chroma line below on odd lines that are not the last
 * line.  Both rely on the locals j, h, s2, s3 and srcPitch2, and are not
 * #undef'd within this function. */
#define su(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s2[X] +        \
		(signed int)(s2 + srcPitch2)[X]) / 2) : (s2[X]))
#define sv(X) (((j & 1) && j < (h-1)) ? ((unsigned)((signed int)s3[X] +        \
		(signed int)(s3 + srcPitch2)[X]) / 2) : (s3[X]))

	/* from here on w counts pixel pairs, i.e. output words per line */
	w >>= 1;

	for (j = 0; j < h; j++) {
		dst = (CARD32*)dst1;
		s1 = src1;  s2 = src2;  s3 = src3;
		i = w;

		/* unrolled: four words (eight pixels) per iteration while more
		 * than four words remain */
		while (i > 4) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
		dst[1] = (s1[2] << 24) | (s1[3] << 8) | (sv(1) << 16) | su(1);
		dst[2] = (s1[4] << 24) | (s1[5] << 8) | (sv(2) << 16) | su(2);
		dst[3] = (s1[6] << 24) | (s1[7] << 8) | (sv(3) << 16) | su(3);
#else
		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
		dst[1] = s1[2] | (s1[3] << 16) | (sv(1) << 8) | (su(1) << 24);
		dst[2] = s1[4] | (s1[5] << 16) | (sv(2) << 8) | (su(2) << 24);
		dst[3] = s1[6] | (s1[7] << 16) | (sv(3) << 8) | (su(3) << 24);
#endif
		dst += 4; s2 += 4; s3 += 4; s1 += 8;
		i -= 4;
		}

		/* remaining words, one at a time */
		while (i--) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
		dst[0] = (s1[0] << 24) | (s1[1] << 8) | (sv(0) << 16) | su(0);
#else
		dst[0] = s1[0] | (s1[1] << 16) | (sv(0) << 8) | (su(0) << 24);
#endif
		dst++; s2++; s3++;
		s1 += 2;
		}

		dst1 += dstPitch;
		src1 += srcPitch;
		/* chroma is vertically subsampled: advance every second line */
		if (j & 1) {
			src2 += srcPitch2;
			src3 += srcPitch2;
		}
	}
}
612
/**
 * NVCopyNV12ColorPlanes
 * Used to convert YV12 color planes to NV12 (interleaved UV) for the overlay.
 *
 * The two planar chroma sources are interleaved byte by byte into dst: in
 * memory order each output pair is one byte from src2 followed by one byte
 * from src1.  w and h are given in luma units and are halved internally, so
 * (w / 2) byte pairs are written on each of (h / 2) lines.
 *
 * @param src1 source buffer of chroma1 (second byte of each output pair)
 * @param src2 source buffer of chroma2 (first byte of each output pair)
 * @param dst destination buffer
 * @param dstPitch pitch of dst, in bytes
 * @param srcPitch2 pitch of src1 and src2, in bytes
 * @param h number of (luma) lines to copy
 * @param w length of (luma) lines to copy
 */
static inline void
NVCopyNV12ColorPlanes(unsigned char *src1, unsigned char *src2,
		      unsigned char *dst, int dstPitch, int srcPitch2,
		      int h, int w)
{
	int i, j, l, e;

	w >>= 1;
	h >>= 1;
	/* l: iterations of the wide inner loop, e: leftover single pairs */
#ifdef __SSE2__
	l = w >> 3;	/* 8 pairs per SSE2 iteration */
	e = w & 7;
#else
	l = w >> 1;	/* 2 pairs per 32-bit word */
	e = w & 1;
#endif

	for (j = 0; j < h; j++) {
		unsigned char *us = src1;
		unsigned char *vs = src2;
		unsigned int *vuvud = (unsigned int *) dst;
		unsigned short *vud;

		for (i = 0; i < l; i++) {
#ifdef __SSE2__
			_mm_storeu_si128(
				(void*)vuvud,
				_mm_unpacklo_epi8(
					_mm_loadl_epi64((void*)vs),
					_mm_loadl_epi64((void*)us)));
			vuvud+=4;
			us+=8;
			vs+=8;
#else /* __SSE2__ */
			/* The samples are widened to unsigned before shifting:
			 * shifting a promoted int by 24 would overflow into the
			 * sign bit for sample values >= 128.
			 */
#  if X_BYTE_ORDER == X_BIG_ENDIAN
			*vuvud++ = ((unsigned int)vs[0]<<24) |
				   ((unsigned int)us[0]<<16) |
				   ((unsigned int)vs[1]<<8) |
				   (unsigned int)us[1];
#  else
			*vuvud++ = (unsigned int)vs[0] |
				   ((unsigned int)us[0]<<8) |
				   ((unsigned int)vs[1]<<16) |
				   ((unsigned int)us[1]<<24);
#  endif
			us+=2;
			vs+=2;
#endif /* __SSE2__ */
		}

		/* leftover pairs that did not fill a whole wide iteration */
		vud = (unsigned short *)vuvud;
		for (i = 0; i < e; i++) {
#if X_BYTE_ORDER == X_BIG_ENDIAN
			vud[i] = us[i] | (vs[i]<<8);
#else
			vud[i] = vs[i] | (us[i]<<8);
#endif
		}

		dst += dstPitch;
		src1 += srcPitch2;
		src2 += srcPitch2;
	}

}
682
683
684static int
685NV_set_dimensions(ScrnInfoPtr pScrn, int action_flags, INT32 *xa, INT32 *xb,
686		  INT32 *ya, INT32 *yb, short *src_x, short *src_y,
687		  short *src_w, short *src_h, short *drw_x, short *drw_y,
688		  short *drw_w, short *drw_h, int *left, int *top, int *right,
689		  int *bottom, BoxRec *dstBox, int *npixels, int *nlines,
690		  RegionPtr clipBoxes, short width, short height)
691{
692	NVPtr pNv = NVPTR(pScrn);
693
694	if (action_flags & USE_OVERLAY) {
695		switch (pNv->Architecture) {
696		case NV_ARCH_04:
697			/* NV0x overlay can't scale down. at all. */
698			if (*drw_w < *src_w)
699				*drw_w = *src_w;
700			if (*drw_h < *src_h)
701				*drw_h = *src_h;
702			break;
703		case NV_ARCH_30:
704			/* According to DirectFB, NV3x can't scale down by
705			 * a ratio > 2
706			 */
707			if (*drw_w < (*src_w) >> 1)
708				*drw_w = *src_w;
709			if (*drw_h < (*src_h) >> 1)
710				*drw_h = *src_h;
711			break;
712		default: /*NV10, NV20*/
713			/* NV1x overlay can't scale down by a ratio > 8 */
714			if (*drw_w < (*src_w) >> 3)
715				*drw_w = *src_w >> 3;
716			if (*drw_h < (*src_h >> 3))
717				*drw_h = *src_h >> 3;
718		}
719	}
720
721	/* Clip */
722	*xa = *src_x;
723	*xb = *src_x + *src_w;
724	*ya = *src_y;
725	*yb = *src_y + *src_h;
726
727	dstBox->x1 = *drw_x;
728	dstBox->x2 = *drw_x + *drw_w;
729	dstBox->y1 = *drw_y;
730	dstBox->y2 = *drw_y + *drw_h;
731
732	/* In randr 1.2 mode VIDEO_CLIP_TO_VIEWPORT is broken (hence it is not
733	 * set in the overlay adapter flags) since pScrn->frame{X,Y}1 do not get
734	 * updated. Hence manual clipping against the CRTC dimensions
735	 */
736	if (action_flags & USE_OVERLAY) {
737		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
738		unsigned id = pPriv->overlayCRTC;
739		xf86CrtcPtr crtc = XF86_CRTC_CONFIG_PTR(pScrn)->crtc[id];
740		RegionRec VPReg;
741		BoxRec VPBox;
742
743		VPBox.x1 = crtc->x;
744		VPBox.y1 = crtc->y;
745		VPBox.x2 = crtc->x + crtc->mode.HDisplay;
746		VPBox.y2 = crtc->y + crtc->mode.VDisplay;
747
748		REGION_INIT(pScreen, &VPReg, &VPBox, 1);
749		REGION_INTERSECT(pScreen, clipBoxes, clipBoxes, &VPReg);
750		REGION_UNINIT(pScreen, &VPReg);
751	}
752
753	if (!xf86XVClipVideoHelper(dstBox, xa, xb, ya, yb, clipBoxes,
754				   width, height))
755		return -1;
756
757	if (action_flags & USE_OVERLAY)	{
758		xf86CrtcConfigPtr xf86_config =
759			XF86_CRTC_CONFIG_PTR(pScrn);
760		NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
761
762		dstBox->x1 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
763		dstBox->x2 -= xf86_config->crtc[pPriv->overlayCRTC]->x;
764		dstBox->y1 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
765		dstBox->y2 -= xf86_config->crtc[pPriv->overlayCRTC]->y;
766	}
767
768	/* Convert fixed point to integer, as xf86XVClipVideoHelper probably
769	 * turns its parameter into fixed point values
770	 */
771	*left = (*xa) >> 16;
772	if (*left < 0)
773		*left = 0;
774
775	*top = (*ya) >> 16;
776	if (*top < 0)
777		*top = 0;
778
779	*right = (*xb) >> 16;
780	if (*right > width)
781		*right = width;
782
783	*bottom = (*yb) >> 16;
784	if (*bottom > height)
785		*bottom = height;
786
787	if (action_flags & IS_YV12) {
788		/* even "left", even "top", even number of pixels per line
789		 * and even number of lines
790		 */
791		*left &= ~1;
792		*npixels = ((*right + 1) & ~1) - *left;
793		*top &= ~1;
794		*nlines = ((*bottom + 1) & ~1) - *top;
795	} else
796	if (action_flags & IS_YUY2) {
797		/* even "left" */
798		*left &= ~1;
799		/* even number of pixels per line */
800		*npixels = ((*right + 1) & ~1) - *left;
801		*nlines = *bottom - *top;
802		/* 16bpp */
803		*left <<= 1;
804	} else
805	if (action_flags & IS_RGB) {
806		*npixels = *right - *left;
807		*nlines = *bottom - *top;
808		/* 32bpp */
809		*left <<= 2;
810	}
811
812	return 0;
813}
814
815static int
816NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
817				  int *srcPitch2, int *dstPitch, int *s2offset,
818				  int *s3offset, int *uv_offset,
819				  int *newFBSize, int *newTTSize,
820				  int *line_len, int npixels, int nlines,
821				  int width, int height)
822{
823	int tmp;
824
825	if (pNv->Architecture >= NV_TESLA) {
826		npixels = (npixels + 7) & ~7;
827		nlines = (nlines + 7) & ~7;
828	}
829
830	if (action_flags & IS_YV12) {
831		*srcPitch = (width + 3) & ~3;	/* of luma */
832		*s2offset = *srcPitch * height;
833		*srcPitch2 = ((width >> 1) + 3) & ~3; /*of chroma*/
834		*s3offset = (*srcPitch2 * (height >> 1)) + *s2offset;
835		*dstPitch = (npixels + 63) & ~63; /*luma and chroma pitch*/
836		*line_len = npixels;
837		*uv_offset = nlines * *dstPitch;
838		*newFBSize = *uv_offset + (nlines >> 1) * *dstPitch;
839		*newTTSize = *uv_offset + (nlines >> 1) * *dstPitch;
840	} else
841	if (action_flags & IS_YUY2) {
842		*srcPitch = width << 1; /* one luma, one chroma per pixel */
843		*dstPitch = ((npixels << 1) + 63) & ~63;
844		*line_len = npixels << 1;
845		*newFBSize = nlines * *dstPitch;
846		*newTTSize = nlines * *line_len;
847	} else
848	if (action_flags & IS_RGB) {
849		/* one R, one G, one B, one X per pixel */
850		*srcPitch = width << 2;
851		*dstPitch = ((npixels << 2) + 63) & ~63;
852		*line_len = npixels << 2;
853		*newFBSize = nlines * *dstPitch;
854		*newTTSize = nlines * *dstPitch;
855	}
856
857	if (action_flags & CONVERT_TO_YUY2) {
858		*dstPitch = ((npixels << 1) + 63) & ~63;
859		*line_len = npixels << 1;
860		*newFBSize = nlines * *dstPitch;
861		*newTTSize = nlines * *line_len;
862		*uv_offset = 0;
863	}
864
865	if (action_flags & SWAP_UV)  {
866		/* I420 swaps U and V */
867		tmp = *s2offset;
868		*s2offset = *s3offset;
869		*s3offset = tmp;
870	}
871
872	/* Overlay double buffering... */
873	if (action_flags & USE_OVERLAY)
874                (*newFBSize) <<= 1;
875
876	return 0;
877}
878
879
/**
 * NV_set_action_flags
 * This function computes the action flags from the input image,
 * that is, it decides what NVPutImage and its helpers must do.
 * This eases readability by avoiding lots of switch-case statements in the
 * core NVPutImage
 *
 * The flags are built in three stages: pixel format bits derived from the
 * image id, the adapter wanted by the port (overlay, texture, or blitter
 * when neither flag is set) including fallbacks away from the overlay, and
 * finally format restrictions of the chosen adapter (CONVERT_TO_YUY2).
 *
 * When NVOVL_SUPPORT is defined this function also has a side effect: it
 * moves the overlay to the CRTC the drawable is on by writing
 * NV_PCRTC_ENGINE_CTRL on both CRTCs and updating pPriv->overlayCRTC.
 *
 * @param pScrn screen the port belongs to
 * @param pDraw drawable the image is put to (only used with COMPOSITE)
 * @param pPriv port private
 * @param id fourcc of the image
 * @param drw_x, drw_y, drw_w, drw_h destination rectangle (only used with
 *        NVOVL_SUPPORT, to find the CRTCs it touches)
 * @param action_flags receives the computed flags; any previous value is
 *        discarded
 */
static void
NV_set_action_flags(ScrnInfoPtr pScrn, DrawablePtr pDraw, NVPortPrivPtr pPriv,
		    int id, short drw_x, short drw_y, short drw_w, short drw_h,
		    int *action_flags)
{
	NVPtr pNv = NVPTR(pScrn);

/* Shorthands for the adapter currently selected in *action_flags; the
 * blitter is the case where neither overlay nor texture is selected.
 * They are not #undef'd within this function. */
#define USING_OVERLAY (*action_flags & USE_OVERLAY)
#define USING_TEXTURE (*action_flags & USE_TEXTURE)
#define USING_BLITTER ((!(*action_flags & USE_OVERLAY)) &&                     \
		       (!(*action_flags & USE_TEXTURE)))

	*action_flags = 0;

	/* Pixel format-related bits */
	if (id == FOURCC_YUY2 || id == FOURCC_UYVY)
		*action_flags |= IS_YUY2;

	if (id == FOURCC_YV12 || id == FOURCC_I420)
		*action_flags |= IS_YV12;

	if (id == FOURCC_RGB) /*How long will we support it?*/
		*action_flags |= IS_RGB;

	if (id == FOURCC_I420) /* I420 is YV12 with swapped UV */
		*action_flags |= SWAP_UV;

	/* Desired adapter */
	if (!pPriv->blitter && !pPriv->texture)
		*action_flags |= USE_OVERLAY;

	if (!pPriv->blitter && pPriv->texture)
		*action_flags |= USE_TEXTURE;

	/* Adapter fallbacks (when the desired one can't be used)*/
#ifdef COMPOSITE
	{
		PixmapPtr ppix = NVGetDrawablePixmap(pDraw);

		/* this is whether ppix is in the viewable fb, not related to
		   the EXA "offscreen" stuff */
		if (!nouveau_exa_pixmap_is_onscreen(ppix))
			*action_flags &= ~USE_OVERLAY;
	}
#endif

#ifdef NVOVL_SUPPORT
	if (USING_OVERLAY) {
		/* bit mask of the CRTCs the destination touches; only bits 0
		 * and 1 are looked at below */
		char crtc = nv_window_belongs_to_crtc(pScrn, drw_x, drw_y,
						      drw_w, drw_h);

		if ((crtc & (1 << 0)) && (crtc & (1 << 1))) {
			/* The overlay cannot be used on two CRTCs at a time,
			 * so we need to fallback on the blitter
			 */
			*action_flags &= ~USE_OVERLAY;
		} else
		if ((crtc & (1 << 0))) {
			/* We need to put the overlay on CRTC0 - if it's not
			 * already here
			 */
			if (pPriv->overlayCRTC == 1) {
				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) |
					    NV_CRTC_FSEL_OVERLAY);
				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) &
					    ~NV_CRTC_FSEL_OVERLAY);
				pPriv->overlayCRTC = 0;
			}
		} else
		if ((crtc & (1 << 1))) {
			/* Same as above, moving the overlay to CRTC1 */
			if (pPriv->overlayCRTC == 0) {
				NVWriteCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 1, NV_PCRTC_ENGINE_CTRL) |
					    NV_CRTC_FSEL_OVERLAY);
				NVWriteCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL,
					    NVReadCRTC(pNv, 0, NV_PCRTC_ENGINE_CTRL) &
					    ~NV_CRTC_FSEL_OVERLAY);
				pPriv->overlayCRTC = 1;
			}
		}

		/* The overlay is not used on a rotated CRTC */
		if (XF86_CRTC_CONFIG_PTR(pScrn)->crtc[pPriv->overlayCRTC]
						 ->rotation != RR_Rotate_0)
			*action_flags &= ~USE_OVERLAY;
	}
#endif

	/* At this point the adapter we're going to use is _known_.
	 * You cannot change it now.
	 */

	/* Card/adapter format restrictions */
	if (USING_BLITTER) {
		/* The blitter does not handle YV12 natively */
		if (id == FOURCC_YV12 || id == FOURCC_I420)
			*action_flags |= CONVERT_TO_YUY2;
	}

	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_04)) {
		/* NV04-05 don't support YV12, only YUY2 and ITU-R BT.601 */
		if (*action_flags & IS_YV12)
			*action_flags |= CONVERT_TO_YUY2;
	}

	if (USING_OVERLAY && (pNv->Architecture == NV_ARCH_10 ||
			      pNv->Architecture == NV_ARCH_20)) {
		/* No YV12 overlay on NV10, 11, 15, 20, NFORCE */
		/* NOTE(review): unlike the NV04 case above, the flag is set
		 * here whatever the input format is, not only for IS_YV12 -
		 * confirm that the users of CONVERT_TO_YUY2 cope with that. */
		switch (pNv->dev->chipset) {
		case 0x10:
		case 0x11:
		case 0x15:
		case 0x1a: /*XXX: unsure about nforce */
		case 0x20:
			*action_flags |= CONVERT_TO_YUY2;
			break;
		default:
			break;
		}
	}
}
1009
1010
1011/**
1012 * NVPutImage
1013 * PutImage is "the" important function of the Xv extension.
1014 * a client (e.g. video player) calls this function for every
1015 * image (of the video) to be displayed. this function then
1016 * scales and displays the image.
1017 *
1018 * @param pScrn screen which hold the port where the image is put
1019 * @param src_x source point in the source image to start displaying from
1020 * @param src_y see above
1021 * @param src_w width of the source image to display
1022 * @param src_h see above
1023 * @param drw_x  screen point to display to
1024 * @param drw_y
1025 * @param drw_w width of the screen drawable
1026 * @param drw_h
1027 * @param id pixel format of image
1028 * @param buf pointer to buffer containing the source image
1029 * @param width total width of the source image we are passed
1030 * @param height
1031 * @param Sync unused
1032 * @param clipBoxes ??
1033 * @param data pointer to port
1034 * @param pDraw drawable pointer
1035 */
1036static int
1037NVPutImage(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x,
1038	   short drw_y, short src_w, short src_h, short drw_w, short drw_h,
1039	   int id, unsigned char *buf, short width, short height,
1040	   Bool Sync, RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
1041{
1042	NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
1043	NVPtr pNv = NVPTR(pScrn);
1044	PixmapPtr ppix;
1045	/* source box */
1046	INT32 xa = 0, xb = 0, ya = 0, yb = 0;
1047	/* size to allocate in VRAM and in GART respectively */
1048	int newFBSize = 0, newTTSize = 0;
1049	/* card VRAM offsets, source offsets for U and V planes */
1050	int offset = 0, uv_offset = 0, s2offset = 0, s3offset = 0;
1051	/* source pitch, source pitch of U and V planes in case of YV12,
1052	 * VRAM destination pitch
1053	 */
1054	int srcPitch = 0, srcPitch2 = 0, dstPitch = 0;
1055	/* position of the given source data (using src_*), number of pixels
1056	 * and lines we are interested in
1057	 */
1058	int top = 0, left = 0, right = 0, bottom = 0, npixels = 0, nlines = 0;
1059	Bool skip = FALSE;
1060	BoxRec dstBox;
1061	CARD32 tmp = 0;
1062	int line_len = 0; /* length of a line, like npixels, but in bytes */
1063	struct nouveau_bo *destination_buffer = NULL;
1064	int action_flags; /* what shall we do? */
1065	unsigned char *map;
1066	int ret, i;
1067
1068	if (pPriv->grabbedByV4L)
1069		return Success;
1070
1071	if (width > pPriv->max_image_dim || height > pPriv->max_image_dim)
1072		return BadMatch;
1073
1074	NV_set_action_flags(pScrn, pDraw, pPriv, id, drw_x, drw_y, drw_w,
1075			    drw_h, &action_flags);
1076
1077	if (NV_set_dimensions(pScrn, action_flags, &xa, &xb, &ya, &yb,
1078			      &src_x,  &src_y, &src_w, &src_h,
1079			      &drw_x, &drw_y, &drw_w, &drw_h,
1080			      &left, &top, &right, &bottom, &dstBox,
1081			      &npixels, &nlines, clipBoxes, width, height))
1082		return Success;
1083
1084	if (NV_calculate_pitches_and_mem_size(pNv, action_flags, &srcPitch,
1085					      &srcPitch2, &dstPitch, &s2offset,
1086					      &s3offset, &uv_offset,
1087					      &newFBSize, &newTTSize,
1088					      &line_len, npixels, nlines,
1089					      width, height))
1090		return BadImplementation;
1091
1092	/* There are some cases (tvtime with overscan for example) where the
1093	 * input image is larger (width/height) than the source rectangle for
1094	 * the overlay (src_w, src_h). In those cases, we try to do something
1095	 * optimal by uploading only the necessary data.
1096	 */
1097	if (action_flags & IS_YUY2 || action_flags & IS_RGB)
1098		buf += (top * srcPitch) + left;
1099
1100	if (action_flags & IS_YV12) {
1101		tmp = ((top >> 1) * srcPitch2) + (left >> 1);
1102		s2offset += tmp;
1103		s3offset += tmp;
1104	}
1105
1106	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, newFBSize,
1107				    &pPriv->video_mem);
1108	if (ret)
1109		return BadAlloc;
1110
1111#ifdef NVOVL_SUPPORT
1112	if (action_flags & USE_OVERLAY) {
1113		ret = nouveau_bo_pin(pPriv->video_mem, NOUVEAU_BO_VRAM);
1114		if (ret) {
1115			nouveau_bo_ref(NULL, &pPriv->video_mem);
1116			return BadAlloc;
1117		}
1118	}
1119#endif
1120
1121	/* The overlay supports hardware double buffering. We handle this here*/
1122	offset = 0;
1123#ifdef NVOVL_SUPPORT
1124	if (pPriv->doubleBuffer) {
1125		int mask = 1 << (pPriv->currentBuffer << 2);
1126
1127		/* overwrite the newest buffer if there's not one free */
1128		if (nvReadVIDEO(pNv, NV_PVIDEO_BUFFER) & mask) {
1129			if (!pPriv->currentBuffer)
1130				offset += newFBSize >> 1;
1131			skip = TRUE;
1132		} else {
1133			if (pPriv->currentBuffer)
1134				offset += newFBSize >> 1;
1135		}
1136	}
1137#endif
1138
1139	/* Now we take a decision regarding the way we send the data to the
1140	 * card.
1141	 *
1142	 * Either we use double buffering of "private" TT memory
1143	 * Either we rely on X's GARTScratch
1144	 * Either we fallback on CPU copy
1145	 */
1146
1147	/* Try to allocate host-side double buffers, unless we have already
1148	 * failed
1149	 */
1150
1151	/* We take only nlines * line_len bytes - that is, only the pixel
1152	 * data we are interested in - because the stuff in the GART is
1153	 * written contiguously
1154	 */
1155	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1156		ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART, newTTSize,
1157					    &pPriv->TT_mem_chunk[0]);
1158		if (ret == 0) {
1159			ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_GART,
1160						    newTTSize,
1161						    &pPriv->TT_mem_chunk[1]);
1162			if (ret) {
1163				nouveau_bo_ref(NULL, &pPriv->TT_mem_chunk[0]);
1164				pPriv->currentHostBuffer =
1165					NO_PRIV_HOST_BUFFER_AVAILABLE;
1166			}
1167		} else {
1168			pPriv->currentHostBuffer =
1169				NO_PRIV_HOST_BUFFER_AVAILABLE;
1170		}
1171	}
1172
1173	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE) {
1174		destination_buffer =
1175			pPriv->TT_mem_chunk[pPriv->currentHostBuffer];
1176	}
1177	if (!destination_buffer) {
1178		if (pNv->Architecture >= NV_TESLA) {
1179			NOUVEAU_ERR("No scratch buffer for tiled upload\n");
1180			return BadAlloc;
1181		}
1182
1183		goto CPU_copy;
1184	}
1185
1186	if (newTTSize <= destination_buffer->size) {
1187		unsigned char *dst;
1188		int i = 0;
1189
1190		/* Upload to GART */
1191		nouveau_bo_map(destination_buffer, NOUVEAU_BO_WR, pNv->client);
1192		dst = destination_buffer->map;
1193
1194		if (action_flags & IS_YV12) {
1195			if (action_flags & CONVERT_TO_YUY2) {
1196				NVCopyData420(buf + (top * srcPitch) + left,
1197					      buf + s2offset, buf + s3offset,
1198					      dst, srcPitch, srcPitch2,
1199					      line_len, nlines, npixels);
1200			} else {
1201				/* Native YV12 */
1202				unsigned char *tbuf = buf + top *
1203						      srcPitch + left;
1204				unsigned char *tdst = dst;
1205
1206				/* luma upload */
1207				for (i = 0; i < nlines; i++) {
1208					memcpy(tdst, tbuf, line_len);
1209					tdst += line_len;
1210					tbuf += srcPitch;
1211				}
1212				dst += line_len * nlines;
1213
1214				NVCopyNV12ColorPlanes(buf + s2offset,
1215						      buf + s3offset, dst,
1216						      line_len, srcPitch2,
1217						      nlines, npixels);
1218			}
1219		} else {
1220			for (i = 0; i < nlines; i++) {
1221				memcpy(dst, buf, line_len);
1222				dst += line_len;
1223				buf += srcPitch;
1224			}
1225		}
1226
1227		if (uv_offset) {
1228			NVAccelM2MF(pNv, line_len, nlines / 2, 1,
1229				    line_len * nlines, uv_offset,
1230				    destination_buffer, NOUVEAU_BO_GART,
1231				    line_len, nlines >> 1, 0, 0,
1232				    pPriv->video_mem, NOUVEAU_BO_VRAM,
1233				    dstPitch, nlines >> 1, 0, 0);
1234		}
1235
1236		NVAccelM2MF(pNv, line_len, nlines, 1, 0, 0,
1237			    destination_buffer, NOUVEAU_BO_GART,
1238			    line_len, nlines, 0, 0,
1239			    pPriv->video_mem, NOUVEAU_BO_VRAM,
1240			    dstPitch, nlines, 0, 0);
1241
1242	} else {
1243CPU_copy:
1244		nouveau_bo_map(pPriv->video_mem, NOUVEAU_BO_WR, pNv->client);
1245		map = pPriv->video_mem->map + offset;
1246
1247		if (action_flags & IS_YV12) {
1248			if (action_flags & CONVERT_TO_YUY2) {
1249				NVCopyData420(buf + (top * srcPitch) + left,
1250					      buf + s2offset, buf + s3offset,
1251					      map, srcPitch, srcPitch2,
1252					      dstPitch, nlines, npixels);
1253			} else {
1254				unsigned char *tbuf =
1255					buf + left + top * srcPitch;
1256
1257				for (i = 0; i < nlines; i++) {
1258					int dwords = npixels << 1;
1259
1260					while (dwords & ~0x03) {
1261						*map = *tbuf;
1262						*(map + 1) = *(tbuf + 1);
1263						*(map + 2) = *(tbuf + 2);
1264						*(map + 3) = *(tbuf + 3);
1265						map += 4;
1266						tbuf += 4;
1267						dwords -= 4;
1268					}
1269
1270					switch (dwords) {
1271					case 3: *(map + 2) = *(tbuf + 2);
1272					case 2: *(map + 1) = *(tbuf + 1);
1273					case 1: *map = *tbuf;
1274					}
1275
1276					map += dstPitch - (npixels << 1);
1277					tbuf += srcPitch - (npixels << 1);
1278				}
1279
1280				NVCopyNV12ColorPlanes(buf + s2offset,
1281						      buf + s3offset,
1282						      map, dstPitch, srcPitch2,
1283						      nlines, npixels);
1284			}
1285		} else {
1286			/* YUY2 and RGB */
1287			for (i = 0; i < nlines; i++) {
1288				int dwords = npixels << 1;
1289
1290				while (dwords & ~0x03) {
1291					*map = *buf;
1292					*(map + 1) = *(buf + 1);
1293					*(map + 2) = *(buf + 2);
1294					*(map + 3) = *(buf + 3);
1295					map += 4;
1296					buf += 4;
1297					dwords -= 4;
1298				}
1299
1300				switch (dwords) {
1301				case 3: *(map + 2) = *(buf + 2);
1302				case 2: *(map + 1) = *(buf + 1);
1303				case 1: *map = *buf;
1304				}
1305
1306				map += dstPitch - (npixels << 1);
1307				buf += srcPitch - (npixels << 1);
1308			}
1309		}
1310	}
1311
1312	if (skip)
1313		return Success;
1314
1315	if (pPriv->currentHostBuffer != NO_PRIV_HOST_BUFFER_AVAILABLE)
1316		pPriv->currentHostBuffer ^= 1;
1317
1318	/* If we're not using the hw overlay, we're rendering into a pixmap
1319	 * and need to take a couple of additional steps...
1320	 */
1321	if (!(action_flags & USE_OVERLAY)) {
1322		ppix = NVGetDrawablePixmap(pDraw);
1323
1324		/* Ensure pixmap is in offscreen memory */
1325		pNv->exa_force_cp = TRUE;
1326		exaMoveInPixmap(ppix);
1327		pNv->exa_force_cp = FALSE;
1328
1329		if (!exaGetPixmapDriverPrivate(ppix))
1330			return BadAlloc;
1331
1332#ifdef COMPOSITE
1333		/* Convert screen coords to pixmap coords */
1334		if (ppix->screen_x || ppix->screen_y) {
1335			REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
1336					 -ppix->screen_x, -ppix->screen_y);
1337			dstBox.x1 -= ppix->screen_x;
1338			dstBox.x2 -= ppix->screen_x;
1339			dstBox.y1 -= ppix->screen_y;
1340			dstBox.y2 -= ppix->screen_y;
1341		}
1342#endif
1343	}
1344
1345	if (action_flags & USE_OVERLAY) {
1346		if (pNv->Architecture == NV_ARCH_04) {
1347			NV04PutOverlayImage(pScrn, pPriv->video_mem, offset,
1348					    id, dstPitch, &dstBox, 0, 0,
1349					    xb, yb, npixels, nlines,
1350					    src_w, src_h, drw_w, drw_h,
1351					    clipBoxes);
1352		} else {
1353			NV10PutOverlayImage(pScrn, pPriv->video_mem, offset,
1354					    uv_offset, id, dstPitch, &dstBox,
1355					    0, 0, xb, yb,
1356					    npixels, nlines, src_w, src_h,
1357					    drw_w, drw_h, clipBoxes);
1358		}
1359
1360		pPriv->currentBuffer ^= 1;
1361	} else
1362	if (action_flags & USE_TEXTURE) {
1363		int ret = BadImplementation;
1364
1365		if (pNv->Architecture == NV_ARCH_30) {
1366			ret = NV30PutTextureImage(pScrn, pPriv->video_mem,
1367						  offset, uv_offset,
1368						  id, dstPitch, &dstBox, 0, 0,
1369						  xb, yb, npixels, nlines,
1370						  src_w, src_h, drw_w, drw_h,
1371						  clipBoxes, ppix, pPriv);
1372		} else
1373		if (pNv->Architecture == NV_ARCH_40) {
1374			ret = NV40PutTextureImage(pScrn, pPriv->video_mem,
1375						  offset, uv_offset,
1376						  id, dstPitch, &dstBox, 0, 0,
1377						  xb, yb, npixels, nlines,
1378						  src_w, src_h, drw_w, drw_h,
1379						  clipBoxes, ppix, pPriv);
1380		} else
1381		if (pNv->Architecture == NV_TESLA) {
1382			ret = nv50_xv_image_put(pScrn, pPriv->video_mem,
1383						offset, uv_offset,
1384						id, dstPitch, &dstBox, 0, 0,
1385						xb, yb, npixels, nlines,
1386						src_w, src_h, drw_w, drw_h,
1387						clipBoxes, ppix, pPriv);
1388		} else {
1389			ret = nvc0_xv_image_put(pScrn, pPriv->video_mem,
1390						offset, uv_offset,
1391						id, dstPitch, &dstBox, 0, 0,
1392						xb, yb, npixels, nlines,
1393						src_w, src_h, drw_w, drw_h,
1394						clipBoxes, ppix, pPriv);
1395		}
1396
1397		if (ret != Success)
1398			return ret;
1399	} else {
1400		ret = NVPutBlitImage(pScrn, pPriv->video_mem, offset, id,
1401				     dstPitch, &dstBox, 0, 0, xb, yb, npixels,
1402				     nlines, src_w, src_h, drw_w, drw_h,
1403				     clipBoxes, ppix);
1404		if (ret != Success)
1405			return ret;
1406	}
1407
1408#ifdef COMPOSITE
1409	/* Damage tracking */
1410	if (!(action_flags & USE_OVERLAY))
1411		DamageDamageRegion(&ppix->drawable, clipBoxes);
1412#endif
1413
1414	return Success;
1415}
1416
1417/**
1418 * QueryImageAttributes
1419 *
1420 * calculates
1421 * - size (memory required to store image),
1422 * - pitches,
1423 * - offsets
1424 * of image
1425 * depending on colorspace (id) and dimensions (w,h) of image
1426 * values of
1427 * - w,
1428 * - h
1429 * may be adjusted as needed
1430 *
1431 * @param pScrn unused
1432 * @param id colorspace of image
1433 * @param w pointer to width of image
1434 * @param h pointer to height of image
1435 * @param pitches pitches[i] = length of a scanline in plane[i]
1436 * @param offsets offsets[i] = offset of plane i from the beginning of the image
1437 * @return size of the memory required for the XvImage queried
1438 */
1439static int
1440NVQueryImageAttributes(ScrnInfoPtr pScrn, int id,
1441		       unsigned short *w, unsigned short *h,
1442		       int *pitches, int *offsets)
1443{
1444	int size, tmp;
1445
1446	*w = (*w + 1) & ~1; // width rounded up to an even number
1447	if (offsets)
1448		offsets[0] = 0;
1449
1450	switch (id) {
1451	case FOURCC_YV12:
1452	case FOURCC_I420:
1453		*h = (*h + 1) & ~1; // height rounded up to an even number
1454		size = (*w + 3) & ~3; // width rounded up to a multiple of 4
1455		if (pitches)
1456			pitches[0] = size; // width rounded up to a multiple of 4
1457		size *= *h;
1458		if (offsets)
1459			offsets[1] = size; // number of pixels in "rounded up" image
1460		tmp = ((*w >> 1) + 3) & ~3; // width/2 rounded up to a multiple of 4
1461		if (pitches)
1462			pitches[1] = pitches[2] = tmp; // width/2 rounded up to a multiple of 4
1463		tmp *= (*h >> 1); // 1/4*number of pixels in "rounded up" image
1464		size += tmp; // 5/4*number of pixels in "rounded up" image
1465		if (offsets)
1466			offsets[2] = size; // 5/4*number of pixels in "rounded up" image
1467		size += tmp; // = 3/2*number of pixels in "rounded up" image
1468		break;
1469	case FOURCC_UYVY:
1470	case FOURCC_YUY2:
1471		size = *w << 1; // 2*width
1472		if (pitches)
1473			pitches[0] = size; // 2*width
1474		size *= *h; // 2*width*height
1475		break;
1476	case FOURCC_RGB:
1477		size = *w << 2; // 4*width (32 bit per pixel)
1478		if (pitches)
1479			pitches[0] = size; // 4*width
1480		size *= *h; // 4*width*height
1481		break;
1482	case FOURCC_AI44:
1483	case FOURCC_IA44:
1484		size = *w; // width
1485		if (pitches)
1486			pitches[0] = size; // width
1487		size *= *h; // width*height
1488		break;
1489	default:
1490		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Unknown colorspace: %x\n", id);
1491		*w = *h = size = 0;
1492		break;
1493	}
1494
1495	return size;
1496}
1497
1498/***** Exported offscreen surface stuff ****/
1499
1500
1501static int
1502NVAllocSurface(ScrnInfoPtr pScrn, int id,
1503	       unsigned short w, unsigned short h,
1504	       XF86SurfacePtr surface)
1505{
1506	NVPtr pNv = NVPTR(pScrn);
1507	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1508	int size, bpp, ret;
1509
1510	bpp = pScrn->bitsPerPixel >> 3;
1511
1512	if (pPriv->grabbedByV4L)
1513		return BadAlloc;
1514
1515	if ((w > IMAGE_MAX_W) || (h > IMAGE_MAX_H))
1516		return BadValue;
1517
1518	w = (w + 1) & ~1;
1519	pPriv->pitch = ((w << 1) + 63) & ~63;
1520	size = h * pPriv->pitch / bpp;
1521
1522	ret = nouveau_xv_bo_realloc(pScrn, NOUVEAU_BO_VRAM, size,
1523				    &pPriv->video_mem);
1524	if (ret)
1525		return BadAlloc;
1526	pPriv->offset = 0;
1527
1528	surface->width = w;
1529	surface->height = h;
1530	surface->pScrn = pScrn;
1531	surface->pitches = &pPriv->pitch;
1532	surface->offsets = &pPriv->offset;
1533	surface->devPrivate.ptr = (pointer)pPriv;
1534	surface->id = id;
1535
1536	/* grab the video */
1537	NVStopOverlay(pScrn);
1538	pPriv->videoStatus = 0;
1539	REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
1540	pPriv->grabbedByV4L = TRUE;
1541
1542	return Success;
1543}
1544
1545static int
1546NVStopSurface(XF86SurfacePtr surface)
1547{
1548	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1549
1550	if (pPriv->grabbedByV4L && pPriv->videoStatus) {
1551		NV10StopOverlay(surface->pScrn);
1552		pPriv->videoStatus = 0;
1553	}
1554
1555	return Success;
1556}
1557
1558static int
1559NVFreeSurface(XF86SurfacePtr surface)
1560{
1561	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1562
1563	if (pPriv->grabbedByV4L) {
1564		NVStopSurface(surface);
1565		NVFreeOverlayMemory(surface->pScrn);
1566		pPriv->grabbedByV4L = FALSE;
1567	}
1568
1569	return Success;
1570}
1571
1572static int
1573NVGetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 *value)
1574{
1575	NVPtr pNv = NVPTR(pScrn);
1576	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1577
1578	return NV10GetOverlayPortAttribute(pScrn, attribute,
1579					 value, (pointer)pPriv);
1580}
1581
1582static int
1583NVSetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 value)
1584{
1585	NVPtr pNv = NVPTR(pScrn);
1586	NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1587
1588	return NV10SetOverlayPortAttribute(pScrn, attribute,
1589					 value, (pointer)pPriv);
1590}
1591
1592static int
1593NVDisplaySurface(XF86SurfacePtr surface,
1594		 short src_x, short src_y,
1595		 short drw_x, short drw_y,
1596		 short src_w, short src_h,
1597		 short drw_w, short drw_h,
1598		 RegionPtr clipBoxes)
1599{
1600	ScrnInfoPtr pScrn = surface->pScrn;
1601	NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1602	INT32 xa, xb, ya, yb;
1603	BoxRec dstBox;
1604
1605	if (!pPriv->grabbedByV4L)
1606		return Success;
1607
1608	if (src_w > (drw_w << 3))
1609		drw_w = src_w >> 3;
1610	if (src_h > (drw_h << 3))
1611		drw_h = src_h >> 3;
1612
1613	/* Clip */
1614	xa = src_x;
1615	xb = src_x + src_w;
1616	ya = src_y;
1617	yb = src_y + src_h;
1618
1619	dstBox.x1 = drw_x;
1620	dstBox.x2 = drw_x + drw_w;
1621	dstBox.y1 = drw_y;
1622	dstBox.y2 = drw_y + drw_h;
1623
1624	if(!xf86XVClipVideoHelper(&dstBox, &xa, &xb, &ya, &yb, clipBoxes,
1625				  surface->width, surface->height))
1626		return Success;
1627
1628	dstBox.x1 -= pScrn->frameX0;
1629	dstBox.x2 -= pScrn->frameX0;
1630	dstBox.y1 -= pScrn->frameY0;
1631	dstBox.y2 -= pScrn->frameY0;
1632
1633	pPriv->currentBuffer = 0;
1634
1635	NV10PutOverlayImage(pScrn, pPriv->video_mem, surface->offsets[0],
1636			    0, surface->id, surface->pitches[0], &dstBox,
1637			    xa, ya, xb, yb, surface->width, surface->height,
1638			    src_w, src_h, drw_w, drw_h, clipBoxes);
1639
1640	return Success;
1641}
1642
1643/**
1644 * NVSetupBlitVideo
1645 * this function does all the work setting up a blit port
1646 *
1647 * @return blit port
1648 */
1649static XF86VideoAdaptorPtr
1650NVSetupBlitVideo (ScreenPtr pScreen)
1651{
1652	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1653	NVPtr               pNv       = NVPTR(pScrn);
1654	XF86VideoAdaptorPtr adapt;
1655	NVPortPrivPtr       pPriv;
1656	int i;
1657
1658	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1659					sizeof(NVPortPrivRec) +
1660					(sizeof(DevUnion) * NUM_BLIT_PORTS)))) {
1661		return NULL;
1662	}
1663
1664	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1665	adapt->flags		= 0;
1666	adapt->name		= "NV Video Blitter";
1667	adapt->nEncodings	= 1;
1668	adapt->pEncodings	= &DummyEncoding;
1669	adapt->nFormats		= NUM_FORMATS_ALL;
1670	adapt->pFormats		= NVFormats;
1671	adapt->nPorts		= NUM_BLIT_PORTS;
1672	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1673
1674	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_BLIT_PORTS]);
1675	for(i = 0; i < NUM_BLIT_PORTS; i++)
1676		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1677
1678	if (pNv->dev->chipset >= 0x11) {
1679		adapt->pAttributes = NVBlitAttributes;
1680		adapt->nAttributes = NUM_BLIT_ATTRIBUTES;
1681	} else {
1682		adapt->pAttributes = NULL;
1683		adapt->nAttributes = 0;
1684	}
1685
1686	adapt->pImages			= NVImages;
1687	adapt->nImages			= NUM_IMAGES_ALL;
1688	adapt->PutVideo			= NULL;
1689	adapt->PutStill			= NULL;
1690	adapt->GetVideo			= NULL;
1691	adapt->GetStill			= NULL;
1692	adapt->StopVideo		= NVStopBlitVideo;
1693	adapt->SetPortAttribute		= NVSetBlitPortAttribute;
1694	adapt->GetPortAttribute		= NVGetBlitPortAttribute;
1695	adapt->QueryBestSize		= NVQueryBestSize;
1696	adapt->PutImage			= NVPutImage;
1697	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1698
1699	pPriv->videoStatus		= 0;
1700	pPriv->grabbedByV4L		= FALSE;
1701	pPriv->blitter			= TRUE;
1702	pPriv->texture			= FALSE;
1703	pPriv->bicubic			= FALSE;
1704	pPriv->doubleBuffer		= FALSE;
1705	pPriv->SyncToVBlank		= (pNv->dev->chipset >= 0x11);
1706	pPriv->max_image_dim            = 2048;
1707
1708	pNv->blitAdaptor		= adapt;
1709
1710	return adapt;
1711}
1712
1713/**
1714 * NVSetupOverlayVideo
1715 * this function does all the work setting up an overlay port
1716 *
1717 * @return overlay port
1718 */
1719static XF86VideoAdaptorPtr
1720NVSetupOverlayVideoAdapter(ScreenPtr pScreen)
1721{
1722	ScrnInfoPtr         pScrn = xf86ScreenToScrn(pScreen);
1723	NVPtr               pNv       = NVPTR(pScrn);
1724	XF86VideoAdaptorPtr adapt;
1725	NVPortPrivPtr       pPriv;
1726
1727	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1728					sizeof(NVPortPrivRec) +
1729					sizeof(DevUnion)))) {
1730		return NULL;
1731	}
1732
1733	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1734	adapt->flags		= VIDEO_OVERLAID_IMAGES;
1735	adapt->name		= "NV Video Overlay";
1736	adapt->nEncodings	= 1;
1737	adapt->pEncodings	= &DummyEncoding;
1738	adapt->nFormats		= NUM_FORMATS_ALL;
1739	adapt->pFormats		= NVFormats;
1740	adapt->nPorts		= 1;
1741	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1742
1743	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[1]);
1744	adapt->pPortPrivates[0].ptr	= (pointer)(pPriv);
1745
1746	adapt->pAttributes		= (pNv->Architecture != NV_ARCH_04) ? NV10OverlayAttributes : NV04OverlayAttributes;
1747	adapt->nAttributes		= (pNv->Architecture != NV_ARCH_04) ? NUM_NV10_OVERLAY_ATTRIBUTES : NUM_NV04_OVERLAY_ATTRIBUTES;
1748	adapt->pImages			= NVImages;
1749	adapt->nImages			= NUM_IMAGES_YUV;
1750	adapt->PutVideo			= NULL;
1751	adapt->PutStill			= NULL;
1752	adapt->GetVideo			= NULL;
1753	adapt->GetStill			= NULL;
1754	adapt->StopVideo		= NVStopOverlayVideo;
1755	adapt->SetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10SetOverlayPortAttribute : NV04SetOverlayPortAttribute;
1756	adapt->GetPortAttribute		= (pNv->Architecture != NV_ARCH_04) ? NV10GetOverlayPortAttribute : NV04GetOverlayPortAttribute;
1757	adapt->QueryBestSize		= NVQueryBestSize;
1758	adapt->PutImage			= NVPutImage;
1759	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1760
1761	pPriv->videoStatus		= 0;
1762	pPriv->currentBuffer		= 0;
1763	pPriv->grabbedByV4L		= FALSE;
1764	pPriv->blitter			= FALSE;
1765	pPriv->texture			= FALSE;
1766	pPriv->bicubic			= FALSE;
1767	pPriv->max_image_dim            = 2048;
1768
1769	NVSetPortDefaults (pScrn, pPriv);
1770
1771	/* gotta uninit this someplace */
1772	REGION_NULL(pScreen, &pPriv->clip);
1773
1774	pNv->overlayAdaptor	= adapt;
1775
1776	xvBrightness		= MAKE_ATOM("XV_BRIGHTNESS");
1777	xvColorKey		= MAKE_ATOM("XV_COLORKEY");
1778	xvAutopaintColorKey     = MAKE_ATOM("XV_AUTOPAINT_COLORKEY");
1779	xvSetDefaults           = MAKE_ATOM("XV_SET_DEFAULTS");
1780
1781	if ( pNv->Architecture != NV_ARCH_04 )
1782		{
1783		xvDoubleBuffer		= MAKE_ATOM("XV_DOUBLE_BUFFER");
1784		xvContrast		= MAKE_ATOM("XV_CONTRAST");
1785		xvSaturation		= MAKE_ATOM("XV_SATURATION");
1786		xvHue			= MAKE_ATOM("XV_HUE");
1787		xvITURBT709		= MAKE_ATOM("XV_ITURBT_709");
1788		xvOnCRTCNb		= MAKE_ATOM("XV_ON_CRTC_NB");
1789		NV10WriteOverlayParameters(pScrn);
1790		}
1791
1792	return adapt;
1793}
1794
1795
1796XF86OffscreenImageRec NVOffscreenImages[2] = {
1797	{
1798		&NVImages[0],
1799		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1800		NVAllocSurface,
1801		NVFreeSurface,
1802		NVDisplaySurface,
1803		NVStopSurface,
1804		NVGetSurfaceAttribute,
1805		NVSetSurfaceAttribute,
1806		IMAGE_MAX_W, IMAGE_MAX_H,
1807		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1808		&NV10OverlayAttributes[1]
1809	},
1810	{
1811		&NVImages[2],
1812		VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1813		NVAllocSurface,
1814		NVFreeSurface,
1815		NVDisplaySurface,
1816		NVStopSurface,
1817		NVGetSurfaceAttribute,
1818		NVSetSurfaceAttribute,
1819		IMAGE_MAX_W, IMAGE_MAX_H,
1820		NUM_NV10_OVERLAY_ATTRIBUTES - 1,
1821		&NV10OverlayAttributes[1]
1822	}
1823};
1824
1825static void
1826NVInitOffscreenImages (ScreenPtr pScreen)
1827{
1828	xf86XVRegisterOffscreenImages(pScreen, NVOffscreenImages, 2);
1829}
1830
1831/**
1832 * NVChipsetHasOverlay
1833 *
1834 * newer chips don't support overlay anymore.
1835 * overlay feature is emulated via textures.
1836 *
1837 * @param pNv
1838 * @return true, if chipset supports overlay
1839 */
1840static Bool
1841NVChipsetHasOverlay(NVPtr pNv)
1842{
1843	switch (pNv->Architecture) {
1844	case NV_ARCH_04: /*NV04 has a different overlay than NV10+*/
1845	case NV_ARCH_10:
1846	case NV_ARCH_20:
1847	case NV_ARCH_30:
1848		return TRUE;
1849	case NV_ARCH_40:
1850		if (pNv->dev->chipset == 0x40)
1851			return TRUE;
1852		break;
1853	default:
1854		break;
1855	}
1856
1857	return FALSE;
1858}
1859
1860/**
1861 * NVSetupOverlayVideo
1862 * check if chipset supports Overla
1863 * if so, setup overlay port
1864 *
1865 * @return overlay port
1866 * @see NVChipsetHasOverlay(NVPtr pNv)
1867 * @see NV10SetupOverlayVideo(ScreenPtr pScreen)
1868 * @see NVInitOffscreenImages(ScreenPtr pScreen)
1869 */
1870static XF86VideoAdaptorPtr
1871NVSetupOverlayVideo(ScreenPtr pScreen)
1872{
1873	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
1874	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
1875	NVPtr                pNv   = NVPTR(pScrn);
1876
1877	if (1 /*pNv->kms_enable*/ || !NVChipsetHasOverlay(pNv))
1878		return NULL;
1879
1880	overlayAdaptor = NVSetupOverlayVideoAdapter(pScreen);
1881	/* I am not sure what this call does. */
1882	if (overlayAdaptor && pNv->Architecture != NV_ARCH_04 )
1883		NVInitOffscreenImages(pScreen);
1884
1885	#ifdef COMPOSITE
1886	if (!noCompositeExtension) {
1887		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1888			   "Xv: Composite is enabled, enabling overlay with "
1889			   "smart blitter fallback\n");
1890		overlayAdaptor->name = "NV Video Overlay with Composite";
1891	}
1892	#endif
1893
1894	return overlayAdaptor;
1895}
1896
1897/**
1898 * NV30 texture adapter.
1899 */
1900
1901#define NUM_FORMAT_TEXTURED 2
1902
1903static XF86ImageRec NV30TexturedImages[NUM_FORMAT_TEXTURED] =
1904{
1905	XVIMAGE_YV12,
1906	XVIMAGE_I420,
1907};
1908
1909/**
1910 * NV30SetupTexturedVideo
1911 * this function does all the work setting up textured video port
1912 *
1913 * @return texture port
1914 */
1915static XF86VideoAdaptorPtr
1916NV30SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
1917{
1918	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1919	NVPtr pNv = NVPTR(pScrn);
1920	XF86VideoAdaptorPtr adapt;
1921	NVPortPrivPtr pPriv;
1922	int i;
1923
1924	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
1925				 sizeof(NVPortPrivRec) +
1926				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
1927		return NULL;
1928	}
1929
1930	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
1931	adapt->flags		= 0;
1932	if (bicubic)
1933		adapt->name		= "NV30 high quality adapter";
1934	else
1935		adapt->name		= "NV30 texture adapter";
1936	adapt->nEncodings	= 1;
1937	adapt->pEncodings	= &DummyEncodingTex;
1938	adapt->nFormats		= NUM_FORMATS_ALL;
1939	adapt->pFormats		= NVFormats;
1940	adapt->nPorts		= NUM_TEXTURE_PORTS;
1941	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
1942
1943	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
1944	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
1945		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1946
1947	adapt->pAttributes		= NVTexturedAttributes;
1948	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
1949	adapt->pImages			= NV30TexturedImages;
1950	adapt->nImages			= NUM_FORMAT_TEXTURED;
1951	adapt->PutVideo			= NULL;
1952	adapt->PutStill			= NULL;
1953	adapt->GetVideo			= NULL;
1954	adapt->GetStill			= NULL;
1955	adapt->StopVideo		= NV30StopTexturedVideo;
1956	adapt->SetPortAttribute		= NV30SetTexturePortAttribute;
1957	adapt->GetPortAttribute		= NV30GetTexturePortAttribute;
1958	adapt->QueryBestSize		= NVQueryBestSize;
1959	adapt->PutImage			= NVPutImage;
1960	adapt->QueryImageAttributes	= NVQueryImageAttributes;
1961
1962	pPriv->videoStatus		= 0;
1963	pPriv->grabbedByV4L		= FALSE;
1964	pPriv->blitter			= FALSE;
1965	pPriv->texture			= TRUE;
1966	pPriv->bicubic			= bicubic;
1967	pPriv->doubleBuffer		= FALSE;
1968	pPriv->SyncToVBlank		= TRUE;
1969	pPriv->max_image_dim            = 4096;
1970
1971	if (bicubic)
1972		pNv->textureAdaptor[1]	= adapt;
1973	else
1974		pNv->textureAdaptor[0]	= adapt;
1975
1976	return adapt;
1977}
1978
1979/**
1980 * NV40 texture adapter.
1981 */
1982
1983#define NUM_FORMAT_TEXTURED 2
1984
1985static XF86ImageRec NV40TexturedImages[NUM_FORMAT_TEXTURED] =
1986{
1987	XVIMAGE_YV12,
1988	XVIMAGE_I420,
1989};
1990
1991/**
1992 * NV40SetupTexturedVideo
1993 * this function does all the work setting up textured video port
1994 *
1995 * @return texture port
1996 */
1997static XF86VideoAdaptorPtr
1998NV40SetupTexturedVideo (ScreenPtr pScreen, Bool bicubic)
1999{
2000	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2001	NVPtr pNv = NVPTR(pScrn);
2002	XF86VideoAdaptorPtr adapt;
2003	NVPortPrivPtr pPriv;
2004	int i;
2005
2006	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2007				 sizeof(NVPortPrivRec) +
2008				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2009		return NULL;
2010	}
2011
2012	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2013	adapt->flags		= 0;
2014	if (bicubic)
2015		adapt->name		= "NV40 high quality adapter";
2016	else
2017		adapt->name		= "NV40 texture adapter";
2018	adapt->nEncodings	= 1;
2019	adapt->pEncodings	= &DummyEncodingTex;
2020	adapt->nFormats		= NUM_FORMATS_ALL;
2021	adapt->pFormats		= NVFormats;
2022	adapt->nPorts		= NUM_TEXTURE_PORTS;
2023	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2024
2025	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2026	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2027		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2028
2029	adapt->pAttributes		= NVTexturedAttributes;
2030	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES;
2031	adapt->pImages			= NV40TexturedImages;
2032	adapt->nImages			= NUM_FORMAT_TEXTURED;
2033	adapt->PutVideo			= NULL;
2034	adapt->PutStill			= NULL;
2035	adapt->GetVideo			= NULL;
2036	adapt->GetStill			= NULL;
2037	adapt->StopVideo		= NV40StopTexturedVideo;
2038	adapt->SetPortAttribute		= NV40SetTexturePortAttribute;
2039	adapt->GetPortAttribute		= NV40GetTexturePortAttribute;
2040	adapt->QueryBestSize		= NVQueryBestSize;
2041	adapt->PutImage			= NVPutImage;
2042	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2043
2044	pPriv->videoStatus		= 0;
2045	pPriv->grabbedByV4L		= FALSE;
2046	pPriv->blitter			= FALSE;
2047	pPriv->texture			= TRUE;
2048	pPriv->bicubic			= bicubic;
2049	pPriv->doubleBuffer		= FALSE;
2050	pPriv->SyncToVBlank		= TRUE;
2051	pPriv->max_image_dim            = 4096;
2052
2053	if (bicubic)
2054		pNv->textureAdaptor[1]	= adapt;
2055	else
2056		pNv->textureAdaptor[0]	= adapt;
2057
2058	return adapt;
2059}
2060
2061static XF86ImageRec
2062NV50TexturedImages[] =
2063{
2064	XVIMAGE_YV12,
2065	XVIMAGE_I420,
2066	XVIMAGE_YUY2,
2067	XVIMAGE_UYVY
2068};
2069
2070static XF86VideoAdaptorPtr
2071NV50SetupTexturedVideo (ScreenPtr pScreen)
2072{
2073	ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2074	NVPtr pNv = NVPTR(pScrn);
2075	XF86VideoAdaptorPtr adapt;
2076	NVPortPrivPtr pPriv;
2077	int i;
2078
2079	if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) +
2080				 sizeof(NVPortPrivRec) +
2081				 (sizeof(DevUnion) * NUM_TEXTURE_PORTS)))) {
2082		return NULL;
2083	}
2084
2085	adapt->type		= XvWindowMask | XvInputMask | XvImageMask;
2086	adapt->flags		= 0;
2087	adapt->name		= "Nouveau GeForce 8/9 Textured Video";
2088	adapt->nEncodings	= 1;
2089	adapt->pEncodings	= &DummyEncodingNV50;
2090	adapt->nFormats		= NUM_FORMATS_ALL;
2091	adapt->pFormats		= NVFormats;
2092	adapt->nPorts		= NUM_TEXTURE_PORTS;
2093	adapt->pPortPrivates	= (DevUnion*)(&adapt[1]);
2094
2095	pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_TEXTURE_PORTS]);
2096	for(i = 0; i < NUM_TEXTURE_PORTS; i++)
2097		adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
2098
2099	adapt->pAttributes		= NVTexturedAttributesNV50;
2100	adapt->nAttributes		= NUM_TEXTURED_ATTRIBUTES_NV50;
2101	adapt->pImages			= NV50TexturedImages;
2102	adapt->nImages			= sizeof(NV50TexturedImages) /
2103					  sizeof(NV50TexturedImages[0]);
2104	adapt->PutVideo			= NULL;
2105	adapt->PutStill			= NULL;
2106	adapt->GetVideo			= NULL;
2107	adapt->GetStill			= NULL;
2108	adapt->StopVideo		= nv50_xv_video_stop;
2109	adapt->SetPortAttribute		= nv50_xv_port_attribute_set;
2110	adapt->GetPortAttribute		= nv50_xv_port_attribute_get;
2111	adapt->QueryBestSize		= NVQueryBestSize;
2112	adapt->PutImage			= NVPutImage;
2113	adapt->QueryImageAttributes	= NVQueryImageAttributes;
2114
2115	pNv->textureAdaptor[0]		= adapt;
2116
2117	nv50_xv_set_port_defaults(pScrn, pPriv);
2118	nv50_xv_csc_update(pScrn, pPriv);
2119
2120	xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
2121	xvContrast   = MAKE_ATOM("XV_CONTRAST");
2122	xvSaturation = MAKE_ATOM("XV_SATURATION");
2123	xvHue        = MAKE_ATOM("XV_HUE");
2124	xvITURBT709  = MAKE_ATOM("XV_ITURBT_709");
2125	return adapt;
2126}
2127
2128void
2129NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor)
2130{
2131	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2132	NVPtr                pNv = NVPTR(pScrn);
2133
2134	if (!pNv->Nv3D)
2135		return;
2136
2137	if (pNv->Architecture == NV_ARCH_30) {
2138		textureAdaptor[0] = NV30SetupTexturedVideo(pScreen, FALSE);
2139		textureAdaptor[1] = NV30SetupTexturedVideo(pScreen, TRUE);
2140	} else
2141	if (pNv->Architecture == NV_ARCH_40) {
2142		textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE);
2143		textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE);
2144	} else
2145	if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) {
2146		textureAdaptor[0] = NV50SetupTexturedVideo(pScreen);
2147	}
2148}
2149
2150/**
2151 * NVInitVideo
2152 * tries to initialize the various supported adapters
2153 * and add them to the list of ports on screen "pScreen".
2154 *
2155 * @param pScreen
2156 * @see NVSetupOverlayVideo(ScreenPtr pScreen)
2157 * @see NVSetupBlitVideo(ScreenPtr pScreen)
2158 */
2159void
2160NVInitVideo(ScreenPtr pScreen)
2161{
2162	ScrnInfoPtr          pScrn = xf86ScreenToScrn(pScreen);
2163	NVPtr                pNv = NVPTR(pScrn);
2164	XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
2165	XF86VideoAdaptorPtr  overlayAdaptor = NULL;
2166	XF86VideoAdaptorPtr  blitAdaptor = NULL;
2167	XF86VideoAdaptorPtr  textureAdaptor[2] = {NULL, NULL};
2168	int                  num_adaptors;
2169
2170	/*
2171	 * Driving the blitter requires the DMA FIFO. Using the FIFO
2172	 * without accel causes DMA errors. While the overlay might
2173	 * might work without accel, we also disable it for now when
2174	 * acceleration is disabled:
2175	 */
2176	if (pScrn->bitsPerPixel != 8 && pNv->AccelMethod == EXA) {
2177		xvSyncToVBlank = MAKE_ATOM("XV_SYNC_TO_VBLANK");
2178
2179		if (pNv->Architecture < NV_TESLA) {
2180			overlayAdaptor = NVSetupOverlayVideo(pScreen);
2181			blitAdaptor    = NVSetupBlitVideo(pScreen);
2182		}
2183
2184		NVSetupTexturedVideo(pScreen, textureAdaptor);
2185	}
2186
2187	num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
2188	if (blitAdaptor || overlayAdaptor || textureAdaptor[0]) {
2189		int size = num_adaptors;
2190
2191		if(overlayAdaptor) size++;
2192		if(blitAdaptor)    size++;
2193		if(textureAdaptor[0]) size++;
2194		if(textureAdaptor[1]) size++;
2195
2196		newAdaptors = malloc(size * sizeof(XF86VideoAdaptorPtr *));
2197		if(newAdaptors) {
2198			if(num_adaptors) {
2199				memcpy(newAdaptors, adaptors, num_adaptors *
2200						sizeof(XF86VideoAdaptorPtr));
2201			}
2202
2203			if(overlayAdaptor) {
2204				newAdaptors[num_adaptors] = overlayAdaptor;
2205				num_adaptors++;
2206			}
2207
2208			if (textureAdaptor[0]) { /* bilinear */
2209				newAdaptors[num_adaptors] = textureAdaptor[0];
2210				num_adaptors++;
2211			}
2212
2213			if (textureAdaptor[1]) { /* bicubic */
2214				newAdaptors[num_adaptors] = textureAdaptor[1];
2215				num_adaptors++;
2216			}
2217
2218			if(blitAdaptor) {
2219				newAdaptors[num_adaptors] = blitAdaptor;
2220				num_adaptors++;
2221			}
2222
2223			adaptors = newAdaptors;
2224		}
2225	}
2226
2227	if (num_adaptors)
2228		xf86XVScreenInit(pScreen, adaptors, num_adaptors);
2229	if (newAdaptors)
2230		free(newAdaptors);
2231
2232	/*
2233	 * For now we associate with the plain texture adapter since it is logical, but we can
2234	 * associate with any/all adapters since VL doesn't depend on Xv for color conversion.
2235	 */
2236	if (textureAdaptor[0]) {
2237		XF86MCAdaptorPtr *adaptorsXvMC = malloc(sizeof(XF86MCAdaptorPtr));
2238
2239		if (adaptorsXvMC) {
2240			adaptorsXvMC[0] = vlCreateAdaptorXvMC(pScreen, textureAdaptor[0]->name);
2241
2242			if (adaptorsXvMC[0]) {
2243				vlInitXvMC(pScreen, 1, adaptorsXvMC);
2244				vlDestroyAdaptorXvMC(adaptorsXvMC[0]);
2245			}
2246
2247			free(adaptorsXvMC);
2248		}
2249	}
2250}
2251
2252void
2253NVTakedownVideo(ScrnInfoPtr pScrn)
2254{
2255	NVPtr pNv = NVPTR(pScrn);
2256
2257	if (pNv->blitAdaptor)
2258		NVFreePortMemory(pScrn, GET_BLIT_PRIVATE(pNv));
2259	if (pNv->textureAdaptor[0]) {
2260		NVFreePortMemory(pScrn,
2261				 pNv->textureAdaptor[0]->pPortPrivates[0].ptr);
2262	}
2263	if (pNv->textureAdaptor[1]) {
2264		NVFreePortMemory(pScrn,
2265				 pNv->textureAdaptor[1]->pPortPrivates[0].ptr);
2266	}
2267}
2268
/* Cubic reconstruction filter used for video scaling, as defined in
 * "Reconstruction Filters in Computer Graphics", Mitchell & Netravali,
 * SIGGRAPH '88, evaluated with B = 0.75 and C = (1 - B) / 2.
 *
 * The inner polynomial is used for |x| < 1 and the outer polynomial for
 * every other input; no cut-off is applied for large |x|.
 */
static float filter_func(float x)
{
	const double B = 0.75;
	const double C = (1.0 - B) / 2.0;
	const double ax  = fabs(x);
	const double ax2 = ax * ax;
	const double ax3 = ax * ax2;

	/* Outer lobe (also taken when the comparison fails, e.g. for NaN). */
	if (!(ax < 1.0))
		return ( (-B-6.0*C)*ax3+(6.0*B+30.0*C)*ax2+(-12.0*B-48.0*C)*ax+(8.0*B+24.0*C) )/6.0;

	/* Inner lobe. */
	return ( (12.0-9.0*B-6.0*C)*ax3+(-18.0+12.0*B+6.0*C)*ax2+(6.0-2.0*B) )/6.0;
}
2286
/* Convert a float to a signed byte by scaling with 127 and truncating
 * toward zero, so 1.0 maps to 127 and -1.0 to -127.
 *
 * Results that do not fit in int8_t saturate to 127 / -128 and NaN maps to
 * 0; converting such values directly would be undefined behaviour.
 */
static int8_t f32tosb8(float v)
{
	const double scaled = v * 127.0;

	if (scaled != scaled)	/* NaN */
		return 0;
	if (scaled > 127.0)
		return 127;
	if (scaled < -128.0)
		return -128;

	return (int8_t)scaled;
}
2291
2292void
2293NVXVComputeBicubicFilter(struct nouveau_bo *bo, unsigned offset, unsigned size)
2294{
2295	int8_t *t = (int8_t *)(bo->map + offset);
2296	int i;
2297
2298	for(i = 0; i < size; i++) {
2299		float  x = (i + 0.5) / size;
2300		float w0 = filter_func(x+1.0);
2301		float w1 = filter_func(x);
2302		float w2 = filter_func(x-1.0);
2303		float w3 = filter_func(x-2.0);
2304
2305		t[4*i+2]=f32tosb8(1.0+x-w1/(w0+w1));
2306		t[4*i+1]=f32tosb8(1.0-x+w3/(w2+w3));
2307		t[4*i+0]=f32tosb8(w0+w1);
2308		t[4*i+3]=f32tosb8(0.0);
2309	}
2310}
2311