i915_video.c revision 03b705cf
1/*
2 * Copyright © 2006 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "xf86.h"
33#include "xf86_OSproc.h"
34#include "xf86xv.h"
35#include "fourcc.h"
36#include "gcstruct.h"
37
38#include "intel.h"
39#include "intel_video.h"
40#include "i915_reg.h"
41#include "i915_3d.h"
42
43void
44I915DisplayVideoTextured(ScrnInfoPtr scrn,
45			 intel_adaptor_private *adaptor_priv, int id,
46			 RegionPtr dstRegion,
47			 short width, short height, int video_pitch,
48			 int video_pitch2,
49			 short src_w, short src_h, short drw_w, short drw_h,
50			 PixmapPtr pixmap)
51{
52	intel_screen_private *intel = intel_get_screen_private(scrn);
53	uint32_t format, ms3, s5, tiling;
54	BoxPtr pbox = REGION_RECTS(dstRegion);
55	int nbox_total = REGION_NUM_RECTS(dstRegion);
56	int nbox_this_time;
57	int dxo, dyo, pix_xoff, pix_yoff;
58	PixmapPtr target;
59
60#if 0
61	ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
62	       video_pitch);
63#endif
64
65	dxo = dstRegion->extents.x1;
66	dyo = dstRegion->extents.y1;
67
68	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 ||
69	    !intel_check_pitch_3d(pixmap)) {
70		ScreenPtr screen = pixmap->drawable.pScreen;
71
72		target = screen->CreatePixmap(screen,
73					      dstRegion->extents.x2 - dxo,
74					      dstRegion->extents.y2 - dyo,
75					      pixmap->drawable.depth,
76					      CREATE_PIXMAP_USAGE_SCRATCH);
77		if (target == NULL)
78			return;
79
80		pix_xoff = -dxo;
81		pix_yoff = -dyo;
82	} else {
83		target = pixmap;
84
85		/* Set up the offset for translating from the given region
86		 * (in screen coordinates) to the backing pixmap.
87		 */
88#ifdef COMPOSITE
89		pix_xoff = -target->screen_x + target->drawable.x;
90		pix_yoff = -target->screen_y + target->drawable.y;
91#else
92		pix_xoff = 0;
93		pix_yoff = 0;
94#endif
95	}
96
97#define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
98#define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
99#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)
100
101	while (nbox_total) {
102		nbox_this_time = nbox_total;
103		if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel))
104			nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
105		nbox_total -= nbox_this_time;
106
107		intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);
108
109		IntelEmitInvarientState(scrn);
110		intel->last_3d = LAST_3D_VIDEO;
111
112		/* draw rect -- just clipping */
113		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
114		OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
115			  DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
116		OUT_BATCH(0x00000000);	/* ymin, xmin */
117		/* ymax, xmax */
118		OUT_BATCH((target->drawable.width - 1) |
119			  (target->drawable.height - 1) << 16);
120		OUT_BATCH(0x00000000);	/* yorigin, xorigin */
121
122		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
123			  I1_LOAD_S(5) | I1_LOAD_S(6) | 2);
124		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
125			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
126			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
127			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
128			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
129			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
130			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
131			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
132		s5 = 0x0;
133		if (intel->cpp == 2)
134			s5 |= S5_COLOR_DITHER_ENABLE;
135		OUT_BATCH(s5);	/* S5 - enable bits */
136		OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
137			  (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
138			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
139			  S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
140
141		OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
142		OUT_BATCH(0x00000000);
143
144		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
145		if (intel->cpp == 2)
146			format = COLR_BUF_RGB565;
147		else
148			format =
149			    COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
150
151		OUT_BATCH(LOD_PRECLAMP_OGL |
152			  DSTORG_HORT_BIAS(0x8) |
153			  DSTORG_VERT_BIAS(0x8) | format);
154
155		/* front buffer, pitch, offset */
156		if (intel_pixmap_tiled(target)) {
157			tiling = BUF_3D_TILED_SURFACE;
158			if (intel_get_pixmap_private(target)->tiling == I915_TILING_Y)
159				tiling |= BUF_3D_TILE_WALK_Y;
160		} else
161			tiling = 0;
162		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
163		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling |
164			  BUF_3D_PITCH(intel_pixmap_pitch(target)));
165		OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER,
166				 I915_GEM_DOMAIN_RENDER, 0);
167
168		if (!is_planar_fourcc(id)) {
169			FS_LOCALS();
170
171			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
172			OUT_BATCH(0x0000001);	/* constant 0 */
173			/* constant 0: brightness/contrast */
174			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
175			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
176			OUT_BATCH_F(0.0);
177			OUT_BATCH_F(0.0);
178
179			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
180			OUT_BATCH(0x00000001);
181			OUT_BATCH(SS2_COLORSPACE_CONVERSION |
182				  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
183				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
184			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
185				   SS3_TCX_ADDR_MODE_SHIFT) |
186				  (TEXCOORDMODE_CLAMP_EDGE <<
187				   SS3_TCY_ADDR_MODE_SHIFT) |
188				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
189				  SS3_NORMALIZED_COORDS);
190			OUT_BATCH(0x00000000);
191
192			OUT_BATCH(_3DSTATE_MAP_STATE | 3);
193			OUT_BATCH(0x00000001);	/* texture map #1 */
194			if (adaptor_priv->buf)
195				OUT_RELOC(adaptor_priv->buf,
196					  I915_GEM_DOMAIN_SAMPLER, 0,
197					  adaptor_priv->YBufOffset);
198			else
199				OUT_BATCH(adaptor_priv->YBufOffset);
200
201			ms3 = MAPSURF_422;
202			switch (id) {
203			case FOURCC_YUY2:
204				ms3 |= MT_422_YCRCB_NORMAL;
205				break;
206			case FOURCC_UYVY:
207				ms3 |= MT_422_YCRCB_SWAPY;
208				break;
209			}
210			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
211			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
212			OUT_BATCH(ms3);
213			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
214
215			FS_BEGIN();
216			i915_fs_dcl(FS_S0);
217			i915_fs_dcl(FS_T0);
218			i915_fs_texld(FS_OC, FS_S0, FS_T0);
219			if (adaptor_priv->brightness != 0) {
220				i915_fs_add(FS_OC,
221					    i915_fs_operand_reg(FS_OC),
222					    i915_fs_operand(FS_C0, X, X, X,
223							    ZERO));
224			}
225			FS_END();
226		} else {
227			FS_LOCALS();
228
229			/* For the planar formats, we set up three samplers --
230			 * one for each plane, in a Y8 format.  Because I
231			 * couldn't get the special PLANAR_TO_PACKED
232			 * shader setup to work, I did the manual pixel shader:
233			 *
234			 * y' = y - .0625
235			 * u' = u - .5
236			 * v' = v - .5;
237			 *
238			 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
239			 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
240			 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
241			 *
242			 * register assignment:
243			 * r0 = (y',u',v',0)
244			 * r1 = (y,y,y,y)
245			 * r2 = (u,u,u,u)
246			 * r3 = (v,v,v,v)
247			 * OC = (r,g,b,1)
248			 */
249			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
250			OUT_BATCH(0x000001f);	/* constants 0-4 */
251			/* constant 0: normalization offsets */
252			OUT_BATCH_F(-0.0625);
253			OUT_BATCH_F(-0.5);
254			OUT_BATCH_F(-0.5);
255			OUT_BATCH_F(0.0);
256			/* constant 1: r coefficients */
257			OUT_BATCH_F(1.1643);
258			OUT_BATCH_F(0.0);
259			OUT_BATCH_F(1.5958);
260			OUT_BATCH_F(0.0);
261			/* constant 2: g coefficients */
262			OUT_BATCH_F(1.1643);
263			OUT_BATCH_F(-0.39173);
264			OUT_BATCH_F(-0.81290);
265			OUT_BATCH_F(0.0);
266			/* constant 3: b coefficients */
267			OUT_BATCH_F(1.1643);
268			OUT_BATCH_F(2.017);
269			OUT_BATCH_F(0.0);
270			OUT_BATCH_F(0.0);
271			/* constant 4: brightness/contrast */
272			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
273			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
274			OUT_BATCH_F(0.0);
275			OUT_BATCH_F(0.0);
276
277			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
278			OUT_BATCH(0x00000007);
279			/* sampler 0 */
280			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
281				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
282			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
283				   SS3_TCX_ADDR_MODE_SHIFT) |
284				  (TEXCOORDMODE_CLAMP_EDGE <<
285				   SS3_TCY_ADDR_MODE_SHIFT) |
286				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
287				  SS3_NORMALIZED_COORDS);
288			OUT_BATCH(0x00000000);
289			/* sampler 1 */
290			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
291				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
292			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
293				   SS3_TCX_ADDR_MODE_SHIFT) |
294				  (TEXCOORDMODE_CLAMP_EDGE <<
295				   SS3_TCY_ADDR_MODE_SHIFT) |
296				  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
297				  SS3_NORMALIZED_COORDS);
298			OUT_BATCH(0x00000000);
299			/* sampler 2 */
300			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
301				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
302			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
303				   SS3_TCX_ADDR_MODE_SHIFT) |
304				  (TEXCOORDMODE_CLAMP_EDGE <<
305				   SS3_TCY_ADDR_MODE_SHIFT) |
306				  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
307				  SS3_NORMALIZED_COORDS);
308			OUT_BATCH(0x00000000);
309
310			OUT_BATCH(_3DSTATE_MAP_STATE | 9);
311			OUT_BATCH(0x00000007);
312
313			if (adaptor_priv->buf)
314				OUT_RELOC(adaptor_priv->buf,
315					  I915_GEM_DOMAIN_SAMPLER, 0,
316					  adaptor_priv->YBufOffset);
317			else
318				OUT_BATCH(adaptor_priv->YBufOffset);
319
320			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
321			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
322			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
323			OUT_BATCH(ms3);
324			/* check to see if Y has special pitch than normal
325			 * double u/v pitch, e.g i915 XvMC hw requires at
326			 * least 1K alignment, so Y pitch might
327			 * be same as U/V's.*/
328			if (video_pitch2)
329				OUT_BATCH(((video_pitch2 / 4) -
330					   1) << MS4_PITCH_SHIFT);
331			else
332				OUT_BATCH(((video_pitch * 2 / 4) -
333					   1) << MS4_PITCH_SHIFT);
334
335			if (adaptor_priv->buf)
336				OUT_RELOC(adaptor_priv->buf,
337					  I915_GEM_DOMAIN_SAMPLER, 0,
338					  adaptor_priv->UBufOffset);
339			else
340				OUT_BATCH(adaptor_priv->UBufOffset);
341
342			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
343			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
344			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
345			OUT_BATCH(ms3);
346			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
347
348			if (adaptor_priv->buf)
349				OUT_RELOC(adaptor_priv->buf,
350					  I915_GEM_DOMAIN_SAMPLER, 0,
351					  adaptor_priv->VBufOffset);
352			else
353				OUT_BATCH(adaptor_priv->VBufOffset);
354
355			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
356			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
357			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
358			OUT_BATCH(ms3);
359			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
360
361			FS_BEGIN();
362			/* Declare samplers */
363			i915_fs_dcl(FS_S0);	/* Y */
364			i915_fs_dcl(FS_S1);	/* U */
365			i915_fs_dcl(FS_S2);	/* V */
366			i915_fs_dcl(FS_T0);	/* normalized coords */
367
368			/* Load samplers to temporaries. */
369			i915_fs_texld(FS_R1, FS_S0, FS_T0);
370			i915_fs_texld(FS_R2, FS_S1, FS_T0);
371			i915_fs_texld(FS_R3, FS_S2, FS_T0);
372
373			/* Move the sampled YUV data in R[123] to the first
374			 * 3 channels of R0.
375			 */
376			i915_fs_mov_masked(FS_R0, MASK_X,
377					   i915_fs_operand_reg(FS_R1));
378			i915_fs_mov_masked(FS_R0, MASK_Y,
379					   i915_fs_operand_reg(FS_R2));
380			i915_fs_mov_masked(FS_R0, MASK_Z,
381					   i915_fs_operand_reg(FS_R3));
382
383			/* Normalize the YUV data */
384			i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
385				    i915_fs_operand_reg(FS_C0));
386			/* dot-product the YUV data in R0 by the vectors of
387			 * coefficients for calculating R, G, and B, storing
388			 * the results in the R, G, or B channels of the output
389			 * color.  The OC results are implicitly clamped
390			 * at the end of the program.
391			 */
392			i915_fs_dp3(FS_OC, MASK_X,
393				    i915_fs_operand_reg(FS_R0),
394				    i915_fs_operand_reg(FS_C1));
395			i915_fs_dp3(FS_OC, MASK_Y,
396				    i915_fs_operand_reg(FS_R0),
397				    i915_fs_operand_reg(FS_C2));
398			i915_fs_dp3(FS_OC, MASK_Z,
399				    i915_fs_operand_reg(FS_R0),
400				    i915_fs_operand_reg(FS_C3));
401			/* Set alpha of the output to 1.0, by wiring W to 1
402			 * and not actually using the source.
403			 */
404			i915_fs_mov_masked(FS_OC, MASK_W,
405					   i915_fs_operand_one());
406
407			if (adaptor_priv->brightness != 0) {
408				i915_fs_add(FS_OC,
409					    i915_fs_operand_reg(FS_OC),
410					    i915_fs_operand(FS_C4, X, X, X,
411							    ZERO));
412			}
413			FS_END();
414		}
415
416		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
417		while (nbox_this_time--) {
418			int box_x1 = pbox->x1;
419			int box_y1 = pbox->y1;
420			int box_x2 = pbox->x2;
421			int box_y2 = pbox->y2;
422			float src_scale_x, src_scale_y;
423
424			pbox++;
425
426			src_scale_x = ((float)src_w / width) / drw_w;
427			src_scale_y = ((float)src_h / height) / drw_h;
428
429			/* vertex data - rect list consists of bottom right,
430			 * bottom left, and top left vertices.
431			 */
432
433			/* bottom right */
434			OUT_BATCH_F(box_x2 + pix_xoff);
435			OUT_BATCH_F(box_y2 + pix_yoff);
436			OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
437			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
438
439			/* bottom left */
440			OUT_BATCH_F(box_x1 + pix_xoff);
441			OUT_BATCH_F(box_y2 + pix_yoff);
442			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
443			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
444
445			/* top left */
446			OUT_BATCH_F(box_x1 + pix_xoff);
447			OUT_BATCH_F(box_y1 + pix_yoff);
448			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
449			OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
450		}
451
452		intel_batch_end_atomic(scrn);
453	}
454
455	if (target != pixmap) {
456		GCPtr gc;
457
458		gc = GetScratchGC(pixmap->drawable.depth,
459				  pixmap->drawable.pScreen);
460		if (gc) {
461			gc->subWindowMode = ClipByChildren;
462
463			if (REGION_NUM_RECTS(dstRegion) > 1) {
464				RegionPtr tmp;
465
466				tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0);
467				if (tmp) {
468					REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion);
469					gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0);
470				}
471			}
472
473			ValidateGC(&pixmap->drawable, gc);
474			gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc,
475					  0, 0,
476					  target->drawable.width,
477					  target->drawable.height,
478					  -pix_xoff, -pix_yoff);
479			FreeScratchGC(gc);
480		}
481
482		target->drawable.pScreen->DestroyPixmap(target);
483	}
484
485	intel_debug_flush(scrn);
486}
487