1/*
2 * Copyright © 2006 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "xorg-server.h"
33#include "xf86.h"
34#include "xf86_OSproc.h"
35#include "xf86xv.h"
36#include "fourcc.h"
37#include "gcstruct.h"
38
39#include "intel.h"
40#include "intel_uxa.h"
41#include "i915_reg.h"
42#include "i915_3d.h"
43
44void
45I915DisplayVideoTextured(ScrnInfoPtr scrn,
46			 intel_adaptor_private *adaptor_priv, int id,
47			 RegionPtr dstRegion,
48			 short width, short height, int video_pitch,
49			 int video_pitch2,
50			 short src_w, short src_h, short drw_w, short drw_h,
51			 PixmapPtr pixmap)
52{
53	intel_screen_private *intel = intel_get_screen_private(scrn);
54	uint32_t format, ms3, s5, tiling;
55	BoxPtr pbox = REGION_RECTS(dstRegion);
56	int nbox_total = REGION_NUM_RECTS(dstRegion);
57	int nbox_this_time;
58	int dxo, dyo, pix_xoff, pix_yoff;
59	PixmapPtr target;
60
61#if 0
62	ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
63	       video_pitch);
64#endif
65
66	dxo = dstRegion->extents.x1;
67	dyo = dstRegion->extents.y1;
68
69	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 ||
70	    !intel_uxa_check_pitch_3d(pixmap)) {
71		ScreenPtr screen = pixmap->drawable.pScreen;
72
73		target = screen->CreatePixmap(screen,
74					      dstRegion->extents.x2 - dxo,
75					      dstRegion->extents.y2 - dyo,
76					      pixmap->drawable.depth,
77					      CREATE_PIXMAP_USAGE_SCRATCH);
78		if (target == NULL)
79			return;
80
81		if (intel_uxa_get_pixmap_bo(target) == NULL) {
82			screen->DestroyPixmap(target);
83			return;
84		}
85
86		pix_xoff = -dxo;
87		pix_yoff = -dyo;
88	} else {
89		target = pixmap;
90
91		/* Set up the offset for translating from the given region
92		 * (in screen coordinates) to the backing pixmap.
93		 */
94#ifdef COMPOSITE
95		pix_xoff = -target->screen_x + target->drawable.x;
96		pix_yoff = -target->screen_y + target->drawable.y;
97#else
98		pix_xoff = 0;
99		pix_yoff = 0;
100#endif
101	}
102
103#define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
104#define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
105#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)
106
107	while (nbox_total) {
108		nbox_this_time = nbox_total;
109		if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel))
110			nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
111		nbox_total -= nbox_this_time;
112
113		intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);
114
115		IntelEmitInvarientState(scrn);
116		intel->last_3d = LAST_3D_VIDEO;
117
118		/* draw rect -- just clipping */
119		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
120		OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
121			  DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
122		OUT_BATCH(0x00000000);	/* ymin, xmin */
123		/* ymax, xmax */
124		OUT_BATCH((target->drawable.width - 1) |
125			  (target->drawable.height - 1) << 16);
126		OUT_BATCH(0x00000000);	/* yorigin, xorigin */
127
128		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
129			  I1_LOAD_S(5) | I1_LOAD_S(6) | 2);
130		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
131			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
132			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
133			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
134			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
135			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
136			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
137			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
138		s5 = 0x0;
139		if (intel->cpp == 2)
140			s5 |= S5_COLOR_DITHER_ENABLE;
141		OUT_BATCH(s5);	/* S5 - enable bits */
142		OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
143			  (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
144			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
145			  S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
146
147		OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
148		OUT_BATCH(0x00000000);
149
150		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
151		if (intel->cpp == 2)
152			format = COLR_BUF_RGB565;
153		else
154			format =
155			    COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
156
157		OUT_BATCH(LOD_PRECLAMP_OGL |
158			  DSTORG_HORT_BIAS(0x8) |
159			  DSTORG_VERT_BIAS(0x8) | format);
160
161		/* front buffer, pitch, offset */
162		if (intel_uxa_pixmap_tiled(target)) {
163			tiling = BUF_3D_TILED_SURFACE;
164			if (intel_uxa_get_pixmap_private(target)->tiling == I915_TILING_Y)
165				tiling |= BUF_3D_TILE_WALK_Y;
166		} else
167			tiling = 0;
168		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
169		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling |
170			  BUF_3D_PITCH(intel_pixmap_pitch(target)));
171		OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER,
172				 I915_GEM_DOMAIN_RENDER, 0);
173
174		if (!is_planar_fourcc(id)) {
175			FS_LOCALS();
176
177			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
178			OUT_BATCH(0x0000001);	/* constant 0 */
179			/* constant 0: brightness/contrast */
180			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
181			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
182			OUT_BATCH_F(0.0);
183			OUT_BATCH_F(0.0);
184
185			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
186			OUT_BATCH(0x00000001);
187			OUT_BATCH(SS2_COLORSPACE_CONVERSION |
188				  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
189				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
190			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
191				   SS3_TCX_ADDR_MODE_SHIFT) |
192				  (TEXCOORDMODE_CLAMP_EDGE <<
193				   SS3_TCY_ADDR_MODE_SHIFT) |
194				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
195				  SS3_NORMALIZED_COORDS);
196			OUT_BATCH(0x00000000);
197
198			OUT_BATCH(_3DSTATE_MAP_STATE | 3);
199			OUT_BATCH(0x00000001);	/* texture map #1 */
200			if (adaptor_priv->buf)
201				OUT_RELOC(adaptor_priv->buf,
202					  I915_GEM_DOMAIN_SAMPLER, 0,
203					  adaptor_priv->YBufOffset);
204			else
205				OUT_BATCH(adaptor_priv->YBufOffset);
206
207			ms3 = MAPSURF_422;
208			switch (id) {
209			case FOURCC_YUY2:
210				ms3 |= MT_422_YCRCB_NORMAL;
211				break;
212			case FOURCC_UYVY:
213				ms3 |= MT_422_YCRCB_SWAPY;
214				break;
215			}
216			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
217			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
218			OUT_BATCH(ms3);
219			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
220
221			FS_BEGIN();
222			i915_fs_dcl(FS_S0);
223			i915_fs_dcl(FS_T0);
224			i915_fs_texld(FS_OC, FS_S0, FS_T0);
225			if (adaptor_priv->brightness != 0) {
226				i915_fs_add(FS_OC,
227					    i915_fs_operand_reg(FS_OC),
228					    i915_fs_operand(FS_C0, X, X, X,
229							    ZERO));
230			}
231			FS_END();
232		} else {
233			FS_LOCALS();
234
235			/* For the planar formats, we set up three samplers --
236			 * one for each plane, in a Y8 format.  Because I
237			 * couldn't get the special PLANAR_TO_PACKED
238			 * shader setup to work, I did the manual pixel shader:
239			 *
240			 * y' = y - .0625
241			 * u' = u - .5
242			 * v' = v - .5;
243			 *
244			 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
245			 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
246			 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
247			 *
248			 * register assignment:
249			 * r0 = (y',u',v',0)
250			 * r1 = (y,y,y,y)
251			 * r2 = (u,u,u,u)
252			 * r3 = (v,v,v,v)
253			 * OC = (r,g,b,1)
254			 */
255			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
256			OUT_BATCH(0x000001f);	/* constants 0-4 */
257			/* constant 0: normalization offsets */
258			OUT_BATCH_F(-0.0625);
259			OUT_BATCH_F(-0.5);
260			OUT_BATCH_F(-0.5);
261			OUT_BATCH_F(0.0);
262			/* constant 1: r coefficients */
263			OUT_BATCH_F(1.1643);
264			OUT_BATCH_F(0.0);
265			OUT_BATCH_F(1.5958);
266			OUT_BATCH_F(0.0);
267			/* constant 2: g coefficients */
268			OUT_BATCH_F(1.1643);
269			OUT_BATCH_F(-0.39173);
270			OUT_BATCH_F(-0.81290);
271			OUT_BATCH_F(0.0);
272			/* constant 3: b coefficients */
273			OUT_BATCH_F(1.1643);
274			OUT_BATCH_F(2.017);
275			OUT_BATCH_F(0.0);
276			OUT_BATCH_F(0.0);
277			/* constant 4: brightness/contrast */
278			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
279			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
280			OUT_BATCH_F(0.0);
281			OUT_BATCH_F(0.0);
282
283			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
284			OUT_BATCH(0x00000007);
285			/* sampler 0 */
286			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
287				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
288			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
289				   SS3_TCX_ADDR_MODE_SHIFT) |
290				  (TEXCOORDMODE_CLAMP_EDGE <<
291				   SS3_TCY_ADDR_MODE_SHIFT) |
292				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
293				  SS3_NORMALIZED_COORDS);
294			OUT_BATCH(0x00000000);
295			/* sampler 1 */
296			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
297				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
298			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
299				   SS3_TCX_ADDR_MODE_SHIFT) |
300				  (TEXCOORDMODE_CLAMP_EDGE <<
301				   SS3_TCY_ADDR_MODE_SHIFT) |
302				  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
303				  SS3_NORMALIZED_COORDS);
304			OUT_BATCH(0x00000000);
305			/* sampler 2 */
306			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
307				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
308			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
309				   SS3_TCX_ADDR_MODE_SHIFT) |
310				  (TEXCOORDMODE_CLAMP_EDGE <<
311				   SS3_TCY_ADDR_MODE_SHIFT) |
312				  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
313				  SS3_NORMALIZED_COORDS);
314			OUT_BATCH(0x00000000);
315
316			OUT_BATCH(_3DSTATE_MAP_STATE | 9);
317			OUT_BATCH(0x00000007);
318
319			if (adaptor_priv->buf)
320				OUT_RELOC(adaptor_priv->buf,
321					  I915_GEM_DOMAIN_SAMPLER, 0,
322					  adaptor_priv->YBufOffset);
323			else
324				OUT_BATCH(adaptor_priv->YBufOffset);
325
326			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
327			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
328			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
329			OUT_BATCH(ms3);
330			/* check to see if Y has special pitch than normal
331			 * double u/v pitch, e.g i915 XvMC hw requires at
332			 * least 1K alignment, so Y pitch might
333			 * be same as U/V's.*/
334			if (video_pitch2)
335				OUT_BATCH(((video_pitch2 / 4) -
336					   1) << MS4_PITCH_SHIFT);
337			else
338				OUT_BATCH(((video_pitch * 2 / 4) -
339					   1) << MS4_PITCH_SHIFT);
340
341			if (adaptor_priv->buf)
342				OUT_RELOC(adaptor_priv->buf,
343					  I915_GEM_DOMAIN_SAMPLER, 0,
344					  adaptor_priv->UBufOffset);
345			else
346				OUT_BATCH(adaptor_priv->UBufOffset);
347
348			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
349			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
350			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
351			OUT_BATCH(ms3);
352			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
353
354			if (adaptor_priv->buf)
355				OUT_RELOC(adaptor_priv->buf,
356					  I915_GEM_DOMAIN_SAMPLER, 0,
357					  adaptor_priv->VBufOffset);
358			else
359				OUT_BATCH(adaptor_priv->VBufOffset);
360
361			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
362			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
363			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
364			OUT_BATCH(ms3);
365			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
366
367			FS_BEGIN();
368			/* Declare samplers */
369			i915_fs_dcl(FS_S0);	/* Y */
370			i915_fs_dcl(FS_S1);	/* U */
371			i915_fs_dcl(FS_S2);	/* V */
372			i915_fs_dcl(FS_T0);	/* normalized coords */
373
374			/* Load samplers to temporaries. */
375			i915_fs_texld(FS_R1, FS_S0, FS_T0);
376			i915_fs_texld(FS_R2, FS_S1, FS_T0);
377			i915_fs_texld(FS_R3, FS_S2, FS_T0);
378
379			/* Move the sampled YUV data in R[123] to the first
380			 * 3 channels of R0.
381			 */
382			i915_fs_mov_masked(FS_R0, MASK_X,
383					   i915_fs_operand_reg(FS_R1));
384			i915_fs_mov_masked(FS_R0, MASK_Y,
385					   i915_fs_operand_reg(FS_R2));
386			i915_fs_mov_masked(FS_R0, MASK_Z,
387					   i915_fs_operand_reg(FS_R3));
388
389			/* Normalize the YUV data */
390			i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
391				    i915_fs_operand_reg(FS_C0));
392			/* dot-product the YUV data in R0 by the vectors of
393			 * coefficients for calculating R, G, and B, storing
394			 * the results in the R, G, or B channels of the output
395			 * color.  The OC results are implicitly clamped
396			 * at the end of the program.
397			 */
398			i915_fs_dp3(FS_OC, MASK_X,
399				    i915_fs_operand_reg(FS_R0),
400				    i915_fs_operand_reg(FS_C1));
401			i915_fs_dp3(FS_OC, MASK_Y,
402				    i915_fs_operand_reg(FS_R0),
403				    i915_fs_operand_reg(FS_C2));
404			i915_fs_dp3(FS_OC, MASK_Z,
405				    i915_fs_operand_reg(FS_R0),
406				    i915_fs_operand_reg(FS_C3));
407			/* Set alpha of the output to 1.0, by wiring W to 1
408			 * and not actually using the source.
409			 */
410			i915_fs_mov_masked(FS_OC, MASK_W,
411					   i915_fs_operand_one());
412
413			if (adaptor_priv->brightness != 0) {
414				i915_fs_add(FS_OC,
415					    i915_fs_operand_reg(FS_OC),
416					    i915_fs_operand(FS_C4, X, X, X,
417							    ZERO));
418			}
419			FS_END();
420		}
421
422		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
423		while (nbox_this_time--) {
424			int box_x1 = pbox->x1;
425			int box_y1 = pbox->y1;
426			int box_x2 = pbox->x2;
427			int box_y2 = pbox->y2;
428			float src_scale_x, src_scale_y;
429
430			pbox++;
431
432			src_scale_x = ((float)src_w / width) / drw_w;
433			src_scale_y = ((float)src_h / height) / drw_h;
434
435			/* vertex data - rect list consists of bottom right,
436			 * bottom left, and top left vertices.
437			 */
438
439			/* bottom right */
440			OUT_BATCH_F(box_x2 + pix_xoff);
441			OUT_BATCH_F(box_y2 + pix_yoff);
442			OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
443			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
444
445			/* bottom left */
446			OUT_BATCH_F(box_x1 + pix_xoff);
447			OUT_BATCH_F(box_y2 + pix_yoff);
448			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
449			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
450
451			/* top left */
452			OUT_BATCH_F(box_x1 + pix_xoff);
453			OUT_BATCH_F(box_y1 + pix_yoff);
454			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
455			OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
456		}
457
458		intel_batch_end_atomic(scrn);
459	}
460
461	if (target != pixmap) {
462		GCPtr gc;
463
464		gc = GetScratchGC(pixmap->drawable.depth,
465				  pixmap->drawable.pScreen);
466		if (gc) {
467			gc->subWindowMode = ClipByChildren;
468
469			if (REGION_NUM_RECTS(dstRegion) > 1) {
470				RegionPtr tmp;
471
472				tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0);
473				if (tmp) {
474					REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion);
475					gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0);
476				}
477			}
478
479			ValidateGC(&pixmap->drawable, gc);
480			gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc,
481					  0, 0,
482					  target->drawable.width,
483					  target->drawable.height,
484					  -pix_xoff, -pix_yoff);
485			FreeScratchGC(gc);
486		}
487
488		target->drawable.pScreen->DestroyPixmap(target);
489	}
490
491	intel_uxa_debug_flush(scrn);
492}
493