/* i915_video.c revision fa225cbc */
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "xf86.h"
33#include "xf86_OSproc.h"
34#include "xf86xv.h"
35#include "fourcc.h"
36
37#include "i830.h"
38#include "i830_video.h"
39#include "i915_reg.h"
40#include "i915_3d.h"
41
42void
43I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
44			 RegionPtr dstRegion,
45			 short width, short height, int video_pitch, int video_pitch2,
46			 int x1, int y1, int x2, int y2,
47			 short src_w, short src_h, short drw_w, short drw_h,
48			 PixmapPtr pPixmap)
49{
50   I830Ptr pI830 = I830PTR(pScrn);
51   uint32_t format, ms3, s5;
52   BoxPtr pbox = REGION_RECTS(dstRegion);
53   int nbox_total = REGION_NUM_RECTS(dstRegion);
54   int nbox_this_time;
55   int dxo, dyo, pix_xoff, pix_yoff;
56   Bool planar;
57
58#if 0
59   ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
60	  video_pitch);
61#endif
62
63   switch (id) {
64   case FOURCC_UYVY:
65   case FOURCC_YUY2:
66      planar = FALSE;
67      break;
68   case FOURCC_YV12:
69   case FOURCC_I420:
70      planar = TRUE;
71      break;
72   default:
73      ErrorF("Unknown format 0x%x\n", id);
74      return;
75   }
76
77#define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
78#define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
79#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)
80
81   while (nbox_total) {
82	nbox_this_time = nbox_total;
83	if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(pI830))
84		nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(pI830));
85	nbox_total -= nbox_this_time;
86
87   intel_batch_start_atomic(pScrn, 200 + 20 * nbox_this_time);
88
89   IntelEmitInvarientState(pScrn);
90   pI830->last_3d = LAST_3D_VIDEO;
91
92   BEGIN_BATCH(20);
93
94   /* flush map & render cache */
95   OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
96   OUT_BATCH(0x00000000);
97
98   /* draw rect -- just clipping */
99   OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
100   OUT_BATCH(DRAW_DITHER_OFS_X(pPixmap->drawable.x & 3) |
101	     DRAW_DITHER_OFS_Y(pPixmap->drawable.y & 3));
102   OUT_BATCH(0x00000000);	/* ymin, xmin */
103   OUT_BATCH((pPixmap->drawable.width - 1) |
104	     (pPixmap->drawable.height - 1) << 16); /* ymax, xmax */
105   OUT_BATCH(0x00000000);	/* yorigin, xorigin */
106   OUT_BATCH(MI_NOOP);
107
108   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
109	     I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
110   OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
111	     S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
112	     S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
113	     S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
114	     S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
115	     S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
116	     S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
117	     S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
118   OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
119	     S4_CULLMODE_NONE | S4_VFMT_XY);
120   s5 = 0x0;
121   if (pI830->cpp == 2)
122      s5 |= S5_COLOR_DITHER_ENABLE;
123   OUT_BATCH(s5); /* S5 - enable bits */
124   OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
125	     (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
126	     (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE |
127	     (2 << S6_TRISTRIP_PV_SHIFT));
128
129   OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
130   OUT_BATCH(0x00000000);
131
132   OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
133   if (pI830->cpp == 2)
134      format = COLR_BUF_RGB565;
135   else
136      format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
137
138   OUT_BATCH(LOD_PRECLAMP_OGL |
139	     DSTORG_HORT_BIAS(0x8) |
140	     DSTORG_VERT_BIAS(0x8) |
141	     format);
142
143   /* front buffer, pitch, offset */
144   OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
145   OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_USE_FENCE |
146	     BUF_3D_PITCH(intel_get_pixmap_pitch(pPixmap)));
147   OUT_RELOC_PIXMAP(pPixmap, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
148   ADVANCE_BATCH();
149
150   if (!planar) {
151      FS_LOCALS(10);
152
153      BEGIN_BATCH(16);
154      OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
155      OUT_BATCH(0x0000001);	/* constant 0 */
156      /* constant 0: brightness/contrast */
157      OUT_BATCH_F(pPriv->brightness / 128.0);
158      OUT_BATCH_F(pPriv->contrast / 255.0);
159      OUT_BATCH_F(0.0);
160      OUT_BATCH_F(0.0);
161
162      OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
163      OUT_BATCH(0x00000001);
164      OUT_BATCH(SS2_COLORSPACE_CONVERSION |
165		(FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
166		(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
167      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
168		(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
169		(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
170		SS3_NORMALIZED_COORDS);
171      OUT_BATCH(0x00000000);
172
173      OUT_BATCH(_3DSTATE_MAP_STATE | 3);
174      OUT_BATCH(0x00000001);	/* texture map #1 */
175      if (pPriv->buf)
176          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset);
177      else
178          OUT_BATCH(pPriv->YBuf0offset);
179
180      ms3 = MAPSURF_422 | MS3_USE_FENCE_REGS;
181      switch (id) {
182      case FOURCC_YUY2:
183	 ms3 |= MT_422_YCRCB_NORMAL;
184	 break;
185      case FOURCC_UYVY:
186	 ms3 |= MT_422_YCRCB_SWAPY;
187	 break;
188      }
189      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
190      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
191      OUT_BATCH(ms3);
192      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
193
194      ADVANCE_BATCH();
195
196      FS_BEGIN();
197      i915_fs_dcl(FS_S0);
198      i915_fs_dcl(FS_T0);
199      i915_fs_texld(FS_OC, FS_S0, FS_T0);
200      if (pPriv->brightness != 0) {
201	  i915_fs_add(FS_OC,
202		      i915_fs_operand_reg(FS_OC),
203		      i915_fs_operand(FS_C0, X, X, X, ZERO));
204      }
205      FS_END();
206   } else {
207      FS_LOCALS(16);
208
209      BEGIN_BATCH(22 + 11 + 11);
210      /* For the planar formats, we set up three samplers -- one for each plane,
211       * in a Y8 format.  Because I couldn't get the special PLANAR_TO_PACKED
212       * shader setup to work, I did the manual pixel shader:
213       *
214       * y' = y - .0625
215       * u' = u - .5
216       * v' = v - .5;
217       *
218       * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
219       * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
220       * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
221       *
222       * register assignment:
223       * r0 = (y',u',v',0)
224       * r1 = (y,y,y,y)
225       * r2 = (u,u,u,u)
226       * r3 = (v,v,v,v)
227       * OC = (r,g,b,1)
228       */
229      OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
230      OUT_BATCH(0x000001f);	/* constants 0-4 */
231      /* constant 0: normalization offsets */
232      OUT_BATCH_F(-0.0625);
233      OUT_BATCH_F(-0.5);
234      OUT_BATCH_F(-0.5);
235      OUT_BATCH_F(0.0);
236      /* constant 1: r coefficients*/
237      OUT_BATCH_F(1.1643);
238      OUT_BATCH_F(0.0);
239      OUT_BATCH_F(1.5958);
240      OUT_BATCH_F(0.0);
241      /* constant 2: g coefficients */
242      OUT_BATCH_F(1.1643);
243      OUT_BATCH_F(-0.39173);
244      OUT_BATCH_F(-0.81290);
245      OUT_BATCH_F(0.0);
246      /* constant 3: b coefficients */
247      OUT_BATCH_F(1.1643);
248      OUT_BATCH_F(2.017);
249      OUT_BATCH_F(0.0);
250      OUT_BATCH_F(0.0);
251      /* constant 4: brightness/contrast */
252      OUT_BATCH_F(pPriv->brightness / 128.0);
253      OUT_BATCH_F(pPriv->contrast / 255.0);
254      OUT_BATCH_F(0.0);
255      OUT_BATCH_F(0.0);
256
257      OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
258      OUT_BATCH(0x00000007);
259      /* sampler 0 */
260      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
261	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
262      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
263	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
264	       (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
265	       SS3_NORMALIZED_COORDS);
266      OUT_BATCH(0x00000000);
267      /* sampler 1 */
268      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
269	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
270      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
271	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
272	       (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
273	       SS3_NORMALIZED_COORDS);
274      OUT_BATCH(0x00000000);
275      /* sampler 2 */
276      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
277		(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
278      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
279		(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
280		(2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
281		SS3_NORMALIZED_COORDS);
282      OUT_BATCH(0x00000000);
283
284      OUT_BATCH(_3DSTATE_MAP_STATE | 9);
285      OUT_BATCH(0x00000007);
286
287      if (pPriv->buf)
288          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset);
289      else
290          OUT_BATCH(pPriv->YBuf0offset);
291
292      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
293      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
294      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
295      OUT_BATCH(ms3);
296      /* check to see if Y has special pitch than normal double u/v pitch,
297       * e.g i915 XvMC hw requires at least 1K alignment, so Y pitch might
298       * be same as U/V's.*/
299      if (video_pitch2)
300	  OUT_BATCH(((video_pitch2 / 4) - 1) << MS4_PITCH_SHIFT);
301      else
302	  OUT_BATCH(((video_pitch * 2 / 4) - 1) << MS4_PITCH_SHIFT);
303
304      if (pPriv->buf)
305          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->UBuf0offset);
306      else
307          OUT_BATCH(pPriv->UBuf0offset);
308
309      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
310      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
311      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
312      OUT_BATCH(ms3);
313      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
314
315      if (pPriv->buf)
316          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->VBuf0offset);
317      else
318          OUT_BATCH(pPriv->VBuf0offset);
319
320      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
321      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
322      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
323      OUT_BATCH(ms3);
324      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
325      ADVANCE_BATCH();
326
327      FS_BEGIN();
328      /* Declare samplers */
329      i915_fs_dcl(FS_S0); /* Y */
330      i915_fs_dcl(FS_S1); /* U */
331      i915_fs_dcl(FS_S2); /* V */
332      i915_fs_dcl(FS_T0); /* normalized coords */
333
334      /* Load samplers to temporaries. */
335      i915_fs_texld(FS_R1, FS_S0, FS_T0);
336      i915_fs_texld(FS_R2, FS_S1, FS_T0);
337      i915_fs_texld(FS_R3, FS_S2, FS_T0);
338
339      /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
340      i915_fs_mov_masked(FS_R0, MASK_X, i915_fs_operand_reg(FS_R1));
341      i915_fs_mov_masked(FS_R0, MASK_Y, i915_fs_operand_reg(FS_R2));
342      i915_fs_mov_masked(FS_R0, MASK_Z, i915_fs_operand_reg(FS_R3));
343
344      /* Normalize the YUV data */
345      i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
346                 i915_fs_operand_reg(FS_C0));
347      /* dot-product the YUV data in R0 by the vectors of coefficients for
348       * calculating R, G, and B, storing the results in the R, G, or B
349       * channels of the output color.  The OC results are implicitly clamped
350       * at the end of the program.
351       */
352      i915_fs_dp3_masked(FS_OC, MASK_X,
353                        i915_fs_operand_reg(FS_R0),
354                        i915_fs_operand_reg(FS_C1));
355      i915_fs_dp3_masked(FS_OC, MASK_Y,
356                        i915_fs_operand_reg(FS_R0),
357                        i915_fs_operand_reg(FS_C2));
358      i915_fs_dp3_masked(FS_OC, MASK_Z,
359                        i915_fs_operand_reg(FS_R0),
360                        i915_fs_operand_reg(FS_C3));
361      /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
362       * the source.
363       */
364      i915_fs_mov_masked(FS_OC, MASK_W, i915_fs_operand_one());
365
366      if (pPriv->brightness != 0) {
367	  i915_fs_add(FS_OC,
368		      i915_fs_operand_reg(FS_OC),
369		      i915_fs_operand(FS_C4, X, X, X, ZERO));
370      }
371      FS_END();
372   }
373
374   {
375      BEGIN_BATCH(2);
376      OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
377      OUT_BATCH(0x00000000);
378      ADVANCE_BATCH();
379   }
380
381   /* Set up the offset for translating from the given region (in screen
382    * coordinates) to the backing pixmap.
383    */
384#ifdef COMPOSITE
385   pix_xoff = -pPixmap->screen_x + pPixmap->drawable.x;
386   pix_yoff = -pPixmap->screen_y + pPixmap->drawable.y;
387#else
388   pix_xoff = 0;
389   pix_yoff = 0;
390#endif
391
392   dxo = dstRegion->extents.x1;
393   dyo = dstRegion->extents.y1;
394
395   while (nbox_this_time--)
396   {
397      int box_x1 = pbox->x1;
398      int box_y1 = pbox->y1;
399      int box_x2 = pbox->x2;
400      int box_y2 = pbox->y2;
401      float src_scale_x, src_scale_y;
402
403      pbox++;
404
405      src_scale_x = ((float)src_w / width) / drw_w;
406      src_scale_y  = ((float)src_h / height) / drw_h;
407
408      BEGIN_BATCH(8 + 12);
409      OUT_BATCH(MI_NOOP);
410      OUT_BATCH(MI_NOOP);
411      OUT_BATCH(MI_NOOP);
412      OUT_BATCH(MI_NOOP);
413      OUT_BATCH(MI_NOOP);
414      OUT_BATCH(MI_NOOP);
415      OUT_BATCH(MI_NOOP);
416
417      /* vertex data - rect list consists of bottom right, bottom left, and top
418       * left vertices.
419       */
420      OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 - 1));
421
422      /* bottom right */
423      OUT_BATCH_F(box_x2 + pix_xoff);
424      OUT_BATCH_F(box_y2 + pix_yoff);
425      OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
426      OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
427
428      /* bottom left */
429      OUT_BATCH_F(box_x1 + pix_xoff);
430      OUT_BATCH_F(box_y2 + pix_yoff);
431      OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
432      OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
433
434      /* top left */
435      OUT_BATCH_F(box_x1 + pix_xoff);
436      OUT_BATCH_F(box_y1 + pix_yoff);
437      OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
438      OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
439
440      ADVANCE_BATCH();
441   }
442
443   intel_batch_end_atomic(pScrn);
444   }
445}
446
447