1fa225cbcSrjs/*
2fa225cbcSrjs * Copyright © 2006 Intel Corporation
3fa225cbcSrjs *
4fa225cbcSrjs * Permission is hereby granted, free of charge, to any person obtaining a
5fa225cbcSrjs * copy of this software and associated documentation files (the "Software"),
6fa225cbcSrjs * to deal in the Software without restriction, including without limitation
7fa225cbcSrjs * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8fa225cbcSrjs * and/or sell copies of the Software, and to permit persons to whom the
9fa225cbcSrjs * Software is furnished to do so, subject to the following conditions:
10fa225cbcSrjs *
11fa225cbcSrjs * The above copyright notice and this permission notice (including the next
12fa225cbcSrjs * paragraph) shall be included in all copies or substantial portions of the
13fa225cbcSrjs * Software.
14fa225cbcSrjs *
15fa225cbcSrjs * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16fa225cbcSrjs * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17fa225cbcSrjs * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18fa225cbcSrjs * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19fa225cbcSrjs * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20fa225cbcSrjs * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21fa225cbcSrjs * SOFTWARE.
22fa225cbcSrjs *
23fa225cbcSrjs * Authors:
24fa225cbcSrjs *    Eric Anholt <eric@anholt.net>
25fa225cbcSrjs *
26fa225cbcSrjs */
27fa225cbcSrjs
28fa225cbcSrjs#ifdef HAVE_CONFIG_H
29fa225cbcSrjs#include "config.h"
30fa225cbcSrjs#endif
31fa225cbcSrjs
32fa225cbcSrjs#include "xf86.h"
33fa225cbcSrjs#include "xf86_OSproc.h"
34fa225cbcSrjs#include "xf86xv.h"
35fa225cbcSrjs#include "fourcc.h"
36fa225cbcSrjs
37fa225cbcSrjs#include "i830.h"
38fa225cbcSrjs#include "i830_video.h"
39fa225cbcSrjs#include "i915_reg.h"
40fa225cbcSrjs#include "i915_3d.h"
41fa225cbcSrjs
/*
 * Emit the 3D batch commands that draw a video frame into pPixmap as
 * textured rectangles, one rectangle per box of dstRegion.
 *
 * pScrn         - screen; yields the driver private via I830PTR() and is
 *                 handed to the batch helpers.
 * pPriv         - port private; supplies brightness, contrast, buf and the
 *                 YBuf0offset/UBuf0offset/VBuf0offset values used below.
 * id            - FOURCC of the source image.  UYVY and YUY2 take the packed
 *                 path, YV12 and I420 take the planar path; any other value
 *                 is reported through ErrorF() and the function returns
 *                 before emitting anything.
 * dstRegion     - boxes to draw; the top-left corner of its extents is the
 *                 origin for the texture coordinates.
 * width, height - source dimensions programmed into the map state and used
 *                 to normalize the texture coordinates.
 * video_pitch   - pitch programmed for the packed surface, or for the U and
 *                 V planes on the planar path.
 * video_pitch2  - when non-zero, pitch programmed for the Y plane on the
 *                 planar path; when zero, video_pitch * 2 is used instead.
 * x1, y1, x2, y2 - not referenced anywhere in this function.
 * src_w, src_h, drw_w, drw_h - only used to derive the scale from
 *                 destination pixels to normalized texture coordinates.
 * pPixmap       - destination; programmed as the color buffer and used for
 *                 the drawing rectangle and dither offsets.
 */
42fa225cbcSrjsvoid
43fa225cbcSrjsI915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
44fa225cbcSrjs			 RegionPtr dstRegion,
45fa225cbcSrjs			 short width, short height, int video_pitch, int video_pitch2,
46fa225cbcSrjs			 int x1, int y1, int x2, int y2,
47fa225cbcSrjs			 short src_w, short src_h, short drw_w, short drw_h,
48fa225cbcSrjs			 PixmapPtr pPixmap)
49fa225cbcSrjs{
50fa225cbcSrjs   I830Ptr pI830 = I830PTR(pScrn);
51fa225cbcSrjs   uint32_t format, ms3, s5;
   /* pbox walks the region's boxes across all chunks; nbox_total counts the
    * boxes that have not yet been assigned to a chunk.
    */
52fa225cbcSrjs   BoxPtr pbox = REGION_RECTS(dstRegion);
53fa225cbcSrjs   int nbox_total = REGION_NUM_RECTS(dstRegion);
54fa225cbcSrjs   int nbox_this_time;
55fa225cbcSrjs   int dxo, dyo, pix_xoff, pix_yoff;
56fa225cbcSrjs   Bool planar;
57fa225cbcSrjs
58fa225cbcSrjs#if 0
59fa225cbcSrjs   ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
60fa225cbcSrjs	  video_pitch);
61fa225cbcSrjs#endif
62fa225cbcSrjs
   /* Classify the FOURCC as packed (single surface) or planar (three
    * surfaces); unknown formats bail out before any batch space is used.
    */
63fa225cbcSrjs   switch (id) {
64fa225cbcSrjs   case FOURCC_UYVY:
65fa225cbcSrjs   case FOURCC_YUY2:
66fa225cbcSrjs      planar = FALSE;
67fa225cbcSrjs      break;
68fa225cbcSrjs   case FOURCC_YV12:
69fa225cbcSrjs   case FOURCC_I420:
70fa225cbcSrjs      planar = TRUE;
71fa225cbcSrjs      break;
72fa225cbcSrjs   default:
73fa225cbcSrjs      ErrorF("Unknown format 0x%x\n", id);
74fa225cbcSrjs      return;
75fa225cbcSrjs   }
76fa225cbcSrjs
   /* Batch sizing helpers.  The same "200 + 20 * n" figure is passed to
    * intel_batch_start_atomic() below, and each box emits exactly 20 batch
    * entries (BEGIN_BATCH(8 + 12)); the 200 is the allowance for the state
    * setup emitted once per chunk.
    * NOTE(review): the "* 4" presumably converts dwords to bytes and the
    * "- 16" presumably keeps some tail space in the batch buffer -- confirm
    * against the batch buffer code.
    * NOTE(review): these macros are not #undef'd anywhere in the visible
    * code, so they stay defined after this function.
    */
77fa225cbcSrjs#define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
78fa225cbcSrjs#define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
79fa225cbcSrjs#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)
80fa225cbcSrjs
   /* Process the boxes in chunks, each chunk being emitted inside one
    * intel_batch_start_atomic()/intel_batch_end_atomic() pair.  Despite the
    * indentation, the body of this loop runs all the way down to the closing
    * brace that follows intel_batch_end_atomic(); the full state setup is
    * therefore re-emitted for every chunk.
    * NOTE(review): if BOXES_IN_BYTES(BATCH_BYTES(pI830)) ever evaluated to 0,
    * nbox_this_time would be 0, nbox_total would never shrink and this loop
    * would not terminate -- presumably the batch buffer is always large
    * enough; confirm.
    */
81fa225cbcSrjs   while (nbox_total) {
82fa225cbcSrjs	nbox_this_time = nbox_total;
83fa225cbcSrjs	if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(pI830))
84fa225cbcSrjs		nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(pI830));
85fa225cbcSrjs	nbox_total -= nbox_this_time;
86fa225cbcSrjs
87fa225cbcSrjs   intel_batch_start_atomic(pScrn, 200 + 20 * nbox_this_time);
88fa225cbcSrjs
89fa225cbcSrjs   IntelEmitInvarientState(pScrn);
90fa225cbcSrjs   pI830->last_3d = LAST_3D_VIDEO;
91fa225cbcSrjs
   /* 20 entries: flush (2) + draw rect (6, including the trailing MI_NOOP) +
    * immediate state S2/S4/S5/S6 (5) + blend color (2) + destination buffer
    * variables (2) + buffer info (3).
    */
92fa225cbcSrjs   BEGIN_BATCH(20);
93fa225cbcSrjs
94fa225cbcSrjs   /* flush map & render cache */
95fa225cbcSrjs   OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
96fa225cbcSrjs   OUT_BATCH(0x00000000);
97fa225cbcSrjs
98fa225cbcSrjs   /* draw rect -- just clipping */
99fa225cbcSrjs   OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
100fa225cbcSrjs   OUT_BATCH(DRAW_DITHER_OFS_X(pPixmap->drawable.x & 3) |
101fa225cbcSrjs	     DRAW_DITHER_OFS_Y(pPixmap->drawable.y & 3));
102fa225cbcSrjs   OUT_BATCH(0x00000000);	/* ymin, xmin */
   /* "<<" binds tighter than "|": (height - 1) lands in the upper 16 bits
    * and (width - 1) in the lower bits.
    */
103fa225cbcSrjs   OUT_BATCH((pPixmap->drawable.width - 1) |
104fa225cbcSrjs	     (pPixmap->drawable.height - 1) << 16); /* ymax, xmax */
105fa225cbcSrjs   OUT_BATCH(0x00000000);	/* yorigin, xorigin */
106fa225cbcSrjs   OUT_BATCH(MI_NOOP);
107fa225cbcSrjs
   /* Load S2, S4, S5 and S6: one 2D texture coordinate set (set 0), all
    * other sets marked not present, XY-only vertex positions.
    */
108fa225cbcSrjs   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
109fa225cbcSrjs	     I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
110fa225cbcSrjs   OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
111fa225cbcSrjs	     S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
112fa225cbcSrjs	     S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
113fa225cbcSrjs	     S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
114fa225cbcSrjs	     S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
115fa225cbcSrjs	     S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
116fa225cbcSrjs	     S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
117fa225cbcSrjs	     S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
118fa225cbcSrjs   OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
119fa225cbcSrjs	     S4_CULLMODE_NONE | S4_VFMT_XY);
   /* Dithering is only switched on when pI830->cpp is 2, the same case that
    * selects the RGB565 destination format further down.
    */
120fa225cbcSrjs   s5 = 0x0;
121fa225cbcSrjs   if (pI830->cpp == 2)
122fa225cbcSrjs      s5 |= S5_COLOR_DITHER_ENABLE;
123fa225cbcSrjs   OUT_BATCH(s5); /* S5 - enable bits */
124fa225cbcSrjs   OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
125fa225cbcSrjs	     (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
126fa225cbcSrjs	     (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE |
127fa225cbcSrjs	     (2 << S6_TRISTRIP_PV_SHIFT));
128fa225cbcSrjs
129fa225cbcSrjs   OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
130fa225cbcSrjs   OUT_BATCH(0x00000000);
131fa225cbcSrjs
   /* Destination color format: RGB565 when cpp is 2, ARGB8888 otherwise. */
132fa225cbcSrjs   OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
133fa225cbcSrjs   if (pI830->cpp == 2)
134fa225cbcSrjs      format = COLR_BUF_RGB565;
135fa225cbcSrjs   else
136fa225cbcSrjs      format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
137fa225cbcSrjs
138fa225cbcSrjs   OUT_BATCH(LOD_PRECLAMP_OGL |
139fa225cbcSrjs	     DSTORG_HORT_BIAS(0x8) |
140fa225cbcSrjs	     DSTORG_VERT_BIAS(0x8) |
141fa225cbcSrjs	     format);
142fa225cbcSrjs
143fa225cbcSrjs   /* front buffer, pitch, offset */
144fa225cbcSrjs   OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
145fa225cbcSrjs   OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_USE_FENCE |
146fa225cbcSrjs	     BUF_3D_PITCH(intel_get_pixmap_pitch(pPixmap)));
147fa225cbcSrjs   OUT_RELOC_PIXMAP(pPixmap, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
148fa225cbcSrjs   ADVANCE_BATCH();
149fa225cbcSrjs
   /* Packed path (UYVY / YUY2): a single 4:2:2 map sampled with
    * SS2_COLORSPACE_CONVERSION set on the sampler.
    */
150fa225cbcSrjs   if (!planar) {
      /* NOTE(review): FS_LOCALS is defined outside this file; its argument
       * presumably bounds the number of shader instructions issued between
       * FS_BEGIN() and FS_END() -- confirm.
       */
151fa225cbcSrjs      FS_LOCALS(10);
152fa225cbcSrjs
      /* 16 entries: shader constants (6) + sampler state (5) + map state (5). */
153fa225cbcSrjs      BEGIN_BATCH(16);
154fa225cbcSrjs      OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
155fa225cbcSrjs      OUT_BATCH(0x0000001);	/* constant 0 */
156fa225cbcSrjs      /* constant 0: brightness/contrast */
      /* Only the X channel (brightness) of this constant is read by the
       * shader assembled below; the contrast value is uploaded but not
       * referenced in this function.
       */
157fa225cbcSrjs      OUT_BATCH_F(pPriv->brightness / 128.0);
158fa225cbcSrjs      OUT_BATCH_F(pPriv->contrast / 255.0);
159fa225cbcSrjs      OUT_BATCH_F(0.0);
160fa225cbcSrjs      OUT_BATCH_F(0.0);
161fa225cbcSrjs
      /* One sampler: linear min/mag filtering, clamp-to-edge addressing,
       * normalized coordinates, bound to texture map index 0.
       */
162fa225cbcSrjs      OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
163fa225cbcSrjs      OUT_BATCH(0x00000001);
164fa225cbcSrjs      OUT_BATCH(SS2_COLORSPACE_CONVERSION |
165fa225cbcSrjs		(FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
166fa225cbcSrjs		(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
167fa225cbcSrjs      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
168fa225cbcSrjs		(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
169fa225cbcSrjs		(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
170fa225cbcSrjs		SS3_NORMALIZED_COORDS);
171fa225cbcSrjs      OUT_BATCH(0x00000000);
172fa225cbcSrjs
173fa225cbcSrjs      OUT_BATCH(_3DSTATE_MAP_STATE | 3);
174fa225cbcSrjs      OUT_BATCH(0x00000001);	/* texture map #1 */
      /* With a buffer object in pPriv->buf the address is emitted as a
       * relocation against it; otherwise YBuf0offset is written directly.
       */
175fa225cbcSrjs      if (pPriv->buf)
176fa225cbcSrjs          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset);
177fa225cbcSrjs      else
178fa225cbcSrjs          OUT_BATCH(pPriv->YBuf0offset);
179fa225cbcSrjs
      /* No default case is needed here: planar is FALSE only for YUY2 and
       * UYVY, so id is one of those two on this path.
       */
180fa225cbcSrjs      ms3 = MAPSURF_422 | MS3_USE_FENCE_REGS;
181fa225cbcSrjs      switch (id) {
182fa225cbcSrjs      case FOURCC_YUY2:
183fa225cbcSrjs	 ms3 |= MT_422_YCRCB_NORMAL;
184fa225cbcSrjs	 break;
185fa225cbcSrjs      case FOURCC_UYVY:
186fa225cbcSrjs	 ms3 |= MT_422_YCRCB_SWAPY;
187fa225cbcSrjs	 break;
188fa225cbcSrjs      }
189fa225cbcSrjs      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
190fa225cbcSrjs      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
191fa225cbcSrjs      OUT_BATCH(ms3);
192fa225cbcSrjs      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
193fa225cbcSrjs
194fa225cbcSrjs      ADVANCE_BATCH();
195fa225cbcSrjs
      /* Shader: sample the map straight into the output color, then add the
       * brightness constant to the first three channels (the fourth operand
       * channel is ZERO) when brightness is non-zero.
       */
196fa225cbcSrjs      FS_BEGIN();
197fa225cbcSrjs      i915_fs_dcl(FS_S0);
198fa225cbcSrjs      i915_fs_dcl(FS_T0);
199fa225cbcSrjs      i915_fs_texld(FS_OC, FS_S0, FS_T0);
200fa225cbcSrjs      if (pPriv->brightness != 0) {
201fa225cbcSrjs	  i915_fs_add(FS_OC,
202fa225cbcSrjs		      i915_fs_operand_reg(FS_OC),
203fa225cbcSrjs		      i915_fs_operand(FS_C0, X, X, X, ZERO));
204fa225cbcSrjs      }
205fa225cbcSrjs      FS_END();
206fa225cbcSrjs   } else {
      /* Planar path (YV12 / I420).  The shader below issues at most 16
       * i915_fs_* calls, matching the FS_LOCALS argument.
       */
207fa225cbcSrjs      FS_LOCALS(16);
208fa225cbcSrjs
      /* 22 entries of shader constants + 11 of sampler state + 11 of map
       * state.
       */
209fa225cbcSrjs      BEGIN_BATCH(22 + 11 + 11);
210fa225cbcSrjs      /* For the planar formats, we set up three samplers -- one for each plane,
211fa225cbcSrjs       * in a Y8 format.  Because I couldn't get the special PLANAR_TO_PACKED
212fa225cbcSrjs       * shader setup to work, I did the manual pixel shader:
213fa225cbcSrjs       *
214fa225cbcSrjs       * y' = y - .0625
215fa225cbcSrjs       * u' = u - .5
216fa225cbcSrjs       * v' = v - .5;
217fa225cbcSrjs       *
218fa225cbcSrjs       * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
219fa225cbcSrjs       * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
220fa225cbcSrjs       * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
221fa225cbcSrjs       *
222fa225cbcSrjs       * register assignment:
223fa225cbcSrjs       * r0 = (y',u',v',0)
224fa225cbcSrjs       * r1 = (y,y,y,y)
225fa225cbcSrjs       * r2 = (u,u,u,u)
226fa225cbcSrjs       * r3 = (v,v,v,v)
227fa225cbcSrjs       * OC = (r,g,b,1)
228fa225cbcSrjs       */
229fa225cbcSrjs      OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
230fa225cbcSrjs      OUT_BATCH(0x000001f);	/* constants 0-4 */
231fa225cbcSrjs      /* constant 0: normalization offsets */
232fa225cbcSrjs      OUT_BATCH_F(-0.0625);
233fa225cbcSrjs      OUT_BATCH_F(-0.5);
234fa225cbcSrjs      OUT_BATCH_F(-0.5);
235fa225cbcSrjs      OUT_BATCH_F(0.0);
236fa225cbcSrjs      /* constant 1: r coefficients*/
237fa225cbcSrjs      OUT_BATCH_F(1.1643);
238fa225cbcSrjs      OUT_BATCH_F(0.0);
239fa225cbcSrjs      OUT_BATCH_F(1.5958);
240fa225cbcSrjs      OUT_BATCH_F(0.0);
241fa225cbcSrjs      /* constant 2: g coefficients */
242fa225cbcSrjs      OUT_BATCH_F(1.1643);
243fa225cbcSrjs      OUT_BATCH_F(-0.39173);
244fa225cbcSrjs      OUT_BATCH_F(-0.81290);
245fa225cbcSrjs      OUT_BATCH_F(0.0);
246fa225cbcSrjs      /* constant 3: b coefficients */
247fa225cbcSrjs      OUT_BATCH_F(1.1643);
248fa225cbcSrjs      OUT_BATCH_F(2.017);
249fa225cbcSrjs      OUT_BATCH_F(0.0);
250fa225cbcSrjs      OUT_BATCH_F(0.0);
251fa225cbcSrjs      /* constant 4: brightness/contrast */
      /* As on the packed path, only the X channel (brightness) is read by
       * the shader below; contrast is uploaded but not referenced here.
       */
252fa225cbcSrjs      OUT_BATCH_F(pPriv->brightness / 128.0);
253fa225cbcSrjs      OUT_BATCH_F(pPriv->contrast / 255.0);
254fa225cbcSrjs      OUT_BATCH_F(0.0);
255fa225cbcSrjs      OUT_BATCH_F(0.0);
256fa225cbcSrjs
      /* Three identical samplers (linear filtering, clamp-to-edge,
       * normalized coordinates) that differ only in the texture map index
       * they select: 0, 1 and 2.
       */
257fa225cbcSrjs      OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
258fa225cbcSrjs      OUT_BATCH(0x00000007);
259fa225cbcSrjs      /* sampler 0 */
260fa225cbcSrjs      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
261fa225cbcSrjs	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
262fa225cbcSrjs      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
263fa225cbcSrjs	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
264fa225cbcSrjs	       (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
265fa225cbcSrjs	       SS3_NORMALIZED_COORDS);
266fa225cbcSrjs      OUT_BATCH(0x00000000);
267fa225cbcSrjs      /* sampler 1 */
268fa225cbcSrjs      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
269fa225cbcSrjs	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
270fa225cbcSrjs      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
271fa225cbcSrjs	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
272fa225cbcSrjs	       (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
273fa225cbcSrjs	       SS3_NORMALIZED_COORDS);
274fa225cbcSrjs      OUT_BATCH(0x00000000);
275fa225cbcSrjs      /* sampler 2 */
276fa225cbcSrjs      OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
277fa225cbcSrjs		(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
278fa225cbcSrjs      OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
279fa225cbcSrjs		(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
280fa225cbcSrjs		(2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
281fa225cbcSrjs		SS3_NORMALIZED_COORDS);
282fa225cbcSrjs      OUT_BATCH(0x00000000);
283fa225cbcSrjs
      /* Three maps in the order Y, U, V; each is address + MS3 + MS4. */
284fa225cbcSrjs      OUT_BATCH(_3DSTATE_MAP_STATE | 9);
285fa225cbcSrjs      OUT_BATCH(0x00000007);
286fa225cbcSrjs
      /* Map 0: Y plane at full width x height. */
287fa225cbcSrjs      if (pPriv->buf)
288fa225cbcSrjs          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset);
289fa225cbcSrjs      else
290fa225cbcSrjs          OUT_BATCH(pPriv->YBuf0offset);
291fa225cbcSrjs
292fa225cbcSrjs      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
293fa225cbcSrjs      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
294fa225cbcSrjs      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
295fa225cbcSrjs      OUT_BATCH(ms3);
296fa225cbcSrjs      /* check to see if Y has special pitch than normal double u/v pitch,
297fa225cbcSrjs       * e.g i915 XvMC hw requires at least 1K alignment, so Y pitch might
298fa225cbcSrjs       * be same as U/V's.*/
299fa225cbcSrjs      if (video_pitch2)
300fa225cbcSrjs	  OUT_BATCH(((video_pitch2 / 4) - 1) << MS4_PITCH_SHIFT);
301fa225cbcSrjs      else
302fa225cbcSrjs	  OUT_BATCH(((video_pitch * 2 / 4) - 1) << MS4_PITCH_SHIFT);
303fa225cbcSrjs
      /* Map 1: U plane at half width and half height, pitch video_pitch. */
304fa225cbcSrjs      if (pPriv->buf)
305fa225cbcSrjs          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->UBuf0offset);
306fa225cbcSrjs      else
307fa225cbcSrjs          OUT_BATCH(pPriv->UBuf0offset);
308fa225cbcSrjs
309fa225cbcSrjs      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
310fa225cbcSrjs      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
311fa225cbcSrjs      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
312fa225cbcSrjs      OUT_BATCH(ms3);
313fa225cbcSrjs      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
314fa225cbcSrjs
      /* Map 2: V plane, same geometry and pitch as the U plane. */
315fa225cbcSrjs      if (pPriv->buf)
316fa225cbcSrjs          OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->VBuf0offset);
317fa225cbcSrjs      else
318fa225cbcSrjs          OUT_BATCH(pPriv->VBuf0offset);
319fa225cbcSrjs
320fa225cbcSrjs      ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS;
321fa225cbcSrjs      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
322fa225cbcSrjs      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
323fa225cbcSrjs      OUT_BATCH(ms3);
324fa225cbcSrjs      OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
325fa225cbcSrjs      ADVANCE_BATCH();
326fa225cbcSrjs
327fa225cbcSrjs      FS_BEGIN();
328fa225cbcSrjs      /* Declare samplers */
329fa225cbcSrjs      i915_fs_dcl(FS_S0); /* Y */
330fa225cbcSrjs      i915_fs_dcl(FS_S1); /* U */
331fa225cbcSrjs      i915_fs_dcl(FS_S2); /* V */
332fa225cbcSrjs      i915_fs_dcl(FS_T0); /* normalized coords */
333fa225cbcSrjs
334fa225cbcSrjs      /* Load samplers to temporaries. */
335fa225cbcSrjs      i915_fs_texld(FS_R1, FS_S0, FS_T0);
336fa225cbcSrjs      i915_fs_texld(FS_R2, FS_S1, FS_T0);
337fa225cbcSrjs      i915_fs_texld(FS_R3, FS_S2, FS_T0);
338fa225cbcSrjs
339fa225cbcSrjs      /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
340fa225cbcSrjs      i915_fs_mov_masked(FS_R0, MASK_X, i915_fs_operand_reg(FS_R1));
341fa225cbcSrjs      i915_fs_mov_masked(FS_R0, MASK_Y, i915_fs_operand_reg(FS_R2));
342fa225cbcSrjs      i915_fs_mov_masked(FS_R0, MASK_Z, i915_fs_operand_reg(FS_R3));
343fa225cbcSrjs
344fa225cbcSrjs      /* Normalize the YUV data */
345fa225cbcSrjs      i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
346fa225cbcSrjs                 i915_fs_operand_reg(FS_C0));
347fa225cbcSrjs      /* dot-product the YUV data in R0 by the vectors of coefficients for
348fa225cbcSrjs       * calculating R, G, and B, storing the results in the R, G, or B
349fa225cbcSrjs       * channels of the output color.  The OC results are implicitly clamped
350fa225cbcSrjs       * at the end of the program.
351fa225cbcSrjs       */
352fa225cbcSrjs      i915_fs_dp3_masked(FS_OC, MASK_X,
353fa225cbcSrjs                        i915_fs_operand_reg(FS_R0),
354fa225cbcSrjs                        i915_fs_operand_reg(FS_C1));
355fa225cbcSrjs      i915_fs_dp3_masked(FS_OC, MASK_Y,
356fa225cbcSrjs                        i915_fs_operand_reg(FS_R0),
357fa225cbcSrjs                        i915_fs_operand_reg(FS_C2));
358fa225cbcSrjs      i915_fs_dp3_masked(FS_OC, MASK_Z,
359fa225cbcSrjs                        i915_fs_operand_reg(FS_R0),
360fa225cbcSrjs                        i915_fs_operand_reg(FS_C3));
361fa225cbcSrjs      /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
362fa225cbcSrjs       * the source.
363fa225cbcSrjs       */
364fa225cbcSrjs      i915_fs_mov_masked(FS_OC, MASK_W, i915_fs_operand_one());
365fa225cbcSrjs
      /* Optional brightness offset from constant 4, applied to the first
       * three channels only (the fourth operand channel is ZERO).
       */
366fa225cbcSrjs      if (pPriv->brightness != 0) {
367fa225cbcSrjs	  i915_fs_add(FS_OC,
368fa225cbcSrjs		      i915_fs_operand_reg(FS_OC),
369fa225cbcSrjs		      i915_fs_operand(FS_C4, X, X, X, ZERO));
370fa225cbcSrjs      }
371fa225cbcSrjs      FS_END();
372fa225cbcSrjs   }
373fa225cbcSrjs
   /* Second flush, emitted after the texture and shader state and before
    * the rectangles.
    */
374fa225cbcSrjs   {
375fa225cbcSrjs      BEGIN_BATCH(2);
376fa225cbcSrjs      OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
377fa225cbcSrjs      OUT_BATCH(0x00000000);
378fa225cbcSrjs      ADVANCE_BATCH();
379fa225cbcSrjs   }
380fa225cbcSrjs
381fa225cbcSrjs   /* Set up the offset for translating from the given region (in screen
382fa225cbcSrjs    * coordinates) to the backing pixmap.
383fa225cbcSrjs    */
384fa225cbcSrjs#ifdef COMPOSITE
385fa225cbcSrjs   pix_xoff = -pPixmap->screen_x + pPixmap->drawable.x;
386fa225cbcSrjs   pix_yoff = -pPixmap->screen_y + pPixmap->drawable.y;
387fa225cbcSrjs#else
388fa225cbcSrjs   pix_xoff = 0;
389fa225cbcSrjs   pix_yoff = 0;
390fa225cbcSrjs#endif
391fa225cbcSrjs
   /* Texture coordinates are measured from the top-left corner of the
    * region's extents.
    */
392fa225cbcSrjs   dxo = dstRegion->extents.x1;
393fa225cbcSrjs   dyo = dstRegion->extents.y1;
394fa225cbcSrjs
   /* Emit one rectangle per box of this chunk.  pbox is advanced here and
    * keeps its position across iterations of the outer loop, so the next
    * chunk continues with the following box.
    */
395fa225cbcSrjs   while (nbox_this_time--)
396fa225cbcSrjs   {
397fa225cbcSrjs      int box_x1 = pbox->x1;
398fa225cbcSrjs      int box_y1 = pbox->y1;
399fa225cbcSrjs      int box_x2 = pbox->x2;
400fa225cbcSrjs      int box_y2 = pbox->y2;
401fa225cbcSrjs      float src_scale_x, src_scale_y;
402fa225cbcSrjs
403fa225cbcSrjs      pbox++;
404fa225cbcSrjs
      /* Factor that turns a destination pixel offset into a normalized
       * texture coordinate: (src size / surface size) per drawn pixel.
       * These two values do not depend on the box and are recomputed on
       * every iteration.
       */
405fa225cbcSrjs      src_scale_x = ((float)src_w / width) / drw_w;
406fa225cbcSrjs      src_scale_y  = ((float)src_h / height) / drw_h;
407fa225cbcSrjs
      /* 20 entries per box: 7 MI_NOOPs + the primitive header (8), then
       * 3 vertices of 4 floats each (12): x, y, s, t.
       */
408fa225cbcSrjs      BEGIN_BATCH(8 + 12);
409fa225cbcSrjs      OUT_BATCH(MI_NOOP);
410fa225cbcSrjs      OUT_BATCH(MI_NOOP);
411fa225cbcSrjs      OUT_BATCH(MI_NOOP);
412fa225cbcSrjs      OUT_BATCH(MI_NOOP);
413fa225cbcSrjs      OUT_BATCH(MI_NOOP);
414fa225cbcSrjs      OUT_BATCH(MI_NOOP);
415fa225cbcSrjs      OUT_BATCH(MI_NOOP);
416fa225cbcSrjs
417fa225cbcSrjs      /* vertex data - rect list consists of bottom right, bottom left, and top
418fa225cbcSrjs       * left vertices.
419fa225cbcSrjs       */
420fa225cbcSrjs      OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 - 1));
421fa225cbcSrjs
422fa225cbcSrjs      /* bottom right */
423fa225cbcSrjs      OUT_BATCH_F(box_x2 + pix_xoff);
424fa225cbcSrjs      OUT_BATCH_F(box_y2 + pix_yoff);
425fa225cbcSrjs      OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
426fa225cbcSrjs      OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
427fa225cbcSrjs
428fa225cbcSrjs      /* bottom left */
429fa225cbcSrjs      OUT_BATCH_F(box_x1 + pix_xoff);
430fa225cbcSrjs      OUT_BATCH_F(box_y2 + pix_yoff);
431fa225cbcSrjs      OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
432fa225cbcSrjs      OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
433fa225cbcSrjs
434fa225cbcSrjs      /* top left */
435fa225cbcSrjs      OUT_BATCH_F(box_x1 + pix_xoff);
436fa225cbcSrjs      OUT_BATCH_F(box_y1 + pix_yoff);
437fa225cbcSrjs      OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
438fa225cbcSrjs      OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
439fa225cbcSrjs
440fa225cbcSrjs      ADVANCE_BATCH();
441fa225cbcSrjs   }
442fa225cbcSrjs
   /* Close the atomic section opened for this chunk; the brace on the next
    * line ends the outer "while (nbox_total)" loop.
    */
443fa225cbcSrjs   intel_batch_end_atomic(pScrn);
444fa225cbcSrjs   }
445fa225cbcSrjs}
446fa225cbcSrjs
447