1fa225cbcSrjs/* 2fa225cbcSrjs * Copyright © 2006 Intel Corporation 3fa225cbcSrjs * 4fa225cbcSrjs * Permission is hereby granted, free of charge, to any person obtaining a 5fa225cbcSrjs * copy of this software and associated documentation files (the "Software"), 6fa225cbcSrjs * to deal in the Software without restriction, including without limitation 7fa225cbcSrjs * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8fa225cbcSrjs * and/or sell copies of the Software, and to permit persons to whom the 9fa225cbcSrjs * Software is furnished to do so, subject to the following conditions: 10fa225cbcSrjs * 11fa225cbcSrjs * The above copyright notice and this permission notice (including the next 12fa225cbcSrjs * paragraph) shall be included in all copies or substantial portions of the 13fa225cbcSrjs * Software. 14fa225cbcSrjs * 15fa225cbcSrjs * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16fa225cbcSrjs * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17fa225cbcSrjs * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18fa225cbcSrjs * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19fa225cbcSrjs * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20fa225cbcSrjs * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21fa225cbcSrjs * SOFTWARE. 22fa225cbcSrjs * 23fa225cbcSrjs * Authors: 24fa225cbcSrjs * Eric Anholt <eric@anholt.net> 25fa225cbcSrjs * 26fa225cbcSrjs */ 27fa225cbcSrjs 28fa225cbcSrjs#ifdef HAVE_CONFIG_H 29fa225cbcSrjs#include "config.h" 30fa225cbcSrjs#endif 31fa225cbcSrjs 32fa225cbcSrjs#include "xf86.h" 33fa225cbcSrjs#include "xf86_OSproc.h" 34fa225cbcSrjs#include "xf86xv.h" 35fa225cbcSrjs#include "fourcc.h" 36fa225cbcSrjs 37fa225cbcSrjs#include "i830.h" 38fa225cbcSrjs#include "i830_video.h" 39fa225cbcSrjs#include "i915_reg.h" 40fa225cbcSrjs#include "i915_3d.h" 41fa225cbcSrjs 42fa225cbcSrjsvoid 43fa225cbcSrjsI915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, 44fa225cbcSrjs RegionPtr dstRegion, 45fa225cbcSrjs short width, short height, int video_pitch, int video_pitch2, 46fa225cbcSrjs int x1, int y1, int x2, int y2, 47fa225cbcSrjs short src_w, short src_h, short drw_w, short drw_h, 48fa225cbcSrjs PixmapPtr pPixmap) 49fa225cbcSrjs{ 50fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 51fa225cbcSrjs uint32_t format, ms3, s5; 52fa225cbcSrjs BoxPtr pbox = REGION_RECTS(dstRegion); 53fa225cbcSrjs int nbox_total = REGION_NUM_RECTS(dstRegion); 54fa225cbcSrjs int nbox_this_time; 55fa225cbcSrjs int dxo, dyo, pix_xoff, pix_yoff; 56fa225cbcSrjs Bool planar; 57fa225cbcSrjs 58fa225cbcSrjs#if 0 59fa225cbcSrjs ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height, 60fa225cbcSrjs video_pitch); 61fa225cbcSrjs#endif 62fa225cbcSrjs 63fa225cbcSrjs switch (id) { 64fa225cbcSrjs case FOURCC_UYVY: 65fa225cbcSrjs case FOURCC_YUY2: 66fa225cbcSrjs planar = FALSE; 67fa225cbcSrjs break; 68fa225cbcSrjs case FOURCC_YV12: 69fa225cbcSrjs case FOURCC_I420: 70fa225cbcSrjs planar = TRUE; 71fa225cbcSrjs break; 72fa225cbcSrjs default: 73fa225cbcSrjs ErrorF("Unknown format 0x%x\n", id); 74fa225cbcSrjs return; 75fa225cbcSrjs } 76fa225cbcSrjs 77fa225cbcSrjs#define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4) 78fa225cbcSrjs#define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20) 79fa225cbcSrjs#define BATCH_BYTES(p) ((p)->batch_bo->size - 16) 80fa225cbcSrjs 81fa225cbcSrjs while (nbox_total) { 82fa225cbcSrjs nbox_this_time = nbox_total; 83fa225cbcSrjs if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(pI830)) 84fa225cbcSrjs nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(pI830)); 85fa225cbcSrjs nbox_total -= nbox_this_time; 86fa225cbcSrjs 87fa225cbcSrjs intel_batch_start_atomic(pScrn, 200 + 20 * nbox_this_time); 88fa225cbcSrjs 89fa225cbcSrjs IntelEmitInvarientState(pScrn); 90fa225cbcSrjs pI830->last_3d = LAST_3D_VIDEO; 91fa225cbcSrjs 92fa225cbcSrjs BEGIN_BATCH(20); 93fa225cbcSrjs 94fa225cbcSrjs /* flush map & render cache */ 95fa225cbcSrjs OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE); 96fa225cbcSrjs OUT_BATCH(0x00000000); 97fa225cbcSrjs 98fa225cbcSrjs /* draw rect -- just clipping */ 99fa225cbcSrjs OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); 100fa225cbcSrjs OUT_BATCH(DRAW_DITHER_OFS_X(pPixmap->drawable.x & 3) | 101fa225cbcSrjs DRAW_DITHER_OFS_Y(pPixmap->drawable.y & 3)); 102fa225cbcSrjs OUT_BATCH(0x00000000); /* ymin, xmin */ 103fa225cbcSrjs OUT_BATCH((pPixmap->drawable.width - 1) | 104fa225cbcSrjs (pPixmap->drawable.height - 1) << 16); /* ymax, xmax */ 105fa225cbcSrjs OUT_BATCH(0x00000000); /* yorigin, xorigin */ 106fa225cbcSrjs OUT_BATCH(MI_NOOP); 107fa225cbcSrjs 108fa225cbcSrjs OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 109fa225cbcSrjs I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); 110fa225cbcSrjs OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | 111fa225cbcSrjs S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | 112fa225cbcSrjs S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | 113fa225cbcSrjs S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | 114fa225cbcSrjs S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | 115fa225cbcSrjs S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | 116fa225cbcSrjs S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | 117fa225cbcSrjs S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); 118fa225cbcSrjs OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE | 119fa225cbcSrjs S4_CULLMODE_NONE | S4_VFMT_XY); 120fa225cbcSrjs s5 = 0x0; 121fa225cbcSrjs if (pI830->cpp == 2) 122fa225cbcSrjs s5 |= S5_COLOR_DITHER_ENABLE; 123fa225cbcSrjs OUT_BATCH(s5); /* S5 - enable bits */ 124fa225cbcSrjs OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) | 125fa225cbcSrjs (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | 126fa225cbcSrjs (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE | 127fa225cbcSrjs (2 << S6_TRISTRIP_PV_SHIFT)); 128fa225cbcSrjs 129fa225cbcSrjs OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); 130fa225cbcSrjs OUT_BATCH(0x00000000); 131fa225cbcSrjs 132fa225cbcSrjs OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); 133fa225cbcSrjs if (pI830->cpp == 2) 134fa225cbcSrjs format = COLR_BUF_RGB565; 135fa225cbcSrjs else 136fa225cbcSrjs format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; 137fa225cbcSrjs 138fa225cbcSrjs OUT_BATCH(LOD_PRECLAMP_OGL | 139fa225cbcSrjs DSTORG_HORT_BIAS(0x8) | 140fa225cbcSrjs DSTORG_VERT_BIAS(0x8) | 141fa225cbcSrjs format); 142fa225cbcSrjs 143fa225cbcSrjs /* front buffer, pitch, offset */ 144fa225cbcSrjs OUT_BATCH(_3DSTATE_BUF_INFO_CMD); 145fa225cbcSrjs OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_USE_FENCE | 146fa225cbcSrjs BUF_3D_PITCH(intel_get_pixmap_pitch(pPixmap))); 147fa225cbcSrjs OUT_RELOC_PIXMAP(pPixmap, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); 148fa225cbcSrjs ADVANCE_BATCH(); 149fa225cbcSrjs 150fa225cbcSrjs if (!planar) { 151fa225cbcSrjs FS_LOCALS(10); 152fa225cbcSrjs 153fa225cbcSrjs BEGIN_BATCH(16); 154fa225cbcSrjs OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); 155fa225cbcSrjs OUT_BATCH(0x0000001); /* constant 0 */ 156fa225cbcSrjs /* constant 0: brightness/contrast */ 157fa225cbcSrjs OUT_BATCH_F(pPriv->brightness / 128.0); 158fa225cbcSrjs OUT_BATCH_F(pPriv->contrast / 255.0); 159fa225cbcSrjs OUT_BATCH_F(0.0); 160fa225cbcSrjs OUT_BATCH_F(0.0); 161fa225cbcSrjs 162fa225cbcSrjs OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); 163fa225cbcSrjs OUT_BATCH(0x00000001); 164fa225cbcSrjs OUT_BATCH(SS2_COLORSPACE_CONVERSION | 165fa225cbcSrjs (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 166fa225cbcSrjs (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 167fa225cbcSrjs OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 168fa225cbcSrjs (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 169fa225cbcSrjs (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | 170fa225cbcSrjs SS3_NORMALIZED_COORDS); 171fa225cbcSrjs OUT_BATCH(0x00000000); 172fa225cbcSrjs 173fa225cbcSrjs OUT_BATCH(_3DSTATE_MAP_STATE | 3); 174fa225cbcSrjs OUT_BATCH(0x00000001); /* texture map #1 */ 175fa225cbcSrjs if (pPriv->buf) 176fa225cbcSrjs OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset); 177fa225cbcSrjs else 178fa225cbcSrjs OUT_BATCH(pPriv->YBuf0offset); 179fa225cbcSrjs 180fa225cbcSrjs ms3 = MAPSURF_422 | MS3_USE_FENCE_REGS; 181fa225cbcSrjs switch (id) { 182fa225cbcSrjs case FOURCC_YUY2: 183fa225cbcSrjs ms3 |= MT_422_YCRCB_NORMAL; 184fa225cbcSrjs break; 185fa225cbcSrjs case FOURCC_UYVY: 186fa225cbcSrjs ms3 |= MT_422_YCRCB_SWAPY; 187fa225cbcSrjs break; 188fa225cbcSrjs } 189fa225cbcSrjs ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; 190fa225cbcSrjs ms3 |= (width - 1) << MS3_WIDTH_SHIFT; 191fa225cbcSrjs OUT_BATCH(ms3); 192fa225cbcSrjs OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); 193fa225cbcSrjs 194fa225cbcSrjs ADVANCE_BATCH(); 195fa225cbcSrjs 196fa225cbcSrjs FS_BEGIN(); 197fa225cbcSrjs i915_fs_dcl(FS_S0); 198fa225cbcSrjs i915_fs_dcl(FS_T0); 199fa225cbcSrjs i915_fs_texld(FS_OC, FS_S0, FS_T0); 200fa225cbcSrjs if (pPriv->brightness != 0) { 201fa225cbcSrjs i915_fs_add(FS_OC, 202fa225cbcSrjs i915_fs_operand_reg(FS_OC), 203fa225cbcSrjs i915_fs_operand(FS_C0, X, X, X, ZERO)); 204fa225cbcSrjs } 205fa225cbcSrjs FS_END(); 206fa225cbcSrjs } else { 207fa225cbcSrjs FS_LOCALS(16); 208fa225cbcSrjs 209fa225cbcSrjs BEGIN_BATCH(22 + 11 + 11); 210fa225cbcSrjs /* For the planar formats, we set up three samplers -- one for each plane, 211fa225cbcSrjs * in a Y8 format. Because I couldn't get the special PLANAR_TO_PACKED 212fa225cbcSrjs * shader setup to work, I did the manual pixel shader: 213fa225cbcSrjs * 214fa225cbcSrjs * y' = y - .0625 215fa225cbcSrjs * u' = u - .5 216fa225cbcSrjs * v' = v - .5; 217fa225cbcSrjs * 218fa225cbcSrjs * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 219fa225cbcSrjs * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 220fa225cbcSrjs * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 221fa225cbcSrjs * 222fa225cbcSrjs * register assignment: 223fa225cbcSrjs * r0 = (y',u',v',0) 224fa225cbcSrjs * r1 = (y,y,y,y) 225fa225cbcSrjs * r2 = (u,u,u,u) 226fa225cbcSrjs * r3 = (v,v,v,v) 227fa225cbcSrjs * OC = (r,g,b,1) 228fa225cbcSrjs */ 229fa225cbcSrjs OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); 230fa225cbcSrjs OUT_BATCH(0x000001f); /* constants 0-4 */ 231fa225cbcSrjs /* constant 0: normalization offsets */ 232fa225cbcSrjs OUT_BATCH_F(-0.0625); 233fa225cbcSrjs OUT_BATCH_F(-0.5); 234fa225cbcSrjs OUT_BATCH_F(-0.5); 235fa225cbcSrjs OUT_BATCH_F(0.0); 236fa225cbcSrjs /* constant 1: r coefficients*/ 237fa225cbcSrjs OUT_BATCH_F(1.1643); 238fa225cbcSrjs OUT_BATCH_F(0.0); 239fa225cbcSrjs OUT_BATCH_F(1.5958); 240fa225cbcSrjs OUT_BATCH_F(0.0); 241fa225cbcSrjs /* constant 2: g coefficients */ 242fa225cbcSrjs OUT_BATCH_F(1.1643); 243fa225cbcSrjs OUT_BATCH_F(-0.39173); 244fa225cbcSrjs OUT_BATCH_F(-0.81290); 245fa225cbcSrjs OUT_BATCH_F(0.0); 246fa225cbcSrjs /* constant 3: b coefficients */ 247fa225cbcSrjs OUT_BATCH_F(1.1643); 248fa225cbcSrjs OUT_BATCH_F(2.017); 249fa225cbcSrjs OUT_BATCH_F(0.0); 250fa225cbcSrjs OUT_BATCH_F(0.0); 251fa225cbcSrjs /* constant 4: brightness/contrast */ 252fa225cbcSrjs OUT_BATCH_F(pPriv->brightness / 128.0); 253fa225cbcSrjs OUT_BATCH_F(pPriv->contrast / 255.0); 254fa225cbcSrjs OUT_BATCH_F(0.0); 255fa225cbcSrjs OUT_BATCH_F(0.0); 256fa225cbcSrjs 257fa225cbcSrjs OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); 258fa225cbcSrjs OUT_BATCH(0x00000007); 259fa225cbcSrjs /* sampler 0 */ 260fa225cbcSrjs OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 261fa225cbcSrjs (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 262fa225cbcSrjs OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 263fa225cbcSrjs (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 264fa225cbcSrjs (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | 265fa225cbcSrjs SS3_NORMALIZED_COORDS); 266fa225cbcSrjs OUT_BATCH(0x00000000); 267fa225cbcSrjs /* sampler 1 */ 268fa225cbcSrjs OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 269fa225cbcSrjs (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 270fa225cbcSrjs OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 271fa225cbcSrjs (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 272fa225cbcSrjs (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | 273fa225cbcSrjs SS3_NORMALIZED_COORDS); 274fa225cbcSrjs OUT_BATCH(0x00000000); 275fa225cbcSrjs /* sampler 2 */ 276fa225cbcSrjs OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 277fa225cbcSrjs (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 278fa225cbcSrjs OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 279fa225cbcSrjs (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 280fa225cbcSrjs (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | 281fa225cbcSrjs SS3_NORMALIZED_COORDS); 282fa225cbcSrjs OUT_BATCH(0x00000000); 283fa225cbcSrjs 284fa225cbcSrjs OUT_BATCH(_3DSTATE_MAP_STATE | 9); 285fa225cbcSrjs OUT_BATCH(0x00000007); 286fa225cbcSrjs 287fa225cbcSrjs if (pPriv->buf) 288fa225cbcSrjs OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->YBuf0offset); 289fa225cbcSrjs else 290fa225cbcSrjs OUT_BATCH(pPriv->YBuf0offset); 291fa225cbcSrjs 292fa225cbcSrjs ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; 293fa225cbcSrjs ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; 294fa225cbcSrjs ms3 |= (width - 1) << MS3_WIDTH_SHIFT; 295fa225cbcSrjs OUT_BATCH(ms3); 296fa225cbcSrjs /* check to see if Y has special pitch than normal double u/v pitch, 297fa225cbcSrjs * e.g i915 XvMC hw requires at least 1K alignment, so Y pitch might 298fa225cbcSrjs * be same as U/V's.*/ 299fa225cbcSrjs if (video_pitch2) 300fa225cbcSrjs OUT_BATCH(((video_pitch2 / 4) - 1) << MS4_PITCH_SHIFT); 301fa225cbcSrjs else 302fa225cbcSrjs OUT_BATCH(((video_pitch * 2 / 4) - 1) << MS4_PITCH_SHIFT); 303fa225cbcSrjs 304fa225cbcSrjs if (pPriv->buf) 305fa225cbcSrjs OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->UBuf0offset); 306fa225cbcSrjs else 307fa225cbcSrjs OUT_BATCH(pPriv->UBuf0offset); 308fa225cbcSrjs 309fa225cbcSrjs ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; 310fa225cbcSrjs ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; 311fa225cbcSrjs ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; 312fa225cbcSrjs OUT_BATCH(ms3); 313fa225cbcSrjs OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); 314fa225cbcSrjs 315fa225cbcSrjs if (pPriv->buf) 316fa225cbcSrjs OUT_RELOC(pPriv->buf, I915_GEM_DOMAIN_SAMPLER, 0, pPriv->VBuf0offset); 317fa225cbcSrjs else 318fa225cbcSrjs OUT_BATCH(pPriv->VBuf0offset); 319fa225cbcSrjs 320fa225cbcSrjs ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; 321fa225cbcSrjs ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; 322fa225cbcSrjs ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; 323fa225cbcSrjs OUT_BATCH(ms3); 324fa225cbcSrjs OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); 325fa225cbcSrjs ADVANCE_BATCH(); 326fa225cbcSrjs 327fa225cbcSrjs FS_BEGIN(); 328fa225cbcSrjs /* Declare samplers */ 329fa225cbcSrjs i915_fs_dcl(FS_S0); /* Y */ 330fa225cbcSrjs i915_fs_dcl(FS_S1); /* U */ 331fa225cbcSrjs i915_fs_dcl(FS_S2); /* V */ 332fa225cbcSrjs i915_fs_dcl(FS_T0); /* normalized coords */ 333fa225cbcSrjs 334fa225cbcSrjs /* Load samplers to temporaries. */ 335fa225cbcSrjs i915_fs_texld(FS_R1, FS_S0, FS_T0); 336fa225cbcSrjs i915_fs_texld(FS_R2, FS_S1, FS_T0); 337fa225cbcSrjs i915_fs_texld(FS_R3, FS_S2, FS_T0); 338fa225cbcSrjs 339fa225cbcSrjs /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */ 340fa225cbcSrjs i915_fs_mov_masked(FS_R0, MASK_X, i915_fs_operand_reg(FS_R1)); 341fa225cbcSrjs i915_fs_mov_masked(FS_R0, MASK_Y, i915_fs_operand_reg(FS_R2)); 342fa225cbcSrjs i915_fs_mov_masked(FS_R0, MASK_Z, i915_fs_operand_reg(FS_R3)); 343fa225cbcSrjs 344fa225cbcSrjs /* Normalize the YUV data */ 345fa225cbcSrjs i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0), 346fa225cbcSrjs i915_fs_operand_reg(FS_C0)); 347fa225cbcSrjs /* dot-product the YUV data in R0 by the vectors of coefficients for 348fa225cbcSrjs * calculating R, G, and B, storing the results in the R, G, or B 349fa225cbcSrjs * channels of the output color. The OC results are implicitly clamped 350fa225cbcSrjs * at the end of the program. 351fa225cbcSrjs */ 352fa225cbcSrjs i915_fs_dp3_masked(FS_OC, MASK_X, 353fa225cbcSrjs i915_fs_operand_reg(FS_R0), 354fa225cbcSrjs i915_fs_operand_reg(FS_C1)); 355fa225cbcSrjs i915_fs_dp3_masked(FS_OC, MASK_Y, 356fa225cbcSrjs i915_fs_operand_reg(FS_R0), 357fa225cbcSrjs i915_fs_operand_reg(FS_C2)); 358fa225cbcSrjs i915_fs_dp3_masked(FS_OC, MASK_Z, 359fa225cbcSrjs i915_fs_operand_reg(FS_R0), 360fa225cbcSrjs i915_fs_operand_reg(FS_C3)); 361fa225cbcSrjs /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using 362fa225cbcSrjs * the source. 363fa225cbcSrjs */ 364fa225cbcSrjs i915_fs_mov_masked(FS_OC, MASK_W, i915_fs_operand_one()); 365fa225cbcSrjs 366fa225cbcSrjs if (pPriv->brightness != 0) { 367fa225cbcSrjs i915_fs_add(FS_OC, 368fa225cbcSrjs i915_fs_operand_reg(FS_OC), 369fa225cbcSrjs i915_fs_operand(FS_C4, X, X, X, ZERO)); 370fa225cbcSrjs } 371fa225cbcSrjs FS_END(); 372fa225cbcSrjs } 373fa225cbcSrjs 374fa225cbcSrjs { 375fa225cbcSrjs BEGIN_BATCH(2); 376fa225cbcSrjs OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE); 377fa225cbcSrjs OUT_BATCH(0x00000000); 378fa225cbcSrjs ADVANCE_BATCH(); 379fa225cbcSrjs } 380fa225cbcSrjs 381fa225cbcSrjs /* Set up the offset for translating from the given region (in screen 382fa225cbcSrjs * coordinates) to the backing pixmap. 383fa225cbcSrjs */ 384fa225cbcSrjs#ifdef COMPOSITE 385fa225cbcSrjs pix_xoff = -pPixmap->screen_x + pPixmap->drawable.x; 386fa225cbcSrjs pix_yoff = -pPixmap->screen_y + pPixmap->drawable.y; 387fa225cbcSrjs#else 388fa225cbcSrjs pix_xoff = 0; 389fa225cbcSrjs pix_yoff = 0; 390fa225cbcSrjs#endif 391fa225cbcSrjs 392fa225cbcSrjs dxo = dstRegion->extents.x1; 393fa225cbcSrjs dyo = dstRegion->extents.y1; 394fa225cbcSrjs 395fa225cbcSrjs while (nbox_this_time--) 396fa225cbcSrjs { 397fa225cbcSrjs int box_x1 = pbox->x1; 398fa225cbcSrjs int box_y1 = pbox->y1; 399fa225cbcSrjs int box_x2 = pbox->x2; 400fa225cbcSrjs int box_y2 = pbox->y2; 401fa225cbcSrjs float src_scale_x, src_scale_y; 402fa225cbcSrjs 403fa225cbcSrjs pbox++; 404fa225cbcSrjs 405fa225cbcSrjs src_scale_x = ((float)src_w / width) / drw_w; 406fa225cbcSrjs src_scale_y = ((float)src_h / height) / drw_h; 407fa225cbcSrjs 408fa225cbcSrjs BEGIN_BATCH(8 + 12); 409fa225cbcSrjs OUT_BATCH(MI_NOOP); 410fa225cbcSrjs OUT_BATCH(MI_NOOP); 411fa225cbcSrjs OUT_BATCH(MI_NOOP); 412fa225cbcSrjs OUT_BATCH(MI_NOOP); 413fa225cbcSrjs OUT_BATCH(MI_NOOP); 414fa225cbcSrjs OUT_BATCH(MI_NOOP); 415fa225cbcSrjs OUT_BATCH(MI_NOOP); 416fa225cbcSrjs 417fa225cbcSrjs /* vertex data - rect list consists of bottom right, bottom left, and top 418fa225cbcSrjs * left vertices. 419fa225cbcSrjs */ 420fa225cbcSrjs OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 - 1)); 421fa225cbcSrjs 422fa225cbcSrjs /* bottom right */ 423fa225cbcSrjs OUT_BATCH_F(box_x2 + pix_xoff); 424fa225cbcSrjs OUT_BATCH_F(box_y2 + pix_yoff); 425fa225cbcSrjs OUT_BATCH_F((box_x2 - dxo) * src_scale_x); 426fa225cbcSrjs OUT_BATCH_F((box_y2 - dyo) * src_scale_y); 427fa225cbcSrjs 428fa225cbcSrjs /* bottom left */ 429fa225cbcSrjs OUT_BATCH_F(box_x1 + pix_xoff); 430fa225cbcSrjs OUT_BATCH_F(box_y2 + pix_yoff); 431fa225cbcSrjs OUT_BATCH_F((box_x1 - dxo) * src_scale_x); 432fa225cbcSrjs OUT_BATCH_F((box_y2 - dyo) * src_scale_y); 433fa225cbcSrjs 434fa225cbcSrjs /* top left */ 435fa225cbcSrjs OUT_BATCH_F(box_x1 + pix_xoff); 436fa225cbcSrjs OUT_BATCH_F(box_y1 + pix_yoff); 437fa225cbcSrjs OUT_BATCH_F((box_x1 - dxo) * src_scale_x); 438fa225cbcSrjs OUT_BATCH_F((box_y1 - dyo) * src_scale_y); 439fa225cbcSrjs 440fa225cbcSrjs ADVANCE_BATCH(); 441fa225cbcSrjs } 442fa225cbcSrjs 443fa225cbcSrjs intel_batch_end_atomic(pScrn); 444fa225cbcSrjs } 445fa225cbcSrjs} 446fa225cbcSrjs 447