i965_video.c revision 13496ba1
1/* 2 * Copyright © 2006 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Keith Packard <keithp@keithp.com> 26 * 27 */ 28 29#ifdef HAVE_CONFIG_H 30#include "config.h" 31#endif 32 33#include "xorg-server.h" 34#include "xf86.h" 35#include "xf86_OSproc.h" 36#include "xf86xv.h" 37#include "fourcc.h" 38 39#include "intel.h" 40#include "intel_xvmc.h" 41#include "intel_uxa.h" 42#include "i830_reg.h" 43#include "i965_reg.h" 44#include "brw_defines.h" 45#include "brw_structs.h" 46#include <string.h> 47 48 49/* Make assert() work. 
*/ 50#undef NDEBUG 51#include <assert.h> 52 53static const uint32_t sip_kernel_static[][4] = { 54/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ 55 {0x00000030, 0x20000108, 0x00001220, 0x00000000}, 56/* nop (4) g0<1>UD { align1 + } */ 57 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 58/* nop (4) g0<1>UD { align1 + } */ 59 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 60/* nop (4) g0<1>UD { align1 + } */ 61 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 62/* nop (4) g0<1>UD { align1 + } */ 63 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 64/* nop (4) g0<1>UD { align1 + } */ 65 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 66/* nop (4) g0<1>UD { align1 + } */ 67 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 68/* nop (4) g0<1>UD { align1 + } */ 69 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 70/* nop (4) g0<1>UD { align1 + } */ 71 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 72/* nop (4) g0<1>UD { align1 + } */ 73 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 74}; 75 76/* 77 * this program computes dA/dx and dA/dy for the texture coordinates along 78 * with the base texture coordinate. It was extracted from the Mesa driver. 79 * It uses about 10 GRF registers. 80 */ 81 82#define SF_KERNEL_NUM_GRF 16 83#define SF_MAX_THREADS 1 84 85static const uint32_t sf_kernel_static[][4] = { 86#include "exa_sf.g4b" 87}; 88 89/* 90 * Ok, this kernel picks up the required data flow values in g0 and g1 91 * and passes those along in m0 and m1. In m2-m9, it sticks constant 92 * values (bright pink). 
93 */ 94 95/* Our PS kernel uses less than 32 GRF registers (about 20) */ 96#define PS_KERNEL_NUM_GRF 32 97#define PS_MAX_THREADS 32 98 99#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 100 101static const uint32_t ps_kernel_packed_static[][4] = { 102#include "exa_wm_xy.g4b" 103#include "exa_wm_src_affine.g4b" 104#include "exa_wm_src_sample_argb.g4b" 105#include "exa_wm_yuv_rgb.g4b" 106#include "exa_wm_write.g4b" 107}; 108 109static const uint32_t ps_kernel_planar_static[][4] = { 110#include "exa_wm_xy.g4b" 111#include "exa_wm_src_affine.g4b" 112#include "exa_wm_src_sample_planar.g4b" 113#include "exa_wm_yuv_rgb.g4b" 114#include "exa_wm_write.g4b" 115}; 116 117/* new program for Ironlake */ 118static const uint32_t sf_kernel_static_gen5[][4] = { 119#include "exa_sf.g4b.gen5" 120}; 121 122static const uint32_t ps_kernel_packed_static_gen5[][4] = { 123#include "exa_wm_xy.g4b.gen5" 124#include "exa_wm_src_affine.g4b.gen5" 125#include "exa_wm_src_sample_argb.g4b.gen5" 126#include "exa_wm_yuv_rgb.g4b.gen5" 127#include "exa_wm_write.g4b.gen5" 128}; 129 130static const uint32_t ps_kernel_planar_static_gen5[][4] = { 131#include "exa_wm_xy.g4b.gen5" 132#include "exa_wm_src_affine.g4b.gen5" 133#include "exa_wm_src_sample_planar.g4b.gen5" 134#include "exa_wm_yuv_rgb.g4b.gen5" 135#include "exa_wm_write.g4b.gen5" 136}; 137 138/* programs for Sandybridge */ 139static const uint32_t ps_kernel_packed_static_gen6[][4] = { 140#include "exa_wm_src_affine.g6b" 141#include "exa_wm_src_sample_argb.g6b" 142#include "exa_wm_yuv_rgb.g6b" 143#include "exa_wm_write.g6b" 144}; 145 146static const uint32_t ps_kernel_planar_static_gen6[][4] = { 147#include "exa_wm_src_affine.g6b" 148#include "exa_wm_src_sample_planar.g6b" 149#include "exa_wm_yuv_rgb.g6b" 150#include "exa_wm_write.g6b" 151}; 152 153/* programs for Ivybridge */ 154static const uint32_t ps_kernel_packed_static_gen7[][4] = { 155#include "exa_wm_src_affine.g7b" 156#include "exa_wm_src_sample_argb.g7b" 157#include 
"exa_wm_yuv_rgb.g7b" 158#include "exa_wm_write.g7b" 159}; 160 161static const uint32_t ps_kernel_planar_static_gen7[][4] = { 162#include "exa_wm_src_affine.g7b" 163#include "exa_wm_src_sample_planar.g7b" 164#include "exa_wm_yuv_rgb.g7b" 165#include "exa_wm_write.g7b" 166}; 167 168#ifndef MAX2 169#define MAX2(a,b) ((a) > (b) ? (a) : (b)) 170#endif 171 172#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct brw_surface_state), 32) 173#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) 174#define SURFACE_STATE_PADDED_SIZE MAX2(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) 175#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) 176 177static uint32_t float_to_uint(float f) 178{ 179 union { 180 uint32_t i; 181 float f; 182 } x; 183 x.f = f; 184 return x.i; 185} 186 187#if 0 188static struct { 189 uint32_t svg_ctl; 190 char *name; 191} svg_ctl_bits[] = { 192 { 193 BRW_SVG_CTL_GS_BA, "General State Base Address"}, { 194 BRW_SVG_CTL_SS_BA, "Surface State Base Address"}, { 195 BRW_SVG_CTL_IO_BA, "Indirect Object Base Address"}, { 196 BRW_SVG_CTL_GS_AUB, "Generate State Access Upper Bound"}, { 197 BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound"}, { 198 BRW_SVG_CTL_SIP, "System Instruction Pointer"}, { 1990, 0},}; 200 201static void brw_debug(ScrnInfoPtr scrn, char *when) 202{ 203 intel_screen_private *intel = intel_get_screen_private(scrn); 204 int i; 205 uint32_t v; 206 207 ErrorF("brw_debug: %s\n", when); 208 for (i = 0; svg_ctl_bits[i].name; i++) { 209 OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl); 210 v = INREG(BRW_SVG_RDATA); 211 ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v); 212 } 213} 214#endif 215 216#define WATCH_SF 0 217#define WATCH_WIZ 0 218#define WATCH_STATS 0 219 220static void i965_pre_draw_debug(ScrnInfoPtr scrn) 221{ 222#if 0 223 intel_screen_private *intel = intel_get_screen_private(scrn); 224#endif 225 226#if 0 227 ErrorF("before EU_ATT 0x%08x%08x EU_ATT_DATA 
0x%08x%08x\n", 228 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 229 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 230 231 OUTREG(BRW_VF_CTL, 232 BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID | 233 BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX | 234 BRW_VF_CTL_SNAPSHOT_ENABLE); 235 OUTREG(BRW_VF_STRG_VAL, 0); 236#endif 237 238#if 0 239 OUTREG(BRW_VS_CTL, 240 BRW_VS_CTL_SNAPSHOT_ALL_THREADS | 241 BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT | 242 BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE); 243 244 OUTREG(BRW_VS_STRG_VAL, 0); 245#endif 246 247#if WATCH_SF 248 OUTREG(BRW_SF_CTL, 249 BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT | 250 BRW_SF_CTL_SNAPSHOT_ALL_THREADS | 251 BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE); 252 OUTREG(BRW_SF_STRG_VAL, 0); 253#endif 254 255#if WATCH_WIZ 256 OUTREG(BRW_WIZ_CTL, 257 BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE | 258 BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS | BRW_WIZ_CTL_SNAPSHOT_ENABLE); 259 OUTREG(BRW_WIZ_STRG_VAL, (box_x1) | (box_y1 << 16)); 260#endif 261 262#if 0 263 OUTREG(BRW_TS_CTL, 264 BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR | 265 BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS | 266 BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS | 267 BRW_TS_CTL_SNAPSHOT_ENABLE); 268#endif 269} 270 271static void i965_post_draw_debug(ScrnInfoPtr scrn) 272{ 273#if 0 274 intel_screen_private *intel = intel_get_screen_private(scrn); 275#endif 276 277#if 0 278 for (j = 0; j < 100000; j++) { 279 ctl = INREG(BRW_VF_CTL); 280 if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE) 281 break; 282 } 283 284 rdata = INREG(BRW_VF_RDATA); 285 OUTREG(BRW_VF_CTL, 0); 286 ErrorF("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata); 287#endif 288 289#if 0 290 for (j = 0; j < 1000000; j++) { 291 ctl = INREG(BRW_VS_CTL); 292 if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE) 293 break; 294 } 295 296 rdata = INREG(BRW_VS_RDATA); 297 for (k = 0; k <= 3; k++) { 298 OUTREG(BRW_VS_CTL, BRW_VS_CTL_SNAPSHOT_COMPLETE | (k << 8)); 299 rdata = INREG(BRW_VS_RDATA); 300 ErrorF("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata); 301 } 302 303 OUTREG(BRW_VS_CTL, 0); 304#endif 
305 306#if WATCH_SF 307 for (j = 0; j < 1000000; j++) { 308 ctl = INREG(BRW_SF_CTL); 309 if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE) 310 break; 311 } 312 313 for (k = 0; k <= 7; k++) { 314 OUTREG(BRW_SF_CTL, BRW_SF_CTL_SNAPSHOT_COMPLETE | (k << 8)); 315 rdata = INREG(BRW_SF_RDATA); 316 ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata); 317 } 318 319 OUTREG(BRW_SF_CTL, 0); 320#endif 321 322#if WATCH_WIZ 323 for (j = 0; j < 100000; j++) { 324 ctl = INREG(BRW_WIZ_CTL); 325 if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE) 326 break; 327 } 328 329 rdata = INREG(BRW_WIZ_RDATA); 330 OUTREG(BRW_WIZ_CTL, 0); 331 ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata); 332#endif 333 334#if 0 335 for (j = 0; j < 100000; j++) { 336 ctl = INREG(BRW_TS_CTL); 337 if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE) 338 break; 339 } 340 341 rdata = INREG(BRW_TS_RDATA); 342 OUTREG(BRW_TS_CTL, 0); 343 ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata); 344 345 ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", 346 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 347 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 348#endif 349 350#if 0 351 for (j = 0; j < 256; j++) { 352 OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT); 353 rdata = INREG(BRW_TD_RDATA); 354 ErrorF("TD_RDATA(%d): 0x%08x\n", j, rdata); 355 } 356#endif 357} 358 359/* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it. 360 * A VUE consists of a 256-bit vertex header followed by the vertex data, 361 * which in our case is 4 floats (128 bits), thus a single 512-bit URB 362 * entry. 363 */ 364#define URB_VS_ENTRIES 8 365#define URB_VS_ENTRY_SIZE 1 366 367#define URB_GS_ENTRIES 0 368#define URB_GS_ENTRY_SIZE 0 369 370#define URB_CLIP_ENTRIES 0 371#define URB_CLIP_ENTRY_SIZE 0 372 373/* The SF kernel we use outputs only 4 256-bit registers, leading to an 374 * entry size of 2 512-bit URBs. 
We don't need to have many entries to 375 * output as we're generally working on large rectangles and don't care 376 * about having WM threads running on different rectangles simultaneously. 377 */ 378#define URB_SF_ENTRIES 1 379#define URB_SF_ENTRY_SIZE 2 380 381#define URB_CS_ENTRIES 0 382#define URB_CS_ENTRY_SIZE 0 383 384static void i965_create_dst_surface_state(ScrnInfoPtr scrn, 385 PixmapPtr pixmap, 386 drm_intel_bo *surf_bo, 387 uint32_t offset) 388{ 389 intel_screen_private *intel = intel_get_screen_private(scrn); 390 struct brw_surface_state dest_surf_state; 391 drm_intel_bo *pixmap_bo = intel_uxa_get_pixmap_bo(pixmap); 392 assert(pixmap_bo != NULL); 393 394 memset(&dest_surf_state, 0, sizeof(dest_surf_state)); 395 396 dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; 397 dest_surf_state.ss0.data_return_format = 398 BRW_SURFACERETURNFORMAT_FLOAT32; 399 if (intel->cpp == 2) { 400 dest_surf_state.ss0.surface_format = 401 BRW_SURFACEFORMAT_B5G6R5_UNORM; 402 } else { 403 dest_surf_state.ss0.surface_format = 404 BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 405 } 406 dest_surf_state.ss0.writedisable_alpha = 0; 407 dest_surf_state.ss0.writedisable_red = 0; 408 dest_surf_state.ss0.writedisable_green = 0; 409 dest_surf_state.ss0.writedisable_blue = 0; 410 dest_surf_state.ss0.color_blend = 1; 411 dest_surf_state.ss0.vert_line_stride = 0; 412 dest_surf_state.ss0.vert_line_stride_ofs = 0; 413 dest_surf_state.ss0.mipmap_layout_mode = 0; 414 dest_surf_state.ss0.render_cache_read_mode = 0; 415 416 dest_surf_state.ss1.base_addr = 417 intel_uxa_emit_reloc(surf_bo, offset + offsetof(struct brw_surface_state, ss1), 418 pixmap_bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); 419 420 dest_surf_state.ss2.height = pixmap->drawable.height - 1; 421 dest_surf_state.ss2.width = pixmap->drawable.width - 1; 422 dest_surf_state.ss2.mip_count = 0; 423 dest_surf_state.ss2.render_target_rotation = 0; 424 dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 425 
dest_surf_state.ss3.tiled_surface = intel_uxa_pixmap_tiled(pixmap); 426 dest_surf_state.ss3.tile_walk = 0; /* TileX */ 427 428 dri_bo_subdata(surf_bo, 429 offset, sizeof(dest_surf_state), 430 &dest_surf_state); 431} 432 433static void i965_create_src_surface_state(ScrnInfoPtr scrn, 434 drm_intel_bo * src_bo, 435 uint32_t src_offset, 436 int src_width, 437 int src_height, 438 int src_pitch, 439 uint32_t src_surf_format, 440 drm_intel_bo *surface_bo, 441 uint32_t offset) 442{ 443 struct brw_surface_state src_surf_state; 444 445 memset(&src_surf_state, 0, sizeof(src_surf_state)); 446 447 /* Set up the source surface state buffer */ 448 src_surf_state.ss0.surface_type = BRW_SURFACE_2D; 449 src_surf_state.ss0.surface_format = src_surf_format; 450 src_surf_state.ss0.writedisable_alpha = 0; 451 src_surf_state.ss0.writedisable_red = 0; 452 src_surf_state.ss0.writedisable_green = 0; 453 src_surf_state.ss0.writedisable_blue = 0; 454 src_surf_state.ss0.color_blend = 1; 455 src_surf_state.ss0.vert_line_stride = 0; 456 src_surf_state.ss0.vert_line_stride_ofs = 0; 457 src_surf_state.ss0.mipmap_layout_mode = 0; 458 src_surf_state.ss0.render_cache_read_mode = 0; 459 460 src_surf_state.ss2.width = src_width - 1; 461 src_surf_state.ss2.height = src_height - 1; 462 src_surf_state.ss2.mip_count = 0; 463 src_surf_state.ss2.render_target_rotation = 0; 464 src_surf_state.ss3.pitch = src_pitch - 1; 465 466 if (src_bo) { 467 src_surf_state.ss1.base_addr = 468 intel_uxa_emit_reloc(surface_bo, 469 offset + offsetof(struct brw_surface_state, ss1), 470 src_bo, src_offset, 471 I915_GEM_DOMAIN_SAMPLER, 0); 472 } else { 473 src_surf_state.ss1.base_addr = src_offset; 474 } 475 476 dri_bo_subdata(surface_bo, 477 offset, sizeof(src_surf_state), 478 &src_surf_state); 479} 480 481static void gen7_create_dst_surface_state(ScrnInfoPtr scrn, 482 PixmapPtr pixmap, 483 drm_intel_bo *surf_bo, 484 uint32_t offset) 485{ 486 intel_screen_private *intel = intel_get_screen_private(scrn); 487 struct 
gen7_surface_state dest_surf_state; 488 drm_intel_bo *pixmap_bo = intel_uxa_get_pixmap_bo(pixmap); 489 assert(pixmap_bo != NULL); 490 491 memset(&dest_surf_state, 0, sizeof(dest_surf_state)); 492 493 dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; 494 dest_surf_state.ss0.tiled_surface = intel_uxa_pixmap_tiled(pixmap); 495 dest_surf_state.ss0.tile_walk = 0; /* TileX */ 496 497 if (intel->cpp == 2) { 498 dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; 499 } else { 500 dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 501 } 502 503 dest_surf_state.ss1.base_addr = 504 intel_uxa_emit_reloc(surf_bo, 505 offset + offsetof(struct gen7_surface_state, ss1), 506 pixmap_bo, 0, 507 I915_GEM_DOMAIN_SAMPLER, 0); 508 509 dest_surf_state.ss2.height = pixmap->drawable.height - 1; 510 dest_surf_state.ss2.width = pixmap->drawable.width - 1; 511 512 dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 513 514 if (IS_HSW(intel)) { 515 dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; 516 dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; 517 dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; 518 dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 519 } 520 521 dri_bo_subdata(surf_bo, 522 offset, sizeof(dest_surf_state), 523 &dest_surf_state); 524} 525 526static void gen7_create_src_surface_state(ScrnInfoPtr scrn, 527 drm_intel_bo * src_bo, 528 uint32_t src_offset, 529 int src_width, 530 int src_height, 531 int src_pitch, 532 uint32_t src_surf_format, 533 drm_intel_bo *surface_bo, 534 uint32_t offset) 535{ 536 intel_screen_private * const intel = intel_get_screen_private(scrn); 537 struct gen7_surface_state src_surf_state; 538 539 memset(&src_surf_state, 0, sizeof(src_surf_state)); 540 541 src_surf_state.ss0.surface_type = BRW_SURFACE_2D; 542 src_surf_state.ss0.surface_format = src_surf_format; 543 544 if (src_bo) { 545 src_surf_state.ss1.base_addr = 546 intel_uxa_emit_reloc(surface_bo, 547 offset + 
offsetof(struct gen7_surface_state, ss1), 548 src_bo, src_offset, 549 I915_GEM_DOMAIN_SAMPLER, 0); 550 } else { 551 src_surf_state.ss1.base_addr = src_offset; 552 } 553 554 src_surf_state.ss2.width = src_width - 1; 555 src_surf_state.ss2.height = src_height - 1; 556 557 src_surf_state.ss3.pitch = src_pitch - 1; 558 559 if (IS_HSW(intel)) { 560 src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; 561 src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; 562 src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; 563 src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 564 } 565 566 dri_bo_subdata(surface_bo, 567 offset, sizeof(src_surf_state), 568 &src_surf_state); 569} 570 571static void i965_create_binding_table(ScrnInfoPtr scrn, 572 drm_intel_bo *bind_bo, 573 int n_surf) 574{ 575 uint32_t binding_table[n_surf]; 576 int i; 577 578 /* Set up a binding table for our surfaces. Only the PS will use it */ 579 for (i = 0; i < n_surf; i++) 580 binding_table[i] = i * SURFACE_STATE_PADDED_SIZE; 581 582 dri_bo_subdata(bind_bo, 583 n_surf * SURFACE_STATE_PADDED_SIZE, 584 sizeof(binding_table), binding_table); 585} 586 587static drm_intel_bo *i965_create_sampler_state(ScrnInfoPtr scrn) 588{ 589 intel_screen_private *intel = intel_get_screen_private(scrn); 590 struct brw_sampler_state sampler_state; 591 592 memset(&sampler_state, 0, sizeof(sampler_state)); 593 sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; 594 sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; 595 sampler_state.ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 596 sampler_state.ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 597 sampler_state.ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 598 599 return intel_uxa_bo_alloc_for_data(intel, 600 &sampler_state, sizeof(sampler_state), 601 "textured video sampler state"); 602} 603 604static drm_intel_bo *gen7_create_sampler_state(ScrnInfoPtr scrn) 605{ 606 intel_screen_private *intel = intel_get_screen_private(scrn); 607 struct gen7_sampler_state 
sampler_state; 608 609 memset(&sampler_state, 0, sizeof(sampler_state)); 610 sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; 611 sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; 612 sampler_state.ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 613 sampler_state.ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 614 sampler_state.ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 615 616 return intel_uxa_bo_alloc_for_data(intel, 617 &sampler_state, sizeof(sampler_state), 618 "textured video sampler state"); 619} 620 621static drm_intel_bo *i965_create_vs_state(ScrnInfoPtr scrn) 622{ 623 intel_screen_private *intel = intel_get_screen_private(scrn); 624 struct brw_vs_unit_state vs_state; 625 626 /* Set up the vertex shader to be disabled (passthrough) */ 627 memset(&vs_state, 0, sizeof(vs_state)); 628 if (IS_GEN5(intel)) 629 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; 630 else 631 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 632 vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 633 vs_state.vs6.vs_enable = 0; 634 vs_state.vs6.vert_cache_disable = 1; 635 636 return intel_uxa_bo_alloc_for_data(intel, 637 &vs_state, sizeof(vs_state), 638 "textured video vs state"); 639} 640 641static drm_intel_bo *i965_create_program(ScrnInfoPtr scrn, 642 const uint32_t * program, 643 unsigned int program_size) 644{ 645 intel_screen_private *intel = intel_get_screen_private(scrn); 646 return intel_uxa_bo_alloc_for_data(intel, 647 program, program_size, 648 "textured video program"); 649} 650 651static drm_intel_bo *i965_create_sf_state(ScrnInfoPtr scrn) 652{ 653 intel_screen_private *intel = intel_get_screen_private(scrn); 654 drm_intel_bo *sf_bo, *kernel_bo; 655 struct brw_sf_unit_state sf_state; 656 657 if (IS_GEN5(intel)) 658 kernel_bo = i965_create_program(scrn, 659 &sf_kernel_static_gen5[0][0], 660 sizeof(sf_kernel_static_gen5)); 661 else 662 kernel_bo = i965_create_program(scrn, 663 &sf_kernel_static[0][0], 664 sizeof(sf_kernel_static)); 665 if (!kernel_bo) 666 
return NULL; 667 668 sf_bo = drm_intel_bo_alloc(intel->bufmgr, 669 "textured video sf state", 4096, 670 sizeof(sf_state)); 671 if (sf_bo == NULL) { 672 drm_intel_bo_unreference(kernel_bo); 673 return NULL; 674 } 675 676 /* Set up the SF kernel to do coord interp: for each attribute, 677 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 678 * back to SF which then hands pixels off to WM. 679 */ 680 memset(&sf_state, 0, sizeof(sf_state)); 681 sf_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 682 sf_state.thread0.kernel_start_pointer = 683 intel_uxa_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0), 684 kernel_bo, sf_state.thread0.grf_reg_count << 1, 685 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 686 sf_state.sf1.single_program_flow = 1; /* XXX */ 687 sf_state.sf1.binding_table_entry_count = 0; 688 sf_state.sf1.thread_priority = 0; 689 sf_state.sf1.floating_point_mode = 0; /* Mesa does this */ 690 sf_state.sf1.illegal_op_exception_enable = 1; 691 sf_state.sf1.mask_stack_exception_enable = 1; 692 sf_state.sf1.sw_exception_enable = 1; 693 sf_state.thread2.per_thread_scratch_space = 0; 694 /* scratch space is not used in our kernel */ 695 sf_state.thread2.scratch_space_base_pointer = 0; 696 sf_state.thread3.const_urb_entry_read_length = 0; /* no const URBs */ 697 sf_state.thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 698 sf_state.thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 699 sf_state.thread3.urb_entry_read_offset = 0; 700 sf_state.thread3.dispatch_grf_start_reg = 3; 701 sf_state.thread4.max_threads = SF_MAX_THREADS - 1; 702 sf_state.thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 703 sf_state.thread4.nr_urb_entries = URB_SF_ENTRIES; 704 sf_state.thread4.stats_enable = 1; 705 sf_state.sf5.viewport_transform = FALSE; /* skip viewport */ 706 sf_state.sf6.cull_mode = BRW_CULLMODE_NONE; 707 sf_state.sf6.scissor = 0; 708 sf_state.sf7.trifan_pv = 2; 709 sf_state.sf6.dest_org_vbias = 0x8; 710 
sf_state.sf6.dest_org_hbias = 0x8; 711 712 dri_bo_subdata(sf_bo, 0, sizeof(sf_state), &sf_state); 713 return sf_bo; 714} 715 716static drm_intel_bo *i965_create_wm_state(ScrnInfoPtr scrn, 717 drm_intel_bo * sampler_bo, 718 Bool is_packed) 719{ 720 intel_screen_private *intel = intel_get_screen_private(scrn); 721 drm_intel_bo *wm_bo, *kernel_bo; 722 struct brw_wm_unit_state wm_state; 723 724 if (is_packed) { 725 if (IS_GEN5(intel)) 726 kernel_bo = 727 i965_create_program(scrn, 728 &ps_kernel_packed_static_gen5[0] 729 [0], 730 sizeof 731 (ps_kernel_packed_static_gen5)); 732 else 733 kernel_bo = 734 i965_create_program(scrn, 735 &ps_kernel_packed_static[0][0], 736 sizeof 737 (ps_kernel_packed_static)); 738 } else { 739 if (IS_GEN5(intel)) 740 kernel_bo = 741 i965_create_program(scrn, 742 &ps_kernel_planar_static_gen5[0] 743 [0], 744 sizeof 745 (ps_kernel_planar_static_gen5)); 746 else 747 kernel_bo = 748 i965_create_program(scrn, 749 &ps_kernel_planar_static[0][0], 750 sizeof 751 (ps_kernel_planar_static)); 752 } 753 if (!kernel_bo) 754 return NULL; 755 756 wm_bo = drm_intel_bo_alloc(intel->bufmgr, 757 "textured video wm state", 758 sizeof(wm_state), 0); 759 if (wm_bo == NULL) { 760 drm_intel_bo_unreference(kernel_bo); 761 return NULL; 762 } 763 764 memset(&wm_state, 0, sizeof(wm_state)); 765 wm_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 766 wm_state.thread0.kernel_start_pointer = 767 intel_uxa_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0), 768 kernel_bo, wm_state.thread0.grf_reg_count << 1, 769 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 770 wm_state.thread1.single_program_flow = 1; /* XXX */ 771 if (is_packed) 772 wm_state.thread1.binding_table_entry_count = 2; 773 else 774 wm_state.thread1.binding_table_entry_count = 7; 775 776 /* binding table entry count is only used for prefetching, and it has to 777 * be set 0 for Ironlake 778 */ 779 if (IS_GEN5(intel)) 780 wm_state.thread1.binding_table_entry_count = 0; 781 782 /* Though we 
never use the scratch space in our WM kernel, it has to be 783 * set, and the minimum allocation is 1024 bytes. 784 */ 785 wm_state.thread2.scratch_space_base_pointer = 0; 786 wm_state.thread2.per_thread_scratch_space = 0; /* 1024 bytes */ 787 wm_state.thread3.dispatch_grf_start_reg = 3; /* XXX */ 788 wm_state.thread3.const_urb_entry_read_length = 0; 789 wm_state.thread3.const_urb_entry_read_offset = 0; 790 wm_state.thread3.urb_entry_read_length = 1; /* XXX */ 791 wm_state.thread3.urb_entry_read_offset = 0; /* XXX */ 792 wm_state.wm4.stats_enable = 1; 793 wm_state.wm4.sampler_state_pointer = 794 intel_uxa_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4), 795 sampler_bo, 0, 796 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 797 if (IS_GEN5(intel)) 798 wm_state.wm4.sampler_count = 0; 799 else 800 wm_state.wm4.sampler_count = 1; /* 1-4 samplers used */ 801 wm_state.wm5.max_threads = PS_MAX_THREADS - 1; 802 wm_state.wm5.thread_dispatch_enable = 1; 803 wm_state.wm5.enable_16_pix = 1; 804 wm_state.wm5.enable_8_pix = 0; 805 wm_state.wm5.early_depth_test = 1; 806 807 dri_bo_subdata(wm_bo, 0, sizeof(wm_state), &wm_state); 808 drm_intel_bo_unreference(kernel_bo); 809 return wm_bo; 810} 811 812static drm_intel_bo *i965_create_cc_vp_state(ScrnInfoPtr scrn) 813{ 814 intel_screen_private *intel = intel_get_screen_private(scrn); 815 struct brw_cc_viewport cc_viewport; 816 817 memset(&cc_viewport, 0, sizeof(cc_viewport)); 818 cc_viewport.min_depth = -1.e35; 819 cc_viewport.max_depth = 1.e35; 820 821 return intel_uxa_bo_alloc_for_data(intel, 822 &cc_viewport, sizeof(cc_viewport), 823 "textured video cc viewport"); 824} 825 826static drm_intel_bo *i965_create_cc_state(ScrnInfoPtr scrn) 827{ 828 intel_screen_private *intel = intel_get_screen_private(scrn); 829 drm_intel_bo *cc_bo, *cc_vp_bo; 830 struct brw_cc_unit_state cc_state; 831 832 cc_vp_bo = i965_create_cc_vp_state(scrn); 833 if (!cc_vp_bo) 834 return NULL; 835 836 cc_bo = drm_intel_bo_alloc(intel->bufmgr, 837 "textured video 
cc state", 838 sizeof(cc_state), 0); 839 if (cc_bo == NULL){ 840 drm_intel_bo_unreference(cc_vp_bo); 841 return NULL; 842 } 843 844 /* Color calculator state */ 845 memset(&cc_state, 0, sizeof(cc_state)); 846 cc_state.cc0.stencil_enable = 0; /* disable stencil */ 847 cc_state.cc2.depth_test = 0; /* disable depth test */ 848 cc_state.cc2.logicop_enable = 1; /* enable logic op */ 849 cc_state.cc3.ia_blend_enable = 1; /* blend alpha just like colors */ 850 cc_state.cc3.blend_enable = 0; /* disable color blend */ 851 cc_state.cc3.alpha_test = 0; /* disable alpha test */ 852 cc_state.cc4.cc_viewport_state_offset = 853 intel_uxa_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4), 854 cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 855 cc_state.cc5.dither_enable = 0; /* disable dither */ 856 cc_state.cc5.logicop_func = 0xc; /* WHITE */ 857 cc_state.cc5.statistics_enable = 1; 858 cc_state.cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 859 cc_state.cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE; 860 cc_state.cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE; 861 862 dri_bo_subdata(cc_bo, 0, sizeof(cc_state), &cc_state); 863 drm_intel_bo_unreference(cc_vp_bo); 864 865 return cc_bo; 866} 867 868static void 869i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_table_bo, int n_src_surf, PixmapPtr pixmap) 870{ 871 intel_screen_private *intel = intel_get_screen_private(scrn); 872 int urb_vs_start, urb_vs_size; 873 int urb_gs_start, urb_gs_size; 874 int urb_clip_start, urb_clip_size; 875 int urb_sf_start, urb_sf_size; 876 int urb_cs_start, urb_cs_size; 877 int pipe_ctl; 878 879 IntelEmitInvarientState(scrn); 880 intel->last_3d = LAST_3D_VIDEO; 881 intel->needs_3d_invariant = TRUE; 882 883 urb_vs_start = 0; 884 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 885 urb_gs_start = urb_vs_start + urb_vs_size; 886 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 887 urb_clip_start = urb_gs_start + urb_gs_size; 888 urb_clip_size = URB_CLIP_ENTRIES * 
URB_CLIP_ENTRY_SIZE; 889 urb_sf_start = urb_clip_start + urb_clip_size; 890 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 891 urb_cs_start = urb_sf_start + urb_sf_size; 892 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 893 894 OUT_BATCH(MI_FLUSH | 895 MI_STATE_INSTRUCTION_CACHE_FLUSH | 896 BRW_MI_GLOBAL_SNAPSHOT_RESET); 897 OUT_BATCH(MI_NOOP); 898 899 /* brw_debug (scrn, "before base address modify"); */ 900 /* Match Mesa driver setup */ 901 if (INTEL_INFO(intel)->gen >= 045) 902 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 903 else 904 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 905 906 /* Mesa does this. Who knows... */ 907 OUT_BATCH(BRW_CS_URB_STATE | 0); 908 OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ 909 (0 << 0)); /* Number of URB Entries */ 910 911 /* Zero out the two base address registers so all offsets are 912 * absolute 913 */ 914 if (IS_GEN5(intel)) { 915 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 916 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 917 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 918 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 919 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 920 /* general state max addr, disabled */ 921 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 922 /* media object state max addr, disabled */ 923 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 924 /* Instruction max addr, disabled */ 925 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 926 } else { 927 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 928 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 929 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 930 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 931 /* general state max addr, disabled */ 932 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 933 /* media object 
state max addr, disabled */ 934 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 935 } 936 937 /* Set system instruction pointer */ 938 OUT_BATCH(BRW_STATE_SIP | 0); 939 /* system instruction pointer */ 940 OUT_RELOC(intel->video.gen4_sip_kernel_bo, 941 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 942 943 /* brw_debug (scrn, "after base address modify"); */ 944 945 if (IS_GEN5(intel)) 946 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE; 947 else 948 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; 949 950 /* Pipe control */ 951 OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); 952 OUT_BATCH(0); /* Destination address */ 953 OUT_BATCH(0); /* Immediate data low DW */ 954 OUT_BATCH(0); /* Immediate data high DW */ 955 956 /* Binding table pointers */ 957 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 958 OUT_BATCH(0); /* vs */ 959 OUT_BATCH(0); /* gs */ 960 OUT_BATCH(0); /* clip */ 961 OUT_BATCH(0); /* sf */ 962 /* Only the PS uses the binding table */ 963 OUT_BATCH((n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 964 965 /* Blend constant color (magenta is fun) */ 966 OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3); 967 OUT_BATCH(float_to_uint(1.0)); 968 OUT_BATCH(float_to_uint(0.0)); 969 OUT_BATCH(float_to_uint(1.0)); 970 OUT_BATCH(float_to_uint(1.0)); 971 972 /* The drawing rectangle clipping is always on. Set it to values that 973 * shouldn't do any clipping. 
974 */ 975 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ 976 OUT_BATCH(0x00000000); /* ymin, xmin */ 977 OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ 978 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 979 980 /* skip the depth buffer */ 981 /* skip the polygon stipple */ 982 /* skip the polygon stipple offset */ 983 /* skip the line stipple */ 984 985 /* Set the pointers to the 3d pipeline state */ 986 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 987 OUT_RELOC(intel->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 988 /* disable GS, resulting in passthrough */ 989 OUT_BATCH(BRW_GS_DISABLE); 990 /* disable CLIP, resulting in passthrough */ 991 OUT_BATCH(BRW_CLIP_DISABLE); 992 OUT_RELOC(intel->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 993 if (n_src_surf == 1) 994 OUT_RELOC(intel->video.gen4_wm_packed_bo, 995 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 996 else 997 OUT_RELOC(intel->video.gen4_wm_planar_bo, 998 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 999 OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1000 1001 /* URB fence */ 1002 OUT_BATCH(BRW_URB_FENCE | 1003 UF0_CS_REALLOC | 1004 UF0_SF_REALLOC | 1005 UF0_CLIP_REALLOC | UF0_GS_REALLOC | UF0_VS_REALLOC | 1); 1006 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 1007 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 1008 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 1009 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 1010 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); 1011 1012 /* Constant buffer state */ 1013 OUT_BATCH(BRW_CS_URB_STATE | 0); 1014 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); 1015 1016 /* Set up our vertex elements, sourced from the single vertex buffer. 
*/ 1017 1018 if (IS_GEN5(intel)) { 1019 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 1020 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1021 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1022 VE0_VALID | 1023 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1024 (0 << VE0_OFFSET_SHIFT)); 1025 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1026 | (BRW_VFCOMPONENT_STORE_SRC << 1027 VE1_VFCOMPONENT_1_SHIFT) | 1028 (BRW_VFCOMPONENT_STORE_1_FLT << 1029 VE1_VFCOMPONENT_2_SHIFT) | 1030 (BRW_VFCOMPONENT_STORE_1_FLT << 1031 VE1_VFCOMPONENT_3_SHIFT)); 1032 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1033 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1034 VE0_VALID | 1035 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1036 (8 << VE0_OFFSET_SHIFT)); 1037 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1038 | (BRW_VFCOMPONENT_STORE_SRC << 1039 VE1_VFCOMPONENT_1_SHIFT) | 1040 (BRW_VFCOMPONENT_STORE_1_FLT << 1041 VE1_VFCOMPONENT_2_SHIFT) | 1042 (BRW_VFCOMPONENT_STORE_1_FLT << 1043 VE1_VFCOMPONENT_3_SHIFT)); 1044 } else { 1045 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 1046 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1047 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1048 VE0_VALID | 1049 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1050 (0 << VE0_OFFSET_SHIFT)); 1051 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1052 | (BRW_VFCOMPONENT_STORE_SRC << 1053 VE1_VFCOMPONENT_1_SHIFT) | 1054 (BRW_VFCOMPONENT_STORE_1_FLT << 1055 VE1_VFCOMPONENT_2_SHIFT) | 1056 (BRW_VFCOMPONENT_STORE_1_FLT << 1057 VE1_VFCOMPONENT_3_SHIFT) | (0 << 1058 VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1059 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1060 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1061 VE0_VALID | 1062 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1063 (8 << VE0_OFFSET_SHIFT)); 1064 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1065 | (BRW_VFCOMPONENT_STORE_SRC << 1066 
VE1_VFCOMPONENT_1_SHIFT) | 1067 (BRW_VFCOMPONENT_STORE_1_FLT << 1068 VE1_VFCOMPONENT_2_SHIFT) | 1069 (BRW_VFCOMPONENT_STORE_1_FLT << 1070 VE1_VFCOMPONENT_3_SHIFT) | (4 << 1071 VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1072 } 1073} 1074 1075void 1076I965DisplayVideoTextured(ScrnInfoPtr scrn, 1077 intel_adaptor_private *adaptor_priv, int id, 1078 RegionPtr dstRegion, 1079 short width, short height, 1080 int video_pitch, int video_pitch2, 1081 short src_w, short src_h, 1082 short drw_w, short drw_h, PixmapPtr pixmap) 1083{ 1084 intel_screen_private *intel = intel_get_screen_private(scrn); 1085 BoxPtr pbox; 1086 int nbox, dxo, dyo, pix_xoff, pix_yoff; 1087 float src_scale_x, src_scale_y; 1088 int src_surf; 1089 int n_src_surf; 1090 uint32_t src_surf_format; 1091 uint32_t src_surf_base[6]; 1092 int src_width[6]; 1093 int src_height[6]; 1094 int src_pitch[6]; 1095 drm_intel_bo *surface_state_binding_table_bo; 1096 1097#if 0 1098 ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, 1099 height, video_pitch); 1100#endif 1101 1102#if 0 1103 /* enable debug */ 1104 OUTREG(INST_PM, (1 << (16 + 4)) | (1 << 4)); 1105 ErrorF("INST_PM 0x%08x\n", INREG(INST_PM)); 1106#endif 1107 1108 src_surf_base[0] = adaptor_priv->YBufOffset; 1109 src_surf_base[1] = adaptor_priv->YBufOffset; 1110 src_surf_base[2] = adaptor_priv->VBufOffset; 1111 src_surf_base[3] = adaptor_priv->VBufOffset; 1112 src_surf_base[4] = adaptor_priv->UBufOffset; 1113 src_surf_base[5] = adaptor_priv->UBufOffset; 1114#if 0 1115 ErrorF("base 0 0x%x base 1 0x%x base 2 0x%x\n", 1116 src_surf_base[0], src_surf_base[1], src_surf_base[2]); 1117#endif 1118 1119 if (is_planar_fourcc(id)) { 1120 src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; 1121 src_width[1] = src_width[0] = width; 1122 src_height[1] = src_height[0] = height; 1123 src_pitch[1] = src_pitch[0] = video_pitch2; 1124 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1125 width / 2; 1126 src_height[4] = src_height[5] = src_height[2] = 
src_height[3] = 1127 height / 2; 1128 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1129 video_pitch; 1130 n_src_surf = 6; 1131 } else { 1132 if (id == FOURCC_UYVY) 1133 src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; 1134 else 1135 src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; 1136 1137 src_width[0] = width; 1138 src_height[0] = height; 1139 src_pitch[0] = video_pitch; 1140 n_src_surf = 1; 1141 } 1142 1143#if 0 1144 ErrorF("dst surf: 0x%08x\n", state_base_offset + dest_surf_offset); 1145 ErrorF("src surf: 0x%08x\n", state_base_offset + src_surf_offset); 1146#endif 1147 1148 /* We'll be poking the state buffers that could be in use by the 3d 1149 * hardware here, but we should have synced the 3D engine already in 1150 * I830PutImage. 1151 */ 1152 1153 surface_state_binding_table_bo = 1154 drm_intel_bo_alloc(intel->bufmgr, 1155 "surface state & binding table", 1156 (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), 1157 4096); 1158 1159 if (!surface_state_binding_table_bo) 1160 return; 1161 1162 i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); 1163 1164 for (src_surf = 0; src_surf < n_src_surf; src_surf++) { 1165 i965_create_src_surface_state(scrn, 1166 adaptor_priv->buf, 1167 src_surf_base[src_surf], 1168 src_width[src_surf], 1169 src_height[src_surf], 1170 src_pitch[src_surf], 1171 src_surf_format, 1172 surface_state_binding_table_bo, 1173 (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1174 } 1175 1176 i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); 1177 1178 if (intel->video.gen4_sampler_bo == NULL) 1179 intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn); 1180 if (intel->video.gen4_sip_kernel_bo == NULL) { 1181 intel->video.gen4_sip_kernel_bo = 1182 i965_create_program(scrn, &sip_kernel_static[0][0], 1183 sizeof(sip_kernel_static)); 1184 if (!intel->video.gen4_sip_kernel_bo) { 1185 drm_intel_bo_unreference(surface_state_binding_table_bo); 1186 return; 1187 
} 1188 } 1189 1190 if (intel->video.gen4_vs_bo == NULL) { 1191 intel->video.gen4_vs_bo = i965_create_vs_state(scrn); 1192 if (!intel->video.gen4_vs_bo) { 1193 drm_intel_bo_unreference(surface_state_binding_table_bo); 1194 return; 1195 } 1196 } 1197 if (intel->video.gen4_sf_bo == NULL) { 1198 intel->video.gen4_sf_bo = i965_create_sf_state(scrn); 1199 if (!intel->video.gen4_sf_bo) { 1200 drm_intel_bo_unreference(surface_state_binding_table_bo); 1201 return; 1202 } 1203 } 1204 if (intel->video.gen4_wm_packed_bo == NULL) { 1205 intel->video.gen4_wm_packed_bo = 1206 i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, 1207 TRUE); 1208 if (!intel->video.gen4_wm_packed_bo) { 1209 drm_intel_bo_unreference(surface_state_binding_table_bo); 1210 return; 1211 } 1212 } 1213 1214 if (intel->video.gen4_wm_planar_bo == NULL) { 1215 intel->video.gen4_wm_planar_bo = 1216 i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, 1217 FALSE); 1218 if (!intel->video.gen4_wm_planar_bo) { 1219 drm_intel_bo_unreference(surface_state_binding_table_bo); 1220 return; 1221 } 1222 } 1223 1224 if (intel->video.gen4_cc_bo == NULL) { 1225 intel->video.gen4_cc_bo = i965_create_cc_state(scrn); 1226 if (!intel->video.gen4_cc_bo) { 1227 drm_intel_bo_unreference(surface_state_binding_table_bo); 1228 return; 1229 } 1230 } 1231 1232 /* Set up the offset for translating from the given region (in screen 1233 * coordinates) to the backing pixmap. 
1234 */ 1235#ifdef COMPOSITE 1236 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1237 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1238#else 1239 pix_xoff = 0; 1240 pix_yoff = 0; 1241#endif 1242 1243 dxo = dstRegion->extents.x1; 1244 dyo = dstRegion->extents.y1; 1245 1246 /* Use normalized texture coordinates */ 1247 src_scale_x = ((float)src_w / width) / (float)drw_w; 1248 src_scale_y = ((float)src_h / height) / (float)drw_h; 1249 1250 pbox = REGION_RECTS(dstRegion); 1251 nbox = REGION_NUM_RECTS(dstRegion); 1252 while (nbox--) { 1253 int box_x1 = pbox->x1; 1254 int box_y1 = pbox->y1; 1255 int box_x2 = pbox->x2; 1256 int box_y2 = pbox->y2; 1257 int i; 1258 float vb[12]; 1259 drm_intel_bo *bo_table[] = { 1260 NULL, /* vb_bo */ 1261 intel->batch_bo, 1262 surface_state_binding_table_bo, 1263 intel->video.gen4_sampler_bo, 1264 intel->video.gen4_sip_kernel_bo, 1265 intel->video.gen4_vs_bo, 1266 intel->video.gen4_sf_bo, 1267 intel->video.gen4_wm_packed_bo, 1268 intel->video.gen4_wm_planar_bo, 1269 intel->video.gen4_cc_bo, 1270 }; 1271 1272 pbox++; 1273 1274 i = 0; 1275 vb[i++] = (box_x2 - dxo) * src_scale_x; 1276 vb[i++] = (box_y2 - dyo) * src_scale_y; 1277 vb[i++] = (float)box_x2 + pix_xoff; 1278 vb[i++] = (float)box_y2 + pix_yoff; 1279 1280 vb[i++] = (box_x1 - dxo) * src_scale_x; 1281 vb[i++] = (box_y2 - dyo) * src_scale_y; 1282 vb[i++] = (float)box_x1 + pix_xoff; 1283 vb[i++] = (float)box_y2 + pix_yoff; 1284 1285 vb[i++] = (box_x1 - dxo) * src_scale_x; 1286 vb[i++] = (box_y1 - dyo) * src_scale_y; 1287 vb[i++] = (float)box_x1 + pix_xoff; 1288 vb[i++] = (float)box_y1 + pix_yoff; 1289 1290 bo_table[0] = intel_uxa_bo_alloc_for_data(intel, 1291 vb, sizeof(vb), 1292 "textured video vbo"); 1293 1294 if (IS_GEN4(intel)) 1295 i965_pre_draw_debug(scrn); 1296 1297 /* If this command won't fit in the current batch, flush. 1298 * Assume that it does after being flushed. 
1299 */ 1300 if (drm_intel_bufmgr_check_aperture_space(bo_table, 1301 ARRAY_SIZE(bo_table)) 1302 < 0) { 1303 intel_batch_submit(scrn); 1304 } 1305 1306 intel_batch_start_atomic(scrn, 150); 1307 1308 i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap); 1309 1310 /* Set up the pointer to our vertex buffer */ 1311 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 1312 /* four 32-bit floats per vertex */ 1313 OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | 1314 VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1315 OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 0); 1316 if (IS_GEN5(intel)) 1317 OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 1318 i * 4); 1319 else 1320 OUT_BATCH(3); /* four corners to our rectangle */ 1321 OUT_BATCH(0); /* reserved */ 1322 1323 OUT_BATCH(BRW_3DPRIMITIVE | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | /* CTG - indirect vertex count */ 1324 4); 1325 OUT_BATCH(3); /* vertex count per instance */ 1326 OUT_BATCH(0); /* start vertex offset */ 1327 OUT_BATCH(1); /* single instance */ 1328 OUT_BATCH(0); /* start instance location */ 1329 OUT_BATCH(0); /* index buffer offset, ignored */ 1330 OUT_BATCH(MI_NOOP); 1331 1332 intel_batch_end_atomic(scrn); 1333 1334 drm_intel_bo_unreference(bo_table[0]); 1335 1336 if (IS_GEN4(intel)) 1337 i965_post_draw_debug(scrn); 1338 1339 } 1340 1341 /* release reference once we're finished */ 1342 drm_intel_bo_unreference(surface_state_binding_table_bo); 1343 1344 intel_uxa_debug_flush(scrn); 1345} 1346 1347void i965_free_video(ScrnInfoPtr scrn) 1348{ 1349 intel_screen_private *intel = intel_get_screen_private(scrn); 1350 1351 drm_intel_bo_unreference(intel->video.gen4_vs_bo); 1352 intel->video.gen4_vs_bo = NULL; 1353 drm_intel_bo_unreference(intel->video.gen4_sf_bo); 1354 intel->video.gen4_sf_bo = NULL; 1355 drm_intel_bo_unreference(intel->video.gen4_cc_bo); 1356 intel->video.gen4_cc_bo = NULL; 1357 
drm_intel_bo_unreference(intel->video.gen4_wm_packed_bo); 1358 intel->video.gen4_wm_packed_bo = NULL; 1359 drm_intel_bo_unreference(intel->video.gen4_wm_planar_bo); 1360 intel->video.gen4_wm_planar_bo = NULL; 1361 drm_intel_bo_unreference(intel->video.gen4_cc_vp_bo); 1362 intel->video.gen4_cc_vp_bo = NULL; 1363 drm_intel_bo_unreference(intel->video.gen4_sampler_bo); 1364 intel->video.gen4_sampler_bo = NULL; 1365 drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo); 1366 intel->video.gen4_sip_kernel_bo = NULL; 1367 drm_intel_bo_unreference(intel->video.wm_prog_packed_bo); 1368 intel->video.wm_prog_packed_bo = NULL; 1369 drm_intel_bo_unreference(intel->video.wm_prog_planar_bo); 1370 intel->video.wm_prog_planar_bo = NULL; 1371 drm_intel_bo_unreference(intel->video.gen6_blend_bo); 1372 intel->video.gen6_blend_bo = NULL; 1373 drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo); 1374 intel->video.gen6_depth_stencil_bo = NULL; 1375} 1376 1377/* for GEN6+ */ 1378static drm_intel_bo * 1379gen6_create_cc_state(ScrnInfoPtr scrn) 1380{ 1381 intel_screen_private *intel = intel_get_screen_private(scrn); 1382 struct gen6_color_calc_state cc_state; 1383 1384 memset(&cc_state, 0, sizeof(cc_state)); 1385 cc_state.constant_r = 1.0; 1386 cc_state.constant_g = 0.0; 1387 cc_state.constant_b = 1.0; 1388 cc_state.constant_a = 1.0; 1389 1390 return intel_uxa_bo_alloc_for_data(intel, 1391 &cc_state, sizeof(cc_state), 1392 "textured video cc state"); 1393} 1394 1395static drm_intel_bo * 1396gen6_create_blend_state(ScrnInfoPtr scrn) 1397{ 1398 intel_screen_private *intel = intel_get_screen_private(scrn); 1399 struct gen6_blend_state blend_state; 1400 1401 memset(&blend_state, 0, sizeof(blend_state)); 1402 blend_state.blend1.logic_op_enable = 1; 1403 blend_state.blend1.logic_op_func = 0xc; 1404 blend_state.blend1.pre_blend_clamp_enable = 1; 1405 1406 return intel_uxa_bo_alloc_for_data(intel, 1407 &blend_state, sizeof(blend_state), 1408 "textured video blend state"); 1409} 1410 
1411static drm_intel_bo * 1412gen6_create_depth_stencil_state(ScrnInfoPtr scrn) 1413{ 1414 intel_screen_private *intel = intel_get_screen_private(scrn); 1415 struct gen6_depth_stencil_state depth_stencil_state; 1416 1417 memset(&depth_stencil_state, 0, sizeof(depth_stencil_state)); 1418 return intel_uxa_bo_alloc_for_data(intel, 1419 &depth_stencil_state, 1420 sizeof(depth_stencil_state), 1421 "textured video blend state"); 1422} 1423 1424static Bool 1425gen6_create_vidoe_objects(ScrnInfoPtr scrn) 1426{ 1427 intel_screen_private *intel = intel_get_screen_private(scrn); 1428 drm_intel_bo *(*create_sampler_state)(ScrnInfoPtr); 1429 const uint32_t *packed_ps_kernel, *planar_ps_kernel; 1430 unsigned int packed_ps_size, planar_ps_size; 1431 1432 if (INTEL_INFO(intel)->gen >= 070) { 1433 create_sampler_state = gen7_create_sampler_state; 1434 packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0]; 1435 packed_ps_size = sizeof(ps_kernel_packed_static_gen7); 1436 planar_ps_kernel = &ps_kernel_planar_static_gen7[0][0]; 1437 planar_ps_size = sizeof(ps_kernel_planar_static_gen7); 1438 } else { 1439 create_sampler_state = i965_create_sampler_state; 1440 packed_ps_kernel = &ps_kernel_packed_static_gen6[0][0]; 1441 packed_ps_size = sizeof(ps_kernel_packed_static_gen6); 1442 planar_ps_kernel = &ps_kernel_planar_static_gen6[0][0]; 1443 planar_ps_size = sizeof(ps_kernel_planar_static_gen6); 1444 } 1445 1446 if (intel->video.gen4_sampler_bo == NULL) 1447 intel->video.gen4_sampler_bo = create_sampler_state(scrn); 1448 1449 if (intel->video.wm_prog_packed_bo == NULL) 1450 intel->video.wm_prog_packed_bo = 1451 i965_create_program(scrn, 1452 packed_ps_kernel, 1453 packed_ps_size); 1454 1455 if (intel->video.wm_prog_planar_bo == NULL) 1456 intel->video.wm_prog_planar_bo = 1457 i965_create_program(scrn, 1458 planar_ps_kernel, 1459 planar_ps_size); 1460 1461 if (intel->video.gen4_cc_vp_bo == NULL) 1462 intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn); 1463 1464 if 
(intel->video.gen4_cc_bo == NULL) 1465 intel->video.gen4_cc_bo = gen6_create_cc_state(scrn); 1466 1467 if (intel->video.gen6_blend_bo == NULL) 1468 intel->video.gen6_blend_bo = gen6_create_blend_state(scrn); 1469 1470 if (intel->video.gen6_depth_stencil_bo == NULL) 1471 intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn); 1472 1473 1474 return (intel->video.gen4_sampler_bo != NULL && 1475 intel->video.wm_prog_packed_bo != NULL && 1476 intel->video.wm_prog_planar_bo != NULL && 1477 intel->video.gen4_cc_vp_bo != NULL && 1478 intel->video.gen4_cc_bo != NULL && 1479 intel->video.gen6_blend_bo != NULL && 1480 intel->video.gen6_depth_stencil_bo != NULL); 1481} 1482 1483static void 1484gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) 1485{ 1486 intel_screen_private *intel = intel_get_screen_private(scrn); 1487 1488 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); 1489 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ 1490 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 1491 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ 1492 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ 1493 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ 1494 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ 1495 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ 1496 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ 1497 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ 1498} 1499 1500static void 1501gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap) 1502{ 1503 intel_screen_private *intel = intel_get_screen_private(scrn); 1504 1505 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); 1506 OUT_BATCH(0x00000000); /* ymin, xmin */ 1507 OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, 
xmax */ 1508 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 1509} 1510 1511static void 1512gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) 1513{ 1514 intel_screen_private *intel = intel_get_screen_private(scrn); 1515 1516 /* disable WM constant buffer */ 1517 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); 1518 OUT_BATCH(0); 1519 OUT_BATCH(0); 1520 OUT_BATCH(0); 1521 OUT_BATCH(0); 1522 1523 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 1524 if (is_packed) { 1525 OUT_RELOC(intel->video.wm_prog_packed_bo, 1526 I915_GEM_DOMAIN_INSTRUCTION, 0, 1527 0); 1528 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 1529 (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1530 } else { 1531 OUT_RELOC(intel->video.wm_prog_planar_bo, 1532 I915_GEM_DOMAIN_INSTRUCTION, 0, 1533 0); 1534 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 1535 (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1536 } 1537 OUT_BATCH(0); 1538 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ 1539 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | 1540 GEN6_3DSTATE_WM_DISPATCH_ENABLE | 1541 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); 1542 OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | 1543 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 1544 OUT_BATCH(0); 1545 OUT_BATCH(0); 1546} 1547 1548static void 1549gen6_upload_vertex_element_state(ScrnInfoPtr scrn) 1550{ 1551 intel_screen_private *intel = intel_get_screen_private(scrn); 1552 1553 /* Set up our vertex elements, sourced from the single vertex buffer. 
*/ 1554 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2)); 1555 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1556 OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1557 GEN6_VE0_VALID | 1558 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1559 (0 << VE0_OFFSET_SHIFT)); 1560 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1561 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1562 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1563 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1564 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1565 OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1566 GEN6_VE0_VALID | 1567 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1568 (8 << VE0_OFFSET_SHIFT)); 1569 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1570 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1571 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1572 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1573} 1574 1575static void 1576gen6_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1577{ 1578 intel_screen_private *intel = intel_get_screen_private(scrn); 1579 1580 /* Set up the pointer to our vertex buffer */ 1581 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1582 /* four 32-bit floats per vertex */ 1583 OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | 1584 GEN6_VB0_VERTEXDATA | 1585 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1586 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 1587 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); 1588 OUT_BATCH(0); /* reserved */ 1589} 1590 1591static void 1592gen6_upload_primitive(ScrnInfoPtr scrn) 1593{ 1594 intel_screen_private *intel = intel_get_screen_private(scrn); 1595 1596 OUT_BATCH(BRW_3DPRIMITIVE | 1597 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1598 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 1599 (0 << 9) | /* Internal Vertex Count */ 1600 (6 - 
2)); 1601 OUT_BATCH(3); /* vertex count per instance */ 1602 OUT_BATCH(0); /* start vertex offset */ 1603 OUT_BATCH(1); /* single instance */ 1604 OUT_BATCH(0); /* start instance location */ 1605 OUT_BATCH(0); /* index buffer offset, ignored */ 1606} 1607 1608static void 1609gen6_emit_video_setup(ScrnInfoPtr scrn, 1610 drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, 1611 PixmapPtr pixmap, 1612 drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1613{ 1614 intel_screen_private *intel = intel_get_screen_private(scrn); 1615 1616 assert(n_src_surf == 1 || n_src_surf == 6); 1617 IntelEmitInvarientState(scrn); 1618 intel->last_3d = LAST_3D_VIDEO; 1619 intel->needs_3d_invariant = TRUE; 1620 1621 gen6_upload_invariant_states(intel); 1622 gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); 1623 gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); 1624 gen6_upload_urb(intel); 1625 gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); 1626 gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); 1627 gen6_upload_vs_state(intel); 1628 gen6_upload_gs_state(intel); 1629 gen6_upload_clip_state(intel); 1630 gen6_upload_sf_state(intel, 1, 0); 1631 gen6_upload_wm_state(scrn, n_src_surf == 1 ? 
TRUE : FALSE); 1632 gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1633 gen6_upload_depth_buffer_state(intel); 1634 gen6_upload_drawing_rectangle(scrn, pixmap); 1635 gen6_upload_vertex_element_state(scrn); 1636 gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); 1637 gen6_upload_primitive(scrn); 1638} 1639 1640static void 1641gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) 1642{ 1643 intel_screen_private *intel = intel_get_screen_private(scrn); 1644 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; 1645 unsigned int num_samples = 0; 1646 1647 if (IS_HSW(intel)) { 1648 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; 1649 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; 1650 } 1651 1652 /* disable WM constant buffer */ 1653 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); 1654 OUT_BATCH(0); 1655 OUT_BATCH(0); 1656 OUT_BATCH(0); 1657 OUT_BATCH(0); 1658 OUT_BATCH(0); 1659 OUT_BATCH(0); 1660 1661 OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); 1662 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 1663 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 1664 OUT_BATCH(0); 1665 1666 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 1667 1668 if (is_packed) { 1669 OUT_RELOC(intel->video.wm_prog_packed_bo, 1670 I915_GEM_DOMAIN_INSTRUCTION, 0, 1671 0); 1672 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 1673 (2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1674 } else { 1675 OUT_RELOC(intel->video.wm_prog_planar_bo, 1676 I915_GEM_DOMAIN_INSTRUCTION, 0, 1677 0); 1678 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 1679 (7 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1680 } 1681 1682 OUT_BATCH(0); /* scratch space base offset */ 1683 OUT_BATCH( 1684 ((48 - 1) << max_threads_shift) | num_samples | 1685 GEN7_PS_ATTRIBUTE_ENABLE | 1686 GEN7_PS_16_DISPATCH_ENABLE); 1687 OUT_BATCH( 1688 (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); 1689 OUT_BATCH(0); /* kernel 1 pointer */ 1690 OUT_BATCH(0); /* kernel 2 pointer */ 1691} 1692 1693static void 
1694gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1695{ 1696 intel_screen_private *intel = intel_get_screen_private(scrn); 1697 1698 /* Set up the pointer to our vertex buffer */ 1699 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1700 /* four 32-bit floats per vertex */ 1701 OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | 1702 GEN6_VB0_VERTEXDATA | 1703 GEN7_VB0_ADDRESS_MODIFYENABLE | 1704 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1705 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 1706 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); 1707 OUT_BATCH(0); /* reserved */ 1708} 1709 1710static void 1711gen7_upload_primitive(ScrnInfoPtr scrn) 1712{ 1713 intel_screen_private *intel = intel_get_screen_private(scrn); 1714 1715 OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); 1716 OUT_BATCH(_3DPRIM_RECTLIST | 1717 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); 1718 OUT_BATCH(3); /* vertex count per instance */ 1719 OUT_BATCH(0); /* start vertex offset */ 1720 OUT_BATCH(1); /* single instance */ 1721 OUT_BATCH(0); /* start instance location */ 1722 OUT_BATCH(0); 1723} 1724 1725static void 1726gen7_emit_video_setup(ScrnInfoPtr scrn, 1727 drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, 1728 PixmapPtr pixmap, 1729 drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1730{ 1731 intel_screen_private *intel = intel_get_screen_private(scrn); 1732 1733 assert(n_src_surf == 1 || n_src_surf == 6); 1734 IntelEmitInvarientState(scrn); 1735 intel->last_3d = LAST_3D_VIDEO; 1736 intel->needs_3d_invariant = TRUE; 1737 1738 gen6_upload_invariant_states(intel); 1739 gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); 1740 gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); 1741 gen7_upload_urb(intel); 1742 gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); 1743 gen7_upload_sampler_state_pointers(intel, 
intel->video.gen4_sampler_bo); 1744 gen7_upload_bypass_states(intel); 1745 gen6_upload_vs_state(intel); 1746 gen6_upload_clip_state(intel); 1747 gen7_upload_sf_state(intel, 1, 0); 1748 gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); 1749 gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1750 gen7_upload_depth_buffer_state(intel); 1751 gen6_upload_drawing_rectangle(scrn, pixmap); 1752 gen6_upload_vertex_element_state(scrn); 1753 gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); 1754 gen7_upload_primitive(scrn); 1755} 1756 1757void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, 1758 intel_adaptor_private *adaptor_priv, int id, 1759 RegionPtr dstRegion, 1760 short width, short height, 1761 int video_pitch, int video_pitch2, 1762 short src_w, short src_h, 1763 short drw_w, short drw_h, PixmapPtr pixmap) 1764{ 1765 intel_screen_private *intel = intel_get_screen_private(scrn); 1766 BoxPtr pbox; 1767 int nbox, dxo, dyo, pix_xoff, pix_yoff; 1768 float src_scale_x, src_scale_y; 1769 int src_surf; 1770 int n_src_surf; 1771 uint32_t src_surf_format; 1772 uint32_t src_surf_base[6]; 1773 int src_width[6]; 1774 int src_height[6]; 1775 int src_pitch[6]; 1776 drm_intel_bo *surface_state_binding_table_bo; 1777 void (*create_dst_surface_state)(ScrnInfoPtr, 1778 PixmapPtr, 1779 drm_intel_bo *, 1780 uint32_t); 1781 void (*create_src_surface_state)(ScrnInfoPtr, 1782 drm_intel_bo *, 1783 uint32_t, int, 1784 int, int, uint32_t, 1785 drm_intel_bo *, uint32_t); 1786 void (*emit_video_setup)(ScrnInfoPtr, 1787 drm_intel_bo *, int, 1788 PixmapPtr, 1789 drm_intel_bo *, uint32_t); 1790 1791 if (INTEL_INFO(intel)->gen >= 070) { 1792 create_dst_surface_state = gen7_create_dst_surface_state; 1793 create_src_surface_state = gen7_create_src_surface_state; 1794 emit_video_setup = gen7_emit_video_setup; 1795 } else { 1796 create_dst_surface_state = i965_create_dst_surface_state; 1797 create_src_surface_state = i965_create_src_surface_state; 1798 
emit_video_setup = gen6_emit_video_setup; 1799 } 1800 1801 src_surf_base[0] = adaptor_priv->YBufOffset; 1802 src_surf_base[1] = adaptor_priv->YBufOffset; 1803 src_surf_base[2] = adaptor_priv->VBufOffset; 1804 src_surf_base[3] = adaptor_priv->VBufOffset; 1805 src_surf_base[4] = adaptor_priv->UBufOffset; 1806 src_surf_base[5] = adaptor_priv->UBufOffset; 1807 1808 if (is_planar_fourcc(id)) { 1809 src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; 1810 src_width[1] = src_width[0] = width; 1811 src_height[1] = src_height[0] = height; 1812 src_pitch[1] = src_pitch[0] = video_pitch2; 1813 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1814 width / 2; 1815 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1816 height / 2; 1817 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1818 video_pitch; 1819 n_src_surf = 6; 1820 } else { 1821 if (id == FOURCC_UYVY) 1822 src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; 1823 else 1824 src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; 1825 1826 src_width[0] = width; 1827 src_height[0] = height; 1828 src_pitch[0] = video_pitch; 1829 n_src_surf = 1; 1830 } 1831 1832 surface_state_binding_table_bo = 1833 drm_intel_bo_alloc(intel->bufmgr, 1834 "surface state & binding table", 1835 (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), 1836 4096); 1837 1838 if (!surface_state_binding_table_bo) 1839 return; 1840 1841 create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); 1842 1843 for (src_surf = 0; src_surf < n_src_surf; src_surf++) { 1844 create_src_surface_state(scrn, 1845 adaptor_priv->buf, 1846 src_surf_base[src_surf], 1847 src_width[src_surf], 1848 src_height[src_surf], 1849 src_pitch[src_surf], 1850 src_surf_format, 1851 surface_state_binding_table_bo, 1852 (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1853 } 1854 1855 i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); 1856 1857 if (!gen6_create_vidoe_objects(scrn)) { 1858 
drm_intel_bo_unreference(surface_state_binding_table_bo); 1859 return; 1860 } 1861 1862 /* Set up the offset for translating from the given region (in screen 1863 * coordinates) to the backing pixmap. 1864 */ 1865#ifdef COMPOSITE 1866 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1867 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1868#else 1869 pix_xoff = 0; 1870 pix_yoff = 0; 1871#endif 1872 1873 dxo = dstRegion->extents.x1; 1874 dyo = dstRegion->extents.y1; 1875 1876 /* Use normalized texture coordinates */ 1877 src_scale_x = ((float)src_w / width) / (float)drw_w; 1878 src_scale_y = ((float)src_h / height) / (float)drw_h; 1879 1880 pbox = REGION_RECTS(dstRegion); 1881 nbox = REGION_NUM_RECTS(dstRegion); 1882 while (nbox--) { 1883 int box_x1 = pbox->x1; 1884 int box_y1 = pbox->y1; 1885 int box_x2 = pbox->x2; 1886 int box_y2 = pbox->y2; 1887 int i; 1888 float vb[12]; 1889 drm_intel_bo *bo_table[] = { 1890 NULL, /* vb_bo */ 1891 intel->batch_bo, 1892 surface_state_binding_table_bo, 1893 intel->video.gen4_sampler_bo, 1894 intel->video.wm_prog_packed_bo, 1895 intel->video.wm_prog_planar_bo, 1896 intel->video.gen4_cc_vp_bo, 1897 intel->video.gen4_cc_bo, 1898 intel->video.gen6_blend_bo, 1899 intel->video.gen6_depth_stencil_bo, 1900 }; 1901 1902 pbox++; 1903 1904 i = 0; 1905 vb[i++] = (box_x2 - dxo) * src_scale_x; 1906 vb[i++] = (box_y2 - dyo) * src_scale_y; 1907 vb[i++] = (float)box_x2 + pix_xoff; 1908 vb[i++] = (float)box_y2 + pix_yoff; 1909 1910 vb[i++] = (box_x1 - dxo) * src_scale_x; 1911 vb[i++] = (box_y2 - dyo) * src_scale_y; 1912 vb[i++] = (float)box_x1 + pix_xoff; 1913 vb[i++] = (float)box_y2 + pix_yoff; 1914 1915 vb[i++] = (box_x1 - dxo) * src_scale_x; 1916 vb[i++] = (box_y1 - dyo) * src_scale_y; 1917 vb[i++] = (float)box_x1 + pix_xoff; 1918 vb[i++] = (float)box_y1 + pix_yoff; 1919 1920 bo_table[0] = intel_uxa_bo_alloc_for_data(intel, 1921 vb, sizeof(vb), 1922 "video vbo"); 1923 1924 /* If this command won't fit in the current batch, flush. 
1925 * Assume that it does after being flushed. 1926 */ 1927 if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) 1928 intel_batch_submit(scrn); 1929 1930 intel_batch_start_atomic(scrn, 200); 1931 emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap, bo_table[0], i * 4); 1932 intel_batch_end_atomic(scrn); 1933 1934 drm_intel_bo_unreference(bo_table[0]); 1935 } 1936 1937 /* release reference once we're finished */ 1938 drm_intel_bo_unreference(surface_state_binding_table_bo); 1939 intel_uxa_debug_flush(scrn); 1940} 1941