1/* 2 * Copyright � 2006 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Keith Packard <keithp@keithp.com> 26 * 27 */ 28 29#ifdef HAVE_CONFIG_H 30#include "config.h" 31#endif 32 33#include "xorg-server.h" 34#include "xf86.h" 35#include "xf86_OSproc.h" 36#include "xf86xv.h" 37#include "fourcc.h" 38 39#include "intel.h" 40#include "intel_uxa.h" 41#include "i830_reg.h" 42#include "i965_reg.h" 43#include "brw_defines.h" 44#include "brw_structs.h" 45#include <string.h> 46 47 48/* Make assert() work. */ 49#undef NDEBUG 50#include <assert.h> 51 52static const uint32_t sip_kernel_static[][4] = { 53/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ 54 {0x00000030, 0x20000108, 0x00001220, 0x00000000}, 55/* nop (4) g0<1>UD { align1 + } */ 56 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 57/* nop (4) g0<1>UD { align1 + } */ 58 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 59/* nop (4) g0<1>UD { align1 + } */ 60 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 61/* nop (4) g0<1>UD { align1 + } */ 62 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 63/* nop (4) g0<1>UD { align1 + } */ 64 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 65/* nop (4) g0<1>UD { align1 + } */ 66 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 67/* nop (4) g0<1>UD { align1 + } */ 68 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 69/* nop (4) g0<1>UD { align1 + } */ 70 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 71/* nop (4) g0<1>UD { align1 + } */ 72 {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, 73}; 74 75/* 76 * this program computes dA/dx and dA/dy for the texture coordinates along 77 * with the base texture coordinate. It was extracted from the Mesa driver. 78 * It uses about 10 GRF registers. 79 */ 80 81#define SF_KERNEL_NUM_GRF 16 82#define SF_MAX_THREADS 1 83 84static const uint32_t sf_kernel_static[][4] = { 85#include "exa_sf.g4b" 86}; 87 88/* 89 * Ok, this kernel picks up the required data flow values in g0 and g1 90 * and passes those along in m0 and m1. In m2-m9, it sticks constant 91 * values (bright pink). 92 */ 93 94/* Our PS kernel uses less than 32 GRF registers (about 20) */ 95#define PS_KERNEL_NUM_GRF 32 96#define PS_MAX_THREADS 32 97 98#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 99 100static const uint32_t ps_kernel_packed_static[][4] = { 101#include "exa_wm_xy.g4b" 102#include "exa_wm_src_affine.g4b" 103#include "exa_wm_src_sample_argb.g4b" 104#include "exa_wm_yuv_rgb_bt601.g4b" 105#include "exa_wm_write.g4b" 106}; 107 108static const uint32_t ps_kernel_planar_static[][4] = { 109#include "exa_wm_xy.g4b" 110#include "exa_wm_src_affine.g4b" 111#include "exa_wm_src_sample_planar.g4b" 112#include "exa_wm_yuv_rgb_bt601.g4b" 113#include "exa_wm_write.g4b" 114}; 115 116/* new program for Ironlake */ 117static const uint32_t sf_kernel_static_gen5[][4] = { 118#include "exa_sf.g4b.gen5" 119}; 120 121static const uint32_t ps_kernel_packed_static_gen5[][4] = { 122#include "exa_wm_xy.g4b.gen5" 123#include "exa_wm_src_affine.g4b.gen5" 124#include "exa_wm_src_sample_argb.g4b.gen5" 125#include "exa_wm_yuv_rgb_bt601.g4b.gen5" 126#include "exa_wm_write.g4b.gen5" 127}; 128 129static const uint32_t ps_kernel_planar_static_gen5[][4] = { 130#include "exa_wm_xy.g4b.gen5" 131#include "exa_wm_src_affine.g4b.gen5" 132#include "exa_wm_src_sample_planar.g4b.gen5" 133#include "exa_wm_yuv_rgb_bt601.g4b.gen5" 134#include "exa_wm_write.g4b.gen5" 135}; 136 137/* programs for Sandybridge */ 138static const uint32_t ps_kernel_packed_static_gen6[][4] = { 139#include "exa_wm_src_affine.g6b" 140#include "exa_wm_src_sample_argb.g6b" 141#include "exa_wm_yuv_rgb_bt601.g6b" 142#include "exa_wm_write.g6b" 143}; 144 145static const uint32_t ps_kernel_planar_static_gen6[][4] = { 146#include "exa_wm_src_affine.g6b" 147#include "exa_wm_src_sample_planar.g6b" 148#include "exa_wm_yuv_rgb_bt601.g6b" 149#include "exa_wm_write.g6b" 150}; 151 152/* programs for Ivybridge */ 153static const uint32_t ps_kernel_packed_static_gen7[][4] = { 154#include "exa_wm_src_affine.g7b" 155#include "exa_wm_src_sample_argb.g7b" 156#include "exa_wm_yuv_rgb_bt601.g7b" 157#include "exa_wm_write.g7b" 158}; 159 160static const uint32_t ps_kernel_planar_static_gen7[][4] = { 161#include "exa_wm_src_affine.g7b" 162#include "exa_wm_src_sample_planar.g7b" 163#include "exa_wm_yuv_rgb_bt601.g7b" 164#include "exa_wm_write.g7b" 165}; 166 167#ifndef MAX2 168#define MAX2(a,b) ((a) > (b) ? (a) : (b)) 169#endif 170 171#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct brw_surface_state), 32) 172#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) 173#define SURFACE_STATE_PADDED_SIZE MAX2(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) 174#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) 175 176static uint32_t float_to_uint(float f) 177{ 178 union { 179 uint32_t i; 180 float f; 181 } x; 182 x.f = f; 183 return x.i; 184} 185 186#if 0 187static struct { 188 uint32_t svg_ctl; 189 char *name; 190} svg_ctl_bits[] = { 191 { 192 BRW_SVG_CTL_GS_BA, "General State Base Address"}, { 193 BRW_SVG_CTL_SS_BA, "Surface State Base Address"}, { 194 BRW_SVG_CTL_IO_BA, "Indirect Object Base Address"}, { 195 BRW_SVG_CTL_GS_AUB, "Generate State Access Upper Bound"}, { 196 BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound"}, { 197 BRW_SVG_CTL_SIP, "System Instruction Pointer"}, { 1980, 0},}; 199 200static void brw_debug(ScrnInfoPtr scrn, char *when) 201{ 202 intel_screen_private *intel = intel_get_screen_private(scrn); 203 int i; 204 uint32_t v; 205 206 ErrorF("brw_debug: %s\n", when); 207 for (i = 0; svg_ctl_bits[i].name; i++) { 208 OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl); 209 v = INREG(BRW_SVG_RDATA); 210 ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v); 211 } 212} 213#endif 214 215#define WATCH_SF 0 216#define WATCH_WIZ 0 217#define WATCH_STATS 0 218 219static void i965_pre_draw_debug(ScrnInfoPtr scrn) 220{ 221#if 0 222 intel_screen_private *intel = intel_get_screen_private(scrn); 223#endif 224 225#if 0 226 ErrorF("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", 227 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 228 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 229 230 OUTREG(BRW_VF_CTL, 231 BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID | 232 BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX | 233 BRW_VF_CTL_SNAPSHOT_ENABLE); 234 OUTREG(BRW_VF_STRG_VAL, 0); 235#endif 236 237#if 0 238 OUTREG(BRW_VS_CTL, 239 BRW_VS_CTL_SNAPSHOT_ALL_THREADS | 240 BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT | 241 BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE); 242 243 OUTREG(BRW_VS_STRG_VAL, 0); 244#endif 245 246#if WATCH_SF 247 OUTREG(BRW_SF_CTL, 248 BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT | 249 BRW_SF_CTL_SNAPSHOT_ALL_THREADS | 250 BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE); 251 OUTREG(BRW_SF_STRG_VAL, 0); 252#endif 253 254#if WATCH_WIZ 255 OUTREG(BRW_WIZ_CTL, 256 BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE | 257 BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS | BRW_WIZ_CTL_SNAPSHOT_ENABLE); 258 OUTREG(BRW_WIZ_STRG_VAL, (box_x1) | (box_y1 << 16)); 259#endif 260 261#if 0 262 OUTREG(BRW_TS_CTL, 263 BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR | 264 BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS | 265 BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS | 266 BRW_TS_CTL_SNAPSHOT_ENABLE); 267#endif 268} 269 270static void i965_post_draw_debug(ScrnInfoPtr scrn) 271{ 272#if 0 273 intel_screen_private *intel = intel_get_screen_private(scrn); 274#endif 275 276#if 0 277 for (j = 0; j < 100000; j++) { 278 ctl = INREG(BRW_VF_CTL); 279 if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE) 280 break; 281 } 282 283 rdata = INREG(BRW_VF_RDATA); 284 OUTREG(BRW_VF_CTL, 0); 285 ErrorF("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata); 286#endif 287 288#if 0 289 for (j = 0; j < 1000000; j++) { 290 ctl = INREG(BRW_VS_CTL); 291 if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE) 292 break; 293 } 294 295 rdata = INREG(BRW_VS_RDATA); 296 for (k = 0; k <= 3; k++) { 297 OUTREG(BRW_VS_CTL, BRW_VS_CTL_SNAPSHOT_COMPLETE | (k << 8)); 298 rdata = INREG(BRW_VS_RDATA); 299 ErrorF("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata); 300 } 301 302 OUTREG(BRW_VS_CTL, 0); 303#endif 304 305#if WATCH_SF 306 for (j = 0; j < 1000000; j++) { 307 ctl = INREG(BRW_SF_CTL); 308 if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE) 309 break; 310 } 311 312 for (k = 0; k <= 7; k++) { 313 OUTREG(BRW_SF_CTL, BRW_SF_CTL_SNAPSHOT_COMPLETE | (k << 8)); 314 rdata = INREG(BRW_SF_RDATA); 315 ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata); 316 } 317 318 OUTREG(BRW_SF_CTL, 0); 319#endif 320 321#if WATCH_WIZ 322 for (j = 0; j < 100000; j++) { 323 ctl = INREG(BRW_WIZ_CTL); 324 if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE) 325 break; 326 } 327 328 rdata = INREG(BRW_WIZ_RDATA); 329 OUTREG(BRW_WIZ_CTL, 0); 330 ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata); 331#endif 332 333#if 0 334 for (j = 0; j < 100000; j++) { 335 ctl = INREG(BRW_TS_CTL); 336 if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE) 337 break; 338 } 339 340 rdata = INREG(BRW_TS_RDATA); 341 OUTREG(BRW_TS_CTL, 0); 342 ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata); 343 344 ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", 345 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 346 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 347#endif 348 349#if 0 350 for (j = 0; j < 256; j++) { 351 OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT); 352 rdata = INREG(BRW_TD_RDATA); 353 ErrorF("TD_RDATA(%d): 0x%08x\n", j, rdata); 354 } 355#endif 356} 357 358/* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it. 359 * A VUE consists of a 256-bit vertex header followed by the vertex data, 360 * which in our case is 4 floats (128 bits), thus a single 512-bit URB 361 * entry. 362 */ 363#define URB_VS_ENTRIES 8 364#define URB_VS_ENTRY_SIZE 1 365 366#define URB_GS_ENTRIES 0 367#define URB_GS_ENTRY_SIZE 0 368 369#define URB_CLIP_ENTRIES 0 370#define URB_CLIP_ENTRY_SIZE 0 371 372/* The SF kernel we use outputs only 4 256-bit registers, leading to an 373 * entry size of 2 512-bit URBs. We don't need to have many entries to 374 * output as we're generally working on large rectangles and don't care 375 * about having WM threads running on different rectangles simultaneously. 376 */ 377#define URB_SF_ENTRIES 1 378#define URB_SF_ENTRY_SIZE 2 379 380#define URB_CS_ENTRIES 0 381#define URB_CS_ENTRY_SIZE 0 382 383static void i965_create_dst_surface_state(ScrnInfoPtr scrn, 384 PixmapPtr pixmap, 385 drm_intel_bo *surf_bo, 386 uint32_t offset) 387{ 388 intel_screen_private *intel = intel_get_screen_private(scrn); 389 struct brw_surface_state dest_surf_state; 390 drm_intel_bo *pixmap_bo = intel_uxa_get_pixmap_bo(pixmap); 391 assert(pixmap_bo != NULL); 392 393 memset(&dest_surf_state, 0, sizeof(dest_surf_state)); 394 395 dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; 396 dest_surf_state.ss0.data_return_format = 397 BRW_SURFACERETURNFORMAT_FLOAT32; 398 if (intel->cpp == 2) { 399 dest_surf_state.ss0.surface_format = 400 BRW_SURFACEFORMAT_B5G6R5_UNORM; 401 } else { 402 dest_surf_state.ss0.surface_format = 403 BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 404 } 405 dest_surf_state.ss0.writedisable_alpha = 0; 406 dest_surf_state.ss0.writedisable_red = 0; 407 dest_surf_state.ss0.writedisable_green = 0; 408 dest_surf_state.ss0.writedisable_blue = 0; 409 dest_surf_state.ss0.color_blend = 1; 410 dest_surf_state.ss0.vert_line_stride = 0; 411 dest_surf_state.ss0.vert_line_stride_ofs = 0; 412 dest_surf_state.ss0.mipmap_layout_mode = 0; 413 dest_surf_state.ss0.render_cache_read_mode = 0; 414 415 dest_surf_state.ss1.base_addr = 416 intel_uxa_emit_reloc(surf_bo, offset + offsetof(struct brw_surface_state, ss1), 417 pixmap_bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); 418 419 dest_surf_state.ss2.height = pixmap->drawable.height - 1; 420 dest_surf_state.ss2.width = pixmap->drawable.width - 1; 421 dest_surf_state.ss2.mip_count = 0; 422 dest_surf_state.ss2.render_target_rotation = 0; 423 dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 424 dest_surf_state.ss3.tiled_surface = intel_uxa_pixmap_tiled(pixmap); 425 dest_surf_state.ss3.tile_walk = 0; /* TileX */ 426 427 dri_bo_subdata(surf_bo, 428 offset, sizeof(dest_surf_state), 429 &dest_surf_state); 430} 431 432static void i965_create_src_surface_state(ScrnInfoPtr scrn, 433 drm_intel_bo * src_bo, 434 uint32_t src_offset, 435 int src_width, 436 int src_height, 437 int src_pitch, 438 uint32_t src_surf_format, 439 drm_intel_bo *surface_bo, 440 uint32_t offset) 441{ 442 struct brw_surface_state src_surf_state; 443 444 memset(&src_surf_state, 0, sizeof(src_surf_state)); 445 446 /* Set up the source surface state buffer */ 447 src_surf_state.ss0.surface_type = BRW_SURFACE_2D; 448 src_surf_state.ss0.surface_format = src_surf_format; 449 src_surf_state.ss0.writedisable_alpha = 0; 450 src_surf_state.ss0.writedisable_red = 0; 451 src_surf_state.ss0.writedisable_green = 0; 452 src_surf_state.ss0.writedisable_blue = 0; 453 src_surf_state.ss0.color_blend = 1; 454 src_surf_state.ss0.vert_line_stride = 0; 455 src_surf_state.ss0.vert_line_stride_ofs = 0; 456 src_surf_state.ss0.mipmap_layout_mode = 0; 457 src_surf_state.ss0.render_cache_read_mode = 0; 458 459 src_surf_state.ss2.width = src_width - 1; 460 src_surf_state.ss2.height = src_height - 1; 461 src_surf_state.ss2.mip_count = 0; 462 src_surf_state.ss2.render_target_rotation = 0; 463 src_surf_state.ss3.pitch = src_pitch - 1; 464 465 if (src_bo) { 466 src_surf_state.ss1.base_addr = 467 intel_uxa_emit_reloc(surface_bo, 468 offset + offsetof(struct brw_surface_state, ss1), 469 src_bo, src_offset, 470 I915_GEM_DOMAIN_SAMPLER, 0); 471 } else { 472 src_surf_state.ss1.base_addr = src_offset; 473 } 474 475 dri_bo_subdata(surface_bo, 476 offset, sizeof(src_surf_state), 477 &src_surf_state); 478} 479 480static void gen7_create_dst_surface_state(ScrnInfoPtr scrn, 481 PixmapPtr pixmap, 482 drm_intel_bo *surf_bo, 483 uint32_t offset) 484{ 485 intel_screen_private *intel = intel_get_screen_private(scrn); 486 struct gen7_surface_state dest_surf_state; 487 drm_intel_bo *pixmap_bo = intel_uxa_get_pixmap_bo(pixmap); 488 assert(pixmap_bo != NULL); 489 490 memset(&dest_surf_state, 0, sizeof(dest_surf_state)); 491 492 dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; 493 dest_surf_state.ss0.tiled_surface = intel_uxa_pixmap_tiled(pixmap); 494 dest_surf_state.ss0.tile_walk = 0; /* TileX */ 495 496 if (intel->cpp == 2) { 497 dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; 498 } else { 499 dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 500 } 501 502 dest_surf_state.ss1.base_addr = 503 intel_uxa_emit_reloc(surf_bo, 504 offset + offsetof(struct gen7_surface_state, ss1), 505 pixmap_bo, 0, 506 I915_GEM_DOMAIN_SAMPLER, 0); 507 508 dest_surf_state.ss2.height = pixmap->drawable.height - 1; 509 dest_surf_state.ss2.width = pixmap->drawable.width - 1; 510 511 dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 512 513 if (IS_HSW(intel)) { 514 dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; 515 dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; 516 dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; 517 dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 518 } 519 520 dri_bo_subdata(surf_bo, 521 offset, sizeof(dest_surf_state), 522 &dest_surf_state); 523} 524 525static void gen7_create_src_surface_state(ScrnInfoPtr scrn, 526 drm_intel_bo * src_bo, 527 uint32_t src_offset, 528 int src_width, 529 int src_height, 530 int src_pitch, 531 uint32_t src_surf_format, 532 drm_intel_bo *surface_bo, 533 uint32_t offset) 534{ 535 intel_screen_private * const intel = intel_get_screen_private(scrn); 536 struct gen7_surface_state src_surf_state; 537 538 memset(&src_surf_state, 0, sizeof(src_surf_state)); 539 540 src_surf_state.ss0.surface_type = BRW_SURFACE_2D; 541 src_surf_state.ss0.surface_format = src_surf_format; 542 543 if (src_bo) { 544 src_surf_state.ss1.base_addr = 545 intel_uxa_emit_reloc(surface_bo, 546 offset + offsetof(struct gen7_surface_state, ss1), 547 src_bo, src_offset, 548 I915_GEM_DOMAIN_SAMPLER, 0); 549 } else { 550 src_surf_state.ss1.base_addr = src_offset; 551 } 552 553 src_surf_state.ss2.width = src_width - 1; 554 src_surf_state.ss2.height = src_height - 1; 555 556 src_surf_state.ss3.pitch = src_pitch - 1; 557 558 if (IS_HSW(intel)) { 559 src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; 560 src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; 561 src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; 562 src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 563 } 564 565 dri_bo_subdata(surface_bo, 566 offset, sizeof(src_surf_state), 567 &src_surf_state); 568} 569 570static void i965_create_binding_table(ScrnInfoPtr scrn, 571 drm_intel_bo *bind_bo, 572 int n_surf) 573{ 574 uint32_t binding_table[n_surf]; 575 int i; 576 577 /* Set up a binding table for our surfaces. Only the PS will use it */ 578 for (i = 0; i < n_surf; i++) 579 binding_table[i] = i * SURFACE_STATE_PADDED_SIZE; 580 581 dri_bo_subdata(bind_bo, 582 n_surf * SURFACE_STATE_PADDED_SIZE, 583 sizeof(binding_table), binding_table); 584} 585 586static drm_intel_bo *i965_create_sampler_state(ScrnInfoPtr scrn) 587{ 588 intel_screen_private *intel = intel_get_screen_private(scrn); 589 struct brw_sampler_state sampler_state; 590 591 memset(&sampler_state, 0, sizeof(sampler_state)); 592 sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; 593 sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; 594 sampler_state.ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 595 sampler_state.ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 596 sampler_state.ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 597 598 return intel_uxa_bo_alloc_for_data(intel, 599 &sampler_state, sizeof(sampler_state), 600 "textured video sampler state"); 601} 602 603static drm_intel_bo *gen7_create_sampler_state(ScrnInfoPtr scrn) 604{ 605 intel_screen_private *intel = intel_get_screen_private(scrn); 606 struct gen7_sampler_state sampler_state; 607 608 memset(&sampler_state, 0, sizeof(sampler_state)); 609 sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; 610 sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; 611 sampler_state.ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 612 sampler_state.ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 613 sampler_state.ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 614 615 return intel_uxa_bo_alloc_for_data(intel, 616 &sampler_state, sizeof(sampler_state), 617 "textured video sampler state"); 618} 619 620static drm_intel_bo *i965_create_vs_state(ScrnInfoPtr scrn) 621{ 622 intel_screen_private *intel = intel_get_screen_private(scrn); 623 struct brw_vs_unit_state vs_state; 624 625 /* Set up the vertex shader to be disabled (passthrough) */ 626 memset(&vs_state, 0, sizeof(vs_state)); 627 if (IS_GEN5(intel)) 628 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; 629 else 630 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 631 vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 632 vs_state.vs6.vs_enable = 0; 633 vs_state.vs6.vert_cache_disable = 1; 634 635 return intel_uxa_bo_alloc_for_data(intel, 636 &vs_state, sizeof(vs_state), 637 "textured video vs state"); 638} 639 640static drm_intel_bo *i965_create_program(ScrnInfoPtr scrn, 641 const uint32_t * program, 642 unsigned int program_size) 643{ 644 intel_screen_private *intel = intel_get_screen_private(scrn); 645 return intel_uxa_bo_alloc_for_data(intel, 646 program, program_size, 647 "textured video program"); 648} 649 650static drm_intel_bo *i965_create_sf_state(ScrnInfoPtr scrn) 651{ 652 intel_screen_private *intel = intel_get_screen_private(scrn); 653 drm_intel_bo *sf_bo, *kernel_bo; 654 struct brw_sf_unit_state sf_state; 655 656 if (IS_GEN5(intel)) 657 kernel_bo = i965_create_program(scrn, 658 &sf_kernel_static_gen5[0][0], 659 sizeof(sf_kernel_static_gen5)); 660 else 661 kernel_bo = i965_create_program(scrn, 662 &sf_kernel_static[0][0], 663 sizeof(sf_kernel_static)); 664 if (!kernel_bo) 665 return NULL; 666 667 sf_bo = drm_intel_bo_alloc(intel->bufmgr, 668 "textured video sf state", 4096, 669 sizeof(sf_state)); 670 if (sf_bo == NULL) { 671 drm_intel_bo_unreference(kernel_bo); 672 return NULL; 673 } 674 675 /* Set up the SF kernel to do coord interp: for each attribute, 676 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 677 * back to SF which then hands pixels off to WM. 678 */ 679 memset(&sf_state, 0, sizeof(sf_state)); 680 sf_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 681 sf_state.thread0.kernel_start_pointer = 682 intel_uxa_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0), 683 kernel_bo, sf_state.thread0.grf_reg_count << 1, 684 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 685 sf_state.sf1.single_program_flow = 1; /* XXX */ 686 sf_state.sf1.binding_table_entry_count = 0; 687 sf_state.sf1.thread_priority = 0; 688 sf_state.sf1.floating_point_mode = 0; /* Mesa does this */ 689 sf_state.sf1.illegal_op_exception_enable = 1; 690 sf_state.sf1.mask_stack_exception_enable = 1; 691 sf_state.sf1.sw_exception_enable = 1; 692 sf_state.thread2.per_thread_scratch_space = 0; 693 /* scratch space is not used in our kernel */ 694 sf_state.thread2.scratch_space_base_pointer = 0; 695 sf_state.thread3.const_urb_entry_read_length = 0; /* no const URBs */ 696 sf_state.thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 697 sf_state.thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 698 sf_state.thread3.urb_entry_read_offset = 0; 699 sf_state.thread3.dispatch_grf_start_reg = 3; 700 sf_state.thread4.max_threads = SF_MAX_THREADS - 1; 701 sf_state.thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 702 sf_state.thread4.nr_urb_entries = URB_SF_ENTRIES; 703 sf_state.thread4.stats_enable = 1; 704 sf_state.sf5.viewport_transform = FALSE; /* skip viewport */ 705 sf_state.sf6.cull_mode = BRW_CULLMODE_NONE; 706 sf_state.sf6.scissor = 0; 707 sf_state.sf7.trifan_pv = 2; 708 sf_state.sf6.dest_org_vbias = 0x8; 709 sf_state.sf6.dest_org_hbias = 0x8; 710 711 dri_bo_subdata(sf_bo, 0, sizeof(sf_state), &sf_state); 712 return sf_bo; 713} 714 715static drm_intel_bo *i965_create_wm_state(ScrnInfoPtr scrn, 716 drm_intel_bo * sampler_bo, 717 Bool is_packed) 718{ 719 intel_screen_private *intel = intel_get_screen_private(scrn); 720 drm_intel_bo *wm_bo, *kernel_bo; 721 struct brw_wm_unit_state wm_state; 722 723 if (is_packed) { 724 if (IS_GEN5(intel)) 725 kernel_bo = 726 i965_create_program(scrn, 727 &ps_kernel_packed_static_gen5[0] 728 [0], 729 sizeof 730 (ps_kernel_packed_static_gen5)); 731 else 732 kernel_bo = 733 i965_create_program(scrn, 734 &ps_kernel_packed_static[0][0], 735 sizeof 736 (ps_kernel_packed_static)); 737 } else { 738 if (IS_GEN5(intel)) 739 kernel_bo = 740 i965_create_program(scrn, 741 &ps_kernel_planar_static_gen5[0] 742 [0], 743 sizeof 744 (ps_kernel_planar_static_gen5)); 745 else 746 kernel_bo = 747 i965_create_program(scrn, 748 &ps_kernel_planar_static[0][0], 749 sizeof 750 (ps_kernel_planar_static)); 751 } 752 if (!kernel_bo) 753 return NULL; 754 755 wm_bo = drm_intel_bo_alloc(intel->bufmgr, 756 "textured video wm state", 757 sizeof(wm_state), 0); 758 if (wm_bo == NULL) { 759 drm_intel_bo_unreference(kernel_bo); 760 return NULL; 761 } 762 763 memset(&wm_state, 0, sizeof(wm_state)); 764 wm_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 765 wm_state.thread0.kernel_start_pointer = 766 intel_uxa_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0), 767 kernel_bo, wm_state.thread0.grf_reg_count << 1, 768 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 769 wm_state.thread1.single_program_flow = 1; /* XXX */ 770 if (is_packed) 771 wm_state.thread1.binding_table_entry_count = 2; 772 else 773 wm_state.thread1.binding_table_entry_count = 7; 774 775 /* binding table entry count is only used for prefetching, and it has to 776 * be set 0 for Ironlake 777 */ 778 if (IS_GEN5(intel)) 779 wm_state.thread1.binding_table_entry_count = 0; 780 781 /* Though we never use the scratch space in our WM kernel, it has to be 782 * set, and the minimum allocation is 1024 bytes. 783 */ 784 wm_state.thread2.scratch_space_base_pointer = 0; 785 wm_state.thread2.per_thread_scratch_space = 0; /* 1024 bytes */ 786 wm_state.thread3.dispatch_grf_start_reg = 3; /* XXX */ 787 wm_state.thread3.const_urb_entry_read_length = 0; 788 wm_state.thread3.const_urb_entry_read_offset = 0; 789 wm_state.thread3.urb_entry_read_length = 1; /* XXX */ 790 wm_state.thread3.urb_entry_read_offset = 0; /* XXX */ 791 wm_state.wm4.stats_enable = 1; 792 wm_state.wm4.sampler_state_pointer = 793 intel_uxa_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4), 794 sampler_bo, 0, 795 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 796 if (IS_GEN5(intel)) 797 wm_state.wm4.sampler_count = 0; 798 else 799 wm_state.wm4.sampler_count = 1; /* 1-4 samplers used */ 800 wm_state.wm5.max_threads = PS_MAX_THREADS - 1; 801 wm_state.wm5.thread_dispatch_enable = 1; 802 wm_state.wm5.enable_16_pix = 1; 803 wm_state.wm5.enable_8_pix = 0; 804 wm_state.wm5.early_depth_test = 1; 805 806 dri_bo_subdata(wm_bo, 0, sizeof(wm_state), &wm_state); 807 drm_intel_bo_unreference(kernel_bo); 808 return wm_bo; 809} 810 811static drm_intel_bo *i965_create_cc_vp_state(ScrnInfoPtr scrn) 812{ 813 intel_screen_private *intel = intel_get_screen_private(scrn); 814 struct brw_cc_viewport cc_viewport; 815 816 memset(&cc_viewport, 0, sizeof(cc_viewport)); 817 cc_viewport.min_depth = -1.e35; 818 cc_viewport.max_depth = 1.e35; 819 820 return intel_uxa_bo_alloc_for_data(intel, 821 &cc_viewport, sizeof(cc_viewport), 822 "textured video cc viewport"); 823} 824 825static drm_intel_bo *i965_create_cc_state(ScrnInfoPtr scrn) 826{ 827 intel_screen_private *intel = intel_get_screen_private(scrn); 828 drm_intel_bo *cc_bo, *cc_vp_bo; 829 struct brw_cc_unit_state cc_state; 830 831 cc_vp_bo = i965_create_cc_vp_state(scrn); 832 if (!cc_vp_bo) 833 return NULL; 834 835 cc_bo = drm_intel_bo_alloc(intel->bufmgr, 836 "textured video cc state", 837 sizeof(cc_state), 0); 838 if (cc_bo == NULL){ 839 drm_intel_bo_unreference(cc_vp_bo); 840 return NULL; 841 } 842 843 /* Color calculator state */ 844 memset(&cc_state, 0, sizeof(cc_state)); 845 cc_state.cc0.stencil_enable = 0; /* disable stencil */ 846 cc_state.cc2.depth_test = 0; /* disable depth test */ 847 cc_state.cc2.logicop_enable = 1; /* enable logic op */ 848 cc_state.cc3.ia_blend_enable = 1; /* blend alpha just like colors */ 849 cc_state.cc3.blend_enable = 0; /* disable color blend */ 850 cc_state.cc3.alpha_test = 0; /* disable alpha test */ 851 cc_state.cc4.cc_viewport_state_offset = 852 intel_uxa_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4), 853 cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 854 cc_state.cc5.dither_enable = 0; /* disable dither */ 855 cc_state.cc5.logicop_func = 0xc; /* WHITE */ 856 cc_state.cc5.statistics_enable = 1; 857 cc_state.cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 858 cc_state.cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE; 859 cc_state.cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE; 860 861 dri_bo_subdata(cc_bo, 0, sizeof(cc_state), &cc_state); 862 drm_intel_bo_unreference(cc_vp_bo); 863 864 return cc_bo; 865} 866 867static void 868i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_table_bo, int n_src_surf, PixmapPtr pixmap) 869{ 870 intel_screen_private *intel = intel_get_screen_private(scrn); 871 int urb_vs_start, urb_vs_size; 872 int urb_gs_start, urb_gs_size; 873 int urb_clip_start, urb_clip_size; 874 int urb_sf_start, urb_sf_size; 875 int urb_cs_start, urb_cs_size; 876 int pipe_ctl; 877 878 IntelEmitInvarientState(scrn); 879 intel->last_3d = LAST_3D_VIDEO; 880 intel->needs_3d_invariant = TRUE; 881 882 urb_vs_start = 0; 883 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 884 urb_gs_start = urb_vs_start + urb_vs_size; 885 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 886 urb_clip_start = urb_gs_start + urb_gs_size; 887 urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; 888 urb_sf_start = urb_clip_start + urb_clip_size; 889 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 890 urb_cs_start = urb_sf_start + urb_sf_size; 891 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 892 893 OUT_BATCH(MI_FLUSH | 894 MI_STATE_INSTRUCTION_CACHE_FLUSH | 895 BRW_MI_GLOBAL_SNAPSHOT_RESET); 896 OUT_BATCH(MI_NOOP); 897 898 /* brw_debug (scrn, "before base address modify"); */ 899 /* Match Mesa driver setup */ 900 if (INTEL_INFO(intel)->gen >= 045) 901 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 902 else 903 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 904 905 /* Mesa does this. Who knows... */ 906 OUT_BATCH(BRW_CS_URB_STATE | 0); 907 OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ 908 (0 << 0)); /* Number of URB Entries */ 909 910 /* Zero out the two base address registers so all offsets are 911 * absolute 912 */ 913 if (IS_GEN5(intel)) { 914 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 915 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 916 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 917 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 918 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 919 /* general state max addr, disabled */ 920 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 921 /* media object state max addr, disabled */ 922 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 923 /* Instruction max addr, disabled */ 924 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 925 } else { 926 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 927 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 928 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 929 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 930 /* general state max addr, disabled */ 931 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 932 /* media object state max addr, disabled */ 933 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 934 } 935 936 /* Set system instruction pointer */ 937 OUT_BATCH(BRW_STATE_SIP | 0); 938 /* system instruction pointer */ 939 OUT_RELOC(intel->video.gen4_sip_kernel_bo, 940 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 941 942 /* brw_debug (scrn, "after base address modify"); */ 943 944 if (IS_GEN5(intel)) 945 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE; 946 else 947 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; 948 949 /* Pipe control */ 950 OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); 951 OUT_BATCH(0); /* Destination address */ 952 OUT_BATCH(0); /* Immediate data low DW */ 953 OUT_BATCH(0); /* Immediate data high DW */ 954 955 /* Binding table pointers */ 956 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 957 OUT_BATCH(0); /* vs */ 958 OUT_BATCH(0); /* gs */ 959 OUT_BATCH(0); /* clip */ 960 OUT_BATCH(0); /* sf */ 961 /* Only the PS uses the binding table */ 962 OUT_BATCH((n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 963 964 /* Blend constant color (magenta is fun) */ 965 OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3); 966 OUT_BATCH(float_to_uint(1.0)); 967 OUT_BATCH(float_to_uint(0.0)); 968 OUT_BATCH(float_to_uint(1.0)); 969 OUT_BATCH(float_to_uint(1.0)); 970 971 /* The drawing rectangle clipping is always on. Set it to values that 972 * shouldn't do any clipping. 973 */ 974 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ 975 OUT_BATCH(0x00000000); /* ymin, xmin */ 976 OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ 977 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 978 979 /* skip the depth buffer */ 980 /* skip the polygon stipple */ 981 /* skip the polygon stipple offset */ 982 /* skip the line stipple */ 983 984 /* Set the pointers to the 3d pipeline state */ 985 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 986 OUT_RELOC(intel->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 987 /* disable GS, resulting in passthrough */ 988 OUT_BATCH(BRW_GS_DISABLE); 989 /* disable CLIP, resulting in passthrough */ 990 OUT_BATCH(BRW_CLIP_DISABLE); 991 OUT_RELOC(intel->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 992 if (n_src_surf == 1) 993 OUT_RELOC(intel->video.gen4_wm_packed_bo, 994 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 995 else 996 OUT_RELOC(intel->video.gen4_wm_planar_bo, 997 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 998 OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 999 1000 /* URB fence */ 1001 OUT_BATCH(BRW_URB_FENCE | 1002 UF0_CS_REALLOC | 1003 UF0_SF_REALLOC | 1004 UF0_CLIP_REALLOC | UF0_GS_REALLOC | UF0_VS_REALLOC | 1); 1005 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 1006 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 1007 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 1008 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 1009 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); 1010 1011 /* Constant buffer state */ 1012 OUT_BATCH(BRW_CS_URB_STATE | 0); 1013 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); 1014 1015 /* Set up our vertex elements, sourced from the single vertex buffer. */ 1016 1017 if (IS_GEN5(intel)) { 1018 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 1019 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1020 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1021 VE0_VALID | 1022 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1023 (0 << VE0_OFFSET_SHIFT)); 1024 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1025 | (BRW_VFCOMPONENT_STORE_SRC << 1026 VE1_VFCOMPONENT_1_SHIFT) | 1027 (BRW_VFCOMPONENT_STORE_1_FLT << 1028 VE1_VFCOMPONENT_2_SHIFT) | 1029 (BRW_VFCOMPONENT_STORE_1_FLT << 1030 VE1_VFCOMPONENT_3_SHIFT)); 1031 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1032 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1033 VE0_VALID | 1034 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1035 (8 << VE0_OFFSET_SHIFT)); 1036 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1037 | (BRW_VFCOMPONENT_STORE_SRC << 1038 VE1_VFCOMPONENT_1_SHIFT) | 1039 (BRW_VFCOMPONENT_STORE_1_FLT << 1040 VE1_VFCOMPONENT_2_SHIFT) | 1041 (BRW_VFCOMPONENT_STORE_1_FLT << 1042 VE1_VFCOMPONENT_3_SHIFT)); 1043 } else { 1044 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 1045 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1046 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1047 VE0_VALID | 1048 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1049 (0 << VE0_OFFSET_SHIFT)); 1050 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1051 | (BRW_VFCOMPONENT_STORE_SRC << 1052 VE1_VFCOMPONENT_1_SHIFT) | 1053 (BRW_VFCOMPONENT_STORE_1_FLT << 1054 VE1_VFCOMPONENT_2_SHIFT) | 1055 (BRW_VFCOMPONENT_STORE_1_FLT << 1056 VE1_VFCOMPONENT_3_SHIFT) | (0 << 1057 VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1058 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1059 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1060 VE0_VALID | 1061 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1062 (8 << VE0_OFFSET_SHIFT)); 1063 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) 1064 | (BRW_VFCOMPONENT_STORE_SRC << 1065 VE1_VFCOMPONENT_1_SHIFT) | 1066 (BRW_VFCOMPONENT_STORE_1_FLT << 1067 VE1_VFCOMPONENT_2_SHIFT) | 1068 (BRW_VFCOMPONENT_STORE_1_FLT << 1069 VE1_VFCOMPONENT_3_SHIFT) | (4 << 1070 VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1071 } 1072} 1073 1074void 1075I965DisplayVideoTextured(ScrnInfoPtr scrn, 1076 intel_adaptor_private *adaptor_priv, int id, 1077 RegionPtr dstRegion, 1078 short width, short height, 1079 int video_pitch, int video_pitch2, 1080 short src_w, short src_h, 1081 short drw_w, short drw_h, PixmapPtr pixmap) 1082{ 1083 intel_screen_private *intel = intel_get_screen_private(scrn); 1084 BoxPtr pbox; 1085 int nbox, dxo, dyo, pix_xoff, pix_yoff; 1086 float src_scale_x, src_scale_y; 1087 int src_surf; 1088 int n_src_surf; 1089 uint32_t src_surf_format; 1090 uint32_t src_surf_base[6]; 1091 int src_width[6]; 1092 int src_height[6]; 1093 int src_pitch[6]; 1094 drm_intel_bo *surface_state_binding_table_bo; 1095 1096#if 0 1097 ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, 1098 height, video_pitch); 1099#endif 1100 1101#if 0 1102 /* enable debug */ 1103 OUTREG(INST_PM, (1 << (16 + 4)) | (1 << 4)); 1104 ErrorF("INST_PM 0x%08x\n", INREG(INST_PM)); 1105#endif 1106 1107 src_surf_base[0] = adaptor_priv->YBufOffset; 1108 src_surf_base[1] = adaptor_priv->YBufOffset; 1109 src_surf_base[2] = adaptor_priv->VBufOffset; 1110 src_surf_base[3] = adaptor_priv->VBufOffset; 1111 src_surf_base[4] = adaptor_priv->UBufOffset; 1112 src_surf_base[5] = adaptor_priv->UBufOffset; 1113#if 0 1114 ErrorF("base 0 0x%x base 1 0x%x base 2 0x%x\n", 1115 src_surf_base[0], src_surf_base[1], src_surf_base[2]); 1116#endif 1117 1118 if (is_planar_fourcc(id)) { 1119 src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; 1120 src_width[1] = src_width[0] = width; 1121 src_height[1] = src_height[0] = height; 1122 src_pitch[1] = src_pitch[0] = video_pitch2; 1123 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1124 width / 2; 1125 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1126 height / 2; 1127 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1128 video_pitch; 1129 n_src_surf = 6; 1130 } else { 1131 if (id == FOURCC_UYVY) 1132 src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; 1133 else 1134 src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; 1135 1136 src_width[0] = width; 1137 src_height[0] = height; 1138 src_pitch[0] = video_pitch; 1139 n_src_surf = 1; 1140 } 1141 1142#if 0 1143 ErrorF("dst surf: 0x%08x\n", state_base_offset + dest_surf_offset); 1144 ErrorF("src surf: 0x%08x\n", state_base_offset + src_surf_offset); 1145#endif 1146 1147 /* We'll be poking the state buffers that could be in use by the 3d 1148 * hardware here, but we should have synced the 3D engine already in 1149 * I830PutImage. 1150 */ 1151 1152 surface_state_binding_table_bo = 1153 drm_intel_bo_alloc(intel->bufmgr, 1154 "surface state & binding table", 1155 (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), 1156 4096); 1157 1158 if (!surface_state_binding_table_bo) 1159 return; 1160 1161 i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); 1162 1163 for (src_surf = 0; src_surf < n_src_surf; src_surf++) { 1164 i965_create_src_surface_state(scrn, 1165 adaptor_priv->buf, 1166 src_surf_base[src_surf], 1167 src_width[src_surf], 1168 src_height[src_surf], 1169 src_pitch[src_surf], 1170 src_surf_format, 1171 surface_state_binding_table_bo, 1172 (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1173 } 1174 1175 i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); 1176 1177 if (intel->video.gen4_sampler_bo == NULL) 1178 intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn); 1179 if (intel->video.gen4_sip_kernel_bo == NULL) { 1180 intel->video.gen4_sip_kernel_bo = 1181 i965_create_program(scrn, &sip_kernel_static[0][0], 1182 sizeof(sip_kernel_static)); 1183 if (!intel->video.gen4_sip_kernel_bo) { 1184 drm_intel_bo_unreference(surface_state_binding_table_bo); 1185 return; 1186 } 1187 } 1188 1189 if (intel->video.gen4_vs_bo == NULL) { 1190 intel->video.gen4_vs_bo = i965_create_vs_state(scrn); 1191 if (!intel->video.gen4_vs_bo) { 1192 drm_intel_bo_unreference(surface_state_binding_table_bo); 1193 return; 1194 } 1195 } 1196 if (intel->video.gen4_sf_bo == NULL) { 1197 intel->video.gen4_sf_bo = i965_create_sf_state(scrn); 1198 if (!intel->video.gen4_sf_bo) { 1199 drm_intel_bo_unreference(surface_state_binding_table_bo); 1200 return; 1201 } 1202 } 1203 if (intel->video.gen4_wm_packed_bo == NULL) { 1204 intel->video.gen4_wm_packed_bo = 1205 i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, 1206 TRUE); 1207 if (!intel->video.gen4_wm_packed_bo) { 1208 drm_intel_bo_unreference(surface_state_binding_table_bo); 1209 return; 1210 } 1211 } 1212 1213 if (intel->video.gen4_wm_planar_bo == NULL) { 1214 intel->video.gen4_wm_planar_bo = 1215 i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, 1216 FALSE); 1217 if (!intel->video.gen4_wm_planar_bo) { 1218 drm_intel_bo_unreference(surface_state_binding_table_bo); 1219 return; 1220 } 1221 } 1222 1223 if (intel->video.gen4_cc_bo == NULL) { 1224 intel->video.gen4_cc_bo = i965_create_cc_state(scrn); 1225 if (!intel->video.gen4_cc_bo) { 1226 drm_intel_bo_unreference(surface_state_binding_table_bo); 1227 return; 1228 } 1229 } 1230 1231 /* Set up the offset for translating from the given region (in screen 1232 * coordinates) to the backing pixmap. 1233 */ 1234#ifdef COMPOSITE 1235 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1236 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1237#else 1238 pix_xoff = 0; 1239 pix_yoff = 0; 1240#endif 1241 1242 dxo = dstRegion->extents.x1; 1243 dyo = dstRegion->extents.y1; 1244 1245 /* Use normalized texture coordinates */ 1246 src_scale_x = ((float)src_w / width) / (float)drw_w; 1247 src_scale_y = ((float)src_h / height) / (float)drw_h; 1248 1249 pbox = REGION_RECTS(dstRegion); 1250 nbox = REGION_NUM_RECTS(dstRegion); 1251 while (nbox--) { 1252 int box_x1 = pbox->x1; 1253 int box_y1 = pbox->y1; 1254 int box_x2 = pbox->x2; 1255 int box_y2 = pbox->y2; 1256 int i; 1257 float vb[12]; 1258 drm_intel_bo *bo_table[] = { 1259 NULL, /* vb_bo */ 1260 intel->batch_bo, 1261 surface_state_binding_table_bo, 1262 intel->video.gen4_sampler_bo, 1263 intel->video.gen4_sip_kernel_bo, 1264 intel->video.gen4_vs_bo, 1265 intel->video.gen4_sf_bo, 1266 intel->video.gen4_wm_packed_bo, 1267 intel->video.gen4_wm_planar_bo, 1268 intel->video.gen4_cc_bo, 1269 }; 1270 1271 pbox++; 1272 1273 i = 0; 1274 vb[i++] = (box_x2 - dxo) * src_scale_x; 1275 vb[i++] = (box_y2 - dyo) * src_scale_y; 1276 vb[i++] = (float)box_x2 + pix_xoff; 1277 vb[i++] = (float)box_y2 + pix_yoff; 1278 1279 vb[i++] = (box_x1 - dxo) * src_scale_x; 1280 vb[i++] = (box_y2 - dyo) * src_scale_y; 1281 vb[i++] = (float)box_x1 + pix_xoff; 1282 vb[i++] = (float)box_y2 + pix_yoff; 1283 1284 vb[i++] = (box_x1 - dxo) * src_scale_x; 1285 vb[i++] = (box_y1 - dyo) * src_scale_y; 1286 vb[i++] = (float)box_x1 + pix_xoff; 1287 vb[i++] = (float)box_y1 + pix_yoff; 1288 1289 bo_table[0] = intel_uxa_bo_alloc_for_data(intel, 1290 vb, sizeof(vb), 1291 "textured video vbo"); 1292 1293 if (IS_GEN4(intel)) 1294 i965_pre_draw_debug(scrn); 1295 1296 /* If this command won't fit in the current batch, flush. 1297 * Assume that it does after being flushed. 1298 */ 1299 if (drm_intel_bufmgr_check_aperture_space(bo_table, 1300 ARRAY_SIZE(bo_table)) 1301 < 0) { 1302 intel_batch_submit(scrn); 1303 } 1304 1305 intel_batch_start_atomic(scrn, 150); 1306 1307 i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap); 1308 1309 /* Set up the pointer to our vertex buffer */ 1310 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 1311 /* four 32-bit floats per vertex */ 1312 OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | 1313 VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1314 OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 0); 1315 if (IS_GEN5(intel)) 1316 OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 1317 i * 4); 1318 else 1319 OUT_BATCH(3); /* four corners to our rectangle */ 1320 OUT_BATCH(0); /* reserved */ 1321 1322 OUT_BATCH(BRW_3DPRIMITIVE | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | /* CTG - indirect vertex count */ 1323 4); 1324 OUT_BATCH(3); /* vertex count per instance */ 1325 OUT_BATCH(0); /* start vertex offset */ 1326 OUT_BATCH(1); /* single instance */ 1327 OUT_BATCH(0); /* start instance location */ 1328 OUT_BATCH(0); /* index buffer offset, ignored */ 1329 OUT_BATCH(MI_NOOP); 1330 1331 intel_batch_end_atomic(scrn); 1332 1333 drm_intel_bo_unreference(bo_table[0]); 1334 1335 if (IS_GEN4(intel)) 1336 i965_post_draw_debug(scrn); 1337 1338 } 1339 1340 /* release reference once we're finished */ 1341 drm_intel_bo_unreference(surface_state_binding_table_bo); 1342 1343 intel_uxa_debug_flush(scrn); 1344} 1345 1346void i965_free_video(ScrnInfoPtr scrn) 1347{ 1348 intel_screen_private *intel = intel_get_screen_private(scrn); 1349 1350 drm_intel_bo_unreference(intel->video.gen4_vs_bo); 1351 intel->video.gen4_vs_bo = NULL; 1352 drm_intel_bo_unreference(intel->video.gen4_sf_bo); 1353 intel->video.gen4_sf_bo = NULL; 1354 drm_intel_bo_unreference(intel->video.gen4_cc_bo); 1355 intel->video.gen4_cc_bo = NULL; 1356 drm_intel_bo_unreference(intel->video.gen4_wm_packed_bo); 1357 intel->video.gen4_wm_packed_bo = NULL; 1358 drm_intel_bo_unreference(intel->video.gen4_wm_planar_bo); 1359 intel->video.gen4_wm_planar_bo = NULL; 1360 drm_intel_bo_unreference(intel->video.gen4_cc_vp_bo); 1361 intel->video.gen4_cc_vp_bo = NULL; 1362 drm_intel_bo_unreference(intel->video.gen4_sampler_bo); 1363 intel->video.gen4_sampler_bo = NULL; 1364 drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo); 1365 intel->video.gen4_sip_kernel_bo = NULL; 1366 drm_intel_bo_unreference(intel->video.wm_prog_packed_bo); 1367 intel->video.wm_prog_packed_bo = NULL; 1368 drm_intel_bo_unreference(intel->video.wm_prog_planar_bo); 1369 intel->video.wm_prog_planar_bo = NULL; 1370 drm_intel_bo_unreference(intel->video.gen6_blend_bo); 1371 intel->video.gen6_blend_bo = NULL; 1372 drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo); 1373 intel->video.gen6_depth_stencil_bo = NULL; 1374} 1375 1376/* for GEN6+ */ 1377static drm_intel_bo * 1378gen6_create_cc_state(ScrnInfoPtr scrn) 1379{ 1380 intel_screen_private *intel = intel_get_screen_private(scrn); 1381 struct gen6_color_calc_state cc_state; 1382 1383 memset(&cc_state, 0, sizeof(cc_state)); 1384 cc_state.constant_r = 1.0; 1385 cc_state.constant_g = 0.0; 1386 cc_state.constant_b = 1.0; 1387 cc_state.constant_a = 1.0; 1388 1389 return intel_uxa_bo_alloc_for_data(intel, 1390 &cc_state, sizeof(cc_state), 1391 "textured video cc state"); 1392} 1393 1394static drm_intel_bo * 1395gen6_create_blend_state(ScrnInfoPtr scrn) 1396{ 1397 intel_screen_private *intel = intel_get_screen_private(scrn); 1398 struct gen6_blend_state blend_state; 1399 1400 memset(&blend_state, 0, sizeof(blend_state)); 1401 blend_state.blend1.logic_op_enable = 1; 1402 blend_state.blend1.logic_op_func = 0xc; 1403 blend_state.blend1.pre_blend_clamp_enable = 1; 1404 1405 return intel_uxa_bo_alloc_for_data(intel, 1406 &blend_state, sizeof(blend_state), 1407 "textured video blend state"); 1408} 1409 1410static drm_intel_bo * 1411gen6_create_depth_stencil_state(ScrnInfoPtr scrn) 1412{ 1413 intel_screen_private *intel = intel_get_screen_private(scrn); 1414 struct gen6_depth_stencil_state depth_stencil_state; 1415 1416 memset(&depth_stencil_state, 0, sizeof(depth_stencil_state)); 1417 return intel_uxa_bo_alloc_for_data(intel, 1418 &depth_stencil_state, 1419 sizeof(depth_stencil_state), 1420 "textured video blend state"); 1421} 1422 1423static Bool 1424gen6_create_vidoe_objects(ScrnInfoPtr scrn) 1425{ 1426 intel_screen_private *intel = intel_get_screen_private(scrn); 1427 drm_intel_bo *(*create_sampler_state)(ScrnInfoPtr); 1428 const uint32_t *packed_ps_kernel, *planar_ps_kernel; 1429 unsigned int packed_ps_size, planar_ps_size; 1430 1431 if (INTEL_INFO(intel)->gen >= 070) { 1432 create_sampler_state = gen7_create_sampler_state; 1433 packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0]; 1434 packed_ps_size = sizeof(ps_kernel_packed_static_gen7); 1435 planar_ps_kernel = &ps_kernel_planar_static_gen7[0][0]; 1436 planar_ps_size = sizeof(ps_kernel_planar_static_gen7); 1437 } else { 1438 create_sampler_state = i965_create_sampler_state; 1439 packed_ps_kernel = &ps_kernel_packed_static_gen6[0][0]; 1440 packed_ps_size = sizeof(ps_kernel_packed_static_gen6); 1441 planar_ps_kernel = &ps_kernel_planar_static_gen6[0][0]; 1442 planar_ps_size = sizeof(ps_kernel_planar_static_gen6); 1443 } 1444 1445 if (intel->video.gen4_sampler_bo == NULL) 1446 intel->video.gen4_sampler_bo = create_sampler_state(scrn); 1447 1448 if (intel->video.wm_prog_packed_bo == NULL) 1449 intel->video.wm_prog_packed_bo = 1450 i965_create_program(scrn, 1451 packed_ps_kernel, 1452 packed_ps_size); 1453 1454 if (intel->video.wm_prog_planar_bo == NULL) 1455 intel->video.wm_prog_planar_bo = 1456 i965_create_program(scrn, 1457 planar_ps_kernel, 1458 planar_ps_size); 1459 1460 if (intel->video.gen4_cc_vp_bo == NULL) 1461 intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn); 1462 1463 if (intel->video.gen4_cc_bo == NULL) 1464 intel->video.gen4_cc_bo = gen6_create_cc_state(scrn); 1465 1466 if (intel->video.gen6_blend_bo == NULL) 1467 intel->video.gen6_blend_bo = gen6_create_blend_state(scrn); 1468 1469 if (intel->video.gen6_depth_stencil_bo == NULL) 1470 intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn); 1471 1472 1473 return (intel->video.gen4_sampler_bo != NULL && 1474 intel->video.wm_prog_packed_bo != NULL && 1475 intel->video.wm_prog_planar_bo != NULL && 1476 intel->video.gen4_cc_vp_bo != NULL && 1477 intel->video.gen4_cc_bo != NULL && 1478 intel->video.gen6_blend_bo != NULL && 1479 intel->video.gen6_depth_stencil_bo != NULL); 1480} 1481 1482static void 1483gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) 1484{ 1485 intel_screen_private *intel = intel_get_screen_private(scrn); 1486 1487 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); 1488 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ 1489 OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ 1490 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ 1491 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ 1492 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ 1493 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ 1494 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ 1495 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ 1496 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ 1497} 1498 1499static void 1500gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap) 1501{ 1502 intel_screen_private *intel = intel_get_screen_private(scrn); 1503 1504 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); 1505 OUT_BATCH(0x00000000); /* ymin, xmin */ 1506 OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ 1507 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 1508} 1509 1510static void 1511gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) 1512{ 1513 intel_screen_private *intel = intel_get_screen_private(scrn); 1514 1515 /* disable WM constant buffer */ 1516 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); 1517 OUT_BATCH(0); 1518 OUT_BATCH(0); 1519 OUT_BATCH(0); 1520 OUT_BATCH(0); 1521 1522 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 1523 if (is_packed) { 1524 OUT_RELOC(intel->video.wm_prog_packed_bo, 1525 I915_GEM_DOMAIN_INSTRUCTION, 0, 1526 0); 1527 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 1528 (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1529 } else { 1530 OUT_RELOC(intel->video.wm_prog_planar_bo, 1531 I915_GEM_DOMAIN_INSTRUCTION, 0, 1532 0); 1533 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 1534 (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1535 } 1536 OUT_BATCH(0); 1537 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ 1538 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | 1539 GEN6_3DSTATE_WM_DISPATCH_ENABLE | 1540 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); 1541 OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | 1542 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 1543 OUT_BATCH(0); 1544 OUT_BATCH(0); 1545} 1546 1547static void 1548gen6_upload_vertex_element_state(ScrnInfoPtr scrn) 1549{ 1550 intel_screen_private *intel = intel_get_screen_private(scrn); 1551 1552 /* Set up our vertex elements, sourced from the single vertex buffer. */ 1553 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2)); 1554 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 1555 OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1556 GEN6_VE0_VALID | 1557 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1558 (0 << VE0_OFFSET_SHIFT)); 1559 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1560 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1561 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1562 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1563 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 1564 OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1565 GEN6_VE0_VALID | 1566 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1567 (8 << VE0_OFFSET_SHIFT)); 1568 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1569 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1570 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1571 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1572} 1573 1574static void 1575gen6_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1576{ 1577 intel_screen_private *intel = intel_get_screen_private(scrn); 1578 1579 /* Set up the pointer to our vertex buffer */ 1580 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1581 /* four 32-bit floats per vertex */ 1582 OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | 1583 GEN6_VB0_VERTEXDATA | 1584 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1585 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 1586 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); 1587 OUT_BATCH(0); /* reserved */ 1588} 1589 1590static void 1591gen6_upload_primitive(ScrnInfoPtr scrn) 1592{ 1593 intel_screen_private *intel = intel_get_screen_private(scrn); 1594 1595 OUT_BATCH(BRW_3DPRIMITIVE | 1596 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1597 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 1598 (0 << 9) | /* Internal Vertex Count */ 1599 (6 - 2)); 1600 OUT_BATCH(3); /* vertex count per instance */ 1601 OUT_BATCH(0); /* start vertex offset */ 1602 OUT_BATCH(1); /* single instance */ 1603 OUT_BATCH(0); /* start instance location */ 1604 OUT_BATCH(0); /* index buffer offset, ignored */ 1605} 1606 1607static void 1608gen6_emit_video_setup(ScrnInfoPtr scrn, 1609 drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, 1610 PixmapPtr pixmap, 1611 drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1612{ 1613 intel_screen_private *intel = intel_get_screen_private(scrn); 1614 1615 assert(n_src_surf == 1 || n_src_surf == 6); 1616 IntelEmitInvarientState(scrn); 1617 intel->last_3d = LAST_3D_VIDEO; 1618 intel->needs_3d_invariant = TRUE; 1619 1620 gen6_upload_invariant_states(intel); 1621 gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); 1622 gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); 1623 gen6_upload_urb(intel); 1624 gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); 1625 gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); 1626 gen6_upload_vs_state(intel); 1627 gen6_upload_gs_state(intel); 1628 gen6_upload_clip_state(intel); 1629 gen6_upload_sf_state(intel, 1, 0); 1630 gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); 1631 gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1632 gen6_upload_depth_buffer_state(intel); 1633 gen6_upload_drawing_rectangle(scrn, pixmap); 1634 gen6_upload_vertex_element_state(scrn); 1635 gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); 1636 gen6_upload_primitive(scrn); 1637} 1638 1639static void 1640gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) 1641{ 1642 intel_screen_private *intel = intel_get_screen_private(scrn); 1643 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; 1644 unsigned int num_samples = 0; 1645 1646 if (IS_HSW(intel)) { 1647 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; 1648 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; 1649 } 1650 1651 /* disable WM constant buffer */ 1652 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); 1653 OUT_BATCH(0); 1654 OUT_BATCH(0); 1655 OUT_BATCH(0); 1656 OUT_BATCH(0); 1657 OUT_BATCH(0); 1658 OUT_BATCH(0); 1659 1660 OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); 1661 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 1662 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 1663 OUT_BATCH(0); 1664 1665 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 1666 1667 if (is_packed) { 1668 OUT_RELOC(intel->video.wm_prog_packed_bo, 1669 I915_GEM_DOMAIN_INSTRUCTION, 0, 1670 0); 1671 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 1672 (2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1673 } else { 1674 OUT_RELOC(intel->video.wm_prog_planar_bo, 1675 I915_GEM_DOMAIN_INSTRUCTION, 0, 1676 0); 1677 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 1678 (7 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 1679 } 1680 1681 OUT_BATCH(0); /* scratch space base offset */ 1682 OUT_BATCH( 1683 ((48 - 1) << max_threads_shift) | num_samples | 1684 GEN7_PS_ATTRIBUTE_ENABLE | 1685 GEN7_PS_16_DISPATCH_ENABLE); 1686 OUT_BATCH( 1687 (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); 1688 OUT_BATCH(0); /* kernel 1 pointer */ 1689 OUT_BATCH(0); /* kernel 2 pointer */ 1690} 1691 1692static void 1693gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1694{ 1695 intel_screen_private *intel = intel_get_screen_private(scrn); 1696 1697 /* Set up the pointer to our vertex buffer */ 1698 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1699 /* four 32-bit floats per vertex */ 1700 OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | 1701 GEN6_VB0_VERTEXDATA | 1702 GEN7_VB0_ADDRESS_MODIFYENABLE | 1703 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1704 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 1705 OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); 1706 OUT_BATCH(0); /* reserved */ 1707} 1708 1709static void 1710gen7_upload_primitive(ScrnInfoPtr scrn) 1711{ 1712 intel_screen_private *intel = intel_get_screen_private(scrn); 1713 1714 OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); 1715 OUT_BATCH(_3DPRIM_RECTLIST | 1716 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); 1717 OUT_BATCH(3); /* vertex count per instance */ 1718 OUT_BATCH(0); /* start vertex offset */ 1719 OUT_BATCH(1); /* single instance */ 1720 OUT_BATCH(0); /* start instance location */ 1721 OUT_BATCH(0); 1722} 1723 1724static void 1725gen7_emit_video_setup(ScrnInfoPtr scrn, 1726 drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, 1727 PixmapPtr pixmap, 1728 drm_intel_bo *vertex_bo, uint32_t end_address_offset) 1729{ 1730 intel_screen_private *intel = intel_get_screen_private(scrn); 1731 1732 assert(n_src_surf == 1 || n_src_surf == 6); 1733 IntelEmitInvarientState(scrn); 1734 intel->last_3d = LAST_3D_VIDEO; 1735 intel->needs_3d_invariant = TRUE; 1736 1737 gen6_upload_invariant_states(intel); 1738 gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); 1739 gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); 1740 gen7_upload_urb(intel); 1741 gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); 1742 gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); 1743 gen7_upload_bypass_states(intel); 1744 gen6_upload_vs_state(intel); 1745 gen6_upload_clip_state(intel); 1746 gen7_upload_sf_state(intel, 1, 0); 1747 gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); 1748 gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1749 gen7_upload_depth_buffer_state(intel); 1750 gen6_upload_drawing_rectangle(scrn, pixmap); 1751 gen6_upload_vertex_element_state(scrn); 1752 gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); 1753 gen7_upload_primitive(scrn); 1754} 1755 1756void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, 1757 intel_adaptor_private *adaptor_priv, int id, 1758 RegionPtr dstRegion, 1759 short width, short height, 1760 int video_pitch, int video_pitch2, 1761 short src_w, short src_h, 1762 short drw_w, short drw_h, PixmapPtr pixmap) 1763{ 1764 intel_screen_private *intel = intel_get_screen_private(scrn); 1765 BoxPtr pbox; 1766 int nbox, dxo, dyo, pix_xoff, pix_yoff; 1767 float src_scale_x, src_scale_y; 1768 int src_surf; 1769 int n_src_surf; 1770 uint32_t src_surf_format; 1771 uint32_t src_surf_base[6]; 1772 int src_width[6]; 1773 int src_height[6]; 1774 int src_pitch[6]; 1775 drm_intel_bo *surface_state_binding_table_bo; 1776 void (*create_dst_surface_state)(ScrnInfoPtr, 1777 PixmapPtr, 1778 drm_intel_bo *, 1779 uint32_t); 1780 void (*create_src_surface_state)(ScrnInfoPtr, 1781 drm_intel_bo *, 1782 uint32_t, int, 1783 int, int, uint32_t, 1784 drm_intel_bo *, uint32_t); 1785 void (*emit_video_setup)(ScrnInfoPtr, 1786 drm_intel_bo *, int, 1787 PixmapPtr, 1788 drm_intel_bo *, uint32_t); 1789 1790 if (INTEL_INFO(intel)->gen >= 070) { 1791 create_dst_surface_state = gen7_create_dst_surface_state; 1792 create_src_surface_state = gen7_create_src_surface_state; 1793 emit_video_setup = gen7_emit_video_setup; 1794 } else { 1795 create_dst_surface_state = i965_create_dst_surface_state; 1796 create_src_surface_state = i965_create_src_surface_state; 1797 emit_video_setup = gen6_emit_video_setup; 1798 } 1799 1800 src_surf_base[0] = adaptor_priv->YBufOffset; 1801 src_surf_base[1] = adaptor_priv->YBufOffset; 1802 src_surf_base[2] = adaptor_priv->VBufOffset; 1803 src_surf_base[3] = adaptor_priv->VBufOffset; 1804 src_surf_base[4] = adaptor_priv->UBufOffset; 1805 src_surf_base[5] = adaptor_priv->UBufOffset; 1806 1807 if (is_planar_fourcc(id)) { 1808 src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; 1809 src_width[1] = src_width[0] = width; 1810 src_height[1] = src_height[0] = height; 1811 src_pitch[1] = src_pitch[0] = video_pitch2; 1812 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1813 width / 2; 1814 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1815 height / 2; 1816 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1817 video_pitch; 1818 n_src_surf = 6; 1819 } else { 1820 if (id == FOURCC_UYVY) 1821 src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; 1822 else 1823 src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; 1824 1825 src_width[0] = width; 1826 src_height[0] = height; 1827 src_pitch[0] = video_pitch; 1828 n_src_surf = 1; 1829 } 1830 1831 surface_state_binding_table_bo = 1832 drm_intel_bo_alloc(intel->bufmgr, 1833 "surface state & binding table", 1834 (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), 1835 4096); 1836 1837 if (!surface_state_binding_table_bo) 1838 return; 1839 1840 create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); 1841 1842 for (src_surf = 0; src_surf < n_src_surf; src_surf++) { 1843 create_src_surface_state(scrn, 1844 adaptor_priv->buf, 1845 src_surf_base[src_surf], 1846 src_width[src_surf], 1847 src_height[src_surf], 1848 src_pitch[src_surf], 1849 src_surf_format, 1850 surface_state_binding_table_bo, 1851 (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); 1852 } 1853 1854 i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); 1855 1856 if (!gen6_create_vidoe_objects(scrn)) { 1857 drm_intel_bo_unreference(surface_state_binding_table_bo); 1858 return; 1859 } 1860 1861 /* Set up the offset for translating from the given region (in screen 1862 * coordinates) to the backing pixmap. 1863 */ 1864#ifdef COMPOSITE 1865 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1866 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1867#else 1868 pix_xoff = 0; 1869 pix_yoff = 0; 1870#endif 1871 1872 dxo = dstRegion->extents.x1; 1873 dyo = dstRegion->extents.y1; 1874 1875 /* Use normalized texture coordinates */ 1876 src_scale_x = ((float)src_w / width) / (float)drw_w; 1877 src_scale_y = ((float)src_h / height) / (float)drw_h; 1878 1879 pbox = REGION_RECTS(dstRegion); 1880 nbox = REGION_NUM_RECTS(dstRegion); 1881 while (nbox--) { 1882 int box_x1 = pbox->x1; 1883 int box_y1 = pbox->y1; 1884 int box_x2 = pbox->x2; 1885 int box_y2 = pbox->y2; 1886 int i; 1887 float vb[12]; 1888 drm_intel_bo *bo_table[] = { 1889 NULL, /* vb_bo */ 1890 intel->batch_bo, 1891 surface_state_binding_table_bo, 1892 intel->video.gen4_sampler_bo, 1893 intel->video.wm_prog_packed_bo, 1894 intel->video.wm_prog_planar_bo, 1895 intel->video.gen4_cc_vp_bo, 1896 intel->video.gen4_cc_bo, 1897 intel->video.gen6_blend_bo, 1898 intel->video.gen6_depth_stencil_bo, 1899 }; 1900 1901 pbox++; 1902 1903 i = 0; 1904 vb[i++] = (box_x2 - dxo) * src_scale_x; 1905 vb[i++] = (box_y2 - dyo) * src_scale_y; 1906 vb[i++] = (float)box_x2 + pix_xoff; 1907 vb[i++] = (float)box_y2 + pix_yoff; 1908 1909 vb[i++] = (box_x1 - dxo) * src_scale_x; 1910 vb[i++] = (box_y2 - dyo) * src_scale_y; 1911 vb[i++] = (float)box_x1 + pix_xoff; 1912 vb[i++] = (float)box_y2 + pix_yoff; 1913 1914 vb[i++] = (box_x1 - dxo) * src_scale_x; 1915 vb[i++] = (box_y1 - dyo) * src_scale_y; 1916 vb[i++] = (float)box_x1 + pix_xoff; 1917 vb[i++] = (float)box_y1 + pix_yoff; 1918 1919 bo_table[0] = intel_uxa_bo_alloc_for_data(intel, 1920 vb, sizeof(vb), 1921 "video vbo"); 1922 1923 /* If this command won't fit in the current batch, flush. 1924 * Assume that it does after being flushed. 1925 */ 1926 if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) 1927 intel_batch_submit(scrn); 1928 1929 intel_batch_start_atomic(scrn, 200); 1930 emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap, bo_table[0], i * 4); 1931 intel_batch_end_atomic(scrn); 1932 1933 drm_intel_bo_unreference(bo_table[0]); 1934 } 1935 1936 /* release reference once we're finished */ 1937 drm_intel_bo_unreference(surface_state_binding_table_bo); 1938 intel_uxa_debug_flush(scrn); 1939} 1940