i965_video.c revision fa225cbc
1/* 2 * Copyright � 2006 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Keith Packard <keithp@keithp.com> 26 * 27 */ 28 29#ifdef HAVE_CONFIG_H 30#include "config.h" 31#endif 32 33#include "xf86.h" 34#include "xf86_OSproc.h" 35#include "xf86xv.h" 36#include "fourcc.h" 37 38#include "i830.h" 39#include "i830_video.h" 40#include "i830_hwmc.h" 41#include "brw_defines.h" 42#include "brw_structs.h" 43#include <string.h> 44 45/* Make assert() work. */ 46#undef NDEBUG 47#include <assert.h> 48 49static const uint32_t sip_kernel_static[][4] = { 50/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ 51 { 0x00000030, 0x20000108, 0x00001220, 0x00000000 }, 52/* nop (4) g0<1>UD { align1 + } */ 53 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 54/* nop (4) g0<1>UD { align1 + } */ 55 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 56/* nop (4) g0<1>UD { align1 + } */ 57 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 58/* nop (4) g0<1>UD { align1 + } */ 59 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 60/* nop (4) g0<1>UD { align1 + } */ 61 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 62/* nop (4) g0<1>UD { align1 + } */ 63 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 64/* nop (4) g0<1>UD { align1 + } */ 65 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 66/* nop (4) g0<1>UD { align1 + } */ 67 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 68/* nop (4) g0<1>UD { align1 + } */ 69 { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 70}; 71 72/* 73 * this program computes dA/dx and dA/dy for the texture coordinates along 74 * with the base texture coordinate. It was extracted from the Mesa driver. 75 * It uses about 10 GRF registers. 76 */ 77 78#define SF_KERNEL_NUM_GRF 16 79#define SF_MAX_THREADS 1 80 81static const uint32_t sf_kernel_static[][4] = { 82#include "exa_sf.g4b" 83}; 84 85/* 86 * Ok, this kernel picks up the required data flow values in g0 and g1 87 * and passes those along in m0 and m1. In m2-m9, it sticks constant 88 * values (bright pink). 89 */ 90 91/* Our PS kernel uses less than 32 GRF registers (about 20) */ 92#define PS_KERNEL_NUM_GRF 32 93#define PS_MAX_THREADS 32 94 95#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 96 97static const uint32_t ps_kernel_packed_static[][4] = { 98#include "exa_wm_xy.g4b" 99#include "exa_wm_src_affine.g4b" 100#include "exa_wm_src_sample_argb.g4b" 101#include "exa_wm_yuv_rgb.g4b" 102#include "exa_wm_write.g4b" 103}; 104 105static const uint32_t ps_kernel_planar_static[][4] = { 106#include "exa_wm_xy.g4b" 107#include "exa_wm_src_affine.g4b" 108#include "exa_wm_src_sample_planar.g4b" 109#include "exa_wm_yuv_rgb.g4b" 110#include "exa_wm_write.g4b" 111}; 112 113/* new program for IGDNG */ 114static const uint32_t sf_kernel_static_gen5[][4] = { 115#include "exa_sf.g4b.gen5" 116}; 117 118static const uint32_t ps_kernel_packed_static_gen5[][4] = { 119#include "exa_wm_xy.g4b.gen5" 120#include "exa_wm_src_affine.g4b.gen5" 121#include "exa_wm_src_sample_argb.g4b.gen5" 122#include "exa_wm_yuv_rgb.g4b.gen5" 123#include "exa_wm_write.g4b.gen5" 124}; 125 126static const uint32_t ps_kernel_planar_static_gen5[][4] = { 127#include "exa_wm_xy.g4b.gen5" 128#include "exa_wm_src_affine.g4b.gen5" 129#include "exa_wm_src_sample_planar.g4b.gen5" 130#include "exa_wm_yuv_rgb.g4b.gen5" 131#include "exa_wm_write.g4b.gen5" 132}; 133 134static uint32_t float_to_uint (float f) { 135 union {uint32_t i; float f;} x; 136 x.f = f; 137 return x.i; 138} 139 140#if 0 141static struct { 142 uint32_t svg_ctl; 143 char *name; 144} svg_ctl_bits[] = { 145 { BRW_SVG_CTL_GS_BA, "General State Base Address" }, 146 { BRW_SVG_CTL_SS_BA, "Surface State Base Address" }, 147 { BRW_SVG_CTL_IO_BA, "Indirect Object Base Address" }, 148 { BRW_SVG_CTL_GS_AUB, "Generate State Access Upper Bound" }, 149 { BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound" }, 150 { BRW_SVG_CTL_SIP, "System Instruction Pointer" }, 151 { 0, 0 }, 152}; 153 154static void 155brw_debug (ScrnInfoPtr pScrn, char *when) 156{ 157 I830Ptr pI830 = I830PTR(pScrn); 158 int i; 159 uint32_t v; 160 161 I830Sync (pScrn); 162 ErrorF("brw_debug: %s\n", when); 163 for (i = 0; svg_ctl_bits[i].name; i++) { 164 OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl); 165 v = INREG(BRW_SVG_RDATA); 166 ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v); 167 } 168} 169#endif 170 171#define WATCH_SF 0 172#define WATCH_WIZ 0 173#define WATCH_STATS 0 174 175static void 176i965_pre_draw_debug(ScrnInfoPtr scrn) 177{ 178#if 0 179 I830Ptr pI830 = I830PTR(scrn); 180#endif 181 182#if 0 183 ErrorF ("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", 184 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 185 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 186 187 OUTREG(BRW_VF_CTL, 188 BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID | 189 BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX | 190 BRW_VF_CTL_SNAPSHOT_ENABLE); 191 OUTREG(BRW_VF_STRG_VAL, 0); 192#endif 193 194#if 0 195 OUTREG(BRW_VS_CTL, 196 BRW_VS_CTL_SNAPSHOT_ALL_THREADS | 197 BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT | 198 BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE); 199 200 OUTREG(BRW_VS_STRG_VAL, 0); 201#endif 202 203#if WATCH_SF 204 OUTREG(BRW_SF_CTL, 205 BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT | 206 BRW_SF_CTL_SNAPSHOT_ALL_THREADS | 207 BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE); 208 OUTREG(BRW_SF_STRG_VAL, 0); 209#endif 210 211#if WATCH_WIZ 212 OUTREG(BRW_WIZ_CTL, 213 BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE | 214 BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS | 215 BRW_WIZ_CTL_SNAPSHOT_ENABLE); 216 OUTREG(BRW_WIZ_STRG_VAL, 217 (box_x1) | (box_y1 << 16)); 218#endif 219 220#if 0 221 OUTREG(BRW_TS_CTL, 222 BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR | 223 BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS | 224 BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS | 225 BRW_TS_CTL_SNAPSHOT_ENABLE); 226#endif 227} 228 229static void 230i965_post_draw_debug(ScrnInfoPtr scrn) 231{ 232#if 0 233 I830Ptr pI830 = I830PTR(scrn); 234#endif 235 236#if 0 237 for (j = 0; j < 100000; j++) { 238 ctl = INREG(BRW_VF_CTL); 239 if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE) 240 break; 241 } 242 243 rdata = INREG(BRW_VF_RDATA); 244 OUTREG(BRW_VF_CTL, 0); 245 ErrorF ("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata); 246#endif 247 248#if 0 249 for (j = 0; j < 1000000; j++) { 250 ctl = INREG(BRW_VS_CTL); 251 if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE) 252 break; 253 } 254 255 rdata = INREG(BRW_VS_RDATA); 256 for (k = 0; k <= 3; k++) { 257 OUTREG(BRW_VS_CTL, 258 BRW_VS_CTL_SNAPSHOT_COMPLETE | 259 (k << 8)); 260 rdata = INREG(BRW_VS_RDATA); 261 ErrorF ("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata); 262 } 263 264 OUTREG(BRW_VS_CTL, 0); 265#endif 266 267#if WATCH_SF 268 for (j = 0; j < 1000000; j++) { 269 ctl = INREG(BRW_SF_CTL); 270 if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE) 271 break; 272 } 273 274 for (k = 0; k <= 7; k++) { 275 OUTREG(BRW_SF_CTL, 276 BRW_SF_CTL_SNAPSHOT_COMPLETE | 277 (k << 8)); 278 rdata = INREG(BRW_SF_RDATA); 279 ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata); 280 } 281 282 OUTREG(BRW_SF_CTL, 0); 283#endif 284 285#if WATCH_WIZ 286 for (j = 0; j < 100000; j++) { 287 ctl = INREG(BRW_WIZ_CTL); 288 if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE) 289 break; 290 } 291 292 rdata = INREG(BRW_WIZ_RDATA); 293 OUTREG(BRW_WIZ_CTL, 0); 294 ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata); 295#endif 296 297#if 0 298 for (j = 0; j < 100000; j++) { 299 ctl = INREG(BRW_TS_CTL); 300 if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE) 301 break; 302 } 303 304 rdata = INREG(BRW_TS_RDATA); 305 OUTREG(BRW_TS_CTL, 0); 306 ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata); 307 308 ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", 309 INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), 310 INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); 311#endif 312 313#if 0 314 for (j = 0; j < 256; j++) { 315 OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT); 316 rdata = INREG(BRW_TD_RDATA); 317 ErrorF ("TD_RDATA(%d): 0x%08x\n", j, rdata); 318 } 319#endif 320} 321 322/* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it. 323 * A VUE consists of a 256-bit vertex header followed by the vertex data, 324 * which in our case is 4 floats (128 bits), thus a single 512-bit URB 325 * entry. 326 */ 327#define URB_VS_ENTRIES 8 328#define URB_VS_ENTRY_SIZE 1 329 330#define URB_GS_ENTRIES 0 331#define URB_GS_ENTRY_SIZE 0 332 333#define URB_CLIP_ENTRIES 0 334#define URB_CLIP_ENTRY_SIZE 0 335 336/* The SF kernel we use outputs only 4 256-bit registers, leading to an 337 * entry size of 2 512-bit URBs. We don't need to have many entries to 338 * output as we're generally working on large rectangles and don't care 339 * about having WM threads running on different rectangles simultaneously. 340 */ 341#define URB_SF_ENTRIES 1 342#define URB_SF_ENTRY_SIZE 2 343 344#define URB_CS_ENTRIES 0 345#define URB_CS_ENTRY_SIZE 0 346 347static int 348intel_alloc_and_map(I830Ptr i830, char *name, int size, 349 drm_intel_bo **bop, void *virtualp) 350{ 351 drm_intel_bo *bo; 352 353 bo = drm_intel_bo_alloc(i830->bufmgr, name, size, 4096); 354 if (!bo) 355 return -1; 356 if (drm_intel_bo_map(bo, TRUE) != 0) { 357 drm_intel_bo_unreference(bo); 358 return -1; 359 } 360 *bop = bo; 361 *(void **) virtualp = bo->virtual; 362 memset (bo->virtual, 0, size); 363 return 0; 364} 365 366static drm_intel_bo * 367i965_create_dst_surface_state(ScrnInfoPtr scrn, 368 PixmapPtr pixmap) 369{ 370 I830Ptr pI830 = I830PTR(scrn); 371 struct brw_surface_state *dest_surf_state; 372 drm_intel_bo *pixmap_bo = i830_get_pixmap_bo(pixmap); 373 drm_intel_bo *surf_bo; 374 375 if (intel_alloc_and_map(pI830, "textured video surface state", 4096, 376 &surf_bo, &dest_surf_state) != 0) 377 return NULL; 378 379 dest_surf_state->ss0.surface_type = BRW_SURFACE_2D; 380 dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 381 if (pI830->cpp == 2) { 382 dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; 383 } else { 384 dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 385 } 386 dest_surf_state->ss0.writedisable_alpha = 0; 387 dest_surf_state->ss0.writedisable_red = 0; 388 dest_surf_state->ss0.writedisable_green = 0; 389 dest_surf_state->ss0.writedisable_blue = 0; 390 dest_surf_state->ss0.color_blend = 1; 391 dest_surf_state->ss0.vert_line_stride = 0; 392 dest_surf_state->ss0.vert_line_stride_ofs = 0; 393 dest_surf_state->ss0.mipmap_layout_mode = 0; 394 dest_surf_state->ss0.render_cache_read_mode = 0; 395 396 dest_surf_state->ss1.base_addr = 397 intel_emit_reloc(surf_bo, offsetof(struct brw_surface_state, ss1), 398 pixmap_bo, 0, 399 I915_GEM_DOMAIN_SAMPLER, 0); 400 401 dest_surf_state->ss2.height = scrn->virtualY - 1; 402 dest_surf_state->ss2.width = scrn->virtualX - 1; 403 dest_surf_state->ss2.mip_count = 0; 404 dest_surf_state->ss2.render_target_rotation = 0; 405 dest_surf_state->ss3.pitch = intel_get_pixmap_pitch(pixmap) - 1; 406 dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pixmap); 407 dest_surf_state->ss3.tile_walk = 0; /* TileX */ 408 409 drm_intel_bo_unmap(surf_bo); 410 return surf_bo; 411} 412 413static drm_intel_bo * 414i965_create_src_surface_state(ScrnInfoPtr scrn, 415 drm_intel_bo *src_bo, 416 uint32_t src_offset, 417 int src_width, 418 int src_height, 419 int src_pitch, 420 uint32_t src_surf_format) 421{ 422 I830Ptr pI830 = I830PTR(scrn); 423 drm_intel_bo *surface_bo; 424 struct brw_surface_state *src_surf_state; 425 426 if (intel_alloc_and_map(pI830, "textured video surface state", 4096, 427 &surface_bo, &src_surf_state) != 0) 428 return NULL; 429 430 /* Set up the source surface state buffer */ 431 src_surf_state->ss0.surface_type = BRW_SURFACE_2D; 432 src_surf_state->ss0.surface_format = src_surf_format; 433 src_surf_state->ss0.writedisable_alpha = 0; 434 src_surf_state->ss0.writedisable_red = 0; 435 src_surf_state->ss0.writedisable_green = 0; 436 src_surf_state->ss0.writedisable_blue = 0; 437 src_surf_state->ss0.color_blend = 1; 438 src_surf_state->ss0.vert_line_stride = 0; 439 src_surf_state->ss0.vert_line_stride_ofs = 0; 440 src_surf_state->ss0.mipmap_layout_mode = 0; 441 src_surf_state->ss0.render_cache_read_mode = 0; 442 443 src_surf_state->ss2.width = src_width - 1; 444 src_surf_state->ss2.height = src_height - 1; 445 src_surf_state->ss2.mip_count = 0; 446 src_surf_state->ss2.render_target_rotation = 0; 447 src_surf_state->ss3.pitch = src_pitch - 1; 448 449 if (src_bo) { 450 src_surf_state->ss1.base_addr = 451 intel_emit_reloc(surface_bo, 452 offsetof(struct brw_surface_state, ss1), 453 src_bo, src_offset, 454 I915_GEM_DOMAIN_SAMPLER, 0); 455 } else { 456 src_surf_state->ss1.base_addr = src_offset; 457 } 458 459 drm_intel_bo_unmap(surface_bo); 460 return surface_bo; 461} 462 463static drm_intel_bo * 464i965_create_binding_table(ScrnInfoPtr scrn, drm_intel_bo **surf_bos, int n_surf) 465{ 466 I830Ptr pI830 = I830PTR(scrn); 467 drm_intel_bo *bind_bo; 468 uint32_t *binding_table; 469 int i; 470 471 /* Set up a binding table for our surfaces. Only the PS will use it */ 472 473 if (intel_alloc_and_map(pI830, "textured video binding table", 4096, 474 &bind_bo, &binding_table) != 0) 475 return NULL; 476 477 for (i = 0; i < n_surf; i++) 478 binding_table[i] = intel_emit_reloc(bind_bo, i * sizeof(uint32_t), 479 surf_bos[i], 0, 480 I915_GEM_DOMAIN_INSTRUCTION, 0); 481 482 drm_intel_bo_unmap(bind_bo); 483 return bind_bo; 484} 485 486static drm_intel_bo * 487i965_create_sampler_state(ScrnInfoPtr scrn) 488{ 489 I830Ptr pI830 = I830PTR(scrn); 490 drm_intel_bo *sampler_bo; 491 struct brw_sampler_state *sampler_state; 492 493 if (intel_alloc_and_map(pI830, "textured video sampler state", 4096, 494 &sampler_bo, &sampler_state) != 0) 495 return NULL; 496 497 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 498 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 499 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 500 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 501 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 502 503 drm_intel_bo_unmap(sampler_bo); 504 return sampler_bo; 505} 506 507static drm_intel_bo * 508i965_create_vs_state(ScrnInfoPtr scrn) 509{ 510 I830Ptr pI830 = I830PTR(scrn); 511 drm_intel_bo *vs_bo; 512 struct brw_vs_unit_state *vs_state; 513 514 if (intel_alloc_and_map(pI830, "textured video vs state", 4096, 515 &vs_bo, &vs_state) != 0) 516 return NULL; 517 518 /* Set up the vertex shader to be disabled (passthrough) */ 519 if (IS_IGDNG(pI830)) 520 vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; 521 else 522 vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; 523 vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 524 vs_state->vs6.vs_enable = 0; 525 vs_state->vs6.vert_cache_disable = 1; 526 527 drm_intel_bo_unmap(vs_bo); 528 return vs_bo; 529} 530 531static drm_intel_bo * 532i965_create_program(ScrnInfoPtr scrn, const uint32_t *program, 533 unsigned int program_size) 534{ 535 I830Ptr pI830 = I830PTR(scrn); 536 drm_intel_bo *prog_bo; 537 538 prog_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video program", 539 program_size, 4096); 540 if (!prog_bo) 541 return NULL; 542 543 drm_intel_bo_subdata(prog_bo, 0, program_size, program); 544 545 return prog_bo; 546} 547 548static drm_intel_bo * 549i965_create_sf_state(ScrnInfoPtr scrn) 550{ 551 I830Ptr pI830 = I830PTR(scrn); 552 drm_intel_bo *sf_bo, *kernel_bo; 553 struct brw_sf_unit_state *sf_state; 554 555 if (IS_IGDNG(pI830)) 556 kernel_bo = i965_create_program(scrn, &sf_kernel_static_gen5[0][0], 557 sizeof(sf_kernel_static_gen5)); 558 else 559 kernel_bo = i965_create_program(scrn, &sf_kernel_static[0][0], 560 sizeof(sf_kernel_static)); 561 562 if (!kernel_bo) 563 return NULL; 564 565 if (intel_alloc_and_map(pI830, "textured video sf state", 4096, 566 &sf_bo, &sf_state) != 0) 567 { 568 drm_intel_bo_unreference(kernel_bo); 569 return NULL; 570 } 571 572 /* Set up the SF kernel to do coord interp: for each attribute, 573 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 574 * back to SF which then hands pixels off to WM. 575 */ 576 sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 577 sf_state->thread0.kernel_start_pointer = 578 intel_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0), 579 kernel_bo, sf_state->thread0.grf_reg_count << 1, 580 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 581 sf_state->sf1.single_program_flow = 1; /* XXX */ 582 sf_state->sf1.binding_table_entry_count = 0; 583 sf_state->sf1.thread_priority = 0; 584 sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ 585 sf_state->sf1.illegal_op_exception_enable = 1; 586 sf_state->sf1.mask_stack_exception_enable = 1; 587 sf_state->sf1.sw_exception_enable = 1; 588 sf_state->thread2.per_thread_scratch_space = 0; 589 /* scratch space is not used in our kernel */ 590 sf_state->thread2.scratch_space_base_pointer = 0; 591 sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 592 sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 593 sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 594 sf_state->thread3.urb_entry_read_offset = 0; 595 sf_state->thread3.dispatch_grf_start_reg = 3; 596 sf_state->thread4.max_threads = SF_MAX_THREADS - 1; 597 sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 598 sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; 599 sf_state->thread4.stats_enable = 1; 600 sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ 601 sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; 602 sf_state->sf6.scissor = 0; 603 sf_state->sf7.trifan_pv = 2; 604 sf_state->sf6.dest_org_vbias = 0x8; 605 sf_state->sf6.dest_org_hbias = 0x8; 606 607 drm_intel_bo_unmap(sf_bo); 608 return sf_bo; 609} 610 611static drm_intel_bo * 612i965_create_wm_state(ScrnInfoPtr scrn, drm_intel_bo *sampler_bo, Bool is_packed) 613{ 614 I830Ptr pI830 = I830PTR(scrn); 615 drm_intel_bo *wm_bo, *kernel_bo; 616 struct brw_wm_unit_state *wm_state; 617 618 if (is_packed) { 619 if (IS_IGDNG(pI830)) 620 kernel_bo = i965_create_program(scrn, &ps_kernel_packed_static_gen5[0][0], 621 sizeof(ps_kernel_packed_static_gen5)); 622 else 623 kernel_bo = i965_create_program(scrn, &ps_kernel_packed_static[0][0], 624 sizeof(ps_kernel_packed_static)); 625 } else { 626 if (IS_IGDNG(pI830)) 627 kernel_bo = i965_create_program(scrn, &ps_kernel_planar_static_gen5[0][0], 628 sizeof(ps_kernel_planar_static_gen5)); 629 else 630 kernel_bo = i965_create_program(scrn, &ps_kernel_planar_static[0][0], 631 sizeof(ps_kernel_planar_static)); 632 } 633 if (!kernel_bo) 634 return NULL; 635 636 if (intel_alloc_and_map(pI830, "textured video wm state", sizeof (*wm_state), 637 &wm_bo, &wm_state)) { 638 drm_intel_bo_unreference(kernel_bo); 639 return NULL; 640 } 641 642 wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 643 wm_state->thread0.kernel_start_pointer = 644 intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0), 645 kernel_bo, wm_state->thread0.grf_reg_count << 1, 646 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 647 wm_state->thread1.single_program_flow = 1; /* XXX */ 648 if (is_packed) 649 wm_state->thread1.binding_table_entry_count = 2; 650 else 651 wm_state->thread1.binding_table_entry_count = 7; 652 653 /* binding table entry count is only used for prefetching, and it has to 654 * be set 0 for IGDNG 655 */ 656 if (IS_IGDNG(pI830)) 657 wm_state->thread1.binding_table_entry_count = 0; 658 659 /* Though we never use the scratch space in our WM kernel, it has to be 660 * set, and the minimum allocation is 1024 bytes. 661 */ 662 wm_state->thread2.scratch_space_base_pointer = 0; 663 wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ 664 wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ 665 wm_state->thread3.const_urb_entry_read_length = 0; 666 wm_state->thread3.const_urb_entry_read_offset = 0; 667 wm_state->thread3.urb_entry_read_length = 1; /* XXX */ 668 wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ 669 wm_state->wm4.stats_enable = 1; 670 wm_state->wm4.sampler_state_pointer = 671 intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4), 672 sampler_bo, 0, 673 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 674 if (IS_IGDNG(pI830)) 675 wm_state->wm4.sampler_count = 0; 676 else 677 wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ 678 wm_state->wm5.max_threads = PS_MAX_THREADS - 1; 679 wm_state->wm5.thread_dispatch_enable = 1; 680 wm_state->wm5.enable_16_pix = 1; 681 wm_state->wm5.enable_8_pix = 0; 682 wm_state->wm5.early_depth_test = 1; 683 684 drm_intel_bo_unreference(kernel_bo); 685 686 drm_intel_bo_unmap(wm_bo); 687 return wm_bo; 688} 689 690static drm_intel_bo * 691i965_create_cc_vp_state(ScrnInfoPtr scrn) 692{ 693 I830Ptr pI830 = I830PTR(scrn); 694 drm_intel_bo *cc_vp_bo; 695 struct brw_cc_viewport *cc_viewport; 696 697 if (intel_alloc_and_map(pI830, "textured video cc viewport", 4096, 698 &cc_vp_bo, &cc_viewport) != 0) 699 return NULL; 700 701 cc_viewport->min_depth = -1.e35; 702 cc_viewport->max_depth = 1.e35; 703 704 drm_intel_bo_unmap(cc_vp_bo); 705 return cc_vp_bo; 706} 707 708static drm_intel_bo * 709i965_create_cc_state(ScrnInfoPtr scrn) 710{ 711 I830Ptr pI830 = I830PTR(scrn); 712 drm_intel_bo *cc_bo, *cc_vp_bo; 713 struct brw_cc_unit_state *cc_state; 714 715 cc_vp_bo = i965_create_cc_vp_state(scrn); 716 if (!cc_vp_bo) 717 return NULL; 718 719 if (intel_alloc_and_map(pI830, "textured video cc state", sizeof(*cc_state), 720 &cc_bo, &cc_state) != 0) { 721 drm_intel_bo_unreference(cc_vp_bo); 722 return NULL; 723 } 724 725 /* Color calculator state */ 726 memset(cc_state, 0, sizeof(*cc_state)); 727 cc_state->cc0.stencil_enable = 0; /* disable stencil */ 728 cc_state->cc2.depth_test = 0; /* disable depth test */ 729 cc_state->cc2.logicop_enable = 1; /* enable logic op */ 730 cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */ 731 cc_state->cc3.blend_enable = 0; /* disable color blend */ 732 cc_state->cc3.alpha_test = 0; /* disable alpha test */ 733 cc_state->cc4.cc_viewport_state_offset = 734 intel_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4), 735 cc_vp_bo, 0, 736 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 737 cc_state->cc5.dither_enable = 0; /* disable dither */ 738 cc_state->cc5.logicop_func = 0xc; /* WHITE */ 739 cc_state->cc5.statistics_enable = 1; 740 cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 741 cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE; 742 cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE; 743 744 drm_intel_bo_unmap(cc_bo); 745 746 drm_intel_bo_unreference(cc_vp_bo); 747 return cc_bo; 748} 749 750static void 751i965_emit_video_setup(ScrnInfoPtr pScrn, drm_intel_bo *bind_bo, int n_src_surf) 752{ 753 I830Ptr pI830 = I830PTR(pScrn); 754 int urb_vs_start, urb_vs_size; 755 int urb_gs_start, urb_gs_size; 756 int urb_clip_start, urb_clip_size; 757 int urb_sf_start, urb_sf_size; 758 int urb_cs_start, urb_cs_size; 759 int pipe_ctl; 760 761 IntelEmitInvarientState(pScrn); 762 pI830->last_3d = LAST_3D_VIDEO; 763 764 urb_vs_start = 0; 765 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 766 urb_gs_start = urb_vs_start + urb_vs_size; 767 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 768 urb_clip_start = urb_gs_start + urb_gs_size; 769 urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; 770 urb_sf_start = urb_clip_start + urb_clip_size; 771 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 772 urb_cs_start = urb_sf_start + urb_sf_size; 773 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 774 775 BEGIN_BATCH(2); 776 OUT_BATCH(MI_FLUSH | 777 MI_STATE_INSTRUCTION_CACHE_FLUSH | 778 BRW_MI_GLOBAL_SNAPSHOT_RESET); 779 OUT_BATCH(MI_NOOP); 780 ADVANCE_BATCH(); 781 782 /* brw_debug (pScrn, "before base address modify"); */ 783 if (IS_IGDNG(pI830)) 784 BEGIN_BATCH(14); 785 else 786 BEGIN_BATCH(12); 787 /* Match Mesa driver setup */ 788 if (IS_G4X(pI830) || IS_IGDNG(pI830)) 789 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 790 else 791 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 792 793 /* Mesa does this. Who knows... */ 794 OUT_BATCH(BRW_CS_URB_STATE | 0); 795 OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ 796 (0 << 0)); /* Number of URB Entries */ 797 798 /* Zero out the two base address registers so all offsets are 799 * absolute 800 */ 801 if (IS_IGDNG(pI830)) { 802 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 803 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 804 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 805 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 806 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 807 /* general state max addr, disabled */ 808 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 809 /* media object state max addr, disabled */ 810 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 811 /* Instruction max addr, disabled */ 812 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 813 } else { 814 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 815 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 816 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 817 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 818 /* general state max addr, disabled */ 819 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 820 /* media object state max addr, disabled */ 821 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 822 } 823 824 /* Set system instruction pointer */ 825 OUT_BATCH(BRW_STATE_SIP | 0); 826 /* system instruction pointer */ 827 OUT_RELOC(pI830->video.gen4_sip_kernel_bo, 828 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 829 830 OUT_BATCH(MI_NOOP); 831 ADVANCE_BATCH(); 832 833 /* brw_debug (pScrn, "after base address modify"); */ 834 835 if (IS_IGDNG(pI830)) 836 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE; 837 else 838 pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; 839 840 BEGIN_BATCH(38); 841 842 OUT_BATCH(MI_NOOP); 843 844 /* Pipe control */ 845 OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); 846 OUT_BATCH(0); /* Destination address */ 847 OUT_BATCH(0); /* Immediate data low DW */ 848 OUT_BATCH(0); /* Immediate data high DW */ 849 850 /* Binding table pointers */ 851 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 852 OUT_BATCH(0); /* vs */ 853 OUT_BATCH(0); /* gs */ 854 OUT_BATCH(0); /* clip */ 855 OUT_BATCH(0); /* sf */ 856 /* Only the PS uses the binding table */ 857 OUT_RELOC(bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); 858 859 /* Blend constant color (magenta is fun) */ 860 OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3); 861 OUT_BATCH(float_to_uint (1.0)); 862 OUT_BATCH(float_to_uint (0.0)); 863 OUT_BATCH(float_to_uint (1.0)); 864 OUT_BATCH(float_to_uint (1.0)); 865 866 /* The drawing rectangle clipping is always on. Set it to values that 867 * shouldn't do any clipping. 868 */ 869 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ 870 OUT_BATCH(0x00000000); /* ymin, xmin */ 871 OUT_BATCH((pScrn->virtualX - 1) | 872 (pScrn->virtualY - 1) << 16); /* ymax, xmax */ 873 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 874 875 /* skip the depth buffer */ 876 /* skip the polygon stipple */ 877 /* skip the polygon stipple offset */ 878 /* skip the line stipple */ 879 880 /* Set the pointers to the 3d pipeline state */ 881 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 882 OUT_RELOC(pI830->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 883 /* disable GS, resulting in passthrough */ 884 OUT_BATCH(BRW_GS_DISABLE); 885 /* disable CLIP, resulting in passthrough */ 886 OUT_BATCH(BRW_CLIP_DISABLE); 887 OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 888 if (n_src_surf == 1) 889 OUT_RELOC(pI830->video.gen4_wm_packed_bo, 890 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 891 else 892 OUT_RELOC(pI830->video.gen4_wm_planar_bo, 893 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 894 OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 895 896 /* URB fence */ 897 OUT_BATCH(BRW_URB_FENCE | 898 UF0_CS_REALLOC | 899 UF0_SF_REALLOC | 900 UF0_CLIP_REALLOC | 901 UF0_GS_REALLOC | 902 UF0_VS_REALLOC | 903 1); 904 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 905 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 906 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 907 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 908 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); 909 910 /* Constant buffer state */ 911 OUT_BATCH(BRW_CS_URB_STATE | 0); 912 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | 913 (URB_CS_ENTRIES << 0)); 914 915 /* Set up our vertex elements, sourced from the single vertex buffer. */ 916 917 if (IS_IGDNG(pI830)) { 918 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 919 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 920 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 921 VE0_VALID | 922 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 923 (0 << VE0_OFFSET_SHIFT)); 924 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 925 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 926 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 927 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 928 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 929 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 930 VE0_VALID | 931 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 932 (8 << VE0_OFFSET_SHIFT)); 933 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 934 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 935 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 936 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 937 } else { 938 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); 939 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ 940 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 941 VE0_VALID | 942 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 943 (0 << VE0_OFFSET_SHIFT)); 944 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 945 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 946 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 947 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 948 (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 949 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ 950 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 951 VE0_VALID | 952 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 953 (8 << VE0_OFFSET_SHIFT)); 954 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 955 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 956 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 957 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 958 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 959 } 960 961 OUT_BATCH(MI_NOOP); /* pad to quadword */ 962 ADVANCE_BATCH(); 963} 964 965void 966I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, 967 RegionPtr dstRegion, 968 short width, short height, int video_pitch, 969 int x1, int y1, int x2, int y2, 970 short src_w, short src_h, 971 short drw_w, short drw_h, 972 PixmapPtr pPixmap) 973{ 974 I830Ptr pI830 = I830PTR(pScrn); 975 BoxPtr pbox; 976 int nbox, dxo, dyo, pix_xoff, pix_yoff; 977 float src_scale_x, src_scale_y; 978 int src_surf, i; 979 int n_src_surf; 980 uint32_t src_surf_format; 981 uint32_t src_surf_base[6]; 982 int src_width[6]; 983 int src_height[6]; 984 int src_pitch[6]; 985 drm_intel_bo *bind_bo, *surf_bos[7]; 986 987#if 0 988 ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height, 989 video_pitch); 990#endif 991 992#if 0 993 /* enable debug */ 994 OUTREG (INST_PM, 995 (1 << (16 + 4)) | 996 (1 << 4)); 997 ErrorF ("INST_PM 0x%08x\n", INREG(INST_PM)); 998#endif 999 1000 src_surf_base[0] = pPriv->YBuf0offset; 1001 src_surf_base[1] = pPriv->YBuf0offset; 1002 src_surf_base[2] = pPriv->VBuf0offset; 1003 src_surf_base[3] = pPriv->VBuf0offset; 1004 src_surf_base[4] = pPriv->UBuf0offset; 1005 src_surf_base[5] = pPriv->UBuf0offset; 1006#if 0 1007 ErrorF ("base 0 0x%x base 1 0x%x base 2 0x%x\n", 1008 src_surf_base[0], src_surf_base[1], src_surf_base[2]); 1009#endif 1010 1011 switch (id) { 1012 case FOURCC_UYVY: 1013 src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; 1014 n_src_surf = 1; 1015 src_width[0] = width; 1016 src_height[0] = height; 1017 src_pitch[0] = video_pitch; 1018 break; 1019 case FOURCC_YUY2: 1020 src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; 1021 src_width[0] = width; 1022 src_height[0] = height; 1023 src_pitch[0] = video_pitch; 1024 n_src_surf = 1; 1025 break; 1026#ifdef INTEL_XVMC 1027 case FOURCC_XVMC: 1028#endif 1029 case FOURCC_I420: 1030 case FOURCC_YV12: 1031 src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; 1032 src_width[1] = src_width[0] = width; 1033 src_height[1] = src_height[0] = height; 1034 src_pitch[1] = src_pitch[0] = video_pitch * 2; 1035 src_width[4] = src_width[5] = src_width[2] = src_width[3] = width / 2; 1036 src_height[4] = src_height[5] = src_height[2] = src_height[3] = height / 2; 1037 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = video_pitch; 1038 n_src_surf = 6; 1039 break; 1040 default: 1041 return; 1042 } 1043 1044#if 0 1045 ErrorF("dst surf: 0x%08x\n", state_base_offset + dest_surf_offset); 1046 ErrorF("src surf: 0x%08x\n", state_base_offset + src_surf_offset); 1047#endif 1048 1049 /* We'll be poking the state buffers that could be in use by the 3d 1050 * hardware here, but we should have synced the 3D engine already in 1051 * I830PutImage. 1052 */ 1053 1054 /* Upload kernels */ 1055 surf_bos[0] = i965_create_dst_surface_state(pScrn, pPixmap); 1056 if (!surf_bos[0]) 1057 return; 1058 1059 for (src_surf = 0; src_surf < n_src_surf; src_surf++) { 1060 drm_intel_bo *surf_bo = 1061 i965_create_src_surface_state(pScrn, 1062 pPriv->buf, 1063 src_surf_base[src_surf], 1064 src_width[src_surf], 1065 src_height[src_surf], 1066 src_pitch[src_surf], 1067 src_surf_format); 1068 if (!surf_bo) { 1069 int q; 1070 for(q = 0; q < src_surf + 1; q++) 1071 drm_intel_bo_unreference(surf_bos[q]); 1072 return; 1073 } 1074 surf_bos[src_surf + 1] = surf_bo; 1075 } 1076 bind_bo = i965_create_binding_table(pScrn, surf_bos, n_src_surf + 1); 1077 for (i = 0; i < n_src_surf + 1; i++) { 1078 drm_intel_bo_unreference(surf_bos[i]); 1079 surf_bos[i] = NULL; 1080 } 1081 if (!bind_bo) 1082 return; 1083 1084 if (pI830->video.gen4_sampler_bo == NULL) 1085 pI830->video.gen4_sampler_bo = i965_create_sampler_state(pScrn); 1086 if (pI830->video.gen4_sip_kernel_bo == NULL) { 1087 pI830->video.gen4_sip_kernel_bo = 1088 i965_create_program(pScrn, &sip_kernel_static[0][0], 1089 sizeof(sip_kernel_static)); 1090 if (!pI830->video.gen4_sip_kernel_bo) { 1091 drm_intel_bo_unreference(bind_bo); 1092 return; 1093 } 1094 } 1095 1096 if (pI830->video.gen4_vs_bo == NULL) { 1097 pI830->video.gen4_vs_bo = i965_create_vs_state(pScrn); 1098 if (!pI830->video.gen4_vs_bo) { 1099 drm_intel_bo_unreference(bind_bo); 1100 return; 1101 } 1102 } 1103 if (pI830->video.gen4_sf_bo == NULL) { 1104 pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn); 1105 if (!pI830->video.gen4_sf_bo) { 1106 drm_intel_bo_unreference(bind_bo); 1107 return; 1108 } 1109 } 1110 if (pI830->video.gen4_wm_packed_bo == NULL) { 1111 pI830->video.gen4_wm_packed_bo = 1112 i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, TRUE); 1113 if (!pI830->video.gen4_wm_packed_bo) { 1114 drm_intel_bo_unreference(bind_bo); 1115 return; 1116 } 1117 } 1118 1119 if (pI830->video.gen4_wm_planar_bo == NULL) { 1120 pI830->video.gen4_wm_planar_bo = 1121 i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, FALSE); 1122 if (!pI830->video.gen4_wm_planar_bo) { 1123 drm_intel_bo_unreference(bind_bo); 1124 return; 1125 } 1126 } 1127 1128 if (pI830->video.gen4_cc_bo == NULL) { 1129 pI830->video.gen4_cc_bo = i965_create_cc_state(pScrn); 1130 if (!pI830->video.gen4_cc_bo) { 1131 drm_intel_bo_unreference(bind_bo); 1132 return; 1133 } 1134 } 1135 1136 /* Set up the offset for translating from the given region (in screen 1137 * coordinates) to the backing pixmap. 1138 */ 1139#ifdef COMPOSITE 1140 pix_xoff = -pPixmap->screen_x + pPixmap->drawable.x; 1141 pix_yoff = -pPixmap->screen_y + pPixmap->drawable.y; 1142#else 1143 pix_xoff = 0; 1144 pix_yoff = 0; 1145#endif 1146 1147 dxo = dstRegion->extents.x1; 1148 dyo = dstRegion->extents.y1; 1149 1150 /* Use normalized texture coordinates */ 1151 src_scale_x = ((float)src_w / width) / (float)drw_w; 1152 src_scale_y = ((float)src_h / height) / (float)drw_h; 1153 1154 pbox = REGION_RECTS(dstRegion); 1155 nbox = REGION_NUM_RECTS(dstRegion); 1156 while (nbox--) { 1157 int box_x1 = pbox->x1; 1158 int box_y1 = pbox->y1; 1159 int box_x2 = pbox->x2; 1160 int box_y2 = pbox->y2; 1161 int i; 1162 drm_intel_bo *vb_bo; 1163 float *vb; 1164 drm_intel_bo *bo_table[] = { 1165 NULL, /* vb_bo */ 1166 pI830->batch_bo, 1167 bind_bo, 1168 pI830->video.gen4_sampler_bo, 1169 pI830->video.gen4_sip_kernel_bo, 1170 pI830->video.gen4_vs_bo, 1171 pI830->video.gen4_sf_bo, 1172 pI830->video.gen4_wm_packed_bo, 1173 pI830->video.gen4_wm_planar_bo, 1174 pI830->video.gen4_cc_bo, 1175 }; 1176 1177 pbox++; 1178 1179 if (intel_alloc_and_map(pI830, "textured video vb", 4096, 1180 &vb_bo, &vb) != 0) 1181 break; 1182 bo_table[0] = vb_bo; 1183 1184 i = 0; 1185 vb[i++] = (box_x2 - dxo) * src_scale_x; 1186 vb[i++] = (box_y2 - dyo) * src_scale_y; 1187 vb[i++] = (float) box_x2 + pix_xoff; 1188 vb[i++] = (float) box_y2 + pix_yoff; 1189 1190 vb[i++] = (box_x1 - dxo) * src_scale_x; 1191 vb[i++] = (box_y2 - dyo) * src_scale_y; 1192 vb[i++] = (float) box_x1 + pix_xoff; 1193 vb[i++] = (float) box_y2 + pix_yoff; 1194 1195 vb[i++] = (box_x1 - dxo) * src_scale_x; 1196 vb[i++] = (box_y1 - dyo) * src_scale_y; 1197 vb[i++] = (float) box_x1 + pix_xoff; 1198 vb[i++] = (float) box_y1 + pix_yoff; 1199 1200 drm_intel_bo_unmap(vb_bo); 1201 1202 if (!IS_IGDNG(pI830)) 1203 i965_pre_draw_debug(pScrn); 1204 1205 /* If this command won't fit in the current batch, flush. 1206 * Assume that it does after being flushed. 1207 */ 1208 if (drm_intel_bufmgr_check_aperture_space(bo_table, 1209 ARRAY_SIZE(bo_table)) < 0) { 1210 intel_batch_flush(pScrn, FALSE); 1211 } 1212 1213 intel_batch_start_atomic(pScrn, 100); 1214 1215 i965_emit_video_setup(pScrn, bind_bo, n_src_surf); 1216 1217 BEGIN_BATCH(12); 1218 /* Set up the pointer to our vertex buffer */ 1219 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 1220 /* four 32-bit floats per vertex */ 1221 OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | 1222 VB0_VERTEXDATA | 1223 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); 1224 OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 1225 if (IS_IGDNG(pI830)) 1226 OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, (vb_bo->offset + i) * 4); 1227 else 1228 OUT_BATCH(3); /* four corners to our rectangle */ 1229 OUT_BATCH(0); /* reserved */ 1230 1231 OUT_BATCH(BRW_3DPRIMITIVE | 1232 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1233 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 1234 (0 << 9) | /* CTG - indirect vertex count */ 1235 4); 1236 OUT_BATCH(3); /* vertex count per instance */ 1237 OUT_BATCH(0); /* start vertex offset */ 1238 OUT_BATCH(1); /* single instance */ 1239 OUT_BATCH(0); /* start instance location */ 1240 OUT_BATCH(0); /* index buffer offset, ignored */ 1241 OUT_BATCH(MI_NOOP); 1242 ADVANCE_BATCH(); 1243 1244 intel_batch_end_atomic(pScrn); 1245 1246 drm_intel_bo_unreference(vb_bo); 1247 1248 if (!IS_IGDNG(pI830)) 1249 i965_post_draw_debug(pScrn); 1250 1251 } 1252 1253 /* release reference once we're finished */ 1254 drm_intel_bo_unreference(bind_bo); 1255 1256#if WATCH_STATS 1257 i830_dump_error_state(pScrn); 1258#endif 1259} 1260 1261void 1262i965_free_video(ScrnInfoPtr scrn) 1263{ 1264 I830Ptr pI830 = I830PTR(scrn); 1265 1266 drm_intel_bo_unreference(pI830->video.gen4_vs_bo); 1267 pI830->video.gen4_vs_bo = NULL; 1268 drm_intel_bo_unreference(pI830->video.gen4_sf_bo); 1269 pI830->video.gen4_sf_bo = NULL; 1270 drm_intel_bo_unreference(pI830->video.gen4_cc_bo); 1271 pI830->video.gen4_cc_bo = NULL; 1272 drm_intel_bo_unreference(pI830->video.gen4_wm_packed_bo); 1273 pI830->video.gen4_wm_packed_bo = NULL; 1274 drm_intel_bo_unreference(pI830->video.gen4_wm_planar_bo); 1275 pI830->video.gen4_wm_planar_bo = NULL; 1276 drm_intel_bo_unreference(pI830->video.gen4_cc_vp_bo); 1277 pI830->video.gen4_cc_vp_bo = NULL; 1278 drm_intel_bo_unreference(pI830->video.gen4_sampler_bo); 1279 pI830->video.gen4_sampler_bo = NULL; 1280 drm_intel_bo_unreference(pI830->video.gen4_sip_kernel_bo); 1281 pI830->video.gen4_sip_kernel_bo = NULL; 1282} 1283