gen8_render.c revision 42542f5f
/*
 * Copyright © 2012,2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "gen8_render.h"
#include "gen8_eu.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
#include "gen6_common.h"
#include "gen8_vertex.h"

#define SIM 1

#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
#define NO_VIDEO 0

#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

#define GEN8_MAX_SIZE 16384

/* XXX Todo
 *
 * STR (software tiled rendering) mode. No, really.
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
 */

#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)

/* Pipeline stages:
 *  1. Command Streamer (CS)
 *  2. Vertex Fetch (VF)
 *  3. Vertex Shader (VS)
 *  4. Hull Shader (HS)
 *  5. Tessellation Engine (TE)
 *  6. Domain Shader (DS)
 *  7. Geometry Shader (GS)
 *  8. Stream Output Logic (SOL)
 *  9. Clipper (CLIP)
 * 10. Strip/Fan (SF)
 * 11. Windower/Masker (WM)
 * 12. Color Calculator (CC)
 */
#if !NO_VIDEO
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb.g8b"
#include "exa_wm_write.g8b"
};
#endif

#define SURFACE_DW (64 / sizeof(uint32_t))

#define KERNEL(kernel_enum, kernel, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2),

	NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2),

#if !NO_VIDEO
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
#endif
};
#undef KERNEL

static const struct blendinfo {
	uint8_t src_alpha;
	uint8_t src_blend;
	uint8_t dst_blend;
} gen8_blend_op[] = {
	/* Clear */	  {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */	  {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */	  {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */	  {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */	  {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */	  {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */	  {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */  {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	  {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */	  {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	  {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};
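/*
 * As a reading aid: each entry is a Porter-Duff equation with its
 * factors split out.  Over, for instance, computes
 *	result = src * ONE + dst * (1 - src.alpha)
 * hence {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}; the leading
 * src_alpha = 1 records that the source alpha feeds the dst factor,
 * which is what gen8_get_blend() later rewrites for component-alpha.
 */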
/**
 * Highest-valued BLENDFACTOR used in gen8_blend_op.
 *
 * This leaves out GEN8_BLENDFACTOR_INV_DST_COLOR,
 * GEN8_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN8_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN8_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN8_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen8_blend_state), 64)

#define BLEND_OFFSET(s, d) \
	(((d) != BLENDFACTOR_ZERO) << 15 | ((s) * GEN8_BLENDFACTOR_COUNT + (d)) << 4)

#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO)

#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2)

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN8_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER 1
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)

#define GEN8_SAMPLER(f) (((f) >> 20) & 0xfff)
#define GEN8_BLEND(f) (((f) >> 4) & 0x7ff)
#define GEN8_READS_DST(f) (((f) >> 15) & 1)
#define GEN8_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN8_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN8_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V))
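/*
 * Bit layout of the packed flags word, as implied by the accessors
 * above: bits 0-3 vertex-element id, bits 4-14 blend offset, bit 15
 * "reads dst", bits 16-19 WM kernel, bits 20-31 sampler.  For example,
 * COPY_FLAGS(GXcopy) packs sampler 0, NO_BLEND, GEN8_WM_KERNEL_NOMASK
 * and VERTEX_2s2s; since NO_BLEND uses a BLENDFACTOR_ZERO dst factor,
 * bit 15 stays clear and GEN8_READS_DST() reports 0.
 */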
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH64(v) batch_emit64(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

static inline bool too_large(int width, int height)
{
	return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE;
}

static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
		return true;

	return false;
#else
	return false;
#endif
}

static uint32_t gen8_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN8_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);

	src = gen8_blend_op[op].src_blend;
	dst = gen8_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == BLENDFACTOR_DST_ALPHA)
			src = BLENDFACTOR_ONE;
		else if (src == BLENDFACTOR_INV_DST_ALPHA)
			src = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen8_blend_op[op].src_alpha) {
		if (dst == BLENDFACTOR_SRC_ALPHA)
			dst = BLENDFACTOR_SRC_COLOR;
		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
			dst = BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
	return BLEND_OFFSET(src, dst);
}

static uint32_t gen8_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8X8_UNORM;
#ifdef PICT_a2r10g10b10
	case PICT_a2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen8_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
#ifdef PICT_a2r10g10b10
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}
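/*
 * Note the naming flip above: PICT_* formats are defined as packed
 * 32-bit values, so little-endian memory order is reversed - e.g.
 * PICT_a8r8g8b8 lays out bytes B,G,R,A, matching
 * SURFACEFORMAT_B8G8R8A8_UNORM.  For render targets the x-variants map
 * onto the alpha-bearing surface format so that the pixel shader
 * writes all four channels; the unused alpha byte is ignored by
 * x-format readers.
 */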
static bool gen8_check_dst_format(PictFormat format)
{
	if (gen8_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen8_check_format(uint32_t format)
{
	if (gen8_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen8_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen8_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen8_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen8_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

static int
gen8_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen8_blend_op[op].src_alpha)
				base = GEN8_WM_KERNEL_MASKSA;
			else
				base = GEN8_WM_KERNEL_MASKCA;
		} else
			base = GEN8_WM_KERNEL_MASK;
	} else
		base = GEN8_WM_KERNEL_NOMASK;

	return base + !is_affine;
}

static void
gen8_emit_push_constants(struct sna *sna)
{
#if SIM
	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_urb(struct sna *sna)
{
	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH(1024 << URB_ENTRY_NUMBER_SHIFT |
		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
		  0 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  0 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  0 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  0 << URB_STARTING_ADDRESS_SHIFT);
}

static void
gen8_emit_state_base_address(struct sna *sna)
{
	uint32_t num_pages;

	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);

	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
	OUT_BATCH64(0); /* general */
	OUT_BATCH(0); /* stateless dataport */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
				     sna->kgem.nbatch,
				     NULL,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(0); /* indirect */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	/* upper bounds */
	num_pages = sna->render_state.gen8.general_bo->size.pages.count;
	OUT_BATCH(0); /* general */
	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(num_pages << 12 | 1); /* instruction */
}
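/*
 * A note on the command headers used throughout: the hardware expects
 * the low bits of a 3DSTATE header to hold "packet length in dwords
 * minus two", so the "| (n - 2)" idiom documents the full packet size.
 * E.g. GEN8_STATE_BASE_ADDRESS | (16 - 2) above announces a 16-dword
 * packet: the header dword plus the fifteen payload dwords following.
 */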
static void
gen8_emit_vs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_VS | (9 - 2));
	OUT_BATCH64(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(1 << 1); /* pass-through */
	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_VS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_hs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_HS | (9 - 2));
	OUT_BATCH(0); /* no HS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_HS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_te_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_ds_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DS | (9 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_DS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_gs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_GS | (10 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_GS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_sol_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_SF | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_clip_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}
static void
gen8_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DEPTH_BUFFER | (8 - 2));
#if 0
	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
#else
	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
#endif
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_STENCIL_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_wm_invariant(struct sna *sna)
{
	gen8_emit_null_depth_buffer(sna);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_WM | (2 - 2));
	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
		  PSX_ATTRIBUTE_ENABLE);

	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2));
	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
		  RASTER_CULL_NONE);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_PS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
#endif
}

static void
gen8_emit_cc_invariant(struct sna *sna)
{
}

static void
gen8_emit_vf_invariant(struct sna *sna)
{
#if 1
	OUT_BATCH(GEN8_3DSTATE_VF | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_VF_SGVS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
	OUT_BATCH(RECTLIST);

	OUT_BATCH(GEN8_3DSTATE_VF_STATISTICS | 0);
}

static void
gen8_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_3D);

#if SIM
	OUT_BATCH(GEN8_STATE_SIP | (3 - 2));
	OUT_BATCH64(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */

	OUT_BATCH(GEN8_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SAMPLE_PATTERN | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	//OUT_BATCH(8<<20 | 8<<16);
	OUT_BATCH(0);
#endif

	gen8_emit_push_constants(sna);
	gen8_emit_urb(sna);

	gen8_emit_state_base_address(sna);

	gen8_emit_vf_invariant(sna);
	gen8_emit_vs_invariant(sna);
	gen8_emit_hs_invariant(sna);
	gen8_emit_te_invariant(sna);
	gen8_emit_ds_invariant(sna);
	gen8_emit_gs_invariant(sna);
	gen8_emit_sol_invariant(sna);
	gen8_emit_clip_invariant(sna);
	gen8_emit_sf_invariant(sna);
	gen8_emit_wm_invariant(sna);
	gen8_emit_cc_invariant(sna);

	sna->render_state.gen8.needs_invariant = false;
}
static void
gen8_emit_cc(struct sna *sna, uint32_t blend)
{
	struct gen8_render_state *render = &sna->render_state.gen8;

	if (render->blend == blend)
		return;

	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
	     __FUNCTION__, blend, render->blend,
	     blend / GEN8_BLENDFACTOR_COUNT,
	     blend % GEN8_BLENDFACTOR_COUNT));

	assert(blend < GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT);
	assert(blend / GEN8_BLENDFACTOR_COUNT > 0);
	assert(blend % GEN8_BLENDFACTOR_COUNT > 0);

	/* XXX can have up to 8 blend states preloaded, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
	if (blend != GEN8_BLEND(NO_BLEND)) {
		uint32_t src = blend / GEN8_BLENDFACTOR_COUNT;
		uint32_t dst = blend % GEN8_BLENDFACTOR_COUNT;
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
			  PS_BLEND_COLOR_BLEND_ENABLE |
			  src << PS_BLEND_SRC_ALPHA_SHIFT |
			  dst << PS_BLEND_DST_ALPHA_SHIFT |
			  src << PS_BLEND_SRC_SHIFT |
			  dst << PS_BLEND_DST_SHIFT);
	} else
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);

	OUT_BATCH(GEN8_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE) | 1);

	/* Force a CC_STATE pointer change to improve blend performance */
	OUT_BATCH(GEN8_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	render->blend = blend;
}
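/*
 * Here "blend" is the GEN8_BLEND() field, i.e. src * COUNT + dst, so
 * dividing and taking the remainder by GEN8_BLENDFACTOR_COUNT recovers
 * the two factors for the inline 3DSTATE_PS_BLEND fast path, while the
 * same index scaled by GEN8_BLEND_STATE_PADDED_SIZE selects the
 * matching pre-baked entry in the cc_blend table (see
 * gen8_create_blend_state() below).
 */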
static void
gen8_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen8.samplers == state)
		return;

	sna->render_state.gen8.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert(2 * sizeof(struct gen8_sampler_state) == 32);
	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen8.wm_state + state * 2 * sizeof(struct gen8_sampler_state));
}

static void
gen8_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen8.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));

	sna->render_state.gen8.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN8_3DSTATE_SBE | (4 - 2));
	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	assert(kernel < ARRAY_SIZE(wm_kernels));
	if (sna->render_state.gen8.kernel == kernel)
		return;

	sna->render_state.gen8.kernel = kernel;
	kernels = sna->render_state.gen8.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN8_3DSTATE_PS | (12 - 2));
	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
		  //PS_VECTOR_MASK_ENABLE |
		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH64(0); /* scratch address */
	OUT_BATCH(PS_MAX_THREADS |
		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH64(kernels[2]);
	OUT_BATCH64(kernels[1]);
}

static bool
gen8_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen8.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen8.surface_table = offset;
	return true;
}

static bool
gen8_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen8.drawrect_limit == limit &&
	    sna->render_state.gen8.drawrect_offset == offset)
		return true;

	sna->render_state.gen8.drawrect_offset = offset;
	sna->render_state.gen8.drawrect_limit = limit;

	OUT_BATCH(GEN8_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
static void
gen8_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen8_render_state *render = &sna->render_state.gen8;
	uint32_t src_format, dw;
	int id = GEN8_VERTEX(op->u.gen8.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN8_3DSTATE_VERTEX_ELEMENTS |
		  ((2 * (3 + has_mask)) + 1 - 2));

	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3));
	dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = SURFACEFORMAT_R16G16_SSCALED;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = SURFACEFORMAT_R32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = SURFACEFORMAT_R32G32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  src_format << VE_FORMAT_SHIFT |
		  4 << VE_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
		dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = SURFACEFORMAT_R32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = SURFACEFORMAT_R32G32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
			  src_format << VE_FORMAT_SHIFT |
			  offset << VE_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
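/*
 * Concretely, for id = 2 (affine source, no mask) the three elements
 * emitted above are: a store-0 pad filling VUE dwords 0-3, the signed
 * 16-bit x/y pair promoted to (x, y, 0, 1), and a two-float (u, v)
 * read from byte offset 4 and stored as (u, v, 0, 1) - i.e. a
 * three-dword vertex: packed x/y plus two texcoord floats.
 */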
inline static void
gen8_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH |
		  PIPE_CONTROL_TC_FLUSH |
		  PIPE_CONTROL_CS_STALL);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall)
		stall = (PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_STALL_AT_SCOREBOARD);

	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_CS_STALL |
		  PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

static void
gen8_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen8.emit_flush && GEN8_READS_DST(op->u.gen8.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	need_stall = sna->render_state.gen8.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen8_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	if (need_invalidate) {
		gen8_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen8_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
	gen8_emit_sampler(sna, GEN8_SAMPLER(op->u.gen8.flags));
	gen8_emit_sf(sna, GEN8_VERTEX(op->u.gen8.flags) >> 2);
	gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
	gen8_emit_vertex_elements(sna, op);
	gen8_emit_binding_table(sna, wm_binding_table);

	sna->render_state.gen8.emit_flush = GEN8_READS_DST(op->u.gen8.flags);
}

static bool gen8_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen8_render_state *state = &sna->render_state.gen8;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna,
		     GEN8_BLEND(gen8_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen8_emit_wm(sna,
		     gen8_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(RECTLIST); /* ignored, see VF_TOPOLOGY */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
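/*
 * The three PIPE_CONTROL variants above form a strength hierarchy: an
 * invalidate (write-cache flush + texture-cache flush + CS stall)
 * covers everything a plain flush or scoreboard stall would, which is
 * why gen8_emit_state() clears need_flush and need_stall once it
 * decides to invalidate, and clears need_stall once it flushes with a
 * stall.
 */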
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen8_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t));

	sampler_state->ss0.lod_preclamp = 2; /* GL mode */
	sampler_state->ss0.default_color_mode = 1;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = MAPFILTER_LINEAR;
		break;
	}

	/* XXX bicubic filter using MAPFILTER_FLEXIBLE */

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR;
		break;
	}
}

static void
sampler_copy_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static void
sampler_fill_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static uint32_t
gen8_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return SURFACE_TILED;
	case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y;
	}
}

#define MOCS_WT (2 << 5)
#define MOCS_WB (3 << 5)
#define MOCS_eLLC_ONLY (0 << 3)
#define MOCS_LLC_ONLY (1 << 3)
#define MOCS_eLLC_LLC (2 << 3)
#define MOCS_ALL_CACHES (3 << 3)
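/*
 * These fields combine into the cache-control (MOCS) value that
 * gen8_bind_bo() below shifts into bits 24+ of SURFACE_STATE dword 1:
 * ordinary surfaces get (MOCS_WB | MOCS_ALL_CACHES) << 24, i.e.
 * write-back caching in all caches, while scanouts use MOCS_WT so the
 * display engine observes coherent data.
 */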
/**
 * Sets up the common fields of a surface state entry for the given bo.
 */
static uint32_t
gen8_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		assert(offset >= sna->kgem.surface);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen8_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	if (is_dst) {
		ss[0] |= SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = (is_dst && is_uncached(sna, bo)) ? 0 :
		is_scanout ? (MOCS_WT | MOCS_ALL_CACHES) << 24 :
		(MOCS_WB | MOCS_ALL_CACHES) << 24;
	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	*(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
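/*
 * The binding is cached on the bo, keyed by format plus the is_dst and
 * is_scanout bits packed into bits 30-31, so rebinding the same bo the
 * same way within one batch reuses the previously written SURFACE_STATE
 * and costs no further batch space - only a dirty-marking when bound
 * as a destination.
 */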
"render" : "sampler")); 1403 1404 return offset * sizeof(uint32_t); 1405} 1406 1407static void gen8_emit_vertex_buffer(struct sna *sna, 1408 const struct sna_composite_op *op) 1409{ 1410 int id = GEN8_VERTEX(op->u.gen8.flags); 1411 1412 OUT_BATCH(GEN8_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1413 OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | 1414 4*op->floats_per_vertex); 1415 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1416 OUT_BATCH64(0); 1417 OUT_BATCH(~0); /* buffer size: disabled */ 1418 1419 OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2)); 1420 OUT_BATCH(id); 1421 OUT_BATCH(0); 1422 1423 sna->render.vb_id |= 1 << id; 1424} 1425 1426static void gen8_emit_primitive(struct sna *sna) 1427{ 1428 if (sna->kgem.nbatch == sna->render_state.gen8.last_primitive) { 1429 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1430 return; 1431 } 1432 1433 OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2)); 1434 OUT_BATCH(RECTLIST); /* ignored, see VF_TOPOLOGY */ 1435 sna->render.vertex_offset = sna->kgem.nbatch; 1436 OUT_BATCH(0); /* vertex count, to be filled in later */ 1437 OUT_BATCH(sna->render.vertex_index); 1438 OUT_BATCH(1); /* single instance */ 1439 OUT_BATCH(0); /* start instance location */ 1440 OUT_BATCH(0); /* index buffer offset, ignored */ 1441 sna->render.vertex_start = sna->render.vertex_index; 1442 1443 sna->render_state.gen8.last_primitive = sna->kgem.nbatch; 1444} 1445 1446static bool gen8_rectangle_begin(struct sna *sna, 1447 const struct sna_composite_op *op) 1448{ 1449 int id = 1 << GEN8_VERTEX(op->u.gen8.flags); 1450 int ndwords; 1451 1452 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1453 return true; 1454 1455 ndwords = op->need_magic_ca_pass ? 60 : 6; 1456 if ((sna->render.vb_id & id) == 0) 1457 ndwords += 5; 1458 if (!kgem_check_batch(&sna->kgem, ndwords)) 1459 return false; 1460 1461 if ((sna->render.vb_id & id) == 0) 1462 gen8_emit_vertex_buffer(sna, op); 1463 1464 gen8_emit_primitive(sna); 1465 return true; 1466} 1467 1468static int gen8_get_rectangles__flush(struct sna *sna, 1469 const struct sna_composite_op *op) 1470{ 1471 /* Preventing discarding new vbo after lock contention */ 1472 if (sna_vertex_wait__locked(&sna->render)) { 1473 int rem = vertex_space(sna); 1474 if (rem > op->floats_per_rect) 1475 return rem; 1476 } 1477 1478 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 
static bool gen8_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN8_VERTEX(op->u.gen8.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen8_emit_vertex_buffer(sna, op);

	gen8_emit_primitive(sna);
	return true;
}

static int gen8_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		if (gen8_magic_ca_pass(sna, op)) {
			gen8_emit_pipe_invalidate(sna);
			gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
			gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
		}
	}

	return gen8_vertex_finish(sna);
}

inline static int gen8_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen8_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen8_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

inline static uint32_t *gen8_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	assert(sna->kgem.surface <= 16384);
	sna->kgem.surface -= SURFACE_DW;
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

static void
gen8_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8*2));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen8.needs_invariant)
		gen8_emit_invariant(sna);
}
static void gen8_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen8_get_batch(sna, op);

	binding_table = gen8_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen8_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen8_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen8_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen8_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen8.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen8.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0])
		dirty = 0;

	gen8_emit_state(sna, op, offset | dirty);
}

static void
gen8_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen8.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen8.floats_per_vertex, op->floats_per_vertex));
		gen8_vertex_align(sna, op);
		sna->render_state.gen8.floats_per_vertex = op->floats_per_vertex;
	}
}

fastcall static void
gen8_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen8_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen8_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen8_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

static void
gen8_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
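/*
 * The __thread variant differs only in its locking: a range of the
 * vertex buffer is reserved while holding the render lock, which is
 * then dropped while emit_boxes() writes the vertices, letting other
 * threads fill their own reserved ranges of the same vbo concurrently;
 * sna_vertex_acquire/release bracket the unlocked emission.
 */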
static uint32_t
gen8_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	COMPILE_TIME_ASSERT(((GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0);

	base = sna_static_stream_map(stream,
				     GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT * GEN8_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN8_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN8_BLENDFACTOR_COUNT; dst++) {
			struct gen8_blend_state *blend =
				(struct gen8_blend_state *)ptr;

			assert(((ptr - base) & 63) == 0);
			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);

			blend->rt.post_blend_clamp = 1;
			blend->rt.pre_blend_clamp = 1;

			blend->rt.color_blend =
				!(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE);
			blend->rt.dest_blend_factor = dst;
			blend->rt.source_blend_factor = src;
			blend->rt.color_blend_function = BLENDFUNCTION_ADD;

			blend->rt.dest_alpha_blend_factor = dst;
			blend->rt.source_alpha_blend_factor = src;
			blend->rt.alpha_blend_function = BLENDFUNCTION_ADD;

			ptr += GEN8_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
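/*
 * The stream written above is a GEN8_BLENDFACTOR_COUNT x
 * GEN8_BLENDFACTOR_COUNT grid of 64-byte-aligned blend states, indexed
 * as src * COUNT + dst - exactly the value BLEND_OFFSET() packs into
 * bits 4-14 of the flags word - so gen8_emit_cc() can turn a flags
 * word into a blend-state pointer with a single multiply.
 */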
static int
gen8_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen8_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen8_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen8_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

inline static bool gen8_composite_channel_convert(struct sna_composite_channel *channel)
{
	if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format)))
		return false;

	channel->repeat = gen8_repeat(channel->repeat);
	channel->filter = gen8_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen8_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);

	return true;
}

static void gen8_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

inline static bool
gen8_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w | h) {
		assert(w && h);
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel))
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
op->dst.width && h == op->dst.height) 1962 hint |= REPLACES; 1963 } 1964 1965 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1966 if (op->dst.bo == NULL) 1967 return false; 1968 1969 if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 1970 return false; 1971 1972 if (hint & REPLACES) { 1973 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1974 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1975 } 1976 1977 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1978 &op->dst.x, &op->dst.y); 1979 1980 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1981 __FUNCTION__, 1982 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1983 op->dst.width, op->dst.height, 1984 op->dst.bo->pitch, 1985 op->dst.x, op->dst.y, 1986 op->damage ? *op->damage : (void *)-1)); 1987 1988 assert(op->dst.bo->proxy == NULL); 1989 1990 if (too_large(op->dst.width, op->dst.height) && 1991 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1992 return false; 1993 1994 return true; 1995} 1996 1997static bool 1998try_blt(struct sna *sna, 1999 PicturePtr dst, PicturePtr src, 2000 int width, int height) 2001{ 2002 struct kgem_bo *bo; 2003 2004 if (sna->kgem.mode == KGEM_BLT) { 2005 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2006 return true; 2007 } 2008 2009 if (too_large(width, height)) { 2010 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2011 __FUNCTION__, width, height)); 2012 return true; 2013 } 2014 2015 bo = __sna_drawable_peek_bo(dst->pDrawable); 2016 if (bo == NULL) 2017 return true; 2018 if (bo->rq) 2019 return RQ_IS_BLT(bo->rq); 2020 2021 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2022 return true; 2023 2024 if (src->pDrawable) { 2025 bo = __sna_drawable_peek_bo(src->pDrawable); 2026 if (bo == NULL) 2027 return true; 2028 2029 if (prefer_blt_bo(sna, bo)) 2030 return RQ_IS_BLT(bo->rq); 2031 } 2032 2033 if (sna->kgem.ring == KGEM_BLT) { 2034 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2035 return true; 2036 } 2037 2038 return false; 2039} 2040 2041static bool 2042check_gradient(PicturePtr picture, bool precise) 2043{ 2044 if (picture->pDrawable) 2045 return false; 2046 2047 switch (picture->pSourcePict->type) { 2048 case SourcePictTypeSolidFill: 2049 case SourcePictTypeLinear: 2050 return false; 2051 default: 2052 return precise; 2053 } 2054} 2055 2056static bool 2057has_alphamap(PicturePtr p) 2058{ 2059 return p->alphaMap != NULL; 2060} 2061 2062static bool 2063need_upload(PicturePtr p) 2064{ 2065 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2066} 2067 2068static bool 2069source_is_busy(PixmapPtr pixmap) 2070{ 2071 struct sna_pixmap *priv = sna_pixmap(pixmap); 2072 if (priv == NULL || priv->clear) 2073 return false; 2074 2075 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2076 return true; 2077 2078 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2079 return true; 2080 2081 return priv->gpu_damage && !priv->cpu_damage; 2082} 2083 2084static bool 2085source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2086{ 2087 if (sna_picture_is_solid(p, NULL)) 2088 return false; 2089 2090 if (p->pSourcePict) 2091 return check_gradient(p, precise); 2092 2093 if (!gen8_check_repeat(p) || !gen8_check_format(p->format)) 2094 return true; 2095 2096 if (pixmap && source_is_busy(pixmap)) 2097 return false; 2098 2099 return has_alphamap(p) || !gen8_check_filter(p) || need_upload(p); 2100} 2101 2102static bool 2103gen8_composite_fallback(struct 
sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen8_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the CPU.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n", __FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n", __FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
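	 * Migrating the destination to the GPU merely to consume a
	 * source that must first be fixed up on the CPU would add a
	 * transfer in each direction and win us nothing.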
2168 */ 2169 if (src_fallback) { 2170 DBG(("%s: dst is on the CPU and src will fallback\n", 2171 __FUNCTION__)); 2172 return true; 2173 } 2174 2175 if (mask && mask_fallback) { 2176 DBG(("%s: dst is on the CPU and mask will fallback\n", 2177 __FUNCTION__)); 2178 return true; 2179 } 2180 2181 if (too_large(dst_pixmap->drawable.width, 2182 dst_pixmap->drawable.height) && 2183 dst_is_cpu(dst_pixmap)) { 2184 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2185 return true; 2186 } 2187 2188 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2189 __FUNCTION__)); 2190 return dst_use_cpu(dst_pixmap); 2191} 2192 2193static int 2194reuse_source(struct sna *sna, 2195 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2196 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2197{ 2198 uint32_t color; 2199 2200 if (src_x != msk_x || src_y != msk_y) 2201 return false; 2202 2203 if (src == mask) { 2204 DBG(("%s: mask is source\n", __FUNCTION__)); 2205 *mc = *sc; 2206 mc->bo = kgem_bo_reference(mc->bo); 2207 return true; 2208 } 2209 2210 if (sna_picture_is_solid(mask, &color)) 2211 return gen4_channel_init_solid(sna, mc, color); 2212 2213 if (sc->is_solid) 2214 return false; 2215 2216 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2217 return false; 2218 2219 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2220 2221 if (!sna_transform_equal(src->transform, mask->transform)) 2222 return false; 2223 2224 if (!sna_picture_alphamap_equal(src, mask)) 2225 return false; 2226 2227 if (!gen8_check_repeat(mask)) 2228 return false; 2229 2230 if (!gen8_check_filter(mask)) 2231 return false; 2232 2233 if (!gen8_check_format(mask->format)) 2234 return false; 2235 2236 DBG(("%s: reusing source channel for mask with a twist\n", 2237 __FUNCTION__)); 2238 2239 *mc = *sc; 2240 mc->repeat = gen8_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 2241 mc->filter = gen8_filter(mask->filter); 2242 mc->pict_format = mask->format; 2243 mc->card_format = gen8_get_card_format(mask->format); 2244 mc->bo = kgem_bo_reference(mc->bo); 2245 return true; 2246} 2247 2248static bool 2249gen8_render_composite(struct sna *sna, 2250 uint8_t op, 2251 PicturePtr src, 2252 PicturePtr mask, 2253 PicturePtr dst, 2254 int16_t src_x, int16_t src_y, 2255 int16_t msk_x, int16_t msk_y, 2256 int16_t dst_x, int16_t dst_y, 2257 int16_t width, int16_t height, 2258 unsigned flags, 2259 struct sna_composite_op *tmp) 2260{ 2261 if (op >= ARRAY_SIZE(gen8_blend_op)) 2262 return false; 2263 2264 DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2265 width, height, sna->kgem.mode, sna->kgem.ring)); 2266 2267 if (mask == NULL && 2268 try_blt(sna, dst, src, width, height) && 2269 sna_blt_composite(sna, op, 2270 src, dst, 2271 src_x, src_y, 2272 dst_x, dst_y, 2273 width, height, 2274 flags, tmp)) 2275 return true; 2276 2277 if (gen8_composite_fallback(sna, src, mask, dst)) 2278 goto fallback; 2279 2280 if (need_tiling(sna, width, height)) 2281 return sna_tiling_composite(op, src, mask, dst, 2282 src_x, src_y, 2283 msk_x, msk_y, 2284 dst_x, dst_y, 2285 width, height, 2286 tmp); 2287 2288 if (op == PictOpClear && src == sna->clear) 2289 op = PictOpSrc; 2290 tmp->op = op; 2291 if (!gen8_composite_set_target(sna, tmp, dst, 2292 dst_x, dst_y, width, height, 2293 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2294 goto fallback; 2295 2296 switch (gen8_composite_picture(sna, src, &tmp->src, 2297 src_x, src_y, 2298 width, height, 2299 dst_x, dst_y, 2300 dst->polyMode == PolyModePrecise)) { 2301 case -1: 2302 goto cleanup_dst; 2303 case 0: 2304 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2305 goto cleanup_dst; 2306 /* fall through to fixup */ 2307 case 1: 2308 /* Did we just switch rings to prepare the source? */ 2309 if (mask == NULL && 2310 (prefer_blt_composite(sna, tmp) || 2311 unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && 2312 sna_blt_composite__convert(sna, 2313 dst_x, dst_y, width, height, 2314 tmp)) 2315 return true; 2316 2317 if (!gen8_composite_channel_convert(&tmp->src)) 2318 goto cleanup_src; 2319 2320 break; 2321 } 2322 2323 tmp->is_affine = tmp->src.is_affine; 2324 tmp->has_component_alpha = false; 2325 tmp->need_magic_ca_pass = false; 2326 2327 tmp->mask.bo = NULL; 2328 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2329 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2330 2331 if (mask) { 2332 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2333 tmp->has_component_alpha = true; 2334 2335 /* Check if it's component alpha that relies on a source alpha and on 2336 * the source value. We can only get one of those into the single 2337 * source value that we get to blend with. 
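	 * For PictOpOver we can instead split the operation into an
	 * OutReverse pass followed by a component-alpha Add, the
	 * "magic" CA pass performed by gen8_magic_ca_pass().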
2338 */ 2339 if (gen8_blend_op[op].src_alpha && 2340 (gen8_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 2341 if (op != PictOpOver) 2342 goto cleanup_src; 2343 2344 tmp->need_magic_ca_pass = true; 2345 tmp->op = PictOpOutReverse; 2346 } 2347 } 2348 2349 if (!reuse_source(sna, 2350 src, &tmp->src, src_x, src_y, 2351 mask, &tmp->mask, msk_x, msk_y)) { 2352 switch (gen8_composite_picture(sna, mask, &tmp->mask, 2353 msk_x, msk_y, 2354 width, height, 2355 dst_x, dst_y, 2356 dst->polyMode == PolyModePrecise)) { 2357 case -1: 2358 goto cleanup_src; 2359 case 0: 2360 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2361 goto cleanup_src; 2362 /* fall through to fixup */ 2363 case 1: 2364 if (!gen8_composite_channel_convert(&tmp->mask)) 2365 goto cleanup_mask; 2366 break; 2367 } 2368 } 2369 2370 tmp->is_affine &= tmp->mask.is_affine; 2371 } 2372 2373 tmp->u.gen8.flags = 2374 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2375 tmp->src.repeat, 2376 tmp->mask.filter, 2377 tmp->mask.repeat), 2378 gen8_get_blend(tmp->op, 2379 tmp->has_component_alpha, 2380 tmp->dst.format), 2381 gen8_choose_composite_kernel(tmp->op, 2382 tmp->mask.bo != NULL, 2383 tmp->has_component_alpha, 2384 tmp->is_affine), 2385 gen4_choose_composite_emitter(sna, tmp)); 2386 2387 tmp->blt = gen8_render_composite_blt; 2388 tmp->box = gen8_render_composite_box; 2389 tmp->boxes = gen8_render_composite_boxes__blt; 2390 if (tmp->emit_boxes){ 2391 tmp->boxes = gen8_render_composite_boxes; 2392 tmp->thread_boxes = gen8_render_composite_boxes__thread; 2393 } 2394 tmp->done = gen8_render_composite_done; 2395 2396 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2397 if (!kgem_check_bo(&sna->kgem, 2398 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2399 NULL)) { 2400 kgem_submit(&sna->kgem); 2401 if (!kgem_check_bo(&sna->kgem, 2402 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2403 NULL)) 2404 goto cleanup_mask; 2405 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2406 } 2407 2408 gen8_align_vertex(sna, tmp); 2409 gen8_emit_composite_state(sna, tmp); 2410 return true; 2411 2412cleanup_mask: 2413 if (tmp->mask.bo) { 2414 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2415 tmp->mask.bo = NULL; 2416 } 2417cleanup_src: 2418 if (tmp->src.bo) { 2419 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2420 tmp->src.bo = NULL; 2421 } 2422cleanup_dst: 2423 if (tmp->redirect.real_bo) { 2424 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2425 tmp->redirect.real_bo = NULL; 2426 } 2427fallback: 2428 return (mask == NULL && 2429 sna_blt_composite(sna, op, 2430 src, dst, 2431 src_x, src_y, 2432 dst_x, dst_y, 2433 width, height, 2434 flags | COMPOSITE_FALLBACK, tmp)); 2435} 2436 2437#if !NO_COMPOSITE_SPANS 2438fastcall static void 2439gen8_render_composite_spans_box(struct sna *sna, 2440 const struct sna_composite_spans_op *op, 2441 const BoxRec *box, float opacity) 2442{ 2443 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2444 __FUNCTION__, 2445 op->base.src.offset[0], op->base.src.offset[1], 2446 opacity, 2447 op->base.dst.x, op->base.dst.y, 2448 box->x1, box->y1, 2449 box->x2 - box->x1, 2450 box->y2 - box->y1)); 2451 2452 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_composite_state); 2453 op->prim_emit(sna, op, box, opacity); 2454} 2455 2456static void 2457gen8_render_composite_spans_boxes(struct sna *sna, 2458 const struct sna_composite_spans_op *op, 2459 const BoxRec *box, int nbox, 2460 float opacity) 2461{ 2462 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2463 __FUNCTION__, nbox, 2464 op->base.src.offset[0], 
op->base.src.offset[1], 2465 opacity, 2466 op->base.dst.x, op->base.dst.y)); 2467 2468 do { 2469 int nbox_this_time; 2470 2471 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2472 gen8_emit_composite_state); 2473 nbox -= nbox_this_time; 2474 2475 do { 2476 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2477 box->x1, box->y1, 2478 box->x2 - box->x1, 2479 box->y2 - box->y1)); 2480 2481 op->prim_emit(sna, op, box++, opacity); 2482 } while (--nbox_this_time); 2483 } while (nbox); 2484} 2485 2486fastcall static void 2487gen8_render_composite_spans_boxes__thread(struct sna *sna, 2488 const struct sna_composite_spans_op *op, 2489 const struct sna_opacity_box *box, 2490 int nbox) 2491{ 2492 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2493 __FUNCTION__, nbox, 2494 op->base.src.offset[0], op->base.src.offset[1], 2495 op->base.dst.x, op->base.dst.y)); 2496 2497 sna_vertex_lock(&sna->render); 2498 do { 2499 int nbox_this_time; 2500 float *v; 2501 2502 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2503 gen8_emit_composite_state); 2504 assert(nbox_this_time); 2505 nbox -= nbox_this_time; 2506 2507 v = sna->render.vertices + sna->render.vertex_used; 2508 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2509 2510 sna_vertex_acquire__locked(&sna->render); 2511 sna_vertex_unlock(&sna->render); 2512 2513 op->emit_boxes(op, box, nbox_this_time, v); 2514 box += nbox_this_time; 2515 2516 sna_vertex_lock(&sna->render); 2517 sna_vertex_release__locked(&sna->render); 2518 } while (nbox); 2519 sna_vertex_unlock(&sna->render); 2520} 2521 2522fastcall static void 2523gen8_render_composite_spans_done(struct sna *sna, 2524 const struct sna_composite_spans_op *op) 2525{ 2526 if (sna->render.vertex_offset) 2527 gen8_vertex_flush(sna); 2528 2529 DBG(("%s()\n", __FUNCTION__)); 2530 2531 if (op->base.src.bo) 2532 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2533 2534 sna_render_composite_redirect_done(sna, &op->base); 2535} 2536 2537static bool 2538gen8_check_composite_spans(struct sna *sna, 2539 uint8_t op, PicturePtr src, PicturePtr dst, 2540 int16_t width, int16_t height, unsigned flags) 2541{ 2542 if (op >= ARRAY_SIZE(gen8_blend_op)) 2543 return false; 2544 2545 if (gen8_composite_fallback(sna, src, NULL, dst)) 2546 return false; 2547 2548 if (need_tiling(sna, width, height) && 2549 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2550 DBG(("%s: fallback, tiled operation not on GPU\n", 2551 __FUNCTION__)); 2552 return false; 2553 } 2554 2555 return true; 2556} 2557 2558static bool 2559gen8_render_composite_spans(struct sna *sna, 2560 uint8_t op, 2561 PicturePtr src, 2562 PicturePtr dst, 2563 int16_t src_x, int16_t src_y, 2564 int16_t dst_x, int16_t dst_y, 2565 int16_t width, int16_t height, 2566 unsigned flags, 2567 struct sna_composite_spans_op *tmp) 2568{ 2569 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2570 width, height, flags, sna->kgem.ring)); 2571 2572 assert(gen8_check_composite_spans(sna, op, src, dst, width, height, flags)); 2573 2574 if (need_tiling(sna, width, height)) { 2575 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2576 __FUNCTION__, width, height)); 2577 return sna_tiling_composite_spans(op, src, dst, 2578 src_x, src_y, dst_x, dst_y, 2579 width, height, flags, tmp); 2580 } 2581 2582 tmp->base.op = op; 2583 if (!gen8_composite_set_target(sna, &tmp->base, dst, 2584 dst_x, dst_y, width, height, true)) 2585 return false; 2586 2587 switch (gen8_composite_picture(sna, src, &tmp->base.src, 2588 src_x, src_y, 2589 
width, height, 2590 dst_x, dst_y, 2591 dst->polyMode == PolyModePrecise)) { 2592 case -1: 2593 goto cleanup_dst; 2594 case 0: 2595 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2596 goto cleanup_dst; 2597 /* fall through to fixup */ 2598 case 1: 2599 if (!gen8_composite_channel_convert(&tmp->base.src)) 2600 goto cleanup_src; 2601 break; 2602 } 2603 tmp->base.mask.bo = NULL; 2604 2605 tmp->base.is_affine = tmp->base.src.is_affine; 2606 tmp->base.need_magic_ca_pass = false; 2607 2608 tmp->base.u.gen8.flags = 2609 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2610 tmp->base.src.repeat, 2611 SAMPLER_FILTER_NEAREST, 2612 SAMPLER_EXTEND_PAD), 2613 gen8_get_blend(tmp->base.op, false, tmp->base.dst.format), 2614 GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2615 gen4_choose_spans_emitter(sna, tmp)); 2616 2617 tmp->box = gen8_render_composite_spans_box; 2618 tmp->boxes = gen8_render_composite_spans_boxes; 2619 if (tmp->emit_boxes) 2620 tmp->thread_boxes = gen8_render_composite_spans_boxes__thread; 2621 tmp->done = gen8_render_composite_spans_done; 2622 2623 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2624 if (!kgem_check_bo(&sna->kgem, 2625 tmp->base.dst.bo, tmp->base.src.bo, 2626 NULL)) { 2627 kgem_submit(&sna->kgem); 2628 if (!kgem_check_bo(&sna->kgem, 2629 tmp->base.dst.bo, tmp->base.src.bo, 2630 NULL)) 2631 goto cleanup_src; 2632 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2633 } 2634 2635 gen8_align_vertex(sna, &tmp->base); 2636 gen8_emit_composite_state(sna, &tmp->base); 2637 return true; 2638 2639cleanup_src: 2640 if (tmp->base.src.bo) 2641 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2642cleanup_dst: 2643 if (tmp->base.redirect.real_bo) 2644 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2645 return false; 2646} 2647#endif 2648 2649static void 2650gen8_emit_copy_state(struct sna *sna, 2651 const struct sna_composite_op *op) 2652{ 2653 uint32_t *binding_table; 2654 uint16_t offset, dirty; 2655 2656 gen8_get_batch(sna, op); 2657 2658 binding_table = gen8_composite_get_binding_table(sna, &offset); 2659 2660 dirty = kgem_bo_is_dirty(op->dst.bo); 2661 2662 binding_table[0] = 2663 gen8_bind_bo(sna, 2664 op->dst.bo, op->dst.width, op->dst.height, 2665 gen8_get_dest_format(op->dst.format), 2666 true); 2667 binding_table[1] = 2668 gen8_bind_bo(sna, 2669 op->src.bo, op->src.width, op->src.height, 2670 op->src.card_format, 2671 false); 2672 2673 if (sna->kgem.surface == offset && 2674 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 2675 sna->kgem.surface += SURFACE_DW; 2676 offset = sna->render_state.gen8.surface_table; 2677 } 2678 2679 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 2680 dirty = 0; 2681 2682 assert(!GEN8_READS_DST(op->u.gen8.flags)); 2683 gen8_emit_state(sna, op, offset | dirty); 2684} 2685 2686static inline bool 2687prefer_blt_copy(struct sna *sna, 2688 struct kgem_bo *src_bo, 2689 struct kgem_bo *dst_bo, 2690 unsigned flags) 2691{ 2692 if (sna->kgem.mode == KGEM_BLT) 2693 return true; 2694 2695 assert((flags & COPY_SYNC) == 0); 2696 2697 if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2698 return true; 2699 2700 if (untiled_tlb_miss(src_bo) || 2701 untiled_tlb_miss(dst_bo)) 2702 return true; 2703 2704 if (force_blt_ring(sna)) 2705 return true; 2706 2707 if (kgem_bo_is_render(dst_bo) || 2708 kgem_bo_is_render(src_bo)) 2709 return false; 2710 2711 if (prefer_render_ring(sna, dst_bo)) 2712 return false; 2713 2714 if (!prefer_blt_ring(sna, dst_bo, flags)) 2715 
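		/* no explicit preference for the BLT ring either */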
return false; 2716 2717 return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2718} 2719 2720static bool 2721gen8_render_copy_boxes(struct sna *sna, uint8_t alu, 2722 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2723 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2724 const BoxRec *box, int n, unsigned flags) 2725{ 2726 struct sna_composite_op tmp; 2727 BoxRec extents; 2728 2729 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2730 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2731 src_bo == dst_bo, 2732 overlaps(sna, 2733 src_bo, src_dx, src_dy, 2734 dst_bo, dst_dx, dst_dy, 2735 box, n, flags, &extents))); 2736 2737 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2738 sna_blt_compare_depth(src, dst) && 2739 sna_blt_copy_boxes(sna, alu, 2740 src_bo, src_dx, src_dy, 2741 dst_bo, dst_dx, dst_dy, 2742 dst->bitsPerPixel, 2743 box, n)) 2744 return true; 2745 2746 if (!(alu == GXcopy || alu == GXclear) || 2747 unaligned(src_bo, src->bitsPerPixel) || 2748 unaligned(dst_bo, dst->bitsPerPixel)) { 2749fallback_blt: 2750 DBG(("%s: fallback blt\n", __FUNCTION__)); 2751 if (!sna_blt_compare_depth(src, dst)) 2752 return false; 2753 2754 return sna_blt_copy_boxes_fallback(sna, alu, 2755 src, src_bo, src_dx, src_dy, 2756 dst, dst_bo, dst_dx, dst_dy, 2757 box, n); 2758 } 2759 2760 if (overlaps(sna, 2761 src_bo, src_dx, src_dy, 2762 dst_bo, dst_dx, dst_dy, 2763 box, n, flags, 2764 &extents)) { 2765 bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2766 2767 if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2768 sna_blt_copy_boxes(sna, alu, 2769 src_bo, src_dx, src_dy, 2770 dst_bo, dst_dx, dst_dy, 2771 dst->bitsPerPixel, 2772 box, n)) 2773 return true; 2774 2775 if (big) 2776 goto fallback_blt; 2777 2778 assert(src_bo == dst_bo); 2779 assert(src->depth == dst->depth); 2780 assert(src->width == dst->width); 2781 assert(src->height == dst->height); 2782 return sna_render_copy_boxes__overlap(sna, alu, 2783 src, src_bo, 2784 src_dx, src_dy, 2785 dst_dx, dst_dy, 2786 box, n, &extents); 2787 } 2788 2789 if (dst->depth == src->depth) { 2790 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2791 tmp.src.pict_format = tmp.dst.format; 2792 } else { 2793 tmp.dst.format = sna_format_for_depth(dst->depth); 2794 tmp.src.pict_format = sna_format_for_depth(src->depth); 2795 } 2796 if (!gen8_check_format(tmp.src.pict_format)) 2797 goto fallback_blt; 2798 2799 tmp.dst.pixmap = (PixmapPtr)dst; 2800 tmp.dst.width = dst->width; 2801 tmp.dst.height = dst->height; 2802 tmp.dst.bo = dst_bo; 2803 tmp.dst.x = tmp.dst.y = 0; 2804 tmp.damage = NULL; 2805 2806 sna_render_composite_redirect_init(&tmp); 2807 if (too_large(tmp.dst.width, tmp.dst.height)) { 2808 int i; 2809 2810 extents = box[0]; 2811 for (i = 1; i < n; i++) { 2812 if (box[i].x1 < extents.x1) 2813 extents.x1 = box[i].x1; 2814 if (box[i].y1 < extents.y1) 2815 extents.y1 = box[i].y1; 2816 2817 if (box[i].x2 > extents.x2) 2818 extents.x2 = box[i].x2; 2819 if (box[i].y2 > extents.y2) 2820 extents.y2 = box[i].y2; 2821 } 2822 2823 if (!sna_render_composite_redirect(sna, &tmp, 2824 extents.x1 + dst_dx, 2825 extents.y1 + dst_dy, 2826 extents.x2 - extents.x1, 2827 extents.y2 - extents.y1, 2828 n > 1)) 2829 goto fallback_tiled; 2830 } 2831 2832 tmp.src.card_format = gen8_get_card_format(tmp.src.pict_format); 2833 if (too_large(src->width, src->height)) { 2834 int i; 2835 2836 extents = box[0]; 2837 for (i = 1; i < n; i++) { 
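			/* Accumulate the bounding box of the boxes so
			 * that only the portion of the too-large source
			 * that is actually sampled needs to be mapped.
			 */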
2838 if (box[i].x1 < extents.x1) 2839 extents.x1 = box[i].x1; 2840 if (box[i].y1 < extents.y1) 2841 extents.y1 = box[i].y1; 2842 2843 if (box[i].x2 > extents.x2) 2844 extents.x2 = box[i].x2; 2845 if (box[i].y2 > extents.y2) 2846 extents.y2 = box[i].y2; 2847 } 2848 2849 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2850 extents.x1 + src_dx, 2851 extents.y1 + src_dy, 2852 extents.x2 - extents.x1, 2853 extents.y2 - extents.y1)) 2854 goto fallback_tiled_dst; 2855 } else { 2856 tmp.src.bo = src_bo; 2857 tmp.src.width = src->width; 2858 tmp.src.height = src->height; 2859 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2860 } 2861 2862 tmp.mask.bo = NULL; 2863 2864 tmp.floats_per_vertex = 2; 2865 tmp.floats_per_rect = 6; 2866 tmp.need_magic_ca_pass = 0; 2867 2868 tmp.u.gen8.flags = COPY_FLAGS(alu); 2869 2870 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2871 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2872 kgem_submit(&sna->kgem); 2873 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2874 if (tmp.src.bo != src_bo) 2875 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2876 if (tmp.redirect.real_bo) 2877 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2878 goto fallback_blt; 2879 } 2880 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2881 } 2882 2883 src_dx += tmp.src.offset[0]; 2884 src_dy += tmp.src.offset[1]; 2885 2886 dst_dx += tmp.dst.x; 2887 dst_dy += tmp.dst.y; 2888 2889 tmp.dst.x = tmp.dst.y = 0; 2890 2891 gen8_align_vertex(sna, &tmp); 2892 gen8_emit_copy_state(sna, &tmp); 2893 2894 do { 2895 int16_t *v; 2896 int n_this_time; 2897 2898 n_this_time = gen8_get_rectangles(sna, &tmp, n, 2899 gen8_emit_copy_state); 2900 n -= n_this_time; 2901 2902 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2903 sna->render.vertex_used += 6 * n_this_time; 2904 assert(sna->render.vertex_used <= sna->render.vertex_size); 2905 do { 2906 2907 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2908 box->x1 + src_dx, box->y1 + src_dy, 2909 box->x1 + dst_dx, box->y1 + dst_dy, 2910 box->x2 - box->x1, box->y2 - box->y1)); 2911 v[0] = box->x2 + dst_dx; 2912 v[2] = box->x2 + src_dx; 2913 v[1] = v[5] = box->y2 + dst_dy; 2914 v[3] = v[7] = box->y2 + src_dy; 2915 v[8] = v[4] = box->x1 + dst_dx; 2916 v[10] = v[6] = box->x1 + src_dx; 2917 v[9] = box->y1 + dst_dy; 2918 v[11] = box->y1 + src_dy; 2919 v += 12; box++; 2920 } while (--n_this_time); 2921 } while (n); 2922 2923 gen8_vertex_flush(sna); 2924 sna_render_composite_redirect_done(sna, &tmp); 2925 if (tmp.src.bo != src_bo) 2926 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2927 return true; 2928 2929fallback_tiled_dst: 2930 if (tmp.redirect.real_bo) 2931 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2932fallback_tiled: 2933 DBG(("%s: fallback tiled\n", __FUNCTION__)); 2934 if (sna_blt_compare_depth(src, dst) && 2935 sna_blt_copy_boxes(sna, alu, 2936 src_bo, src_dx, src_dy, 2937 dst_bo, dst_dx, dst_dy, 2938 dst->bitsPerPixel, 2939 box, n)) 2940 return true; 2941 2942 return sna_tiling_copy_boxes(sna, alu, 2943 src, src_bo, src_dx, src_dy, 2944 dst, dst_bo, dst_dx, dst_dy, 2945 box, n); 2946} 2947 2948static void 2949gen8_render_copy_blt(struct sna *sna, 2950 const struct sna_copy_op *op, 2951 int16_t sx, int16_t sy, 2952 int16_t w, int16_t h, 2953 int16_t dx, int16_t dy) 2954{ 2955 int16_t *v; 2956 2957 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_copy_state); 2958 2959 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2960 sna->render.vertex_used += 6; 2961 assert(sna->render.vertex_used <= sna->render.vertex_size); 2962 2963 
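		/* Each rectangle is emitted as three corners
		 * (bottom-right, bottom-left, top-left); a vertex packs
		 * the destination and source coordinates as two pairs
		 * of 16-bit integers, i.e. floats_per_vertex == 2.
		 */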
	v[0] = dx+w; v[1] = dy+h;
	v[2] = sx+w; v[3] = sy+h;
	v[4] = dx; v[5] = dy+h;
	v[6] = sx; v[7] = sy+h;
	v[8] = dx; v[9] = dy;
	v[10] = sx; v[11] = sy;
}

static void
gen8_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	if (sna->render.vertex_offset)
		gen8_vertex_flush(sna);
}

static bool
gen8_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(src_bo, src->drawable.bitsPerPixel) ||
	    unaligned(dst_bo, dst->drawable.bitsPerPixel)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen8_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen8_get_card_format(op->base.src.pict_format);
	op->base.src.width = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen8.flags = COPY_FLAGS(alu);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen8_align_vertex(sna, &op->base);
	gen8_emit_copy_state(sna, &op->base);

	op->blt = gen8_render_copy_blt;
	op->done = gen8_render_copy_done;
	return true;
}

static void
gen8_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to clearing the full MSC surface only with a
	 * specific kernel.
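	 * Until such a kernel is written, clears simply reuse the
	 * ordinary solid-fill state emitted below.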
3065 */ 3066 3067 gen8_get_batch(sna, op); 3068 3069 binding_table = gen8_composite_get_binding_table(sna, &offset); 3070 3071 dirty = kgem_bo_is_dirty(op->dst.bo); 3072 3073 binding_table[0] = 3074 gen8_bind_bo(sna, 3075 op->dst.bo, op->dst.width, op->dst.height, 3076 gen8_get_dest_format(op->dst.format), 3077 true); 3078 binding_table[1] = 3079 gen8_bind_bo(sna, 3080 op->src.bo, 1, 1, 3081 SURFACEFORMAT_B8G8R8A8_UNORM, 3082 false); 3083 3084 if (sna->kgem.surface == offset && 3085 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 3086 sna->kgem.surface += SURFACE_DW; 3087 offset = sna->render_state.gen8.surface_table; 3088 } 3089 3090 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 3091 dirty = 0; 3092 3093 gen8_emit_state(sna, op, offset | dirty); 3094} 3095 3096static bool 3097gen8_render_fill_boxes(struct sna *sna, 3098 CARD8 op, 3099 PictFormat format, 3100 const xRenderColor *color, 3101 const DrawableRec *dst, struct kgem_bo *dst_bo, 3102 const BoxRec *box, int n) 3103{ 3104 struct sna_composite_op tmp; 3105 uint32_t pixel; 3106 3107 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3108 __FUNCTION__, op, 3109 color->red, color->green, color->blue, color->alpha, (int)format)); 3110 3111 if (op >= ARRAY_SIZE(gen8_blend_op)) { 3112 DBG(("%s: fallback due to unhandled blend op: %d\n", 3113 __FUNCTION__, op)); 3114 return false; 3115 } 3116 3117 if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3118 !gen8_check_dst_format(format) || 3119 unaligned(dst_bo, PICT_FORMAT_BPP(format))) { 3120 uint8_t alu = GXinvalid; 3121 3122 if (op <= PictOpSrc) { 3123 pixel = 0; 3124 if (op == PictOpClear) 3125 alu = GXclear; 3126 else if (sna_get_pixel_from_rgba(&pixel, 3127 color->red, 3128 color->green, 3129 color->blue, 3130 color->alpha, 3131 format)) 3132 alu = GXcopy; 3133 } 3134 3135 if (alu != GXinvalid && 3136 sna_blt_fill_boxes(sna, alu, 3137 dst_bo, dst->bitsPerPixel, 3138 pixel, box, n)) 3139 return true; 3140 3141 if (!gen8_check_dst_format(format)) 3142 return false; 3143 } 3144 3145 if (op == PictOpClear) { 3146 pixel = 0; 3147 op = PictOpSrc; 3148 } else if (!sna_get_pixel_from_rgba(&pixel, 3149 color->red, 3150 color->green, 3151 color->blue, 3152 color->alpha, 3153 PICT_a8r8g8b8)) 3154 return false; 3155 3156 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3157 __FUNCTION__, pixel, n, 3158 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3159 3160 tmp.dst.pixmap = (PixmapPtr)dst; 3161 tmp.dst.width = dst->width; 3162 tmp.dst.height = dst->height; 3163 tmp.dst.format = format; 3164 tmp.dst.bo = dst_bo; 3165 tmp.dst.x = tmp.dst.y = 0; 3166 tmp.damage = NULL; 3167 3168 sna_render_composite_redirect_init(&tmp); 3169 if (too_large(dst->width, dst->height)) { 3170 BoxRec extents; 3171 3172 boxes_extents(box, n, &extents); 3173 if (!sna_render_composite_redirect(sna, &tmp, 3174 extents.x1, extents.y1, 3175 extents.x2 - extents.x1, 3176 extents.y2 - extents.y1, 3177 n > 1)) 3178 return sna_tiling_fill_boxes(sna, op, format, color, 3179 dst, dst_bo, box, n); 3180 } 3181 3182 tmp.src.bo = sna_render_get_solid(sna, pixel); 3183 tmp.mask.bo = NULL; 3184 3185 tmp.floats_per_vertex = 2; 3186 tmp.floats_per_rect = 6; 3187 tmp.need_magic_ca_pass = false; 3188 3189 tmp.u.gen8.flags = FILL_FLAGS(op, format); 3190 3191 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3192 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3193 kgem_submit(&sna->kgem); 3194 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3195 
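			/* Still no room in the batch even after
			 * flushing; release the solid bo and any
			 * redirection before reporting failure.
			 */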
kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3196 tmp.src.bo = NULL; 3197 3198 if (tmp.redirect.real_bo) { 3199 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3200 tmp.redirect.real_bo = NULL; 3201 } 3202 3203 return false; 3204 } 3205 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3206 } 3207 3208 gen8_align_vertex(sna, &tmp); 3209 gen8_emit_fill_state(sna, &tmp); 3210 3211 do { 3212 int n_this_time; 3213 int16_t *v; 3214 3215 n_this_time = gen8_get_rectangles(sna, &tmp, n, 3216 gen8_emit_fill_state); 3217 n -= n_this_time; 3218 3219 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3220 sna->render.vertex_used += 6 * n_this_time; 3221 assert(sna->render.vertex_used <= sna->render.vertex_size); 3222 do { 3223 DBG((" (%d, %d), (%d, %d)\n", 3224 box->x1, box->y1, box->x2, box->y2)); 3225 3226 v[0] = box->x2; 3227 v[5] = v[1] = box->y2; 3228 v[8] = v[4] = box->x1; 3229 v[9] = box->y1; 3230 v[2] = v[3] = v[7] = 1; 3231 v[6] = v[10] = v[11] = 0; 3232 v += 12; box++; 3233 } while (--n_this_time); 3234 } while (n); 3235 3236 gen8_vertex_flush(sna); 3237 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3238 sna_render_composite_redirect_done(sna, &tmp); 3239 return true; 3240} 3241 3242static void 3243gen8_render_fill_op_blt(struct sna *sna, 3244 const struct sna_fill_op *op, 3245 int16_t x, int16_t y, int16_t w, int16_t h) 3246{ 3247 int16_t *v; 3248 3249 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3250 3251 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3252 3253 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3254 sna->render.vertex_used += 6; 3255 assert(sna->render.vertex_used <= sna->render.vertex_size); 3256 3257 v[0] = x+w; 3258 v[4] = v[8] = x; 3259 v[1] = v[5] = y+h; 3260 v[9] = y; 3261 3262 v[2] = v[3] = v[7] = 1; 3263 v[6] = v[10] = v[11] = 0; 3264} 3265 3266fastcall static void 3267gen8_render_fill_op_box(struct sna *sna, 3268 const struct sna_fill_op *op, 3269 const BoxRec *box) 3270{ 3271 int16_t *v; 3272 3273 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3274 box->x1, box->y1, box->x2, box->y2)); 3275 3276 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3277 3278 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3279 sna->render.vertex_used += 6; 3280 assert(sna->render.vertex_used <= sna->render.vertex_size); 3281 3282 v[0] = box->x2; 3283 v[8] = v[4] = box->x1; 3284 v[5] = v[1] = box->y2; 3285 v[9] = box->y1; 3286 3287 v[7] = v[2] = v[3] = 1; 3288 v[6] = v[10] = v[11] = 0; 3289} 3290 3291fastcall static void 3292gen8_render_fill_op_boxes(struct sna *sna, 3293 const struct sna_fill_op *op, 3294 const BoxRec *box, 3295 int nbox) 3296{ 3297 DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, 3298 box->x1, box->y1, box->x2, box->y2, nbox)); 3299 3300 do { 3301 int nbox_this_time; 3302 int16_t *v; 3303 3304 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 3305 gen8_emit_fill_state); 3306 nbox -= nbox_this_time; 3307 3308 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3309 sna->render.vertex_used += 6 * nbox_this_time; 3310 assert(sna->render.vertex_used <= sna->render.vertex_size); 3311 3312 do { 3313 v[0] = box->x2; 3314 v[8] = v[4] = box->x1; 3315 v[5] = v[1] = box->y2; 3316 v[9] = box->y1; 3317 v[7] = v[2] = v[3] = 1; 3318 v[6] = v[10] = v[11] = 0; 3319 box++; v += 12; 3320 } while (--nbox_this_time); 3321 } while (nbox); 3322} 3323 3324static void 3325gen8_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3326{ 3327 if (sna->render.vertex_offset) 3328 gen8_vertex_flush(sna); 3329 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3330} 3331 3332static bool 3333gen8_render_fill(struct sna *sna, uint8_t alu, 3334 PixmapPtr dst, struct kgem_bo *dst_bo, 3335 uint32_t color, unsigned flags, 3336 struct sna_fill_op *op) 3337{ 3338 DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3339 3340 if (prefer_blt_fill(sna, dst_bo, flags) && 3341 sna_blt_fill(sna, alu, 3342 dst_bo, dst->drawable.bitsPerPixel, 3343 color, 3344 op)) 3345 return true; 3346 3347 if (!(alu == GXcopy || alu == GXclear) || 3348 too_large(dst->drawable.width, dst->drawable.height) || 3349 unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3350 return sna_blt_fill(sna, alu, 3351 dst_bo, dst->drawable.bitsPerPixel, 3352 color, 3353 op); 3354 3355 if (alu == GXclear) 3356 color = 0; 3357 3358 op->base.dst.pixmap = dst; 3359 op->base.dst.width = dst->drawable.width; 3360 op->base.dst.height = dst->drawable.height; 3361 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3362 op->base.dst.bo = dst_bo; 3363 op->base.dst.x = op->base.dst.y = 0; 3364 3365 op->base.src.bo = 3366 sna_render_get_solid(sna, 3367 sna_rgba_for_color(color, 3368 dst->drawable.depth)); 3369 op->base.mask.bo = NULL; 3370 3371 op->base.need_magic_ca_pass = false; 3372 op->base.floats_per_vertex = 2; 3373 op->base.floats_per_rect = 6; 3374 3375 op->base.u.gen8.flags = FILL_FLAGS_NOBLEND; 3376 3377 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3378 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3379 kgem_submit(&sna->kgem); 3380 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3381 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3382 return false; 3383 } 3384 3385 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3386 } 3387 3388 gen8_align_vertex(sna, &op->base); 3389 gen8_emit_fill_state(sna, &op->base); 3390 3391 op->blt = gen8_render_fill_op_blt; 3392 op->box = gen8_render_fill_op_box; 3393 op->boxes = gen8_render_fill_op_boxes; 3394 op->points = NULL; 3395 op->done = gen8_render_fill_op_done; 3396 return true; 3397} 3398 3399static bool 3400gen8_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3401 uint32_t color, 3402 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3403 uint8_t alu) 3404{ 3405 BoxRec box; 3406 3407 box.x1 = x1; 3408 box.y1 = y1; 3409 box.x2 = x2; 3410 box.y2 = y2; 3411 3412 return sna_blt_fill_boxes(sna, alu, 3413 bo, dst->drawable.bitsPerPixel, 3414 color, &box, 1); 3415} 3416 3417static bool 3418gen8_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3419 uint32_t color, 3420 int16_t x1, int16_t y1, 3421 int16_t x2, int16_t y2, 3422 uint8_t alu) 3423{ 3424 struct sna_composite_op tmp; 3425 int16_t *v; 3426 3427 
	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen8_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(bo, dst->drawable.bitsPerPixel))
		return gen8_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen8.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		/* If the bo still cannot fit even after flushing the
		 * batch, give up and let the caller fall back.
		 */
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen8_align_vertex(sna, &tmp);
	gen8_emit_fill_state(sna, &tmp);

	gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

static bool
gen8_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

static bool
gen8_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen8_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER...
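	 * (i.e. the target exceeds the 16k x 16k limit of the 3D
	 * pipeline, or the bo is not aligned as the RENDER engine
	 * requires)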
*/ 3528 if (too_large(dst->drawable.width, dst->drawable.height) || 3529 unaligned(bo, dst->drawable.bitsPerPixel)) 3530 return gen8_render_clear_try_blt(sna, dst, bo); 3531 3532 tmp.dst.pixmap = dst; 3533 tmp.dst.width = dst->drawable.width; 3534 tmp.dst.height = dst->drawable.height; 3535 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3536 tmp.dst.bo = bo; 3537 tmp.dst.x = tmp.dst.y = 0; 3538 3539 tmp.src.bo = sna_render_get_solid(sna, 0); 3540 tmp.mask.bo = NULL; 3541 3542 tmp.floats_per_vertex = 2; 3543 tmp.floats_per_rect = 6; 3544 tmp.need_magic_ca_pass = false; 3545 3546 tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3547 3548 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3549 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3550 kgem_submit(&sna->kgem); 3551 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3552 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3553 return false; 3554 } 3555 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3556 } 3557 3558 gen8_align_vertex(sna, &tmp); 3559 gen8_emit_fill_state(sna, &tmp); 3560 3561 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3562 3563 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3564 sna->render.vertex_used += 6; 3565 assert(sna->render.vertex_used <= sna->render.vertex_size); 3566 3567 v[0] = dst->drawable.width; 3568 v[5] = v[1] = dst->drawable.height; 3569 v[8] = v[4] = 0; 3570 v[9] = 0; 3571 3572 v[7] = v[2] = v[3] = 1; 3573 v[6] = v[10] = v[11] = 0; 3574 3575 gen8_vertex_flush(sna); 3576 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3577 3578 return true; 3579} 3580 3581#if !NO_VIDEO 3582static uint32_t gen8_bind_video_source(struct sna *sna, 3583 struct kgem_bo *bo, 3584 uint32_t delta, 3585 int width, 3586 int height, 3587 int pitch, 3588 uint32_t format) 3589{ 3590 uint32_t *ss; 3591 int offset; 3592 3593 offset = sna->kgem.surface -= SURFACE_DW; 3594 ss = sna->kgem.batch + offset; 3595 ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 3596 gen8_tiling_bits(bo->tiling) | 3597 format << SURFACE_FORMAT_SHIFT | 3598 SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 3599 ss[1] = 0; 3600 ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 3601 (height - 1) << SURFACE_HEIGHT_SHIFT); 3602 ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; 3603 ss[4] = 0; 3604 ss[5] = 0; 3605 ss[6] = 0; 3606 ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 3607 *(uint64_t *)(ss+8) = 3608 kgem_add_reloc64(&sna->kgem, offset + 8, bo, 3609 I915_GEM_DOMAIN_SAMPLER << 16, 3610 delta); 3611 ss[10] = 0; 3612 ss[11] = 0; 3613 ss[12] = 0; 3614 ss[13] = 0; 3615 ss[14] = 0; 3616 ss[15] = 0; 3617 3618 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", 3619 offset, bo->handle, ss[1], 3620 format, width, height, bo->pitch, bo->tiling)); 3621 3622 return offset * sizeof(uint32_t); 3623} 3624 3625static void gen8_emit_video_state(struct sna *sna, 3626 const struct sna_composite_op *op) 3627{ 3628 struct sna_video_frame *frame = op->priv; 3629 uint32_t src_surf_format; 3630 uint32_t src_surf_base[6]; 3631 int src_width[6]; 3632 int src_height[6]; 3633 int src_pitch[6]; 3634 uint32_t *binding_table; 3635 uint16_t offset; 3636 int n_src, n; 3637 3638 /* XXX VeBox, bicubic */ 3639 3640 gen8_get_batch(sna, op); 3641 3642 src_surf_base[0] = 0; 3643 src_surf_base[1] = 0; 3644 src_surf_base[2] = frame->VBufOffset; 3645 src_surf_base[3] = frame->VBufOffset; 3646 src_surf_base[4] = frame->UBufOffset; 3647 src_surf_base[5] = frame->UBufOffset; 3648 3649 if (is_planar_fourcc(frame->id)) { 3650 src_surf_format = SURFACEFORMAT_R8_UNORM; 3651 
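		/* Planar video is sampled as six R8 surfaces: two
		 * aliases of the Y plane, then the subsampled V and
		 * U planes (each at half width and half height).
		 */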
src_width[1] = src_width[0] = frame->width; 3652 src_height[1] = src_height[0] = frame->height; 3653 src_pitch[1] = src_pitch[0] = frame->pitch[1]; 3654 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 3655 frame->width / 2; 3656 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 3657 frame->height / 2; 3658 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 3659 frame->pitch[0]; 3660 n_src = 6; 3661 } else { 3662 if (frame->id == FOURCC_UYVY) 3663 src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; 3664 else 3665 src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; 3666 3667 src_width[0] = frame->width; 3668 src_height[0] = frame->height; 3669 src_pitch[0] = frame->pitch[0]; 3670 n_src = 1; 3671 } 3672 3673 binding_table = gen8_composite_get_binding_table(sna, &offset); 3674 3675 binding_table[0] = 3676 gen8_bind_bo(sna, 3677 op->dst.bo, op->dst.width, op->dst.height, 3678 gen8_get_dest_format(op->dst.format), 3679 true); 3680 for (n = 0; n < n_src; n++) { 3681 binding_table[1+n] = 3682 gen8_bind_video_source(sna, 3683 frame->bo, 3684 src_surf_base[n], 3685 src_width[n], 3686 src_height[n], 3687 src_pitch[n], 3688 src_surf_format); 3689 } 3690 3691 gen8_emit_state(sna, op, offset); 3692} 3693 3694static bool 3695gen8_render_video(struct sna *sna, 3696 struct sna_video *video, 3697 struct sna_video_frame *frame, 3698 RegionPtr dstRegion, 3699 PixmapPtr pixmap) 3700{ 3701 struct sna_composite_op tmp; 3702 struct sna_pixmap *priv = sna_pixmap(pixmap); 3703 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3704 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3705 int src_width = frame->src.x2 - frame->src.x1; 3706 int src_height = frame->src.y2 - frame->src.y1; 3707 float src_offset_x, src_offset_y; 3708 float src_scale_x, src_scale_y; 3709 int nbox, pix_xoff, pix_yoff; 3710 unsigned filter; 3711 const BoxRec *box; 3712 3713 DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 3714 __FUNCTION__, 3715 src_width, src_height, dst_width, dst_height, 3716 region_num_rects(dstRegion), 3717 REGION_EXTENTS(NULL, dstRegion)->x1, 3718 REGION_EXTENTS(NULL, dstRegion)->y1, 3719 REGION_EXTENTS(NULL, dstRegion)->x2, 3720 REGION_EXTENTS(NULL, dstRegion)->y2)); 3721 3722 assert(priv->gpu_bo); 3723 assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); 3724 assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); 3725 3726 memset(&tmp, 0, sizeof(tmp)); 3727 3728 tmp.dst.pixmap = pixmap; 3729 tmp.dst.width = pixmap->drawable.width; 3730 tmp.dst.height = pixmap->drawable.height; 3731 tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 3732 tmp.dst.bo = priv->gpu_bo; 3733 3734 tmp.src.bo = frame->bo; 3735 tmp.mask.bo = NULL; 3736 3737 tmp.floats_per_vertex = 3; 3738 tmp.floats_per_rect = 9; 3739 3740 if (src_width == dst_width && src_height == dst_height) 3741 filter = SAMPLER_FILTER_NEAREST; 3742 else 3743 filter = SAMPLER_FILTER_BILINEAR; 3744 3745 tmp.u.gen8.flags = 3746 GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 3747 SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 3748 NO_BLEND, 3749 is_planar_fourcc(frame->id) ? 
3750 GEN8_WM_KERNEL_VIDEO_PLANAR : 3751 GEN8_WM_KERNEL_VIDEO_PACKED, 3752 2); 3753 tmp.priv = frame; 3754 3755 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3756 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 3757 kgem_submit(&sna->kgem); 3758 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 3759 return false; 3760 3761 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3762 } 3763 3764 gen8_align_vertex(sna, &tmp); 3765 gen8_emit_video_state(sna, &tmp); 3766 3767 /* Set up the offset for translating from the given region (in screen 3768 * coordinates) to the backing pixmap. 3769 */ 3770#ifdef COMPOSITE 3771 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 3772 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 3773#else 3774 pix_xoff = 0; 3775 pix_yoff = 0; 3776#endif 3777 3778 DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 3779 __FUNCTION__, 3780 frame->src.x1, frame->src.y1, 3781 src_width, src_height, 3782 dst_width, dst_height, 3783 frame->width, frame->height)); 3784 3785 src_scale_x = (float)src_width / dst_width / frame->width; 3786 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 3787 3788 src_scale_y = (float)src_height / dst_height / frame->height; 3789 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 3790 3791 DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 3792 __FUNCTION__, 3793 src_scale_x, src_scale_y, 3794 src_offset_x, src_offset_y)); 3795 3796 box = region_rects(dstRegion); 3797 nbox = region_num_rects(dstRegion); 3798 while (nbox--) { 3799 BoxRec r; 3800 3801 DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 3802 __FUNCTION__, 3803 box->x1, box->y1, 3804 box->x2, box->y2, 3805 pix_xoff, pix_yoff, 3806 box->x1 * src_scale_x + src_offset_x, 3807 box->y1 * src_scale_y + src_offset_y, 3808 box->x2 * src_scale_x + src_offset_x, 3809 box->y2 * src_scale_y + src_offset_y)); 3810 3811 r.x1 = box->x1 + pix_xoff; 3812 r.x2 = box->x2 + pix_xoff; 3813 r.y1 = box->y1 + pix_yoff; 3814 r.y2 = box->y2 + pix_yoff; 3815 3816 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); 3817 3818 OUT_VERTEX(r.x2, r.y2); 3819 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 3820 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3821 3822 OUT_VERTEX(r.x1, r.y2); 3823 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3824 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3825 3826 OUT_VERTEX(r.x1, r.y1); 3827 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3828 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 3829 3830 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 3831 sna_damage_add_box(&priv->gpu_damage, &r); 3832 sna_damage_subtract_box(&priv->cpu_damage, &r); 3833 } 3834 box++; 3835 } 3836 3837 gen8_vertex_flush(sna); 3838 return true; 3839} 3840#endif 3841 3842static void gen8_render_flush(struct sna *sna) 3843{ 3844 gen8_vertex_close(sna); 3845 3846 assert(sna->render.vb_id == 0); 3847 assert(sna->render.vertex_offset == 0); 3848} 3849 3850static void gen8_render_reset(struct sna *sna) 3851{ 3852 sna->render_state.gen8.emit_flush = false; 3853 sna->render_state.gen8.needs_invariant = true; 3854 sna->render_state.gen8.ve_id = 3 << 2; 3855 sna->render_state.gen8.last_primitive = -1; 3856 3857 sna->render_state.gen8.num_sf_outputs = 0; 3858 sna->render_state.gen8.samplers = -1; 3859 sna->render_state.gen8.blend = -1; 3860 sna->render_state.gen8.kernel = -1; 3861 sna->render_state.gen8.drawrect_offset = -1; 3862 
sna->render_state.gen8.drawrect_limit = -1; 3863 sna->render_state.gen8.surface_table = 0; 3864 3865 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3866 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3867 discard_vbo(sna); 3868 } 3869 3870 sna->render.vertex_offset = 0; 3871 sna->render.nvertex_reloc = 0; 3872 sna->render.vb_id = 0; 3873} 3874 3875static void gen8_render_fini(struct sna *sna) 3876{ 3877 kgem_bo_destroy(&sna->kgem, sna->render_state.gen8.general_bo); 3878} 3879 3880static bool gen8_render_setup(struct sna *sna) 3881{ 3882 struct gen8_render_state *state = &sna->render_state.gen8; 3883 struct sna_static_stream general; 3884 struct gen8_sampler_state *ss; 3885 int i, j, k, l, m; 3886 uint32_t devid; 3887 3888 devid = intel_get_device_id(sna->scrn); 3889 if (devid & 0xf) 3890 state->gt = ((devid >> 4) & 0xf) + 1; 3891 DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); 3892 3893 sna_static_stream_init(&general); 3894 3895 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3896 * dumps, you know it points to zero. 3897 */ 3898 null_create(&general); 3899 3900 for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { 3901 if (wm_kernels[m].size) { 3902 state->wm_kernel[m][1] = 3903 sna_static_stream_add(&general, 3904 wm_kernels[m].data, 3905 wm_kernels[m].size, 3906 64); 3907 } else { 3908 if (USE_8_PIXEL_DISPATCH) { 3909 state->wm_kernel[m][0] = 3910 sna_static_stream_compile_wm(sna, &general, 3911 wm_kernels[m].data, 8); 3912 } 3913 3914 if (USE_16_PIXEL_DISPATCH) { 3915 state->wm_kernel[m][1] = 3916 sna_static_stream_compile_wm(sna, &general, 3917 wm_kernels[m].data, 16); 3918 } 3919 3920 if (USE_32_PIXEL_DISPATCH) { 3921 state->wm_kernel[m][2] = 3922 sna_static_stream_compile_wm(sna, &general, 3923 wm_kernels[m].data, 32); 3924 } 3925 } 3926 assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 3927 } 3928 3929 COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); 3930 ss = sna_static_stream_map(&general, 3931 2 * sizeof(*ss) * 3932 (2 + 3933 FILTER_COUNT * EXTEND_COUNT * 3934 FILTER_COUNT * EXTEND_COUNT), 3935 32); 3936 state->wm_state = sna_static_stream_offsetof(&general, ss); 3937 sampler_copy_init(ss); ss += 2; 3938 sampler_fill_init(ss); ss += 2; 3939 for (i = 0; i < FILTER_COUNT; i++) { 3940 for (j = 0; j < EXTEND_COUNT; j++) { 3941 for (k = 0; k < FILTER_COUNT; k++) { 3942 for (l = 0; l < EXTEND_COUNT; l++) { 3943 sampler_state_init(ss++, i, j); 3944 sampler_state_init(ss++, k, l); 3945 } 3946 } 3947 } 3948 } 3949 3950 state->cc_blend = gen8_create_blend_state(&general); 3951 3952 state->general_bo = sna_static_stream_fini(sna, &general); 3953 return state->general_bo != NULL; 3954} 3955 3956const char *gen8_render_init(struct sna *sna, const char *backend) 3957{ 3958 if (!gen8_render_setup(sna)) 3959 return backend; 3960 3961 sna->kgem.context_switch = gen6_render_context_switch; 3962 sna->kgem.retire = gen6_render_retire; 3963 sna->kgem.expire = gen4_render_expire; 3964 3965#if !NO_COMPOSITE 3966 sna->render.composite = gen8_render_composite; 3967 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3968#endif 3969#if !NO_COMPOSITE_SPANS 3970 sna->render.check_composite_spans = gen8_check_composite_spans; 3971 sna->render.composite_spans = gen8_render_composite_spans; 3972 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3973#endif 3974#if !NO_VIDEO 3975 sna->render.video = gen8_render_video; 3976#endif 3977 3978#if !NO_COPY_BOXES 3979 sna->render.copy_boxes = 
gen8_render_copy_boxes; 3980#endif 3981#if !NO_COPY 3982 sna->render.copy = gen8_render_copy; 3983#endif 3984 3985#if !NO_FILL_BOXES 3986 sna->render.fill_boxes = gen8_render_fill_boxes; 3987#endif 3988#if !NO_FILL 3989 sna->render.fill = gen8_render_fill; 3990#endif 3991#if !NO_FILL_ONE 3992 sna->render.fill_one = gen8_render_fill_one; 3993#endif 3994#if !NO_FILL_CLEAR 3995 sna->render.clear = gen8_render_clear; 3996#endif 3997 3998 sna->render.flush = gen8_render_flush; 3999 sna->render.reset = gen8_render_reset; 4000 sna->render.fini = gen8_render_fini; 4001 4002 sna->render.max_3d_size = GEN8_MAX_SIZE; 4003 sna->render.max_3d_pitch = 1 << 18; 4004 return "Broadwell"; 4005} 4006
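/* A minimal sketch of the intended use, assuming the caller chains the
 * per-generation initialisers during probe (the surrounding variable
 * names below are illustrative only):
 *
 *	const char *backend = "none";
 *	...
 *	backend = gen8_render_init(sna, backend);
 *
 * On success the sna->render vtable has been populated and the returned
 * string names the active backend ("Broadwell"); on failure the
 * previous backend name is passed through unchanged.
 */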