/*
 * Copyright © 2012,2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "gen8_render.h"
#include "gen8_eu.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
#include "gen6_common.h"
#include "gen8_vertex.h"

#define SIM 1

#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
#define NO_VIDEO 0

#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

#define GEN8_MAX_SIZE 16384

/* XXX Todo
 *
 * STR (software tiled rendering) mode. No, really.
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
 */

#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)

/* Pipeline stages:
 *  1. Command Streamer (CS)
 *  2. Vertex Fetch (VF)
 *  3. Vertex Shader (VS)
 *  4. Hull Shader (HS)
 *  5. Tessellation Engine (TE)
 *  6. Domain Shader (DS)
 *  7. Geometry Shader (GS)
 *  8. Stream Output Logic (SOL)
 *  9. Clipper (CLIP)
 * 10. Strip/Fan (SF)
 * 11. Windower/Masker (WM)
 * 12. Color Calculator (CC)
 */
#if !NO_VIDEO
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb.g8b"
#include "exa_wm_write.g8b"
};
#endif

#define SURFACE_DW (64 / sizeof(uint32_t))

#define KERNEL(kernel_enum, kernel, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2),

	NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2),

#if !NO_VIDEO
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
#endif
};
#undef KERNEL

static const struct blendinfo {
	uint8_t src_alpha;
	uint8_t src_blend;
	uint8_t dst_blend;
} gen8_blend_op[] = {
	/* Clear */	  {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */	  {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */	  {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */	  {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */	  {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */	  {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */	  {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */  {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	  {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */	  {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	  {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};
/**
 * Highest-valued BLENDFACTOR used in gen8_blend_op.
 *
 * This leaves out GEN8_BLENDFACTOR_INV_DST_COLOR,
 * GEN8_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN8_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN8_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN8_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen8_blend_state), 64)

#define BLEND_OFFSET(s, d) \
	((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN8_BLENDFACTOR_COUNT + (d)) << 4)

#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO)

#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2)

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN8_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER 1
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)

#define GEN8_SAMPLER(f) (((f) >> 20) & 0xfff)
#define GEN8_BLEND(f) (((f) >> 4) & 0x7ff)
#define GEN8_READS_DST(f) (((f) >> 15) & 1)
#define GEN8_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN8_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN8_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V))
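
/* Note on the packed flags word (added commentary): as implied by the
 * macros above, op->u.gen8.flags carries, from low to high bits, the
 * vertex-element id (bits 0-3), the blend index shifted left by 4
 * (bits 4-14, recovered by GEN8_BLEND), a dst-read bit that
 * BLEND_OFFSET sets whenever the dst blend factor is not ZERO
 * (bit 15), the WM kernel id (bits 16-19) and the sampler-state index
 * (bits 20 and up).
 */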
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH64(v) batch_emit64(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

static inline bool too_large(int width, int height)
{
	return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE;
}

static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
		return true;

	return false;
#else
	return false;
#endif
}

static uint32_t gen8_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN8_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);

	src = gen8_blend_op[op].src_blend;
	dst = gen8_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == BLENDFACTOR_DST_ALPHA)
			src = BLENDFACTOR_ONE;
		else if (src == BLENDFACTOR_INV_DST_ALPHA)
			src = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen8_blend_op[op].src_alpha) {
		if (dst == BLENDFACTOR_SRC_ALPHA)
			dst = BLENDFACTOR_SRC_COLOR;
		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
			dst = BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
	return BLEND_OFFSET(src, dst);
}
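
/* Worked example (added commentary): PictOpOver pulls
 * {src_alpha=1, ONE, INV_SRC_ALPHA} from gen8_blend_op. On an xRGB
 * destination neither factor references dst alpha, so nothing is
 * rewritten; with a component-alpha mask the dst factor becomes
 * INV_SRC_COLOR, so each channel is scaled by its own mask*alpha
 * value. BLEND_OFFSET then sets bit 15 (the dst is read) and stores
 * (src * GEN8_BLENDFACTOR_COUNT + dst) << 4 in the low bits.
 */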
static uint32_t gen8_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8X8_UNORM;
#ifdef PICT_a2r10g10b10
	case PICT_a2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen8_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
#ifdef PICT_a2r10g10b10
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen8_check_dst_format(PictFormat format)
{
	if (gen8_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen8_check_format(uint32_t format)
{
	if (gen8_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen8_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen8_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen8_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen8_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

static int
gen8_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen8_blend_op[op].src_alpha)
				base = GEN8_WM_KERNEL_MASKSA;
			else
				base = GEN8_WM_KERNEL_MASKCA;
		} else
			base = GEN8_WM_KERNEL_MASK;
	} else
		base = GEN8_WM_KERNEL_NOMASK;

	return base + !is_affine;
}
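
/* Added note: each kernel enum places the affine variant immediately
 * before its projective twin, which is what "base + !is_affine" above
 * relies on. For example, PictOpOver (src_alpha is set in
 * gen8_blend_op) with a component-alpha mask and an affine transform
 * selects GEN8_WM_KERNEL_MASKSA; a projective transform bumps that to
 * MASKSA_P.
 */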
static void
gen8_emit_push_constants(struct sna *sna)
{
#if SIM
	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_urb(struct sna *sna)
{
	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT |
		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);
}

static void
gen8_emit_state_base_address(struct sna *sna)
{
	uint32_t num_pages;

	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);

	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
	OUT_BATCH64(0); /* general */
	OUT_BATCH(0); /* stateless dataport */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
				     sna->kgem.nbatch,
				     NULL,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(0); /* indirect */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	/* upper bounds */
	num_pages = sna->render_state.gen8.general_bo->size.pages.count;
	OUT_BATCH(0); /* general */
	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(num_pages << 12 | 1); /* instruction */
}

static void
gen8_emit_vs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_VS | (9 - 2));
	OUT_BATCH64(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(1 << 1); /* pass-through */
	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_VS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_hs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_HS | (9 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0); /* no HS kernel */
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_HS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_te_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_ds_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DS | (9 - 2));
	OUT_BATCH64(0); /* no kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_DS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_gs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_GS | (10 - 2));
	OUT_BATCH64(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_GS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_sol_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_SF | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_clip_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}
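
/* Added note: with VS, HS, TE, DS, GS and SOL all disabled or in
 * pass-through above, rectlist vertices travel straight from the
 * vertex fetcher through the (pass-through) clipper to SF/WM, so of
 * the URB allocations only the VS entries holding the VUEs are
 * actually consumed.
 */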
static void
gen8_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DEPTH_BUFFER | (8 - 2));
#if 0
	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
#else
	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
#endif
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_STENCIL_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_wm_invariant(struct sna *sna)
{
	gen8_emit_null_depth_buffer(sna);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_WM | (2 - 2));
	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2));
	OUT_BATCH(0);
#endif

#if 0
	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
		  PSX_ATTRIBUTE_ENABLE);

	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2));
	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
		  RASTER_CULL_NONE);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_PS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
#endif
}

static void
gen8_emit_cc_invariant(struct sna *sna)
{
}

static void
gen8_emit_vf_invariant(struct sna *sna)
{
	int n;

#if 1
	OUT_BATCH(GEN8_3DSTATE_VF | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_VF_SGVS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
	OUT_BATCH(RECTLIST);

	OUT_BATCH(GEN8_3DSTATE_VF_STATISTICS | 0);

	for (n = 1; n <= 3; n++) {
		OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
		OUT_BATCH(n);
		OUT_BATCH(0);
	}
}

static void
gen8_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_3D);

#if SIM
	OUT_BATCH(GEN8_STATE_SIP | (3 - 2));
	OUT_BATCH64(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */

	OUT_BATCH(GEN8_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SAMPLE_PATTERN | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	//OUT_BATCH(8<<20 | 8<<16);
	OUT_BATCH(0);
#endif

	gen8_emit_push_constants(sna);
	gen8_emit_urb(sna);

	gen8_emit_state_base_address(sna);

	gen8_emit_vf_invariant(sna);
	gen8_emit_vs_invariant(sna);
	gen8_emit_hs_invariant(sna);
	gen8_emit_te_invariant(sna);
	gen8_emit_ds_invariant(sna);
	gen8_emit_gs_invariant(sna);
	gen8_emit_sol_invariant(sna);
	gen8_emit_clip_invariant(sna);
	gen8_emit_sf_invariant(sna);
	gen8_emit_wm_invariant(sna);
	gen8_emit_cc_invariant(sna);

	sna->render_state.gen8.needs_invariant = false;
}
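
/* Added note: blend state for every (src, dst) factor pair is
 * pre-baked by gen8_create_blend_state() further below, each entry
 * padded to 64 bytes, so gen8_emit_cc() can turn the blend index
 * directly into a dynamic-state offset (blend *
 * GEN8_BLEND_STATE_PADDED_SIZE) and recover the two factors with a
 * div/mod by GEN8_BLENDFACTOR_COUNT for the PS_BLEND fast path.
 */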
static void
gen8_emit_cc(struct sna *sna, uint32_t blend)
{
	struct gen8_render_state *render = &sna->render_state.gen8;

	if (render->blend == blend)
		return;

	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
	     __FUNCTION__, blend, render->blend,
	     blend / GEN8_BLENDFACTOR_COUNT,
	     blend % GEN8_BLENDFACTOR_COUNT));

	assert(blend < GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT);
	assert(blend / GEN8_BLENDFACTOR_COUNT > 0);
	assert(blend % GEN8_BLENDFACTOR_COUNT > 0);

	/* XXX can have up to 8 blend states preloaded, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
	if (blend != GEN8_BLEND(NO_BLEND)) {
		uint32_t src = blend / GEN8_BLENDFACTOR_COUNT;
		uint32_t dst = blend % GEN8_BLENDFACTOR_COUNT;
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
			  PS_BLEND_COLOR_BLEND_ENABLE |
			  src << PS_BLEND_SRC_ALPHA_SHIFT |
			  dst << PS_BLEND_DST_ALPHA_SHIFT |
			  src << PS_BLEND_SRC_SHIFT |
			  dst << PS_BLEND_DST_SHIFT);
	} else
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);

	assert(is_aligned(render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE, 64));
	OUT_BATCH(GEN8_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE) | 1);

	/* Force a CC_STATE pointer change to improve blend performance */
	OUT_BATCH(GEN8_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	render->blend = blend;
}

static void
gen8_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen8.samplers == state)
		return;

	sna->render_state.gen8.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert(2 * sizeof(struct gen8_sampler_state) == 32);
	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen8.wm_state + state * 2 * sizeof(struct gen8_sampler_state));
}

static void
gen8_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen8.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));

	sna->render_state.gen8.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN8_3DSTATE_SBE | (4 - 2));
	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
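
/* Added note: wm_kernel[kernel][0..2] hold the dynamic-state offsets
 * of the 8-, 16- and 32-pixel dispatch variants respectively (zero
 * when that width is compiled out, cf. USE_*_PIXEL_DISPATCH), which
 * is why gen8_emit_wm() gates each PS_*_DISPATCH_ENABLE bit on the
 * corresponding entry being non-zero.
 */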
static void
gen8_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	assert(kernel < ARRAY_SIZE(wm_kernels));
	if (sna->render_state.gen8.kernel == kernel)
		return;

	sna->render_state.gen8.kernel = kernel;
	kernels = sna->render_state.gen8.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));
	assert(is_aligned(kernels[0], 64));
	assert(is_aligned(kernels[1], 64));
	assert(is_aligned(kernels[2], 64));

	OUT_BATCH(GEN8_3DSTATE_PS | (12 - 2));
	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
		  PS_VECTOR_MASK_ENABLE |
		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH64(0); /* scratch address */
	OUT_BATCH(PS_MAX_THREADS |
		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH64(kernels[2]);
	OUT_BATCH64(kernels[1]);
}

static bool
gen8_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen8.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen8.surface_table = offset;
	return true;
}

static bool
gen8_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen8.drawrect_limit == limit &&
	    sna->render_state.gen8.drawrect_offset == offset)
		return true;

	sna->render_state.gen8.drawrect_offset = offset;
	sna->render_state.gen8.drawrect_limit = limit;

	OUT_BATCH(GEN8_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
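
/* Added note: the vertex-element id packs two 2-bit float counts: the
 * low two bits describe the source texcoord (0 meaning a pair of
 * signed 16-bit integers rather than floats) and the upper bits the
 * optional mask texcoord, so "id >> 2" doubles as the has-mask test
 * used below and when choosing the SF output count.
 */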
static void
gen8_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen8_render_state *render = &sna->render_state.gen8;
	uint32_t src_format, dw;
	int id = GEN8_VERTEX(op->u.gen8.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN8_3DSTATE_VERTEX_ELEMENTS |
		  ((2 * (3 + has_mask)) + 1 - 2));

	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3));
	dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = SURFACEFORMAT_R16G16_SSCALED;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = SURFACEFORMAT_R32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = SURFACEFORMAT_R32G32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  src_format << VE_FORMAT_SHIFT |
		  4 << VE_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
		dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = SURFACEFORMAT_R32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = SURFACEFORMAT_R32G32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
			  src_format << VE_FORMAT_SHIFT |
			  offset << VE_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
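
/* Added note: three flavours of PIPE_CONTROL follow, used by
 * gen8_emit_state(): a CS-stalling write- and texture-cache flush
 * when a source bo is dirty, a write-cache flush (optionally
 * stalling) when the previous operation read the destination, and a
 * bare scoreboard stall when only the binding table changed.
 */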
inline static void
gen8_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH |
		  PIPE_CONTROL_TC_FLUSH |
		  PIPE_CONTROL_CS_STALL);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall)
		stall = (PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_STALL_AT_SCOREBOARD);

	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_CS_STALL |
		  PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

static void
gen8_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen8.emit_flush && GEN8_READS_DST(op->u.gen8.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	need_stall = sna->render_state.gen8.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen8_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	if (need_invalidate) {
		gen8_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen8_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
	gen8_emit_sampler(sna, GEN8_SAMPLER(op->u.gen8.flags));
	gen8_emit_sf(sna, GEN8_VERTEX(op->u.gen8.flags) >> 2);
	gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
	gen8_emit_vertex_elements(sna, op);
	gen8_emit_binding_table(sna, wm_binding_table);

	sna->render_state.gen8.emit_flush = GEN8_READS_DST(op->u.gen8.flags);
}
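
/* Added note: component-alpha ops that also need src alpha cannot be
 * blended in a single pass, so gen8_magic_ca_pass() replays exactly
 * the vertices emitted since vertex_start with a PictOpAdd blend and
 * the kernel chosen for PictOpAdd to accumulate the second term.
 */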
static bool gen8_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen8_render_state *state = &sna->render_state.gen8;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna,
		     GEN8_BLEND(gen8_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen8_emit_wm(sna,
		     gen8_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen8_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t));

	sampler_state->ss0.lod_preclamp = 2; /* GL mode */
	sampler_state->ss0.default_color_mode = 1;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = MAPFILTER_LINEAR;
		break;
	}

	/* XXX bicubic filter using MAPFILTER_FLEXIBLE */

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR;
		break;
	}
}

static void
sampler_copy_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static void
sampler_fill_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
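
/* Added note: sampler states are written in (source, mask) pairs; the
 * copy and fill pairs above occupy the first two slots of the table,
 * which is why SAMPLER_OFFSET() biases the generic filter/extend
 * combinations by 2 and gen8_emit_sampler() scales the index by two
 * gen8_sampler_state structs.
 */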
static uint32_t
gen8_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return SURFACE_TILED;
	case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y;
	}
}

#define MOCS_WT (2 << 5)
#define MOCS_WB (3 << 5)
#define MOCS_eLLC_ONLY (0 << 3)
#define MOCS_LLC_ONLY (1 << 3)
#define MOCS_eLLC_LLC (2 << 3)
#define MOCS_ALL_CACHES (3 << 3)

/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen8_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		assert(offset >= sna->kgem.surface);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen8_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	if (is_dst) {
		ss[0] |= SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = (is_dst && is_uncached(sna, bo)) ? 0 :
		is_scanout ? (MOCS_WT | MOCS_ALL_CACHES) << 24 :
		(MOCS_WB | MOCS_ALL_CACHES) << 24;
	ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	*(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, *(uint64_t *)(ss+8),
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
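
/* Added note: surface states are allocated downwards from the top of
 * the batch via sna->kgem.surface and memoised per-bo with
 * kgem_bo_get_binding(), keyed on format | is_dst << 30 |
 * is_scanout << 31, so rebinding the same bo within one batch is just
 * a lookup.
 */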
"render" : "sampler")); 1412 1413 return offset * sizeof(uint32_t); 1414} 1415 1416static void gen8_emit_vertex_buffer(struct sna *sna, 1417 const struct sna_composite_op *op) 1418{ 1419 int id = GEN8_VERTEX(op->u.gen8.flags); 1420 1421 OUT_BATCH(GEN8_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1422 OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | 1423 4*op->floats_per_vertex); 1424 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1425 OUT_BATCH64(0); 1426 OUT_BATCH(~0); /* buffer size: disabled */ 1427 1428 sna->render.vb_id |= 1 << id; 1429} 1430 1431static void gen8_emit_primitive(struct sna *sna) 1432{ 1433 if (sna->kgem.nbatch == sna->render_state.gen8.last_primitive) { 1434 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1435 return; 1436 } 1437 1438 OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2)); 1439 OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1440 sna->render.vertex_offset = sna->kgem.nbatch; 1441 OUT_BATCH(0); /* vertex count, to be filled in later */ 1442 OUT_BATCH(sna->render.vertex_index); 1443 OUT_BATCH(1); /* single instance */ 1444 OUT_BATCH(0); /* start instance location */ 1445 OUT_BATCH(0); /* index buffer offset, ignored */ 1446 sna->render.vertex_start = sna->render.vertex_index; 1447 1448 sna->render_state.gen8.last_primitive = sna->kgem.nbatch; 1449} 1450 1451static bool gen8_rectangle_begin(struct sna *sna, 1452 const struct sna_composite_op *op) 1453{ 1454 int id = 1 << GEN8_VERTEX(op->u.gen8.flags); 1455 int ndwords; 1456 1457 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1458 return true; 1459 1460 ndwords = op->need_magic_ca_pass ? 60 : 6; 1461 if ((sna->render.vb_id & id) == 0) 1462 ndwords += 5; 1463 if (!kgem_check_batch(&sna->kgem, ndwords)) 1464 return false; 1465 1466 if ((sna->render.vb_id & id) == 0) 1467 gen8_emit_vertex_buffer(sna, op); 1468 1469 gen8_emit_primitive(sna); 1470 return true; 1471} 1472 1473static int gen8_get_rectangles__flush(struct sna *sna, 1474 const struct sna_composite_op *op) 1475{ 1476 /* Preventing discarding new vbo after lock contention */ 1477 if (sna_vertex_wait__locked(&sna->render)) { 1478 int rem = vertex_space(sna); 1479 if (rem > op->floats_per_rect) 1480 return rem; 1481 } 1482 1483 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 
inline static int gen8_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen8_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen8_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

inline static uint32_t *gen8_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	assert(sna->kgem.surface <= 16384);
	sna->kgem.surface -= SURFACE_DW;
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

static void
gen8_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8*2));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen8.needs_invariant)
		gen8_emit_invariant(sna);
}
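
/* Added note: gen8_emit_composite_state() below speculatively writes
 * a fresh binding table and then compares it with the previous one;
 * if the dst/src (and mask) entries match, it rewinds kgem.surface
 * and reuses the old table. The low bit of the offset handed to
 * gen8_emit_state() carries the "dst was dirty" flush hint.
 */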
static void gen8_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen8_get_batch(sna, op);

	binding_table = gen8_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen8_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen8_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen8_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen8_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen8.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen8.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0])
		dirty = 0;

	gen8_emit_state(sna, op, offset | dirty);
}

static void
gen8_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen8.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen8.floats_per_vertex, op->floats_per_vertex));
		gen8_vertex_align(sna, op);
		sna->render_state.gen8.floats_per_vertex = op->floats_per_vertex;
	}
}

fastcall static void
gen8_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen8_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);

	DBG((" %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen8_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG((" %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen8_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
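
/* Added note: the __thread variant below reserves its slice of the
 * vbo while holding the render lock, then drops the lock for the
 * actual emit_boxes() copy so that other threads can fill their own
 * vertices concurrently.
 */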
static void
gen8_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

static uint32_t
gen8_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	COMPILE_TIME_ASSERT(((GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0);

	base = sna_static_stream_map(stream,
				     GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT * GEN8_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN8_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN8_BLENDFACTOR_COUNT; dst++) {
			struct gen8_blend_state *blend =
				(struct gen8_blend_state *)ptr;

			assert(((ptr - base) & 63) == 0);
			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);
			COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4);

			blend->rt.post_blend_clamp = 1;
			blend->rt.pre_blend_clamp = 1;

			blend->rt.color_blend =
				!(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE);
			blend->rt.dest_blend_factor = dst;
			blend->rt.source_blend_factor = src;
			blend->rt.color_blend_function = BLENDFUNCTION_ADD;

			blend->rt.dest_alpha_blend_factor = dst;
			blend->rt.source_alpha_blend_factor = src;
			blend->rt.alpha_blend_function = BLENDFUNCTION_ADD;

			ptr += GEN8_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
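
/* Added note: gen8_composite_picture() reduces a source Picture to a
 * renderable channel by trying the cheap cases first: solid colour,
 * then linear gradient, then software fixup for gradients, alpha maps
 * and unsupported repeat/filter modes, and finally extraction or
 * upload when the pixmap exceeds the 3D pipe limits.
 */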
static int
gen8_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen8_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen8_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen8_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

inline static bool gen8_composite_channel_convert(struct sna_composite_channel *channel)
{
	if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format)))
		return false;

	channel->repeat = gen8_repeat(channel->repeat);
	channel->filter = gen8_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen8_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);

	return true;
}

static void gen8_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
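
/* Added note: when the composite covers the whole pixmap and damage
 * can be ignored, gen8_composite_set_target() marks the target bo as
 * REPLACES, allowing any still-queued writes to the old contents to
 * be dropped via kgem_bo_pair_undo().
 */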
op->dst.width && h == op->dst.height) 1968 hint |= REPLACES; 1969 } 1970 1971 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1972 if (op->dst.bo == NULL) 1973 return false; 1974 1975 if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 1976 return false; 1977 1978 if (hint & REPLACES) { 1979 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1980 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1981 } 1982 1983 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1984 &op->dst.x, &op->dst.y); 1985 1986 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1987 __FUNCTION__, 1988 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1989 op->dst.width, op->dst.height, 1990 op->dst.bo->pitch, 1991 op->dst.x, op->dst.y, 1992 op->damage ? *op->damage : (void *)-1)); 1993 1994 assert(op->dst.bo->proxy == NULL); 1995 1996 if (too_large(op->dst.width, op->dst.height) && 1997 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1998 return false; 1999 2000 return true; 2001} 2002 2003static bool 2004try_blt(struct sna *sna, 2005 PicturePtr dst, PicturePtr src, 2006 int width, int height) 2007{ 2008 struct kgem_bo *bo; 2009 2010 if (sna->kgem.mode == KGEM_BLT) { 2011 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2012 return true; 2013 } 2014 2015 if (too_large(width, height)) { 2016 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2017 __FUNCTION__, width, height)); 2018 return true; 2019 } 2020 2021 bo = __sna_drawable_peek_bo(dst->pDrawable); 2022 if (bo == NULL) 2023 return true; 2024 if (bo->rq) 2025 return RQ_IS_BLT(bo->rq); 2026 2027 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2028 return true; 2029 2030 if (src->pDrawable) { 2031 bo = __sna_drawable_peek_bo(src->pDrawable); 2032 if (bo == NULL) 2033 return true; 2034 2035 if (prefer_blt_bo(sna, bo)) 2036 return RQ_IS_BLT(bo->rq); 2037 } 2038 2039 if (sna->kgem.ring == KGEM_BLT) { 2040 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2041 return true; 2042 } 2043 2044 return false; 2045} 2046 2047static bool 2048check_gradient(PicturePtr picture, bool precise) 2049{ 2050 if (picture->pDrawable) 2051 return false; 2052 2053 switch (picture->pSourcePict->type) { 2054 case SourcePictTypeSolidFill: 2055 case SourcePictTypeLinear: 2056 return false; 2057 default: 2058 return precise; 2059 } 2060} 2061 2062static bool 2063has_alphamap(PicturePtr p) 2064{ 2065 return p->alphaMap != NULL; 2066} 2067 2068static bool 2069need_upload(PicturePtr p) 2070{ 2071 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2072} 2073 2074static bool 2075source_is_busy(PixmapPtr pixmap) 2076{ 2077 struct sna_pixmap *priv = sna_pixmap(pixmap); 2078 if (priv == NULL || priv->clear) 2079 return false; 2080 2081 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2082 return true; 2083 2084 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2085 return true; 2086 2087 return priv->gpu_damage && !priv->cpu_damage; 2088} 2089 2090static bool 2091source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2092{ 2093 if (sna_picture_is_solid(p, NULL)) 2094 return false; 2095 2096 if (p->pSourcePict) 2097 return check_gradient(p, precise); 2098 2099 if (!gen8_check_repeat(p) || !gen8_check_format(p->format)) 2100 return true; 2101 2102 if (pixmap && source_is_busy(pixmap)) 2103 return false; 2104 2105 return has_alphamap(p) || !gen8_check_filter(p) || need_upload(p); 2106} 2107 2108static bool 2109gen8_composite_fallback(struct 
sna *sna,
2110 PicturePtr src,
2111 PicturePtr mask,
2112 PicturePtr dst)
2113{
2114 PixmapPtr src_pixmap;
2115 PixmapPtr mask_pixmap;
2116 PixmapPtr dst_pixmap;
2117 bool src_fallback, mask_fallback;
2118
2119 if (!gen8_check_dst_format(dst->format)) {
2120 DBG(("%s: unknown destination format: %d\n",
2121 __FUNCTION__, dst->format));
2122 return true;
2123 }
2124
2125 dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2126
2127 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2128 src_fallback = source_fallback(src, src_pixmap,
2129 dst->polyMode == PolyModePrecise);
2130
2131 if (mask) {
2132 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2133 mask_fallback = source_fallback(mask, mask_pixmap,
2134 dst->polyMode == PolyModePrecise);
2135 } else {
2136 mask_pixmap = NULL;
2137 mask_fallback = false;
2138 }
2139
2140 /* If we are using the destination as a source and need to
2141 * read back in order to upload the source, do it all
2142 * on the CPU.
2143 */
2144 if (src_pixmap == dst_pixmap && src_fallback) {
2145 DBG(("%s: src is dst and will fall back\n",__FUNCTION__));
2146 return true;
2147 }
2148 if (mask_pixmap == dst_pixmap && mask_fallback) {
2149 DBG(("%s: mask is dst and will fall back\n",__FUNCTION__));
2150 return true;
2151 }
2152
2153 /* If anything is on the GPU, push everything out to the GPU */
2154 if (dst_use_gpu(dst_pixmap)) {
2155 DBG(("%s: dst is already on the GPU, try to use GPU\n",
2156 __FUNCTION__));
2157 return false;
2158 }
2159
2160 if (src_pixmap && !src_fallback) {
2161 DBG(("%s: src is already on the GPU, try to use GPU\n",
2162 __FUNCTION__));
2163 return false;
2164 }
2165 if (mask_pixmap && !mask_fallback) {
2166 DBG(("%s: mask is already on the GPU, try to use GPU\n",
2167 __FUNCTION__));
2168 return false;
2169 }
2170
2171 /* However if the dst is not on the GPU and we need to
2172 * render one of the sources using the CPU, we may
2173 * as well do the entire operation in place on the CPU.
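 * In short: prefer the GPU as soon as any operand already lives
 * there, and fall back whenever a source would have to be rendered
 * on the CPU anyway.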
2174 */ 2175 if (src_fallback) { 2176 DBG(("%s: dst is on the CPU and src will fallback\n", 2177 __FUNCTION__)); 2178 return true; 2179 } 2180 2181 if (mask && mask_fallback) { 2182 DBG(("%s: dst is on the CPU and mask will fallback\n", 2183 __FUNCTION__)); 2184 return true; 2185 } 2186 2187 if (too_large(dst_pixmap->drawable.width, 2188 dst_pixmap->drawable.height) && 2189 dst_is_cpu(dst_pixmap)) { 2190 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2191 return true; 2192 } 2193 2194 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2195 __FUNCTION__)); 2196 return dst_use_cpu(dst_pixmap); 2197} 2198 2199static int 2200reuse_source(struct sna *sna, 2201 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2202 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2203{ 2204 uint32_t color; 2205 2206 if (src_x != msk_x || src_y != msk_y) 2207 return false; 2208 2209 if (src == mask) { 2210 DBG(("%s: mask is source\n", __FUNCTION__)); 2211 *mc = *sc; 2212 mc->bo = kgem_bo_reference(mc->bo); 2213 return true; 2214 } 2215 2216 if (sna_picture_is_solid(mask, &color)) 2217 return gen4_channel_init_solid(sna, mc, color); 2218 2219 if (sc->is_solid) 2220 return false; 2221 2222 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2223 return false; 2224 2225 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2226 2227 if (!sna_transform_equal(src->transform, mask->transform)) 2228 return false; 2229 2230 if (!sna_picture_alphamap_equal(src, mask)) 2231 return false; 2232 2233 if (!gen8_check_repeat(mask)) 2234 return false; 2235 2236 if (!gen8_check_filter(mask)) 2237 return false; 2238 2239 if (!gen8_check_format(mask->format)) 2240 return false; 2241 2242 DBG(("%s: reusing source channel for mask with a twist\n", 2243 __FUNCTION__)); 2244 2245 *mc = *sc; 2246 mc->repeat = gen8_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 2247 mc->filter = gen8_filter(mask->filter); 2248 mc->pict_format = mask->format; 2249 mc->card_format = gen8_get_card_format(mask->format); 2250 mc->bo = kgem_bo_reference(mc->bo); 2251 return true; 2252} 2253 2254static bool 2255gen8_render_composite(struct sna *sna, 2256 uint8_t op, 2257 PicturePtr src, 2258 PicturePtr mask, 2259 PicturePtr dst, 2260 int16_t src_x, int16_t src_y, 2261 int16_t msk_x, int16_t msk_y, 2262 int16_t dst_x, int16_t dst_y, 2263 int16_t width, int16_t height, 2264 unsigned flags, 2265 struct sna_composite_op *tmp) 2266{ 2267 if (op >= ARRAY_SIZE(gen8_blend_op)) 2268 return false; 2269 2270 DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2271 width, height, sna->kgem.mode, sna->kgem.ring)); 2272 2273 if (mask == NULL && 2274 try_blt(sna, dst, src, width, height) && 2275 sna_blt_composite(sna, op, 2276 src, dst, 2277 src_x, src_y, 2278 dst_x, dst_y, 2279 width, height, 2280 flags, tmp)) 2281 return true; 2282 2283 if (gen8_composite_fallback(sna, src, mask, dst)) 2284 goto fallback; 2285 2286 if (need_tiling(sna, width, height)) 2287 return sna_tiling_composite(op, src, mask, dst, 2288 src_x, src_y, 2289 msk_x, msk_y, 2290 dst_x, dst_y, 2291 width, height, 2292 tmp); 2293 2294 if (op == PictOpClear && src == sna->clear) 2295 op = PictOpSrc; 2296 tmp->op = op; 2297 if (!gen8_composite_set_target(sna, tmp, dst, 2298 dst_x, dst_y, width, height, 2299 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2300 goto fallback; 2301 2302 switch (gen8_composite_picture(sna, src, &tmp->src, 2303 src_x, src_y, 2304 width, height, 2305 dst_x, dst_y, 2306 dst->polyMode == PolyModePrecise)) { 2307 case -1: 2308 goto cleanup_dst; 2309 case 0: 2310 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2311 goto cleanup_dst; 2312 /* fall through to fixup */ 2313 case 1: 2314 /* Did we just switch rings to prepare the source? */ 2315 if (mask == NULL && 2316 (prefer_blt_composite(sna, tmp) || 2317 unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && 2318 sna_blt_composite__convert(sna, 2319 dst_x, dst_y, width, height, 2320 tmp)) 2321 return true; 2322 2323 if (!gen8_composite_channel_convert(&tmp->src)) 2324 goto cleanup_src; 2325 2326 break; 2327 } 2328 2329 tmp->is_affine = tmp->src.is_affine; 2330 tmp->has_component_alpha = false; 2331 tmp->need_magic_ca_pass = false; 2332 2333 tmp->mask.bo = NULL; 2334 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2335 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2336 2337 if (mask) { 2338 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2339 tmp->has_component_alpha = true; 2340 2341 /* Check if it's component alpha that relies on a source alpha and on 2342 * the source value. We can only get one of those into the single 2343 * source value that we get to blend with. 
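 * For PictOpOver we can still get the correct result with two
 * passes: knock out the destination with OutReverse first, then
 * re-emit the rectangles to add the source contribution back in
 * (see gen8_magic_ca_pass, invoked from the done callback).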
2344 */ 2345 if (gen8_blend_op[op].src_alpha && 2346 (gen8_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 2347 if (op != PictOpOver) 2348 goto cleanup_src; 2349 2350 tmp->need_magic_ca_pass = true; 2351 tmp->op = PictOpOutReverse; 2352 } 2353 } 2354 2355 if (!reuse_source(sna, 2356 src, &tmp->src, src_x, src_y, 2357 mask, &tmp->mask, msk_x, msk_y)) { 2358 switch (gen8_composite_picture(sna, mask, &tmp->mask, 2359 msk_x, msk_y, 2360 width, height, 2361 dst_x, dst_y, 2362 dst->polyMode == PolyModePrecise)) { 2363 case -1: 2364 goto cleanup_src; 2365 case 0: 2366 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2367 goto cleanup_src; 2368 /* fall through to fixup */ 2369 case 1: 2370 if (!gen8_composite_channel_convert(&tmp->mask)) 2371 goto cleanup_mask; 2372 break; 2373 } 2374 } 2375 2376 tmp->is_affine &= tmp->mask.is_affine; 2377 } 2378 2379 tmp->u.gen8.flags = 2380 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2381 tmp->src.repeat, 2382 tmp->mask.filter, 2383 tmp->mask.repeat), 2384 gen8_get_blend(tmp->op, 2385 tmp->has_component_alpha, 2386 tmp->dst.format), 2387 gen8_choose_composite_kernel(tmp->op, 2388 tmp->mask.bo != NULL, 2389 tmp->has_component_alpha, 2390 tmp->is_affine), 2391 gen4_choose_composite_emitter(sna, tmp)); 2392 2393 tmp->blt = gen8_render_composite_blt; 2394 tmp->box = gen8_render_composite_box; 2395 tmp->boxes = gen8_render_composite_boxes__blt; 2396 if (tmp->emit_boxes){ 2397 tmp->boxes = gen8_render_composite_boxes; 2398 tmp->thread_boxes = gen8_render_composite_boxes__thread; 2399 } 2400 tmp->done = gen8_render_composite_done; 2401 2402 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2403 if (!kgem_check_bo(&sna->kgem, 2404 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2405 NULL)) { 2406 kgem_submit(&sna->kgem); 2407 if (!kgem_check_bo(&sna->kgem, 2408 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2409 NULL)) 2410 goto cleanup_mask; 2411 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2412 } 2413 2414 gen8_align_vertex(sna, tmp); 2415 gen8_emit_composite_state(sna, tmp); 2416 return true; 2417 2418cleanup_mask: 2419 if (tmp->mask.bo) { 2420 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2421 tmp->mask.bo = NULL; 2422 } 2423cleanup_src: 2424 if (tmp->src.bo) { 2425 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2426 tmp->src.bo = NULL; 2427 } 2428cleanup_dst: 2429 if (tmp->redirect.real_bo) { 2430 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2431 tmp->redirect.real_bo = NULL; 2432 } 2433fallback: 2434 return (mask == NULL && 2435 sna_blt_composite(sna, op, 2436 src, dst, 2437 src_x, src_y, 2438 dst_x, dst_y, 2439 width, height, 2440 flags | COMPOSITE_FALLBACK, tmp)); 2441} 2442 2443#if !NO_COMPOSITE_SPANS 2444fastcall static void 2445gen8_render_composite_spans_box(struct sna *sna, 2446 const struct sna_composite_spans_op *op, 2447 const BoxRec *box, float opacity) 2448{ 2449 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2450 __FUNCTION__, 2451 op->base.src.offset[0], op->base.src.offset[1], 2452 opacity, 2453 op->base.dst.x, op->base.dst.y, 2454 box->x1, box->y1, 2455 box->x2 - box->x1, 2456 box->y2 - box->y1)); 2457 2458 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_composite_state); 2459 op->prim_emit(sna, op, box, opacity); 2460} 2461 2462static void 2463gen8_render_composite_spans_boxes(struct sna *sna, 2464 const struct sna_composite_spans_op *op, 2465 const BoxRec *box, int nbox, 2466 float opacity) 2467{ 2468 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2469 __FUNCTION__, nbox, 2470 op->base.src.offset[0], 
op->base.src.offset[1], 2471 opacity, 2472 op->base.dst.x, op->base.dst.y)); 2473 2474 do { 2475 int nbox_this_time; 2476 2477 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2478 gen8_emit_composite_state); 2479 nbox -= nbox_this_time; 2480 2481 do { 2482 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2483 box->x1, box->y1, 2484 box->x2 - box->x1, 2485 box->y2 - box->y1)); 2486 2487 op->prim_emit(sna, op, box++, opacity); 2488 } while (--nbox_this_time); 2489 } while (nbox); 2490} 2491 2492fastcall static void 2493gen8_render_composite_spans_boxes__thread(struct sna *sna, 2494 const struct sna_composite_spans_op *op, 2495 const struct sna_opacity_box *box, 2496 int nbox) 2497{ 2498 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2499 __FUNCTION__, nbox, 2500 op->base.src.offset[0], op->base.src.offset[1], 2501 op->base.dst.x, op->base.dst.y)); 2502 2503 sna_vertex_lock(&sna->render); 2504 do { 2505 int nbox_this_time; 2506 float *v; 2507 2508 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2509 gen8_emit_composite_state); 2510 assert(nbox_this_time); 2511 nbox -= nbox_this_time; 2512 2513 v = sna->render.vertices + sna->render.vertex_used; 2514 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2515 2516 sna_vertex_acquire__locked(&sna->render); 2517 sna_vertex_unlock(&sna->render); 2518 2519 op->emit_boxes(op, box, nbox_this_time, v); 2520 box += nbox_this_time; 2521 2522 sna_vertex_lock(&sna->render); 2523 sna_vertex_release__locked(&sna->render); 2524 } while (nbox); 2525 sna_vertex_unlock(&sna->render); 2526} 2527 2528fastcall static void 2529gen8_render_composite_spans_done(struct sna *sna, 2530 const struct sna_composite_spans_op *op) 2531{ 2532 if (sna->render.vertex_offset) 2533 gen8_vertex_flush(sna); 2534 2535 DBG(("%s()\n", __FUNCTION__)); 2536 2537 if (op->base.src.bo) 2538 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2539 2540 sna_render_composite_redirect_done(sna, &op->base); 2541} 2542 2543static bool 2544gen8_check_composite_spans(struct sna *sna, 2545 uint8_t op, PicturePtr src, PicturePtr dst, 2546 int16_t width, int16_t height, unsigned flags) 2547{ 2548 if (op >= ARRAY_SIZE(gen8_blend_op)) 2549 return false; 2550 2551 if (gen8_composite_fallback(sna, src, NULL, dst)) 2552 return false; 2553 2554 if (need_tiling(sna, width, height) && 2555 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2556 DBG(("%s: fallback, tiled operation not on GPU\n", 2557 __FUNCTION__)); 2558 return false; 2559 } 2560 2561 return true; 2562} 2563 2564static bool 2565gen8_render_composite_spans(struct sna *sna, 2566 uint8_t op, 2567 PicturePtr src, 2568 PicturePtr dst, 2569 int16_t src_x, int16_t src_y, 2570 int16_t dst_x, int16_t dst_y, 2571 int16_t width, int16_t height, 2572 unsigned flags, 2573 struct sna_composite_spans_op *tmp) 2574{ 2575 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2576 width, height, flags, sna->kgem.ring)); 2577 2578 assert(gen8_check_composite_spans(sna, op, src, dst, width, height, flags)); 2579 2580 if (need_tiling(sna, width, height)) { 2581 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2582 __FUNCTION__, width, height)); 2583 return sna_tiling_composite_spans(op, src, dst, 2584 src_x, src_y, dst_x, dst_y, 2585 width, height, flags, tmp); 2586 } 2587 2588 tmp->base.op = op; 2589 if (!gen8_composite_set_target(sna, &tmp->base, dst, 2590 dst_x, dst_y, width, height, true)) 2591 return false; 2592 2593 switch (gen8_composite_picture(sna, src, &tmp->base.src, 2594 src_x, src_y, 2595 
width, height, 2596 dst_x, dst_y, 2597 dst->polyMode == PolyModePrecise)) { 2598 case -1: 2599 goto cleanup_dst; 2600 case 0: 2601 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2602 goto cleanup_dst; 2603 /* fall through to fixup */ 2604 case 1: 2605 if (!gen8_composite_channel_convert(&tmp->base.src)) 2606 goto cleanup_src; 2607 break; 2608 } 2609 tmp->base.mask.bo = NULL; 2610 2611 tmp->base.is_affine = tmp->base.src.is_affine; 2612 tmp->base.need_magic_ca_pass = false; 2613 2614 tmp->base.u.gen8.flags = 2615 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2616 tmp->base.src.repeat, 2617 SAMPLER_FILTER_NEAREST, 2618 SAMPLER_EXTEND_PAD), 2619 gen8_get_blend(tmp->base.op, false, tmp->base.dst.format), 2620 GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2621 gen4_choose_spans_emitter(sna, tmp)); 2622 2623 tmp->box = gen8_render_composite_spans_box; 2624 tmp->boxes = gen8_render_composite_spans_boxes; 2625 if (tmp->emit_boxes) 2626 tmp->thread_boxes = gen8_render_composite_spans_boxes__thread; 2627 tmp->done = gen8_render_composite_spans_done; 2628 2629 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2630 if (!kgem_check_bo(&sna->kgem, 2631 tmp->base.dst.bo, tmp->base.src.bo, 2632 NULL)) { 2633 kgem_submit(&sna->kgem); 2634 if (!kgem_check_bo(&sna->kgem, 2635 tmp->base.dst.bo, tmp->base.src.bo, 2636 NULL)) 2637 goto cleanup_src; 2638 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2639 } 2640 2641 gen8_align_vertex(sna, &tmp->base); 2642 gen8_emit_composite_state(sna, &tmp->base); 2643 return true; 2644 2645cleanup_src: 2646 if (tmp->base.src.bo) 2647 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2648cleanup_dst: 2649 if (tmp->base.redirect.real_bo) 2650 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2651 return false; 2652} 2653#endif 2654 2655static void 2656gen8_emit_copy_state(struct sna *sna, 2657 const struct sna_composite_op *op) 2658{ 2659 uint32_t *binding_table; 2660 uint16_t offset, dirty; 2661 2662 gen8_get_batch(sna, op); 2663 2664 binding_table = gen8_composite_get_binding_table(sna, &offset); 2665 2666 dirty = kgem_bo_is_dirty(op->dst.bo); 2667 2668 binding_table[0] = 2669 gen8_bind_bo(sna, 2670 op->dst.bo, op->dst.width, op->dst.height, 2671 gen8_get_dest_format(op->dst.format), 2672 true); 2673 binding_table[1] = 2674 gen8_bind_bo(sna, 2675 op->src.bo, op->src.width, op->src.height, 2676 op->src.card_format, 2677 false); 2678 2679 if (sna->kgem.surface == offset && 2680 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 2681 sna->kgem.surface += SURFACE_DW; 2682 offset = sna->render_state.gen8.surface_table; 2683 } 2684 2685 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 2686 dirty = 0; 2687 2688 assert(!GEN8_READS_DST(op->u.gen8.flags)); 2689 gen8_emit_state(sna, op, offset | dirty); 2690} 2691 2692static inline bool 2693prefer_blt_copy(struct sna *sna, 2694 struct kgem_bo *src_bo, 2695 struct kgem_bo *dst_bo, 2696 unsigned flags) 2697{ 2698 if (sna->kgem.mode == KGEM_BLT) 2699 return true; 2700 2701 assert((flags & COPY_SYNC) == 0); 2702 2703 if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2704 return true; 2705 2706 if (untiled_tlb_miss(src_bo) || 2707 untiled_tlb_miss(dst_bo)) 2708 return true; 2709 2710 if (force_blt_ring(sna)) 2711 return true; 2712 2713 if (kgem_bo_is_render(dst_bo) || 2714 kgem_bo_is_render(src_bo)) 2715 return false; 2716 2717 if (prefer_render_ring(sna, dst_bo)) 2718 return false; 2719 2720 if (!prefer_blt_ring(sna, dst_bo, flags)) 2721 
return false; 2722 2723 return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2724} 2725 2726static bool 2727gen8_render_copy_boxes(struct sna *sna, uint8_t alu, 2728 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2729 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2730 const BoxRec *box, int n, unsigned flags) 2731{ 2732 struct sna_composite_op tmp; 2733 BoxRec extents; 2734 2735 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2736 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2737 src_bo == dst_bo, 2738 overlaps(sna, 2739 src_bo, src_dx, src_dy, 2740 dst_bo, dst_dx, dst_dy, 2741 box, n, flags, &extents))); 2742 2743 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2744 sna_blt_compare_depth(src, dst) && 2745 sna_blt_copy_boxes(sna, alu, 2746 src_bo, src_dx, src_dy, 2747 dst_bo, dst_dx, dst_dy, 2748 dst->bitsPerPixel, 2749 box, n)) 2750 return true; 2751 2752 if (!(alu == GXcopy || alu == GXclear) || 2753 unaligned(src_bo, src->bitsPerPixel) || 2754 unaligned(dst_bo, dst->bitsPerPixel)) { 2755fallback_blt: 2756 DBG(("%s: fallback blt\n", __FUNCTION__)); 2757 if (!sna_blt_compare_depth(src, dst)) 2758 return false; 2759 2760 return sna_blt_copy_boxes_fallback(sna, alu, 2761 src, src_bo, src_dx, src_dy, 2762 dst, dst_bo, dst_dx, dst_dy, 2763 box, n); 2764 } 2765 2766 if (overlaps(sna, 2767 src_bo, src_dx, src_dy, 2768 dst_bo, dst_dx, dst_dy, 2769 box, n, flags, 2770 &extents)) { 2771 bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2772 2773 if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2774 sna_blt_copy_boxes(sna, alu, 2775 src_bo, src_dx, src_dy, 2776 dst_bo, dst_dx, dst_dy, 2777 dst->bitsPerPixel, 2778 box, n)) 2779 return true; 2780 2781 if (big) 2782 goto fallback_blt; 2783 2784 assert(src_bo == dst_bo); 2785 assert(src->depth == dst->depth); 2786 assert(src->width == dst->width); 2787 assert(src->height == dst->height); 2788 return sna_render_copy_boxes__overlap(sna, alu, 2789 src, src_bo, 2790 src_dx, src_dy, 2791 dst_dx, dst_dy, 2792 box, n, &extents); 2793 } 2794 2795 if (dst->depth == src->depth) { 2796 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2797 tmp.src.pict_format = tmp.dst.format; 2798 } else { 2799 tmp.dst.format = sna_format_for_depth(dst->depth); 2800 tmp.src.pict_format = sna_format_for_depth(src->depth); 2801 } 2802 if (!gen8_check_format(tmp.src.pict_format)) 2803 goto fallback_blt; 2804 2805 tmp.dst.pixmap = (PixmapPtr)dst; 2806 tmp.dst.width = dst->width; 2807 tmp.dst.height = dst->height; 2808 tmp.dst.bo = dst_bo; 2809 tmp.dst.x = tmp.dst.y = 0; 2810 tmp.damage = NULL; 2811 2812 sna_render_composite_redirect_init(&tmp); 2813 if (too_large(tmp.dst.width, tmp.dst.height)) { 2814 int i; 2815 2816 extents = box[0]; 2817 for (i = 1; i < n; i++) { 2818 if (box[i].x1 < extents.x1) 2819 extents.x1 = box[i].x1; 2820 if (box[i].y1 < extents.y1) 2821 extents.y1 = box[i].y1; 2822 2823 if (box[i].x2 > extents.x2) 2824 extents.x2 = box[i].x2; 2825 if (box[i].y2 > extents.y2) 2826 extents.y2 = box[i].y2; 2827 } 2828 2829 if (!sna_render_composite_redirect(sna, &tmp, 2830 extents.x1 + dst_dx, 2831 extents.y1 + dst_dy, 2832 extents.x2 - extents.x1, 2833 extents.y2 - extents.y1, 2834 n > 1)) 2835 goto fallback_tiled; 2836 } 2837 2838 tmp.src.card_format = gen8_get_card_format(tmp.src.pict_format); 2839 if (too_large(src->width, src->height)) { 2840 int i; 2841 2842 extents = box[0]; 2843 for (i = 1; i < n; i++) { 
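			/* Accumulate the union of all the boxes so that
			 * only the part of the oversized source that is
			 * actually referenced has to be extracted. */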
2844 if (box[i].x1 < extents.x1) 2845 extents.x1 = box[i].x1; 2846 if (box[i].y1 < extents.y1) 2847 extents.y1 = box[i].y1; 2848 2849 if (box[i].x2 > extents.x2) 2850 extents.x2 = box[i].x2; 2851 if (box[i].y2 > extents.y2) 2852 extents.y2 = box[i].y2; 2853 } 2854 2855 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2856 extents.x1 + src_dx, 2857 extents.y1 + src_dy, 2858 extents.x2 - extents.x1, 2859 extents.y2 - extents.y1)) 2860 goto fallback_tiled_dst; 2861 } else { 2862 tmp.src.bo = src_bo; 2863 tmp.src.width = src->width; 2864 tmp.src.height = src->height; 2865 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2866 } 2867 2868 tmp.mask.bo = NULL; 2869 2870 tmp.floats_per_vertex = 2; 2871 tmp.floats_per_rect = 6; 2872 tmp.need_magic_ca_pass = 0; 2873 2874 tmp.u.gen8.flags = COPY_FLAGS(alu); 2875 2876 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2877 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2878 kgem_submit(&sna->kgem); 2879 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2880 if (tmp.src.bo != src_bo) 2881 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2882 if (tmp.redirect.real_bo) 2883 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2884 goto fallback_blt; 2885 } 2886 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2887 } 2888 2889 src_dx += tmp.src.offset[0]; 2890 src_dy += tmp.src.offset[1]; 2891 2892 dst_dx += tmp.dst.x; 2893 dst_dy += tmp.dst.y; 2894 2895 tmp.dst.x = tmp.dst.y = 0; 2896 2897 gen8_align_vertex(sna, &tmp); 2898 gen8_emit_copy_state(sna, &tmp); 2899 2900 do { 2901 int16_t *v; 2902 int n_this_time; 2903 2904 n_this_time = gen8_get_rectangles(sna, &tmp, n, 2905 gen8_emit_copy_state); 2906 n -= n_this_time; 2907 2908 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2909 sna->render.vertex_used += 6 * n_this_time; 2910 assert(sna->render.vertex_used <= sna->render.vertex_size); 2911 do { 2912 2913 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2914 box->x1 + src_dx, box->y1 + src_dy, 2915 box->x1 + dst_dx, box->y1 + dst_dy, 2916 box->x2 - box->x1, box->y2 - box->y1)); 2917 v[0] = box->x2 + dst_dx; 2918 v[2] = box->x2 + src_dx; 2919 v[1] = v[5] = box->y2 + dst_dy; 2920 v[3] = v[7] = box->y2 + src_dy; 2921 v[8] = v[4] = box->x1 + dst_dx; 2922 v[10] = v[6] = box->x1 + src_dx; 2923 v[9] = box->y1 + dst_dy; 2924 v[11] = box->y1 + src_dy; 2925 v += 12; box++; 2926 } while (--n_this_time); 2927 } while (n); 2928 2929 gen8_vertex_flush(sna); 2930 sna_render_composite_redirect_done(sna, &tmp); 2931 if (tmp.src.bo != src_bo) 2932 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2933 return true; 2934 2935fallback_tiled_dst: 2936 if (tmp.redirect.real_bo) 2937 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2938fallback_tiled: 2939 DBG(("%s: fallback tiled\n", __FUNCTION__)); 2940 if (sna_blt_compare_depth(src, dst) && 2941 sna_blt_copy_boxes(sna, alu, 2942 src_bo, src_dx, src_dy, 2943 dst_bo, dst_dx, dst_dy, 2944 dst->bitsPerPixel, 2945 box, n)) 2946 return true; 2947 2948 return sna_tiling_copy_boxes(sna, alu, 2949 src, src_bo, src_dx, src_dy, 2950 dst, dst_bo, dst_dx, dst_dy, 2951 box, n); 2952} 2953 2954static void 2955gen8_render_copy_blt(struct sna *sna, 2956 const struct sna_copy_op *op, 2957 int16_t sx, int16_t sy, 2958 int16_t w, int16_t h, 2959 int16_t dx, int16_t dy) 2960{ 2961 int16_t *v; 2962 2963 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_copy_state); 2964 2965 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2966 sna->render.vertex_used += 6; 2967 assert(sna->render.vertex_used <= sna->render.vertex_size); 2968 2969 
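	/* One rectangle is emitted as three corners (bottom-right,
	 * bottom-left, top-left), each an int16 (dst.x, dst.y) pair
	 * followed by an int16 (src.x, src.y) pair; RECTLIST lets the
	 * hardware infer the fourth corner. */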
v[0] = dx+w; v[1] = dy+h;
2970 v[2] = sx+w; v[3] = sy+h;
2971 v[4] = dx; v[5] = dy+h;
2972 v[6] = sx; v[7] = sy+h;
2973 v[8] = dx; v[9] = dy;
2974 v[10] = sx; v[11] = sy;
2975}
2976
2977static void
2978gen8_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2979{
2980 if (sna->render.vertex_offset)
2981 gen8_vertex_flush(sna);
2982}
2983
2984static bool
2985gen8_render_copy(struct sna *sna, uint8_t alu,
2986 PixmapPtr src, struct kgem_bo *src_bo,
2987 PixmapPtr dst, struct kgem_bo *dst_bo,
2988 struct sna_copy_op *op)
2989{
2990 DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
2991 __FUNCTION__, alu,
2992 src->drawable.width, src->drawable.height,
2993 dst->drawable.width, dst->drawable.height));
2994
2995 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
2996 sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2997 sna_blt_copy(sna, alu,
2998 src_bo, dst_bo,
2999 dst->drawable.bitsPerPixel,
3000 op))
3001 return true;
3002
3003 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
3004 too_large(src->drawable.width, src->drawable.height) ||
3005 too_large(dst->drawable.width, dst->drawable.height) ||
3006 unaligned(src_bo, src->drawable.bitsPerPixel) ||
3007 unaligned(dst_bo, dst->drawable.bitsPerPixel)) {
3008fallback:
3009 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3010 return false;
3011
3012 return sna_blt_copy(sna, alu, src_bo, dst_bo,
3013 dst->drawable.bitsPerPixel,
3014 op);
3015 }
3016
3017 if (dst->drawable.depth == src->drawable.depth) {
3018 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
3019 op->base.src.pict_format = op->base.dst.format;
3020 } else {
3021 op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3022 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
3023 }
3024 if (!gen8_check_format(op->base.src.pict_format))
3025 goto fallback;
3026
3027 op->base.dst.pixmap = dst;
3028 op->base.dst.width = dst->drawable.width;
3029 op->base.dst.height = dst->drawable.height;
3030 op->base.dst.bo = dst_bo;
3031
3032 op->base.src.bo = src_bo;
3033 op->base.src.card_format =
3034 gen8_get_card_format(op->base.src.pict_format);
3035 op->base.src.width = src->drawable.width;
3036 op->base.src.height = src->drawable.height;
3037
3038 op->base.mask.bo = NULL;
3039
3040 op->base.floats_per_vertex = 2;
3041 op->base.floats_per_rect = 6;
3042
3043 op->base.u.gen8.flags = COPY_FLAGS(alu);
3044
3045 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
3046 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3047 kgem_submit(&sna->kgem);
3048 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3049 goto fallback;
3050 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
3051 }
3052
3053 gen8_align_vertex(sna, &op->base);
3054 gen8_emit_copy_state(sna, &op->base);
3055
3056 op->blt = gen8_render_copy_blt;
3057 op->done = gen8_render_copy_done;
3058 return true;
3059}
3060
3061static void
3062gen8_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
3063{
3064 uint32_t *binding_table;
3065 uint16_t offset, dirty;
3066
3067 /* XXX Render Target Fast Clear
3068 * Set RTFC Enable in PS and render a rectangle.
3069 * Limited to clearing the full MSC surface only with a
3070 * specific kernel.
3071 */ 3072 3073 gen8_get_batch(sna, op); 3074 3075 binding_table = gen8_composite_get_binding_table(sna, &offset); 3076 3077 dirty = kgem_bo_is_dirty(op->dst.bo); 3078 3079 binding_table[0] = 3080 gen8_bind_bo(sna, 3081 op->dst.bo, op->dst.width, op->dst.height, 3082 gen8_get_dest_format(op->dst.format), 3083 true); 3084 binding_table[1] = 3085 gen8_bind_bo(sna, 3086 op->src.bo, 1, 1, 3087 SURFACEFORMAT_B8G8R8A8_UNORM, 3088 false); 3089 3090 if (sna->kgem.surface == offset && 3091 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 3092 sna->kgem.surface += SURFACE_DW; 3093 offset = sna->render_state.gen8.surface_table; 3094 } 3095 3096 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 3097 dirty = 0; 3098 3099 gen8_emit_state(sna, op, offset | dirty); 3100} 3101 3102static bool 3103gen8_render_fill_boxes(struct sna *sna, 3104 CARD8 op, 3105 PictFormat format, 3106 const xRenderColor *color, 3107 const DrawableRec *dst, struct kgem_bo *dst_bo, 3108 const BoxRec *box, int n) 3109{ 3110 struct sna_composite_op tmp; 3111 uint32_t pixel; 3112 3113 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3114 __FUNCTION__, op, 3115 color->red, color->green, color->blue, color->alpha, (int)format)); 3116 3117 if (op >= ARRAY_SIZE(gen8_blend_op)) { 3118 DBG(("%s: fallback due to unhandled blend op: %d\n", 3119 __FUNCTION__, op)); 3120 return false; 3121 } 3122 3123 if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3124 !gen8_check_dst_format(format) || 3125 unaligned(dst_bo, PICT_FORMAT_BPP(format))) { 3126 uint8_t alu = GXinvalid; 3127 3128 if (op <= PictOpSrc) { 3129 pixel = 0; 3130 if (op == PictOpClear) 3131 alu = GXclear; 3132 else if (sna_get_pixel_from_rgba(&pixel, 3133 color->red, 3134 color->green, 3135 color->blue, 3136 color->alpha, 3137 format)) 3138 alu = GXcopy; 3139 } 3140 3141 if (alu != GXinvalid && 3142 sna_blt_fill_boxes(sna, alu, 3143 dst_bo, dst->bitsPerPixel, 3144 pixel, box, n)) 3145 return true; 3146 3147 if (!gen8_check_dst_format(format)) 3148 return false; 3149 } 3150 3151 if (op == PictOpClear) { 3152 pixel = 0; 3153 op = PictOpSrc; 3154 } else if (!sna_get_pixel_from_rgba(&pixel, 3155 color->red, 3156 color->green, 3157 color->blue, 3158 color->alpha, 3159 PICT_a8r8g8b8)) 3160 return false; 3161 3162 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3163 __FUNCTION__, pixel, n, 3164 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3165 3166 tmp.dst.pixmap = (PixmapPtr)dst; 3167 tmp.dst.width = dst->width; 3168 tmp.dst.height = dst->height; 3169 tmp.dst.format = format; 3170 tmp.dst.bo = dst_bo; 3171 tmp.dst.x = tmp.dst.y = 0; 3172 tmp.damage = NULL; 3173 3174 sna_render_composite_redirect_init(&tmp); 3175 if (too_large(dst->width, dst->height)) { 3176 BoxRec extents; 3177 3178 boxes_extents(box, n, &extents); 3179 if (!sna_render_composite_redirect(sna, &tmp, 3180 extents.x1, extents.y1, 3181 extents.x2 - extents.x1, 3182 extents.y2 - extents.y1, 3183 n > 1)) 3184 return sna_tiling_fill_boxes(sna, op, format, color, 3185 dst, dst_bo, box, n); 3186 } 3187 3188 tmp.src.bo = sna_render_get_solid(sna, pixel); 3189 tmp.mask.bo = NULL; 3190 3191 tmp.floats_per_vertex = 2; 3192 tmp.floats_per_rect = 6; 3193 tmp.need_magic_ca_pass = false; 3194 3195 tmp.u.gen8.flags = FILL_FLAGS(op, format); 3196 3197 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3198 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3199 kgem_submit(&sna->kgem); 3200 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3201 
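			/* Even a fresh batch cannot fit this operation:
			 * undo the setup, releasing the solid source and
			 * any redirection, before reporting failure. */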
kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3202 tmp.src.bo = NULL; 3203 3204 if (tmp.redirect.real_bo) { 3205 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3206 tmp.redirect.real_bo = NULL; 3207 } 3208 3209 return false; 3210 } 3211 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3212 } 3213 3214 gen8_align_vertex(sna, &tmp); 3215 gen8_emit_fill_state(sna, &tmp); 3216 3217 do { 3218 int n_this_time; 3219 int16_t *v; 3220 3221 n_this_time = gen8_get_rectangles(sna, &tmp, n, 3222 gen8_emit_fill_state); 3223 n -= n_this_time; 3224 3225 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3226 sna->render.vertex_used += 6 * n_this_time; 3227 assert(sna->render.vertex_used <= sna->render.vertex_size); 3228 do { 3229 DBG((" (%d, %d), (%d, %d)\n", 3230 box->x1, box->y1, box->x2, box->y2)); 3231 3232 v[0] = box->x2; 3233 v[5] = v[1] = box->y2; 3234 v[8] = v[4] = box->x1; 3235 v[9] = box->y1; 3236 v[2] = v[3] = v[7] = 1; 3237 v[6] = v[10] = v[11] = 0; 3238 v += 12; box++; 3239 } while (--n_this_time); 3240 } while (n); 3241 3242 gen8_vertex_flush(sna); 3243 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3244 sna_render_composite_redirect_done(sna, &tmp); 3245 return true; 3246} 3247 3248static void 3249gen8_render_fill_op_blt(struct sna *sna, 3250 const struct sna_fill_op *op, 3251 int16_t x, int16_t y, int16_t w, int16_t h) 3252{ 3253 int16_t *v; 3254 3255 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3256 3257 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3258 3259 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3260 sna->render.vertex_used += 6; 3261 assert(sna->render.vertex_used <= sna->render.vertex_size); 3262 3263 v[0] = x+w; 3264 v[4] = v[8] = x; 3265 v[1] = v[5] = y+h; 3266 v[9] = y; 3267 3268 v[2] = v[3] = v[7] = 1; 3269 v[6] = v[10] = v[11] = 0; 3270} 3271 3272fastcall static void 3273gen8_render_fill_op_box(struct sna *sna, 3274 const struct sna_fill_op *op, 3275 const BoxRec *box) 3276{ 3277 int16_t *v; 3278 3279 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3280 box->x1, box->y1, box->x2, box->y2)); 3281 3282 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3283 3284 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3285 sna->render.vertex_used += 6; 3286 assert(sna->render.vertex_used <= sna->render.vertex_size); 3287 3288 v[0] = box->x2; 3289 v[8] = v[4] = box->x1; 3290 v[5] = v[1] = box->y2; 3291 v[9] = box->y1; 3292 3293 v[7] = v[2] = v[3] = 1; 3294 v[6] = v[10] = v[11] = 0; 3295} 3296 3297fastcall static void 3298gen8_render_fill_op_boxes(struct sna *sna, 3299 const struct sna_fill_op *op, 3300 const BoxRec *box, 3301 int nbox) 3302{ 3303 DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, 3304 box->x1, box->y1, box->x2, box->y2, nbox)); 3305 3306 do { 3307 int nbox_this_time; 3308 int16_t *v; 3309 3310 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 3311 gen8_emit_fill_state); 3312 nbox -= nbox_this_time; 3313 3314 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3315 sna->render.vertex_used += 6 * nbox_this_time; 3316 assert(sna->render.vertex_used <= sna->render.vertex_size); 3317 3318 do { 3319 v[0] = box->x2; 3320 v[8] = v[4] = box->x1; 3321 v[5] = v[1] = box->y2; 3322 v[9] = box->y1; 3323 v[7] = v[2] = v[3] = 1; 3324 v[6] = v[10] = v[11] = 0; 3325 box++; v += 12; 3326 } while (--nbox_this_time); 3327 } while (nbox); 3328} 3329 3330static void 3331gen8_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3332{ 3333 if (sna->render.vertex_offset) 3334 gen8_vertex_flush(sna); 3335 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3336} 3337 3338static bool 3339gen8_render_fill(struct sna *sna, uint8_t alu, 3340 PixmapPtr dst, struct kgem_bo *dst_bo, 3341 uint32_t color, unsigned flags, 3342 struct sna_fill_op *op) 3343{ 3344 DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3345 3346 if (prefer_blt_fill(sna, dst_bo, flags) && 3347 sna_blt_fill(sna, alu, 3348 dst_bo, dst->drawable.bitsPerPixel, 3349 color, 3350 op)) 3351 return true; 3352 3353 if (!(alu == GXcopy || alu == GXclear) || 3354 too_large(dst->drawable.width, dst->drawable.height) || 3355 unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3356 return sna_blt_fill(sna, alu, 3357 dst_bo, dst->drawable.bitsPerPixel, 3358 color, 3359 op); 3360 3361 if (alu == GXclear) 3362 color = 0; 3363 3364 op->base.dst.pixmap = dst; 3365 op->base.dst.width = dst->drawable.width; 3366 op->base.dst.height = dst->drawable.height; 3367 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3368 op->base.dst.bo = dst_bo; 3369 op->base.dst.x = op->base.dst.y = 0; 3370 3371 op->base.src.bo = 3372 sna_render_get_solid(sna, 3373 sna_rgba_for_color(color, 3374 dst->drawable.depth)); 3375 op->base.mask.bo = NULL; 3376 3377 op->base.need_magic_ca_pass = false; 3378 op->base.floats_per_vertex = 2; 3379 op->base.floats_per_rect = 6; 3380 3381 op->base.u.gen8.flags = FILL_FLAGS_NOBLEND; 3382 3383 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3384 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3385 kgem_submit(&sna->kgem); 3386 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3387 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3388 return false; 3389 } 3390 3391 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3392 } 3393 3394 gen8_align_vertex(sna, &op->base); 3395 gen8_emit_fill_state(sna, &op->base); 3396 3397 op->blt = gen8_render_fill_op_blt; 3398 op->box = gen8_render_fill_op_box; 3399 op->boxes = gen8_render_fill_op_boxes; 3400 op->points = NULL; 3401 op->done = gen8_render_fill_op_done; 3402 return true; 3403} 3404 3405static bool 3406gen8_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3407 uint32_t color, 3408 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3409 uint8_t alu) 3410{ 3411 BoxRec box; 3412 3413 box.x1 = x1; 3414 box.y1 = y1; 3415 box.x2 = x2; 3416 box.y2 = y2; 3417 3418 return sna_blt_fill_boxes(sna, alu, 3419 bo, dst->drawable.bitsPerPixel, 3420 color, &box, 1); 3421} 3422 3423static bool 3424gen8_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3425 uint32_t color, 3426 int16_t x1, int16_t y1, 3427 int16_t x2, int16_t y2, 3428 uint8_t alu) 3429{ 3430 struct sna_composite_op tmp; 3431 int16_t *v; 3432 3433 
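	/* Route the fill to whichever engine suits: the BLT when it is
	 * already engaged or when RENDER cannot handle the target at
	 * all, otherwise a single solid rectangle via the 3D pipe. */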
/* Prefer to use the BLT if already engaged */
3434 if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
3435 gen8_render_fill_one_try_blt(sna, dst, bo, color,
3436 x1, y1, x2, y2, alu))
3437 return true;
3438
3439 /* Must use the BLT if we can't RENDER... */
3440 if (!(alu == GXcopy || alu == GXclear) ||
3441 too_large(dst->drawable.width, dst->drawable.height) ||
3442 unaligned(bo, dst->drawable.bitsPerPixel))
3443 return gen8_render_fill_one_try_blt(sna, dst, bo, color,
3444 x1, y1, x2, y2, alu);
3445
3446 if (alu == GXclear)
3447 color = 0;
3448
3449 tmp.dst.pixmap = dst;
3450 tmp.dst.width = dst->drawable.width;
3451 tmp.dst.height = dst->drawable.height;
3452 tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3453 tmp.dst.bo = bo;
3454 tmp.dst.x = tmp.dst.y = 0;
3455
3456 tmp.src.bo =
3457 sna_render_get_solid(sna,
3458 sna_rgba_for_color(color,
3459 dst->drawable.depth));
3460 tmp.mask.bo = NULL;
3461
3462 tmp.floats_per_vertex = 2;
3463 tmp.floats_per_rect = 6;
3464 tmp.need_magic_ca_pass = false;
3465
3466 tmp.u.gen8.flags = FILL_FLAGS_NOBLEND;
3467
3468 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
3469 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3470 kgem_submit(&sna->kgem);
3471 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3472 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3473 return false;
3474 }
3475 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
3476 }
3477
3478 gen8_align_vertex(sna, &tmp);
3479 gen8_emit_fill_state(sna, &tmp);
3480
3481 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state);
3482
3483 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
3484
3485 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3486 sna->render.vertex_used += 6;
3487 assert(sna->render.vertex_used <= sna->render.vertex_size);
3488
3489 v[0] = x2;
3490 v[8] = v[4] = x1;
3491 v[5] = v[1] = y2;
3492 v[9] = y1;
3493 v[7] = v[2] = v[3] = 1;
3494 v[6] = v[10] = v[11] = 0;
3495
3496 gen8_vertex_flush(sna);
3497 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3498
3499 return true;
3500}
3501
3502static bool
3503gen8_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3504{
3505 BoxRec box;
3506
3507 box.x1 = 0;
3508 box.y1 = 0;
3509 box.x2 = dst->drawable.width;
3510 box.y2 = dst->drawable.height;
3511
3512 return sna_blt_fill_boxes(sna, GXclear,
3513 bo, dst->drawable.bitsPerPixel,
3514 0, &box, 1);
3515}
3516
3517static bool
3518gen8_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3519{
3520 struct sna_composite_op tmp;
3521 int16_t *v;
3522
3523 DBG(("%s: %dx%d\n",
3524 __FUNCTION__,
3525 dst->drawable.width,
3526 dst->drawable.height));
3527
3528 /* Prefer to use the BLT if already engaged */
3529 if (sna->kgem.mode == KGEM_BLT &&
3530 gen8_render_clear_try_blt(sna, dst, bo))
3531 return true;
3532
3533 /* Must use the BLT if we can't RENDER...
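 * (the target is too large for the 3D pipe, or its bo is not
 * aligned for use as a render target)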
*/ 3534 if (too_large(dst->drawable.width, dst->drawable.height) || 3535 unaligned(bo, dst->drawable.bitsPerPixel)) 3536 return gen8_render_clear_try_blt(sna, dst, bo); 3537 3538 tmp.dst.pixmap = dst; 3539 tmp.dst.width = dst->drawable.width; 3540 tmp.dst.height = dst->drawable.height; 3541 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3542 tmp.dst.bo = bo; 3543 tmp.dst.x = tmp.dst.y = 0; 3544 3545 tmp.src.bo = sna_render_get_solid(sna, 0); 3546 tmp.mask.bo = NULL; 3547 3548 tmp.floats_per_vertex = 2; 3549 tmp.floats_per_rect = 6; 3550 tmp.need_magic_ca_pass = false; 3551 3552 tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3553 3554 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3555 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3556 kgem_submit(&sna->kgem); 3557 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3558 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3559 return false; 3560 } 3561 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3562 } 3563 3564 gen8_align_vertex(sna, &tmp); 3565 gen8_emit_fill_state(sna, &tmp); 3566 3567 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3568 3569 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3570 sna->render.vertex_used += 6; 3571 assert(sna->render.vertex_used <= sna->render.vertex_size); 3572 3573 v[0] = dst->drawable.width; 3574 v[5] = v[1] = dst->drawable.height; 3575 v[8] = v[4] = 0; 3576 v[9] = 0; 3577 3578 v[7] = v[2] = v[3] = 1; 3579 v[6] = v[10] = v[11] = 0; 3580 3581 gen8_vertex_flush(sna); 3582 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3583 3584 return true; 3585} 3586 3587#if !NO_VIDEO 3588static uint32_t gen8_bind_video_source(struct sna *sna, 3589 struct kgem_bo *bo, 3590 uint32_t delta, 3591 int width, 3592 int height, 3593 int pitch, 3594 uint32_t format) 3595{ 3596 uint32_t *ss; 3597 int offset; 3598 3599 offset = sna->kgem.surface -= SURFACE_DW; 3600 ss = sna->kgem.batch + offset; 3601 ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 3602 gen8_tiling_bits(bo->tiling) | 3603 format << SURFACE_FORMAT_SHIFT | 3604 SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 3605 ss[1] = 0; 3606 ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 3607 (height - 1) << SURFACE_HEIGHT_SHIFT); 3608 ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; 3609 ss[4] = 0; 3610 ss[5] = 0; 3611 ss[6] = 0; 3612 ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 3613 *(uint64_t *)(ss+8) = 3614 kgem_add_reloc64(&sna->kgem, offset + 8, bo, 3615 I915_GEM_DOMAIN_SAMPLER << 16, 3616 delta); 3617 ss[10] = 0; 3618 ss[11] = 0; 3619 ss[12] = 0; 3620 ss[13] = 0; 3621 ss[14] = 0; 3622 ss[15] = 0; 3623 3624 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", 3625 offset, bo->handle, ss[1], 3626 format, width, height, bo->pitch, bo->tiling)); 3627 3628 return offset * sizeof(uint32_t); 3629} 3630 3631static void gen8_emit_video_state(struct sna *sna, 3632 const struct sna_composite_op *op) 3633{ 3634 struct sna_video_frame *frame = op->priv; 3635 uint32_t src_surf_format; 3636 uint32_t src_surf_base[6]; 3637 int src_width[6]; 3638 int src_height[6]; 3639 int src_pitch[6]; 3640 uint32_t *binding_table; 3641 uint16_t offset; 3642 int n_src, n; 3643 3644 /* XXX VeBox, bicubic */ 3645 3646 gen8_get_batch(sna, op); 3647 3648 src_surf_base[0] = 0; 3649 src_surf_base[1] = 0; 3650 src_surf_base[2] = frame->VBufOffset; 3651 src_surf_base[3] = frame->VBufOffset; 3652 src_surf_base[4] = frame->UBufOffset; 3653 src_surf_base[5] = frame->UBufOffset; 3654 3655 if (is_planar_fourcc(frame->id)) { 3656 src_surf_format = SURFACEFORMAT_R8_UNORM; 3657 
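		/* Planar formats bind six R8 surfaces: two views of the
		 * full-resolution Y plane, then the V and then the U
		 * plane, each at half width and half height. */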
src_width[1] = src_width[0] = frame->width; 3658 src_height[1] = src_height[0] = frame->height; 3659 src_pitch[1] = src_pitch[0] = frame->pitch[1]; 3660 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 3661 frame->width / 2; 3662 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 3663 frame->height / 2; 3664 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 3665 frame->pitch[0]; 3666 n_src = 6; 3667 } else { 3668 if (frame->id == FOURCC_UYVY) 3669 src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; 3670 else 3671 src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; 3672 3673 src_width[0] = frame->width; 3674 src_height[0] = frame->height; 3675 src_pitch[0] = frame->pitch[0]; 3676 n_src = 1; 3677 } 3678 3679 binding_table = gen8_composite_get_binding_table(sna, &offset); 3680 3681 binding_table[0] = 3682 gen8_bind_bo(sna, 3683 op->dst.bo, op->dst.width, op->dst.height, 3684 gen8_get_dest_format(op->dst.format), 3685 true); 3686 for (n = 0; n < n_src; n++) { 3687 binding_table[1+n] = 3688 gen8_bind_video_source(sna, 3689 frame->bo, 3690 src_surf_base[n], 3691 src_width[n], 3692 src_height[n], 3693 src_pitch[n], 3694 src_surf_format); 3695 } 3696 3697 gen8_emit_state(sna, op, offset); 3698} 3699 3700static bool 3701gen8_render_video(struct sna *sna, 3702 struct sna_video *video, 3703 struct sna_video_frame *frame, 3704 RegionPtr dstRegion, 3705 PixmapPtr pixmap) 3706{ 3707 struct sna_composite_op tmp; 3708 struct sna_pixmap *priv = sna_pixmap(pixmap); 3709 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3710 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3711 int src_width = frame->src.x2 - frame->src.x1; 3712 int src_height = frame->src.y2 - frame->src.y1; 3713 float src_offset_x, src_offset_y; 3714 float src_scale_x, src_scale_y; 3715 int nbox, pix_xoff, pix_yoff; 3716 unsigned filter; 3717 const BoxRec *box; 3718 3719 DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 3720 __FUNCTION__, 3721 src_width, src_height, dst_width, dst_height, 3722 region_num_rects(dstRegion), 3723 REGION_EXTENTS(NULL, dstRegion)->x1, 3724 REGION_EXTENTS(NULL, dstRegion)->y1, 3725 REGION_EXTENTS(NULL, dstRegion)->x2, 3726 REGION_EXTENTS(NULL, dstRegion)->y2)); 3727 3728 assert(priv->gpu_bo); 3729 assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); 3730 assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); 3731 3732 memset(&tmp, 0, sizeof(tmp)); 3733 3734 tmp.dst.pixmap = pixmap; 3735 tmp.dst.width = pixmap->drawable.width; 3736 tmp.dst.height = pixmap->drawable.height; 3737 tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 3738 tmp.dst.bo = priv->gpu_bo; 3739 3740 tmp.src.bo = frame->bo; 3741 tmp.mask.bo = NULL; 3742 3743 tmp.floats_per_vertex = 3; 3744 tmp.floats_per_rect = 9; 3745 3746 if (src_width == dst_width && src_height == dst_height) 3747 filter = SAMPLER_FILTER_NEAREST; 3748 else 3749 filter = SAMPLER_FILTER_BILINEAR; 3750 3751 tmp.u.gen8.flags = 3752 GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 3753 SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 3754 NO_BLEND, 3755 is_planar_fourcc(frame->id) ? 
3756 GEN8_WM_KERNEL_VIDEO_PLANAR : 3757 GEN8_WM_KERNEL_VIDEO_PACKED, 3758 2); 3759 tmp.priv = frame; 3760 3761 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3762 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 3763 kgem_submit(&sna->kgem); 3764 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 3765 return false; 3766 3767 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3768 } 3769 3770 gen8_align_vertex(sna, &tmp); 3771 gen8_emit_video_state(sna, &tmp); 3772 3773 /* Set up the offset for translating from the given region (in screen 3774 * coordinates) to the backing pixmap. 3775 */ 3776#ifdef COMPOSITE 3777 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 3778 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 3779#else 3780 pix_xoff = 0; 3781 pix_yoff = 0; 3782#endif 3783 3784 DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 3785 __FUNCTION__, 3786 frame->src.x1, frame->src.y1, 3787 src_width, src_height, 3788 dst_width, dst_height, 3789 frame->width, frame->height)); 3790 3791 src_scale_x = (float)src_width / dst_width / frame->width; 3792 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 3793 3794 src_scale_y = (float)src_height / dst_height / frame->height; 3795 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 3796 3797 DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 3798 __FUNCTION__, 3799 src_scale_x, src_scale_y, 3800 src_offset_x, src_offset_y)); 3801 3802 box = region_rects(dstRegion); 3803 nbox = region_num_rects(dstRegion); 3804 while (nbox--) { 3805 BoxRec r; 3806 3807 DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 3808 __FUNCTION__, 3809 box->x1, box->y1, 3810 box->x2, box->y2, 3811 pix_xoff, pix_yoff, 3812 box->x1 * src_scale_x + src_offset_x, 3813 box->y1 * src_scale_y + src_offset_y, 3814 box->x2 * src_scale_x + src_offset_x, 3815 box->y2 * src_scale_y + src_offset_y)); 3816 3817 r.x1 = box->x1 + pix_xoff; 3818 r.x2 = box->x2 + pix_xoff; 3819 r.y1 = box->y1 + pix_yoff; 3820 r.y2 = box->y2 + pix_yoff; 3821 3822 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); 3823 3824 OUT_VERTEX(r.x2, r.y2); 3825 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 3826 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3827 3828 OUT_VERTEX(r.x1, r.y2); 3829 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3830 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3831 3832 OUT_VERTEX(r.x1, r.y1); 3833 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3834 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 3835 3836 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 3837 sna_damage_add_box(&priv->gpu_damage, &r); 3838 sna_damage_subtract_box(&priv->cpu_damage, &r); 3839 } 3840 box++; 3841 } 3842 3843 gen8_vertex_flush(sna); 3844 return true; 3845} 3846#endif 3847 3848static void gen8_render_flush(struct sna *sna) 3849{ 3850 gen8_vertex_close(sna); 3851 3852 assert(sna->render.vb_id == 0); 3853 assert(sna->render.vertex_offset == 0); 3854} 3855 3856static void gen8_render_reset(struct sna *sna) 3857{ 3858 sna->render_state.gen8.emit_flush = false; 3859 sna->render_state.gen8.needs_invariant = true; 3860 sna->render_state.gen8.ve_id = 3 << 2; 3861 sna->render_state.gen8.last_primitive = -1; 3862 3863 sna->render_state.gen8.num_sf_outputs = 0; 3864 sna->render_state.gen8.samplers = -1; 3865 sna->render_state.gen8.blend = -1; 3866 sna->render_state.gen8.kernel = -1; 3867 sna->render_state.gen8.drawrect_offset = -1; 3868 
sna->render_state.gen8.drawrect_limit = -1; 3869 sna->render_state.gen8.surface_table = 0; 3870 3871 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3872 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3873 discard_vbo(sna); 3874 } 3875 3876 sna->render.vertex_offset = 0; 3877 sna->render.nvertex_reloc = 0; 3878 sna->render.vb_id = 0; 3879} 3880 3881static void gen8_render_fini(struct sna *sna) 3882{ 3883 kgem_bo_destroy(&sna->kgem, sna->render_state.gen8.general_bo); 3884} 3885 3886static bool gen8_render_setup(struct sna *sna) 3887{ 3888 struct gen8_render_state *state = &sna->render_state.gen8; 3889 struct sna_static_stream general; 3890 struct gen8_sampler_state *ss; 3891 int i, j, k, l, m; 3892 uint32_t devid; 3893 3894 devid = intel_get_device_id(sna->dev); 3895 if (devid & 0xf) 3896 state->gt = ((devid >> 4) & 0xf) + 1; 3897 DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); 3898 3899 sna_static_stream_init(&general); 3900 3901 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3902 * dumps, you know it points to zero. 3903 */ 3904 null_create(&general); 3905 3906 for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { 3907 if (wm_kernels[m].size) { 3908 state->wm_kernel[m][1] = 3909 sna_static_stream_add(&general, 3910 wm_kernels[m].data, 3911 wm_kernels[m].size, 3912 64); 3913 } else { 3914 if (USE_8_PIXEL_DISPATCH) { 3915 state->wm_kernel[m][0] = 3916 sna_static_stream_compile_wm(sna, &general, 3917 wm_kernels[m].data, 8); 3918 } 3919 3920 if (USE_16_PIXEL_DISPATCH) { 3921 state->wm_kernel[m][1] = 3922 sna_static_stream_compile_wm(sna, &general, 3923 wm_kernels[m].data, 16); 3924 } 3925 3926 if (USE_32_PIXEL_DISPATCH) { 3927 state->wm_kernel[m][2] = 3928 sna_static_stream_compile_wm(sna, &general, 3929 wm_kernels[m].data, 32); 3930 } 3931 } 3932 assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 3933 } 3934 3935 COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); 3936 ss = sna_static_stream_map(&general, 3937 2 * sizeof(*ss) * 3938 (2 + 3939 FILTER_COUNT * EXTEND_COUNT * 3940 FILTER_COUNT * EXTEND_COUNT), 3941 32); 3942 state->wm_state = sna_static_stream_offsetof(&general, ss); 3943 sampler_copy_init(ss); ss += 2; 3944 sampler_fill_init(ss); ss += 2; 3945 for (i = 0; i < FILTER_COUNT; i++) { 3946 for (j = 0; j < EXTEND_COUNT; j++) { 3947 for (k = 0; k < FILTER_COUNT; k++) { 3948 for (l = 0; l < EXTEND_COUNT; l++) { 3949 sampler_state_init(ss++, i, j); 3950 sampler_state_init(ss++, k, l); 3951 } 3952 } 3953 } 3954 } 3955 3956 state->cc_blend = gen8_create_blend_state(&general); 3957 3958 state->general_bo = sna_static_stream_fini(sna, &general); 3959 return state->general_bo != NULL; 3960} 3961 3962const char *gen8_render_init(struct sna *sna, const char *backend) 3963{ 3964 if (!gen8_render_setup(sna)) 3965 return backend; 3966 3967 sna->kgem.context_switch = gen6_render_context_switch; 3968 sna->kgem.retire = gen6_render_retire; 3969 sna->kgem.expire = gen4_render_expire; 3970 3971#if !NO_COMPOSITE 3972 sna->render.composite = gen8_render_composite; 3973 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3974#endif 3975#if !NO_COMPOSITE_SPANS 3976 sna->render.check_composite_spans = gen8_check_composite_spans; 3977 sna->render.composite_spans = gen8_render_composite_spans; 3978 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3979#endif 3980#if !NO_VIDEO 3981 sna->render.video = gen8_render_video; 3982#endif 3983 3984#if !NO_COPY_BOXES 3985 sna->render.copy_boxes = 
gen8_render_copy_boxes; 3986#endif 3987#if !NO_COPY 3988 sna->render.copy = gen8_render_copy; 3989#endif 3990 3991#if !NO_FILL_BOXES 3992 sna->render.fill_boxes = gen8_render_fill_boxes; 3993#endif 3994#if !NO_FILL 3995 sna->render.fill = gen8_render_fill; 3996#endif 3997#if !NO_FILL_ONE 3998 sna->render.fill_one = gen8_render_fill_one; 3999#endif 4000#if !NO_FILL_CLEAR 4001 sna->render.clear = gen8_render_clear; 4002#endif 4003 4004 sna->render.flush = gen8_render_flush; 4005 sna->render.reset = gen8_render_reset; 4006 sna->render.fini = gen8_render_fini; 4007 4008 sna->render.max_3d_size = GEN8_MAX_SIZE; 4009 sna->render.max_3d_pitch = 1 << 18; 4010 return "Broadwell"; 4011} 4012
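
/* Illustrative usage (a sketch, not taken from this file): callers
 * probe backends per GPU generation, and each init routine returns
 * its backend name on success or the name it was handed on failure:
 *
 *	const char *backend = "none";
 *	backend = gen8_render_init(sna, backend);
 *	// now "Broadwell" if setup succeeded, otherwise still "none"
 */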