/*
 * Copyright © 2012,2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "gen8_render.h"
#include "gen8_eu.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
#include "gen6_common.h"
#include "gen8_vertex.h"

#define SIM 1

#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
#define NO_VIDEO 0

#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

#define GEN8_MAX_SIZE 16384

/* XXX Todo
 *
 * STR (software tiled rendering) mode. No, really.
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
 */

#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)

/* Pipeline stages:
 *  1. Command Streamer (CS)
 *  2. Vertex Fetch (VF)
 *  3. Vertex Shader (VS)
 *  4. Hull Shader (HS)
 *  5. Tessellation Engine (TE)
 *  6. Domain Shader (DS)
 *  7. Geometry Shader (GS)
 *  8. Stream Output Logic (SOL)
 *  9. Clipper (CLIP)
 * 10. Strip/Fan (SF)
 * 11. Windower/Masker (WM)
 * 12. Color Calculator (CC)
 */
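
/* The .g8b files included below are precompiled WM (pixel shader) kernel
 * binaries, assembled from the shared EXA shader sources; each table is
 * one kernel built by concatenating its stages: coordinate setup,
 * sampling, optional YUV->RGB colour conversion, and the final write.
 */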

#if !NO_VIDEO
static const uint32_t ps_kernel_packed_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_nv12_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_nv12.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_packed_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_nv12_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_nv12.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_rgb[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_write.g8b"
};
#endif

#define SURFACE_DW (64 / sizeof(uint32_t))

#define KERNEL(kernel_enum, kernel, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
	[GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	int num_surfaces;
} wm_kernels[GEN8_WM_KERNEL_COUNT] = {
	NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2),

	NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2),

#if !NO_VIDEO
	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),
	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
#endif
};
#undef KERNEL

static const struct blendinfo {
	uint8_t src_alpha;
	uint8_t src_blend;
	uint8_t dst_blend;
} gen8_blend_op[] = {
	/* Clear */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */	{1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */	{0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */	{1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen8_blend_op.
 *
 * This leaves out GEN8_BLENDFACTOR_INV_DST_COLOR,
 * GEN8_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN8_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN8_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN8_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen8_blend_state), 64)

#define BLEND_OFFSET(s, d) \
	((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN8_BLENDFACTOR_COUNT + (d)) << 4)

#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO)
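
/* Worked example of the packing above: PictOpOver has src=BLENDFACTOR_ONE,
 * dst=BLENDFACTOR_INV_SRC_ALPHA.  BLEND_OFFSET() sets bit 15 because the
 * destination is read (dst != BLENDFACTOR_ZERO, cf. GEN8_READS_DST below)
 * and stores the table index src * GEN8_BLENDFACTOR_COUNT + dst in bits
 * 4..14 (cf. GEN8_BLEND); gen8_emit_cc() scales that index by
 * GEN8_BLEND_STATE_PADDED_SIZE to address the pre-built blend states.
 */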

#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2)

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN8_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER 1
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)

#define GEN8_SAMPLER(f) (((f) >> 20) & 0xfff)
#define GEN8_BLEND(f) (((f) >> 4) & 0x7ff)
#define GEN8_READS_DST(f) (((f) >> 15) & 1)
#define GEN8_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN8_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN8_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH64(v) batch_emit64(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
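
/* Layout of the op->u.gen8.flags word, as implied by the accessors above:
 *
 *   bits 20..31: sampler index       (GEN8_SAMPLER)
 *   bits 16..19: WM kernel           (GEN8_KERNEL)
 *   bit  15:     reads destination   (GEN8_READS_DST, from BLEND_OFFSET)
 *   bits  4..14: blend-table index   (GEN8_BLEND)
 *   bits  0..3:  vertex-element id   (GEN8_VERTEX)
 *
 * e.g. COPY_FLAGS(GXcopy) packs sampler 0, the NOMASK kernel, the no-blend
 * entry and the 2s2s vertex layout into a single immediate.
 */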

struct gt_info {
	const char *name;
	struct {
		int max_vs_entries;
	} urb;
};

static const struct gt_info bdw_gt_info = {
	.name = "Broadwell (gen8)",
	.urb = { .max_vs_entries = 960 },
};

static bool is_bdw(struct sna *sna)
{
	return sna->kgem.gen == 0100;
}

static const struct gt_info chv_gt_info = {
	.name = "Cherryview (gen8)",
	.urb = { .max_vs_entries = 640 },
};

static bool is_chv(struct sna *sna)
{
	return sna->kgem.gen == 0101;
}

static inline bool too_large(int width, int height)
{
	return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE;
}

static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
		return true;

	return false;
#else
	return false;
#endif
}

static uint32_t gen8_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN8_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);

	src = gen8_blend_op[op].src_blend;
	dst = gen8_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == BLENDFACTOR_DST_ALPHA)
			src = BLENDFACTOR_ONE;
		else if (src == BLENDFACTOR_INV_DST_ALPHA)
			src = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen8_blend_op[op].src_alpha) {
		if (dst == BLENDFACTOR_SRC_ALPHA)
			dst = BLENDFACTOR_SRC_COLOR;
		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
			dst = BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
	return BLEND_OFFSET(src, dst);
}
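
/* Example of the fixups above: PictOpIn is {src=DST_ALPHA, dst=ZERO}; on an
 * xRGB destination the missing alpha must read as 1.0, so src becomes ONE
 * and the operation degenerates to a plain copy.  Similarly, for
 * component-alpha PictOpOver, dst=INV_SRC_ALPHA is rewritten to
 * INV_SRC_COLOR so that each channel is scaled by its own mask value.
 */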

static uint32_t gen8_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen8_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen8_check_dst_format(PictFormat format)
{
	if (gen8_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen8_check_format(uint32_t format)
{
	if (gen8_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen8_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen8_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen8_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen8_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

static int
gen8_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen8_blend_op[op].src_alpha)
				base = GEN8_WM_KERNEL_MASKSA;
			else
				base = GEN8_WM_KERNEL_MASKCA;
		} else
			base = GEN8_WM_KERNEL_MASK;
	} else
		base = GEN8_WM_KERNEL_NOMASK;

	return base + !is_affine;
}

static void
gen8_emit_push_constants(struct sna *sna)
{
#if SIM
	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_urb(struct sna *sna)
{
	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH(sna->render_state.gen8.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT |
		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN8_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);
}

static void
gen8_emit_state_base_address(struct sna *sna)
{
	uint32_t num_pages;

	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);

	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
	OUT_BATCH64(0); /* general */
	OUT_BATCH(0); /* stateless dataport */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
				     sna->kgem.nbatch,
				     NULL,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(0); /* indirect */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
				     sna->kgem.nbatch,
				     sna->render_state.gen8.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	/* upper bounds */
	num_pages = sna->render_state.gen8.general_bo->size.pages.count;
	OUT_BATCH(0); /* general */
	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(num_pages << 12 | 1); /* instruction */
}
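
/* A note on the encodings above (summary, not spec text): kgem_add_reloc64()
 * emits a 64-bit presumed address and records a relocation so the kernel can
 * patch in the real GPU address at execbuffer time; a NULL bo ties the base
 * to the batch itself, which is where the surface state is written.  The
 * upper-bound dwords encode a page count in bits 12+ with bit 0 acting as
 * the modify-enable flag, mirroring BASE_ADDRESS_MODIFY in the bases.
 */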

static void
gen8_emit_vs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_VS | (9 - 2));
	OUT_BATCH64(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(1 << 1); /* pass-through */
	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_VS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_hs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_HS | (9 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0); /* no HS kernel */
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_HS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_te_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_ds_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DS | (9 - 2));
	OUT_BATCH64(0); /* no kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_DS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_gs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_GS | (10 - 2));
	OUT_BATCH64(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_GS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

static void
gen8_emit_sol_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_SF | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_clip_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}

static void
gen8_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN8_3DSTATE_DEPTH_BUFFER | (8 - 2));
#if 0
	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
#else
	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
#endif
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_STENCIL_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

static void
gen8_emit_wm_invariant(struct sna *sna)
{
	gen8_emit_null_depth_buffer(sna);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_WM | (2 - 2));
	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2));
	OUT_BATCH(0);
#endif

#if 0
	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
		  PSX_ATTRIBUTE_ENABLE);

	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2));
	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
		  RASTER_CULL_NONE);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_CONSTANT_PS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
#endif
}

static void
gen8_emit_cc_invariant(struct sna *sna)
{
}

static void
gen8_emit_vf_invariant(struct sna *sna)
{
	int n;

#if 1
	OUT_BATCH(GEN8_3DSTATE_VF | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_VF_SGVS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
	OUT_BATCH(RECTLIST);
	OUT_BATCH(GEN8_3DSTATE_VF_STATISTICS | 0);

	for (n = 1; n <= 3; n++) {
		OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
		OUT_BATCH(n);
		OUT_BATCH(0);
	}
}

static void
gen8_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_3D);

#if SIM
	OUT_BATCH(GEN8_STATE_SIP | (3 - 2));
	OUT_BATCH64(0);
#endif

	OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */

	OUT_BATCH(GEN8_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

#if SIM
	OUT_BATCH(GEN8_3DSTATE_SAMPLE_PATTERN | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	//OUT_BATCH(8<<20 | 8<<16);
	OUT_BATCH(0);
#endif

	gen8_emit_push_constants(sna);
	gen8_emit_urb(sna);

	gen8_emit_state_base_address(sna);

	gen8_emit_vf_invariant(sna);
	gen8_emit_vs_invariant(sna);
	gen8_emit_hs_invariant(sna);
	gen8_emit_te_invariant(sna);
	gen8_emit_ds_invariant(sna);
	gen8_emit_gs_invariant(sna);
	gen8_emit_sol_invariant(sna);
	gen8_emit_clip_invariant(sna);
	gen8_emit_sf_invariant(sna);
	gen8_emit_wm_invariant(sna);
	gen8_emit_cc_invariant(sna);

	sna->render_state.gen8.needs_invariant = false;
}

static void
gen8_emit_cc(struct sna *sna, uint32_t blend)
{
	struct gen8_render_state *render = &sna->render_state.gen8;

	if (render->blend == blend)
		return;

	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
	     __FUNCTION__, blend, render->blend,
	     blend / GEN8_BLENDFACTOR_COUNT,
	     blend % GEN8_BLENDFACTOR_COUNT));

	assert(blend < GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT);
	assert(blend / GEN8_BLENDFACTOR_COUNT > 0);
	assert(blend % GEN8_BLENDFACTOR_COUNT > 0);

	/* XXX can have up to 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
	if (blend != GEN8_BLEND(NO_BLEND)) {
		uint32_t src = blend / GEN8_BLENDFACTOR_COUNT;
		uint32_t dst = blend % GEN8_BLENDFACTOR_COUNT;
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
			  PS_BLEND_COLOR_BLEND_ENABLE |
			  src << PS_BLEND_SRC_ALPHA_SHIFT |
			  dst << PS_BLEND_DST_ALPHA_SHIFT |
			  src << PS_BLEND_SRC_SHIFT |
			  dst << PS_BLEND_DST_SHIFT);
	} else
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);

	assert(is_aligned(render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE, 64));
	OUT_BATCH(GEN8_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE) | 1);

	/* Force a CC_STATE pointer change to improve blend performance */
	OUT_BATCH(GEN8_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	render->blend = blend;
}

static void
gen8_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen8.samplers == state)
		return;

	sna->render_state.gen8.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert(2 * sizeof(struct gen8_sampler_state) == 32);
	OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen8.wm_state + state * 2 * sizeof(struct gen8_sampler_state));
}

static void
gen8_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;
	if (sna->render_state.gen8.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));

	sna->render_state.gen8.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN8_3DSTATE_SBE | (4 - 2));
	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen8_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	assert(kernel < ARRAY_SIZE(wm_kernels));
	if (sna->render_state.gen8.kernel == kernel)
		return;

	sna->render_state.gen8.kernel = kernel;
	kernels = sna->render_state.gen8.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));
	assert(is_aligned(kernels[0], 64));
	assert(is_aligned(kernels[1], 64));
	assert(is_aligned(kernels[2], 64));

	OUT_BATCH(GEN8_3DSTATE_PS | (12 - 2));
	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
		  PS_VECTOR_MASK_ENABLE |
		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH64(0); /* scratch address */
	OUT_BATCH(PS_MAX_THREADS |
		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH64(kernels[2]);
	OUT_BATCH64(kernels[1]);
}

static bool
gen8_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen8.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen8.surface_table = offset;
	return true;
}

static bool
gen8_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen8.drawrect_limit == limit &&
	    sna->render_state.gen8.drawrect_offset == offset)
		return true;

	sna->render_state.gen8.drawrect_offset = offset;
	sna->render_state.gen8.drawrect_limit = limit;

	OUT_BATCH(GEN8_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen8_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen8_render_state *render = &sna->render_state.gen8;
	uint32_t src_format, dw;
	int id = GEN8_VERTEX(op->u.gen8.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN8_3DSTATE_VERTEX_ELEMENTS |
		  ((2 * (3 + has_mask)) + 1 - 2));

	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3));
	dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = SURFACEFORMAT_R16G16_SSCALED;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = SURFACEFORMAT_R32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = SURFACEFORMAT_R32G32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  src_format << VE_FORMAT_SHIFT |
		  4 << VE_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
		dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = SURFACEFORMAT_R32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = SURFACEFORMAT_R32G32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
			  src_format << VE_FORMAT_SHIFT |
			  offset << VE_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
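
/* For reference, the ve_id encoding consumed above: bits 0..1 give the
 * number of floats per source texcoord (0 meaning the packed 16-bit (x,y)
 * layout), bits 2..3 the same for the mask texcoord, so any non-zero upper
 * field implies a mask channel.  e.g. id=2 is an affine source with no
 * mask; id=0xa (2 | 2<<2) adds an affine mask.
 */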

inline static void
gen8_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH |
		  PIPE_CONTROL_TC_FLUSH |
		  PIPE_CONTROL_CS_STALL);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall)
		stall = (PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_STALL_AT_SCOREBOARD);

	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

inline static void
gen8_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_CS_STALL |
		  PIPE_CONTROL_FLUSH |
		  PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}

static void
gen8_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen8.emit_flush && GEN8_READS_DST(op->u.gen8.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	need_stall = sna->render_state.gen8.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen8_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	if (need_invalidate) {
		gen8_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen8_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
	gen8_emit_sampler(sna, GEN8_SAMPLER(op->u.gen8.flags));
	gen8_emit_sf(sna, GEN8_VERTEX(op->u.gen8.flags) >> 2);
	gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
	gen8_emit_vertex_elements(sna, op);
	gen8_emit_binding_table(sna, wm_binding_table);

	sna->render_state.gen8.emit_flush = GEN8_READS_DST(op->u.gen8.flags);
}
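
/* Component alpha with an operator that uses source alpha (e.g. Over)
 * cannot be blended in one pass, so it is split in two: the main pass
 * applies the mask to the destination (dst *= 1 - src.alpha * mask) and
 * this "magic" second pass replays exactly the same vertex range with an
 * additive blend to accumulate src * mask on top.
 */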
static bool gen8_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen8_render_state *state = &sna->render_state.gen8;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen8_emit_pipe_stall(sna);

	gen8_emit_cc(sna,
		     GEN8_BLEND(gen8_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen8_emit_wm(sna,
		     gen8_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen8_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t));

	sampler_state->ss0.lod_preclamp = 2;	/* GL mode */
	sampler_state->ss0.default_color_mode = 1;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = MAPFILTER_LINEAR;
		break;
	}

	/* XXX bicubic filter using MAPFILTER_FLEXIBLE */

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR;
		break;
	}
}

static void
sampler_copy_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static void
sampler_fill_init(struct gen8_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static uint32_t
gen8_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return SURFACE_TILED;
	case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y;
	}
}

#define MOCS_WT (2 << 5)
#define MOCS_WB (3 << 5)
#define MOCS_eLLC_ONLY (0 << 3)
#define MOCS_LLC_ONLY (1 << 3)
#define MOCS_eLLC_LLC (2 << 3)
#define MOCS_ALL_CACHES (3 << 3)
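
/* MOCS selects the surface's caching behaviour; from the shifts above,
 * bits 5..6 hold the write policy (write-through vs write-back) and bits
 * 3..4 the target cache(s).  Scanout buffers get MOCS_WT so writes remain
 * visible to the display engine; everything else uses MOCS_WB.  A gen8
 * SURFACE_STATE is 16 dwords (64 bytes), hence SURFACE_DW above.
 */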

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen8_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		assert(offset >= sna->kgem.surface);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen8_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	if (is_dst) {
		ss[0] |= SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = (is_dst && is_uncached(sna, bo)) ? 0 :
		is_scanout ? (MOCS_WT | MOCS_ALL_CACHES) << 24 :
		(MOCS_WB | MOCS_ALL_CACHES) << 24;
	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	*(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, *(uint64_t *)(ss+8),
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen8_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN8_VERTEX(op->u.gen8.flags);

	OUT_BATCH(GEN8_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE |
		  4*op->floats_per_vertex);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH64(0);
	OUT_BATCH(~0); /* buffer size: disabled */

	sna->render.vb_id |= 1 << id;
}

static void gen8_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen8.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0); /* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen8.last_primitive = sna->kgem.nbatch;
}

static bool gen8_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN8_VERTEX(op->u.gen8.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen8_emit_vertex_buffer(sna, op);

	gen8_emit_primitive(sna);
	return true;
}

static int gen8_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding a new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		if (gen8_magic_ca_pass(sna, op)) {
			gen8_emit_pipe_invalidate(sna);
			gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
			gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
		}
	}

	return gen8_vertex_finish(sna);
}

inline static int gen8_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen8_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen8_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

inline static uint32_t *gen8_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	assert(sna->kgem.surface <= 16384);
	sna->kgem.surface -= SURFACE_DW;
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

static void
gen8_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8*2));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen8.needs_invariant)
		gen8_emit_invariant(sna);
}

static void gen8_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen8_get_batch(sna, op);

	binding_table = gen8_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen8_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen8_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen8_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen8_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen8.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen8.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0])
		dirty = 0;

	gen8_emit_state(sna, op, offset | dirty);
}

static void
gen8_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen8.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen8.floats_per_vertex, op->floats_per_vertex));
		gen8_vertex_align(sna, op);
		sna->render_state.gen8.floats_per_vertex = op->floats_per_vertex;
	}
}

fastcall static void
gen8_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen8_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen8_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen8_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

static void
gen8_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen8_get_rectangles(sna, op, nbox,
						     gen8_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
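
/* The __thread variant above reserves vbo space while holding the vertex
 * lock, then drops the lock for the (potentially slow) emit_boxes() into
 * its private slice; the acquire/release pair keeps the vbo pinned so a
 * concurrent flush cannot recycle it mid-write.
 */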
static uint32_t
gen8_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	COMPILE_TIME_ASSERT(((GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0);

	base = sna_static_stream_map(stream,
				     GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT * GEN8_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN8_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN8_BLENDFACTOR_COUNT; dst++) {
			struct gen8_blend_state *blend =
				(struct gen8_blend_state *)ptr;

			assert(((ptr - base) & 63) == 0);
			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);
			COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4);

			blend->rt.post_blend_clamp = 1;
			blend->rt.pre_blend_clamp = 1;

			blend->rt.color_blend =
				!(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE);
			blend->rt.dest_blend_factor = dst;
			blend->rt.source_blend_factor = src;
			blend->rt.color_blend_function = BLENDFUNCTION_ADD;

			blend->rt.dest_alpha_blend_factor = dst;
			blend->rt.source_alpha_blend_factor = src;
			blend->rt.alpha_blend_function = BLENDFUNCTION_ADD;

			ptr += GEN8_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
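
/* All (src, dst) factor pairs are pre-baked into this static table, so at
 * draw time gen8_emit_cc() merely points the hardware at entry
 * src * GEN8_BLENDFACTOR_COUNT + dst -- the same index that BLEND_OFFSET()
 * packed into the operation's flags.
 */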

static int
gen8_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen8_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen8_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen8_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

inline static bool gen8_composite_channel_convert(struct sna_composite_channel *channel)
{
	if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format)))
		return false;

	channel->repeat = gen8_repeat(channel->repeat);
	channel->filter = gen8_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen8_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);

	return true;
}

static void gen8_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen8_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
op->dst.height)) 2033 hint |= FORCE_GPU; 2034 if (!partial) { 2035 hint |= IGNORE_DAMAGE; 2036 if (w == op->dst.width && h == op->dst.height) 2037 hint |= REPLACES; 2038 } 2039 2040 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 2041 if (op->dst.bo == NULL) 2042 return false; 2043 2044 if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 2045 return false; 2046 2047 if (hint & REPLACES) { 2048 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 2049 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2050 } 2051 2052 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 2053 &op->dst.x, &op->dst.y); 2054 2055 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 2056 __FUNCTION__, 2057 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 2058 op->dst.width, op->dst.height, 2059 op->dst.bo->pitch, 2060 op->dst.x, op->dst.y, 2061 op->damage ? *op->damage : (void *)-1)); 2062 2063 assert(op->dst.bo->proxy == NULL); 2064 2065 if (too_large(op->dst.width, op->dst.height) && 2066 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2067 return false; 2068 2069 return true; 2070} 2071 2072static bool 2073try_blt(struct sna *sna, 2074 uint8_t op, 2075 PicturePtr src, 2076 PicturePtr mask, 2077 PicturePtr dst, 2078 int16_t src_x, int16_t src_y, 2079 int16_t msk_x, int16_t msk_y, 2080 int16_t dst_x, int16_t dst_y, 2081 int16_t width, int16_t height, 2082 unsigned flags, 2083 struct sna_composite_op *tmp) 2084{ 2085 struct kgem_bo *bo; 2086 2087 if (sna->kgem.mode == KGEM_BLT) { 2088 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2089 goto execute; 2090 } 2091 2092 if (too_large(width, height)) { 2093 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2094 __FUNCTION__, width, height)); 2095 goto execute; 2096 } 2097 2098 bo = __sna_drawable_peek_bo(dst->pDrawable); 2099 if (bo == NULL) 2100 goto execute; 2101 2102 if (untiled_tlb_miss(bo)) 2103 goto execute; 2104 2105 if (bo->rq) { 2106 if (RQ_IS_BLT(bo->rq)) 2107 goto execute; 2108 2109 return false; 2110 } 2111 2112 if (bo->tiling == I915_TILING_Y) 2113 goto upload; 2114 2115 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2116 goto execute; 2117 2118 if (src->pDrawable == dst->pDrawable && 2119 (sna->render_state.gt < 3 || width*height < 1024) && 2120 can_switch_to_blt(sna, bo, 0)) 2121 goto execute; 2122 2123 if (src->pDrawable) { 2124 struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); 2125 if (s == NULL) 2126 goto upload; 2127 2128 if (prefer_blt_bo(sna, s, bo)) 2129 goto execute; 2130 } 2131 2132 if (sna->kgem.ring == KGEM_BLT) { 2133 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2134 goto execute; 2135 } 2136 2137upload: 2138 flags |= COMPOSITE_UPLOAD; 2139execute: 2140 return sna_blt_composite(sna, op, 2141 src, dst, 2142 src_x, src_y, 2143 dst_x, dst_y, 2144 width, height, 2145 flags, tmp); 2146} 2147 2148static bool 2149check_gradient(PicturePtr picture, bool precise) 2150{ 2151 if (picture->pDrawable) 2152 return false; 2153 2154 switch (picture->pSourcePict->type) { 2155 case SourcePictTypeSolidFill: 2156 case SourcePictTypeLinear: 2157 return false; 2158 default: 2159 return precise; 2160 } 2161} 2162 2163static bool 2164has_alphamap(PicturePtr p) 2165{ 2166 return p->alphaMap != NULL; 2167} 2168 2169static bool 2170need_upload(PicturePtr p) 2171{ 2172 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2173} 2174 2175static bool 2176source_is_busy(PixmapPtr pixmap) 2177{ 2178 struct 
sna_pixmap *priv = sna_pixmap(pixmap); 2179 if (priv == NULL || priv->clear) 2180 return false; 2181 2182 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2183 return true; 2184 2185 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2186 return true; 2187 2188 return priv->gpu_damage && !priv->cpu_damage; 2189} 2190 2191static bool 2192source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2193{ 2194 if (sna_picture_is_solid(p, NULL)) 2195 return false; 2196 2197 if (p->pSourcePict) 2198 return check_gradient(p, precise); 2199 2200 if (!gen8_check_repeat(p) || !gen8_check_format(p->format)) 2201 return true; 2202 2203 if (pixmap && source_is_busy(pixmap)) 2204 return false; 2205 2206 return has_alphamap(p) || !gen8_check_filter(p) || need_upload(p); 2207} 2208 2209static bool 2210gen8_composite_fallback(struct sna *sna, 2211 PicturePtr src, 2212 PicturePtr mask, 2213 PicturePtr dst) 2214{ 2215 PixmapPtr src_pixmap; 2216 PixmapPtr mask_pixmap; 2217 PixmapPtr dst_pixmap; 2218 bool src_fallback, mask_fallback; 2219 2220 if (!gen8_check_dst_format(dst->format)) { 2221 DBG(("%s: unknown destination format: %d\n", 2222 __FUNCTION__, dst->format)); 2223 return true; 2224 } 2225 2226 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2227 2228 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2229 src_fallback = source_fallback(src, src_pixmap, 2230 dst->polyMode == PolyModePrecise); 2231 2232 if (mask) { 2233 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2234 mask_fallback = source_fallback(mask, mask_pixmap, 2235 dst->polyMode == PolyModePrecise); 2236 } else { 2237 mask_pixmap = NULL; 2238 mask_fallback = false; 2239 } 2240 2241 /* If we are using the destination as a source and need to 2242 * read back in order to upload the source, do it all 2243 * on the CPU. 2244 */ 2245 if (src_pixmap == dst_pixmap && src_fallback) { 2246 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2247 return true; 2248 } 2249 if (mask_pixmap == dst_pixmap && mask_fallback) { 2250 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2251 return true; 2252 } 2253 2254 /* If anything is on the GPU, push everything out to the GPU */ 2255 if (dst_use_gpu(dst_pixmap)) { 2256 DBG(("%s: dst is already on the GPU, try to use GPU\n", 2257 __FUNCTION__)); 2258 return false; 2259 } 2260 2261 if (src_pixmap && !src_fallback) { 2262 DBG(("%s: src is already on the GPU, try to use GPU\n", 2263 __FUNCTION__)); 2264 return false; 2265 } 2266 if (mask_pixmap && !mask_fallback) { 2267 DBG(("%s: mask is already on the GPU, try to use GPU\n", 2268 __FUNCTION__)); 2269 return false; 2270 } 2271 2272 /* However if the dst is not on the GPU and we need to 2273 * render one of the sources using the CPU, we may 2274 * as well do the entire operation in place on the CPU.
2275 */ 2276 if (src_fallback) { 2277 DBG(("%s: dst is on the CPU and src will fallback\n", 2278 __FUNCTION__)); 2279 return true; 2280 } 2281 2282 if (mask && mask_fallback) { 2283 DBG(("%s: dst is on the CPU and mask will fallback\n", 2284 __FUNCTION__)); 2285 return true; 2286 } 2287 2288 if (too_large(dst_pixmap->drawable.width, 2289 dst_pixmap->drawable.height) && 2290 dst_is_cpu(dst_pixmap)) { 2291 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2292 return true; 2293 } 2294 2295 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2296 __FUNCTION__)); 2297 return dst_use_cpu(dst_pixmap); 2298} 2299 2300static int 2301reuse_source(struct sna *sna, 2302 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2303 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2304{ 2305 uint32_t color; 2306 2307 if (src_x != msk_x || src_y != msk_y) 2308 return false; 2309 2310 if (src == mask) { 2311 DBG(("%s: mask is source\n", __FUNCTION__)); 2312 *mc = *sc; 2313 mc->bo = kgem_bo_reference(mc->bo); 2314 return true; 2315 } 2316 2317 if (sna_picture_is_solid(mask, &color)) 2318 return gen4_channel_init_solid(sna, mc, color); 2319 2320 if (sc->is_solid) 2321 return false; 2322 2323 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2324 return false; 2325 2326 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2327 2328 if (!sna_transform_equal(src->transform, mask->transform)) 2329 return false; 2330 2331 if (!sna_picture_alphamap_equal(src, mask)) 2332 return false; 2333 2334 if (!gen8_check_repeat(mask)) 2335 return false; 2336 2337 if (!gen8_check_filter(mask)) 2338 return false; 2339 2340 if (!gen8_check_format(mask->format)) 2341 return false; 2342 2343 DBG(("%s: reusing source channel for mask with a twist\n", 2344 __FUNCTION__)); 2345 2346 *mc = *sc; 2347 mc->repeat = gen8_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 2348 mc->filter = gen8_filter(mask->filter); 2349 mc->pict_format = mask->format; 2350 mc->card_format = gen8_get_card_format(mask->format); 2351 mc->bo = kgem_bo_reference(mc->bo); 2352 return true; 2353} 2354 2355static bool 2356gen8_render_composite(struct sna *sna, 2357 uint8_t op, 2358 PicturePtr src, 2359 PicturePtr mask, 2360 PicturePtr dst, 2361 int16_t src_x, int16_t src_y, 2362 int16_t msk_x, int16_t msk_y, 2363 int16_t dst_x, int16_t dst_y, 2364 int16_t width, int16_t height, 2365 unsigned flags, 2366 struct sna_composite_op *tmp) 2367{ 2368 if (op >= ARRAY_SIZE(gen8_blend_op)) 2369 return false; 2370 2371 DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2372 width, height, sna->kgem.mode, sna->kgem.ring)); 2373 2374 if (mask == NULL && 2375 try_blt(sna, op, 2376 src, mask, dst, 2377 src_x, src_y, 2378 msk_x, msk_y, 2379 dst_x, dst_y, 2380 width, height, 2381 flags, tmp)) 2382 return true; 2383 2384 if (gen8_composite_fallback(sna, src, mask, dst)) 2385 goto fallback; 2386 2387 if (need_tiling(sna, width, height)) 2388 return sna_tiling_composite(op, src, mask, dst, 2389 src_x, src_y, 2390 msk_x, msk_y, 2391 dst_x, dst_y, 2392 width, height, 2393 tmp); 2394 2395 if (op == PictOpClear && src == sna->clear) 2396 op = PictOpSrc; 2397 tmp->op = op; 2398 if (!gen8_composite_set_target(sna, tmp, dst, 2399 dst_x, dst_y, width, height, 2400 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2401 goto fallback; 2402 2403 switch (gen8_composite_picture(sna, src, &tmp->src, 2404 src_x, src_y, 2405 width, height, 2406 dst_x, dst_y, 2407 dst->polyMode == PolyModePrecise)) { 2408 case -1: 2409 goto cleanup_dst; 2410 case 0: 2411 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2412 goto cleanup_dst; 2413 /* fall through to fixup */ 2414 case 1: 2415 /* Did we just switch rings to prepare the source? */ 2416 if (mask == NULL && 2417 (prefer_blt_composite(sna, tmp) || 2418 unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && 2419 sna_blt_composite__convert(sna, 2420 dst_x, dst_y, width, height, 2421 tmp)) 2422 return true; 2423 2424 if (!gen8_composite_channel_convert(&tmp->src)) 2425 goto cleanup_src; 2426 2427 break; 2428 } 2429 2430 tmp->is_affine = tmp->src.is_affine; 2431 tmp->has_component_alpha = false; 2432 tmp->need_magic_ca_pass = false; 2433 2434 tmp->mask.bo = NULL; 2435 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2436 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2437 2438 if (mask) { 2439 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2440 tmp->has_component_alpha = true; 2441 2442 /* Check if it's component alpha that relies on a source alpha and on 2443 * the source value. We can only get one of those into the single 2444 * source value that we get to blend with. 
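 * To keep component-alpha Over renderable we split it into two
 * passes that sum to the same result:
 *
 *	pass 1 (OutReverse): dest = dest * (1 - srcA * mask)
 *	pass 2 (Add):        dest = dest + src * mask
 *
 * so below we emit the rectangles with PictOpOutReverse and set
 * need_magic_ca_pass so that gen8_magic_ca_pass() replays them
 * with an Add blend once all the rectangles have been emitted.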
2445 */ 2446 if (gen8_blend_op[op].src_alpha && 2447 (gen8_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 2448 if (op != PictOpOver) 2449 goto cleanup_src; 2450 2451 tmp->need_magic_ca_pass = true; 2452 tmp->op = PictOpOutReverse; 2453 } 2454 } 2455 2456 if (!reuse_source(sna, 2457 src, &tmp->src, src_x, src_y, 2458 mask, &tmp->mask, msk_x, msk_y)) { 2459 switch (gen8_composite_picture(sna, mask, &tmp->mask, 2460 msk_x, msk_y, 2461 width, height, 2462 dst_x, dst_y, 2463 dst->polyMode == PolyModePrecise)) { 2464 case -1: 2465 goto cleanup_src; 2466 case 0: 2467 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2468 goto cleanup_src; 2469 /* fall through to fixup */ 2470 case 1: 2471 if (!gen8_composite_channel_convert(&tmp->mask)) 2472 goto cleanup_mask; 2473 break; 2474 } 2475 } 2476 2477 tmp->is_affine &= tmp->mask.is_affine; 2478 } 2479 2480 tmp->u.gen8.flags = 2481 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2482 tmp->src.repeat, 2483 tmp->mask.filter, 2484 tmp->mask.repeat), 2485 gen8_get_blend(tmp->op, 2486 tmp->has_component_alpha, 2487 tmp->dst.format), 2488 gen8_choose_composite_kernel(tmp->op, 2489 tmp->mask.bo != NULL, 2490 tmp->has_component_alpha, 2491 tmp->is_affine), 2492 gen4_choose_composite_emitter(sna, tmp)); 2493 2494 tmp->blt = gen8_render_composite_blt; 2495 tmp->box = gen8_render_composite_box; 2496 tmp->boxes = gen8_render_composite_boxes__blt; 2497 if (tmp->emit_boxes){ 2498 tmp->boxes = gen8_render_composite_boxes; 2499 tmp->thread_boxes = gen8_render_composite_boxes__thread; 2500 } 2501 tmp->done = gen8_render_composite_done; 2502 2503 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2504 if (!kgem_check_bo(&sna->kgem, 2505 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2506 NULL)) { 2507 kgem_submit(&sna->kgem); 2508 if (!kgem_check_bo(&sna->kgem, 2509 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2510 NULL)) 2511 goto cleanup_mask; 2512 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2513 } 2514 2515 gen8_align_vertex(sna, tmp); 2516 gen8_emit_composite_state(sna, tmp); 2517 return true; 2518 2519cleanup_mask: 2520 if (tmp->mask.bo) { 2521 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2522 tmp->mask.bo = NULL; 2523 } 2524cleanup_src: 2525 if (tmp->src.bo) { 2526 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2527 tmp->src.bo = NULL; 2528 } 2529cleanup_dst: 2530 if (tmp->redirect.real_bo) { 2531 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2532 tmp->redirect.real_bo = NULL; 2533 } 2534fallback: 2535 return (mask == NULL && 2536 sna_blt_composite(sna, op, 2537 src, dst, 2538 src_x, src_y, 2539 dst_x, dst_y, 2540 width, height, 2541 flags | COMPOSITE_FALLBACK, tmp)); 2542} 2543 2544#if !NO_COMPOSITE_SPANS 2545fastcall static void 2546gen8_render_composite_spans_box(struct sna *sna, 2547 const struct sna_composite_spans_op *op, 2548 const BoxRec *box, float opacity) 2549{ 2550 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2551 __FUNCTION__, 2552 op->base.src.offset[0], op->base.src.offset[1], 2553 opacity, 2554 op->base.dst.x, op->base.dst.y, 2555 box->x1, box->y1, 2556 box->x2 - box->x1, 2557 box->y2 - box->y1)); 2558 2559 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_composite_state); 2560 op->prim_emit(sna, op, box, opacity); 2561} 2562 2563static void 2564gen8_render_composite_spans_boxes(struct sna *sna, 2565 const struct sna_composite_spans_op *op, 2566 const BoxRec *box, int nbox, 2567 float opacity) 2568{ 2569 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2570 __FUNCTION__, nbox, 2571 op->base.src.offset[0], 
op->base.src.offset[1], 2572 opacity, 2573 op->base.dst.x, op->base.dst.y)); 2574 2575 do { 2576 int nbox_this_time; 2577 2578 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2579 gen8_emit_composite_state); 2580 nbox -= nbox_this_time; 2581 2582 do { 2583 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2584 box->x1, box->y1, 2585 box->x2 - box->x1, 2586 box->y2 - box->y1)); 2587 2588 op->prim_emit(sna, op, box++, opacity); 2589 } while (--nbox_this_time); 2590 } while (nbox); 2591} 2592 2593fastcall static void 2594gen8_render_composite_spans_boxes__thread(struct sna *sna, 2595 const struct sna_composite_spans_op *op, 2596 const struct sna_opacity_box *box, 2597 int nbox) 2598{ 2599 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2600 __FUNCTION__, nbox, 2601 op->base.src.offset[0], op->base.src.offset[1], 2602 op->base.dst.x, op->base.dst.y)); 2603 2604 sna_vertex_lock(&sna->render); 2605 do { 2606 int nbox_this_time; 2607 float *v; 2608 2609 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2610 gen8_emit_composite_state); 2611 assert(nbox_this_time); 2612 nbox -= nbox_this_time; 2613 2614 v = sna->render.vertices + sna->render.vertex_used; 2615 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2616 2617 sna_vertex_acquire__locked(&sna->render); 2618 sna_vertex_unlock(&sna->render); 2619 2620 op->emit_boxes(op, box, nbox_this_time, v); 2621 box += nbox_this_time; 2622 2623 sna_vertex_lock(&sna->render); 2624 sna_vertex_release__locked(&sna->render); 2625 } while (nbox); 2626 sna_vertex_unlock(&sna->render); 2627} 2628 2629fastcall static void 2630gen8_render_composite_spans_done(struct sna *sna, 2631 const struct sna_composite_spans_op *op) 2632{ 2633 if (sna->render.vertex_offset) 2634 gen8_vertex_flush(sna); 2635 2636 DBG(("%s()\n", __FUNCTION__)); 2637 2638 if (op->base.src.bo) 2639 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2640 2641 sna_render_composite_redirect_done(sna, &op->base); 2642} 2643 2644static bool 2645gen8_check_composite_spans(struct sna *sna, 2646 uint8_t op, PicturePtr src, PicturePtr dst, 2647 int16_t width, int16_t height, unsigned flags) 2648{ 2649 if (op >= ARRAY_SIZE(gen8_blend_op)) 2650 return false; 2651 2652 if (gen8_composite_fallback(sna, src, NULL, dst)) 2653 return false; 2654 2655 if (need_tiling(sna, width, height) && 2656 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2657 DBG(("%s: fallback, tiled operation not on GPU\n", 2658 __FUNCTION__)); 2659 return false; 2660 } 2661 2662 return true; 2663} 2664 2665static bool 2666gen8_render_composite_spans(struct sna *sna, 2667 uint8_t op, 2668 PicturePtr src, 2669 PicturePtr dst, 2670 int16_t src_x, int16_t src_y, 2671 int16_t dst_x, int16_t dst_y, 2672 int16_t width, int16_t height, 2673 unsigned flags, 2674 struct sna_composite_spans_op *tmp) 2675{ 2676 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2677 width, height, flags, sna->kgem.ring)); 2678 2679 assert(gen8_check_composite_spans(sna, op, src, dst, width, height, flags)); 2680 2681 if (need_tiling(sna, width, height)) { 2682 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2683 __FUNCTION__, width, height)); 2684 return sna_tiling_composite_spans(op, src, dst, 2685 src_x, src_y, dst_x, dst_y, 2686 width, height, flags, tmp); 2687 } 2688 2689 tmp->base.op = op; 2690 if (!gen8_composite_set_target(sna, &tmp->base, dst, 2691 dst_x, dst_y, width, height, true)) 2692 return false; 2693 2694 switch (gen8_composite_picture(sna, src, &tmp->base.src, 2695 src_x, src_y, 2696 
width, height, 2697 dst_x, dst_y, 2698 dst->polyMode == PolyModePrecise)) { 2699 case -1: 2700 goto cleanup_dst; 2701 case 0: 2702 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2703 goto cleanup_dst; 2704 /* fall through to fixup */ 2705 case 1: 2706 if (!gen8_composite_channel_convert(&tmp->base.src)) 2707 goto cleanup_src; 2708 break; 2709 } 2710 tmp->base.mask.bo = NULL; 2711 2712 tmp->base.is_affine = tmp->base.src.is_affine; 2713 tmp->base.need_magic_ca_pass = false; 2714 2715 tmp->base.u.gen8.flags = 2716 GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2717 tmp->base.src.repeat, 2718 SAMPLER_FILTER_NEAREST, 2719 SAMPLER_EXTEND_PAD), 2720 gen8_get_blend(tmp->base.op, false, tmp->base.dst.format), 2721 GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2722 gen4_choose_spans_emitter(sna, tmp)); 2723 2724 tmp->box = gen8_render_composite_spans_box; 2725 tmp->boxes = gen8_render_composite_spans_boxes; 2726 if (tmp->emit_boxes) 2727 tmp->thread_boxes = gen8_render_composite_spans_boxes__thread; 2728 tmp->done = gen8_render_composite_spans_done; 2729 2730 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2731 if (!kgem_check_bo(&sna->kgem, 2732 tmp->base.dst.bo, tmp->base.src.bo, 2733 NULL)) { 2734 kgem_submit(&sna->kgem); 2735 if (!kgem_check_bo(&sna->kgem, 2736 tmp->base.dst.bo, tmp->base.src.bo, 2737 NULL)) 2738 goto cleanup_src; 2739 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2740 } 2741 2742 gen8_align_vertex(sna, &tmp->base); 2743 gen8_emit_composite_state(sna, &tmp->base); 2744 return true; 2745 2746cleanup_src: 2747 if (tmp->base.src.bo) 2748 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2749cleanup_dst: 2750 if (tmp->base.redirect.real_bo) 2751 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2752 return false; 2753} 2754#endif 2755 2756static void 2757gen8_emit_copy_state(struct sna *sna, 2758 const struct sna_composite_op *op) 2759{ 2760 uint32_t *binding_table; 2761 uint16_t offset, dirty; 2762 2763 gen8_get_batch(sna, op); 2764 2765 binding_table = gen8_composite_get_binding_table(sna, &offset); 2766 2767 dirty = kgem_bo_is_dirty(op->dst.bo); 2768 2769 binding_table[0] = 2770 gen8_bind_bo(sna, 2771 op->dst.bo, op->dst.width, op->dst.height, 2772 gen8_get_dest_format(op->dst.format), 2773 true); 2774 binding_table[1] = 2775 gen8_bind_bo(sna, 2776 op->src.bo, op->src.width, op->src.height, 2777 op->src.card_format, 2778 false); 2779 2780 if (sna->kgem.surface == offset && 2781 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 2782 sna->kgem.surface += SURFACE_DW; 2783 offset = sna->render_state.gen8.surface_table; 2784 } 2785 2786 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 2787 dirty = 0; 2788 2789 assert(!GEN8_READS_DST(op->u.gen8.flags)); 2790 gen8_emit_state(sna, op, offset | dirty); 2791} 2792 2793static inline bool 2794prefer_blt_copy(struct sna *sna, 2795 struct kgem_bo *src_bo, 2796 struct kgem_bo *dst_bo, 2797 unsigned flags) 2798{ 2799 if (sna->kgem.mode == KGEM_BLT) 2800 return true; 2801 2802 assert((flags & COPY_SYNC) == 0); 2803 2804 if (untiled_tlb_miss(src_bo) || 2805 untiled_tlb_miss(dst_bo)) 2806 return true; 2807 2808 if (flags & COPY_DRI && !sna->kgem.has_semaphores) 2809 return false; 2810 2811 if (force_blt_ring(sna, dst_bo)) 2812 return true; 2813 2814 if ((flags & COPY_SMALL || 2815 (sna->render_state.gt < 3 && src_bo == dst_bo)) && 2816 can_switch_to_blt(sna, dst_bo, flags)) 2817 return true; 2818 2819 if (kgem_bo_is_render(dst_bo) || 2820 
kgem_bo_is_render(src_bo)) 2821 return false; 2822 2823 if (flags & COPY_LAST && 2824 sna->render_state.gt < 3 && 2825 can_switch_to_blt(sna, dst_bo, flags)) 2826 return true; 2827 2828 if (prefer_render_ring(sna, dst_bo)) 2829 return false; 2830 2831 if (!prefer_blt_ring(sna, dst_bo, flags)) 2832 return false; 2833 2834 return prefer_blt_bo(sna, src_bo, dst_bo); 2835} 2836 2837static bool 2838gen8_render_copy_boxes(struct sna *sna, uint8_t alu, 2839 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2840 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2841 const BoxRec *box, int n, unsigned flags) 2842{ 2843 struct sna_composite_op tmp; 2844 BoxRec extents; 2845 2846 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2847 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2848 src_bo == dst_bo, 2849 overlaps(sna, 2850 src_bo, src_dx, src_dy, 2851 dst_bo, dst_dx, dst_dy, 2852 box, n, flags, &extents))); 2853 2854 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2855 sna_blt_compare_depth(src, dst) && 2856 sna_blt_copy_boxes(sna, alu, 2857 src_bo, src_dx, src_dy, 2858 dst_bo, dst_dx, dst_dy, 2859 dst->bitsPerPixel, 2860 box, n)) 2861 return true; 2862 2863 if (!(alu == GXcopy || alu == GXclear) || 2864 unaligned(src_bo, src->bitsPerPixel) || 2865 unaligned(dst_bo, dst->bitsPerPixel)) { 2866fallback_blt: 2867 DBG(("%s: fallback blt\n", __FUNCTION__)); 2868 if (!sna_blt_compare_depth(src, dst)) 2869 return false; 2870 2871 return sna_blt_copy_boxes_fallback(sna, alu, 2872 src, src_bo, src_dx, src_dy, 2873 dst, dst_bo, dst_dx, dst_dy, 2874 box, n); 2875 } 2876 2877 if (overlaps(sna, 2878 src_bo, src_dx, src_dy, 2879 dst_bo, dst_dx, dst_dy, 2880 box, n, flags, 2881 &extents)) { 2882 bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2883 2884 if ((big || !prefer_render_ring(sna, dst_bo)) && 2885 sna_blt_copy_boxes(sna, alu, 2886 src_bo, src_dx, src_dy, 2887 dst_bo, dst_dx, dst_dy, 2888 dst->bitsPerPixel, 2889 box, n)) 2890 return true; 2891 2892 if (big) 2893 goto fallback_blt; 2894 2895 assert(src_bo == dst_bo); 2896 assert(src->depth == dst->depth); 2897 assert(src->width == dst->width); 2898 assert(src->height == dst->height); 2899 return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, 2900 src_dx, src_dy, 2901 dst_dx, dst_dy, 2902 box, n, &extents); 2903 } 2904 2905 if (dst->depth == src->depth) { 2906 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2907 tmp.src.pict_format = tmp.dst.format; 2908 } else { 2909 tmp.dst.format = sna_format_for_depth(dst->depth); 2910 tmp.src.pict_format = sna_format_for_depth(src->depth); 2911 } 2912 if (!gen8_check_format(tmp.src.pict_format)) 2913 goto fallback_blt; 2914 2915 tmp.dst.pixmap = (PixmapPtr)dst; 2916 tmp.dst.width = dst->width; 2917 tmp.dst.height = dst->height; 2918 tmp.dst.bo = dst_bo; 2919 tmp.dst.x = tmp.dst.y = 0; 2920 tmp.damage = NULL; 2921 2922 sna_render_composite_redirect_init(&tmp); 2923 if (too_large(tmp.dst.width, tmp.dst.height)) { 2924 int i; 2925 2926 extents = box[0]; 2927 for (i = 1; i < n; i++) { 2928 if (box[i].x1 < extents.x1) 2929 extents.x1 = box[i].x1; 2930 if (box[i].y1 < extents.y1) 2931 extents.y1 = box[i].y1; 2932 2933 if (box[i].x2 > extents.x2) 2934 extents.x2 = box[i].x2; 2935 if (box[i].y2 > extents.y2) 2936 extents.y2 = box[i].y2; 2937 } 2938 2939 if (!sna_render_composite_redirect(sna, &tmp, 2940 extents.x1 + dst_dx, 2941 extents.y1 + dst_dy, 2942 extents.x2 - extents.x1, 
2943 extents.y2 - extents.y1, 2944 n > 1)) 2945 goto fallback_tiled; 2946 } 2947 2948 tmp.src.card_format = gen8_get_card_format(tmp.src.pict_format); 2949 if (too_large(src->width, src->height)) { 2950 int i; 2951 2952 extents = box[0]; 2953 for (i = 1; i < n; i++) { 2954 if (box[i].x1 < extents.x1) 2955 extents.x1 = box[i].x1; 2956 if (box[i].y1 < extents.y1) 2957 extents.y1 = box[i].y1; 2958 2959 if (box[i].x2 > extents.x2) 2960 extents.x2 = box[i].x2; 2961 if (box[i].y2 > extents.y2) 2962 extents.y2 = box[i].y2; 2963 } 2964 2965 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2966 extents.x1 + src_dx, 2967 extents.y1 + src_dy, 2968 extents.x2 - extents.x1, 2969 extents.y2 - extents.y1)) 2970 goto fallback_tiled_dst; 2971 } else { 2972 tmp.src.bo = src_bo; 2973 tmp.src.width = src->width; 2974 tmp.src.height = src->height; 2975 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2976 } 2977 2978 tmp.mask.bo = NULL; 2979 2980 tmp.floats_per_vertex = 2; 2981 tmp.floats_per_rect = 6; 2982 tmp.need_magic_ca_pass = 0; 2983 2984 tmp.u.gen8.flags = COPY_FLAGS(alu); 2985 2986 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2987 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2988 kgem_submit(&sna->kgem); 2989 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2990 if (tmp.src.bo != src_bo) 2991 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2992 if (tmp.redirect.real_bo) 2993 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2994 goto fallback_blt; 2995 } 2996 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2997 } 2998 2999 src_dx += tmp.src.offset[0]; 3000 src_dy += tmp.src.offset[1]; 3001 3002 dst_dx += tmp.dst.x; 3003 dst_dy += tmp.dst.y; 3004 3005 tmp.dst.x = tmp.dst.y = 0; 3006 3007 gen8_align_vertex(sna, &tmp); 3008 gen8_emit_copy_state(sna, &tmp); 3009 3010 do { 3011 int16_t *v; 3012 int n_this_time; 3013 3014 n_this_time = gen8_get_rectangles(sna, &tmp, n, 3015 gen8_emit_copy_state); 3016 n -= n_this_time; 3017 3018 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3019 sna->render.vertex_used += 6 * n_this_time; 3020 assert(sna->render.vertex_used <= sna->render.vertex_size); 3021 do { 3022 3023 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3024 box->x1 + src_dx, box->y1 + src_dy, 3025 box->x1 + dst_dx, box->y1 + dst_dy, 3026 box->x2 - box->x1, box->y2 - box->y1)); 3027 v[0] = box->x2 + dst_dx; 3028 v[2] = box->x2 + src_dx; 3029 v[1] = v[5] = box->y2 + dst_dy; 3030 v[3] = v[7] = box->y2 + src_dy; 3031 v[8] = v[4] = box->x1 + dst_dx; 3032 v[10] = v[6] = box->x1 + src_dx; 3033 v[9] = box->y1 + dst_dy; 3034 v[11] = box->y1 + src_dy; 3035 v += 12; box++; 3036 } while (--n_this_time); 3037 } while (n); 3038 3039 gen8_vertex_flush(sna); 3040 sna_render_composite_redirect_done(sna, &tmp); 3041 if (tmp.src.bo != src_bo) 3042 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3043 return true; 3044 3045fallback_tiled_dst: 3046 if (tmp.redirect.real_bo) 3047 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3048fallback_tiled: 3049 DBG(("%s: fallback tiled\n", __FUNCTION__)); 3050 if (sna_blt_compare_depth(src, dst) && 3051 sna_blt_copy_boxes(sna, alu, 3052 src_bo, src_dx, src_dy, 3053 dst_bo, dst_dx, dst_dy, 3054 dst->bitsPerPixel, 3055 box, n)) 3056 return true; 3057 3058 return sna_tiling_copy_boxes(sna, alu, 3059 src, src_bo, src_dx, src_dy, 3060 dst, dst_bo, dst_dx, dst_dy, 3061 box, n); 3062} 3063 3064static void 3065gen8_render_copy_blt(struct sna *sna, 3066 const struct sna_copy_op *op, 3067 int16_t sx, int16_t sy, 3068 int16_t w, int16_t h, 3069 int16_t dx, int16_t dy) 3070{ 3071 
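	/* Each copy rectangle is emitted as the three corners of a
	 * RECTLIST (bottom-right, bottom-left, top-left; the hardware
	 * infers the fourth corner), each vertex being a dst (x, y)
	 * pair followed by the matching src (x, y) pair -- hence the
	 * twelve int16_t writes below.
	 */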
int16_t *v; 3072 3073 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_copy_state); 3074 3075 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3076 sna->render.vertex_used += 6; 3077 assert(sna->render.vertex_used <= sna->render.vertex_size); 3078 3079 v[0] = dx+w; v[1] = dy+h; 3080 v[2] = sx+w; v[3] = sy+h; 3081 v[4] = dx; v[5] = dy+h; 3082 v[6] = sx; v[7] = sy+h; 3083 v[8] = dx; v[9] = dy; 3084 v[10] = sx; v[11] = sy; 3085} 3086 3087static void 3088gen8_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3089{ 3090 if (sna->render.vertex_offset) 3091 gen8_vertex_flush(sna); 3092} 3093 3094static bool 3095gen8_render_copy(struct sna *sna, uint8_t alu, 3096 PixmapPtr src, struct kgem_bo *src_bo, 3097 PixmapPtr dst, struct kgem_bo *dst_bo, 3098 struct sna_copy_op *op) 3099{ 3100 DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 3101 __FUNCTION__, alu, 3102 src->drawable.width, src->drawable.height, 3103 dst->drawable.width, dst->drawable.height)); 3104 3105 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 3106 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3107 sna_blt_copy(sna, alu, 3108 src_bo, dst_bo, 3109 dst->drawable.bitsPerPixel, 3110 op)) 3111 return true; 3112 3113 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3114 too_large(src->drawable.width, src->drawable.height) || 3115 too_large(dst->drawable.width, dst->drawable.height) || 3116 unaligned(src_bo, src->drawable.bitsPerPixel) || 3117 unaligned(dst_bo, dst->drawable.bitsPerPixel)) { 3118fallback: 3119 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3120 return false; 3121 3122 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3123 dst->drawable.bitsPerPixel, 3124 op); 3125 } 3126 3127 if (dst->drawable.depth == src->drawable.depth) { 3128 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3129 op->base.src.pict_format = op->base.dst.format; 3130 } else { 3131 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3132 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3133 } 3134 if (!gen8_check_format(op->base.src.pict_format)) 3135 goto fallback; 3136 3137 op->base.dst.pixmap = dst; 3138 op->base.dst.width = dst->drawable.width; 3139 op->base.dst.height = dst->drawable.height; 3140 op->base.dst.bo = dst_bo; 3141 3142 op->base.src.bo = src_bo; 3143 op->base.src.card_format = 3144 gen8_get_card_format(op->base.src.pict_format); 3145 op->base.src.width = src->drawable.width; 3146 op->base.src.height = src->drawable.height; 3147 3148 op->base.mask.bo = NULL; 3149 3150 op->base.floats_per_vertex = 2; 3151 op->base.floats_per_rect = 6; 3152 3153 op->base.u.gen8.flags = COPY_FLAGS(alu); 3154 3155 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3156 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3157 kgem_submit(&sna->kgem); 3158 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3159 goto fallback; 3160 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3161 } 3162 3163 gen8_align_vertex(sna, &op->base); 3164 gen8_emit_copy_state(sna, &op->base); 3165 3166 op->blt = gen8_render_copy_blt; 3167 op->done = gen8_render_copy_done; 3168 return true; 3169} 3170 3171static void 3172gen8_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3173{ 3174 uint32_t *binding_table; 3175 uint16_t offset, dirty; 3176 3177 /* XXX Render Target Fast Clear 3178 * Set RTFC Enable in PS and render a rectangle. 3179 * Limited to clearing the full MSC surface only with a 3180 * specific kernel.
3181 */ 3182 3183 gen8_get_batch(sna, op); 3184 3185 binding_table = gen8_composite_get_binding_table(sna, &offset); 3186 3187 dirty = kgem_bo_is_dirty(op->dst.bo); 3188 3189 binding_table[0] = 3190 gen8_bind_bo(sna, 3191 op->dst.bo, op->dst.width, op->dst.height, 3192 gen8_get_dest_format(op->dst.format), 3193 true); 3194 binding_table[1] = 3195 gen8_bind_bo(sna, 3196 op->src.bo, 1, 1, 3197 SURFACEFORMAT_B8G8R8A8_UNORM, 3198 false); 3199 3200 if (sna->kgem.surface == offset && 3201 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 3202 sna->kgem.surface += SURFACE_DW; 3203 offset = sna->render_state.gen8.surface_table; 3204 } 3205 3206 if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 3207 dirty = 0; 3208 3209 gen8_emit_state(sna, op, offset | dirty); 3210} 3211 3212static bool 3213gen8_render_fill_boxes(struct sna *sna, 3214 CARD8 op, 3215 PictFormat format, 3216 const xRenderColor *color, 3217 const DrawableRec *dst, struct kgem_bo *dst_bo, 3218 const BoxRec *box, int n) 3219{ 3220 struct sna_composite_op tmp; 3221 uint32_t pixel; 3222 3223 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3224 __FUNCTION__, op, 3225 color->red, color->green, color->blue, color->alpha, (int)format)); 3226 3227 if (op >= ARRAY_SIZE(gen8_blend_op)) { 3228 DBG(("%s: fallback due to unhandled blend op: %d\n", 3229 __FUNCTION__, op)); 3230 return false; 3231 } 3232 3233 if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3234 !gen8_check_dst_format(format) || 3235 unaligned(dst_bo, PICT_FORMAT_BPP(format))) { 3236 uint8_t alu = GXinvalid; 3237 3238 if (op <= PictOpSrc) { 3239 pixel = 0; 3240 if (op == PictOpClear) 3241 alu = GXclear; 3242 else if (sna_get_pixel_from_rgba(&pixel, 3243 color->red, 3244 color->green, 3245 color->blue, 3246 color->alpha, 3247 format)) 3248 alu = GXcopy; 3249 } 3250 3251 if (alu != GXinvalid && 3252 sna_blt_fill_boxes(sna, alu, 3253 dst_bo, dst->bitsPerPixel, 3254 pixel, box, n)) 3255 return true; 3256 3257 if (!gen8_check_dst_format(format)) 3258 return false; 3259 } 3260 3261 if (op == PictOpClear) { 3262 pixel = 0; 3263 op = PictOpSrc; 3264 } else if (!sna_get_pixel_from_rgba(&pixel, 3265 color->red, 3266 color->green, 3267 color->blue, 3268 color->alpha, 3269 PICT_a8r8g8b8)) 3270 return false; 3271 3272 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3273 __FUNCTION__, pixel, n, 3274 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3275 3276 tmp.dst.pixmap = (PixmapPtr)dst; 3277 tmp.dst.width = dst->width; 3278 tmp.dst.height = dst->height; 3279 tmp.dst.format = format; 3280 tmp.dst.bo = dst_bo; 3281 tmp.dst.x = tmp.dst.y = 0; 3282 tmp.damage = NULL; 3283 3284 sna_render_composite_redirect_init(&tmp); 3285 if (too_large(dst->width, dst->height)) { 3286 BoxRec extents; 3287 3288 boxes_extents(box, n, &extents); 3289 if (!sna_render_composite_redirect(sna, &tmp, 3290 extents.x1, extents.y1, 3291 extents.x2 - extents.x1, 3292 extents.y2 - extents.y1, 3293 n > 1)) 3294 return sna_tiling_fill_boxes(sna, op, format, color, 3295 dst, dst_bo, box, n); 3296 } 3297 3298 tmp.src.bo = sna_render_get_solid(sna, pixel); 3299 tmp.mask.bo = NULL; 3300 3301 tmp.floats_per_vertex = 2; 3302 tmp.floats_per_rect = 6; 3303 tmp.need_magic_ca_pass = false; 3304 3305 tmp.u.gen8.flags = FILL_FLAGS(op, format); 3306 3307 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3308 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3309 kgem_submit(&sna->kgem); 3310 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3311 
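			/* Out of batch space even after submitting:
			 * release the solid source and any redirection
			 * before reporting failure so the caller can
			 * fall back.
			 */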
kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3312 tmp.src.bo = NULL; 3313 3314 if (tmp.redirect.real_bo) { 3315 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3316 tmp.redirect.real_bo = NULL; 3317 } 3318 3319 return false; 3320 } 3321 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3322 } 3323 3324 gen8_align_vertex(sna, &tmp); 3325 gen8_emit_fill_state(sna, &tmp); 3326 3327 do { 3328 int n_this_time; 3329 int16_t *v; 3330 3331 n_this_time = gen8_get_rectangles(sna, &tmp, n, 3332 gen8_emit_fill_state); 3333 n -= n_this_time; 3334 3335 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3336 sna->render.vertex_used += 6 * n_this_time; 3337 assert(sna->render.vertex_used <= sna->render.vertex_size); 3338 do { 3339 DBG((" (%d, %d), (%d, %d)\n", 3340 box->x1, box->y1, box->x2, box->y2)); 3341 3342 v[0] = box->x2; 3343 v[5] = v[1] = box->y2; 3344 v[8] = v[4] = box->x1; 3345 v[9] = box->y1; 3346 v[2] = v[3] = v[7] = 1; 3347 v[6] = v[10] = v[11] = 0; 3348 v += 12; box++; 3349 } while (--n_this_time); 3350 } while (n); 3351 3352 gen8_vertex_flush(sna); 3353 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3354 sna_render_composite_redirect_done(sna, &tmp); 3355 return true; 3356} 3357 3358static void 3359gen8_render_fill_op_blt(struct sna *sna, 3360 const struct sna_fill_op *op, 3361 int16_t x, int16_t y, int16_t w, int16_t h) 3362{ 3363 int16_t *v; 3364 3365 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3366 3367 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3368 3369 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3370 sna->render.vertex_used += 6; 3371 assert(sna->render.vertex_used <= sna->render.vertex_size); 3372 3373 v[0] = x+w; 3374 v[4] = v[8] = x; 3375 v[1] = v[5] = y+h; 3376 v[9] = y; 3377 3378 v[2] = v[3] = v[7] = 1; 3379 v[6] = v[10] = v[11] = 0; 3380} 3381 3382fastcall static void 3383gen8_render_fill_op_box(struct sna *sna, 3384 const struct sna_fill_op *op, 3385 const BoxRec *box) 3386{ 3387 int16_t *v; 3388 3389 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3390 box->x1, box->y1, box->x2, box->y2)); 3391 3392 gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3393 3394 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3395 sna->render.vertex_used += 6; 3396 assert(sna->render.vertex_used <= sna->render.vertex_size); 3397 3398 v[0] = box->x2; 3399 v[8] = v[4] = box->x1; 3400 v[5] = v[1] = box->y2; 3401 v[9] = box->y1; 3402 3403 v[7] = v[2] = v[3] = 1; 3404 v[6] = v[10] = v[11] = 0; 3405} 3406 3407fastcall static void 3408gen8_render_fill_op_boxes(struct sna *sna, 3409 const struct sna_fill_op *op, 3410 const BoxRec *box, 3411 int nbox) 3412{ 3413 DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, 3414 box->x1, box->y1, box->x2, box->y2, nbox)); 3415 3416 do { 3417 int nbox_this_time; 3418 int16_t *v; 3419 3420 nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 3421 gen8_emit_fill_state); 3422 nbox -= nbox_this_time; 3423 3424 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3425 sna->render.vertex_used += 6 * nbox_this_time; 3426 assert(sna->render.vertex_used <= sna->render.vertex_size); 3427 3428 do { 3429 v[0] = box->x2; 3430 v[8] = v[4] = box->x1; 3431 v[5] = v[1] = box->y2; 3432 v[9] = box->y1; 3433 v[7] = v[2] = v[3] = 1; 3434 v[6] = v[10] = v[11] = 0; 3435 box++; v += 12; 3436 } while (--nbox_this_time); 3437 } while (nbox); 3438} 3439 3440static void 3441gen8_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3442{ 3443 if (sna->render.vertex_offset) 3444 gen8_vertex_flush(sna); 3445 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3446} 3447 3448static bool 3449gen8_render_fill(struct sna *sna, uint8_t alu, 3450 PixmapPtr dst, struct kgem_bo *dst_bo, 3451 uint32_t color, unsigned flags, 3452 struct sna_fill_op *op) 3453{ 3454 DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3455 3456 if (prefer_blt_fill(sna, dst_bo, flags) && 3457 sna_blt_fill(sna, alu, 3458 dst_bo, dst->drawable.bitsPerPixel, 3459 color, 3460 op)) 3461 return true; 3462 3463 if (!(alu == GXcopy || alu == GXclear) || 3464 too_large(dst->drawable.width, dst->drawable.height) || 3465 unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3466 return sna_blt_fill(sna, alu, 3467 dst_bo, dst->drawable.bitsPerPixel, 3468 color, 3469 op); 3470 3471 if (alu == GXclear) 3472 color = 0; 3473 3474 op->base.dst.pixmap = dst; 3475 op->base.dst.width = dst->drawable.width; 3476 op->base.dst.height = dst->drawable.height; 3477 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3478 op->base.dst.bo = dst_bo; 3479 op->base.dst.x = op->base.dst.y = 0; 3480 3481 op->base.src.bo = 3482 sna_render_get_solid(sna, 3483 sna_rgba_for_color(color, 3484 dst->drawable.depth)); 3485 op->base.mask.bo = NULL; 3486 3487 op->base.need_magic_ca_pass = false; 3488 op->base.floats_per_vertex = 2; 3489 op->base.floats_per_rect = 6; 3490 3491 op->base.u.gen8.flags = FILL_FLAGS_NOBLEND; 3492 3493 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3494 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3495 kgem_submit(&sna->kgem); 3496 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3497 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3498 return false; 3499 } 3500 3501 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3502 } 3503 3504 gen8_align_vertex(sna, &op->base); 3505 gen8_emit_fill_state(sna, &op->base); 3506 3507 op->blt = gen8_render_fill_op_blt; 3508 op->box = gen8_render_fill_op_box; 3509 op->boxes = gen8_render_fill_op_boxes; 3510 op->points = NULL; 3511 op->done = gen8_render_fill_op_done; 3512 return true; 3513} 3514 3515static bool 3516gen8_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3517 uint32_t color, 3518 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3519 uint8_t alu) 3520{ 3521 BoxRec box; 3522 3523 box.x1 = x1; 3524 box.y1 = y1; 3525 box.x2 = x2; 3526 box.y2 = y2; 3527 3528 return sna_blt_fill_boxes(sna, alu, 3529 bo, dst->drawable.bitsPerPixel, 3530 color, &box, 1); 3531} 3532 3533static bool 3534gen8_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3535 uint32_t color, 3536 int16_t x1, int16_t y1, 3537 int16_t x2, int16_t y2, 3538 uint8_t alu) 3539{ 3540 struct sna_composite_op tmp; 3541 int16_t *v; 3542 3543 
/* Prefer to use the BLT if already engaged */ 3544 if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3545 gen8_render_fill_one_try_blt(sna, dst, bo, color, 3546 x1, y1, x2, y2, alu)) 3547 return true; 3548 3549 /* Must use the BLT if we can't RENDER... */ 3550 if (!(alu == GXcopy || alu == GXclear) || 3551 too_large(dst->drawable.width, dst->drawable.height) || 3552 unaligned(bo, dst->drawable.bitsPerPixel)) 3553 return gen8_render_fill_one_try_blt(sna, dst, bo, color, 3554 x1, y1, x2, y2, alu); 3555 3556 if (alu == GXclear) 3557 color = 0; 3558 3559 tmp.dst.pixmap = dst; 3560 tmp.dst.width = dst->drawable.width; 3561 tmp.dst.height = dst->drawable.height; 3562 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3563 tmp.dst.bo = bo; 3564 tmp.dst.x = tmp.dst.y = 0; 3565 3566 tmp.src.bo = 3567 sna_render_get_solid(sna, 3568 sna_rgba_for_color(color, 3569 dst->drawable.depth)); 3570 tmp.mask.bo = NULL; 3571 3572 tmp.floats_per_vertex = 2; 3573 tmp.floats_per_rect = 6; 3574 tmp.need_magic_ca_pass = false; 3575 3576 tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3577 3578 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3579 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3580 kgem_submit(&sna->kgem); 3581 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3582 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3583 return false; 3584 } 3585 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3586 } 3587 3588 gen8_align_vertex(sna, &tmp); 3589 gen8_emit_fill_state(sna, &tmp); 3590 3591 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3592 3593 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3594 3595 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3596 sna->render.vertex_used += 6; 3597 assert(sna->render.vertex_used <= sna->render.vertex_size); 3598 3599 v[0] = x2; 3600 v[8] = v[4] = x1; 3601 v[5] = v[1] = y2; 3602 v[9] = y1; 3603 v[7] = v[2] = v[3] = 1; 3604 v[6] = v[10] = v[11] = 0; 3605 3606 gen8_vertex_flush(sna); 3607 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3608 3609 return true; 3610} 3611 3612static bool 3613gen8_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3614{ 3615 BoxRec box; 3616 3617 box.x1 = 0; 3618 box.y1 = 0; 3619 box.x2 = dst->drawable.width; 3620 box.y2 = dst->drawable.height; 3621 3622 return sna_blt_fill_boxes(sna, GXclear, 3623 bo, dst->drawable.bitsPerPixel, 3624 0, &box, 1); 3625} 3626 3627static bool 3628gen8_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3629{ 3630 struct sna_composite_op tmp; 3631 int16_t *v; 3632 3633 DBG(("%s: %dx%d\n", 3634 __FUNCTION__, 3635 dst->drawable.width, 3636 dst->drawable.height)); 3637 3638 /* Prefer to use the BLT if already engaged */ 3639 if (sna->kgem.mode == KGEM_BLT && 3640 gen8_render_clear_try_blt(sna, dst, bo)) 3641 return true; 3642 3643 /* Must use the BLT if we can't RENDER... 
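	 * i.e. when the target is too large for the 3D pipe or its bo
	 * is unsuitably aligned for a render target.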
*/ 3644 if (too_large(dst->drawable.width, dst->drawable.height) || 3645 unaligned(bo, dst->drawable.bitsPerPixel)) 3646 return gen8_render_clear_try_blt(sna, dst, bo); 3647 3648 tmp.dst.pixmap = dst; 3649 tmp.dst.width = dst->drawable.width; 3650 tmp.dst.height = dst->drawable.height; 3651 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3652 tmp.dst.bo = bo; 3653 tmp.dst.x = tmp.dst.y = 0; 3654 3655 tmp.src.bo = sna_render_get_solid(sna, 0); 3656 tmp.mask.bo = NULL; 3657 3658 tmp.floats_per_vertex = 2; 3659 tmp.floats_per_rect = 6; 3660 tmp.need_magic_ca_pass = false; 3661 3662 tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3663 3664 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3665 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3666 kgem_submit(&sna->kgem); 3667 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3668 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3669 return false; 3670 } 3671 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3672 } 3673 3674 gen8_align_vertex(sna, &tmp); 3675 gen8_emit_fill_state(sna, &tmp); 3676 3677 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3678 3679 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3680 sna->render.vertex_used += 6; 3681 assert(sna->render.vertex_used <= sna->render.vertex_size); 3682 3683 v[0] = dst->drawable.width; 3684 v[5] = v[1] = dst->drawable.height; 3685 v[8] = v[4] = 0; 3686 v[9] = 0; 3687 3688 v[7] = v[2] = v[3] = 1; 3689 v[6] = v[10] = v[11] = 0; 3690 3691 gen8_vertex_flush(sna); 3692 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3693 3694 return true; 3695} 3696 3697#if !NO_VIDEO 3698static uint32_t gen8_bind_video_source(struct sna *sna, 3699 struct kgem_bo *bo, 3700 uint32_t delta, 3701 int width, 3702 int height, 3703 int pitch, 3704 uint32_t format) 3705{ 3706 uint32_t *ss; 3707 int offset; 3708 3709 offset = sna->kgem.surface -= SURFACE_DW; 3710 ss = sna->kgem.batch + offset; 3711 ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 3712 gen8_tiling_bits(bo->tiling) | 3713 format << SURFACE_FORMAT_SHIFT | 3714 SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 3715 ss[1] = 0; 3716 ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 3717 (height - 1) << SURFACE_HEIGHT_SHIFT); 3718 ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; 3719 ss[4] = 0; 3720 ss[5] = 0; 3721 ss[6] = 0; 3722 ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 3723 *(uint64_t *)(ss+8) = 3724 kgem_add_reloc64(&sna->kgem, offset + 8, bo, 3725 I915_GEM_DOMAIN_SAMPLER << 16, 3726 delta); 3727 ss[10] = 0; 3728 ss[11] = 0; 3729 ss[12] = 0; 3730 ss[13] = 0; 3731 ss[14] = 0; 3732 ss[15] = 0; 3733 3734 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", 3735 offset, bo->handle, ss[1], 3736 format, width, height, bo->pitch, bo->tiling)); 3737 3738 return offset * sizeof(uint32_t); 3739} 3740 3741static void gen8_emit_video_state(struct sna *sna, 3742 const struct sna_composite_op *op) 3743{ 3744 struct sna_video_frame *frame = op->priv; 3745 uint32_t src_surf_format[6]; 3746 uint32_t src_surf_base[6]; 3747 int src_width[6]; 3748 int src_height[6]; 3749 int src_pitch[6]; 3750 uint32_t *binding_table; 3751 uint16_t offset; 3752 int n_src, n; 3753 3754 /* XXX VeBox, bicubic */ 3755 3756 gen8_get_batch(sna, op); 3757 3758 src_surf_base[0] = 0; 3759 src_surf_base[1] = 0; 3760 src_surf_base[2] = frame->VBufOffset; 3761 src_surf_base[3] = frame->VBufOffset; 3762 src_surf_base[4] = frame->UBufOffset; 3763 src_surf_base[5] = frame->UBufOffset; 3764 3765 if (is_planar_fourcc(frame->id)) { 3766 for (n = 0; n < 2; n++) { 3767 src_surf_format[n] = 
SURFACEFORMAT_R8_UNORM; 3768 src_width[n] = frame->width; 3769 src_height[n] = frame->height; 3770 src_pitch[n] = frame->pitch[1]; 3771 } 3772 for (; n < 6; n++) { 3773 if (is_nv12_fourcc(frame->id)) 3774 src_surf_format[n] = SURFACEFORMAT_R8G8_UNORM; 3775 else 3776 src_surf_format[n] = SURFACEFORMAT_R8_UNORM; 3777 src_width[n] = frame->width / 2; 3778 src_height[n] = frame->height / 2; 3779 src_pitch[n] = frame->pitch[0]; 3780 } 3781 n_src = 6; 3782 } else { 3783 if (frame->id == FOURCC_RGB888) 3784 src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM; 3785 else if (frame->id == FOURCC_UYVY) 3786 src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY; 3787 else 3788 src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL; 3789 3790 src_width[0] = frame->width; 3791 src_height[0] = frame->height; 3792 src_pitch[0] = frame->pitch[0]; 3793 n_src = 1; 3794 } 3795 3796 binding_table = gen8_composite_get_binding_table(sna, &offset); 3797 3798 binding_table[0] = 3799 gen8_bind_bo(sna, 3800 op->dst.bo, op->dst.width, op->dst.height, 3801 gen8_get_dest_format(op->dst.format), 3802 true); 3803 for (n = 0; n < n_src; n++) { 3804 binding_table[1+n] = 3805 gen8_bind_video_source(sna, 3806 frame->bo, 3807 src_surf_base[n], 3808 src_width[n], 3809 src_height[n], 3810 src_pitch[n], 3811 src_surf_format[n]); 3812 } 3813 3814 gen8_emit_state(sna, op, offset); 3815} 3816 3817static unsigned select_video_kernel(const struct sna_video *video, 3818 const struct sna_video_frame *frame) 3819{ 3820 switch (frame->id) { 3821 case FOURCC_YV12: 3822 case FOURCC_I420: 3823 case FOURCC_XVMC: 3824 return video->colorspace ? 3825 GEN8_WM_KERNEL_VIDEO_PLANAR_BT709 : 3826 GEN8_WM_KERNEL_VIDEO_PLANAR_BT601; 3827 3828 case FOURCC_NV12: 3829 return video->colorspace ? 3830 GEN8_WM_KERNEL_VIDEO_NV12_BT709 : 3831 GEN8_WM_KERNEL_VIDEO_NV12_BT601; 3832 3833 case FOURCC_RGB888: 3834 case FOURCC_RGB565: 3835 return GEN8_WM_KERNEL_VIDEO_RGB; 3836 3837 default: 3838 return video->colorspace ? 
3839 GEN8_WM_KERNEL_VIDEO_PACKED_BT709 : 3840 GEN8_WM_KERNEL_VIDEO_PACKED_BT601; 3841 } 3842} 3843 3844static bool 3845gen8_render_video(struct sna *sna, 3846 struct sna_video *video, 3847 struct sna_video_frame *frame, 3848 RegionPtr dstRegion, 3849 PixmapPtr pixmap) 3850{ 3851 struct sna_composite_op tmp; 3852 struct sna_pixmap *priv = sna_pixmap(pixmap); 3853 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3854 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3855 int src_width = frame->src.x2 - frame->src.x1; 3856 int src_height = frame->src.y2 - frame->src.y1; 3857 float src_offset_x, src_offset_y; 3858 float src_scale_x, src_scale_y; 3859 unsigned filter; 3860 const BoxRec *box; 3861 int nbox; 3862 3863 DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 3864 __FUNCTION__, 3865 src_width, src_height, dst_width, dst_height, 3866 region_num_rects(dstRegion), 3867 REGION_EXTENTS(NULL, dstRegion)->x1, 3868 REGION_EXTENTS(NULL, dstRegion)->y1, 3869 REGION_EXTENTS(NULL, dstRegion)->x2, 3870 REGION_EXTENTS(NULL, dstRegion)->y2)); 3871 3872 assert(priv->gpu_bo); 3873 assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); 3874 assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); 3875 3876 memset(&tmp, 0, sizeof(tmp)); 3877 3878 tmp.dst.pixmap = pixmap; 3879 tmp.dst.width = pixmap->drawable.width; 3880 tmp.dst.height = pixmap->drawable.height; 3881 tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 3882 tmp.dst.bo = priv->gpu_bo; 3883 3884 tmp.src.bo = frame->bo; 3885 tmp.mask.bo = NULL; 3886 3887 tmp.floats_per_vertex = 3; 3888 tmp.floats_per_rect = 9; 3889 3890 DBG(("%s: scaling?=%d, planar?=%d [%x]\n", 3891 __FUNCTION__, 3892 src_width != dst_width || src_height != dst_height, 3893 is_planar_fourcc(frame->id), frame->id)); 3894 3895 if (src_width == dst_width && src_height == dst_height) 3896 filter = SAMPLER_FILTER_NEAREST; 3897 else 3898 filter = SAMPLER_FILTER_BILINEAR; 3899 3900 tmp.u.gen8.flags = 3901 GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 3902 SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 3903 NO_BLEND, 3904 select_video_kernel(video, frame), 3905 2); 3906 tmp.priv = frame; 3907 3908 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3909 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 3910 kgem_submit(&sna->kgem); 3911 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 3912 return false; 3913 3914 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3915 } 3916 3917 gen8_align_vertex(sna, &tmp); 3918 gen8_emit_video_state(sna, &tmp); 3919 3920 DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 3921 __FUNCTION__, 3922 frame->src.x1, frame->src.y1, 3923 src_width, src_height, 3924 dst_width, dst_height, 3925 frame->width, frame->height)); 3926 3927 src_scale_x = (float)src_width / dst_width / frame->width; 3928 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 3929 3930 src_scale_y = (float)src_height / dst_height / frame->height; 3931 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 3932 3933 DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 3934 __FUNCTION__, 3935 src_scale_x, src_scale_y, 3936 src_offset_x, src_offset_y)); 3937 3938 box = region_rects(dstRegion); 3939 nbox = region_num_rects(dstRegion); 3940 while (nbox--) { 3941 DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 3942 __FUNCTION__, 3943 box->x1, box->y1, 3944 box->x2, box->y2, 3945 
box->x1 * src_scale_x + src_offset_x, 3946 box->y1 * src_scale_y + src_offset_y, 3947 box->x2 * src_scale_x + src_offset_x, 3948 box->y2 * src_scale_y + src_offset_y)); 3949 3950 gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); 3951 3952 OUT_VERTEX(box->x2, box->y2); 3953 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 3954 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3955 3956 OUT_VERTEX(box->x1, box->y2); 3957 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3958 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3959 3960 OUT_VERTEX(box->x1, box->y1); 3961 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3962 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 3963 3964 box++; 3965 } 3966 gen8_vertex_flush(sna); 3967 3968 if (!DAMAGE_IS_ALL(priv->gpu_damage)) 3969 sna_damage_add(&priv->gpu_damage, dstRegion); 3970 3971 return true; 3972} 3973#endif 3974 3975static void gen8_render_flush(struct sna *sna) 3976{ 3977 gen8_vertex_close(sna); 3978 3979 assert(sna->render.vb_id == 0); 3980 assert(sna->render.vertex_offset == 0); 3981} 3982 3983static void gen8_render_reset(struct sna *sna) 3984{ 3985 sna->render_state.gen8.emit_flush = false; 3986 sna->render_state.gen8.needs_invariant = true; 3987 sna->render_state.gen8.ve_id = 3 << 2; 3988 sna->render_state.gen8.last_primitive = -1; 3989 3990 sna->render_state.gen8.num_sf_outputs = 0; 3991 sna->render_state.gen8.samplers = -1; 3992 sna->render_state.gen8.blend = -1; 3993 sna->render_state.gen8.kernel = -1; 3994 sna->render_state.gen8.drawrect_offset = -1; 3995 sna->render_state.gen8.drawrect_limit = -1; 3996 sna->render_state.gen8.surface_table = 0; 3997 3998 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3999 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 4000 discard_vbo(sna); 4001 } 4002 4003 sna->render.vertex_offset = 0; 4004 sna->render.nvertex_reloc = 0; 4005 sna->render.vb_id = 0; 4006} 4007 4008static void gen8_render_fini(struct sna *sna) 4009{ 4010 kgem_bo_destroy(&sna->kgem, sna->render_state.gen8.general_bo); 4011} 4012 4013static bool gen8_render_setup(struct sna *sna) 4014{ 4015 struct gen8_render_state *state = &sna->render_state.gen8; 4016 struct sna_static_stream general; 4017 struct gen8_sampler_state *ss; 4018 int i, j, k, l, m; 4019 uint32_t devid; 4020 4021 devid = intel_get_device_id(sna->dev); 4022 if (devid & 0xf) 4023 state->gt = ((devid >> 4) & 0xf) + 1; 4024 DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); 4025 4026 if (is_bdw(sna)) 4027 state->info = &bdw_gt_info; 4028 else if (is_chv(sna)) 4029 state->info = &chv_gt_info; 4030 else 4031 return false; 4032 4033 sna_static_stream_init(&general); 4034 4035 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 4036 * dumps, you know it points to zero. 
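
/* Illustrative sketch, not driver code: the blend table built by
 * gen8_create_blend_state() is laid out row-major with the destination
 * factor varying fastest, so the state for a given (src, dst) factor
 * pair sits at a fixed offset from cc_blend. The helper name below is
 * hypothetical; the real lookup lives in gen8_get_blend(), which may
 * pack the index differently.
 */
static inline uint32_t
gen8_blend_state_offset__example(uint32_t cc_blend, int src, int dst)
{
	assert(src < GEN8_BLENDFACTOR_COUNT);
	assert(dst < GEN8_BLENDFACTOR_COUNT);

	/* One padded entry per (src, dst) pair, dst varying fastest. */
	return cc_blend +
		(src * GEN8_BLENDFACTOR_COUNT + dst) * GEN8_BLEND_STATE_PADDED_SIZE;
}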
4037 */ 4038 null_create(&general); 4039 4040 for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { 4041 if (wm_kernels[m].size) { 4042 state->wm_kernel[m][1] = 4043 sna_static_stream_add(&general, 4044 wm_kernels[m].data, 4045 wm_kernels[m].size, 4046 64); 4047 } else { 4048 if (USE_8_PIXEL_DISPATCH) { 4049 state->wm_kernel[m][0] = 4050 sna_static_stream_compile_wm(sna, &general, 4051 wm_kernels[m].data, 8); 4052 } 4053 4054 if (USE_16_PIXEL_DISPATCH) { 4055 state->wm_kernel[m][1] = 4056 sna_static_stream_compile_wm(sna, &general, 4057 wm_kernels[m].data, 16); 4058 } 4059 4060 if (USE_32_PIXEL_DISPATCH) { 4061 state->wm_kernel[m][2] = 4062 sna_static_stream_compile_wm(sna, &general, 4063 wm_kernels[m].data, 32); 4064 } 4065 } 4066 assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 4067 } 4068 4069 COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); 4070 ss = sna_static_stream_map(&general, 4071 2 * sizeof(*ss) * 4072 (2 + 4073 FILTER_COUNT * EXTEND_COUNT * 4074 FILTER_COUNT * EXTEND_COUNT), 4075 32); 4076 state->wm_state = sna_static_stream_offsetof(&general, ss); 4077 sampler_copy_init(ss); ss += 2; 4078 sampler_fill_init(ss); ss += 2; 4079 for (i = 0; i < FILTER_COUNT; i++) { 4080 for (j = 0; j < EXTEND_COUNT; j++) { 4081 for (k = 0; k < FILTER_COUNT; k++) { 4082 for (l = 0; l < EXTEND_COUNT; l++) { 4083 sampler_state_init(ss++, i, j); 4084 sampler_state_init(ss++, k, l); 4085 } 4086 } 4087 } 4088 } 4089 4090 state->cc_blend = gen8_create_blend_state(&general); 4091 4092 state->general_bo = sna_static_stream_fini(sna, &general); 4093 return state->general_bo != NULL; 4094} 4095 4096const char *gen8_render_init(struct sna *sna, const char *backend) 4097{ 4098 if (!gen8_render_setup(sna)) 4099 return backend; 4100 4101 sna->kgem.context_switch = gen6_render_context_switch; 4102 sna->kgem.retire = gen6_render_retire; 4103 sna->kgem.expire = gen4_render_expire; 4104 4105#if !NO_COMPOSITE 4106 sna->render.composite = gen8_render_composite; 4107 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 4108#endif 4109#if !NO_COMPOSITE_SPANS 4110 sna->render.check_composite_spans = gen8_check_composite_spans; 4111 sna->render.composite_spans = gen8_render_composite_spans; 4112 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 4113#endif 4114#if !NO_VIDEO 4115 sna->render.video = gen8_render_video; 4116#endif 4117 4118#if !NO_COPY_BOXES 4119 sna->render.copy_boxes = gen8_render_copy_boxes; 4120#endif 4121#if !NO_COPY 4122 sna->render.copy = gen8_render_copy; 4123#endif 4124 4125#if !NO_FILL_BOXES 4126 sna->render.fill_boxes = gen8_render_fill_boxes; 4127#endif 4128#if !NO_FILL 4129 sna->render.fill = gen8_render_fill; 4130#endif 4131#if !NO_FILL_ONE 4132 sna->render.fill_one = gen8_render_fill_one; 4133#endif 4134#if !NO_FILL_CLEAR 4135 sna->render.clear = gen8_render_clear; 4136#endif 4137 4138 sna->render.flush = gen8_render_flush; 4139 sna->render.reset = gen8_render_reset; 4140 sna->render.fini = gen8_render_fini; 4141 4142 sna->render.max_3d_size = GEN8_MAX_SIZE; 4143 sna->render.max_3d_pitch = 1 << 18; 4144 return sna->render_state.gen8.info->name; 4145} 4146