/* gen7_render.c — revision 42542f5f */
1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen7_render.h" 45#include "gen4_common.h" 46#include "gen4_source.h" 47#include "gen4_vertex.h" 48#include "gen6_common.h" 49 50#define ALWAYS_INVALIDATE 0 51#define ALWAYS_FLUSH 0 52#define ALWAYS_STALL 0 53 54#define NO_COMPOSITE 0 55#define NO_COMPOSITE_SPANS 0 56#define NO_COPY 0 57#define NO_COPY_BOXES 0 58#define NO_FILL 0 59#define NO_FILL_BOXES 0 60#define NO_FILL_ONE 0 61#define NO_FILL_CLEAR 0 62 63#define NO_RING_SWITCH 0 64 65#define USE_8_PIXEL_DISPATCH 1 66#define USE_16_PIXEL_DISPATCH 1 67#define USE_32_PIXEL_DISPATCH 0 68 69#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 70#error "Must select at least 8, 16 or 32 pixel dispatch" 71#endif 72 73#define GEN7_MAX_SIZE 16384 74 75/* XXX Todo 76 * 77 * STR (software tiled rendering) mode. No, really. 78 * 64x32 pixel blocks align with the rendering cache. Worth considering. 
79 */ 80 81#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 82 83struct gt_info { 84 const char *name; 85 uint32_t max_vs_threads; 86 uint32_t max_gs_threads; 87 uint32_t max_wm_threads; 88 struct { 89 int size; 90 int max_vs_entries; 91 int max_gs_entries; 92 int push_ps_size; /* in 1KBs */ 93 } urb; 94 int gt; 95 uint32_t mocs; 96}; 97 98static const struct gt_info ivb_gt_info = { 99 .name = "Ivybridge (gen7)", 100 .max_vs_threads = 16, 101 .max_gs_threads = 16, 102 .max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT, 103 .urb = { 128, 64, 64, 8 }, 104 .gt = 0, 105}; 106 107static const struct gt_info ivb_gt1_info = { 108 .name = "Ivybridge (gen7, gt1)", 109 .max_vs_threads = 36, 110 .max_gs_threads = 36, 111 .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 112 .urb = { 128, 512, 192, 8 }, 113 .gt = 1, 114 .mocs = 3, 115}; 116 117static const struct gt_info ivb_gt2_info = { 118 .name = "Ivybridge (gen7, gt2)", 119 .max_vs_threads = 128, 120 .max_gs_threads = 128, 121 .max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT, 122 .urb = { 256, 704, 320, 8 }, 123 .gt = 2, 124 .mocs = 3, 125}; 126 127static const struct gt_info byt_gt_info = { 128 .name = "Baytrail (gen7)", 129 .max_vs_threads = 36, 130 .max_gs_threads = 36, 131 .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 132 .urb = { 128, 512, 192, 8 }, 133 .gt = 1, 134}; 135 136static const struct gt_info hsw_gt_info = { 137 .name = "Haswell (gen7.5)", 138 .max_vs_threads = 8, 139 .max_gs_threads = 8, 140 .max_wm_threads = 141 (8 - 1) << HSW_PS_MAX_THREADS_SHIFT | 142 1 << HSW_PS_SAMPLE_MASK_SHIFT, 143 .urb = { 128, 64, 64, 8 }, 144 .gt = 0, 145}; 146 147static const struct gt_info hsw_gt1_info = { 148 .name = "Haswell (gen7.5, gt1)", 149 .max_vs_threads = 70, 150 .max_gs_threads = 70, 151 .max_wm_threads = 152 (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | 153 1 << HSW_PS_SAMPLE_MASK_SHIFT, 154 .urb = { 128, 640, 256, 8 }, 155 .gt = 1, 156 .mocs = 5, 157}; 158 159static const struct gt_info hsw_gt2_info = { 
160 .name = "Haswell (gen7.5, gt2)", 161 .max_vs_threads = 140, 162 .max_gs_threads = 140, 163 .max_wm_threads = 164 (140 - 1) << HSW_PS_MAX_THREADS_SHIFT | 165 1 << HSW_PS_SAMPLE_MASK_SHIFT, 166 .urb = { 256, 1664, 640, 8 }, 167 .gt = 2, 168 .mocs = 5, 169}; 170 171static const struct gt_info hsw_gt3_info = { 172 .name = "Haswell (gen7.5, gt3)", 173 .max_vs_threads = 280, 174 .max_gs_threads = 280, 175 .max_wm_threads = 176 (280 - 1) << HSW_PS_MAX_THREADS_SHIFT | 177 1 << HSW_PS_SAMPLE_MASK_SHIFT, 178 .urb = { 512, 3328, 1280, 16 }, 179 .gt = 3, 180 .mocs = 5, 181}; 182 183inline static bool is_ivb(struct sna *sna) 184{ 185 return sna->kgem.gen == 070; 186} 187 188inline static bool is_byt(struct sna *sna) 189{ 190 return sna->kgem.gen == 071; 191} 192 193inline static bool is_hsw(struct sna *sna) 194{ 195 return sna->kgem.gen == 075; 196} 197 198static const uint32_t ps_kernel_packed[][4] = { 199#include "exa_wm_src_affine.g7b" 200#include "exa_wm_src_sample_argb.g7b" 201#include "exa_wm_yuv_rgb.g7b" 202#include "exa_wm_write.g7b" 203}; 204 205static const uint32_t ps_kernel_planar[][4] = { 206#include "exa_wm_src_affine.g7b" 207#include "exa_wm_src_sample_planar.g7b" 208#include "exa_wm_yuv_rgb.g7b" 209#include "exa_wm_write.g7b" 210}; 211 212#define KERNEL(kernel_enum, kernel, num_surfaces) \ 213 [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 214#define NOKERNEL(kernel_enum, func, num_surfaces) \ 215 [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 216static const struct wm_kernel_info { 217 const char *name; 218 const void *data; 219 unsigned int size; 220 int num_surfaces; 221} wm_kernels[] = { 222 NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 223 NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 224 225 NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 226 NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 227 228 NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 229 NOKERNEL(MASKCA_P, 
brw_wm_kernel__projective_mask_ca, 3), 230 231 NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 232 NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 233 234 NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 235 NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 236 237 KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 238 KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 239}; 240#undef KERNEL 241 242static const struct blendinfo { 243 bool src_alpha; 244 uint32_t src_blend; 245 uint32_t dst_blend; 246} gen7_blend_op[] = { 247 /* Clear */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO}, 248 /* Src */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO}, 249 /* Dst */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE}, 250 /* Over */ {1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 251 /* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE}, 252 /* In */ {0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 253 /* InReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA}, 254 /* Out */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 255 /* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 256 /* Atop */ {1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 257 /* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA}, 258 /* Xor */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 259 /* Add */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE}, 260}; 261 262/** 263 * Highest-valued BLENDFACTOR used in gen7_blend_op. 
264 * 265 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR, 266 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 267 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 268 */ 269#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1) 270 271#define GEN7_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen7_blend_state), 64) 272 273#define BLEND_OFFSET(s, d) \ 274 ((d != GEN7_BLENDFACTOR_ZERO) << 15 | \ 275 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE)) 276 277#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO) 278#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO) 279 280#define SAMPLER_OFFSET(sf, se, mf, me) \ 281 ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state)) 282 283#define VERTEX_2s2s 0 284 285#define COPY_SAMPLER 0 286#define COPY_VERTEX VERTEX_2s2s 287#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX) 288 289#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state)) 290#define FILL_VERTEX VERTEX_2s2s 291#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 292#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 293 294#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0) 295#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0) 296#define GEN7_READS_DST(f) (((f) >> 15) & 1) 297#define GEN7_KERNEL(f) (((f) >> 16) & 0xf) 298#define GEN7_VERTEX(f) (((f) >> 0) & 0xf) 299#define GEN7_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 300 301#define OUT_BATCH(v) batch_emit(sna, v) 302#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 303#define OUT_VERTEX_F(v) vertex_emit(sna, v) 304 305static inline bool too_large(int width, int height) 306{ 307 return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; 308} 309 310static uint32_t 
gen7_get_blend(int op, 311 bool has_component_alpha, 312 uint32_t dst_format) 313{ 314 uint32_t src, dst; 315 316 src = gen7_blend_op[op].src_blend; 317 dst = gen7_blend_op[op].dst_blend; 318 319 /* If there's no dst alpha channel, adjust the blend op so that 320 * we'll treat it always as 1. 321 */ 322 if (PICT_FORMAT_A(dst_format) == 0) { 323 if (src == GEN7_BLENDFACTOR_DST_ALPHA) 324 src = GEN7_BLENDFACTOR_ONE; 325 else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA) 326 src = GEN7_BLENDFACTOR_ZERO; 327 } 328 329 /* If the source alpha is being used, then we should only be in a 330 * case where the source blend factor is 0, and the source blend 331 * value is the mask channels multiplied by the source picture's alpha. 332 */ 333 if (has_component_alpha && gen7_blend_op[op].src_alpha) { 334 if (dst == GEN7_BLENDFACTOR_SRC_ALPHA) 335 dst = GEN7_BLENDFACTOR_SRC_COLOR; 336 else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA) 337 dst = GEN7_BLENDFACTOR_INV_SRC_COLOR; 338 } 339 340 DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 341 op, dst_format, PICT_FORMAT_A(dst_format), 342 src, dst, (int)BLEND_OFFSET(src, dst))); 343 return BLEND_OFFSET(src, dst); 344} 345 346static uint32_t gen7_get_card_format(PictFormat format) 347{ 348 switch (format) { 349 default: 350 return -1; 351 case PICT_a8r8g8b8: 352 return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 353 case PICT_x8r8g8b8: 354 return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; 355 case PICT_a8b8g8r8: 356 return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 357 case PICT_x8b8g8r8: 358 return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM; 359#ifdef PICT_a2r10g10b10 360 case PICT_a2r10g10b10: 361 return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 362 case PICT_x2r10g10b10: 363 return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM; 364#endif 365 case PICT_r8g8b8: 366 return GEN7_SURFACEFORMAT_R8G8B8_UNORM; 367 case PICT_r5g6b5: 368 return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 369 case PICT_a1r5g5b5: 370 return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 371 case PICT_a8: 372 
return GEN7_SURFACEFORMAT_A8_UNORM; 373 case PICT_a4r4g4b4: 374 return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 375 } 376} 377 378static uint32_t gen7_get_dest_format(PictFormat format) 379{ 380 switch (format) { 381 default: 382 return -1; 383 case PICT_a8r8g8b8: 384 case PICT_x8r8g8b8: 385 return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 386 case PICT_a8b8g8r8: 387 case PICT_x8b8g8r8: 388 return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 389#ifdef PICT_a2r10g10b10 390 case PICT_a2r10g10b10: 391 case PICT_x2r10g10b10: 392 return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 393#endif 394 case PICT_r5g6b5: 395 return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 396 case PICT_x1r5g5b5: 397 case PICT_a1r5g5b5: 398 return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 399 case PICT_a8: 400 return GEN7_SURFACEFORMAT_A8_UNORM; 401 case PICT_a4r4g4b4: 402 case PICT_x4r4g4b4: 403 return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 404 } 405} 406 407static bool gen7_check_dst_format(PictFormat format) 408{ 409 if (gen7_get_dest_format(format) != -1) 410 return true; 411 412 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 413 return false; 414} 415 416static bool gen7_check_format(uint32_t format) 417{ 418 if (gen7_get_card_format(format) != -1) 419 return true; 420 421 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 422 return false; 423} 424 425static uint32_t gen7_filter(uint32_t filter) 426{ 427 switch (filter) { 428 default: 429 assert(0); 430 case PictFilterNearest: 431 return SAMPLER_FILTER_NEAREST; 432 case PictFilterBilinear: 433 return SAMPLER_FILTER_BILINEAR; 434 } 435} 436 437static uint32_t gen7_check_filter(PicturePtr picture) 438{ 439 switch (picture->filter) { 440 case PictFilterNearest: 441 case PictFilterBilinear: 442 return true; 443 default: 444 return false; 445 } 446} 447 448static uint32_t gen7_repeat(uint32_t repeat) 449{ 450 switch (repeat) { 451 default: 452 assert(0); 453 case RepeatNone: 454 return SAMPLER_EXTEND_NONE; 455 case RepeatNormal: 456 return SAMPLER_EXTEND_REPEAT; 
457 case RepeatPad: 458 return SAMPLER_EXTEND_PAD; 459 case RepeatReflect: 460 return SAMPLER_EXTEND_REFLECT; 461 } 462} 463 464static bool gen7_check_repeat(PicturePtr picture) 465{ 466 if (!picture->repeat) 467 return true; 468 469 switch (picture->repeatType) { 470 case RepeatNone: 471 case RepeatNormal: 472 case RepeatPad: 473 case RepeatReflect: 474 return true; 475 default: 476 return false; 477 } 478} 479 480static int 481gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 482{ 483 int base; 484 485 if (has_mask) { 486 if (is_ca) { 487 if (gen7_blend_op[op].src_alpha) 488 base = GEN7_WM_KERNEL_MASKSA; 489 else 490 base = GEN7_WM_KERNEL_MASKCA; 491 } else 492 base = GEN7_WM_KERNEL_MASK; 493 } else 494 base = GEN7_WM_KERNEL_NOMASK; 495 496 return base + !is_affine; 497} 498 499static void 500gen7_emit_urb(struct sna *sna) 501{ 502 OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); 503 OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size); 504 505 /* num of VS entries must be divisible by 8 if size < 9 */ 506 OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); 507 OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | 508 (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | 509 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 510 511 OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2)); 512 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 513 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 514 515 OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2)); 516 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 517 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 518 519 OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2)); 520 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 521 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 522} 523 524static void 525gen7_emit_state_base_address(struct sna *sna) 526{ 527 uint32_t mocs = sna->render_state.gen7.info->mocs << 8; 528 529 OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); 530 OUT_BATCH(0); /* general */ 531 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* 
surface */ 532 sna->kgem.nbatch, 533 NULL, 534 I915_GEM_DOMAIN_INSTRUCTION << 16, 535 BASE_ADDRESS_MODIFY)); 536 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */ 537 sna->kgem.nbatch, 538 sna->render_state.gen7.general_bo, 539 I915_GEM_DOMAIN_INSTRUCTION << 16, 540 mocs | BASE_ADDRESS_MODIFY)); 541 OUT_BATCH(0); /* indirect */ 542 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 543 sna->kgem.nbatch, 544 sna->render_state.gen7.general_bo, 545 I915_GEM_DOMAIN_INSTRUCTION << 16, 546 mocs | BASE_ADDRESS_MODIFY)); 547 548 /* upper bounds, disable */ 549 OUT_BATCH(0); 550 OUT_BATCH(BASE_ADDRESS_MODIFY); 551 OUT_BATCH(0); 552 OUT_BATCH(BASE_ADDRESS_MODIFY); 553} 554 555static void 556gen7_disable_vs(struct sna *sna) 557{ 558 /* For future reference: 559 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs 560 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS, 561 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, 562 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS. 563 * 564 * Here we saved by the full-flush incurred when emitting 565 * the batchbuffer. 
566 */ 567 OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2)); 568 OUT_BATCH(0); /* no VS kernel */ 569 OUT_BATCH(0); 570 OUT_BATCH(0); 571 OUT_BATCH(0); 572 OUT_BATCH(0); /* pass-through */ 573 574#if 0 575 OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2)); 576 OUT_BATCH(0); 577 OUT_BATCH(0); 578 OUT_BATCH(0); 579 OUT_BATCH(0); 580 OUT_BATCH(0); 581 OUT_BATCH(0); 582 583 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); 584 OUT_BATCH(0); 585 586 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); 587 OUT_BATCH(0); 588#endif 589} 590 591static void 592gen7_disable_hs(struct sna *sna) 593{ 594 OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2)); 595 OUT_BATCH(0); /* no HS kernel */ 596 OUT_BATCH(0); 597 OUT_BATCH(0); 598 OUT_BATCH(0); 599 OUT_BATCH(0); 600 OUT_BATCH(0); /* pass-through */ 601 602#if 0 603 OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2)); 604 OUT_BATCH(0); 605 OUT_BATCH(0); 606 OUT_BATCH(0); 607 OUT_BATCH(0); 608 OUT_BATCH(0); 609 OUT_BATCH(0); 610 611 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); 612 OUT_BATCH(0); 613 614 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); 615 OUT_BATCH(0); 616#endif 617} 618 619static void 620gen7_disable_te(struct sna *sna) 621{ 622 OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2)); 623 OUT_BATCH(0); 624 OUT_BATCH(0); 625 OUT_BATCH(0); 626} 627 628static void 629gen7_disable_ds(struct sna *sna) 630{ 631 OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2)); 632 OUT_BATCH(0); 633 OUT_BATCH(0); 634 OUT_BATCH(0); 635 OUT_BATCH(0); 636 OUT_BATCH(0); 637 638#if 0 639 OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2)); 640 OUT_BATCH(0); 641 OUT_BATCH(0); 642 OUT_BATCH(0); 643 OUT_BATCH(0); 644 OUT_BATCH(0); 645 OUT_BATCH(0); 646 647 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); 648 OUT_BATCH(0); 649 650 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 651 OUT_BATCH(0); 652#endif 653} 654 655static void 656gen7_disable_gs(struct sna *sna) 657{ 658 OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2)); 659 OUT_BATCH(0); /* no GS kernel */ 
660 OUT_BATCH(0); 661 OUT_BATCH(0); 662 OUT_BATCH(0); 663 OUT_BATCH(0); 664 OUT_BATCH(0); /* pass-through */ 665 666#if 0 667 OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2)); 668 OUT_BATCH(0); 669 OUT_BATCH(0); 670 OUT_BATCH(0); 671 OUT_BATCH(0); 672 OUT_BATCH(0); 673 OUT_BATCH(0); 674 675 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); 676 OUT_BATCH(0); 677 678 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); 679 OUT_BATCH(0); 680#endif 681} 682 683static void 684gen7_disable_streamout(struct sna *sna) 685{ 686 OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2)); 687 OUT_BATCH(0); 688 OUT_BATCH(0); 689} 690 691static void 692gen7_emit_sf_invariant(struct sna *sna) 693{ 694 OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2)); 695 OUT_BATCH(0); 696 OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE); 697 OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); 698 OUT_BATCH(0); 699 OUT_BATCH(0); 700 OUT_BATCH(0); 701} 702 703static void 704gen7_emit_cc_invariant(struct sna *sna) 705{ 706#if 0 /* unused, no change */ 707 OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2)); 708 OUT_BATCH(0); 709 710 OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2)); 711 OUT_BATCH(0); 712#endif 713 714 /* XXX clear to be safe */ 715 OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); 716 OUT_BATCH(0); 717} 718 719static void 720gen7_disable_clip(struct sna *sna) 721{ 722 OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2)); 723 OUT_BATCH(0); 724 OUT_BATCH(0); /* pass-through */ 725 OUT_BATCH(0); 726 727 OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); 728 OUT_BATCH(0); 729} 730 731static void 732gen7_emit_wm_invariant(struct sna *sna) 733{ 734 OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2)); 735 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 736 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 737 OUT_BATCH(0); 738 739#if 0 740 /* XXX length bias of 7 in old spec? 
*/ 741 OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2)); 742 OUT_BATCH(0); 743 OUT_BATCH(0); 744 OUT_BATCH(0); 745 OUT_BATCH(0); 746 OUT_BATCH(0); 747 OUT_BATCH(0); 748#endif 749} 750 751static void 752gen7_emit_null_depth_buffer(struct sna *sna) 753{ 754 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); 755 OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 756 GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 757 OUT_BATCH(0); /* disable depth, stencil and hiz */ 758 OUT_BATCH(0); 759 OUT_BATCH(0); 760 OUT_BATCH(0); 761 OUT_BATCH(0); 762 763#if 0 764 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); 765 OUT_BATCH(0); 766 OUT_BATCH(0); 767#endif 768} 769 770static void 771gen7_emit_invariant(struct sna *sna) 772{ 773 OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D); 774 775 OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2)); 776 OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 777 GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 778 OUT_BATCH(0); 779 OUT_BATCH(0); 780 781 OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2)); 782 OUT_BATCH(1); 783 784 gen7_emit_urb(sna); 785 786 gen7_emit_state_base_address(sna); 787 788 gen7_disable_vs(sna); 789 gen7_disable_hs(sna); 790 gen7_disable_te(sna); 791 gen7_disable_ds(sna); 792 gen7_disable_gs(sna); 793 gen7_disable_clip(sna); 794 gen7_emit_sf_invariant(sna); 795 gen7_emit_wm_invariant(sna); 796 gen7_emit_cc_invariant(sna); 797 gen7_disable_streamout(sna); 798 gen7_emit_null_depth_buffer(sna); 799 800 sna->render_state.gen7.needs_invariant = false; 801} 802 803static void 804gen7_emit_cc(struct sna *sna, uint32_t blend_offset) 805{ 806 struct gen7_render_state *render = &sna->render_state.gen7; 807 808 if (render->blend == blend_offset) 809 return; 810 811 DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); 812 813 /* XXX can have upto 8 blend states preload, selectable via 814 * Render Target Index. What other side-effects of Render Target Index? 
815 */ 816 817 assert (is_aligned(render->cc_blend + blend_offset, 64)); 818 OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); 819 OUT_BATCH((render->cc_blend + blend_offset) | 1); 820 821 render->blend = blend_offset; 822} 823 824static void 825gen7_emit_sampler(struct sna *sna, uint32_t state) 826{ 827 if (sna->render_state.gen7.samplers == state) 828 return; 829 830 sna->render_state.gen7.samplers = state; 831 832 DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 833 834 assert (is_aligned(sna->render_state.gen7.wm_state + state, 32)); 835 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); 836 OUT_BATCH(sna->render_state.gen7.wm_state + state); 837} 838 839static void 840gen7_emit_sf(struct sna *sna, bool has_mask) 841{ 842 int num_sf_outputs = has_mask ? 2 : 1; 843 844 if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs) 845 return; 846 847 DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 848 __FUNCTION__, num_sf_outputs, 1, 0)); 849 850 sna->render_state.gen7.num_sf_outputs = num_sf_outputs; 851 852 OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2)); 853 OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT | 854 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | 855 1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); 856 OUT_BATCH(0); 857 OUT_BATCH(0); /* dw4 */ 858 OUT_BATCH(0); 859 OUT_BATCH(0); 860 OUT_BATCH(0); 861 OUT_BATCH(0); /* dw8 */ 862 OUT_BATCH(0); 863 OUT_BATCH(0); 864 OUT_BATCH(0); 865 OUT_BATCH(0); /* dw12 */ 866 OUT_BATCH(0); 867 OUT_BATCH(0); 868} 869 870static void 871gen7_emit_wm(struct sna *sna, int kernel) 872{ 873 const uint32_t *kernels; 874 875 if (sna->render_state.gen7.kernel == kernel) 876 return; 877 878 sna->render_state.gen7.kernel = kernel; 879 kernels = sna->render_state.gen7.wm_kernel[kernel]; 880 881 DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? 
%d)\n", 882 __FUNCTION__, 883 wm_kernels[kernel].name, 884 wm_kernels[kernel].num_surfaces, 885 kernels[0], kernels[1], kernels[2])); 886 887 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 888 OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 889 OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT | 890 wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); 891 OUT_BATCH(0); /* scratch address */ 892 OUT_BATCH(sna->render_state.gen7.info->max_wm_threads | 893 (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) | 894 (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) | 895 (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) | 896 GEN7_PS_ATTRIBUTE_ENABLE); 897 OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | 898 8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 | 899 6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); 900 OUT_BATCH(kernels[2]); 901 OUT_BATCH(kernels[1]); 902} 903 904static bool 905gen7_emit_binding_table(struct sna *sna, uint16_t offset) 906{ 907 if (sna->render_state.gen7.surface_table == offset) 908 return false; 909 910 /* Binding table pointers */ 911 assert(is_aligned(4*offset, 32)); 912 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); 913 OUT_BATCH(offset*4); 914 915 sna->render_state.gen7.surface_table = offset; 916 return true; 917} 918 919static bool 920gen7_emit_drawing_rectangle(struct sna *sna, 921 const struct sna_composite_op *op) 922{ 923 uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 924 uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 925 926 assert(!too_large(op->dst.x, op->dst.y)); 927 assert(!too_large(op->dst.width, op->dst.height)); 928 929 if (sna->render_state.gen7.drawrect_limit == limit && 930 sna->render_state.gen7.drawrect_offset == offset) 931 return true; 932 933 sna->render_state.gen7.drawrect_offset = offset; 934 sna->render_state.gen7.drawrect_limit = limit; 935 936 OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 937 OUT_BATCH(0); 938 OUT_BATCH(limit); 939 
OUT_BATCH(offset); 940 return false; 941} 942 943static void 944gen7_emit_vertex_elements(struct sna *sna, 945 const struct sna_composite_op *op) 946{ 947 /* 948 * vertex data in vertex buffer 949 * position: (x, y) 950 * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 951 * texture coordinate 1 if (has_mask is true): same as above 952 */ 953 struct gen7_render_state *render = &sna->render_state.gen7; 954 uint32_t src_format, dw; 955 int id = GEN7_VERTEX(op->u.gen7.flags); 956 bool has_mask; 957 958 DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 959 960 if (render->ve_id == id) 961 return; 962 render->ve_id = id; 963 964 /* The VUE layout 965 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 966 * dword 4-7: position (x, y, 1.0, 1.0), 967 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 968 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 969 * 970 * dword 4-15 are fetched from vertex buffer 971 */ 972 has_mask = (id >> 2) != 0; 973 OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | 974 ((2 * (3 + has_mask)) + 1 - 2)); 975 976 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 977 GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | 978 0 << GEN7_VE0_OFFSET_SHIFT); 979 OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | 980 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT | 981 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 982 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); 983 984 /* x,y */ 985 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 986 GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | 987 0 << GEN7_VE0_OFFSET_SHIFT); 988 OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | 989 GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | 990 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 991 GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); 992 993 /* u0, v0, w0 */ 994 DBG(("%s: first channel 
%d floats, offset=4b\n", __FUNCTION__, id & 3)); 995 dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 996 switch (id & 3) { 997 default: 998 assert(0); 999 case 0: 1000 src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED; 1001 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1002 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1003 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1004 break; 1005 case 1: 1006 src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1007 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1008 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1009 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1010 break; 1011 case 2: 1012 src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; 1013 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1014 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1015 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1016 break; 1017 case 3: 1018 src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; 1019 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1020 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1021 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1022 break; 1023 } 1024 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 1025 src_format << GEN7_VE0_FORMAT_SHIFT | 1026 4 << GEN7_VE0_OFFSET_SHIFT); 1027 OUT_BATCH(dw); 1028 1029 /* u1, v1, w1 */ 1030 if (has_mask) { 1031 unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1032 DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 1033 dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 1034 switch (id >> 2) { 1035 case 1: 1036 src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1037 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1038 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1039 dw 
|= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1040 break; 1041 default: 1042 assert(0); 1043 case 2: 1044 src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; 1045 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1046 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1047 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1048 break; 1049 case 3: 1050 src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; 1051 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1052 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1053 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1054 break; 1055 } 1056 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 1057 src_format << GEN7_VE0_FORMAT_SHIFT | 1058 offset << GEN7_VE0_OFFSET_SHIFT); 1059 OUT_BATCH(dw); 1060 } 1061} 1062 1063inline static void 1064gen7_emit_pipe_invalidate(struct sna *sna) 1065{ 1066 OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1067 OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | 1068 GEN7_PIPE_CONTROL_TC_FLUSH | 1069 GEN7_PIPE_CONTROL_CS_STALL); 1070 OUT_BATCH(0); 1071 OUT_BATCH(0); 1072} 1073 1074inline static void 1075gen7_emit_pipe_flush(struct sna *sna, bool need_stall) 1076{ 1077 unsigned stall; 1078 1079 stall = 0; 1080 if (need_stall) 1081 stall = (GEN7_PIPE_CONTROL_CS_STALL | 1082 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); 1083 1084 OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1085 OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall); 1086 OUT_BATCH(0); 1087 OUT_BATCH(0); 1088} 1089 1090inline static void 1091gen7_emit_pipe_stall(struct sna *sna) 1092{ 1093 OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1094 OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | 1095 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); 1096 OUT_BATCH(0); 1097 OUT_BATCH(0); 1098} 1099 1100static void 1101gen7_emit_state(struct sna *sna, 1102 const struct sna_composite_op *op, 1103 uint16_t wm_binding_table) 1104{ 1105 bool need_invalidate; 1106 bool need_flush; 1107 bool 
need_stall;

	assert(op->dst.bo->exec);

	/* Flush when the caller flagged the dst dirty (low bit), or when the
	 * previous op drew to a dst that this op now reads. */
	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	/* A new surface table means the sampler may prefetch stale entries. */
	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	/* Invalidate subsumes both flush and stall. */
	if (need_invalidate) {
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* Remember whether the next op must flush before reading this dst. */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}

/* Second pass for component-alpha rendering: replay the just-emitted
 * rectangles (vertex_start..vertex_index) with an ADD blend and the
 * CA-aware kernel.  Returns true if the extra pass was emitted. */
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* Serialise against the first pass before switching blend/kernel. */
	gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna,
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen7_emit_wm(sna,
		     gen7_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	/* Replay exactly the vertices of the first pass. */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

/* Reserve a block of zeros in the static state stream. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

/* Fill in a SAMPLER_STATE entry for the given Render filter/extend
 * combination. */
static void
sampler_state_init(struct gen7_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
		break;
	}

	/* RepeatNone maps to clamp-to-border (transparent black border). */
	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		break;
	}
}

/* Initialise the sampler pair used for copies: nearest filtering on
 * unnormalized coordinates, plus a second (unused-mask) entry. */
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Initialise the sampler pair used for fills: as copy, but the source
 * repeats (tiled fill pattern). */
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Translate a kernel tiling mode into SURFACE_STATE tiling bits. */
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN7_SURFACE_TILED;
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen7_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		/* Reuse the SURFACE_STATE already written into this batch. */
		return offset * sizeof(uint32_t);
	}

	/* Carve a new SURFACE_STATE from the top of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 gen7_tiling_bits(bo->tiling) |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	if (bo->tiling == I915_TILING_Y)
		ss[0] |= GEN7_SURFACE_VALIGN_4;
	if (is_dst) {
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	/* Scanout and I/O buffers must stay uncached (mocs 0). */
	ss[5] = (is_scanout || bo->io) ? 0 : sna->render_state.gen7.info->mocs << 16;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	/* Cache the binding so later ops on this bo can reuse it. */
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ?
"render" : "sampler")); 1328 1329 return offset * sizeof(uint32_t); 1330} 1331 1332static void gen7_emit_vertex_buffer(struct sna *sna, 1333 const struct sna_composite_op *op) 1334{ 1335 int id = GEN7_VERTEX(op->u.gen7.flags); 1336 1337 OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1338 OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT | 1339 GEN7_VB0_VERTEXDATA | 1340 GEN7_VB0_ADDRESS_MODIFY_ENABLE | 1341 4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT); 1342 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1343 OUT_BATCH(0); 1344 OUT_BATCH(~0); /* max address: disabled */ 1345 OUT_BATCH(0); 1346 1347 sna->render.vb_id |= 1 << id; 1348} 1349 1350static void gen7_emit_primitive(struct sna *sna) 1351{ 1352 if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) { 1353 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1354 return; 1355 } 1356 1357 OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); 1358 OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); 1359 sna->render.vertex_offset = sna->kgem.nbatch; 1360 OUT_BATCH(0); /* vertex count, to be filled in later */ 1361 OUT_BATCH(sna->render.vertex_index); 1362 OUT_BATCH(1); /* single instance */ 1363 OUT_BATCH(0); /* start instance location */ 1364 OUT_BATCH(0); /* index buffer offset, ignored */ 1365 sna->render.vertex_start = sna->render.vertex_index; 1366 1367 sna->render_state.gen7.last_primitive = sna->kgem.nbatch; 1368} 1369 1370static bool gen7_rectangle_begin(struct sna *sna, 1371 const struct sna_composite_op *op) 1372{ 1373 int id = 1 << GEN7_VERTEX(op->u.gen7.flags); 1374 int ndwords; 1375 1376 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1377 return true; 1378 1379 ndwords = op->need_magic_ca_pass ? 
60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen7_emit_vertex_buffer(sna, op);

	gen7_emit_primitive(sna);
	return true;
}

/* Vertex buffer is full: flush the current primitive (and CA pass) and
 * start a fresh vbo.  Returns the available vertex space, or 0 if the
 * whole batch must be submitted first. */
static int gen7_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen7_magic_ca_pass(sna, op)) {
			/* CA pass changed blend/kernel state: restore it. */
			gen7_emit_pipe_stall(sna);
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
		}
	}

	return gen4_vertex_finish(sna);
}

/* Reserve vertex space for up to 'want' rectangles, (re)emitting state
 * via emit_state after any forced batch submission.  Returns the number
 * of rectangles actually reserved (>= 1). */
inline static int gen7_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen7_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen7_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Trim the request to what fits in the remaining vertex space. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
return want;

flush:
	/* Out of space: close the primitive, submit, re-emit state, retry. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

/* Allocate a fresh binding table at the top of the batch; returns the
 * table and stores its dword offset in *offset. */
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen7_surface_state));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

/* Switch to the render ring and guarantee enough batch space (plus
 * surface-state room) for one op, submitting the old batch if not. */
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant)
		gen7_emit_invariant(sna);
}

/* Bind dst/src/mask surfaces and emit the full pipeline state for a
 * composite op, reusing the previous binding table when identical. */
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen7_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If this table matches the previous one, drop it and reuse the
	 * old surface table (the bind calls wrote no new SURFACE_STATE). */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	/* The dirty bit rides in bit 0 of the table offset. */
	gen7_emit_state(sna, op, offset | dirty);
}

/* Record the op's vertex stride, realigning the vbo when it changes. */
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
	}
}

/* composite->blt(): emit one rectangle. */
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, r);
}

/* composite->box(): emit one box, with src/mask tracking dst. */
fastcall static void
gen7_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

op->prim_emit(sna, op, &r);
}

/* composite->boxes() fallback: one prim_emit call per box (used when
 * the op has no vectorised emit_boxes). */
static void
gen7_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* composite->boxes(): batch boxes through the op's emit_boxes vector. */
static void
gen7_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

/* As gen7_render_composite_boxes, but drops the vertex lock while
 * writing vertices so sibling threads can make progress. */
static void
gen7_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Reserve the vertex range, then emit outside the lock. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Pre-generate one BLEND_STATE for every (src, dst) blend-factor pair;
 * gen7_get_blend() indexes into this table at runtime. */
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
			struct gen7_blend_state *blend =
				(struct gen7_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a plain copy: disable blending. */
			blend->blend0.blend_enable =
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

/* Bind one (sub)plane of a video frame as an untiled sampler surface;
 * 'offset' selects the plane within the frame bo. */
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss, bind;

	bind = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);

	assert(bo->tiling == I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1,
bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}

/* Bind the dst plus every source plane of the video frame and emit the
 * pipeline state for a video blit. */
static void gen7_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	gen7_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Planar YUV: 2 full-res Y views + half-res U and V views,
		 * each plane bound twice (the shader samples in pairs --
		 * NOTE(review): pairing rationale not visible here). */
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		/* Packed YUV: a single interleaved surface. */
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen7_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen7_emit_state(sna, op, offset | dirty);
}

/* Render a video frame into the pixmap, one textured rectangle per
 * clip box of dstRegion.  Returns false if the bos cannot fit in a
 * single batch. */
static bool
gen7_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	unsigned filter;
	const BoxRec *box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
			       GEN7_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_video_state(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map dst pixels to normalized source texture coordinates. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		BoxRec r;

		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     pix_xoff, pix_yoff,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		/* RECTLIST: three corners per rectangle, position + texcoord. */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y +
src_offset_y);

		/* Track damage per box unless the whole pixmap is damaged. */
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}

/* Prepare a Render picture as a composite channel source/mask,
 * falling back to fixup/extract/convert paths when the picture cannot
 * be sampled directly.  Returns <0 error, 0 fallback, 1 ok. */
static int
gen7_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		/* Gradient or other source-only picture. */
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen7_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen7_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ?
picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	assert(picture->pDrawable);
	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into the sample offset,
	 * letting us drop the transform and use nearest filtering. */
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen7_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		/* Not directly sampleable: convert to a supported format. */
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		/* Exceeds the 3D pipe's surface limit: copy out the subregion. */
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine?
%d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Convert a channel's Render repeat/filter/format values into the
 * gen7 hardware enums, resolving the card format lazily. */
inline static void gen7_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen7_repeat(channel->repeat);
	channel->filter = gen7_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen7_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}

/* Finish a composite op: flush pending vertices (plus CA second pass),
 * release the channel bos and undo any dst redirection. */
static void gen7_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

/* Choose and validate the destination bo for a composite op, applying
 * usage hints and redirecting overly-large targets.  Returns false if
 * no GPU target can be used. */
inline static bool
gen7_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap =
get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w | h) {
		assert(w && h);
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		/* Zero extents: use the picture's full clip extents. */
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* A full-size overwrite replaces all existing contents. */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		/* Discard queued writes that this op will overwrite anyway. */
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ?
*op->damage : (void *)-1)); 2169 2170 assert(op->dst.bo->proxy == NULL); 2171 2172 if (too_large(op->dst.width, op->dst.height) && 2173 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2174 return false; 2175 2176 return true; 2177} 2178 2179static bool 2180try_blt(struct sna *sna, 2181 PicturePtr dst, PicturePtr src, 2182 int width, int height) 2183{ 2184 struct kgem_bo *bo; 2185 2186 if (sna->kgem.mode == KGEM_BLT) { 2187 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2188 return true; 2189 } 2190 2191 if (too_large(width, height)) { 2192 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2193 __FUNCTION__, width, height)); 2194 return true; 2195 } 2196 2197 bo = __sna_drawable_peek_bo(dst->pDrawable); 2198 if (bo == NULL) 2199 return true; 2200 if (bo->rq) 2201 return RQ_IS_BLT(bo->rq); 2202 2203 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2204 return true; 2205 2206 if (src->pDrawable) { 2207 bo = __sna_drawable_peek_bo(src->pDrawable); 2208 if (bo == NULL) 2209 return true; 2210 2211 if (prefer_blt_bo(sna, bo)) 2212 return true; 2213 } 2214 2215 if (sna->kgem.ring == KGEM_BLT) { 2216 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2217 return true; 2218 } 2219 2220 return false; 2221} 2222 2223static bool 2224check_gradient(PicturePtr picture, bool precise) 2225{ 2226 if (picture->pDrawable) 2227 return false; 2228 2229 switch (picture->pSourcePict->type) { 2230 case SourcePictTypeSolidFill: 2231 case SourcePictTypeLinear: 2232 return false; 2233 default: 2234 return precise; 2235 } 2236} 2237 2238static bool 2239has_alphamap(PicturePtr p) 2240{ 2241 return p->alphaMap != NULL; 2242} 2243 2244static bool 2245need_upload(PicturePtr p) 2246{ 2247 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2248} 2249 2250static bool 2251source_is_busy(PixmapPtr pixmap) 2252{ 2253 struct sna_pixmap *priv = sna_pixmap(pixmap); 2254 if (priv == NULL || priv->clear) 2255 return false; 2256 2257 if 
(priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2258 return true; 2259 2260 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2261 return true; 2262 2263 return priv->gpu_damage && !priv->cpu_damage; 2264} 2265 2266static bool 2267source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2268{ 2269 if (sna_picture_is_solid(p, NULL)) 2270 return false; 2271 2272 if (p->pSourcePict) 2273 return check_gradient(p, precise); 2274 2275 if (!gen7_check_repeat(p) || !gen7_check_format(p->format)) 2276 return true; 2277 2278 if (pixmap && source_is_busy(pixmap)) 2279 return false; 2280 2281 return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p); 2282} 2283 2284static bool 2285gen7_composite_fallback(struct sna *sna, 2286 PicturePtr src, 2287 PicturePtr mask, 2288 PicturePtr dst) 2289{ 2290 PixmapPtr src_pixmap; 2291 PixmapPtr mask_pixmap; 2292 PixmapPtr dst_pixmap; 2293 bool src_fallback, mask_fallback; 2294 2295 if (!gen7_check_dst_format(dst->format)) { 2296 DBG(("%s: unknown destination format: %d\n", 2297 __FUNCTION__, dst->format)); 2298 return true; 2299 } 2300 2301 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2302 2303 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2304 src_fallback = source_fallback(src, src_pixmap, 2305 dst->polyMode == PolyModePrecise); 2306 2307 if (mask) { 2308 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2309 mask_fallback = source_fallback(mask, mask_pixmap, 2310 dst->polyMode == PolyModePrecise); 2311 } else { 2312 mask_pixmap = NULL; 2313 mask_fallback = false; 2314 } 2315 2316 /* If we are using the destination as a source and need to 2317 * readback in order to upload the source, do it all 2318 * on the cpu. 
/* Decide whether the whole composite should fall back to the CPU.
 * Weighs where the destination/source/mask currently live (GPU vs CPU)
 * against whether each channel would itself need a fallback upload.
 * Returns true to fall back, false to proceed on the GPU. */
static bool
gen7_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen7_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
/* Try to initialise the mask channel by reusing the already-prepared
 * source channel (same drawable, same offsets), avoiding a second
 * upload/surface setup.  Returns true when *mc has been initialised
 * (taking its own bo reference), false when the mask must be prepared
 * independently. */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Reuse is only valid when both channels sample identically. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		/* The copy shares sc's bo; take an extra reference so
		 * both channels can be destroyed independently. */
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	/* A solid mask is cheaper to set up directly. */
	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	/* Same drawable, but the sampling parameters must also agree
	 * (or at least be expressible for the shared surface). */
	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen7_check_repeat(mask))
		return false;

	if (!gen7_check_filter(mask))
		return false;

	if (!gen7_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Clone the source channel but override the mask's own
	 * repeat/filter/format. */
	*mc = *sc;
	mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen7_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen7_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
/* Prepare a Render composite operation (src [MASK] OP -> dst) for the
 * gen7 3D pipeline, filling in *tmp with the emit/done callbacks.
 * Falls back to the blitter, tiling, or CPU paths as appropriate.
 * Returns true when *tmp is ready for use. */
static bool
gen7_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	/* Mask-less composites may be cheaper on the blitter. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen7_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the canonical clear picture is a plain Src copy. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen7_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	/* Prepare the source channel: -1 = error, 0 = clear/transparent,
	 * 1 = channel initialised. */
	switch (gen7_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen7_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen7_blend_op[op].src_alpha &&
			    (gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) {
				/* Only PictOpOver can be decomposed into
				 * the two-pass OutReverse + Add trick. */
				if (op != PictOpOver)
					goto cleanup_src;

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen7_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen7_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler/blend/kernel selections into the per-op flags. */
	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt = gen7_render_composite_blt;
	tmp->box = gen7_render_composite_box;
	tmp->boxes = gen7_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen7_render_composite_boxes;
		tmp->thread_boxes = gen7_render_composite_boxes__thread;
	}
	tmp->done = gen7_render_composite_done;

	/* Make sure all three bos fit in this batch, flushing once. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, tmp);
	gen7_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
/* Emit a batch of opacity boxes from a worker thread.  Vertex space is
 * reserved while holding the render vertex lock; the actual vertex data
 * is then written outside the lock (guarded by acquire/release) so
 * multiple threads can fill their reserved ranges concurrently. */
fastcall static void
gen7_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		/* Reserve as many rectangles as fit in the batch. */
		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		/* Drop the lock while writing into our reserved range. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
/* Prepare a spans composite (source modulated by per-box opacity) on
 * the gen7 pipeline.  Mirrors gen7_render_composite but with no mask
 * channel; the opacity is fed through the WM opacity kernel instead.
 * Returns true when *tmp is ready for use. */
static bool
gen7_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.mode, sna->kgem.ring));

	/* Caller must already have vetted via gen7_check_composite_spans. */
	assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen7_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen7_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen7_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* No mask sampler; select the opacity WM kernel instead. */
	tmp->base.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen7_render_composite_spans_box;
	tmp->boxes = gen7_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
	tmp->done  = gen7_render_composite_spans_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp->base);
	gen7_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
/* Emit the surface/binding-table state for a copy operation: dst as
 * render target, src as sampled texture.  Reuses the previous binding
 * table when it is identical to avoid redundant state emission. */
static void
gen7_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If the freshly written table matches the previous one, rewind
	 * the surface allocator and reuse the old table offset. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same render target as before: no flush needed for dirt. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN7_READS_DST(op->u.gen7.flags));
	gen7_emit_state(sna, op, offset | dirty);
}
2864prefer_blt_copy(struct sna *sna, 2865 struct kgem_bo *src_bo, 2866 struct kgem_bo *dst_bo, 2867 unsigned flags) 2868{ 2869 if (sna->kgem.mode == KGEM_BLT) 2870 return true; 2871 2872 assert((flags & COPY_SYNC) == 0); 2873 2874 if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2875 return true; 2876 2877 if (untiled_tlb_miss(src_bo) || 2878 untiled_tlb_miss(dst_bo)) 2879 return true; 2880 2881 if (force_blt_ring(sna)) 2882 return true; 2883 2884 if (kgem_bo_is_render(dst_bo) || 2885 kgem_bo_is_render(src_bo)) 2886 return false; 2887 2888 if (prefer_render_ring(sna, dst_bo)) 2889 return false; 2890 2891 if (!prefer_blt_ring(sna, dst_bo, flags)) 2892 return false; 2893 2894 return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2895} 2896 2897static bool 2898gen7_render_copy_boxes(struct sna *sna, uint8_t alu, 2899 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2900 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2901 const BoxRec *box, int n, unsigned flags) 2902{ 2903 struct sna_composite_op tmp; 2904 BoxRec extents; 2905 2906 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? 
/* Copy a list of boxes from src to dst on the render pipeline, with
 * extensive fallbacks: blitter when preferred, overlap-safe paths for
 * self-copies, redirection/partial-source setup for oversized surfaces,
 * and tiled software fallbacks as a last resort. */
static bool
gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;
	BoxRec extents;

	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
	     src_bo == dst_bo,
	     overlaps(sna,
		      src_bo, src_dx, src_dy,
		      dst_bo, dst_dx, dst_dy,
		      box, n, flags, &extents)));

	/* First choice: the blitter, when the heuristics prefer it. */
	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	    sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	/* The render path only supports copy/clear ALUs. */
	if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
		DBG(("%s: fallback blt\n", __FUNCTION__));
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Overlapping src/dst regions need special handling: the 3D
	 * pipe cannot read and write the same pixels coherently. */
	if (overlaps(sna,
		     src_bo, src_dx, src_dy,
		     dst_bo, dst_dx, dst_dy,
		     box, n, flags,
		     &extents)) {
		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
		    sna_blt_copy_boxes(sna, alu,
				       src_bo, src_dx, src_dy,
				       dst_bo, dst_dx, dst_dy,
				       dst->bitsPerPixel,
				       box, n))
			return true;

		if (big)
			goto fallback_blt;

		assert(src_bo == dst_bo);
		assert(src->depth == dst->depth);
		assert(src->width == dst->width);
		assert(src->height == dst->height);
		return sna_render_copy_boxes__overlap(sna, alu,
						      src, src_bo,
						      src_dx, src_dy,
						      dst_dx, dst_dy,
						      box, n, &extents);
	}

	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen7_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Oversized destination: redirect rendering to a proxy covering
	 * just the extents of the boxes. */
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format);
	/* Oversized source: sample from a partial view of the bo. */
	if (too_large(src->width, src->height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = src_bo;
		tmp.src.width = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
	}

	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;

	tmp.u.gen7.flags = COPY_FLAGS(alu);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
			if (tmp.src.bo != src_bo)
				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
			goto fallback_blt;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* Fold the proxy/partial offsets into the per-box deltas. */
	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;

	tmp.dst.x = tmp.dst.y = 0;

	gen7_align_vertex(sna, &tmp);
	gen7_emit_copy_state(sna, &tmp);

	do {
		int16_t *v;
		int n_this_time;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_copy_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {

			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			v[0] = box->x2 + dst_dx;
			v[2] = box->x2 + src_dx;
			v[1] = v[5] = box->y2 + dst_dy;
			v[3] = v[7] = box->y2 + src_dy;
			v[8] = v[4] = box->x1 + dst_dx;
			v[10] = v[6] = box->x1 + src_dx;
			v[9] = box->y1 + dst_dy;
			v[11] = box->y1 + src_dy;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	if (tmp.src.bo != src_bo)
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	DBG(("%s: fallback tiled\n", __FUNCTION__));
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}
dst_dx, dst_dy, 3120 box, n); 3121} 3122 3123static void 3124gen7_render_copy_blt(struct sna *sna, 3125 const struct sna_copy_op *op, 3126 int16_t sx, int16_t sy, 3127 int16_t w, int16_t h, 3128 int16_t dx, int16_t dy) 3129{ 3130 int16_t *v; 3131 3132 gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state); 3133 3134 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3135 sna->render.vertex_used += 6; 3136 assert(sna->render.vertex_used <= sna->render.vertex_size); 3137 3138 v[0] = dx+w; v[1] = dy+h; 3139 v[2] = sx+w; v[3] = sy+h; 3140 v[4] = dx; v[5] = dy+h; 3141 v[6] = sx; v[7] = sy+h; 3142 v[8] = dx; v[9] = dy; 3143 v[10] = sx; v[11] = sy; 3144} 3145 3146static void 3147gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3148{ 3149 if (sna->render.vertex_offset) 3150 gen4_vertex_flush(sna); 3151} 3152 3153static bool 3154gen7_render_copy(struct sna *sna, uint8_t alu, 3155 PixmapPtr src, struct kgem_bo *src_bo, 3156 PixmapPtr dst, struct kgem_bo *dst_bo, 3157 struct sna_copy_op *op) 3158{ 3159 DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 3160 __FUNCTION__, alu, 3161 src->drawable.width, src->drawable.height, 3162 dst->drawable.width, dst->drawable.height)); 3163 3164 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 3165 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3166 sna_blt_copy(sna, alu, 3167 src_bo, dst_bo, 3168 dst->drawable.bitsPerPixel, 3169 op)) 3170 return true; 3171 3172 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3173 too_large(src->drawable.width, src->drawable.height) || 3174 too_large(dst->drawable.width, dst->drawable.height)) { 3175fallback: 3176 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3177 return false; 3178 3179 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3180 dst->drawable.bitsPerPixel, 3181 op); 3182 } 3183 3184 if (dst->drawable.depth == src->drawable.depth) { 3185 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3186 
/* Set up a streaming copy operation (repeated blt calls through *op):
 * prefer the blitter when heuristics say so, otherwise configure the
 * render pipeline.  Returns true with op->blt/op->done populated. */
static bool
gen7_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* Render path requires copy/clear, distinct bos and in-range
	 * surface sizes; otherwise force the blitter. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen7_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen7_get_card_format(op->base.src.pict_format);
	op->base.src.width = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = COPY_FLAGS(alu);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_copy_state(sna, &op->base);

	op->blt = gen7_render_copy_blt;
	op->done = gen7_render_copy_done;
	return true;
}
/* Emit surface state for a solid fill: dst as render target plus a 1x1
 * BGRA source holding the fill colour.  Reuses the previous binding
 * table when identical, as gen7_emit_copy_state does. */
static void
gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint16_t dirty;
	uint32_t *binding_table;
	uint16_t offset;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to a clearing the full MSC surface only with a
	 * specific kernel.
	 */

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	/* The solid colour lives in a 1x1 texture sampled repeatedly. */
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN7_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Identical to the previous table: rewind and reuse it. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen7_surface_state)/sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same render target as before: no dirty flush required. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	gen7_emit_state(sna, op, offset | dirty);
}
box, n)) 3313 return true; 3314 3315 if (!gen7_check_dst_format(format)) 3316 return false; 3317 } 3318 3319 if (op == PictOpClear) { 3320 pixel = 0; 3321 op = PictOpSrc; 3322 } else if (!sna_get_pixel_from_rgba(&pixel, 3323 color->red, 3324 color->green, 3325 color->blue, 3326 color->alpha, 3327 PICT_a8r8g8b8)) 3328 return false; 3329 3330 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3331 __FUNCTION__, pixel, n, 3332 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3333 3334 tmp.dst.pixmap = (PixmapPtr)dst; 3335 tmp.dst.width = dst->width; 3336 tmp.dst.height = dst->height; 3337 tmp.dst.format = format; 3338 tmp.dst.bo = dst_bo; 3339 tmp.dst.x = tmp.dst.y = 0; 3340 tmp.damage = NULL; 3341 3342 sna_render_composite_redirect_init(&tmp); 3343 if (too_large(dst->width, dst->height)) { 3344 BoxRec extents; 3345 3346 boxes_extents(box, n, &extents); 3347 if (!sna_render_composite_redirect(sna, &tmp, 3348 extents.x1, extents.y1, 3349 extents.x2 - extents.x1, 3350 extents.y2 - extents.y1, 3351 n > 1)) 3352 return sna_tiling_fill_boxes(sna, op, format, color, 3353 dst, dst_bo, box, n); 3354 } 3355 3356 tmp.src.bo = sna_render_get_solid(sna, pixel); 3357 tmp.mask.bo = NULL; 3358 3359 tmp.floats_per_vertex = 2; 3360 tmp.floats_per_rect = 6; 3361 tmp.need_magic_ca_pass = false; 3362 3363 tmp.u.gen7.flags = FILL_FLAGS(op, format); 3364 3365 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3366 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3367 kgem_submit(&sna->kgem); 3368 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3369 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3370 if (tmp.redirect.real_bo) 3371 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3372 3373 return false; 3374 } 3375 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3376 } 3377 3378 gen7_align_vertex(sna, &tmp); 3379 gen7_emit_fill_state(sna, &tmp); 3380 3381 do { 3382 int n_this_time; 3383 int16_t *v; 3384 3385 n_this_time = gen7_get_rectangles(sna, &tmp, n, 3386 gen7_emit_fill_state); 3387 n -= n_this_time; 3388 3389 v = 
(int16_t *)(sna->render.vertices + sna->render.vertex_used); 3390 sna->render.vertex_used += 6 * n_this_time; 3391 assert(sna->render.vertex_used <= sna->render.vertex_size); 3392 do { 3393 DBG((" (%d, %d), (%d, %d)\n", 3394 box->x1, box->y1, box->x2, box->y2)); 3395 3396 v[0] = box->x2; 3397 v[5] = v[1] = box->y2; 3398 v[8] = v[4] = box->x1; 3399 v[9] = box->y1; 3400 v[2] = v[3] = v[7] = 1; 3401 v[6] = v[10] = v[11] = 0; 3402 v += 12; box++; 3403 } while (--n_this_time); 3404 } while (n); 3405 3406 gen4_vertex_flush(sna); 3407 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3408 sna_render_composite_redirect_done(sna, &tmp); 3409 return true; 3410} 3411 3412static void 3413gen7_render_fill_op_blt(struct sna *sna, 3414 const struct sna_fill_op *op, 3415 int16_t x, int16_t y, int16_t w, int16_t h) 3416{ 3417 int16_t *v; 3418 3419 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3420 3421 gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state); 3422 3423 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3424 sna->render.vertex_used += 6; 3425 assert(sna->render.vertex_used <= sna->render.vertex_size); 3426 3427 v[0] = x+w; 3428 v[4] = v[8] = x; 3429 v[1] = v[5] = y+h; 3430 v[9] = y; 3431 3432 v[2] = v[3] = v[7] = 1; 3433 v[6] = v[10] = v[11] = 0; 3434} 3435 3436fastcall static void 3437gen7_render_fill_op_box(struct sna *sna, 3438 const struct sna_fill_op *op, 3439 const BoxRec *box) 3440{ 3441 int16_t *v; 3442 3443 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3444 box->x1, box->y1, box->x2, box->y2)); 3445 3446 gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state); 3447 3448 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3449 sna->render.vertex_used += 6; 3450 assert(sna->render.vertex_used <= sna->render.vertex_size); 3451 3452 v[0] = box->x2; 3453 v[8] = v[4] = box->x1; 3454 v[5] = v[1] = box->y2; 3455 v[9] = box->y1; 3456 3457 v[7] = v[2] = v[3] = 1; 3458 v[6] = v[10] = v[11] = 0; 3459} 3460 3461fastcall static void 
/* Fill-op callback: emit a whole list of boxes, chunked to fit the
 * vertex buffer / batch as dictated by gen7_get_rectangles.
 */
gen7_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			/* 12 int16s per rect: three (x,y)+(s,t) vertices. */
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Fill-op callback: flush pending vertices and release the solid source. */
static void
gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Set up a reusable solid-fill operation (op->blt/box/boxes/done
 * callbacks) targeting dst with the given alu and colour.
 *
 * Prefers the BLT when it is already engaged; must use the BLT when the
 * alu is not expressible by RENDER (only GXcopy/GXclear are) or when the
 * destination exceeds the 3D pipeline limits.  Returns false only if no
 * engine can service the fill.
 */
static bool
gen7_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* Solid source: a cached 1x1 bo holding the fill colour. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_fill_state(sna, &op->base);

	op->blt = gen7_render_fill_op_blt;
	op->box = gen7_render_fill_op_box;
	op->boxes = gen7_render_fill_op_boxes;
	op->points = NULL;
	op->done = gen7_render_fill_op_done;
	return true;
}

/* Attempt a single-rectangle fill on the BLT engine. */
static bool
gen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* Fill a single rectangle, choosing between the BLT and the RENDER
 * pipeline; one-shot counterpart of gen7_render_fill.
 */
static bool
gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen7_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER...
 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid 1x1 source holding the fill colour. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* One rectangle: three (x,y)+(s,t) vertices packed as 12 int16s. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Attempt to clear the whole drawable on the BLT engine. */
static bool
gen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

/* Clear an entire pixmap to zero, via BLT when already engaged or when
 * the target is too large for RENDER, otherwise with a single
 * full-surface rectangle on the 3D pipeline.
 */
static bool
gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen7_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black/zero source. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	/* Single rectangle covering the whole drawable. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Invalidate all cached gen7 pipeline state so the next operation
 * re-emits everything (called around batch boundaries/resets).
 * -1 is used as "no cached value" for the unit caches below.
 */
static void gen7_render_reset(struct sna *sna)
{
	sna->render_state.gen7.emit_flush = false;
	sna->render_state.gen7.needs_invariant = true;
	sna->render_state.gen7.ve_id = 3 << 2;
	sna->render_state.gen7.last_primitive = -1;

	sna->render_state.gen7.num_sf_outputs = 0;
	sna->render_state.gen7.samplers = -1;
	sna->render_state.gen7.blend = -1;
	sna->render_state.gen7.kernel = -1;
	sna->render_state.gen7.drawrect_offset = -1;
	sna->render_state.gen7.drawrect_limit = -1;
	sna->render_state.gen7.surface_table = 0;

	/* A vbo we can no longer map is useless for vertex upload. */
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static-state bo built by gen7_render_setup. */
static void gen7_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}

/* GT-level probes decode bits of the PCI device id; is_gt3 is only
 * meaningful on gen 7.5 (Haswell), as asserted.
 */
static bool is_gt3(struct sna *sna, int devid)
{
	assert(sna->kgem.gen == 075);
	return devid & 0x20;
}

static bool is_gt2(struct sna *sna, int devid)
{
	return devid & (is_hsw(sna)?
			0x30 : 0x20);
}

static bool is_mobile(struct sna *sna, int devid)
{
	return (devid & 0xf) == 0x6;
}

/* Identify the GPU variant (IVB/BYT/HSW and GT level), then build the
 * static state stream: WM kernels, sampler states and the blend table,
 * finishing into state->general_bo.  Returns false for unknown devices
 * or if the stream cannot be allocated.
 */
static bool gen7_render_setup(struct sna *sna, int devid)
{
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (devid & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna, devid))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (devid & 0xf) {
			if (is_gt3(sna, devid))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna, devid))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
 */
	null_create(&general);

	/* Upload every WM kernel.  Pre-built blobs go in the SIMD16 slot
	 * ([1]); otherwise compile for each enabled dispatch width
	 * (8/16/32 -> slots [0]/[1]/[2]).
	 */
	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* At least one dispatch width must have been produced. */
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	/* Sampler-state table: 2 fixed pairs (copy, fill) plus one pair
	 * for every (filter, extend) combination of source and mask.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Public entry point: probe the device and, on success, install the
 * gen7 implementations into sna->render / sna->kgem.  Returns the name
 * of the selected backend, or the unchanged fallback name on failure.
 * Individual paths can be compiled out via the NO_* knobs at the top of
 * the file.
 */
const char *gen7_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->scrn);

	if (!gen7_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}