gen7_render.c revision 13496ba1
1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen7_render.h" 45#include "gen4_common.h" 46#include "gen4_source.h" 47#include "gen4_vertex.h" 48#include "gen6_common.h" 49 50#define ALWAYS_INVALIDATE 0 51#define ALWAYS_FLUSH 0 52#define ALWAYS_STALL 0 53 54#define NO_COMPOSITE 0 55#define NO_COMPOSITE_SPANS 0 56#define NO_COPY 0 57#define NO_COPY_BOXES 0 58#define NO_FILL 0 59#define NO_FILL_BOXES 0 60#define NO_FILL_ONE 0 61#define NO_FILL_CLEAR 0 62 63#define NO_RING_SWITCH 0 64 65#define USE_8_PIXEL_DISPATCH 1 66#define USE_16_PIXEL_DISPATCH 1 67#define USE_32_PIXEL_DISPATCH 0 68 69#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 70#error "Must select at least 8, 16 or 32 pixel dispatch" 71#endif 72 73#define GEN7_MAX_SIZE 16384 74 75/* XXX Todo 76 * 77 * STR (software tiled rendering) mode. No, really. 78 * 64x32 pixel blocks align with the rendering cache. Worth considering. 
79 */ 80 81#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 82 83struct gt_info { 84 const char *name; 85 uint32_t max_vs_threads; 86 uint32_t max_gs_threads; 87 uint32_t max_wm_threads; 88 struct { 89 int size; 90 int max_vs_entries; 91 int max_gs_entries; 92 int push_ps_size; /* in 1KBs */ 93 } urb; 94 int gt; 95 uint32_t mocs; 96}; 97 98static const struct gt_info ivb_gt_info = { 99 .name = "Ivybridge (gen7)", 100 .max_vs_threads = 16, 101 .max_gs_threads = 16, 102 .max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT, 103 .urb = { 128, 64, 64, 8 }, 104 .gt = 0, 105}; 106 107static const struct gt_info ivb_gt1_info = { 108 .name = "Ivybridge (gen7, gt1)", 109 .max_vs_threads = 36, 110 .max_gs_threads = 36, 111 .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 112 .urb = { 128, 512, 192, 8 }, 113 .gt = 1, 114 .mocs = 3, 115}; 116 117static const struct gt_info ivb_gt2_info = { 118 .name = "Ivybridge (gen7, gt2)", 119 .max_vs_threads = 128, 120 .max_gs_threads = 128, 121 .max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT, 122 .urb = { 256, 704, 320, 8 }, 123 .gt = 2, 124 .mocs = 3, 125}; 126 127static const struct gt_info byt_gt_info = { 128 .name = "Baytrail (gen7)", 129 .max_vs_threads = 36, 130 .max_gs_threads = 36, 131 .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 132 .urb = { 128, 512, 192, 8 }, 133 .gt = 1, 134}; 135 136static const struct gt_info hsw_gt_info = { 137 .name = "Haswell (gen7.5)", 138 .max_vs_threads = 8, 139 .max_gs_threads = 8, 140 .max_wm_threads = 141 (8 - 1) << HSW_PS_MAX_THREADS_SHIFT | 142 1 << HSW_PS_SAMPLE_MASK_SHIFT, 143 .urb = { 128, 64, 64, 8 }, 144 .gt = 0, 145}; 146 147static const struct gt_info hsw_gt1_info = { 148 .name = "Haswell (gen7.5, gt1)", 149 .max_vs_threads = 70, 150 .max_gs_threads = 70, 151 .max_wm_threads = 152 (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | 153 1 << HSW_PS_SAMPLE_MASK_SHIFT, 154 .urb = { 128, 640, 256, 8 }, 155 .gt = 1, 156 .mocs = 5, 157}; 158 159static const struct gt_info hsw_gt2_info = { 
160 .name = "Haswell (gen7.5, gt2)", 161 .max_vs_threads = 140, 162 .max_gs_threads = 140, 163 .max_wm_threads = 164 (140 - 1) << HSW_PS_MAX_THREADS_SHIFT | 165 1 << HSW_PS_SAMPLE_MASK_SHIFT, 166 .urb = { 256, 1664, 640, 8 }, 167 .gt = 2, 168 .mocs = 5, 169}; 170 171static const struct gt_info hsw_gt3_info = { 172 .name = "Haswell (gen7.5, gt3)", 173 .max_vs_threads = 280, 174 .max_gs_threads = 280, 175 .max_wm_threads = 176 (280 - 1) << HSW_PS_MAX_THREADS_SHIFT | 177 1 << HSW_PS_SAMPLE_MASK_SHIFT, 178 .urb = { 512, 3328, 1280, 16 }, 179 .gt = 3, 180 .mocs = 5, 181}; 182 183inline static bool is_ivb(struct sna *sna) 184{ 185 return sna->kgem.gen == 070; 186} 187 188inline static bool is_byt(struct sna *sna) 189{ 190 return sna->kgem.gen == 071; 191} 192 193inline static bool is_hsw(struct sna *sna) 194{ 195 return sna->kgem.gen == 075; 196} 197 198static const uint32_t ps_kernel_packed[][4] = { 199#include "exa_wm_src_affine.g7b" 200#include "exa_wm_src_sample_argb.g7b" 201#include "exa_wm_yuv_rgb.g7b" 202#include "exa_wm_write.g7b" 203}; 204 205static const uint32_t ps_kernel_planar[][4] = { 206#include "exa_wm_src_affine.g7b" 207#include "exa_wm_src_sample_planar.g7b" 208#include "exa_wm_yuv_rgb.g7b" 209#include "exa_wm_write.g7b" 210}; 211 212#define KERNEL(kernel_enum, kernel, num_surfaces) \ 213 [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 214#define NOKERNEL(kernel_enum, func, num_surfaces) \ 215 [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 216static const struct wm_kernel_info { 217 const char *name; 218 const void *data; 219 unsigned int size; 220 int num_surfaces; 221} wm_kernels[] = { 222 NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 223 NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 224 225 NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 226 NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 227 228 NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 229 NOKERNEL(MASKCA_P, 
brw_wm_kernel__projective_mask_ca, 3), 230 231 NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 232 NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 233 234 NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 235 NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 236 237 KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 238 KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 239}; 240#undef KERNEL 241 242static const struct blendinfo { 243 bool src_alpha; 244 uint32_t src_blend; 245 uint32_t dst_blend; 246} gen7_blend_op[] = { 247 /* Clear */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO}, 248 /* Src */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO}, 249 /* Dst */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE}, 250 /* Over */ {1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 251 /* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE}, 252 /* In */ {0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 253 /* InReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA}, 254 /* Out */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 255 /* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 256 /* Atop */ {1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 257 /* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA}, 258 /* Xor */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 259 /* Add */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE}, 260}; 261 262/** 263 * Highest-valued BLENDFACTOR used in gen7_blend_op. 
264 * 265 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR, 266 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 267 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 268 */ 269#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1) 270 271#define GEN7_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen7_blend_state), 64) 272 273#define BLEND_OFFSET(s, d) \ 274 ((d != GEN7_BLENDFACTOR_ZERO) << 15 | \ 275 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE)) 276 277#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO) 278#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO) 279 280#define SAMPLER_OFFSET(sf, se, mf, me) \ 281 ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state)) 282 283#define VERTEX_2s2s 0 284 285#define COPY_SAMPLER 0 286#define COPY_VERTEX VERTEX_2s2s 287#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX) 288 289#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state)) 290#define FILL_VERTEX VERTEX_2s2s 291#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 292#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 293 294#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0) 295#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0) 296#define GEN7_READS_DST(f) (((f) >> 15) & 1) 297#define GEN7_KERNEL(f) (((f) >> 16) & 0xf) 298#define GEN7_VERTEX(f) (((f) >> 0) & 0xf) 299#define GEN7_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 300 301#define OUT_BATCH(v) batch_emit(sna, v) 302#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 303#define OUT_VERTEX_F(v) vertex_emit(sna, v) 304 305static inline bool too_large(int width, int height) 306{ 307 return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; 308} 309 310static uint32_t 
gen7_get_blend(int op, 311 bool has_component_alpha, 312 uint32_t dst_format) 313{ 314 uint32_t src, dst; 315 316 src = gen7_blend_op[op].src_blend; 317 dst = gen7_blend_op[op].dst_blend; 318 319 /* If there's no dst alpha channel, adjust the blend op so that 320 * we'll treat it always as 1. 321 */ 322 if (PICT_FORMAT_A(dst_format) == 0) { 323 if (src == GEN7_BLENDFACTOR_DST_ALPHA) 324 src = GEN7_BLENDFACTOR_ONE; 325 else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA) 326 src = GEN7_BLENDFACTOR_ZERO; 327 } 328 329 /* If the source alpha is being used, then we should only be in a 330 * case where the source blend factor is 0, and the source blend 331 * value is the mask channels multiplied by the source picture's alpha. 332 */ 333 if (has_component_alpha && gen7_blend_op[op].src_alpha) { 334 if (dst == GEN7_BLENDFACTOR_SRC_ALPHA) 335 dst = GEN7_BLENDFACTOR_SRC_COLOR; 336 else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA) 337 dst = GEN7_BLENDFACTOR_INV_SRC_COLOR; 338 } 339 340 DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 341 op, dst_format, PICT_FORMAT_A(dst_format), 342 src, dst, (int)BLEND_OFFSET(src, dst))); 343 return BLEND_OFFSET(src, dst); 344} 345 346static uint32_t gen7_get_card_format(PictFormat format) 347{ 348 switch (format) { 349 default: 350 return -1; 351 case PICT_a8r8g8b8: 352 return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 353 case PICT_x8r8g8b8: 354 return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; 355 case PICT_a8b8g8r8: 356 return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 357 case PICT_x8b8g8r8: 358 return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM; 359#ifdef PICT_a2r10g10b10 360 case PICT_a2r10g10b10: 361 return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 362 case PICT_x2r10g10b10: 363 return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM; 364#endif 365 case PICT_r8g8b8: 366 return GEN7_SURFACEFORMAT_R8G8B8_UNORM; 367 case PICT_r5g6b5: 368 return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 369 case PICT_a1r5g5b5: 370 return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 371 case PICT_a8: 372 
return GEN7_SURFACEFORMAT_A8_UNORM; 373 case PICT_a4r4g4b4: 374 return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 375 } 376} 377 378static uint32_t gen7_get_dest_format(PictFormat format) 379{ 380 switch (format) { 381 default: 382 return -1; 383 case PICT_a8r8g8b8: 384 case PICT_x8r8g8b8: 385 return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 386 case PICT_a8b8g8r8: 387 case PICT_x8b8g8r8: 388 return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 389#ifdef PICT_a2r10g10b10 390 case PICT_a2r10g10b10: 391 case PICT_x2r10g10b10: 392 return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 393#endif 394 case PICT_r5g6b5: 395 return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 396 case PICT_x1r5g5b5: 397 case PICT_a1r5g5b5: 398 return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 399 case PICT_a8: 400 return GEN7_SURFACEFORMAT_A8_UNORM; 401 case PICT_a4r4g4b4: 402 case PICT_x4r4g4b4: 403 return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 404 } 405} 406 407static bool gen7_check_dst_format(PictFormat format) 408{ 409 if (gen7_get_dest_format(format) != -1) 410 return true; 411 412 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 413 return false; 414} 415 416static bool gen7_check_format(uint32_t format) 417{ 418 if (gen7_get_card_format(format) != -1) 419 return true; 420 421 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 422 return false; 423} 424 425static uint32_t gen7_filter(uint32_t filter) 426{ 427 switch (filter) { 428 default: 429 assert(0); 430 case PictFilterNearest: 431 return SAMPLER_FILTER_NEAREST; 432 case PictFilterBilinear: 433 return SAMPLER_FILTER_BILINEAR; 434 } 435} 436 437static uint32_t gen7_check_filter(PicturePtr picture) 438{ 439 switch (picture->filter) { 440 case PictFilterNearest: 441 case PictFilterBilinear: 442 return true; 443 default: 444 return false; 445 } 446} 447 448static uint32_t gen7_repeat(uint32_t repeat) 449{ 450 switch (repeat) { 451 default: 452 assert(0); 453 case RepeatNone: 454 return SAMPLER_EXTEND_NONE; 455 case RepeatNormal: 456 return SAMPLER_EXTEND_REPEAT; 
457 case RepeatPad: 458 return SAMPLER_EXTEND_PAD; 459 case RepeatReflect: 460 return SAMPLER_EXTEND_REFLECT; 461 } 462} 463 464static bool gen7_check_repeat(PicturePtr picture) 465{ 466 if (!picture->repeat) 467 return true; 468 469 switch (picture->repeatType) { 470 case RepeatNone: 471 case RepeatNormal: 472 case RepeatPad: 473 case RepeatReflect: 474 return true; 475 default: 476 return false; 477 } 478} 479 480static int 481gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 482{ 483 int base; 484 485 if (has_mask) { 486 if (is_ca) { 487 if (gen7_blend_op[op].src_alpha) 488 base = GEN7_WM_KERNEL_MASKSA; 489 else 490 base = GEN7_WM_KERNEL_MASKCA; 491 } else 492 base = GEN7_WM_KERNEL_MASK; 493 } else 494 base = GEN7_WM_KERNEL_NOMASK; 495 496 return base + !is_affine; 497} 498 499static void 500gen7_emit_urb(struct sna *sna) 501{ 502 OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); 503 OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size); 504 505 /* num of VS entries must be divisible by 8 if size < 9 */ 506 OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); 507 OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | 508 (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | 509 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 510 511 OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2)); 512 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 513 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 514 515 OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2)); 516 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 517 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 518 519 OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2)); 520 OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 521 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 522} 523 524static void 525gen7_emit_state_base_address(struct sna *sna) 526{ 527 uint32_t mocs = sna->render_state.gen7.info->mocs << 8; 528 529 OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); 530 OUT_BATCH(0); /* general */ 531 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* 
surface */ 532 sna->kgem.nbatch, 533 NULL, 534 I915_GEM_DOMAIN_INSTRUCTION << 16, 535 BASE_ADDRESS_MODIFY)); 536 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */ 537 sna->kgem.nbatch, 538 sna->render_state.gen7.general_bo, 539 I915_GEM_DOMAIN_INSTRUCTION << 16, 540 mocs | BASE_ADDRESS_MODIFY)); 541 OUT_BATCH(0); /* indirect */ 542 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 543 sna->kgem.nbatch, 544 sna->render_state.gen7.general_bo, 545 I915_GEM_DOMAIN_INSTRUCTION << 16, 546 mocs | BASE_ADDRESS_MODIFY)); 547 548 /* upper bounds, disable */ 549 OUT_BATCH(0); 550 OUT_BATCH(BASE_ADDRESS_MODIFY); 551 OUT_BATCH(0); 552 OUT_BATCH(BASE_ADDRESS_MODIFY); 553} 554 555static void 556gen7_disable_vs(struct sna *sna) 557{ 558 /* For future reference: 559 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs 560 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS, 561 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, 562 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS. 563 * 564 * Here we saved by the full-flush incurred when emitting 565 * the batchbuffer. 
566 */ 567 OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2)); 568 OUT_BATCH(0); /* no VS kernel */ 569 OUT_BATCH(0); 570 OUT_BATCH(0); 571 OUT_BATCH(0); 572 OUT_BATCH(0); /* pass-through */ 573 574#if 0 575 OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2)); 576 OUT_BATCH(0); 577 OUT_BATCH(0); 578 OUT_BATCH(0); 579 OUT_BATCH(0); 580 OUT_BATCH(0); 581 OUT_BATCH(0); 582 583 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); 584 OUT_BATCH(0); 585 586 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); 587 OUT_BATCH(0); 588#endif 589} 590 591static void 592gen7_disable_hs(struct sna *sna) 593{ 594 OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2)); 595 OUT_BATCH(0); /* no HS kernel */ 596 OUT_BATCH(0); 597 OUT_BATCH(0); 598 OUT_BATCH(0); 599 OUT_BATCH(0); 600 OUT_BATCH(0); /* pass-through */ 601 602#if 0 603 OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2)); 604 OUT_BATCH(0); 605 OUT_BATCH(0); 606 OUT_BATCH(0); 607 OUT_BATCH(0); 608 OUT_BATCH(0); 609 OUT_BATCH(0); 610 611 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); 612 OUT_BATCH(0); 613 614 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); 615 OUT_BATCH(0); 616#endif 617} 618 619static void 620gen7_disable_te(struct sna *sna) 621{ 622 OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2)); 623 OUT_BATCH(0); 624 OUT_BATCH(0); 625 OUT_BATCH(0); 626} 627 628static void 629gen7_disable_ds(struct sna *sna) 630{ 631 OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2)); 632 OUT_BATCH(0); 633 OUT_BATCH(0); 634 OUT_BATCH(0); 635 OUT_BATCH(0); 636 OUT_BATCH(0); 637 638#if 0 639 OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2)); 640 OUT_BATCH(0); 641 OUT_BATCH(0); 642 OUT_BATCH(0); 643 OUT_BATCH(0); 644 OUT_BATCH(0); 645 OUT_BATCH(0); 646 647 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); 648 OUT_BATCH(0); 649 650 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 651 OUT_BATCH(0); 652#endif 653} 654 655static void 656gen7_disable_gs(struct sna *sna) 657{ 658 OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2)); 659 OUT_BATCH(0); /* no GS kernel */ 
660 OUT_BATCH(0); 661 OUT_BATCH(0); 662 OUT_BATCH(0); 663 OUT_BATCH(0); 664 OUT_BATCH(0); /* pass-through */ 665 666#if 0 667 OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2)); 668 OUT_BATCH(0); 669 OUT_BATCH(0); 670 OUT_BATCH(0); 671 OUT_BATCH(0); 672 OUT_BATCH(0); 673 OUT_BATCH(0); 674 675 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); 676 OUT_BATCH(0); 677 678 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); 679 OUT_BATCH(0); 680#endif 681} 682 683static void 684gen7_disable_streamout(struct sna *sna) 685{ 686 OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2)); 687 OUT_BATCH(0); 688 OUT_BATCH(0); 689} 690 691static void 692gen7_emit_sf_invariant(struct sna *sna) 693{ 694 OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2)); 695 OUT_BATCH(0); 696 OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE); 697 OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); 698 OUT_BATCH(0); 699 OUT_BATCH(0); 700 OUT_BATCH(0); 701} 702 703static void 704gen7_emit_cc_invariant(struct sna *sna) 705{ 706#if 0 /* unused, no change */ 707 OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2)); 708 OUT_BATCH(0); 709 710 OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2)); 711 OUT_BATCH(0); 712#endif 713 714 /* XXX clear to be safe */ 715 OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); 716 OUT_BATCH(0); 717} 718 719static void 720gen7_disable_clip(struct sna *sna) 721{ 722 OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2)); 723 OUT_BATCH(0); 724 OUT_BATCH(0); /* pass-through */ 725 OUT_BATCH(0); 726 727 OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); 728 OUT_BATCH(0); 729} 730 731static void 732gen7_emit_wm_invariant(struct sna *sna) 733{ 734 OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2)); 735 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 736 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 737 OUT_BATCH(0); 738 739#if 0 740 /* XXX length bias of 7 in old spec? 
*/ 741 OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2)); 742 OUT_BATCH(0); 743 OUT_BATCH(0); 744 OUT_BATCH(0); 745 OUT_BATCH(0); 746 OUT_BATCH(0); 747 OUT_BATCH(0); 748#endif 749} 750 751static void 752gen7_emit_null_depth_buffer(struct sna *sna) 753{ 754 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); 755 OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 756 GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 757 OUT_BATCH(0); /* disable depth, stencil and hiz */ 758 OUT_BATCH(0); 759 OUT_BATCH(0); 760 OUT_BATCH(0); 761 OUT_BATCH(0); 762 763#if 0 764 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); 765 OUT_BATCH(0); 766 OUT_BATCH(0); 767#endif 768} 769 770static void 771gen7_emit_invariant(struct sna *sna) 772{ 773 OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D); 774 775 OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2)); 776 OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 777 GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 778 OUT_BATCH(0); 779 OUT_BATCH(0); 780 781 OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2)); 782 OUT_BATCH(1); 783 784 gen7_emit_urb(sna); 785 786 gen7_emit_state_base_address(sna); 787 788 gen7_disable_vs(sna); 789 gen7_disable_hs(sna); 790 gen7_disable_te(sna); 791 gen7_disable_ds(sna); 792 gen7_disable_gs(sna); 793 gen7_disable_clip(sna); 794 gen7_emit_sf_invariant(sna); 795 gen7_emit_wm_invariant(sna); 796 gen7_emit_cc_invariant(sna); 797 gen7_disable_streamout(sna); 798 gen7_emit_null_depth_buffer(sna); 799 800 sna->render_state.gen7.needs_invariant = false; 801} 802 803static void 804gen7_emit_cc(struct sna *sna, uint32_t blend_offset) 805{ 806 struct gen7_render_state *render = &sna->render_state.gen7; 807 808 if (render->blend == blend_offset) 809 return; 810 811 DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); 812 813 /* XXX can have upto 8 blend states preload, selectable via 814 * Render Target Index. What other side-effects of Render Target Index? 
815 */ 816 817 assert (is_aligned(render->cc_blend + blend_offset, 64)); 818 OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); 819 OUT_BATCH((render->cc_blend + blend_offset) | 1); 820 821 render->blend = blend_offset; 822} 823 824static void 825gen7_emit_sampler(struct sna *sna, uint32_t state) 826{ 827 if (sna->render_state.gen7.samplers == state) 828 return; 829 830 sna->render_state.gen7.samplers = state; 831 832 DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 833 834 assert (is_aligned(sna->render_state.gen7.wm_state + state, 32)); 835 OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); 836 OUT_BATCH(sna->render_state.gen7.wm_state + state); 837} 838 839static void 840gen7_emit_sf(struct sna *sna, bool has_mask) 841{ 842 int num_sf_outputs = has_mask ? 2 : 1; 843 844 if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs) 845 return; 846 847 DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 848 __FUNCTION__, num_sf_outputs, 1, 0)); 849 850 sna->render_state.gen7.num_sf_outputs = num_sf_outputs; 851 852 OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2)); 853 OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT | 854 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | 855 1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); 856 OUT_BATCH(0); 857 OUT_BATCH(0); /* dw4 */ 858 OUT_BATCH(0); 859 OUT_BATCH(0); 860 OUT_BATCH(0); 861 OUT_BATCH(0); /* dw8 */ 862 OUT_BATCH(0); 863 OUT_BATCH(0); 864 OUT_BATCH(0); 865 OUT_BATCH(0); /* dw12 */ 866 OUT_BATCH(0); 867 OUT_BATCH(0); 868} 869 870static void 871gen7_emit_wm(struct sna *sna, int kernel) 872{ 873 const uint32_t *kernels; 874 875 if (sna->render_state.gen7.kernel == kernel) 876 return; 877 878 sna->render_state.gen7.kernel = kernel; 879 kernels = sna->render_state.gen7.wm_kernel[kernel]; 880 881 DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? 
%d)\n", 882 __FUNCTION__, 883 wm_kernels[kernel].name, 884 wm_kernels[kernel].num_surfaces, 885 kernels[0], kernels[1], kernels[2])); 886 887 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 888 OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 889 OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT | 890 wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); 891 OUT_BATCH(0); /* scratch address */ 892 OUT_BATCH(sna->render_state.gen7.info->max_wm_threads | 893 (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) | 894 (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) | 895 (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) | 896 GEN7_PS_ATTRIBUTE_ENABLE); 897 OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | 898 8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 | 899 6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); 900 OUT_BATCH(kernels[2]); 901 OUT_BATCH(kernels[1]); 902} 903 904static bool 905gen7_emit_binding_table(struct sna *sna, uint16_t offset) 906{ 907 if (sna->render_state.gen7.surface_table == offset) 908 return false; 909 910 /* Binding table pointers */ 911 assert(is_aligned(4*offset, 32)); 912 OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); 913 OUT_BATCH(offset*4); 914 915 sna->render_state.gen7.surface_table = offset; 916 return true; 917} 918 919static bool 920gen7_emit_drawing_rectangle(struct sna *sna, 921 const struct sna_composite_op *op) 922{ 923 uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 924 uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 925 926 assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 927 assert(!too_large(op->dst.width, op->dst.height)); 928 929 if (sna->render_state.gen7.drawrect_limit == limit && 930 sna->render_state.gen7.drawrect_offset == offset) 931 return true; 932 933 sna->render_state.gen7.drawrect_offset = offset; 934 sna->render_state.gen7.drawrect_limit = limit; 935 936 OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 937 OUT_BATCH(0); 938 
OUT_BATCH(limit); 939 OUT_BATCH(offset); 940 return false; 941} 942 943static void 944gen7_emit_vertex_elements(struct sna *sna, 945 const struct sna_composite_op *op) 946{ 947 /* 948 * vertex data in vertex buffer 949 * position: (x, y) 950 * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 951 * texture coordinate 1 if (has_mask is true): same as above 952 */ 953 struct gen7_render_state *render = &sna->render_state.gen7; 954 uint32_t src_format, dw; 955 int id = GEN7_VERTEX(op->u.gen7.flags); 956 bool has_mask; 957 958 DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 959 960 if (render->ve_id == id) 961 return; 962 render->ve_id = id; 963 964 /* The VUE layout 965 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 966 * dword 4-7: position (x, y, 1.0, 1.0), 967 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 968 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 969 * 970 * dword 4-15 are fetched from vertex buffer 971 */ 972 has_mask = (id >> 2) != 0; 973 OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | 974 ((2 * (3 + has_mask)) + 1 - 2)); 975 976 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 977 GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | 978 0 << GEN7_VE0_OFFSET_SHIFT); 979 OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | 980 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT | 981 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 982 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); 983 984 /* x,y */ 985 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 986 GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | 987 0 << GEN7_VE0_OFFSET_SHIFT); 988 OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | 989 GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | 990 GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 991 GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); 992 993 /* u0, v0, w0 */ 994 
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 995 dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 996 switch (id & 3) { 997 default: 998 assert(0); 999 case 0: 1000 src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED; 1001 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1002 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1003 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1004 break; 1005 case 1: 1006 src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1007 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1008 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1009 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1010 break; 1011 case 2: 1012 src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; 1013 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1014 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1015 dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1016 break; 1017 case 3: 1018 src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; 1019 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1020 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1021 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1022 break; 1023 } 1024 OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 1025 src_format << GEN7_VE0_FORMAT_SHIFT | 1026 4 << GEN7_VE0_OFFSET_SHIFT); 1027 OUT_BATCH(dw); 1028 1029 /* u1, v1, w1 */ 1030 if (has_mask) { 1031 unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1032 DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 1033 dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 1034 switch (id >> 2) { 1035 case 1: 1036 src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1037 dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1038 dw |= GEN7_VFCOMPONENT_STORE_0 << 
		  GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough (debug guard only; NDEBUG builds fall into case 2) */
		case 2:
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  src_format << GEN7_VE0_FORMAT_SHIFT |
			  offset << GEN7_VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}

/* Emit a PIPE_CONTROL that flushes the render and texture caches with a
 * CS stall, used when a source/mask bo was written earlier in this batch.
 * Resets the pipe_controls_since_stall counter.
 */
inline static void
gen7_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
		  GEN7_PIPE_CONTROL_TC_FLUSH |
		  GEN7_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}

/* Emit a render-target (write-cache) flush, optionally combined with a
 * CS stall.  Tracks how many PIPE_CONTROLs have been emitted without a
 * stall so gen7_emit_state() can force one periodically on pre-HSW.
 */
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall) {
		stall = GEN7_PIPE_CONTROL_CS_STALL;
		sna->render_state.gen7.pipe_controls_since_stall = 0;
	} else
		sna->render_state.gen7.pipe_controls_since_stall++;

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Emit a pure CS stall (stall at scoreboard, no cache flush). */
inline static void
gen7_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH(0);
	OUT_BATCH(0);
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}

/* Emit all pipeline state needed before the next 3DPRIMITIVE for @op.
 *
 * @wm_binding_table carries the binding-table offset with the "dst is
 * dirty" flag packed into bit 0 (callers pass "offset | dirty"); the
 * flag is stripped before use.  Decides which of invalidate/flush/stall
 * PIPE_CONTROLs are required, strongest first: an invalidate subsumes a
 * flush, which subsumes a bare stall.
 */
static void
gen7_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* bit 0 == destination already dirtied in this batch */
	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	/* A stall is needed only when switching to a new binding table */
	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;
	/* Pre-Haswell workaround: force a stall after a few unstalled
	 * PIPE_CONTROLs.
	 */
	if (sna->kgem.gen < 075 &&
	    sna->render_state.gen7.pipe_controls_since_stall >= 3)
		need_stall = true;

	if (need_invalidate) {
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		/* the invalidate includes a flush and a stall */
		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* Remember whether the next op reading the dst needs a flush */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}

/* Second pass for component-alpha composites: replay the already-emitted
 * vertices with an ADD blend and the CA-aware kernel.  Returns true if a
 * pass was emitted (callers must then restore cc/wm state).
 */
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna,
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen7_emit_wm(sna,
		     gen7_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	/* Re-draw the same vertex range with the ADD blend state */
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

/* Fill in a gen7 SAMPLER_STATE for the given filter/extend combination. */
static void
sampler_state_init(struct gen7_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension.
	 */
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		break;
	}
}

/* Sampler pair for copies: nearest/none with unnormalized coordinates
 * on the source channel; the second (mask) slot gets plain defaults.
 */
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Sampler pair for fills: as above, but the source repeats. */
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Translate an I915_TILING_* value into SURFACE_STATE tiling bits. */
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN7_SURFACE_TILED;
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen7_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		/* Reuse the surface state already emitted in this batch */
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface states grow downwards from the end of the batch */
	offset = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 gen7_tiling_bits(bo->tiling) |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	if (bo->tiling == I915_TILING_Y)
		ss[0] |= GEN7_SURFACE_VALIGN_4;
	if (is_dst) {
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	/* No LLC caching (mocs) for scanout or io buffers */
	ss[5] = (is_scanout || bo->io) ?
		0 : sna->render_state.gen7.info->mocs << 16;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	/* Cache the binding so later ops in this batch can reuse it */
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

/* Emit 3DSTATE_VERTEX_BUFFERS for the vbo slot selected by the op's
 * vertex layout id; the buffer address reloc is filled in at flush time
 * via vertex_reloc.
 */
static void gen7_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN7_VERTEX(op->u.gen7.flags);

	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
		  GEN7_VB0_VERTEXDATA |
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

/* Start (or resume) a RECTLIST primitive.  If the last batch emission
 * was a primitive, simply reopen it by pointing vertex_offset back at
 * its vertex-count dword.
 */
static void gen7_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
}

/* Ensure a primitive is open for @op, emitting the vertex buffer packet
 * first if this vbo layout has not yet been bound in this batch.
 * Returns false if the batch lacks room (caller must flush and retry).
 */
static bool gen7_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen7_emit_vertex_buffer(sna, op);

	gen7_emit_primitive(sna);
	return true;
}

/* Out of vertex space: flush the current vertices (and CA pass) and
 * grab a fresh vbo.  Returns the number of floats now available, or 0
 * if the whole batch must be submitted first.
 */
static int gen7_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen7_magic_ca_pass(sna, op)) {
			/* restore the op's own blend/kernel state */
			gen7_emit_pipe_stall(sna);
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
		}
	}

	return gen4_vertex_finish(sna);
}

/* Reserve vertex space for up to @want rectangles (3 vertices each),
 * flushing/submitting and re-emitting state via @emit_state as needed.
 * Returns the number of rectangles actually granted (>= 1).
 */
inline static int gen7_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen7_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen7_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Submit the batch and rebuild all state before retrying */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

/* Allocate a fresh (zeroed) binding table at the top of the surface
 * area; returns a pointer for filling and its dword offset in @offset.
 */
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen7_surface_state));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

/* Switch to the render ring and make sure there is room for the op's
 * state + surfaces, submitting and re-emitting invariants if not.
 */
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant)
		gen7_emit_invariant(sna);
}

/* Bind dst/src/mask surfaces and emit all state for a composite op. */
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If this table matches the previous one, discard it and reuse
	 * the old offset (all binds were cache hits).
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same destination surface as before: no extra flush required */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	gen7_emit_state(sna, op, offset | dirty);
}

/* Realign the vertex buffer when the op's vertex size changes. */
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
	}
}

/* Emit a single composite rectangle via the op's prim_emit vfunc. */
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, r);
}

/* Emit a single composite box (src/mask offsets equal to dst). */
fastcall static void
gen7_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

	DBG((" %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

/* Fallback box loop: emit each box through prim_emit individually. */
static void
gen7_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG((" %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Batched box loop using the op's vectorised emit_boxes vfunc. */
static void
gen7_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

/* As gen7_render_composite_boxes, but drops the vertex lock while
 * filling vertices so other threads can make progress; the acquire/
 * release pair pins the vbo for the unlocked emit_boxes call.
 */
static void
gen7_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Pre-generate every src x dst blend-factor BLEND_STATE into the static
 * stream; entries are indexed by (src, dst) at runtime.
 */
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
			struct gen7_blend_state *blend =
				(struct gen7_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a no-op: disable blending entirely */
			blend->blend0.blend_enable =
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

/* Emit a SURFACE_STATE for one plane of a video frame at @offset into
 * @bo.  Unlike gen7_bind_bo there is no binding cache and the surface
 * must be linear.
 */
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss, bind;

	bind = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);

	assert(bo->tiling ==
	       I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}

/* Bind the destination plus every source plane of the video frame
 * (6 surfaces for planar formats, 1 for packed) and emit state.
 */
static void gen7_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	gen7_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Y plane full size, U/V planes subsampled by 2; the
		 * pitch[0]/pitch[1] pairing follows the sna_video_frame
		 * layout -- NOTE(review): pitch[1] is the Y pitch here,
		 * pitch[0] the chroma pitch; verify against sna_video.
		 */
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
		src_width[1] = src_width[0] = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1] = src_pitch[0] = frame->pitch[1];
		src_width[4] = src_width[5] = src_width[2] = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen7_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen7_emit_state(sna, op, offset | dirty);
}

/* Render a video frame: draw each clip box of dstRegion as a textured
 * rectangle sampling from the frame, scaling as required.  Returns
 * false if the bos cannot fit in a single batch.
 */
static bool
gen7_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	unsigned filter;
	const BoxRec *box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* (x, y) + (u, v) per vertex, 3 vertices per rectangle */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* 1:1 copy can use nearest sampling; anything scaled is bilinear */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
			       GEN7_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_video_state(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map destination pixels to normalized source texture coords */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		BoxRec r;

		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     pix_xoff, pix_yoff,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		/* RECTLIST: bottom-right, bottom-left, top-left */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}

/* Prepare @channel for use as a composite source/mask from @picture.
 * Handles solid colours, gradients, alpha maps and unsupported
 * repeat/filter modes via the generic fixup paths; otherwise binds the
 * backing pixmap directly.  Returns <0 on failure, 0 for fixup, 1 on
 * success (same convention as the sna_render_picture_* helpers).
 */
static int
gen7_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen7_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen7_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ?
		picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	assert(picture->pDrawable);
	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* A pure integer translation can be folded into (x, y),
		 * letting us drop the transform and use nearest sampling.
		 */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen7_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Too big for a single surface: extract the region of interest */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Convert the generic channel repeat/filter/format values into their
 * gen7 hardware equivalents just before emission.
 */
inline static void gen7_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen7_repeat(channel->repeat);
	channel->filter = gen7_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen7_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}

/* Finish a composite op: flush vertices (and CA pass), release the
 * src/mask references and undo any redirection of the destination.
 */
static void gen7_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

/* Resolve the destination drawable of a composite into op->dst,
 * choosing a GPU bo (with damage tracking) and applying redirection
 * when the pixmap exceeds the 3D pipeline limits.  Returns false if no
 * usable GPU target exists.
 */
inline static bool
gen7_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap =
get_drawable_pixmap(dst->pDrawable);
    op->dst.format = dst->format;
    op->dst.width = op->dst.pixmap->drawable.width;
    op->dst.height = op->dst.pixmap->drawable.height;

    /* A zero extent means "whole picture": compute its extents instead. */
    if (w | h) {
	    assert(w && h);
	    box.x1 = x;
	    box.y1 = y;
	    box.x2 = x + w;
	    box.y2 = y + h;
    } else
	    sna_render_picture_extents(dst, &box);

    hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
    if (!partial) {
	    hint |= IGNORE_DAMAGE;
	    /* Writing every pixel of the target: the old contents are dead. */
	    if (w == op->dst.width && h == op->dst.height)
		    hint |= REPLACES;
    }

    op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
    if (op->dst.bo == NULL)
	    return false;

    if (hint & REPLACES) {
	    /* The whole surface is being replaced, so any pending writes to
	     * the old contents can be discarded. */
	    struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
	    kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
    }

    get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			&op->dst.x, &op->dst.y);

    DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	 __FUNCTION__,
	 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	 op->dst.width, op->dst.height,
	 op->dst.bo->pitch,
	 op->dst.x, op->dst.y,
	 op->damage ? *op->damage : (void *)-1));

    assert(op->dst.bo->proxy == NULL);

    /* Targets beyond the hardware surface limit are redirected to a
     * smaller proxy covering just the operation extents. */
    if (too_large(op->dst.width, op->dst.height) &&
	!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
	    return false;

    return true;
}

/* Heuristic: should this (maskless) composite be run on the BLT ring
 * rather than the 3D pipeline?  Returning true only states a preference;
 * the caller still has to successfully build a blitter operation. */
static bool
try_blt(struct sna *sna,
	PicturePtr dst, PicturePtr src,
	int width, int height)
{
    struct kgem_bo *bo;

    if (sna->kgem.mode == KGEM_BLT) {
	    DBG(("%s: already performing BLT\n", __FUNCTION__));
	    return true;
    }

    if (too_large(width, height)) {
	    /* The 3D pipe cannot handle it at all, so the BLT must. */
	    DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		 __FUNCTION__, width, height));
	    return true;
    }

    bo = __sna_drawable_peek_bo(dst->pDrawable);
    if (bo == NULL)
	    return true;
    if (bo->rq)
	    /* Follow the ring the destination is already active on. */
	    return RQ_IS_BLT(bo->rq);

    if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
	    return true;

    if (src->pDrawable) {
	    bo = __sna_drawable_peek_bo(src->pDrawable);
	    if (bo == NULL)
		    return true;

	    if (prefer_blt_bo(sna, bo))
		    return true;
    }

    if (sna->kgem.ring == KGEM_BLT) {
	    DBG(("%s: already performing BLT\n", __FUNCTION__));
	    return true;
    }

    return false;
}

/* Does this gradient source force a CPU fallback?  Solid and linear
 * gradients are handled on the GPU; other gradient types fall back only
 * when precise rendering is requested. */
static bool
check_gradient(PicturePtr picture, bool precise)
{
    if (picture->pDrawable)
	    return false;

    switch (picture->pSourcePict->type) {
    case SourcePictTypeSolidFill:
    case SourcePictTypeLinear:
	    return false;
    default:
	    return precise;
    }
}

/* True if the picture carries a separate alpha map (unsupported here). */
static bool
has_alphamap(PicturePtr p)
{
    return p->alphaMap != NULL;
}

/* True if using this picture would require uploading an untouched,
 * untransformed drawable to the GPU first. */
static bool
need_upload(PicturePtr p)
{
    return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

/* True if the pixmap's backing bo is still busy on the GPU (or its valid
 * contents live only in GPU damage), i.e. reading it back would stall. */
static bool
source_is_busy(PixmapPtr pixmap)
{
    struct sna_pixmap *priv = sna_pixmap(pixmap);
    if (priv == NULL || priv->clear)
	    return false;

    if
       (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
	    return true;

    if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
	    return true;

    return priv->gpu_damage && !priv->cpu_damage;
}

/* Would using this picture as a composite source force a CPU fallback?
 * Solid colours and GPU-resident sources never do; unsupported repeat
 * modes, formats, filters, alpha maps or pending uploads do. */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
    if (sna_picture_is_solid(p, NULL))
	    return false;

    if (p->pSourcePict)
	    return check_gradient(p, precise);

    if (!gen7_check_repeat(p) || !gen7_check_format(p->format))
	    return true;

    /* Prefer to keep a busy GPU source on the GPU rather than stall. */
    if (pixmap && source_is_busy(pixmap))
	    return false;

    return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p);
}

/* Decide whether the whole composite should be performed on the CPU.
 * Returns true to fall back, false to proceed on the GPU. */
static bool
gen7_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
    PixmapPtr src_pixmap;
    PixmapPtr mask_pixmap;
    PixmapPtr dst_pixmap;
    bool src_fallback, mask_fallback;

    if (!gen7_check_dst_format(dst->format)) {
	    DBG(("%s: unknown destination format: %d\n",
		 __FUNCTION__, dst->format));
	    return true;
    }

    dst_pixmap = get_drawable_pixmap(dst->pDrawable);

    src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
    src_fallback = source_fallback(src, src_pixmap,
				   dst->polyMode == PolyModePrecise);

    if (mask) {
	    mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
	    mask_fallback = source_fallback(mask, mask_pixmap,
					    dst->polyMode == PolyModePrecise);
    } else {
	    mask_pixmap = NULL;
	    mask_fallback = false;
    }

    /* If we are using the destination as a source and need to
     * readback in order to upload the source, do it all
     * on the cpu.
     */
    if (src_pixmap == dst_pixmap && src_fallback) {
	    DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
	    return true;
    }
    if (mask_pixmap == dst_pixmap && mask_fallback) {
	    DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
	    return true;
    }

    /* If anything is on the GPU, push everything out to the GPU */
    if (dst_use_gpu(dst_pixmap)) {
	    DBG(("%s: dst is already on the GPU, try to use GPU\n",
		 __FUNCTION__));
	    return false;
    }

    if (src_pixmap && !src_fallback) {
	    DBG(("%s: src is already on the GPU, try to use GPU\n",
		 __FUNCTION__));
	    return false;
    }
    if (mask_pixmap && !mask_fallback) {
	    DBG(("%s: mask is already on the GPU, try to use GPU\n",
		 __FUNCTION__));
	    return false;
    }

    /* However if the dst is not on the GPU and we need to
     * render one of the sources using the CPU, we may
     * as well do the entire operation in place on the CPU.
     */
    if (src_fallback) {
	    DBG(("%s: dst is on the CPU and src will fallback\n",
		 __FUNCTION__));
	    return true;
    }

    if (mask && mask_fallback) {
	    DBG(("%s: dst is on the CPU and mask will fallback\n",
		 __FUNCTION__));
	    return true;
    }

    if (too_large(dst_pixmap->drawable.width,
		  dst_pixmap->drawable.height) &&
	dst_is_cpu(dst_pixmap)) {
	    DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
	    return true;
    }

    DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	 __FUNCTION__));
    return dst_use_cpu(dst_pixmap);
}

/* Try to initialise the mask channel by reusing the already-prepared
 * source channel (identical picture, or same drawable with compatible
 * sampling state).  Returns true on success.
 * NOTE(review): declared int but only ever returns bool values. */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
    uint32_t color;

    if (src_x != msk_x || src_y != msk_y)
	    return false;

    if (src == mask) {
	    DBG(("%s: mask is
source\n", __FUNCTION__));
	    /* Identical pictures: share the channel, taking an extra bo ref
	     * since done() will destroy both src.bo and mask.bo. */
	    *mc = *sc;
	    mc->bo = kgem_bo_reference(mc->bo);
	    return true;
    }

    if (sna_picture_is_solid(mask, &color))
	    return gen4_channel_init_solid(sna, mc, color);

    if (sc->is_solid)
	    return false;

    if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
	    return false;

    DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

    if (!sna_transform_equal(src->transform, mask->transform))
	    return false;

    if (!sna_picture_alphamap_equal(src, mask))
	    return false;

    if (!gen7_check_repeat(mask))
	    return false;

    if (!gen7_check_filter(mask))
	    return false;

    if (!gen7_check_format(mask->format))
	    return false;

    DBG(("%s: reusing source channel for mask with a twist\n",
	 __FUNCTION__));

    /* Same drawable but different sampling parameters: copy the channel
     * and re-derive repeat/filter/format for the mask. */
    *mc = *sc;
    mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone);
    mc->filter = gen7_filter(mask->filter);
    mc->pict_format = mask->format;
    mc->card_format = gen7_get_card_format(mask->format);
    mc->bo = kgem_bo_reference(mc->bo);
    return true;
}

/* Prepare a render composite operation (the main entry point used for
 * RENDER Composite requests).  Fills in tmp with the chosen kernels and
 * emission callbacks; returns false if the caller must fall back. */
static bool
gen7_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
    if (op >= ARRAY_SIZE(gen7_blend_op))
	    return false;

    DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	 width, height, sna->kgem.mode, sna->kgem.ring));

    /* Maskless operations may be cheaper on the blitter. */
    if (mask == NULL &&
	try_blt(sna, dst, src, width, height) &&
	sna_blt_composite(sna, op,
			  src, dst,
			  src_x, src_y,
			  dst_x, dst_y,
			  width, height,
			  flags, tmp))
	    return true;

    if (gen7_composite_fallback(sna, src, mask, dst))
	    goto fallback;

    if (need_tiling(sna, width, height))
	    return sna_tiling_composite(op, src, mask, dst,
					src_x, src_y,
					msk_x, msk_y,
					dst_x, dst_y,
					width, height,
					tmp);

    /* Clearing with the canonical clear picture is just a Src of zero. */
    if (op == PictOpClear && src == sna->clear)
	    op = PictOpSrc;
    tmp->op = op;
    if (!gen7_composite_set_target(sna, tmp, dst,
				   dst_x, dst_y, width, height,
				   flags & COMPOSITE_PARTIAL || op > PictOpSrc))
	    goto fallback;

    switch (gen7_composite_picture(sna, src, &tmp->src,
				   src_x, src_y,
				   width, height,
				   dst_x, dst_y,
				   dst->polyMode == PolyModePrecise)) {
    case -1:
	    goto cleanup_dst;
    case 0:
	    /* Degenerate source: substitute transparent black. */
	    if (!gen4_channel_init_solid(sna, &tmp->src, 0))
		    goto cleanup_dst;
	    /* fall through to fixup */
    case 1:
	    /* Did we just switch rings to prepare the source? */
	    if (mask == NULL &&
		prefer_blt_composite(sna, tmp) &&
		sna_blt_composite__convert(sna,
					   dst_x, dst_y, width, height,
					   tmp))
		    return true;

	    gen7_composite_channel_convert(&tmp->src);
	    break;
    }

    tmp->is_affine = tmp->src.is_affine;
    tmp->has_component_alpha = false;
    tmp->need_magic_ca_pass = false;

    tmp->mask.bo = NULL;
    tmp->mask.filter = SAMPLER_FILTER_NEAREST;
    tmp->mask.repeat = SAMPLER_EXTEND_NONE;

    if (mask) {
	    if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
		    tmp->has_component_alpha = true;

		    /* Check if it's component alpha that relies on a source alpha and on
		     * the source value. We can only get one of those into the single
		     * source value that we get to blend with.
		     */
		    if (gen7_blend_op[op].src_alpha &&
			(gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) {
			    /* Only PictOpOver can be emulated via the
			     * two-pass OutReverse+Add trick below. */
			    if (op != PictOpOver)
				    goto cleanup_src;

			    tmp->need_magic_ca_pass = true;
			    tmp->op = PictOpOutReverse;
		    }
	    }

	    if (!reuse_source(sna,
			      src, &tmp->src, src_x, src_y,
			      mask, &tmp->mask, msk_x, msk_y)) {
		    switch (gen7_composite_picture(sna, mask, &tmp->mask,
						   msk_x, msk_y,
						   width, height,
						   dst_x, dst_y,
						   dst->polyMode == PolyModePrecise)) {
		    case -1:
			    goto cleanup_src;
		    case 0:
			    if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
				    goto cleanup_src;
			    /* fall through to fixup */
		    case 1:
			    gen7_composite_channel_convert(&tmp->mask);
			    break;
		    }
	    }

	    tmp->is_affine &= tmp->mask.is_affine;
    }

    /* Pack sampler, blend and WM-kernel selections into the per-op flags
     * consumed by gen7_emit_composite_state(). */
    tmp->u.gen7.flags =
	    GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					  tmp->src.repeat,
					  tmp->mask.filter,
					  tmp->mask.repeat),
			   gen7_get_blend(tmp->op,
					  tmp->has_component_alpha,
					  tmp->dst.format),
			   gen7_choose_composite_kernel(tmp->op,
							tmp->mask.bo != NULL,
							tmp->has_component_alpha,
							tmp->is_affine),
			   gen4_choose_composite_emitter(sna, tmp));

    tmp->blt = gen7_render_composite_blt;
    tmp->box = gen7_render_composite_box;
    tmp->boxes = gen7_render_composite_boxes__blt;
    if (tmp->emit_boxes){
	    tmp->boxes = gen7_render_composite_boxes;
	    tmp->thread_boxes = gen7_render_composite_boxes__thread;
    }
    tmp->done = gen7_render_composite_done;

    kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
    if (!kgem_check_bo(&sna->kgem,
		       tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
		       NULL)) {
	    /* Not enough room in the current batch: submit and retry once. */
	    kgem_submit(&sna->kgem);
	    if (!kgem_check_bo(&sna->kgem,
			       tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			       NULL))
		    goto cleanup_mask;
	    _kgem_set_mode(&sna->kgem, KGEM_RENDER);
    }

    gen7_align_vertex(sna, tmp);
    gen7_emit_composite_state(sna, tmp);
    return true;

cleanup_mask:
    if (tmp->mask.bo) {
	    kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
	    tmp->mask.bo = NULL;
    }
cleanup_src:
    if (tmp->src.bo) {
	    kgem_bo_destroy(&sna->kgem, tmp->src.bo);
	    tmp->src.bo = NULL;
    }
cleanup_dst:
    if (tmp->redirect.real_bo) {
	    kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	    tmp->redirect.real_bo = NULL;
    }
fallback:
    /* Last resort: let the blitter try (maskless only), flagged as a
     * fallback so it accepts more cases. */
    return (mask == NULL &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags | COMPOSITE_FALLBACK, tmp));
}

#if !NO_COMPOSITE_SPANS
/* Emit a single opacity-weighted box for a spans composite. */
fastcall static void
gen7_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
    DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	 __FUNCTION__,
	 op->base.src.offset[0], op->base.src.offset[1],
	 opacity,
	 op->base.dst.x, op->base.dst.y,
	 box->x1, box->y1,
	 box->x2 - box->x1,
	 box->y2 - box->y1));

    gen7_get_rectangles(sna, &op->base, 1, gen7_emit_composite_state);
    op->prim_emit(sna, op, box, opacity);
}

/* Emit a run of boxes sharing one opacity, batching as many rectangles as
 * the vertex buffer allows per iteration. */
static void
gen7_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
    DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	 __FUNCTION__, nbox,
	 op->base.src.offset[0], op->base.src.offset[1],
	 opacity,
	 op->base.dst.x, op->base.dst.y));

    do {
	    int nbox_this_time;

	    nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						 gen7_emit_composite_state);
	    nbox -= nbox_this_time;

	    do {
		    DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			 box->x1, box->y1,
			 box->x2 - box->x1,
			 box->y2 - box->y1));

		    op->prim_emit(sna, op, box++, opacity);
	    } while (--nbox_this_time);
    }
    while (nbox);
}

/* Threaded variant: reserve vertex space under the render lock, then emit
 * the vertices outside it so other threads can fill their own slices
 * concurrently. */
fastcall static void
gen7_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
    DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	 __FUNCTION__, nbox,
	 op->base.src.offset[0], op->base.src.offset[1],
	 op->base.dst.x, op->base.dst.y));

    sna_vertex_lock(&sna->render);
    do {
	    int nbox_this_time;
	    float *v;

	    nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						 gen7_emit_composite_state);
	    assert(nbox_this_time);
	    nbox -= nbox_this_time;

	    /* Claim the vertex range while locked... */
	    v = sna->render.vertices + sna->render.vertex_used;
	    sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

	    sna_vertex_acquire__locked(&sna->render);
	    sna_vertex_unlock(&sna->render);

	    /* ...and fill it unlocked. */
	    op->emit_boxes(op, box, nbox_this_time, v);
	    box += nbox_this_time;

	    sna_vertex_lock(&sna->render);
	    sna_vertex_release__locked(&sna->render);
    } while (nbox);
    sna_vertex_unlock(&sna->render);
}

/* Finish a spans composite: flush vertices, release the source bo and
 * undo any render-target redirection. */
fastcall static void
gen7_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
    if (sna->render.vertex_offset)
	    gen4_vertex_flush(sna);

    DBG(("%s()\n", __FUNCTION__));

    if (op->base.src.bo)
	    kgem_bo_destroy(&sna->kgem, op->base.src.bo);

    sna_render_composite_redirect_done(sna, &op->base);
}

/* Cheap pre-flight check used by the core before committing to the spans
 * path; mirrors the checks gen7_render_composite_spans() asserts. */
static bool
gen7_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
    if (op >= ARRAY_SIZE(gen7_blend_op))
	    return false;

    if (gen7_composite_fallback(sna, src, NULL, dst))
	    return false;

    if (need_tiling(sna, width, height) &&
	!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
	    DBG(("%s: fallback, tiled operation not on GPU\n",
		 __FUNCTION__));
	    return false;
    }

    return true;
}

/* Prepare a spans composite (per-box opacity, used for trapezoids etc.).
 * The mask sampler slot carries the opacity via the OPACITY WM kernel. */
static bool
gen7_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
    DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__,
	 width, height, flags, sna->kgem.mode, sna->kgem.ring));

    assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags));

    if (need_tiling(sna, width, height)) {
	    DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		 __FUNCTION__, width, height));
	    return sna_tiling_composite_spans(op, src, dst,
					      src_x, src_y, dst_x, dst_y,
					      width, height, flags, tmp);
    }

    tmp->base.op = op;
    if (!gen7_composite_set_target(sna, &tmp->base, dst,
				   dst_x, dst_y, width, height, true))
	    return false;

    switch (gen7_composite_picture(sna, src, &tmp->base.src,
				   src_x, src_y,
				   width, height,
				   dst_x, dst_y,
				   dst->polyMode == PolyModePrecise)) {
    case -1:
	    goto cleanup_dst;
    case 0:
	    if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
		    goto cleanup_dst;
	    /* fall through to fixup */
    case 1:
	    gen7_composite_channel_convert(&tmp->base.src);
	    break;
    }
    tmp->base.mask.bo = NULL;

    tmp->base.is_affine = tmp->base.src.is_affine;
    tmp->base.need_magic_ca_pass = false;

    tmp->base.u.gen7.flags =
	    GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					  tmp->base.src.repeat,
					  SAMPLER_FILTER_NEAREST,
					  SAMPLER_EXTEND_PAD),
			   gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
			   /* +1 selects the projective variant of the kernel */
			   GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			   gen4_choose_spans_emitter(sna, tmp));

    tmp->box   = gen7_render_composite_spans_box;
    tmp->boxes = gen7_render_composite_spans_boxes;
    if (tmp->emit_boxes)
	    tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
    tmp->done  = gen7_render_composite_spans_done;

    kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
    if (!kgem_check_bo(&sna->kgem,
		       tmp->base.dst.bo, tmp->base.src.bo,
		       NULL)) {
	    /* Batch full: submit and retry once before giving up. */
	    kgem_submit(&sna->kgem);
	    if (!kgem_check_bo(&sna->kgem,
			       tmp->base.dst.bo, tmp->base.src.bo,
			       NULL))
		    goto cleanup_src;
	    _kgem_set_mode(&sna->kgem, KGEM_RENDER);
    }

    gen7_align_vertex(sna, &tmp->base);
    gen7_emit_composite_state(sna, &tmp->base);
    return true;

cleanup_src:
    if (tmp->base.src.bo)
	    kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
    if (tmp->base.redirect.real_bo)
	    kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
    return false;
}
#endif

/* Emit the binding table and pipeline state for a render-copy, reusing
 * the previous surface table when it is byte-identical. */
static void
gen7_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
    uint32_t *binding_table;
    uint16_t offset, dirty;

    gen7_get_batch(sna, op);

    binding_table = gen7_composite_get_binding_table(sna, &offset);

    dirty = kgem_bo_is_dirty(op->dst.bo);

    binding_table[0] =
	    gen7_bind_bo(sna,
			 op->dst.bo, op->dst.width, op->dst.height,
			 gen7_get_dest_format(op->dst.format),
			 true);
    binding_table[1] =
	    gen7_bind_bo(sna,
			 op->src.bo, op->src.width, op->src.height,
			 op->src.card_format,
			 false);

    /* Identical to the surface table already in the batch? Reuse it and
     * reclaim the space we just reserved. */
    if (sna->kgem.surface == offset &&
	*(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
	    sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	    offset = sna->render_state.gen7.surface_table;
    }

    if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
	    dirty = 0;

    assert(!GEN7_READS_DST(op->u.gen7.flags));
    gen7_emit_state(sna, op, offset | dirty);
}

static inline bool
prefer_blt_copy(struct sna *sna,
		struct kgem_bo *src_bo,
		struct kgem_bo *dst_bo,
		unsigned flags)
{
    /* Heuristic: is this copy better served by the BLT ring? */
    if (sna->kgem.mode == KGEM_BLT)
	    return true;

    assert((flags & COPY_SYNC) == 0);

    if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
	    return true;

    if (untiled_tlb_miss(src_bo) ||
	untiled_tlb_miss(dst_bo))
	    return true;

    if (force_blt_ring(sna))
	    return true;

    if (kgem_bo_is_render(dst_bo) ||
	kgem_bo_is_render(src_bo))
	    return false;

    if (prefer_render_ring(sna, dst_bo))
	    return false;

    if (!prefer_blt_ring(sna, dst_bo, flags))
	    return false;

    return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
}

/* Copy an array of boxes between two drawables using the render pipeline,
 * falling back to the blitter or tiled copies for cases the 3D pipe
 * cannot handle (overlaps, oversized surfaces, exotic alu). */
static bool
gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
    struct sna_composite_op tmp;
    BoxRec extents;

    DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
	 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
	 src_bo == dst_bo,
	 overlaps(sna,
		  src_bo, src_dx, src_dy,
		  dst_bo, dst_dx, dst_dy,
		  box, n, flags, &extents)));

    if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	sna_blt_compare_depth(src, dst) &&
	sna_blt_copy_boxes(sna, alu,
			   src_bo, src_dx, src_dy,
			   dst_bo, dst_dx, dst_dy,
			   dst->bitsPerPixel,
			   box, n))
	    return true;

    /* The render pipeline only implements copy/clear; anything else must
     * go through the blitter. */
    if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
	    DBG(("%s: fallback blt\n", __FUNCTION__));
	    if (!sna_blt_compare_depth(src, dst))
		    return false;

	    return sna_blt_copy_boxes_fallback(sna, alu,
					       src, src_bo, src_dx, src_dy,
					       dst, dst_bo, dst_dx, dst_dy,
					       box, n);
    }

    /* Overlapping source and destination need special handling: the 3D
     * pipe samples and writes concurrently. */
    if (overlaps(sna,
		 src_bo, src_dx, src_dy,
		 dst_bo, dst_dx, dst_dy,
		 box, n, flags,
		 &extents)) {
	    bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

	    if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
		sna_blt_copy_boxes(sna, alu,
				   src_bo, src_dx, src_dy,
				   dst_bo, dst_dx, dst_dy,
				   dst->bitsPerPixel,
				   box, n))
		    return true;

	    if (big)
		    goto fallback_blt;

	    assert(src_bo == dst_bo);
	    assert(src->depth == dst->depth);
	    assert(src->width == dst->width);
	    assert(src->height == dst->height);
	    return sna_render_copy_boxes__overlap(sna, alu,
						  src, src_bo,
						  src_dx, src_dy,
						  dst_dx, dst_dy,
						  box, n, &extents);
    }

    if (dst->depth == src->depth) {
	    tmp.dst.format = sna_render_format_for_depth(dst->depth);
	    tmp.src.pict_format = tmp.dst.format;
    } else {
	    tmp.dst.format = sna_format_for_depth(dst->depth);
	    tmp.src.pict_format = sna_format_for_depth(src->depth);
    }
    if (!gen7_check_format(tmp.src.pict_format))
	    goto fallback_blt;

    tmp.dst.pixmap = (PixmapPtr)dst;
    tmp.dst.width = dst->width;
    tmp.dst.height = dst->height;
    tmp.dst.bo = dst_bo;
    tmp.dst.x = tmp.dst.y = 0;
    tmp.damage = NULL;

    sna_render_composite_redirect_init(&tmp);
    /* Destination too large for a surface: redirect to a proxy covering
     * the combined extents of all boxes. */
    if (too_large(tmp.dst.width, tmp.dst.height)) {
	    int i;

	    extents = box[0];
	    for (i = 1; i < n; i++) {
		    if (box[i].x1 < extents.x1)
			    extents.x1 = box[i].x1;
		    if (box[i].y1 < extents.y1)
			    extents.y1 = box[i].y1;

		    if (box[i].x2 > extents.x2)
			    extents.x2 = box[i].x2;
		    if (box[i].y2 > extents.y2)
			    extents.y2 = box[i].y2;
	    }

	    if (!sna_render_composite_redirect(sna, &tmp,
					       extents.x1 + dst_dx,
					       extents.y1 + dst_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1,
					       n > 1))
		    goto fallback_tiled;
    }

    tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format);
    /* Likewise the source: use a partial view when it is oversized. */
    if (too_large(src->width, src->height)) {
	    int i;

	    extents = box[0];
	    for (i = 1; i < n; i++) {
		    if (box[i].x1 < extents.x1)
			    extents.x1 = box[i].x1;
		    if (box[i].y1 < extents.y1)
			    extents.y1 = box[i].y1;

		    if (box[i].x2 > extents.x2)
			    extents.x2 = box[i].x2;
		    if (box[i].y2 > extents.y2)
			    extents.y2 = box[i].y2;
	    }

	    if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					   extents.x1 + src_dx,
					   extents.y1 + src_dy,
					   extents.x2 - extents.x1,
					   extents.y2 - extents.y1))
		    goto fallback_tiled_dst;
    } else {
	    tmp.src.bo = src_bo;
	    tmp.src.width = src->width;
	    tmp.src.height = src->height;
	    tmp.src.offset[0] = tmp.src.offset[1] = 0;
    }

    tmp.mask.bo = NULL;

    tmp.floats_per_vertex = 2;
    tmp.floats_per_rect = 6;
    tmp.need_magic_ca_pass = 0;

    tmp.u.gen7.flags = COPY_FLAGS(alu);

    kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
    if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
	    kgem_submit(&sna->kgem);
	    if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		    /* Release any temporary source/destination proxies
		     * before taking the blitter fallback. */
		    if (tmp.src.bo != src_bo)
			    kgem_bo_destroy(&sna->kgem, tmp.src.bo);
		    if (tmp.redirect.real_bo)
			    kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
		    goto fallback_blt;
	    }
	    _kgem_set_mode(&sna->kgem, KGEM_RENDER);
    }

    /* Fold the proxy offsets into the per-box deltas. */
    src_dx += tmp.src.offset[0];
    src_dy += tmp.src.offset[1];

    dst_dx += tmp.dst.x;
    dst_dy += tmp.dst.y;

    tmp.dst.x = tmp.dst.y = 0;

    gen7_align_vertex(sna, &tmp);
    gen7_emit_copy_state(sna, &tmp);

    do {
	    int16_t *v;
	    int n_this_time;

	    n_this_time = gen7_get_rectangles(sna, &tmp, n,
					      gen7_emit_copy_state);
	    n -= n_this_time;

	    v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
	    sna->render.vertex_used += 6 * n_this_time;
	    assert(sna->render.vertex_used <= sna->render.vertex_size);
	    do {

		    DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
			 box->x1 + src_dx, box->y1 + src_dy,
			 box->x1 + dst_dx, box->y1 + dst_dy,
			 box->x2 - box->x1, box->y2 - box->y1));
		    /* Three vertices per rect, interleaved dst/src (x,y)
		     * pairs packed as int16. */
		    v[0] = box->x2 + dst_dx;
		    v[2] = box->x2 + src_dx;
		    v[1] = v[5] = box->y2 + dst_dy;
		    v[3] = v[7] = box->y2 + src_dy;
		    v[8] = v[4] = box->x1 + dst_dx;
		    v[10] = v[6] = box->x1 + src_dx;
		    v[9] = box->y1 + dst_dy;
		    v[11] = box->y1 + src_dy;
		    v += 12; box++;
	    } while (--n_this_time);
    } while (n);

    gen4_vertex_flush(sna);
    sna_render_composite_redirect_done(sna, &tmp);
    if (tmp.src.bo != src_bo)
	    kgem_bo_destroy(&sna->kgem, tmp.src.bo);
    return true;

fallback_tiled_dst:
    if (tmp.redirect.real_bo)
	    kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
    DBG(("%s: fallback tiled\n", __FUNCTION__));
    if (sna_blt_compare_depth(src, dst) &&
	sna_blt_copy_boxes(sna, alu,
			   src_bo, src_dx, src_dy,
			   dst_bo, dst_dx, dst_dy,
			   dst->bitsPerPixel,
			   box, n))
	    return true;

    return sna_tiling_copy_boxes(sna, alu,
				 src, src_bo, src_dx, src_dy,
				 dst, dst_bo, dst_dx, dst_dy,
				 box, n);
}

/* Emit one copy rectangle for an already-prepared sna_copy_op. */
static void
gen7_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
    int16_t *v;

    gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state);

    v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
    sna->render.vertex_used += 6;
    assert(sna->render.vertex_used <= sna->render.vertex_size);

    /* dst (x,y) then src (x,y) per vertex; three vertices per rect. */
    v[0] = dx+w; v[1] = dy+h;
    v[2] = sx+w; v[3] = sy+h;
    v[4] = dx; v[5] = dy+h;
    v[6] = sx; v[7] = sy+h;
    v[8] = dx; v[9] = dy;
    v[10] = sx; v[11] = sy;
}

/* Finish a prepared copy: flush any buffered vertices. */
static void
gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
    if (sna->render.vertex_offset)
	    gen4_vertex_flush(sna);
}

/* Prepare a repeated-use copy operation between two pixmaps; the caller
 * then invokes op->blt() per rectangle and op->done() at the end. */
static bool
gen7_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
    DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	 __FUNCTION__, alu,
	 src->drawable.width, src->drawable.height,
	 dst->drawable.width, dst->drawable.height));

    if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	sna_blt_copy(sna, alu,
		     src_bo, dst_bo,
		     dst->drawable.bitsPerPixel,
		     op))
	    return true;

    /* Self-copies, oversized surfaces and non-copy alu are not handled
     * by this render path. */
    if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	too_large(src->drawable.width, src->drawable.height) ||
	too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
	    if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		    return false;

	    return sna_blt_copy(sna, alu, src_bo, dst_bo,
				dst->drawable.bitsPerPixel,
				op);
    }

    if (dst->drawable.depth == src->drawable.depth) {
	    op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
	    op->base.src.pict_format = op->base.dst.format;
    } else {
	    op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	    op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
    }
    if (!gen7_check_format(op->base.src.pict_format))
	    goto fallback;

    op->base.dst.pixmap = dst;
    op->base.dst.width = dst->drawable.width;
    op->base.dst.height = dst->drawable.height;
    op->base.dst.bo = dst_bo;

    op->base.src.bo = src_bo;
    op->base.src.card_format =
	    gen7_get_card_format(op->base.src.pict_format);
    op->base.src.width = src->drawable.width;
    op->base.src.height = src->drawable.height;

    op->base.mask.bo = NULL;

    op->base.floats_per_vertex = 2;
    op->base.floats_per_rect = 6;

    op->base.u.gen7.flags = COPY_FLAGS(alu);

    kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
    if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
	    /* Batch full: submit and retry once before falling back. */
	    kgem_submit(&sna->kgem);
	    if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
		    goto fallback;
	    _kgem_set_mode(&sna->kgem, KGEM_RENDER);
    }

    gen7_align_vertex(sna, &op->base);
    gen7_emit_copy_state(sna, &op->base);

    op->blt = gen7_render_copy_blt;
    op->done = gen7_render_copy_done;
    return true;
}

/* Emit the binding table and pipeline state for a solid fill; the source
 * is a 1x1 solid-colour surface bound in slot 1. */
static void
gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
    uint16_t dirty;
    uint32_t *binding_table;
    uint16_t offset;

    /* XXX Render Target Fast Clear
     * Set RTFC Enable in PS and render a rectangle.
     * Limited to clearing the full MSC surface only with a
     * specific kernel.
     */

    gen7_get_batch(sna, op);

    binding_table = gen7_composite_get_binding_table(sna, &offset);

    dirty = kgem_bo_is_dirty(op->dst.bo);

    binding_table[0] =
	    gen7_bind_bo(sna,
			 op->dst.bo, op->dst.width, op->dst.height,
			 gen7_get_dest_format(op->dst.format),
			 true);
    binding_table[1] =
	    gen7_bind_bo(sna,
			 op->src.bo, 1, 1,
			 GEN7_SURFACEFORMAT_B8G8R8A8_UNORM,
			 false);

    /* Reuse the previous surface table when byte-identical. */
    if (sna->kgem.surface == offset &&
	*(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
	    sna->kgem.surface +=
		    sizeof(struct gen7_surface_state)/sizeof(uint32_t);
	    offset = sna->render_state.gen7.surface_table;
    }

    if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
	    dirty = 0;

    gen7_emit_state(sna, op, offset | dirty);
}

/* Fill an array of boxes with a solid colour using the render pipeline,
 * preferring the blitter for simple copy/clear cases. */
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
    struct sna_composite_op tmp;
    uint32_t pixel;

    DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	 __FUNCTION__, op,
	 color->red, color->green, color->blue, color->alpha, (int)format));

    if (op >= ARRAY_SIZE(gen7_blend_op)) {
	    DBG(("%s: fallback due to unhandled blend op: %d\n",
		 __FUNCTION__, op));
	    return false;
    }

    if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	!gen7_check_dst_format(format)) {
	    uint8_t alu = GXinvalid;

	    /* Only Clear/Src can be expressed as a blitter fill. */
	    if (op <= PictOpSrc) {
		    pixel = 0;
		    if (op == PictOpClear)
			    alu = GXclear;
		    else if (sna_get_pixel_from_rgba(&pixel,
						     color->red,
						     color->green,
						     color->blue,
						     color->alpha,
						     format))
			    alu = GXcopy;
	    }

	    if (alu != GXinvalid &&
		sna_blt_fill_boxes(sna, alu,
				   dst_bo, dst->bitsPerPixel,
				   pixel, box, n))
		    return true;

	    if (!gen7_check_dst_format(format))
		    return false;
    }

    /* For the render path, normalise the colour to a8r8g8b8. */
    if (op == PictOpClear) {
	    pixel = 0;
	    op = PictOpSrc;
    } else if (!sna_get_pixel_from_rgba(&pixel,
					color->red,
					color->green,
					color->blue,
					color->alpha,
					PICT_a8r8g8b8))
	    return false;

    DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	 __FUNCTION__, pixel, n,
	 box[0].x1, box[0].y1, box[0].x2, box[0].y2));

    tmp.dst.pixmap = (PixmapPtr)dst;
    tmp.dst.width = dst->width;
    tmp.dst.height = dst->height;
    tmp.dst.format = format;
    tmp.dst.bo = dst_bo;
    tmp.dst.x = tmp.dst.y = 0;
    tmp.damage = NULL;

    sna_render_composite_redirect_init(&tmp);
    if (too_large(dst->width, dst->height)) {
	    BoxRec extents;

	    boxes_extents(box, n, &extents);
	    if (!sna_render_composite_redirect(sna, &tmp,
					       extents.x1, extents.y1,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1,
					       n > 1))
		    return sna_tiling_fill_boxes(sna, op, format, color,
						 dst, dst_bo, box, n);
    }

    tmp.src.bo = sna_render_get_solid(sna, pixel);
    tmp.mask.bo = NULL;

    tmp.floats_per_vertex = 2;
    tmp.floats_per_rect = 6;
    tmp.need_magic_ca_pass = false;

    tmp.u.gen7.flags = FILL_FLAGS(op, format);

    kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
    if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
	    kgem_submit(&sna->kgem);
	    if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		    kgem_bo_destroy(&sna->kgem, tmp.src.bo);
		    if (tmp.redirect.real_bo)
			    kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

		    return false;
	    }
	    _kgem_set_mode(&sna->kgem, KGEM_RENDER);
    }

    gen7_align_vertex(sna, &tmp);
    gen7_emit_fill_state(sna, &tmp);

    do {
	    int n_this_time;
	    int16_t *v;

	    n_this_time = gen7_get_rectangles(sna, &tmp, n,
					      gen7_emit_fill_state);
	    n -= n_this_time;

	    v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
	    sna->render.vertex_used += 6 * n_this_time;
	    assert(sna->render.vertex_used <= sna->render.vertex_size);
	    do {
		    DBG((" (%d, %d), (%d, %d)\n",
			 box->x1, box->y1, box->x2, box->y2));

		    /* dst (x,y) plus constant (1,1)/(0,0) texcoords into
		     * the 1x1 solid source. */
		    v[0] = box->x2;
		    v[5] = v[1] = box->y2;
		    v[8] = v[4] = box->x1;
		    v[9] = box->y1;
		    v[2] = v[3] = v[7] = 1;
		    v[6] = v[10] = v[11] = 0;
		    v += 12; box++;
	    } while (--n_this_time);
    } while (n);

    gen4_vertex_flush(sna);
    kgem_bo_destroy(&sna->kgem, tmp.src.bo);
    sna_render_composite_redirect_done(sna, &tmp);
    return true;
}

/* Emit one fill rectangle (x,y,w,h form) for a prepared fill op. */
static void
gen7_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
    int16_t *v;

    DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

    gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);

    v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
    sna->render.vertex_used += 6;
    assert(sna->render.vertex_used <= sna->render.vertex_size);

    v[0] = x+w;
    v[4] = v[8] = x;
    v[1] = v[5] = y+h;
    v[9] = y;

    v[2] = v[3] = v[7] = 1;
    v[6] = v[10] = v[11] = 0;
}

/* Emit one fill rectangle (BoxRec form) for a prepared fill op. */
fastcall static void
gen7_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
    int16_t *v;

    DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	 box->x1, box->y1, box->x2, box->y2));

    gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);

    v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
    sna->render.vertex_used += 6;
    assert(sna->render.vertex_used <= sna->render.vertex_size);

    v[0] = box->x2;
    v[8] = v[4] = box->x1;
    v[5] = v[1] = box->y2;
    v[9] = box->y1;

    v[7] = v[2] = v[3] = 1;
    v[6] = v[10] = v[11] = 0;
}

fastcall static void
gen7_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	/* sna_fill_op callback: fill an array of nbox rectangles */
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	/* Emit in chunks limited by the vertex space gen7_get_rectangles
	 * can reserve in the current batch.
	 */
	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* sna_fill_op callback: flush any pending vertices and release the
 * solid-colour source bo acquired in gen7_render_fill().
 */
static void
gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Prepare a reusable solid-fill operation (blt/box/boxes/done callbacks),
 * preferring the BLT engine and falling back to it when the alu or the
 * destination size cannot be handled by the render pipeline.
 * Returns false only if no engine could accept the fill.
 */
static bool
gen7_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* The render path only implements copy/clear on in-range targets */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Ensure the destination fits in this batch, flushing if needed */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_fill_state(sna, &op->base);

	op->blt = gen7_render_fill_op_blt;
	op->box = gen7_render_fill_op_box;
	op->boxes = gen7_render_fill_op_boxes;
	op->points = NULL;
	op->done = gen7_render_fill_op_done;
	return true;
}

/* Attempt a single-rectangle solid fill on the BLT engine */
static bool
gen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* One-shot solid fill of the rectangle (x1, y1)-(x2, y2); tries the BLT
 * first, then the render pipeline with a solid source.
 */
static bool
gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen7_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER...
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Ensure the destination fits in this batch, flushing if needed */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* Single rect: 3 vertices, 2 int16 coords each plus texcoords */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Attempt a whole-drawable clear on the BLT engine */
static bool
gen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

/* Clear the entire drawable to zero; BLT when already engaged or when
 * the target is too large for the render pipeline, otherwise a
 * full-surface solid fill with a zero source.
 */
static bool
gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen7_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Ensure the destination fits in this batch, flushing if needed */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Reset all cached gen7 hardware state so the next emission re-emits the
 * invariant state; also discards a vbo that can no longer be mapped.
 */
static void gen7_render_reset(struct sna *sna)
{
	sna->render_state.gen7.pipe_controls_since_stall = 0;
	sna->render_state.gen7.emit_flush = false;
	sna->render_state.gen7.needs_invariant = true;
	sna->render_state.gen7.ve_id = 3 << 2;
	sna->render_state.gen7.last_primitive = -1;

	/* Invalidate every cached hardware-state handle (-1 == none) */
	sna->render_state.gen7.num_sf_outputs = 0;
	sna->render_state.gen7.samplers = -1;
	sna->render_state.gen7.blend = -1;
	sna->render_state.gen7.kernel = -1;
	sna->render_state.gen7.drawrect_offset = -1;
	sna->render_state.gen7.drawrect_limit = -1;
	sna->render_state.gen7.surface_table = 0;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static-state bo allocated by gen7_render_setup() */
static void gen7_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}

/* Device-id decode helpers.
 * NOTE(review): the bit tests below follow this driver's HSW/IVB PCI-id
 * layout; confirm against the id tables if new devices are added.
 */
static bool is_gt3(struct sna *sna, int devid)
{
	assert(sna->kgem.gen == 075);
	return devid & 0x20;
}

static bool is_gt2(struct sna *sna, int devid)
{
	return devid & (is_hsw(sna)?
			0x30 : 0x20);
}

static bool is_mobile(struct sna *sna, int devid)
{
	/* NOTE(review): low nibble 0x6 flags a mobile SKU in this driver's
	 * device-id scheme; the sna parameter is unused here.
	 */
	return (devid & 0xf) == 0x6;
}

/* One-time construction of the immutable gen7 state: select the GT info
 * for the detected device (IVB/BYT/HSW), build the WM kernels, the
 * sampler-state table and the blend-state table into a single static
 * stream, and pin it as general_bo.
 * Returns false for unrecognised devices or allocation failure.
 */
static bool gen7_render_setup(struct sna *sna, int devid)
{
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (devid & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna, devid))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (devid & 0xf) {
			if (is_gt3(sna, devid))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna, devid))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			/* Pre-assembled kernel binary: store as-is */
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			/* Compile the kernel for each enabled dispatch width */
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	/* Sampler table: a copy pair, a fill pair, then one pair for every
	 * (filter, extend) combination of source and mask.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Public entry point: install the gen7 render backend into sna.
 * On unsupported hardware the previous backend name is returned
 * unchanged; on success the hooks are set and the GT name is returned.
 */
const char *gen7_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->dev);

	if (!gen7_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	/* Spans on the GPU only pay off on the smaller/mobile parts */
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}