1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen7_render.h" 45#include "gen4_common.h" 46#include "gen4_source.h" 47#include "gen4_vertex.h" 48#include "gen6_common.h" 49 50#define ALWAYS_INVALIDATE 0 51#define ALWAYS_FLUSH 0 52#define ALWAYS_STALL 0 53 54#define NO_COMPOSITE 0 55#define NO_COMPOSITE_SPANS 0 56#define NO_COPY 0 57#define NO_COPY_BOXES 0 58#define NO_FILL 0 59#define NO_FILL_BOXES 0 60#define NO_FILL_ONE 0 61#define NO_FILL_CLEAR 0 62 63#define USE_8_PIXEL_DISPATCH 1 64#define USE_16_PIXEL_DISPATCH 1 65#define USE_32_PIXEL_DISPATCH 0 66 67#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 68#error "Must select at least 8, 16 or 32 pixel dispatch" 69#endif 70 71#define GEN7_MAX_SIZE 16384 72 73/* XXX Todo 74 * 75 * STR (software tiled rendering) mode. No, really. 76 * 64x32 pixel blocks align with the rendering cache. Worth considering. 
 */

/* True iff x is a multiple of y (y must be a power of two). */
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)

/* Per-SKU hardware limits: thread counts, URB partitioning and the
 * memory-object control state (MOCS) value used for render buffers.
 */
struct gt_info {
	const char *name;
	uint32_t max_vs_threads;
	uint32_t max_gs_threads;
	uint32_t max_wm_threads;	/* pre-shifted for 3DSTATE_PS dw4 */
	struct {
		int size;		/* total URB size, in KB */
		int max_vs_entries;
		int max_gs_entries;
		int push_ps_size;	/* in 1KBs */
	} urb;
	int gt;				/* GT level: 0 = unknown/minimal */
	uint32_t mocs;			/* 0 = uncached default */
};

/* Conservative fallback for an unrecognised Ivybridge SKU. */
static const struct gt_info ivb_gt_info = {
	.name = "Ivybridge (gen7)",
	.max_vs_threads = 16,
	.max_gs_threads = 16,
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info ivb_gt1_info = {
	.name = "Ivybridge (gen7, gt1)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
	.mocs = 3,
};

static const struct gt_info ivb_gt2_info = {
	.name = "Ivybridge (gen7, gt2)",
	.max_vs_threads = 128,
	.max_gs_threads = 128,
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 256, 704, 320, 8 },
	.gt = 2,
	.mocs = 3,
};

/* Baytrail reuses the Ivybridge gt1 limits, but with default MOCS. */
static const struct gt_info byt_gt_info = {
	.name = "Baytrail (gen7)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
};

/* Conservative fallback for an unrecognised Haswell SKU.  Haswell packs
 * a sample-mask field into the same dword as the thread count.
 */
static const struct gt_info hsw_gt_info = {
	.name = "Haswell (gen7.5)",
	.max_vs_threads = 8,
	.max_gs_threads = 8,
	.max_wm_threads =
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info hsw_gt1_info = {
	.name = "Haswell (gen7.5, gt1)",
	.max_vs_threads = 70,
	.max_gs_threads = 70,
	.max_wm_threads =
		(70 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 640, 256, 8 },
	.gt = 1,
	.mocs = 5,
};

static const struct gt_info hsw_gt2_info = {
	.name = "Haswell (gen7.5, gt2)",
	.max_vs_threads = 140,
	.max_gs_threads = 140,
	.max_wm_threads =
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 256, 1664, 640, 8 },
	.gt = 2,
	.mocs = 5,
};

static const struct gt_info hsw_gt3_info = {
	.name = "Haswell (gen7.5, gt3)",
	.max_vs_threads = 280,
	.max_gs_threads = 280,
	.max_wm_threads =
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 512, 3328, 1280, 16 },
	.gt = 3,
	.mocs = 5,
};

/* Generation checks: octal encoding, 07x == gen7 minor revisions. */
inline static bool is_ivb(struct sna *sna)
{
	return sna->kgem.gen == 070;
}

inline static bool is_byt(struct sna *sna)
{
	return sna->kgem.gen == 071;
}

inline static bool is_hsw(struct sna *sna)
{
	return sna->kgem.gen == 075;
}

/* Pre-assembled pixel-shader binaries for video colour conversion,
 * generated at build time from the .g4a assembly (see the brw tools).
 * Each row is one 128-bit Gen EU instruction.
 */
static const uint32_t ps_kernel_packed_bt601[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb_bt601.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_planar_bt601[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb_bt601.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_nv12_bt601[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_nv12.g7b"
#include "exa_wm_yuv_rgb_bt601.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_packed_bt709[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb_bt709.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_planar_bt709[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb_bt709.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_nv12_bt709[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_nv12.g7b"
#include "exa_wm_yuv_rgb_bt709.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_rgb[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

/* KERNEL: static precompiled binary; NOKERNEL: assembled on demand by
 * the brw_wm_kernel__* generator function (size 0 marks "generate").
 */
#define KERNEL(kernel_enum, kernel, num_surfaces) \
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
/* Table of every pixel-shader variant, indexed by GEN7_WM_KERNEL_*. */
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;	/* 0 => data is a generator function */
	int num_surfaces;	/* binding-table entries the kernel reads */
} wm_kernels[GEN7_WM_KERNEL_COUNT] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),
	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
};
#undef KERNEL

/* Map of Render PictOp -> hardware blend factors (Porter-Duff). */
static const struct blendinfo {
	bool src_alpha;		/* op uses the source alpha channel */
	uint32_t src_blend;
	uint32_t dst_blend;
} gen7_blend_op[] = {
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
 *
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)

/* Byte offset into the pre-baked blend-state array for the (src, dst)
 * factor pair.  Bit 15 additionally records whether the op reads the
 * destination (any dst factor other than ZERO) -- see GEN7_READS_DST().
 */
#define BLEND_OFFSET(s, d) \
	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))

#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)

/* Offset of the sampler-state pair for (src filter/extend, mask
 * filter/extend).  The "+ 2" skips the two reserved pairs at the start
 * of the array used by the copy and fill paths (COPY_SAMPLER,
 * FILL_SAMPLER below); samplers are emitted in src/mask pairs, hence
 * the "* 2".
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

/* Vertex layout id 0: two pairs of signed 16-bit coordinates. */
#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
/* Blit ops other than GXcopy are approximated with CLEAR blending. */
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
/* The fill path uses the second reserved sampler pair. */
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)

/* op->u.gen7.flags packs all render state into one 32-bit word:
 * high half = sampler offset | kernel id, low half = blend offset
 * (with its READS_DST bit 15) | vertex layout id.  The extractors
 * below must agree with GEN7_SET_FLAGS and the OFFSET macros above.
 */
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

/* True if either dimension exceeds the Gen7 2D surface limit. */
static inline bool too_large(int width, int height)
{
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
}

/* Translate a Render PictOp into a blend-state offset (BLEND_OFFSET),
 * fixing up the factors for destinations without alpha and for
 * component-alpha compositing.
 */
static uint32_t gen7_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen7_blend_op[op].src_blend;
	dst = gen7_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
			src = GEN7_BLENDFACTOR_ONE;
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN7_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

/* PictFormat -> sampling SURFACEFORMAT; returns -1 (as uint32_t) when
 * the format cannot be sampled by this backend.
 */
static uint32_t gen7_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

/* PictFormat -> render-target SURFACEFORMAT; x-variants render as the
 * corresponding alpha format (the alpha channel is simply ignored).
 * Returns -1 (as uint32_t) when the format cannot be a destination.
 */
static uint32_t gen7_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

/* Can this PictFormat be used as a render target? */
static bool gen7_check_dst_format(PictFormat format)
{
	if (gen7_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

/* Can this PictFormat be sampled as a source texture? */
static bool gen7_check_format(uint32_t format)
{
	if (gen7_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

/* Render filter -> sampler filter index (caller must have validated
 * with gen7_check_filter; anything else asserts).
 */
static uint32_t gen7_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

/* Only nearest and bilinear filtering are supported. */
static uint32_t gen7_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

/* Render repeat mode -> sampler extend mode (caller must have
 * validated with gen7_check_repeat; anything else asserts).
 */
static uint32_t gen7_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

/* All standard repeat modes are supported; non-repeating is trivially ok. */
static bool gen7_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

/* Pick the WM kernel for a composite: mask/component-alpha variants as
 * needed, with the projective variant at base+1 (relies on the
 * _P entries directly following their affine counterparts).
 */
static int
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen7_blend_op[op].src_alpha)
				base = GEN7_WM_KERNEL_MASKSA;
			else
				base = GEN7_WM_KERNEL_MASKCA;
		} else
			base = GEN7_WM_KERNEL_MASK;
	} else
		base = GEN7_WM_KERNEL_NOMASK;

	return base + !is_affine;
}
/* Partition the URB between the (pass-through) fixed-function stages
 * and allocate the PS push-constant space.  Only the VS gets real
 * entries; HS/DS/GS are given zero-sized allocations.
 */
static void
gen7_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);

	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
}

/* Point the surface base at the batch itself (surface state is built
 * inline in the batch) and dynamic/instruction bases at the static
 * general_bo, tagging the latter with the per-SKU MOCS value.
 */
static void
gen7_emit_state_base_address(struct sna *sna)
{
	uint32_t mocs = sna->render_state.gen7.info->mocs << 8;

	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}

/* Disable the vertex shader: vertices pass straight through to SF. */
static void
gen7_disable_vs(struct sna *sna)
{
	/* For future reference:
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
	 *
	 * Here we are saved by the full flush incurred when emitting
	 * the batchbuffer.
	 */
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Disable the hull shader (no tessellation). */
static void
gen7_disable_hs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
	OUT_BATCH(0); /* no HS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Disable the tessellation engine. */
static void
gen7_disable_te(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Disable the domain shader (no tessellation). */
static void
gen7_disable_ds(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Disable the geometry shader: primitives pass straight through. */
static void
gen7_disable_gs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Disable transform feedback. */
static void
gen7_disable_streamout(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* One-time strips-and-fans setup: no culling, fixed provoking vertex. */
static void
gen7_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* One-time colour-calculator setup: no CC viewport. */
static void
gen7_emit_cc_invariant(struct sna *sna)
{
#if 0 /* unused, no change */
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	/* XXX clear to be safe */
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}

/* Disable the clipper (2D rendering is pre-clipped by the CPU). */
static void
gen7_disable_clip(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
	OUT_BATCH(0);
}

/* One-time windower setup: dispatch enabled, perspective barycentric. */
static void
gen7_emit_wm_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);

#if 0
	/* XXX length bias of 7 in old spec? */
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

/* Bind a null depth buffer: depth, stencil and hiz are all unused. */
static void
gen7_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0); /* disable depth, stencil and hiz */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

/* Emit the state that never changes between 2D render operations:
 * select the 3D pipeline, single-sampled rendering, URB layout, base
 * addresses, and park every fixed-function stage we do not use.
 * Called once per batch; afterwards only per-op state is emitted.
 */
static void
gen7_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen7_emit_urb(sna);

	gen7_emit_state_base_address(sna);

	gen7_disable_vs(sna);
	gen7_disable_hs(sna);
	gen7_disable_te(sna);
	gen7_disable_ds(sna);
	gen7_disable_gs(sna);
	gen7_disable_clip(sna);
	gen7_emit_sf_invariant(sna);
	gen7_emit_wm_invariant(sna);
	gen7_emit_cc_invariant(sna);
	gen7_disable_streamout(sna);
	gen7_emit_null_depth_buffer(sna);

	sna->render_state.gen7.needs_invariant = false;
}

/* Point the hardware at the blend state for this op, skipping the
 * emission if it is already current.  blend_offset is a BLEND_OFFSET()
 * value (its bit 15 READS_DST tag is ignored by the alignment check
 * only in so far as the base is 64-byte aligned -- see BLEND_OFFSET).
 */
static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
	struct gen7_render_state *render = &sna->render_state.gen7;

	if (render->blend == blend_offset)
		return;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));

	/* XXX can have up to 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	assert (is_aligned(render->cc_blend + blend_offset, 64));
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend_offset) | 1);

	render->blend = blend_offset;
}
/* Point the hardware at the PS sampler states for this op (a
 * SAMPLER_OFFSET()/COPY_SAMPLER/FILL_SAMPLER value), skipping the
 * emission if already current.
 */
static void
gen7_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen7.samplers == state)
		return;

	sna->render_state.gen7.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
}

/* Configure the setup backend: one attribute (source coords) or two
 * (source + mask coords), read from the VUE at offset 1.
 */
static void
gen7_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw8 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw12 */
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Program 3DSTATE_PS for the chosen WM kernel, enabling whichever of
 * the 8/16/32-pixel dispatch binaries were assembled (kernels[0..2]
 * hold their instruction offsets; 0 means "not built").  Start-GRF
 * values follow the payload layout of each dispatch width.
 */
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	if (sna->render_state.gen7.kernel == kernel)
		return;

	sna->render_state.gen7.kernel = kernel;
	kernels = sna->render_state.gen7.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch address */
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
		  GEN7_PS_ATTRIBUTE_ENABLE);
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}

/* Point the PS at its binding table (offset is in dwords within the
 * surface-state buffer).  Returns true if the table CHANGED and was
 * emitted, false if it was already current -- note this is the
 * opposite convention to gen7_emit_drawing_rectangle() below.
 */
static bool
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen7.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen7.surface_table = offset;
	return true;
}

/* Emit the drawing rectangle (clip + origin) for the destination.
 * Returns true if the rectangle was already current (nothing emitted),
 * false if it changed -- the caller uses this to decide whether a
 * stall is still required (see gen7_emit_state).
 */
static bool
gen7_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen7.drawrect_limit == limit &&
	    sna->render_state.gen7.drawrect_offset == offset)
		return true;

	sna->render_state.gen7.drawrect_offset = offset;
	sna->render_state.gen7.drawrect_limit = limit;

	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

/* Describe the vertex layout for layout 'id' (GEN7_VERTEX(flags)):
 * low 2 bits = number of floats in the source coords (0 => packed
 * 16-bit short pair), high bits = mask coords likewise (0 => no mask).
 */
static void
gen7_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen7_render_state *render = &sna->render_state.gen7;
	uint32_t src_format, dw;
	int id = GEN7_VERTEX(op->u.gen7.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* pad element: stores constant zero into VUE dwords 0-3 */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  src_format << GEN7_VE0_FORMAT_SHIFT |
		  4 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* mask coords follow the source coords; a packed short
		 * pair (id&3 == 0) occupies one float's worth of bytes
		 */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  src_format << GEN7_VE0_FORMAT_SHIFT |
			  offset << GEN7_VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}

/* PIPE_CONTROL flushing the render and texture caches with a CS stall:
 * used when a source/mask bo has been written by a previous op.
 */
inline static void
gen7_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
		  GEN7_PIPE_CONTROL_TC_FLUSH |
		  GEN7_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}

/* PIPE_CONTROL flushing the render cache, optionally with a CS stall.
 * The since-stall counter tracks the IVB requirement that unstalled
 * PIPE_CONTROLs must not pile up indefinitely (see gen7_emit_state).
 */
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall) {
		stall = GEN7_PIPE_CONTROL_CS_STALL;
		sna->render_state.gen7.pipe_controls_since_stall = 0;
	} else
		sna->render_state.gen7.pipe_controls_since_stall++;

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Pure CS stall (no cache flush): wait for outstanding rendering. */
inline static void
gen7_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH(0);
	OUT_BATCH(0);
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}

/* Emit all per-op state, preceded by whichever PIPE_CONTROL
 * (invalidate > flush > stall, strongest wins) the dirty tracking says
 * is needed.  Bit 0 of wm_binding_table is an in-band "needs flush"
 * flag from the caller and is masked off before use.
 */
static void
gen7_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;

	need_invalidate = kgem_bo_is_dirty(op->src.bo) ||
		kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	/* returns true when the drawrect was unchanged; a changed
	 * drawrect removes the need for an explicit stall
	 */
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;
	/* Ivybridge/Baytrail: force a stall after every few unstalled
	 * PIPE_CONTROLs (Haswell, gen 075, does not need this)
	 */
	if (sna->kgem.gen < 075 &&
	    sna->render_state.gen7.pipe_controls_since_stall >= 3)
		need_stall = true;

	if (need_invalidate) {
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		/* the invalidate subsumes both weaker barriers */
		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* remember whether this op wrote the destination, so the next
	 * dst-reading op knows to flush first
	 */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;
	/* NOTE(review): pre-Haswell appears to need a stall at least every
	 * fourth PIPE_CONTROL — confirm against hardware errata. */
	if (sna->kgem.gen < 075 &&
	    sna->render_state.gen7.pipe_controls_since_stall >= 3)
		need_stall = true;

	/* An invalidate subsumes both the flush and the stall */
	if (need_invalidate) {
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* Remember whether the next op must flush before sampling our output */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}

/* Second pass for component-alpha rendering: replay the vertices emitted
 * since vertex_start with an ADD blend and the CA-variant kernel.
 * Returns true if the extra pass was emitted (caller must restore state). */
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna,
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen7_emit_wm(sna,
		     gen7_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

/* Reserve a block of zeroed static state, used for the legacy border
 * color and the (unused) depth-stencil state. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

/* Initialise one SAMPLER_STATE entry for the given Render filter and
 * repeat (extend) mode. */
static void
sampler_state_init(struct gen7_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		break;
	}
}

/* Sampler pair for the copy paths: nearest/none with unnormalised
 * (pixel) coordinates on the source channel. */
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Sampler pair for the fill paths: a repeating 1x1 source sampled with
 * unnormalised coordinates. */
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Translate an I915 tiling mode into SURFACE_STATE tiling bits. */
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN7_SURFACE_TILED;
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen7_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		/* Reuse the surface state emitted earlier in this batch */
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface states are allocated downwards from the end of the batch */
	offset = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 gen7_tiling_bits(bo->tiling) |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	if (bo->tiling == I915_TILING_Y)
		ss[0] |= GEN7_SURFACE_VALIGN_4;
	if (is_dst) {
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	/* No MOCS caching for scanout or snooped (io) buffers */
	ss[5] = (is_scanout || bo->io) ? 0 : sna->render_state.gen7.info->mocs << 16;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ?
"render" : "sampler")); 1372 1373 return offset * sizeof(uint32_t); 1374} 1375 1376static void gen7_emit_vertex_buffer(struct sna *sna, 1377 const struct sna_composite_op *op) 1378{ 1379 int id = GEN7_VERTEX(op->u.gen7.flags); 1380 1381 OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1382 OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT | 1383 GEN7_VB0_VERTEXDATA | 1384 GEN7_VB0_ADDRESS_MODIFY_ENABLE | 1385 4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT); 1386 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1387 OUT_BATCH(0); 1388 OUT_BATCH(~0); /* max address: disabled */ 1389 OUT_BATCH(0); 1390 1391 sna->render.vb_id |= 1 << id; 1392} 1393 1394static void gen7_emit_primitive(struct sna *sna) 1395{ 1396 if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) { 1397 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1398 return; 1399 } 1400 1401 OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); 1402 OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); 1403 sna->render.vertex_offset = sna->kgem.nbatch; 1404 OUT_BATCH(0); /* vertex count, to be filled in later */ 1405 OUT_BATCH(sna->render.vertex_index); 1406 OUT_BATCH(1); /* single instance */ 1407 OUT_BATCH(0); /* start instance location */ 1408 OUT_BATCH(0); /* index buffer offset, ignored */ 1409 sna->render.vertex_start = sna->render.vertex_index; 1410 1411 sna->render_state.gen7.last_primitive = sna->kgem.nbatch; 1412} 1413 1414static bool gen7_rectangle_begin(struct sna *sna, 1415 const struct sna_composite_op *op) 1416{ 1417 int id = 1 << GEN7_VERTEX(op->u.gen7.flags); 1418 int ndwords; 1419 1420 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1421 return true; 1422 1423 ndwords = op->need_magic_ca_pass ? 
60 : 6; 1424 if ((sna->render.vb_id & id) == 0) 1425 ndwords += 5; 1426 if (!kgem_check_batch(&sna->kgem, ndwords)) 1427 return false; 1428 1429 if ((sna->render.vb_id & id) == 0) 1430 gen7_emit_vertex_buffer(sna, op); 1431 1432 gen7_emit_primitive(sna); 1433 return true; 1434} 1435 1436static int gen7_get_rectangles__flush(struct sna *sna, 1437 const struct sna_composite_op *op) 1438{ 1439 /* Preventing discarding new vbo after lock contention */ 1440 if (sna_vertex_wait__locked(&sna->render)) { 1441 int rem = vertex_space(sna); 1442 if (rem > op->floats_per_rect) 1443 return rem; 1444 } 1445 1446 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) 1447 return 0; 1448 if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1449 return 0; 1450 1451 if (sna->render.vertex_offset) { 1452 gen4_vertex_flush(sna); 1453 if (gen7_magic_ca_pass(sna, op)) { 1454 gen7_emit_pipe_stall(sna); 1455 gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); 1456 gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); 1457 } 1458 } 1459 1460 return gen4_vertex_finish(sna); 1461} 1462 1463inline static int gen7_get_rectangles(struct sna *sna, 1464 const struct sna_composite_op *op, 1465 int want, 1466 void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 1467{ 1468 int rem; 1469 1470 assert(want); 1471 1472start: 1473 rem = vertex_space(sna); 1474 if (unlikely(rem < op->floats_per_rect)) { 1475 DBG(("flushing vbo for %s: %d < %d\n", 1476 __FUNCTION__, rem, op->floats_per_rect)); 1477 rem = gen7_get_rectangles__flush(sna, op); 1478 if (unlikely(rem == 0)) 1479 goto flush; 1480 } 1481 1482 if (unlikely(sna->render.vertex_offset == 0)) { 1483 if (!gen7_rectangle_begin(sna, op)) 1484 goto flush; 1485 else 1486 goto start; 1487 } 1488 1489 assert(rem <= vertex_space(sna)); 1490 assert(op->floats_per_rect <= rem); 1491 if (want > 1 && want * op->floats_per_rect > rem) 1492 want = rem / op->floats_per_rect; 1493 1494 assert(want > 0); 1495 sna->render.vertex_index += 3*want; 1496 
return want;

flush:
	/* Batch full: flush vertices, submit, re-emit state and retry */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

/* Allocate one zeroed surface-state slot at the top of the batch to act
 * as the binding table; returns a pointer into the batch and stores its
 * dword offset in *offset. */
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen7_surface_state));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

/* Switch the batch to the render ring and make room for a composite op,
 * submitting the old batch and re-emitting invariant state if needed. */
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant)
		gen7_emit_invariant(sna);
}

/* Bind dst/src/mask surfaces and emit the full render state for a
 * composite operation, reusing the previous binding table when the new
 * one is identical. */
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	/* Record before binding: gen7_bind_bo marks the dst dirty */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen7_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If the binding table matches the previous one, discard the copy
	 * we just wrote and reuse the old offset. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	/* The dirty flag rides in the low bit of the table offset */
	gen7_emit_state(sna, op, offset | dirty);
}

/* Realign the vbo when the per-vertex float count changes. */
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
	}
}

/* Emit a single composite rectangle. */
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, r);
}

/* Emit a single composite box, with src/mask offsets equal to dst. */
fastcall static void
gen7_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

op->prim_emit(sna, op, &r);
}

/* Emit many composite boxes one rectangle at a time via prim_emit;
 * fallback for ops without a batched emit_boxes. */
static void
gen7_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Emit many composite boxes using the op's batched emit_boxes vertex
 * writer. */
static void
gen7_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

/* Threaded variant: reserve vertex space under the vertex lock, then
 * drop the lock while filling the vertices. */
static void
gen7_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Build the static table of BLEND_STATE entries, one for every
 * (src factor, dst factor) pair; returns its offset in the stream. */
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
			struct gen7_blend_state *blend =
				(struct gen7_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a no-op: leave blending disabled */
			blend->blend0.blend_enable =
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

/* Bind one plane of a video frame as a sampler surface at the given byte
 * offset within the (linear) bo; returns the binding-table entry. */
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss, bind;

	bind = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);

	assert(bo->tiling == I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1,
bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}

/* Bind the destination plus every source plane of the video frame and
 * emit the render state for the video blit. */
static void gen7_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	gen7_get_batch(sna, op);

	/* Plane offsets: [0,1] = Y, [2,3] = V, [4,5] = U (planar formats) */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN7_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		/* Chroma planes are subsampled to half size */
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN7_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN7_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_RGB888)
			src_surf_format[0] = GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
		else if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen7_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen7_emit_state(sna, op, offset | dirty);
}

/* Pick the WM kernel matching the frame's fourcc and the video's
 * colorspace (BT.601 vs BT.709). */
static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			GEN7_WM_KERNEL_VIDEO_PLANAR_BT709 :
			GEN7_WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			GEN7_WM_KERNEL_VIDEO_NV12_BT709 :
			GEN7_WM_KERNEL_VIDEO_NV12_BT601;

	case FOURCC_RGB888:
	case FOURCC_RGB565:
		return GEN7_WM_KERNEL_VIDEO_RGB;

	default:
		return video->colorspace ?
GEN7_WM_KERNEL_VIDEO_PACKED_BT709 :
			GEN7_WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

/* Render a video frame onto pixmap, clipped to dstRegion, scaling from
 * the frame's src rectangle.  Returns false if the bos cannot fit. */
static bool
gen7_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	unsigned filter;
	const BoxRec *box;
	int nbox;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* (x, y, s, t) per vertex, three vertices per rectangle */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       select_video_kernel(video, frame),
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_video_state(sna, &tmp);

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map destination pixel coordinates to normalised frame texcoords */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		DBG(("%s: dst=(%d, %d), (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		/* RECTLIST: bottom-right, bottom-left, top-left */
		OUT_VERTEX(box->x2, box->y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		box++;
	}
	gen4_vertex_flush(sna);

	if
	    (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}

/* Prepare one source/mask channel of a composite from picture: resolves
 * solid colours, gradients, alpha maps and unsupported repeat/filter
 * modes into a GPU-usable channel, falling back to fixup/convert/extract
 * helpers as needed.  Returns <= 0 on failure, 1 on success (matching
 * the sna_render_picture_* helpers it delegates to). */
static int
gen7_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen7_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen7_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ?
		picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	assert(picture->pDrawable);
	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->transform ? picture->filter : picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		/* Sample area fully within (or repeating over) a cleared
		 * pixmap can be replaced by a solid colour. */
		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel,
							       solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen7_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine?
%d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Finalise a channel for the gen7 sampler: translate the Render
 * repeat/filter values into hardware enums and resolve the surface
 * format lazily if gen7_composite_picture left it undecided.
 */
inline static void gen7_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen7_repeat(channel->repeat);
	channel->filter = gen7_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen7_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}

/* Tear down a composite op: flush any vertices still pending (running
 * the second component-alpha pass if one was queued), drop the channel
 * bo references and undo any redirection of the render target.
 */
static void gen7_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

/* Bind the destination picture to op->dst: pick a GPU bo for the
 * drawable (hinting that a full-size, non-partial write REPLACES the
 * existing contents), record the drawable deltas, and redirect through
 * a proxy if the pixmap exceeds the 3D pipeline's size limits.
 * Returns false if no usable GPU target could be obtained.
 */
inline static bool
gen7_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* w == h == 0 means "whole picture": use its extents instead */
	if (w | h) {
		assert(w && h);
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		/* Entire surface is overwritten; any queued blits to the
		 * old contents can be discarded.
		 */
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

/* Heuristically decide whether this composite should be performed (or
 * at least attempted) on the BLT ring rather than the 3D pipe, and if
 * so hand it straight to sna_blt_composite, possibly asking for the
 * source to be uploaded (COMPOSITE_UPLOAD). Returning false leaves the
 * operation for the render path.
 */
static bool
try_blt(struct sna *sna,
	uint8_t op,
	PicturePtr src,
	PicturePtr mask,
	PicturePtr dst,
	int16_t src_x, int16_t src_y,
	int16_t msk_x, int16_t msk_y,
	int16_t dst_x, int16_t dst_y,
	int16_t width, int16_t height,
	unsigned flags,
	struct sna_composite_op *tmp)
{
	struct kgem_bo *bo;

	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		goto execute;
	}

	bo = __sna_drawable_peek_bo(dst->pDrawable);
	if (bo == NULL)
		goto execute;

	if (untiled_tlb_miss(bo))
		goto execute;

	if (bo->rq) {
		/* Destination busy on the render ring: avoid a ring switch */
		if (RQ_IS_BLT(bo->rq))
			goto execute;

		return false;
	}

	/* The BLT cannot address Y-tiled surfaces directly */
	if (bo->tiling == I915_TILING_Y)
		goto upload;

	if (src->pDrawable == dst->pDrawable &&
	    (sna->render_state.gt < 3 || width*height < 1024) &&
	    can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (src->pDrawable) {
		struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable);
		if (s == NULL)
			goto upload;

		if (prefer_blt_bo(sna, s, bo))
			goto execute;
	}

	if (sna->kgem.ring == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

upload:
	flags |= COMPOSITE_UPLOAD;
execute:
	return sna_blt_composite(sna, op,
				 src, dst,
				 src_x, src_y,
				 dst_x, dst_y,
				 width, height,
				 flags, tmp);
}

/* Report whether a gradient source forces a CPU fallback: solid and
 * linear gradients are handled on the GPU, other gradient types only
 * when imprecise rendering is acceptable.
 */
static bool
check_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

/* An unattached, untransformed drawable would have to be uploaded
 * before the GPU could sample it.
 */
static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

/* True if the pixmap has outstanding GPU work (busy bo) or damage that
 * lives only on the GPU -- i.e. reading it back for a CPU fallback
 * would stall.
 */
static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL || priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

/* Would rendering from this source picture require a CPU fallback?
 * Solid colours never do; gradients defer to check_gradient; otherwise
 * unsupported repeat modes/formats, alpha maps, unsupported filters or
 * a pending upload all force the fallback -- unless the pixmap is
 * already busy on the GPU, in which case we prefer to stay there.
 */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen7_check_repeat(p) || !gen7_check_format(p->format))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p);
}

/* Decide whether the whole composite should be done on the CPU.
 * Returns true to fall back, false to proceed with the GPU.
 */
static bool
gen7_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen7_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

/* Try to initialise the mask channel (mc) by reusing the already
 * prepared source channel (sc): either the mask is literally the same
 * picture, a solid colour, or it references the same drawable with an
 * identical transform so only repeat/filter/format need adjusting.
 * Returns true (with a fresh bo reference taken) on success.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen7_check_repeat(mask))
		return false;

	if (!gen7_check_filter(mask))
		return false;

	if (!gen7_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen7_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen7_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

/* Main entry point for accelerated Render composite on gen7.
 * Routes the request through the BLT heuristics, CPU-fallback check and
 * tiling path before setting up the 3D pipeline state in *tmp.
 */
static bool
gen7_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	if (mask == NULL &&
	    try_blt(sna, op,
		    src, mask, dst,
		    src_x, src_y,
		    msk_x, msk_y,
		    dst_x, dst_y,
		    width, height,
		    flags, tmp))
		return true;

	if (gen7_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen7_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen7_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source?
 */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen7_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen7_blend_op[op].src_alpha &&
			    (gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* OVER is emulated as OutReverse + Add (the
				 * "magic" component-alpha two-pass).
				 */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen7_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen7_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt = gen7_render_composite_blt;
	tmp->box = gen7_render_composite_box;
	tmp->boxes = gen7_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen7_render_composite_boxes;
		tmp->thread_boxes = gen7_render_composite_boxes__thread;
	}
	tmp->done = gen7_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		/* Not enough room in the current batch: flush and retry */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, tmp);
	gen7_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

#if !NO_COMPOSITE_SPANS
/* Emit one span rectangle with the given coverage (opacity). */
fastcall static void
gen7_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}

/* Emit a batch of span rectangles sharing one opacity, splitting the
 * request whenever the vertex buffer fills up.
 */
static void
gen7_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

/* Threaded variant: reserve vertex space under the lock, then fill it
 * outside the lock so other span threads can proceed concurrently.
 */
fastcall static void
gen7_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Flush outstanding vertices and release the span op's resources. */
fastcall static void
gen7_render_composite_spans_done(struct sna
*sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

/* Quick feasibility test for the spans path: known blend op, no CPU
 * fallback required, and -- if tiling would be needed -- the target
 * already prefers the GPU.
 */
static bool
gen7_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	if (gen7_composite_fallback(sna, src, NULL, dst))
		return false;

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	return true;
}

/* Set up a composite-with-coverage (spans) operation. The mask slot is
 * unused; per-span opacity is fed to the OPACITY WM kernel instead.
 * Callers must have validated with gen7_check_composite_spans first.
 */
static bool
gen7_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.mode, sna->kgem.ring));

	assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen7_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen7_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen7_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	tmp->base.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box = gen7_render_composite_spans_box;
	tmp->boxes = gen7_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
	tmp->done = gen7_render_composite_spans_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		/* Batch full: submit and retry before giving up */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp->base);
	gen7_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif

/* Emit binding table and pipeline state for a render-ring copy. */
static void
gen7_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Reuse the previous surface table if it is identical */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN7_READS_DST(op->u.gen7.flags));
	gen7_emit_state(sna, op, offset | dirty);
}

/* Heuristics for routing a copy to the BLT ring instead of the render
 * ring (ring affinity, tiling, GT size, semaphore support, ...).
 */
static inline bool
prefer_blt_copy(struct sna *sna,
		struct kgem_bo *src_bo,
		struct kgem_bo *dst_bo,
		unsigned flags)
{
	if (sna->kgem.mode == KGEM_BLT)
		return true;

	assert((flags & COPY_SYNC) == 0);

	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
		return false;

	if (force_blt_ring(sna, dst_bo))
		return true;

	if ((flags & COPY_SMALL ||
	     (sna->render_state.gt < 3 && src_bo == dst_bo)) &&
	    can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (kgem_bo_is_render(dst_bo) ||
	    kgem_bo_is_render(src_bo))
		return false;

	if (flags & COPY_LAST &&
	    sna->render_state.gt < 3 &&
	    can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (prefer_render_ring(sna, dst_bo))
		return false;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	return prefer_blt_bo(sna, src_bo, dst_bo);
}

/* Copy an array of boxes between two drawables on the GPU.
 * Tries the BLT first when the heuristics favour it, handles
 * overlapping self-copies, redirects or extracts when either surface
 * exceeds the 3D pipe limits, and finally emits the boxes as copy
 * rectangles through the render ring (12 int16 coords per box: three
 * vertices of interleaved dst/src positions).
 */
static bool
gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;
	BoxRec extents;

	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
	     src_bo == dst_bo,
	     overlaps(sna,
		      src_bo, src_dx, src_dy,
		      dst_bo, dst_dx, dst_dy,
		      box, n, flags, &extents)));

	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	    sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
		DBG(("%s: fallback blt\n", __FUNCTION__));
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* The render pipe cannot read and write the same pixels in one
	 * pass, so overlapping copies need special handling.
	 */
	if (overlaps(sna,
		     src_bo, src_dx, src_dy,
		     dst_bo, dst_dx, dst_dy,
		     box, n, flags,
		     &extents)) {
		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

		if ((big || !prefer_render_ring(sna, dst_bo)) &&
		    sna_blt_copy_boxes(sna, alu,
				       src_bo, src_dx, src_dy,
				       dst_bo, dst_dx, dst_dy,
				       dst->bitsPerPixel,
				       box, n))
			return true;

		if (big)
			goto fallback_blt;

		assert(src_bo == dst_bo);
		assert(src->depth == dst->depth);
		assert(src->width == dst->width);
		assert(src->height == dst->height);
		return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo,
						      src_dx, src_dy,
						      dst_dx, dst_dy,
						      box, n, &extents);
	}

	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen7_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		int i;

		/* Compute the bounding box of all destination boxes */
		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format);
	if (too_large(src->width, src->height)) {
		int i;

		/* Compute the bounding box of all source reads */
		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = src_bo;
		tmp.src.width = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
	}

	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;

	tmp.u.gen7.flags = COPY_FLAGS(alu);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
			if (tmp.src.bo != src_bo)
				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
			goto fallback_blt;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;

	tmp.dst.x = tmp.dst.y = 0;

	gen7_align_vertex(sna, &tmp);
	gen7_emit_copy_state(sna, &tmp);

	do {
		int16_t *v;
		int n_this_time;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_copy_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {

			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			v[0] = box->x2 + dst_dx;
			v[2] = box->x2 + src_dx;
			v[1]  = v[5] = box->y2 + dst_dy;
			v[3]  = v[7] = box->y2 + src_dy;
			v[8]  = v[4] = box->x1 + dst_dx;
			v[10] = v[6] = box->x1 + src_dx;
			v[9] = box->y1 + dst_dy;
			v[11] = box->y1 + src_dy;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	if (tmp.src.bo != src_bo)
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	DBG(("%s: fallback tiled\n", __FUNCTION__));
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

/* Per-rectangle emitter for the gen7_render_copy op: three vertices of
 * interleaved destination/source int16 coordinates.
 */
static void
gen7_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	int16_t *v;

	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0]  = dx+w; v[1]  = dy+h;
	v[2]  = sx+w; v[3]  = sy+h;
	v[4]  = dx;   v[5]  = dy+h;
	v[6]  = sx;   v[7]  = sy+h;
	v[8]  = dx;   v[9]  = dy;
	v[10] = sx;   v[11] = sy;
}

static void
gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
}

/* Prepare a reusable copy operation (blt/done callbacks in *op) from
 * src to dst. Falls back to the BLT when the heuristics prefer it, when
 * the alu is not a plain copy/clear, for self-copies, or when either
 * pixmap exceeds the 3D pipe limits.
 */
static bool
gen7_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable,
&dst->drawable)) 3277 return false; 3278 3279 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3280 dst->drawable.bitsPerPixel, 3281 op); 3282 } 3283 3284 if (dst->drawable.depth == src->drawable.depth) { 3285 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3286 op->base.src.pict_format = op->base.dst.format; 3287 } else { 3288 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3289 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3290 } 3291 if (!gen7_check_format(op->base.src.pict_format)) 3292 goto fallback; 3293 3294 op->base.dst.pixmap = dst; 3295 op->base.dst.width = dst->drawable.width; 3296 op->base.dst.height = dst->drawable.height; 3297 op->base.dst.bo = dst_bo; 3298 3299 op->base.src.bo = src_bo; 3300 op->base.src.card_format = 3301 gen7_get_card_format(op->base.src.pict_format); 3302 op->base.src.width = src->drawable.width; 3303 op->base.src.height = src->drawable.height; 3304 3305 op->base.mask.bo = NULL; 3306 3307 op->base.floats_per_vertex = 2; 3308 op->base.floats_per_rect = 6; 3309 3310 op->base.u.gen7.flags = COPY_FLAGS(alu); 3311 3312 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3313 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3314 kgem_submit(&sna->kgem); 3315 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3316 goto fallback; 3317 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3318 } 3319 3320 gen7_align_vertex(sna, &op->base); 3321 gen7_emit_copy_state(sna, &op->base); 3322 3323 op->blt = gen7_render_copy_blt; 3324 op->done = gen7_render_copy_done; 3325 return true; 3326} 3327 3328static void 3329gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3330{ 3331 uint16_t dirty; 3332 uint32_t *binding_table; 3333 uint16_t offset; 3334 3335 /* XXX Render Target Fast Clear 3336 * Set RTFC Enable in PS and render a rectangle. 3337 * Limited to a clearing the full MSC surface only with a 3338 * specific kernel. 
	 */

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	/* Source is a 1x1 solid-colour bo sampled with repeat. */
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN7_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If this surface table matches the last one emitted, rewind the
	 * surface allocator and point the op at the previous table.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen7_surface_state)/sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same render target as last time: no flush needed for dirt. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	gen7_emit_state(sna, op, offset | dirty);
}

/* Fill a set of boxes with a solid colour using Render semantics (PictOp).
 * Falls back to the BLT for simple ops/formats, and to tiling when the
 * destination exceeds the 3D pipeline size limit.
 */
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen7_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Simple Src/Clear ops reduce to a BLT fill when the colour can be
	 * expressed as a pixel in the destination format.
	 */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen7_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel,
				       box, n))
			return true;

		if (!gen7_check_dst_format(format))
			return false;
	}

	/* Normalise Clear to Src with a zero a8r8g8b8 pixel. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Oversized targets render through a proxy over the boxes' extents,
	 * or get tiled if even that fails.
	 */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS(op, format);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			/* Release the solid bo and any redirect proxy. */
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* 3 vertices of (x,y),(s,t): texcoords pinned to the
			 * 1x1 solid source.
			 */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}

/* Emit one fill rectangle given (x, y, w, h) for a prepared fill op. */
static void
gen7_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *v;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x+w;
	v[4] = v[8] = x;
	v[1] = v[5] = y+h;
	v[9] = y;

	v[2] = v[3] = v[7] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit one fill rectangle given as a BoxRec for a prepared fill op. */
fastcall static void
gen7_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *v;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[8] = v[4] = box->x1;
	v[5] = v[1] = box->y2;
	v[9] = box->y1;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;
}

fastcall static void
gen7_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	/* Emit a batch of fill rectangles for a prepared fill op, chunked
	 * to however many rects fit in the current vertex buffer.
	 */
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a fill op: flush buffered vertices and drop the solid-colour bo. */
static void
gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Prepare a repeated-use solid fill (op->blt/box/boxes called per rect).
 * Prefers the BLT; falls back to it when the RENDER path cannot handle
 * the alu or the destination size.
 */
static bool
gen7_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* 1x1 solid bo holding the fill colour, sampled as the source. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_fill_state(sna, &op->base);

	op->blt = gen7_render_fill_op_blt;
	op->box = gen7_render_fill_op_box;
	op->boxes = gen7_render_fill_op_boxes;
	op->points = NULL;
	op->done = gen7_render_fill_op_done;
	return true;
}

/* One-rectangle solid fill via the BLT ring. */
static bool
gen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* Fill a single rectangle, preferring the BLT when it is already active
 * and falling back to it when RENDER cannot handle the alu/size.
 */
static bool
gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen7_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER...
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* 1x1 solid bo carrying the fill colour. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Clear the whole drawable to zero via the BLT ring. */
static bool
gen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

/* Clear an entire pixmap to zero, preferring the BLT when it is already
 * the active ring and falling back to it for oversized targets.
 */
static bool
gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen7_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black (pixel 0) source. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Full-surface rectangle. */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Reset all cached gen7 pipeline state so the next operation re-emits
 * invariants, samplers, blend, kernels and drawing rectangle from scratch
 * (called after batch submission / mode switches).
 */
static void gen7_render_reset(struct sna *sna)
{
	sna->render_state.gen7.pipe_controls_since_stall = 0;
	sna->render_state.gen7.emit_flush = false;
	sna->render_state.gen7.needs_invariant = true;
	sna->render_state.gen7.ve_id = 3 << 2;
	sna->render_state.gen7.last_primitive = -1;

	/* -1 == "unknown", forcing re-emission on first use. */
	sna->render_state.gen7.num_sf_outputs = 0;
	sna->render_state.gen7.samplers = -1;
	sna->render_state.gen7.blend = -1;
	sna->render_state.gen7.kernel = -1;
	sna->render_state.gen7.drawrect_offset = -1;
	sna->render_state.gen7.drawrect_limit = -1;
	sna->render_state.gen7.surface_table = 0;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static-state bo holding kernels/samplers/blend state. */
static void gen7_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}

/* Haswell only: GT3 detection from the PCI device id. */
static bool is_gt3(struct sna *sna, int devid)
{
	assert(sna->kgem.gen == 075);
	return devid & 0x20;
}

/* GT2 detection; the discriminating bits differ between HSW and IVB. */
static bool is_gt2(struct sna *sna, int devid)
{
	return devid & (is_hsw(sna)? 0x30 : 0x20);
}

/* NOTE(review): mobile detection by low nibble of the device id —
 * presumably matches the mobile SKU numbering; verify against the PCI id
 * tables if extending.
 */
static bool is_mobile(struct sna *sna, int devid)
{
	return (devid & 0xf) == 0x6;
}

/* One-time setup: pick the GT info for the detected SKU (IVB/BYT/HSW),
 * compile/upload the WM kernels, build the sampler and blend state tables,
 * and pin them all in a single static-stream bo.
 * Returns false for unrecognised hardware.
 */
static bool gen7_render_setup(struct sna *sna, int devid)
{
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (devid & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna, devid))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (devid & 0xf) {
			if (is_gt3(sna, devid))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna, devid))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Upload each WM kernel: pre-built blobs go in as-is (16-pixel
	 * slot), otherwise compile for the enabled dispatch widths.
	 */
	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	/* Sampler-state pairs: copy, fill, then every (filter, extend)
	 * combination for source x mask.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Entry point: install the gen7 render backend into sna, wiring up the
 * composite/copy/fill/clear hooks (subject to the NO_* compile switches).
 * Returns the backend name on success, or the incoming name unchanged if
 * the hardware is not a supported gen7 part.
 */
const char *gen7_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->dev);

	if (!gen7_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	/* Spans on the GPU only pay off on the beefier/mobile SKUs. */
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}