gen9_render.c revision fe8aea9e
1/* 2 * Copyright © 2012,2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_reg.h" 34#include "sna_render.h" 35#include "sna_render_inline.h" 36#include "sna_video.h" 37 38#include "gen9_render.h" 39#include "gen8_eu.h" 40#include "gen4_common.h" 41#include "gen4_source.h" 42#include "gen4_vertex.h" 43#include "gen6_common.h" 44#include "gen8_vertex.h" 45 46#define SIM 1 47 48#define ALWAYS_INVALIDATE 0 49#define ALWAYS_FLUSH 0 50#define ALWAYS_STALL 0 51 52#define NO_COMPOSITE 0 53#define NO_COMPOSITE_SPANS 0 54#define NO_COPY 0 55#define NO_COPY_BOXES 0 56#define NO_FILL 0 57#define NO_FILL_BOXES 0 58#define NO_FILL_ONE 0 59#define NO_FILL_CLEAR 0 60#define NO_VIDEO 0 61 62#define USE_8_PIXEL_DISPATCH 1 63#define USE_16_PIXEL_DISPATCH 1 64#define USE_32_PIXEL_DISPATCH 0 65 66#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 67#error "Must select at least 8, 16 or 32 pixel dispatch" 68#endif 69 70#define GEN9_MAX_SIZE 16384 71#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */ 72 73/* XXX Todo 74 * 75 * STR (software tiled rendering) mode. No, really. 76 * 64x32 pixel blocks align with the rendering cache. Worth considering. 77 */ 78 79#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 80 81/* Pipeline stages: 82 * 1. Command Streamer (CS) 83 * 2. Vertex Fetch (VF) 84 * 3. Vertex Shader (VS) 85 * 4. Hull Shader (HS) 86 * 5. Tesselation Engine (TE) 87 * 6. Domain Shader (DS) 88 * 7. Geometry Shader (GS) 89 * 8. Stream Output Logic (SOL) 90 * 9. Clipper (CLIP) 91 * 10. Strip/Fan (SF) 92 * 11. Windower/Masker (WM) 93 * 12. 
Color Calculator (CC) 94 */ 95 96#if !NO_VIDEO 97static const uint32_t ps_kernel_packed_bt601[][4] = { 98#include "exa_wm_src_affine.g8b" 99#include "exa_wm_src_sample_argb.g8b" 100#include "exa_wm_yuv_rgb_bt601.g8b" 101#include "exa_wm_write.g8b" 102}; 103 104static const uint32_t ps_kernel_planar_bt601[][4] = { 105#include "exa_wm_src_affine.g8b" 106#include "exa_wm_src_sample_planar.g8b" 107#include "exa_wm_yuv_rgb_bt601.g8b" 108#include "exa_wm_write.g8b" 109}; 110 111static const uint32_t ps_kernel_nv12_bt601[][4] = { 112#include "exa_wm_src_affine.g8b" 113#include "exa_wm_src_sample_nv12.g8b" 114#include "exa_wm_yuv_rgb_bt601.g8b" 115#include "exa_wm_write.g8b" 116}; 117 118static const uint32_t ps_kernel_packed_bt709[][4] = { 119#include "exa_wm_src_affine.g8b" 120#include "exa_wm_src_sample_argb.g8b" 121#include "exa_wm_yuv_rgb_bt709.g8b" 122#include "exa_wm_write.g8b" 123}; 124 125static const uint32_t ps_kernel_planar_bt709[][4] = { 126#include "exa_wm_src_affine.g8b" 127#include "exa_wm_src_sample_planar.g8b" 128#include "exa_wm_yuv_rgb_bt709.g8b" 129#include "exa_wm_write.g8b" 130}; 131 132static const uint32_t ps_kernel_ayuv_bt601[][4] = { 133#include "exa_wm_src_affine.g8b" 134#include "exa_wm_src_sample_argb_ayuv.g8b" 135#include "exa_wm_yuv_rgb_bt601.g8b" 136#include "exa_wm_write.g8b" 137}; 138 139static const uint32_t ps_kernel_ayuv_bt709[][4] = { 140#include "exa_wm_src_affine.g8b" 141#include "exa_wm_src_sample_argb_ayuv.g8b" 142#include "exa_wm_yuv_rgb_bt709.g8b" 143#include "exa_wm_write.g8b" 144}; 145 146static const uint32_t ps_kernel_nv12_bt709[][4] = { 147#include "exa_wm_src_affine.g8b" 148#include "exa_wm_src_sample_nv12.g8b" 149#include "exa_wm_yuv_rgb_bt709.g8b" 150#include "exa_wm_write.g8b" 151}; 152 153static const uint32_t ps_kernel_rgb[][4] = { 154#include "exa_wm_src_affine.g8b" 155#include "exa_wm_src_sample_argb.g8b" 156#include "exa_wm_write.g8b" 157}; 158#endif 159 160#define SURFACE_DW (64 / sizeof(uint32_t)); 161 162#define 
KERNEL(kernel_enum, kernel, num_surfaces) \ 163 [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 164#define NOKERNEL(kernel_enum, func, num_surfaces) \ 165 [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 166static const struct wm_kernel_info { 167 const char *name; 168 const void *data; 169 unsigned int size; 170 int num_surfaces; 171} wm_kernels[] = { 172 NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), 173 NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), 174 175 NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), 176 NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), 177 178 NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), 179 NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), 180 181 NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), 182 NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), 183 184 NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), 185 NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), 186 187#if !NO_VIDEO 188 KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7), 189 KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7), 190 KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2), 191 KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7), 192 KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7), 193 KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2), 194 KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2), 195 KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2), 196 KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), 197#endif 198}; 199#undef KERNEL 200 201static const struct blendinfo { 202 uint8_t src_alpha; 203 uint8_t src_blend; 204 uint8_t dst_blend; 205} gen9_blend_op[] = { 206 /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 207 /* Src */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 208 /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 209 /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 210 /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 
211 /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 212 /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 213 /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 214 /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 215 /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 216 /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 217 /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 218 /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 219}; 220 221/** 222 * Highest-valued BLENDFACTOR used in gen9_blend_op. 223 * 224 * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR, 225 * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 226 * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 227 */ 228#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) 229 230#define GEN9_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen9_blend_state), 64) 231 232#define BLEND_OFFSET(s, d) \ 233 ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4) 234 235#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) 236#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) 237 238#define SAMPLER_OFFSET(sf, se, mf, me) \ 239 (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) 240 241#define VERTEX_2s2s 0 242 243#define COPY_SAMPLER 0 244#define COPY_VERTEX VERTEX_2s2s 245#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, COPY_VERTEX) 246 247#define FILL_SAMPLER 1 248#define FILL_VERTEX VERTEX_2s2s 249#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), FILL_VERTEX) 250#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX) 251 252#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff) 253#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff) 254#define GEN9_READS_DST(f) (((f) >> 15) & 1) 255#define GEN9_VERTEX(f) (((f) >> 0) & 0xf) 256#define GEN9_SET_FLAGS(S, B, V) ((S) << 20 | (B) | (V)) 257 258#define OUT_BATCH(v) batch_emit(sna, v) 259#define OUT_BATCH64(v) batch_emit64(sna, v) 260#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 261#define OUT_VERTEX_F(v) vertex_emit(sna, v) 262 263struct gt_info { 264 const char *name; 265 struct { 266 int max_vs_entries; 267 } urb; 268}; 269 270static const struct gt_info min_gt_info = { 271 .name = "Skylake (gen9)", 272 .urb = { .max_vs_entries = 240 }, 273}; 274 275static const struct gt_info skl_gt_info = { 276 .name = "Skylake (gen9)", 277 .urb = { .max_vs_entries = 960 }, 278}; 279 280static const struct gt_info bxt_gt_info = { 281 .name = "Broxton (gen9)", 282 .urb = { .max_vs_entries = 320 }, 283}; 284 285static const struct gt_info kbl_gt_info = { 286 .name = "Kabylake (gen9)", 287 .urb = { .max_vs_entries = 960 }, 288}; 289 290static const struct gt_info glk_gt_info = { 291 .name = "Geminilake (gen9)", 292 .urb = { .max_vs_entries = 320 }, 293}; 294 295static const struct gt_info cfl_gt_info = { 296 .name = "Coffeelake (gen9)", 297 .urb = { .max_vs_entries = 960 }, 298}; 299 300static bool is_skl(struct sna *sna) 301{ 302 return sna->kgem.gen == 0110; 303} 304 305static bool is_bxt(struct sna *sna) 306{ 307 return sna->kgem.gen == 0111; 308} 309 310static bool is_kbl(struct sna *sna) 311{ 312 return sna->kgem.gen == 0112; 313} 314 315static bool is_glk(struct sna *sna) 316{ 317 return sna->kgem.gen == 0113; 318} 319 320static bool is_cfl(struct sna *sna) 321{ 322 return 
sna->kgem.gen == 0114;
}

/* Reject surfaces exceeding the 16k x 16k limit handled by this
 * backend (GEN9_MAX_SIZE); callers fall back to other paths. */
static inline bool too_large(int width, int height)
{
	return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE;
}

/* Would this bo violate sampler alignment? Currently compiled out and
 * always reports "aligned"; the disabled code sketches a check of
 * linear proxies against SURFACE_VALIGN_4 | SURFACE_HALIGN_4. */
static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
		return true;

	return false;
#else
	return false;
#endif
}

/* Translate a Render operator into the packed blend offset (see
 * BLEND_OFFSET) used to select a precomputed BLEND_STATE entry,
 * adjusting the factors from gen9_blend_op[] for destinations without
 * an alpha channel and for component-alpha (per-channel) masks. */
static uint32_t gen9_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	/* The largest packed (src, dst) index must fit the 11-bit
	 * field extracted by GEN9_BLEND(). */
	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);

	src = gen9_blend_op[op].src_blend;
	dst = gen9_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == BLENDFACTOR_DST_ALPHA)
			src = BLENDFACTOR_ONE;
		else if (src == BLENDFACTOR_INV_DST_ALPHA)
			src = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen9_blend_op[op].src_alpha) {
		if (dst == BLENDFACTOR_SRC_ALPHA)
			dst = BLENDFACTOR_SRC_COLOR;
		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
			dst = BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
	return BLEND_OFFSET(src, dst);
}

/* Map a Render PictFormat to the SURFACE_STATE format used when
 * sampling from it; returns (uint32_t)-1 for unsupported formats. */
static uint32_t gen9_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

/* Map a Render PictFormat to the SURFACE_STATE format used when
 * rendering to it; note the x (no-alpha) variants share the alpha
 * format of the same layout. Returns (uint32_t)-1 if unsupported. */
static uint32_t gen9_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

/* True if the format can be used as a render target here. */
static bool gen9_check_dst_format(PictFormat format)
{
	if (gen9_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

/* True if the format can be sampled by this backend. */
static bool gen9_check_format(uint32_t format)
{
	if (gen9_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

/* Convert a Render filter (pre-validated by gen9_check_filter) into
 * the SAMPLER_STATE filter mode; asserts on anything else. */
static uint32_t gen9_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

/* Accept only the filters gen9_filter() can encode. */
static uint32_t gen9_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

/* Convert a Render repeat mode (pre-validated by gen9_check_repeat)
 * into the SAMPLER_STATE extend mode; asserts on anything else. */
static uint32_t gen9_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

/* Accept only the repeat modes gen9_repeat() can encode. */
static bool gen9_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

/* Pick the WM kernel index for a composite: masked kernels when a
 * mask is present (saturated- vs component-alpha variants per the
 * operator's src_alpha flag), plus one for the projective variant
 * when the transform is not affine. */
static int
gen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen9_blend_op[op].src_alpha)
				base = GEN9_WM_KERNEL_MASKSA;
			else
				base = GEN9_WM_KERNEL_MASKCA;
		} else
base = GEN9_WM_KERNEL_MASK;
	} else
		base = GEN9_WM_KERNEL_NOMASK;

	return base + !is_affine;
}

/* Zero the per-stage push-constant buffer allocations (emitted only
 * when SIM is enabled; SIM == 1 in this build). */
static void
gen9_emit_push_constants(struct sna *sna)
{
#if SIM
	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Give the whole URB VS budget (per-SKU max_vs_entries) to the VS;
 * HS/DS/GS get zero-sized allocations as those stages are disabled. */
static void
gen9_emit_urb(struct sna *sna)
{
	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT |
		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);
}

/* Program STATE_BASE_ADDRESS: surface state lives in the batch itself
 * (reloc against NULL bo), dynamic and instruction state live in
 * general_bo; upper bounds are given in 4KiB pages with the modify
 * bit set. */
static void
gen9_emit_state_base_address(struct sna *sna)
{
	uint32_t num_pages;

	/* Surface state is allocated from the top of the batch. */
	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);

	/* WaBindlessSurfaceStateModifyEnable:skl,bxt */
	OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2));
	OUT_BATCH64(0); /* general */
	OUT_BATCH(0); /* stateless dataport */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
				     sna->kgem.nbatch,
				     NULL,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
				     sna->kgem.nbatch,
				     sna->render_state.gen9.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(0); /* indirect */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
				     sna->kgem.nbatch,
				     sna->render_state.gen9.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	/* upper bounds */
	num_pages = sna->render_state.gen9.general_bo->size.pages.count;
	OUT_BATCH(0); /* general */
	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(num_pages << 12 | 1); /* instruction */

	/* Bindless */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* VS stage: no kernel, pass-through mode, URB writes forwarded to
 * SBE. The SIM-only packets clear constants/bindings/samplers. */
static void
gen9_emit_vs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2));
	OUT_BATCH64(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(1 << 1); /* pass-through */
	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* HS stage disabled: no kernel, zero state. */
static void
gen9_emit_hs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0); /* no HS kernel */
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* Tessellation engine disabled. */
static void
gen9_emit_te_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* DS stage disabled: no kernel, zero state. */
static void
gen9_emit_ds_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2));
	OUT_BATCH64(0); /* no kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* GS stage disabled: no kernel, zero state. */
static void
gen9_emit_gs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2));
	OUT_BATCH64(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* Stream-output logic disabled. */
static void
gen9_emit_sol_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Strip/fan stage: all-default state. */
static void
gen9_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Clipper in pass-through; null SF_CLIP and CC viewport pointers. */
static void
gen9_emit_clip_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}

/* Declare a null depth buffer and zero hier-depth/stencil/WM
 * depth-stencil/clear-params state (2D rendering uses no depth). */
static void
gen9_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2));
#if 1
	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
#else
	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
#endif
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}

/* One-time WM/PS-side state: null depth, perspective barycentrics,
 * PS_EXTRA with attributes enabled, default raster state, zero SBE
 * swizzles and PS constants. */
static void
gen9_emit_wm_invariant(struct sna *sna)
{
	gen9_emit_null_depth_buffer(sna);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2));
	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2));
	OUT_BATCH(0);
#endif

#if 0
	OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2));
	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
		  PSX_ATTRIBUTE_ENABLE);

	OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2));
	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
		  RASTER_CULL_NONE);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
#endif
}

/* No invariant CC state is required. */
static void
gen9_emit_cc_invariant(struct sna *sna)
{
}

/* Vertex-fetch invariants: no SGVs, RECTLIST topology, statistics on,
 * and per-element instancing state cleared for elements 1-3. */
static void
gen9_emit_vf_invariant(struct sna *sna)
{
	int n;

#if 1
	OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2));
	OUT_BATCH(RECTLIST);

	OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0);

	for (n = 1; n <= 3; n++) {
		OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2));
		OUT_BATCH(n);
		OUT_BATCH(0);
	}
}

/* Emit the full once-per-context 3D state: pipeline select, sampling
 * defaults, push constants, URB, base addresses, then each stage's
 * invariant state. Clears needs_invariant when done. */
static void
gen9_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_PIPELINE_SELECT |
		  PIPELINE_SELECTION_MASK |
		  PIPELINE_SELECT_3D);

#if SIM
	OUT_BATCH(GEN9_STATE_SIP | (3 - 2));
	OUT_BATCH64(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2));
	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */

	OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	//OUT_BATCH(8<<20 | 8<<16);
	OUT_BATCH(0);
#endif

	gen9_emit_push_constants(sna);
	gen9_emit_urb(sna);

	gen9_emit_state_base_address(sna);

	gen9_emit_vf_invariant(sna);
	gen9_emit_vs_invariant(sna);
gen9_emit_hs_invariant(sna);
	gen9_emit_te_invariant(sna);
	gen9_emit_ds_invariant(sna);
	gen9_emit_gs_invariant(sna);
	gen9_emit_sol_invariant(sna);
	gen9_emit_clip_invariant(sna);
	gen9_emit_sf_invariant(sna);
	gen9_emit_wm_invariant(sna);
	gen9_emit_cc_invariant(sna);

	sna->render_state.gen9.needs_invariant = false;
}

/* Program the blend state for this operation: 3DSTATE_PS_BLEND with
 * the unpacked src/dst factors, plus a pointer into the precomputed
 * BLEND_STATE array (indexed by the packed pair, see BLEND_OFFSET).
 * Skipped entirely when the cached render->blend already matches. */
static void
gen9_emit_cc(struct sna *sna, uint32_t blend)
{
	struct gen9_render_state *render = &sna->render_state.gen9;

	if (render->blend == blend)
		return;

	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
	     __FUNCTION__, blend, render->blend,
	     blend / GEN9_BLENDFACTOR_COUNT,
	     blend % GEN9_BLENDFACTOR_COUNT));

	assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT);
	assert(blend / GEN9_BLENDFACTOR_COUNT > 0);
	assert(blend % GEN9_BLENDFACTOR_COUNT > 0);

	/* XXX can have up to 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2));
	if (blend != GEN9_BLEND(NO_BLEND)) {
		/* Recover the factor pair packed by BLEND_OFFSET(). */
		uint32_t src = blend / GEN9_BLENDFACTOR_COUNT;
		uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT;
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
			  PS_BLEND_COLOR_BLEND_ENABLE |
			  src << PS_BLEND_SRC_ALPHA_SHIFT |
			  dst << PS_BLEND_DST_ALPHA_SHIFT |
			  src << PS_BLEND_SRC_SHIFT |
			  dst << PS_BLEND_DST_SHIFT);
	} else
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);

	assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64));
	OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1);

	/* Force a CC_STATE pointer change to improve blend performance */
	OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	render->blend = blend;
}

/* Point the PS at the 32-byte sampler-state pair for this
 * (filter, extend) combination within the prebuilt wm_state block;
 * cached in render_state.gen9.samplers. */
static void
gen9_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen9.samplers == state)
		return;

	sna->render_state.gen9.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert(2 * sizeof(struct gen9_sampler_state) == 32);
	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state));
}

/* Program SBE for one (source only) or two (source + mask) texture
 * coordinate attributes; cached via num_sf_outputs. */
static void
gen9_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));

	sna->render_state.gen9.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2));
	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 |
		  SBE_ACTIVE_COMPONENT_XYZW << 1);
	OUT_BATCH(0);
}

/* Program 3DSTATE_PS for the selected WM kernel: kernel start offsets
 * for the 8/16/32-pixel dispatch variants (a zero entry disables that
 * dispatch mode) with the matching start-GRF fields; cached via
 * render_state.gen9.kernel. */
static void
gen9_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	assert(kernel < ARRAY_SIZE(wm_kernels));
	if (sna->render_state.gen9.kernel == kernel)
		return;

	sna->render_state.gen9.kernel = kernel;
	/* kernels[0..2] are the 8/16/32-wide variants' offsets. */
	kernels = sna->render_state.gen9.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));
	assert(is_aligned(kernels[0], 64));
	assert(is_aligned(kernels[1], 64));
	assert(is_aligned(kernels[2], 64));

	OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2));
	/* Kernel pointer 0: the first available dispatch variant. */
	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
		  PS_VECTOR_MASK_ENABLE |
		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH64(0); /* scratch address */
	OUT_BATCH(PS_MAX_THREADS |
		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH64(kernels[2]);
	OUT_BATCH64(kernels[1]);
}

/* Update the PS binding-table pointer (offset is in dwords within the
 * batch's surface-state area); returns true when it changed, false
 * when the cached value already matched. */
static bool
gen9_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen9.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen9.surface_table = offset;
	return true;
}

/* Program the drawing rectangle for the destination. NOTE(review):
 * the return value is inverted relative to gen9_emit_binding_table —
 * true means the cached state already matched and nothing was
 * emitted; false means new state was written. */
static bool
gen9_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen9.drawrect_limit == limit &&
	    sna->render_state.gen9.drawrect_offset == offset)
		return true;

	sna->render_state.gen9.drawrect_offset = offset;
	sna->render_state.gen9.drawrect_limit = limit;

	OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

/* Program the vertex-element layout for the operation's vertex id
 * (GEN9_VERTEX bits of the flags); cached via render->ve_id. */
static void
gen9_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen9_render_state *render = &sna->render_state.gen9;
	uint32_t src_format, dw;
	int id = GEN9_VERTEX(op->u.gen9.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
render->ve_id = id; 1169 1170 if (render->ve_dirty) { 1171 /* dummy primitive to flush vertex before change? */ 1172 OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1173 OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1174 OUT_BATCH(0); 1175 OUT_BATCH(0); 1176 OUT_BATCH(1); /* single instance */ 1177 OUT_BATCH(0); /* start instance location */ 1178 OUT_BATCH(0); /* index buffer offset, ignored */ 1179 } 1180 1181 /* The VUE layout 1182 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 1183 * dword 4-7: position (x, y, 1.0, 1.0), 1184 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 1185 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 1186 * 1187 * dword 4-15 are fetched from vertex buffer 1188 */ 1189 has_mask = (id >> 2) != 0; 1190 OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS | 1191 ((2 * (3 + has_mask)) + 1 - 2)); 1192 1193 OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1194 SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | 1195 0 << VE_OFFSET_SHIFT); 1196 OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | 1197 COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | 1198 COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1199 COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); 1200 1201 /* x,y */ 1202 OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1203 SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | 1204 0 << VE_OFFSET_SHIFT); 1205 OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | 1206 COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | 1207 COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1208 COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); 1209 1210 /* u0, v0, w0 */ 1211 DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); 1212 dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1213 switch (id & 3) { 1214 default: 1215 assert(0); 1216 case 0: 1217 src_format = SURFACEFORMAT_R16G16_SSCALED; 1218 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1219 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1220 dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1221 break; 1222 case 1: 1223 
src_format = SURFACEFORMAT_R32_FLOAT; 1224 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1225 dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1226 dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1227 break; 1228 case 2: 1229 src_format = SURFACEFORMAT_R32G32_FLOAT; 1230 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1231 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1232 dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1233 break; 1234 case 3: 1235 src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1236 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1237 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1238 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1239 break; 1240 } 1241 OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1242 src_format << VE_FORMAT_SHIFT | 1243 4 << VE_OFFSET_SHIFT); 1244 OUT_BATCH(dw); 1245 1246 /* u1, v1, w1 */ 1247 if (has_mask) { 1248 unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1249 DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); 1250 dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1251 switch (id >> 2) { 1252 case 1: 1253 src_format = SURFACEFORMAT_R32_FLOAT; 1254 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1255 dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1256 dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1257 break; 1258 default: 1259 assert(0); 1260 case 2: 1261 src_format = SURFACEFORMAT_R32G32_FLOAT; 1262 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1263 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1264 dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1265 break; 1266 case 3: 1267 src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1268 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1269 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1270 dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1271 break; 1272 } 1273 OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1274 src_format << VE_FORMAT_SHIFT | 1275 offset << VE_OFFSET_SHIFT); 1276 
OUT_BATCH(dw); 1277 } 1278 1279 render->ve_dirty = true; 1280} 1281 1282inline static void 1283gen9_emit_pipe_invalidate(struct sna *sna) 1284{ 1285 OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1286 OUT_BATCH(PIPE_CONTROL_WC_FLUSH | 1287 PIPE_CONTROL_TC_FLUSH | 1288 PIPE_CONTROL_CS_STALL); 1289 OUT_BATCH64(0); 1290 OUT_BATCH64(0); 1291} 1292 1293inline static void 1294gen9_emit_pipe_flush(struct sna *sna, bool need_stall) 1295{ 1296 unsigned stall; 1297 1298 stall = 0; 1299 if (need_stall) 1300 stall = (PIPE_CONTROL_CS_STALL | 1301 PIPE_CONTROL_STALL_AT_SCOREBOARD); 1302 1303 OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1304 OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); 1305 OUT_BATCH64(0); 1306 OUT_BATCH64(0); 1307} 1308 1309inline static void 1310gen9_emit_pipe_stall(struct sna *sna) 1311{ 1312 OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1313 OUT_BATCH(PIPE_CONTROL_CS_STALL | 1314 PIPE_CONTROL_FLUSH | 1315 PIPE_CONTROL_STALL_AT_SCOREBOARD); 1316 OUT_BATCH64(0); 1317 OUT_BATCH64(0); 1318} 1319 1320static void 1321gen9_emit_state(struct sna *sna, 1322 const struct sna_composite_op *op, 1323 uint16_t wm_binding_table) 1324{ 1325 bool need_invalidate; 1326 bool need_flush; 1327 bool need_stall; 1328 1329 assert(op->dst.bo->exec); 1330 1331 need_flush = wm_binding_table & 1 || 1332 (sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags)); 1333 if (ALWAYS_FLUSH) 1334 need_flush = true; 1335 1336 wm_binding_table &= ~1; 1337 1338 need_stall = sna->render_state.gen9.surface_table != wm_binding_table; 1339 1340 need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 1341 if (ALWAYS_INVALIDATE) 1342 need_invalidate = true; 1343 1344 need_stall &= gen9_emit_drawing_rectangle(sna, op); 1345 if (ALWAYS_STALL) 1346 need_stall = true; 1347 1348 if (need_invalidate) { 1349 gen9_emit_pipe_invalidate(sna); 1350 kgem_clear_dirty(&sna->kgem); 1351 assert(op->dst.bo->exec); 1352 kgem_bo_mark_dirty(op->dst.bo); 1353 1354 need_flush = false; 1355 need_stall = 
false; 1356 } 1357 if (need_flush) { 1358 gen9_emit_pipe_flush(sna, need_stall); 1359 need_stall = false; 1360 } 1361 if (need_stall) 1362 gen9_emit_pipe_stall(sna); 1363 1364 gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); 1365 gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags)); 1366 gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2); 1367 gen9_emit_wm(sna, op->u.gen9.wm_kernel); 1368 gen9_emit_vertex_elements(sna, op); 1369 gen9_emit_binding_table(sna, wm_binding_table); 1370 1371 sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags); 1372} 1373 1374static bool gen9_magic_ca_pass(struct sna *sna, 1375 const struct sna_composite_op *op) 1376{ 1377 struct gen9_render_state *state = &sna->render_state.gen9; 1378 1379 if (!op->need_magic_ca_pass) 1380 return false; 1381 1382 DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 1383 sna->render.vertex_start, sna->render.vertex_index)); 1384 1385 gen9_emit_pipe_stall(sna); 1386 1387 gen9_emit_cc(sna, 1388 GEN9_BLEND(gen9_get_blend(PictOpAdd, true, 1389 op->dst.format))); 1390 gen9_emit_wm(sna, 1391 gen9_choose_composite_kernel(PictOpAdd, 1392 true, true, 1393 op->is_affine)); 1394 1395 OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1396 OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1397 OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 1398 OUT_BATCH(sna->render.vertex_start); 1399 OUT_BATCH(1); /* single instance */ 1400 OUT_BATCH(0); /* start instance location */ 1401 OUT_BATCH(0); /* index buffer offset, ignored */ 1402 1403 state->last_primitive = sna->kgem.nbatch; 1404 state->ve_dirty = false; 1405 return true; 1406} 1407 1408static void null_create(struct sna_static_stream *stream) 1409{ 1410 /* A bunch of zeros useful for legacy border color and depth-stencil */ 1411 sna_static_stream_map(stream, 64, 64); 1412} 1413 1414static void 1415sampler_state_init(struct gen9_sampler_state *sampler_state, 1416 sampler_filter_t filter, 1417 sampler_extend_t extend) 1418{ 1419 
COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); 1420 1421 sampler_state->ss0.lod_preclamp = 2; /* GL mode */ 1422 sampler_state->ss0.default_color_mode = 1; 1423 1424 switch (filter) { 1425 default: 1426 case SAMPLER_FILTER_NEAREST: 1427 sampler_state->ss0.min_filter = MAPFILTER_NEAREST; 1428 sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; 1429 break; 1430 case SAMPLER_FILTER_BILINEAR: 1431 sampler_state->ss0.min_filter = MAPFILTER_LINEAR; 1432 sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; 1433 break; 1434 } 1435 1436 /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ 1437 1438 switch (extend) { 1439 default: 1440 case SAMPLER_EXTEND_NONE: 1441 sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1442 sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1443 sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1444 break; 1445 case SAMPLER_EXTEND_REPEAT: 1446 sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; 1447 sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; 1448 sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; 1449 break; 1450 case SAMPLER_EXTEND_PAD: 1451 sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; 1452 sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; 1453 sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; 1454 break; 1455 case SAMPLER_EXTEND_REFLECT: 1456 sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; 1457 sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; 1458 sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; 1459 break; 1460 } 1461} 1462 1463static void 1464sampler_copy_init(struct gen9_sampler_state *ss) 1465{ 1466 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1467 ss->ss3.non_normalized_coord = 1; 1468 1469 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1470} 1471 1472static void 1473sampler_fill_init(struct gen9_sampler_state *ss) 1474{ 1475 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1476 
ss->ss3.non_normalized_coord = 1; 1477 1478 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1479} 1480 1481static uint32_t 1482gen9_tiling_bits(uint32_t tiling) 1483{ 1484 switch (tiling) { 1485 default: assert(0); 1486 case I915_TILING_NONE: return 0; 1487 case I915_TILING_X: return SURFACE_TILED; 1488 case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; 1489 } 1490} 1491 1492#define MOCS_PTE (1 << 1) 1493#define MOCS_WB (2 << 1) 1494 1495/** 1496 * Sets up the common fields for a surface state buffer for the given 1497 * picture in the given surface state buffer. 1498 */ 1499static uint32_t 1500gen9_bind_bo(struct sna *sna, 1501 struct kgem_bo *bo, 1502 uint32_t width, 1503 uint32_t height, 1504 uint32_t format, 1505 bool is_dst) 1506{ 1507 uint32_t *ss; 1508 uint32_t domains; 1509 int offset; 1510 uint32_t is_scanout = is_dst && bo->scanout; 1511 1512 /* After the first bind, we manage the cache domains within the batch */ 1513 offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1514 if (offset) { 1515 if (is_dst) 1516 kgem_bo_mark_dirty(bo); 1517 assert(offset >= sna->kgem.surface); 1518 return offset * sizeof(uint32_t); 1519 } 1520 1521 offset = sna->kgem.surface -= SURFACE_DW; 1522 ss = sna->kgem.batch + offset; 1523 ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 1524 gen9_tiling_bits(bo->tiling) | 1525 format << SURFACE_FORMAT_SHIFT | 1526 SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 1527 if (is_dst) { 1528 ss[0] |= SURFACE_RC_READ_WRITE; 1529 domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1530 } else 1531 domains = I915_GEM_DOMAIN_SAMPLER << 16; 1532 ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? 
MOCS_PTE << 24 : MOCS_WB << 24; 1533 ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 1534 (height - 1) << SURFACE_HEIGHT_SHIFT); 1535 ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; 1536 ss[4] = 0; 1537 ss[5] = 0; 1538 ss[6] = 0; 1539 ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 1540 *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); 1541 ss[10] = 0; 1542 ss[11] = 0; 1543 ss[12] = 0; 1544 ss[13] = 0; 1545 ss[14] = 0; 1546 ss[15] = 0; 1547 1548 kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1549 1550 DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1551 offset, bo->handle, *(uint64_t *)(ss+8), 1552 format, width, height, bo->pitch, bo->tiling, 1553 domains & 0xffff ? "render" : "sampler")); 1554 1555 return offset * sizeof(uint32_t); 1556} 1557 1558static void gen9_emit_vertex_buffer(struct sna *sna, 1559 const struct sna_composite_op *op) 1560{ 1561 int id = GEN9_VERTEX(op->u.gen9.flags); 1562 1563 OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1564 OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | 1565 4*op->floats_per_vertex); 1566 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1567 OUT_BATCH64(0); 1568 OUT_BATCH(~0); /* buffer size: disabled */ 1569 1570 sna->render.vb_id |= 1 << id; 1571} 1572 1573static void gen9_emit_primitive(struct sna *sna) 1574{ 1575 if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) { 1576 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1577 return; 1578 } 1579 1580 OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1581 OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1582 sna->render.vertex_offset = sna->kgem.nbatch; 1583 OUT_BATCH(0); /* vertex count, to be filled in later */ 1584 OUT_BATCH(sna->render.vertex_index); 1585 OUT_BATCH(1); /* single instance */ 1586 OUT_BATCH(0); /* start instance location */ 1587 OUT_BATCH(0); /* index buffer offset, ignored */ 1588 sna->render.vertex_start = 
sna->render.vertex_index; 1589 1590 sna->render_state.gen9.last_primitive = sna->kgem.nbatch; 1591 sna->render_state.gen9.ve_dirty = false; 1592} 1593 1594static bool gen9_rectangle_begin(struct sna *sna, 1595 const struct sna_composite_op *op) 1596{ 1597 int id = 1 << GEN9_VERTEX(op->u.gen9.flags); 1598 int ndwords; 1599 1600 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1601 return true; 1602 1603 ndwords = op->need_magic_ca_pass ? 60 : 6; 1604 if ((sna->render.vb_id & id) == 0) 1605 ndwords += 5; 1606 if (!kgem_check_batch(&sna->kgem, ndwords)) 1607 return false; 1608 1609 if ((sna->render.vb_id & id) == 0) 1610 gen9_emit_vertex_buffer(sna, op); 1611 1612 gen9_emit_primitive(sna); 1613 return true; 1614} 1615 1616static int gen9_get_rectangles__flush(struct sna *sna, 1617 const struct sna_composite_op *op) 1618{ 1619 /* Preventing discarding new vbo after lock contention */ 1620 if (sna_vertex_wait__locked(&sna->render)) { 1621 int rem = vertex_space(sna); 1622 if (rem > op->floats_per_rect) 1623 return rem; 1624 } 1625 1626 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 
65 : 6)) 1627 return 0; 1628 if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1629 return 0; 1630 1631 if (sna->render.vertex_offset) { 1632 gen8_vertex_flush(sna); 1633 if (gen9_magic_ca_pass(sna, op)) { 1634 gen9_emit_pipe_invalidate(sna); 1635 gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); 1636 gen9_emit_wm(sna, op->u.gen9.wm_kernel); 1637 } 1638 } 1639 1640 return gen8_vertex_finish(sna); 1641} 1642 1643inline static int gen9_get_rectangles(struct sna *sna, 1644 const struct sna_composite_op *op, 1645 int want, 1646 void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 1647{ 1648 int rem; 1649 1650 assert(want); 1651 1652start: 1653 rem = vertex_space(sna); 1654 if (unlikely(rem < op->floats_per_rect)) { 1655 DBG(("flushing vbo for %s: %d < %d\n", 1656 __FUNCTION__, rem, op->floats_per_rect)); 1657 rem = gen9_get_rectangles__flush(sna, op); 1658 if (unlikely(rem == 0)) 1659 goto flush; 1660 } 1661 1662 if (unlikely(sna->render.vertex_offset == 0)) { 1663 if (!gen9_rectangle_begin(sna, op)) 1664 goto flush; 1665 else 1666 goto start; 1667 } 1668 1669 assert(rem <= vertex_space(sna)); 1670 assert(op->floats_per_rect <= rem); 1671 if (want > 1 && want * op->floats_per_rect > rem) 1672 want = rem / op->floats_per_rect; 1673 1674 assert(want > 0); 1675 sna->render.vertex_index += 3*want; 1676 return want; 1677 1678flush: 1679 if (sna->render.vertex_offset) { 1680 gen8_vertex_flush(sna); 1681 gen9_magic_ca_pass(sna, op); 1682 } 1683 sna_vertex_wait__locked(&sna->render); 1684 _kgem_submit(&sna->kgem); 1685 emit_state(sna, op); 1686 goto start; 1687} 1688 1689inline static uint32_t *gen9_composite_get_binding_table(struct sna *sna, 1690 uint16_t *offset) 1691{ 1692 uint32_t *table; 1693 1694 assert(sna->kgem.surface <= 16384); 1695 sna->kgem.surface -= SURFACE_DW; 1696 /* Clear all surplus entries to zero in case of prefetch */ 1697 table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); 1698 1699 DBG(("%s(%x)\n", __FUNCTION__, 
4*sna->kgem.surface)); 1700 1701 *offset = sna->kgem.surface; 1702 return table; 1703} 1704 1705static void 1706gen9_get_batch(struct sna *sna, const struct sna_composite_op *op) 1707{ 1708 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1709 1710 if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { 1711 DBG(("%s: flushing batch: %d < %d+%d\n", 1712 __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1713 150, 4*8*2)); 1714 _kgem_submit(&sna->kgem); 1715 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1716 } 1717 1718 assert(sna->kgem.mode == KGEM_RENDER); 1719 assert(sna->kgem.ring == KGEM_RENDER); 1720 1721 if (sna->render_state.gen9.needs_invariant) 1722 gen9_emit_invariant(sna); 1723} 1724 1725static void gen9_emit_composite_state(struct sna *sna, 1726 const struct sna_composite_op *op) 1727{ 1728 uint32_t *binding_table; 1729 uint16_t offset, dirty; 1730 1731 gen9_get_batch(sna, op); 1732 1733 binding_table = gen9_composite_get_binding_table(sna, &offset); 1734 1735 dirty = kgem_bo_is_dirty(op->dst.bo); 1736 1737 binding_table[0] = 1738 gen9_bind_bo(sna, 1739 op->dst.bo, op->dst.width, op->dst.height, 1740 gen9_get_dest_format(op->dst.format), 1741 true); 1742 binding_table[1] = 1743 gen9_bind_bo(sna, 1744 op->src.bo, op->src.width, op->src.height, 1745 op->src.card_format, 1746 false); 1747 if (op->mask.bo) { 1748 binding_table[2] = 1749 gen9_bind_bo(sna, 1750 op->mask.bo, 1751 op->mask.width, 1752 op->mask.height, 1753 op->mask.card_format, 1754 false); 1755 } 1756 1757 if (sna->kgem.surface == offset && 1758 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table && 1759 (op->mask.bo == NULL || 1760 sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) { 1761 sna->kgem.surface += SURFACE_DW; 1762 offset = sna->render_state.gen9.surface_table; 1763 } 1764 1765 if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) 1766 dirty = 0; 1767 1768 gen9_emit_state(sna, 
op, offset | dirty); 1769} 1770 1771static void 1772gen9_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1773{ 1774 if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) { 1775 DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1776 sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex)); 1777 gen8_vertex_align(sna, op); 1778 sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex; 1779 } 1780} 1781 1782fastcall static void 1783gen9_render_composite_blt(struct sna *sna, 1784 const struct sna_composite_op *op, 1785 const struct sna_composite_rectangles *r) 1786{ 1787 gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); 1788 op->prim_emit(sna, op, r); 1789} 1790 1791fastcall static void 1792gen9_render_composite_box(struct sna *sna, 1793 const struct sna_composite_op *op, 1794 const BoxRec *box) 1795{ 1796 struct sna_composite_rectangles r; 1797 1798 gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); 1799 1800 DBG((" %s: (%d, %d), (%d, %d)\n", 1801 __FUNCTION__, 1802 box->x1, box->y1, box->x2, box->y2)); 1803 1804 r.dst.x = box->x1; 1805 r.dst.y = box->y1; 1806 r.width = box->x2 - box->x1; 1807 r.height = box->y2 - box->y1; 1808 r.src = r.mask = r.dst; 1809 1810 op->prim_emit(sna, op, &r); 1811} 1812 1813static void 1814gen9_render_composite_boxes__blt(struct sna *sna, 1815 const struct sna_composite_op *op, 1816 const BoxRec *box, int nbox) 1817{ 1818 DBG(("composite_boxes(%d)\n", nbox)); 1819 1820 do { 1821 int nbox_this_time; 1822 1823 nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1824 gen9_emit_composite_state); 1825 nbox -= nbox_this_time; 1826 1827 do { 1828 struct sna_composite_rectangles r; 1829 1830 DBG((" %s: (%d, %d), (%d, %d)\n", 1831 __FUNCTION__, 1832 box->x1, box->y1, box->x2, box->y2)); 1833 1834 r.dst.x = box->x1; 1835 r.dst.y = box->y1; 1836 r.width = box->x2 - box->x1; 1837 r.height = box->y2 - box->y1; 1838 r.src = r.mask = r.dst; 1839 1840 op->prim_emit(sna, op, 
&r); 1841 box++; 1842 } while (--nbox_this_time); 1843 } while (nbox); 1844} 1845 1846static void 1847gen9_render_composite_boxes(struct sna *sna, 1848 const struct sna_composite_op *op, 1849 const BoxRec *box, int nbox) 1850{ 1851 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1852 1853 do { 1854 int nbox_this_time; 1855 float *v; 1856 1857 nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1858 gen9_emit_composite_state); 1859 assert(nbox_this_time); 1860 nbox -= nbox_this_time; 1861 1862 v = sna->render.vertices + sna->render.vertex_used; 1863 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1864 1865 op->emit_boxes(op, box, nbox_this_time, v); 1866 box += nbox_this_time; 1867 } while (nbox); 1868} 1869 1870static void 1871gen9_render_composite_boxes__thread(struct sna *sna, 1872 const struct sna_composite_op *op, 1873 const BoxRec *box, int nbox) 1874{ 1875 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1876 1877 sna_vertex_lock(&sna->render); 1878 do { 1879 int nbox_this_time; 1880 float *v; 1881 1882 nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1883 gen9_emit_composite_state); 1884 assert(nbox_this_time); 1885 nbox -= nbox_this_time; 1886 1887 v = sna->render.vertices + sna->render.vertex_used; 1888 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1889 1890 sna_vertex_acquire__locked(&sna->render); 1891 sna_vertex_unlock(&sna->render); 1892 1893 op->emit_boxes(op, box, nbox_this_time, v); 1894 box += nbox_this_time; 1895 1896 sna_vertex_lock(&sna->render); 1897 sna_vertex_release__locked(&sna->render); 1898 } while (nbox); 1899 sna_vertex_unlock(&sna->render); 1900} 1901 1902static uint32_t 1903gen9_create_blend_state(struct sna_static_stream *stream) 1904{ 1905 char *base, *ptr; 1906 int src, dst; 1907 1908 COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); 1909 1910 base = sna_static_stream_map(stream, 1911 GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * 
GEN9_BLEND_STATE_PADDED_SIZE, 1912 64); 1913 1914 ptr = base; 1915 for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) { 1916 for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) { 1917 struct gen9_blend_state *blend = 1918 (struct gen9_blend_state *)ptr; 1919 1920 assert(((ptr - base) & 63) == 0); 1921 COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); 1922 COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); 1923 COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); 1924 1925 blend->rt.post_blend_clamp = 1; 1926 blend->rt.pre_blend_clamp = 1; 1927 1928 blend->rt.color_blend = 1929 !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); 1930 blend->rt.dest_blend_factor = dst; 1931 blend->rt.source_blend_factor = src; 1932 blend->rt.color_blend_function = BLENDFUNCTION_ADD; 1933 1934 blend->rt.dest_alpha_blend_factor = dst; 1935 blend->rt.source_alpha_blend_factor = src; 1936 blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; 1937 1938 ptr += GEN9_BLEND_STATE_PADDED_SIZE; 1939 } 1940 } 1941 1942 return sna_static_stream_offsetof(stream, base); 1943} 1944 1945static int 1946gen9_composite_picture(struct sna *sna, 1947 PicturePtr picture, 1948 struct sna_composite_channel *channel, 1949 int x, int y, 1950 int w, int h, 1951 int dst_x, int dst_y, 1952 bool precise) 1953{ 1954 PixmapPtr pixmap; 1955 uint32_t color; 1956 int16_t dx, dy; 1957 1958 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1959 __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1960 1961 channel->is_solid = false; 1962 channel->card_format = -1; 1963 1964 if (sna_picture_is_solid(picture, &color)) 1965 return gen4_channel_init_solid(sna, channel, color); 1966 1967 if (picture->pDrawable == NULL) { 1968 int ret; 1969 1970 if (picture->pSourcePict->type == SourcePictTypeLinear) 1971 return gen4_channel_init_linear(sna, picture, channel, 1972 x, y, 1973 w, h, 1974 dst_x, dst_y); 1975 1976 DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1977 ret = -1; 1978 if (!precise) 1979 ret = 
sna_render_picture_approximate_gradient(sna, picture, channel, 1980 x, y, w, h, dst_x, dst_y); 1981 if (ret == -1) 1982 ret = sna_render_picture_fixup(sna, picture, channel, 1983 x, y, w, h, dst_x, dst_y); 1984 return ret; 1985 } 1986 1987 if (picture->alphaMap) { 1988 DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1989 return sna_render_picture_fixup(sna, picture, channel, 1990 x, y, w, h, dst_x, dst_y); 1991 } 1992 1993 if (!gen9_check_repeat(picture)) 1994 return sna_render_picture_fixup(sna, picture, channel, 1995 x, y, w, h, dst_x, dst_y); 1996 1997 if (!gen9_check_filter(picture)) 1998 return sna_render_picture_fixup(sna, picture, channel, 1999 x, y, w, h, dst_x, dst_y); 2000 2001 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 2002 channel->filter = picture->filter; 2003 2004 pixmap = get_drawable_pixmap(picture->pDrawable); 2005 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 2006 2007 x += dx + picture->pDrawable->x; 2008 y += dy + picture->pDrawable->y; 2009 2010 channel->is_affine = sna_transform_is_affine(picture->transform); 2011 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 2012 DBG(("%s: integer translation (%d, %d), removing\n", 2013 __FUNCTION__, dx, dy)); 2014 x += dx; 2015 y += dy; 2016 channel->transform = NULL; 2017 channel->filter = PictFilterNearest; 2018 2019 if (channel->repeat || 2020 (x >= 0 && 2021 y >= 0 && 2022 x + w <= pixmap->drawable.width && 2023 y + h <= pixmap->drawable.height)) { 2024 struct sna_pixmap *priv = sna_pixmap(pixmap); 2025 if (priv && priv->clear) { 2026 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 2027 return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); 2028 } 2029 } 2030 } else 2031 channel->transform = picture->transform; 2032 2033 channel->pict_format = picture->format; 2034 channel->card_format = 
gen9_get_card_format(picture->format); 2035 if (channel->card_format == (unsigned)-1) 2036 return sna_render_picture_convert(sna, picture, channel, pixmap, 2037 x, y, w, h, dst_x, dst_y, 2038 false); 2039 2040 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 2041 DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 2042 pixmap->drawable.width, pixmap->drawable.height)); 2043 return sna_render_picture_extract(sna, picture, channel, 2044 x, y, w, h, dst_x, dst_y); 2045 } 2046 2047 return sna_render_pixmap_bo(sna, channel, pixmap, 2048 x, y, w, h, dst_x, dst_y); 2049} 2050 2051inline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel) 2052{ 2053 if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format))) 2054 return false; 2055 2056 channel->repeat = gen9_repeat(channel->repeat); 2057 channel->filter = gen9_filter(channel->filter); 2058 if (channel->card_format == (unsigned)-1) 2059 channel->card_format = gen9_get_card_format(channel->pict_format); 2060 assert(channel->card_format != (unsigned)-1); 2061 2062 return true; 2063} 2064 2065static void gen9_render_composite_done(struct sna *sna, 2066 const struct sna_composite_op *op) 2067{ 2068 if (sna->render.vertex_offset) { 2069 gen8_vertex_flush(sna); 2070 gen9_magic_ca_pass(sna, op); 2071 } 2072 2073 if (op->mask.bo) 2074 kgem_bo_destroy(&sna->kgem, op->mask.bo); 2075 if (op->src.bo) 2076 kgem_bo_destroy(&sna->kgem, op->src.bo); 2077 2078 sna_render_composite_redirect_done(sna, op); 2079} 2080 2081inline static bool 2082gen9_composite_set_target(struct sna *sna, 2083 struct sna_composite_op *op, 2084 PicturePtr dst, 2085 int x, int y, int w, int h, 2086 bool partial) 2087{ 2088 BoxRec box; 2089 unsigned int hint; 2090 2091 DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 2092 2093 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 2094 op->dst.format = dst->format; 2095 op->dst.width = op->dst.pixmap->drawable.width; 2096 
op->dst.height = op->dst.pixmap->drawable.height; 2097 2098 if (w | h) { 2099 assert(w && h); 2100 box.x1 = x; 2101 box.y1 = y; 2102 box.x2 = x + w; 2103 box.y2 = y + h; 2104 } else 2105 sna_render_picture_extents(dst, &box); 2106 2107 hint = PREFER_GPU | RENDER_GPU; 2108 if (!need_tiling(sna, op->dst.width, op->dst.height)) 2109 hint |= FORCE_GPU; 2110 if (!partial) { 2111 hint |= IGNORE_DAMAGE; 2112 if (w == op->dst.width && h == op->dst.height) 2113 hint |= REPLACES; 2114 } 2115 2116 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 2117 if (op->dst.bo == NULL) 2118 return false; 2119 2120 assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); 2121 2122 if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 2123 return false; 2124 2125 if (hint & REPLACES) { 2126 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 2127 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2128 } 2129 2130 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 2131 &op->dst.x, &op->dst.y); 2132 2133 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 2134 __FUNCTION__, 2135 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 2136 op->dst.width, op->dst.height, 2137 op->dst.bo->pitch, 2138 op->dst.x, op->dst.y, 2139 op->damage ? 
*op->damage : (void *)-1)); 2140 2141 assert(op->dst.bo->proxy == NULL); 2142 2143 if (too_large(op->dst.width, op->dst.height) && 2144 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2145 return false; 2146 2147 return true; 2148} 2149 2150static bool 2151try_blt(struct sna *sna, 2152 uint8_t op, 2153 PicturePtr src, 2154 PicturePtr mask, 2155 PicturePtr dst, 2156 int16_t src_x, int16_t src_y, 2157 int16_t msk_x, int16_t msk_y, 2158 int16_t dst_x, int16_t dst_y, 2159 int16_t width, int16_t height, 2160 unsigned flags, 2161 struct sna_composite_op *tmp) 2162{ 2163 struct kgem_bo *bo; 2164 2165 if (sna->kgem.mode == KGEM_BLT) { 2166 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2167 goto execute; 2168 } 2169 2170 if (too_large(width, height)) { 2171 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2172 __FUNCTION__, width, height)); 2173 goto execute; 2174 } 2175 2176 bo = __sna_drawable_peek_bo(dst->pDrawable); 2177 if (bo == NULL) 2178 goto execute; 2179 2180 if (untiled_tlb_miss(bo)) 2181 goto execute; 2182 2183 if (bo->rq) { 2184 if (RQ_IS_BLT(bo->rq)) 2185 goto execute; 2186 2187 return false; 2188 } 2189 2190 if (bo->tiling == I915_TILING_Y) 2191 goto upload; 2192 2193 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2194 goto execute; 2195 2196 if (src->pDrawable == dst->pDrawable && 2197 (sna->render_state.gt < 3 || width*height < 1024) && 2198 can_switch_to_blt(sna, bo, 0)) 2199 goto execute; 2200 2201 if (src->pDrawable) { 2202 struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); 2203 if (s == NULL) 2204 goto upload; 2205 2206 if (prefer_blt_bo(sna, s, bo)) 2207 goto execute; 2208 } 2209 2210 if (sna->kgem.ring == KGEM_BLT) { 2211 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2212 goto execute; 2213 } 2214 2215upload: 2216 flags |= COMPOSITE_UPLOAD; 2217execute: 2218 return sna_blt_composite(sna, op, 2219 src, dst, 2220 src_x, src_y, 2221 dst_x, dst_y, 2222 width, height, 2223 flags, tmp); 2224} 
/* Gradient sources: only solid fills and linear gradients have fast
 * (approximate) GPU paths; anything else falls back when the client
 * asked for precise rasterisation. */
static bool
check_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

/* True when the picture's pixels currently live only on the CPU side
 * (unattached drawable) and have no transform, i.e. using it would
 * require an upload. */
static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

/* Is this pixmap's backing storage currently tied up by the GPU, such
 * that reading it back for a CPU fallback would stall? */
static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL || priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

/* Would sampling this picture on the GPU require a software fallback?
 * Solid colours never do; gradients depend on check_gradient(); otherwise
 * reject unsupported repeat/format combinations, and accept busy GPU
 * sources (reading them back would be worse than compositing on GPU). */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen9_check_repeat(p) || !gen9_check_format(p->format))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p);
}

/* Top-level policy: should this composite operation be punted to the
 * software rasteriser?  Returns true to fall back.  The decision weighs
 * where the pixels currently live (GPU vs CPU) against which channels
 * would themselves need a fallback to sample. */
static bool
gen9_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen9_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

/* Try to initialise the mask channel (mc) by reusing the already-prepared
 * source channel (sc) when both sample the same drawable at the same
 * offset.  Returns true (non-zero) on success; on the solid-mask shortcut
 * it returns whatever gen4_channel_init_solid() does.  On success the bo
 * reference count is bumped since both channels now own a reference. */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen9_check_repeat(mask))
		return false;

	if (!gen9_check_filter(mask))
		return false;

	if (!gen9_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Same bo and transform, but the mask may differ in repeat,
	 * filter and format - patch those onto the copied channel. */
	*mc = *sc;
	mc->repeat = gen9_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen9_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen9_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

/* Prepare a composite operation on the render ring, filling in *tmp with
 * the state and emission vtable.  Order matters: BLT shortcut, fallback
 * policy, tiling split, target setup, then source/mask channel setup.
 * On failure each cleanup label unwinds exactly the references taken so
 * far (mask -> src -> dst redirect), finally retrying via the BLT
 * fallback for unmasked ops. */
static bool
gen9_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen9_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	if (mask == NULL &&
	    try_blt(sna, op,
		    src, mask, dst,
		    src_x, src_y,
		    msk_x, msk_y,
		    dst_x, dst_y,
		    width, height,
		    flags, tmp))
		return true;

	if (gen9_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the dedicated clear source is just a Src copy. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen9_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen9_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    (prefer_blt_composite(sna, tmp) ||
		     unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		if (!gen9_composite_channel_convert(&tmp->src))
			goto cleanup_src;

		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen9_blend_op[op].src_alpha &&
			    (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Render in two passes: OutReverse then Add
				 * (the "magic" component-alpha path). */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen9_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				if (!gen9_composite_channel_convert(&tmp->mask))
					goto cleanup_mask;
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend and vertex-emitter selection into the
	 * per-op state word consumed by gen9_emit_state(). */
	tmp->u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen9_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen4_choose_composite_emitter(sna, tmp));
	tmp->u.gen9.wm_kernel = gen9_choose_composite_kernel(tmp->op,
							     tmp->mask.bo != NULL,
							     tmp->has_component_alpha,
							     tmp->is_affine);

	tmp->blt   = gen9_render_composite_blt;
	tmp->box   = gen9_render_composite_box;
	tmp->boxes = gen9_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen9_render_composite_boxes;
		tmp->thread_boxes = gen9_render_composite_boxes__thread;
	}
	tmp->done  = gen9_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		/* No room in the current batch: flush and retry once. */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, tmp);
	gen9_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

#if !NO_COMPOSITE_SPANS
/* Emit a single opacity-weighted box via the op's prim_emit vertex
 * generator. */
fastcall static void
gen9_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}

/* Emit a run of boxes sharing one opacity, batching as many rectangles
 * per gen9_get_rectangles() reservation as will fit. */
static void
gen9_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
						     gen9_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("	%s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

/* Threaded span emission: reserve vertex space under the vertex lock,
 * then release the lock while op->emit_boxes() fills the reserved slots,
 * so other threads can reserve concurrently.  The acquire/release pairs
 * bracket the unlocked fill - keep their ordering intact. */
fastcall static void
gen9_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
						     gen9_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Tear down a spans op: flush pending vertices, drop the source bo
 * reference and finish any destination redirection. */
fastcall static void
gen9_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen8_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

/* Cheap pre-flight check used before committing to the spans path. */
static bool
gen9_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
	if (op >= ARRAY_SIZE(gen9_blend_op))
		return false;

	if (gen9_composite_fallback(sna, src, NULL, dst))
		return false;

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	return true;
}

/* Prepare an opacity-spans composite (no mask picture; opacity comes per
 * box).  Mirrors gen9_render_composite() but with a single source
 * channel and the OPACITY WM kernel. */
static bool
gen9_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen9_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen9_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (!gen9_composite_channel_convert(&tmp->base.src))
			goto cleanup_src;
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	tmp->base.u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen9_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       gen4_choose_spans_emitter(sna, tmp));
	/* Low bit of the kernel id selects the non-affine variant. */
	tmp->base.u.gen9.wm_kernel =
		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;

	tmp->box   = gen9_render_composite_spans_box;
	tmp->boxes = gen9_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen9_render_composite_spans_boxes__thread;
	tmp->done  = gen9_render_composite_spans_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp->base);
	gen9_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif

/* Emit the binding table and pipeline state for a copy.  If the new
 * two-entry surface table matches the previous one, rewind the surface
 * allocator and reuse the old table; also suppress the "dirty" flush bit
 * when the destination surface is unchanged. */
static void
gen9_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen9_get_batch(sna, op);

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen9_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Identical to the previous surface table? Reuse it. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen9.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN9_READS_DST(op->u.gen9.flags));
	gen9_emit_state(sna, op, offset | dirty);
}

/* Heuristics for routing a copy to the BLT ring rather than the render
 * ring; ordering expresses priority between competing signals. */
static inline bool
prefer_blt_copy(struct sna *sna,
		struct kgem_bo *src_bo,
		struct kgem_bo *dst_bo,
		unsigned flags)
{
	if (sna->kgem.mode == KGEM_BLT)
		return true;

	assert((flags & COPY_SYNC) == 0);

	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	/* DRI clients without semaphores: keep to one ring. */
	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
		return false;

	if (force_blt_ring(sna, dst_bo))
		return true;

	if ((flags & COPY_SMALL ||
	     (sna->render_state.gt < 3 && src_bo == dst_bo)) &&
	    can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (kgem_bo_is_render(dst_bo) ||
	    kgem_bo_is_render(src_bo))
		return false;

	if (flags & COPY_LAST &&
	    sna->render_state.gt < 3 &&
	    can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (prefer_render_ring(sna, dst_bo))
		return false;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	return prefer_blt_bo(sna, src_bo, dst_bo);
}
/* Copy an array of boxes from src to dst on the render ring.
 *
 * Attempts the blitter first; falls back to software for unsupported
 * alu/alignment; handles overlapping self-copies specially; redirects
 * oversized destinations and partial-binds oversized sources before
 * emitting two-coordinate (dst,src) rectangles. */
static bool
gen9_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;
	BoxRec extents;

	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
	     src_bo == dst_bo,
	     overlaps(sna,
		      src_bo, src_dx, src_dy,
		      dst_bo, dst_dx, dst_dy,
		      box, n, flags, &extents)));

	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	    sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	/* The render path only implements copy/clear on aligned bos. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    unaligned(src_bo, src->bitsPerPixel) ||
	    unaligned(dst_bo, dst->bitsPerPixel)) {
fallback_blt:
		DBG(("%s: fallback blt\n", __FUNCTION__));
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Overlapping src/dst: the sampler would read freshly written
	 * pixels, so use the blitter or the dedicated overlap handler. */
	if (overlaps(sna,
		     src_bo, src_dx, src_dy,
		     dst_bo, dst_dx, dst_dy,
		     box, n, flags,
		     &extents)) {
		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

		if ((big || !prefer_render_ring(sna, dst_bo)) &&
		    sna_blt_copy_boxes(sna, alu,
				       src_bo, src_dx, src_dy,
				       dst_bo, dst_dx, dst_dy,
				       dst->bitsPerPixel,
				       box, n))
			return true;

		if (big)
			goto fallback_blt;

		assert(src_bo == dst_bo);
		assert(src->depth == dst->depth);
		assert(src->width == dst->width);
		assert(src->height == dst->height);
		return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo,
						      src_dx, src_dy,
						      dst_dx, dst_dy,
						      box, n, &extents);
	}

	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen9_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		int i;

		/* Compute the bounding box of all target rectangles so a
		 * proxy surface can cover just the touched region. */
		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format);
	if (too_large(src->width, src->height)) {
		int i;

		/* Likewise bound the source reads for a partial binding. */
		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = src_bo;
		tmp.src.width  = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
	}

	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;

	tmp.u.gen9.flags = COPY_FLAGS(alu);
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
			if (tmp.src.bo != src_bo)
				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
			goto fallback_blt;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* Fold redirection/partial-bind offsets into the deltas. */
	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;

	tmp.dst.x = tmp.dst.y = 0;

	gen9_align_vertex(sna, &tmp);
	gen9_emit_copy_state(sna, &tmp);

	do {
		int16_t *v;
		int n_this_time;

		n_this_time = gen9_get_rectangles(sna, &tmp, n,
						  gen9_emit_copy_state);
		n -= n_this_time;

		/* 12 int16 per rect: 3 vertices x (dst.x, dst.y, src.x, src.y) */
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {

			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			v[0] = box->x2 + dst_dx;
			v[2] = box->x2 + src_dx;
			v[1]  = v[5] = box->y2 + dst_dy;
			v[3]  = v[7] = box->y2 + src_dy;
			v[8] = v[4] = box->x1 + dst_dx;
			v[10] = v[6] = box->x1 + src_dx;
			v[9] = box->y1 + dst_dy;
			v[11] = box->y1 + src_dy;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen8_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	if (tmp.src.bo != src_bo)
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	DBG(("%s: fallback tiled\n", __FUNCTION__));
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

/* Per-rectangle emitter for a prepared copy op: one rect as three
 * vertices of interleaved (dst, src) int16 coordinates. */
static void
gen9_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w,  int16_t h,
		     int16_t dx, int16_t dy)
{
	int16_t *v;

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0]  = dx+w; v[1]  = dy+h;
	v[2]  = sx+w; v[3]  = sy+h;
	v[4]  = dx;   v[5]  = dy+h;
	v[6]  = sx;   v[7]  = sy+h;
	v[8]  = dx;   v[9]  = dy;
	v[10] = sx;   v[11] = sy;
}

static void
gen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	if (sna->render.vertex_offset)
		gen8_vertex_flush(sna);
}

/* Prepare a reusable single-rect copy op (op->blt / op->done).  Chooses
 * BLT where preferable, and falls back to BLT for unsupported alu,
 * self-copies, oversized or misaligned surfaces. */
static bool
gen9_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(src_bo, src->drawable.bitsPerPixel) ||
	    unaligned(dst_bo, dst->drawable.bitsPerPixel)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen9_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen9_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen9.flags = COPY_FLAGS(alu);
	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &op->base);
	gen9_emit_copy_state(sna, &op->base);

	op->blt  = gen9_render_copy_blt;
	op->done = gen9_render_copy_done;
	return true;
}

/* Emit binding table and state for a solid fill: dst as render target,
 * plus a 1x1 solid-colour source surface.  Same surface-table reuse and
 * dirty suppression as gen9_emit_copy_state(). */
static void
gen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to a clearing the full MSC surface only with a
	 * specific kernel.
	 */

	gen9_get_batch(sna, op);

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen9_bind_bo(sna,
			     op->src.bo, 1, 1,
			     SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen9.surface_table;
	}

	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
		dirty = 0;

	gen9_emit_state(sna, op, offset | dirty);
}

/* Fill an array of boxes with a solid colour using the render ring.
 * Tries the blitter first for simple ops; otherwise renders with a
 * cached solid-colour bo, redirecting oversized destinations. */
static bool
gen9_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen9_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen9_check_dst_format(format) ||
	    unaligned(dst_bo, PICT_FORMAT_BPP(format))) {
		uint8_t alu = GXinvalid;

		/* Only Clear/Src map onto a plain blitter fill. */
		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen9_check_dst_format(format))
			return false;
	}

	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen9.flags = FILL_FLAGS(op, format);
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			tmp.src.bo = NULL;

			if (tmp.redirect.real_bo) {
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
				tmp.redirect.real_bo = NULL;
			}

			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_fill_state(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen9_get_rectangles(sna, &tmp, n,
						  gen9_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* dst coords plus a constant (1,1)/(0,0) texcoord
			 * sampling the 1x1 solid surface. */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}

/* Emit one filled rectangle (x,y,w,h) for a prepared fill op. */
static void
gen9_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *v;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x+w;
	v[4] = v[8] = x;
	v[1] = v[5] = y+h;
	v[9] = y;

	v[2] = v[3] = v[7] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit one filled box for a prepared fill op. */
fastcall static void
gen9_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *v;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[8] = v[4] = box->x1;
	v[5] = v[1] = box->y2;
	v[9] = box->y1;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit many filled boxes, batching vertex-space reservations. */
fastcall static void
gen9_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
						     gen9_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a fill op: flush vertices and drop the solid-colour bo. */
static void
gen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Prepare a reusable solid-fill op (continues beyond this chunk). */
static bool
gen9_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

if (!(alu == GXcopy || alu == GXclear) || 3546 too_large(dst->drawable.width, dst->drawable.height) || 3547 unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3548 return sna_blt_fill(sna, alu, 3549 dst_bo, dst->drawable.bitsPerPixel, 3550 color, 3551 op); 3552 3553 if (alu == GXclear) 3554 color = 0; 3555 3556 op->base.dst.pixmap = dst; 3557 op->base.dst.width = dst->drawable.width; 3558 op->base.dst.height = dst->drawable.height; 3559 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3560 op->base.dst.bo = dst_bo; 3561 op->base.dst.x = op->base.dst.y = 0; 3562 3563 op->base.src.bo = 3564 sna_render_get_solid(sna, 3565 sna_rgba_for_color(color, 3566 dst->drawable.depth)); 3567 op->base.mask.bo = NULL; 3568 3569 op->base.need_magic_ca_pass = false; 3570 op->base.floats_per_vertex = 2; 3571 op->base.floats_per_rect = 6; 3572 3573 op->base.u.gen9.flags = FILL_FLAGS_NOBLEND; 3574 op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3575 3576 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3577 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3578 kgem_submit(&sna->kgem); 3579 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3580 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3581 return false; 3582 } 3583 3584 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3585 } 3586 3587 gen9_align_vertex(sna, &op->base); 3588 gen9_emit_fill_state(sna, &op->base); 3589 3590 op->blt = gen9_render_fill_op_blt; 3591 op->box = gen9_render_fill_op_box; 3592 op->boxes = gen9_render_fill_op_boxes; 3593 op->points = NULL; 3594 op->done = gen9_render_fill_op_done; 3595 return true; 3596} 3597 3598static bool 3599gen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3600 uint32_t color, 3601 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3602 uint8_t alu) 3603{ 3604 BoxRec box; 3605 3606 box.x1 = x1; 3607 box.y1 = y1; 3608 box.x2 = x2; 3609 box.y2 = y2; 3610 3611 return sna_blt_fill_boxes(sna, alu, 3612 bo, dst->drawable.bitsPerPixel, 3613 color, &box, 1); 3614} 
3615 3616static bool 3617gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3618 uint32_t color, 3619 int16_t x1, int16_t y1, 3620 int16_t x2, int16_t y2, 3621 uint8_t alu) 3622{ 3623 struct sna_composite_op tmp; 3624 int16_t *v; 3625 3626 /* Prefer to use the BLT if already engaged */ 3627 if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3628 gen9_render_fill_one_try_blt(sna, dst, bo, color, 3629 x1, y1, x2, y2, alu)) 3630 return true; 3631 3632 /* Must use the BLT if we can't RENDER... */ 3633 if (!(alu == GXcopy || alu == GXclear) || 3634 too_large(dst->drawable.width, dst->drawable.height) || 3635 unaligned(bo, dst->drawable.bitsPerPixel)) 3636 return gen9_render_fill_one_try_blt(sna, dst, bo, color, 3637 x1, y1, x2, y2, alu); 3638 3639 if (alu == GXclear) 3640 color = 0; 3641 3642 tmp.dst.pixmap = dst; 3643 tmp.dst.width = dst->drawable.width; 3644 tmp.dst.height = dst->drawable.height; 3645 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3646 tmp.dst.bo = bo; 3647 tmp.dst.x = tmp.dst.y = 0; 3648 3649 tmp.src.bo = 3650 sna_render_get_solid(sna, 3651 sna_rgba_for_color(color, 3652 dst->drawable.depth)); 3653 tmp.mask.bo = NULL; 3654 3655 tmp.floats_per_vertex = 2; 3656 tmp.floats_per_rect = 6; 3657 tmp.need_magic_ca_pass = false; 3658 3659 tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; 3660 tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3661 3662 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3663 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3664 kgem_submit(&sna->kgem); 3665 if (kgem_check_bo(&sna->kgem, bo, NULL)) { 3666 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3667 return false; 3668 } 3669 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3670 } 3671 3672 gen9_align_vertex(sna, &tmp); 3673 gen9_emit_fill_state(sna, &tmp); 3674 3675 gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); 3676 3677 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3678 3679 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3680 sna->render.vertex_used += 6; 
3681 assert(sna->render.vertex_used <= sna->render.vertex_size); 3682 3683 v[0] = x2; 3684 v[8] = v[4] = x1; 3685 v[5] = v[1] = y2; 3686 v[9] = y1; 3687 v[7] = v[2] = v[3] = 1; 3688 v[6] = v[10] = v[11] = 0; 3689 3690 gen8_vertex_flush(sna); 3691 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3692 3693 return true; 3694} 3695 3696static bool 3697gen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3698{ 3699 BoxRec box; 3700 3701 box.x1 = 0; 3702 box.y1 = 0; 3703 box.x2 = dst->drawable.width; 3704 box.y2 = dst->drawable.height; 3705 3706 return sna_blt_fill_boxes(sna, GXclear, 3707 bo, dst->drawable.bitsPerPixel, 3708 0, &box, 1); 3709} 3710 3711static bool 3712gen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3713{ 3714 struct sna_composite_op tmp; 3715 int16_t *v; 3716 3717 DBG(("%s: %dx%d\n", 3718 __FUNCTION__, 3719 dst->drawable.width, 3720 dst->drawable.height)); 3721 3722 /* Prefer to use the BLT if already engaged */ 3723 if (sna->kgem.mode == KGEM_BLT && 3724 gen9_render_clear_try_blt(sna, dst, bo)) 3725 return true; 3726 3727 /* Must use the BLT if we can't RENDER... 
	 */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(bo, dst->drawable.bitsPerPixel))
		return gen9_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black (pixel value 0) source surface. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	/* One rect = 3 vertices x 2 floats (packed int16 pos + texcoord). */
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	/* Ensure the target fits in the batch; flush once and retry,
	 * releasing the solid bo on failure.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_fill_state(sna, &tmp);

	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Full-surface rectangle: (w,h), (0,h), (0,0). */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

#if !NO_VIDEO
/* Emit a SURFACE_STATE binding a video source plane at byte offset
 * `delta` within `bo` for the sampler, returning the byte offset of the
 * surface-state entry within the batch.
 */
static uint32_t gen9_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t delta,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss;
	int offset;

	/* Surface states grow down from the top of the batch. */
	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen9_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	ss[1] = 0;
	ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	/* dw8-9: 64-bit surface base address, relocated against bo+delta. */
	*(uint64_t *)(ss+8) =
		kgem_add_reloc64(&sna->kgem, offset + 8, bo,
				 I915_GEM_DOMAIN_SAMPLER << 16,
				 delta);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	/* NB: "addr" prints ss[1], which is always 0 here. */
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling));

	return offset * sizeof(uint32_t);
}

/* Bind the destination and all required video source planes and emit the
 * render state for a video blit.
 */
static void gen9_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	/* XXX VeBox, bicubic */

	gen9_get_batch(sna, op);

	/* Plane base offsets: [0,1] luma, [2,3] V, [4,5] U. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Two full-size luma surfaces then four half-size chroma
		 * surfaces (interleaved R8G8 for NV12, R8 otherwise).
		 */
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width;
			src_height[n] = frame->height;
			src_pitch[n] = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n] = frame->pitch[0];
		}
		n_src = 6;
	} else {
		/* Packed formats need a single source surface. */
		if (frame->id == FOURCC_RGB888)
			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
		else if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
		else if (is_ayuv_fourcc(frame->id))
			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
		else
			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen9_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen9_emit_state(sna, op, offset);
}

/* Pick the WM kernel matching the frame's fourcc and the requested
 * colorspace (BT.709 vs BT.601).
 */
static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			GEN9_WM_KERNEL_VIDEO_PLANAR_BT709 :
			GEN9_WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			GEN9_WM_KERNEL_VIDEO_NV12_BT709 :
			GEN9_WM_KERNEL_VIDEO_NV12_BT601;

	case FOURCC_RGB888:
	case FOURCC_RGB565:
		return GEN9_WM_KERNEL_VIDEO_RGB;

	case FOURCC_AYUV:
		return video->colorspace ?
			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;

	default:
		return video->colorspace ?
			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
			GEN9_WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

/* Blit a video frame into the destination region of a pixmap, scaling
 * as required, using the 3D pipeline and a video WM kernel.
 */
static bool
gen9_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	unsigned filter;
	const BoxRec *box;
	int nbox;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	/* Caller guarantees a render-capable GPU destination. */
	assert(priv->gpu_bo);
	assert(!too_large(pixmap->drawable.width, pixmap->drawable.height));
	assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel));

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* 3 floats per vertex: packed int16 position + (u, v) texcoords. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	DBG(("%s: scaling?=%d, planar?=%d [%x]\n",
	     __FUNCTION__,
	     src_width != dst_width || src_height != dst_height,
	     is_planar_fourcc(frame->id), frame->id));

	/* Only enable bilinear filtering when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       2);
	tmp.u.gen9.wm_kernel = select_video_kernel(video, frame);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_video_state(sna, &tmp);

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map destination pixels to normalised source texture coords:
	 * u = x * scale + offset, with scale/offset folding in the crop
	 * rectangle and the frame dimensions.
	 */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state);

		/* Three corners per rect: (x2,y2), (x1,y2), (x1,y1). */
		OUT_VERTEX(box->x2, box->y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		box++;
	}
	gen8_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}
#endif

/* Flush any open vertex buffer at the end of a batch. */
static void gen9_render_flush(struct sna *sna)
{
	gen8_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

/* Invalidate all cached hardware state so it is re-emitted into the next
 * batch.
 */
static void gen9_render_reset(struct sna *sna)
{
	sna->render_state.gen9.emit_flush = false;
	sna->render_state.gen9.needs_invariant = true;
	sna->render_state.gen9.ve_id = 3 << 2;
	sna->render_state.gen9.ve_dirty = false;
	sna->render_state.gen9.last_primitive = -1;

	/* -1 marks "unknown" so the first emit always reprograms. */
	sna->render_state.gen9.num_sf_outputs = 0;
	sna->render_state.gen9.samplers = -1;
	sna->render_state.gen9.blend = -1;
	sna->render_state.gen9.kernel = -1;
	sna->render_state.gen9.drawrect_offset = -1;
	sna->render_state.gen9.drawrect_limit = -1;
	sna->render_state.gen9.surface_table = 0;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static state (kernels, samplers, blend states). */
static void gen9_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo);
}

/* One-time construction of the static state stream: WM kernels, sampler
 * states and blend states, uploaded into a single bo.
 */
static bool gen9_render_setup(struct sna *sna)
{
	struct gen9_render_state *state = &sna->render_state.gen9;
	struct sna_static_stream general;
	struct gen9_sampler_state *ss;
	int i, j, k, l, m;
	uint32_t devid;

	devid = intel_get_device_id(sna->dev);
	/* Decode a GT level from the low byte of the PCI device id;
	 * presumably bits 7:4 encode the GT tier — TODO confirm against
	 * the device id tables.
	 */
	if (devid & 0xf)
		state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1;
	DBG(("%s: gt=%d\n", __FUNCTION__, state->gt));

	/* Select the per-platform info table, defaulting to the minimal
	 * configuration for unrecognised gen9 parts.
	 */
	state->info = &min_gt_info;
	if (is_skl(sna))
		state->info = &skl_gt_info;
	if (is_bxt(sna))
		state->info = &bxt_gt_info;
	if (is_kbl(sna))
		state->info = &kbl_gt_info;
	if (is_glk(sna))
		state->info = &glk_gt_info;
	if (is_cfl(sna))
		state->info = &cfl_gt_info;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Upload each WM kernel; prebuilt blobs go into slot [1] (16-pixel),
	 * otherwise compile for the enabled dispatch widths.
	 */
	for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	COMPILE_TIME_ASSERT(GEN9_WM_KERNEL_COUNT <=
			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen9.wm_kernel) * 8));

	/* Sampler-state table: two fixed pairs (copy, fill) followed by a
	 * pair for every (src filter/extend) x (mask filter/extend) combo,
	 * indexed by SAMPLER_OFFSET().
	 */
	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff);
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					/* source sampler, then mask sampler */
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen9_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Entry point: install the gen9 render backend if the static state can be
 * constructed; returns the backend name (or the unchanged fallback name).
 */
const char *gen9_render_init(struct sna *sna, const char *backend)
{
	if (!gen9_render_setup(sna))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen9_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen9_check_composite_spans;
	sna->render.composite_spans = gen9_render_composite_spans;
	sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
#if !NO_VIDEO
	sna->render.video = gen9_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen9_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen9_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen9_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen9_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen9_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen9_render_clear;
#endif

	sna->render.flush = gen9_render_flush;
	sna->render.reset = gen9_render_reset;
	sna->render.fini = gen9_render_fini;

	sna->render.max_3d_size = GEN9_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen9.info->name;
}