/* gen6_render.c — revision 42542f5f */
1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen6_render.h" 45#include "gen6_common.h" 46#include "gen4_common.h" 47#include "gen4_source.h" 48#include "gen4_vertex.h" 49 50#define NO_COMPOSITE 0 51#define NO_COMPOSITE_SPANS 0 52#define NO_COPY 0 53#define NO_COPY_BOXES 0 54#define NO_FILL 0 55#define NO_FILL_BOXES 0 56#define NO_FILL_ONE 0 57#define NO_FILL_CLEAR 0 58 59#define USE_8_PIXEL_DISPATCH 1 60#define USE_16_PIXEL_DISPATCH 1 61#define USE_32_PIXEL_DISPATCH 0 62 63#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 64#error "Must select at least 8, 16 or 32 pixel dispatch" 65#endif 66 67#define GEN6_MAX_SIZE 8192 68 69struct gt_info { 70 const char *name; 71 int max_vs_threads; 72 int max_gs_threads; 73 int max_wm_threads; 74 struct { 75 int size; 76 int max_vs_entries; 77 int max_gs_entries; 78 } urb; 79 int gt; 80}; 81 82static const struct gt_info gt1_info = { 83 .name = "Sandybridge (gen6, gt1)", 84 .max_vs_threads = 24, 85 .max_gs_threads = 21, 86 .max_wm_threads = 40, 87 .urb = { 32, 256, 256 }, 88 .gt = 1, 89}; 90 91static const struct gt_info gt2_info = { 92 .name = "Sandybridge (gen6, gt2)", 93 .max_vs_threads = 60, 94 .max_gs_threads = 60, 95 .max_wm_threads = 80, 96 .urb = { 64, 256, 256 }, 97 .gt = 2, 98}; 99 100static const uint32_t ps_kernel_packed[][4] = { 101#include "exa_wm_src_affine.g6b" 102#include "exa_wm_src_sample_argb.g6b" 103#include "exa_wm_yuv_rgb.g6b" 104#include "exa_wm_write.g6b" 105}; 106 107static const uint32_t ps_kernel_planar[][4] = { 108#include "exa_wm_src_affine.g6b" 109#include 
"exa_wm_src_sample_planar.g6b" 110#include "exa_wm_yuv_rgb.g6b" 111#include "exa_wm_write.g6b" 112}; 113 114#define NOKERNEL(kernel_enum, func, ns) \ 115 [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} 116#define KERNEL(kernel_enum, kernel, ns) \ 117 [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} 118 119static const struct wm_kernel_info { 120 const char *name; 121 const void *data; 122 unsigned int size; 123 unsigned int num_surfaces; 124} wm_kernels[] = { 125 NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 126 NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 127 128 NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 129 NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 130 131 NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 132 NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), 133 134 NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 135 NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 136 137 NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 138 NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 139 140 KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 141 KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 142}; 143#undef KERNEL 144 145static const struct blendinfo { 146 bool src_alpha; 147 uint32_t src_blend; 148 uint32_t dst_blend; 149} gen6_blend_op[] = { 150 /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, 151 /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, 152 /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, 153 /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 154 /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, 155 /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 156 /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, 157 /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 158 /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 159 /* 
Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 160 /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, 161 /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 162 /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, 163}; 164 165/** 166 * Highest-valued BLENDFACTOR used in gen6_blend_op. 167 * 168 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, 169 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 170 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 171 */ 172#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) 173 174#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 175 176#define BLEND_OFFSET(s, d) \ 177 (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) 178 179#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) 180#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) 181 182#define SAMPLER_OFFSET(sf, se, mf, me) \ 183 (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) 184 185#define VERTEX_2s2s 0 186 187#define COPY_SAMPLER 0 188#define COPY_VERTEX VERTEX_2s2s 189#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) 190 191#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) 192#define FILL_VERTEX VERTEX_2s2s 193#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 194#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 195 196#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) 197#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) 198#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) 199#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) 200#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 201 202#define OUT_BATCH(v) batch_emit(sna, v) 203#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 204#define OUT_VERTEX_F(v) vertex_emit(sna, v) 205 206static inline bool too_large(int width, int height) 207{ 208 return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; 209} 210 211static uint32_t gen6_get_blend(int op, 212 bool has_component_alpha, 213 uint32_t dst_format) 214{ 215 uint32_t src, dst; 216 217 src = gen6_blend_op[op].src_blend; 218 dst = gen6_blend_op[op].dst_blend; 219 220 /* If there's no dst alpha channel, adjust the blend op so that 221 * we'll treat it always as 1. 222 */ 223 if (PICT_FORMAT_A(dst_format) == 0) { 224 if (src == GEN6_BLENDFACTOR_DST_ALPHA) 225 src = GEN6_BLENDFACTOR_ONE; 226 else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) 227 src = GEN6_BLENDFACTOR_ZERO; 228 } 229 230 /* If the source alpha is being used, then we should only be in a 231 * case where the source blend factor is 0, and the source blend 232 * value is the mask channels multiplied by the source picture's alpha. 
233 */ 234 if (has_component_alpha && gen6_blend_op[op].src_alpha) { 235 if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) 236 dst = GEN6_BLENDFACTOR_SRC_COLOR; 237 else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) 238 dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; 239 } 240 241 DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 242 op, dst_format, PICT_FORMAT_A(dst_format), 243 src, dst, (int)BLEND_OFFSET(src, dst))); 244 return BLEND_OFFSET(src, dst); 245} 246 247static uint32_t gen6_get_card_format(PictFormat format) 248{ 249 switch (format) { 250 default: 251 return -1; 252 case PICT_a8r8g8b8: 253 return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 254 case PICT_x8r8g8b8: 255 return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; 256 case PICT_a8b8g8r8: 257 return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 258 case PICT_x8b8g8r8: 259 return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; 260#ifdef PICT_a2r10g10b10 261 case PICT_a2r10g10b10: 262 return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 263 case PICT_x2r10g10b10: 264 return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; 265#endif 266 case PICT_r8g8b8: 267 return GEN6_SURFACEFORMAT_R8G8B8_UNORM; 268 case PICT_r5g6b5: 269 return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 270 case PICT_a1r5g5b5: 271 return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 272 case PICT_a8: 273 return GEN6_SURFACEFORMAT_A8_UNORM; 274 case PICT_a4r4g4b4: 275 return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 276 } 277} 278 279static uint32_t gen6_get_dest_format(PictFormat format) 280{ 281 switch (format) { 282 default: 283 return -1; 284 case PICT_a8r8g8b8: 285 case PICT_x8r8g8b8: 286 return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 287 case PICT_a8b8g8r8: 288 case PICT_x8b8g8r8: 289 return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 290#ifdef PICT_a2r10g10b10 291 case PICT_a2r10g10b10: 292 case PICT_x2r10g10b10: 293 return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 294#endif 295 case PICT_r5g6b5: 296 return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 297 case PICT_x1r5g5b5: 298 case PICT_a1r5g5b5: 299 return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 300 
case PICT_a8: 301 return GEN6_SURFACEFORMAT_A8_UNORM; 302 case PICT_a4r4g4b4: 303 case PICT_x4r4g4b4: 304 return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 305 } 306} 307 308static bool gen6_check_dst_format(PictFormat format) 309{ 310 if (gen6_get_dest_format(format) != -1) 311 return true; 312 313 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 314 return false; 315} 316 317static bool gen6_check_format(uint32_t format) 318{ 319 if (gen6_get_card_format(format) != -1) 320 return true; 321 322 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 323 return false; 324} 325 326static uint32_t gen6_filter(uint32_t filter) 327{ 328 switch (filter) { 329 default: 330 assert(0); 331 case PictFilterNearest: 332 return SAMPLER_FILTER_NEAREST; 333 case PictFilterBilinear: 334 return SAMPLER_FILTER_BILINEAR; 335 } 336} 337 338static uint32_t gen6_check_filter(PicturePtr picture) 339{ 340 switch (picture->filter) { 341 case PictFilterNearest: 342 case PictFilterBilinear: 343 return true; 344 default: 345 return false; 346 } 347} 348 349static uint32_t gen6_repeat(uint32_t repeat) 350{ 351 switch (repeat) { 352 default: 353 assert(0); 354 case RepeatNone: 355 return SAMPLER_EXTEND_NONE; 356 case RepeatNormal: 357 return SAMPLER_EXTEND_REPEAT; 358 case RepeatPad: 359 return SAMPLER_EXTEND_PAD; 360 case RepeatReflect: 361 return SAMPLER_EXTEND_REFLECT; 362 } 363} 364 365static bool gen6_check_repeat(PicturePtr picture) 366{ 367 if (!picture->repeat) 368 return true; 369 370 switch (picture->repeatType) { 371 case RepeatNone: 372 case RepeatNormal: 373 case RepeatPad: 374 case RepeatReflect: 375 return true; 376 default: 377 return false; 378 } 379} 380 381static int 382gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 383{ 384 int base; 385 386 if (has_mask) { 387 if (is_ca) { 388 if (gen6_blend_op[op].src_alpha) 389 base = GEN6_WM_KERNEL_MASKSA; 390 else 391 base = GEN6_WM_KERNEL_MASKCA; 392 } else 393 base = 
GEN6_WM_KERNEL_MASK; 394 } else 395 base = GEN6_WM_KERNEL_NOMASK; 396 397 return base + !is_affine; 398} 399 400static void 401gen6_emit_urb(struct sna *sna) 402{ 403 OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); 404 OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | 405 (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ 406 OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | 407 (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ 408} 409 410static void 411gen6_emit_state_base_address(struct sna *sna) 412{ 413 OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); 414 OUT_BATCH(0); /* general */ 415 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 416 sna->kgem.nbatch, 417 NULL, 418 I915_GEM_DOMAIN_INSTRUCTION << 16, 419 BASE_ADDRESS_MODIFY)); 420 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 421 sna->kgem.nbatch, 422 sna->render_state.gen6.general_bo, 423 I915_GEM_DOMAIN_INSTRUCTION << 16, 424 BASE_ADDRESS_MODIFY)); 425 OUT_BATCH(0); /* indirect */ 426 OUT_BATCH(kgem_add_reloc(&sna->kgem, 427 sna->kgem.nbatch, 428 sna->render_state.gen6.general_bo, 429 I915_GEM_DOMAIN_INSTRUCTION << 16, 430 BASE_ADDRESS_MODIFY)); 431 432 /* upper bounds, disable */ 433 OUT_BATCH(0); 434 OUT_BATCH(BASE_ADDRESS_MODIFY); 435 OUT_BATCH(0); 436 OUT_BATCH(BASE_ADDRESS_MODIFY); 437} 438 439static void 440gen6_emit_viewports(struct sna *sna) 441{ 442 OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | 443 GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | 444 (4 - 2)); 445 OUT_BATCH(0); 446 OUT_BATCH(0); 447 OUT_BATCH(0); 448} 449 450static void 451gen6_emit_vs(struct sna *sna) 452{ 453 /* disable VS constant buffer */ 454 OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); 455 OUT_BATCH(0); 456 OUT_BATCH(0); 457 OUT_BATCH(0); 458 OUT_BATCH(0); 459 460 OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); 461 OUT_BATCH(0); /* no VS kernel */ 462 OUT_BATCH(0); 463 OUT_BATCH(0); 464 OUT_BATCH(0); 465 OUT_BATCH(0); /* pass-through */ 466} 467 468static 
void 469gen6_emit_gs(struct sna *sna) 470{ 471 /* disable GS constant buffer */ 472 OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); 473 OUT_BATCH(0); 474 OUT_BATCH(0); 475 OUT_BATCH(0); 476 OUT_BATCH(0); 477 478 OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); 479 OUT_BATCH(0); /* no GS kernel */ 480 OUT_BATCH(0); 481 OUT_BATCH(0); 482 OUT_BATCH(0); 483 OUT_BATCH(0); 484 OUT_BATCH(0); /* pass-through */ 485} 486 487static void 488gen6_emit_clip(struct sna *sna) 489{ 490 OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); 491 OUT_BATCH(0); 492 OUT_BATCH(0); /* pass-through */ 493 OUT_BATCH(0); 494} 495 496static void 497gen6_emit_wm_constants(struct sna *sna) 498{ 499 /* disable WM constant buffer */ 500 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); 501 OUT_BATCH(0); 502 OUT_BATCH(0); 503 OUT_BATCH(0); 504 OUT_BATCH(0); 505} 506 507static void 508gen6_emit_null_depth_buffer(struct sna *sna) 509{ 510 OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); 511 OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 512 GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 513 OUT_BATCH(0); 514 OUT_BATCH(0); 515 OUT_BATCH(0); 516 OUT_BATCH(0); 517 OUT_BATCH(0); 518 519 OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); 520 OUT_BATCH(0); 521} 522 523static void 524gen6_emit_invariant(struct sna *sna) 525{ 526 OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); 527 528 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); 529 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 530 GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 531 OUT_BATCH(0); 532 533 OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); 534 OUT_BATCH(1); 535 536 gen6_emit_urb(sna); 537 538 gen6_emit_state_base_address(sna); 539 540 gen6_emit_viewports(sna); 541 gen6_emit_vs(sna); 542 gen6_emit_gs(sna); 543 gen6_emit_clip(sna); 544 gen6_emit_wm_constants(sna); 545 gen6_emit_null_depth_buffer(sna); 546 547 sna->render_state.gen6.needs_invariant = false; 548} 549 550static bool 
551gen6_emit_cc(struct sna *sna, int blend) 552{ 553 struct gen6_render_state *render = &sna->render_state.gen6; 554 555 if (render->blend == blend) 556 return blend != NO_BLEND; 557 558 DBG(("%s: blend = %x\n", __FUNCTION__, blend)); 559 560 OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); 561 OUT_BATCH((render->cc_blend + blend) | 1); 562 if (render->blend == (unsigned)-1) { 563 OUT_BATCH(1); 564 OUT_BATCH(1); 565 } else { 566 OUT_BATCH(0); 567 OUT_BATCH(0); 568 } 569 570 render->blend = blend; 571 return blend != NO_BLEND; 572} 573 574static void 575gen6_emit_sampler(struct sna *sna, uint32_t state) 576{ 577 if (sna->render_state.gen6.samplers == state) 578 return; 579 580 sna->render_state.gen6.samplers = state; 581 582 DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 583 584 OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | 585 GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | 586 (4 - 2)); 587 OUT_BATCH(0); /* VS */ 588 OUT_BATCH(0); /* GS */ 589 OUT_BATCH(sna->render_state.gen6.wm_state + state); 590} 591 592static void 593gen6_emit_sf(struct sna *sna, bool has_mask) 594{ 595 int num_sf_outputs = has_mask ? 
2 : 1; 596 597 if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) 598 return; 599 600 DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 601 __FUNCTION__, num_sf_outputs, 1, 0)); 602 603 sna->render_state.gen6.num_sf_outputs = num_sf_outputs; 604 605 OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); 606 OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | 607 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | 608 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); 609 OUT_BATCH(0); 610 OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); 611 OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ 612 OUT_BATCH(0); 613 OUT_BATCH(0); 614 OUT_BATCH(0); 615 OUT_BATCH(0); 616 OUT_BATCH(0); /* DW9 */ 617 OUT_BATCH(0); 618 OUT_BATCH(0); 619 OUT_BATCH(0); 620 OUT_BATCH(0); 621 OUT_BATCH(0); /* DW14 */ 622 OUT_BATCH(0); 623 OUT_BATCH(0); 624 OUT_BATCH(0); 625 OUT_BATCH(0); 626 OUT_BATCH(0); /* DW19 */ 627} 628 629static void 630gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) 631{ 632 const uint32_t *kernels; 633 634 if (sna->render_state.gen6.kernel == kernel) 635 return; 636 637 sna->render_state.gen6.kernel = kernel; 638 kernels = sna->render_state.gen6.wm_kernel[kernel]; 639 640 DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", 641 __FUNCTION__, 642 wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, 643 kernels[0], kernels[1], kernels[2])); 644 645 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 646 OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 647 OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | 648 wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); 649 OUT_BATCH(0); /* scratch space */ 650 OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 
6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | 651 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | 652 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); 653 OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | 654 (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | 655 (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | 656 (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | 657 GEN6_3DSTATE_WM_DISPATCH_ENABLE); 658 OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | 659 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 660 OUT_BATCH(kernels[2]); 661 OUT_BATCH(kernels[1]); 662} 663 664static bool 665gen6_emit_binding_table(struct sna *sna, uint16_t offset) 666{ 667 if (sna->render_state.gen6.surface_table == offset) 668 return false; 669 670 /* Binding table pointers */ 671 OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | 672 GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | 673 (4 - 2)); 674 OUT_BATCH(0); /* vs */ 675 OUT_BATCH(0); /* gs */ 676 /* Only the PS uses the binding table */ 677 OUT_BATCH(offset*4); 678 679 sna->render_state.gen6.surface_table = offset; 680 return true; 681} 682 683static bool 684gen6_emit_drawing_rectangle(struct sna *sna, 685 const struct sna_composite_op *op) 686{ 687 uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 688 uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 689 690 assert(!too_large(op->dst.x, op->dst.y)); 691 assert(!too_large(op->dst.width, op->dst.height)); 692 693 if (sna->render_state.gen6.drawrect_limit == limit && 694 sna->render_state.gen6.drawrect_offset == offset) 695 return false; 696 697 /* [DevSNB-C+{W/A}] Before any depth stall flush (including those 698 * produced by non-pipelined state commands), software needs to first 699 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 700 * 0. 
701 * 702 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 703 * BEFORE the pipe-control with a post-sync op and no write-cache 704 * flushes. 705 */ 706 if (!sna->render_state.gen6.first_state_packet) { 707 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 708 OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | 709 GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); 710 OUT_BATCH(0); 711 OUT_BATCH(0); 712 } 713 714 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 715 OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); 716 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 717 sna->render_state.gen6.general_bo, 718 I915_GEM_DOMAIN_INSTRUCTION << 16 | 719 I915_GEM_DOMAIN_INSTRUCTION, 720 64)); 721 OUT_BATCH(0); 722 723 DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n", 724 __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.width)); 725 OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 726 OUT_BATCH(0); 727 OUT_BATCH(limit); 728 OUT_BATCH(offset); 729 730 sna->render_state.gen6.drawrect_offset = offset; 731 sna->render_state.gen6.drawrect_limit = limit; 732 return true; 733} 734 735static void 736gen6_emit_vertex_elements(struct sna *sna, 737 const struct sna_composite_op *op) 738{ 739 /* 740 * vertex data in vertex buffer 741 * position: (x, y) 742 * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 743 * texture coordinate 1 if (has_mask is true): same as above 744 */ 745 struct gen6_render_state *render = &sna->render_state.gen6; 746 uint32_t src_format, dw; 747 int id = GEN6_VERTEX(op->u.gen6.flags); 748 bool has_mask; 749 750 DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 751 752 if (render->ve_id == id) 753 return; 754 render->ve_id = id; 755 756 /* The VUE layout 757 * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) 758 * dword 4-7: position (x, y, 1.0, 1.0), 759 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 760 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 761 * 762 * dword 4-15 are fetched from vertex buffer 763 */ 764 has_mask = (id >> 2) != 0; 765 OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | 766 ((2 * (3 + has_mask)) + 1 - 2)); 767 768 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 769 GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | 770 0 << VE0_OFFSET_SHIFT); 771 OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | 772 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | 773 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 774 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); 775 776 /* x,y */ 777 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 778 GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 779 0 << VE0_OFFSET_SHIFT); 780 OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | 781 GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | 782 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 783 GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); 784 785 /* u0, v0, w0 */ 786 DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 787 dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 788 switch (id & 3) { 789 default: 790 assert(0); 791 case 0: 792 src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; 793 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 794 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 795 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 796 break; 797 case 1: 798 src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 799 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 800 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 801 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 802 break; 803 case 2: 804 src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 805 dw |= 
GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 806 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 807 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 808 break; 809 case 3: 810 src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 811 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 812 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 813 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 814 break; 815 } 816 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 817 src_format << VE0_FORMAT_SHIFT | 818 4 << VE0_OFFSET_SHIFT); 819 OUT_BATCH(dw); 820 821 /* u1, v1, w1 */ 822 if (has_mask) { 823 unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 824 DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 825 dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 826 switch (id >> 2) { 827 case 1: 828 src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 829 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 830 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 831 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 832 break; 833 default: 834 assert(0); 835 case 2: 836 src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 837 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 838 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 839 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 840 break; 841 case 3: 842 src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 843 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 844 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 845 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 846 break; 847 } 848 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 849 src_format << VE0_FORMAT_SHIFT | 850 offset << VE0_OFFSET_SHIFT); 851 OUT_BATCH(dw); 852 } 853} 854 855static void 856gen6_emit_flush(struct sna *sna) 857{ 858 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 
859 OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | 860 GEN6_PIPE_CONTROL_TC_FLUSH | 861 GEN6_PIPE_CONTROL_CS_STALL); 862 OUT_BATCH(0); 863 OUT_BATCH(0); 864} 865 866static void 867gen6_emit_state(struct sna *sna, 868 const struct sna_composite_op *op, 869 uint16_t wm_binding_table) 870{ 871 bool need_flush, need_stall; 872 873 assert(op->dst.bo->exec); 874 875 need_stall = wm_binding_table & 1; 876 need_flush = false; 877 if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) 878 need_flush = need_stall; 879 gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); 880 gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); 881 gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); 882 gen6_emit_vertex_elements(sna, op); 883 884 need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); 885 if (gen6_emit_drawing_rectangle(sna, op)) 886 need_stall = false; 887 if (need_flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 888 gen6_emit_flush(sna); 889 kgem_clear_dirty(&sna->kgem); 890 assert(op->dst.bo->exec); 891 kgem_bo_mark_dirty(op->dst.bo); 892 need_stall = false; 893 } 894 if (need_stall) { 895 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 896 OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | 897 GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); 898 OUT_BATCH(0); 899 OUT_BATCH(0); 900 } 901 sna->render_state.gen6.first_state_packet = false; 902} 903 904static bool gen6_magic_ca_pass(struct sna *sna, 905 const struct sna_composite_op *op) 906{ 907 struct gen6_render_state *state = &sna->render_state.gen6; 908 909 if (!op->need_magic_ca_pass) 910 return false; 911 912 DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 913 sna->render.vertex_start, sna->render.vertex_index)); 914 915 gen6_emit_flush(sna); 916 917 gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); 918 gen6_emit_wm(sna, 919 gen6_choose_composite_kernel(PictOpAdd, 920 true, true, 921 op->is_affine), 922 true); 923 924 OUT_BATCH(GEN6_3DPRIMITIVE | 925 
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 926 _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 927 0 << 9 | 928 4); 929 OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 930 OUT_BATCH(sna->render.vertex_start); 931 OUT_BATCH(1); /* single instance */ 932 OUT_BATCH(0); /* start instance location */ 933 OUT_BATCH(0); /* index buffer offset, ignored */ 934 935 state->last_primitive = sna->kgem.nbatch; 936 return true; 937} 938 939typedef struct gen6_surface_state_padded { 940 struct gen6_surface_state state; 941 char pad[32 - sizeof(struct gen6_surface_state)]; 942} gen6_surface_state_padded; 943 944static void null_create(struct sna_static_stream *stream) 945{ 946 /* A bunch of zeros useful for legacy border color and depth-stencil */ 947 sna_static_stream_map(stream, 64, 64); 948} 949 950static void scratch_create(struct sna_static_stream *stream) 951{ 952 /* 64 bytes of scratch space for random writes, such as 953 * the pipe-control w/a. 954 */ 955 sna_static_stream_map(stream, 64, 64); 956} 957 958static void 959sampler_state_init(struct gen6_sampler_state *sampler_state, 960 sampler_filter_t filter, 961 sampler_extend_t extend) 962{ 963 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 964 965 /* We use the legacy mode to get the semantics specified by 966 * the Render extension. 
*/ 967 sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; 968 969 switch (filter) { 970 default: 971 case SAMPLER_FILTER_NEAREST: 972 sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; 973 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; 974 break; 975 case SAMPLER_FILTER_BILINEAR: 976 sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; 977 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; 978 break; 979 } 980 981 switch (extend) { 982 default: 983 case SAMPLER_EXTEND_NONE: 984 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 985 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 986 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 987 break; 988 case SAMPLER_EXTEND_REPEAT: 989 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 990 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 991 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 992 break; 993 case SAMPLER_EXTEND_PAD: 994 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 995 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 996 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 997 break; 998 case SAMPLER_EXTEND_REFLECT: 999 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1000 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1001 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1002 break; 1003 } 1004} 1005 1006static void 1007sampler_copy_init(struct gen6_sampler_state *ss) 1008{ 1009 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1010 ss->ss3.non_normalized_coord = 1; 1011 1012 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1013} 1014 1015static void 1016sampler_fill_init(struct gen6_sampler_state *ss) 1017{ 1018 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1019 ss->ss3.non_normalized_coord = 1; 1020 1021 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1022} 
1023 1024static uint32_t 1025gen6_tiling_bits(uint32_t tiling) 1026{ 1027 switch (tiling) { 1028 default: assert(0); 1029 case I915_TILING_NONE: return 0; 1030 case I915_TILING_X: return GEN6_SURFACE_TILED; 1031 case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; 1032 } 1033} 1034 1035/** 1036 * Sets up the common fields for a surface state buffer for the given 1037 * picture in the given surface state buffer. 1038 */ 1039static int 1040gen6_bind_bo(struct sna *sna, 1041 struct kgem_bo *bo, 1042 uint32_t width, 1043 uint32_t height, 1044 uint32_t format, 1045 bool is_dst) 1046{ 1047 uint32_t *ss; 1048 uint32_t domains; 1049 uint16_t offset; 1050 uint32_t is_scanout = is_dst && bo->scanout; 1051 1052 /* After the first bind, we manage the cache domains within the batch */ 1053 offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1054 if (offset) { 1055 DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", 1056 offset, bo->handle, format, 1057 is_dst ? "render" : "sampler")); 1058 assert(offset >= sna->kgem.surface); 1059 if (is_dst) 1060 kgem_bo_mark_dirty(bo); 1061 return offset * sizeof(uint32_t); 1062 } 1063 1064 offset = sna->kgem.surface -= 1065 sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1066 ss = sna->kgem.batch + offset; 1067 ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | 1068 GEN6_SURFACE_BLEND_ENABLED | 1069 format << GEN6_SURFACE_FORMAT_SHIFT); 1070 if (is_dst) { 1071 ss[0] |= GEN6_SURFACE_RC_READ_WRITE; 1072 domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1073 } else 1074 domains = I915_GEM_DOMAIN_SAMPLER << 16; 1075 ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 1076 ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | 1077 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); 1078 assert(bo->pitch <= (1 << 18)); 1079 ss[3] = (gen6_tiling_bits(bo->tiling) | 1080 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); 1081 ss[4] = 0; 1082 ss[5] = (is_scanout || bo->io) ? 
0 : 3 << 16; 1083 1084 kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1085 1086 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1087 offset, bo->handle, ss[1], 1088 format, width, height, bo->pitch, bo->tiling, 1089 domains & 0xffff ? "render" : "sampler")); 1090 1091 return offset * sizeof(uint32_t); 1092} 1093 1094static void gen6_emit_vertex_buffer(struct sna *sna, 1095 const struct sna_composite_op *op) 1096{ 1097 int id = GEN6_VERTEX(op->u.gen6.flags); 1098 1099 OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); 1100 OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | 1101 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); 1102 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1103 OUT_BATCH(0); 1104 OUT_BATCH(~0); /* max address: disabled */ 1105 OUT_BATCH(0); 1106 1107 sna->render.vb_id |= 1 << id; 1108} 1109 1110static void gen6_emit_primitive(struct sna *sna) 1111{ 1112 if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { 1113 DBG(("%s: continuing previous primitive, start=%d, index=%d\n", 1114 __FUNCTION__, 1115 sna->render.vertex_start, 1116 sna->render.vertex_index)); 1117 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1118 return; 1119 } 1120 1121 OUT_BATCH(GEN6_3DPRIMITIVE | 1122 GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1123 _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 1124 0 << 9 | 1125 4); 1126 sna->render.vertex_offset = sna->kgem.nbatch; 1127 OUT_BATCH(0); /* vertex count, to be filled in later */ 1128 OUT_BATCH(sna->render.vertex_index); 1129 OUT_BATCH(1); /* single instance */ 1130 OUT_BATCH(0); /* start instance location */ 1131 OUT_BATCH(0); /* index buffer offset, ignored */ 1132 sna->render.vertex_start = sna->render.vertex_index; 1133 DBG(("%s: started new primitive: index=%d\n", 1134 __FUNCTION__, sna->render.vertex_start)); 1135 1136 sna->render_state.gen6.last_primitive = sna->kgem.nbatch; 1137} 1138 1139static 
bool gen6_rectangle_begin(struct sna *sna, 1140 const struct sna_composite_op *op) 1141{ 1142 int id = 1 << GEN6_VERTEX(op->u.gen6.flags); 1143 int ndwords; 1144 1145 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1146 return true; 1147 1148 ndwords = op->need_magic_ca_pass ? 60 : 6; 1149 if ((sna->render.vb_id & id) == 0) 1150 ndwords += 5; 1151 if (!kgem_check_batch(&sna->kgem, ndwords)) 1152 return false; 1153 1154 if ((sna->render.vb_id & id) == 0) 1155 gen6_emit_vertex_buffer(sna, op); 1156 1157 gen6_emit_primitive(sna); 1158 return true; 1159} 1160 1161static int gen6_get_rectangles__flush(struct sna *sna, 1162 const struct sna_composite_op *op) 1163{ 1164 /* Preventing discarding new vbo after lock contention */ 1165 if (sna_vertex_wait__locked(&sna->render)) { 1166 int rem = vertex_space(sna); 1167 if (rem > op->floats_per_rect) 1168 return rem; 1169 } 1170 1171 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) 1172 return 0; 1173 if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1174 return 0; 1175 1176 if (sna->render.vertex_offset) { 1177 gen4_vertex_flush(sna); 1178 if (gen6_magic_ca_pass(sna, op)) { 1179 gen6_emit_flush(sna); 1180 gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); 1181 gen6_emit_wm(sna, 1182 GEN6_KERNEL(op->u.gen6.flags), 1183 GEN6_VERTEX(op->u.gen6.flags) >> 2); 1184 } 1185 } 1186 1187 return gen4_vertex_finish(sna); 1188} 1189 1190inline static int gen6_get_rectangles(struct sna *sna, 1191 const struct sna_composite_op *op, 1192 int want, 1193 void (*emit_state)(struct sna *, const struct sna_composite_op *op)) 1194{ 1195 int rem; 1196 1197 assert(want); 1198 1199start: 1200 rem = vertex_space(sna); 1201 if (unlikely(rem < op->floats_per_rect)) { 1202 DBG(("flushing vbo for %s: %d < %d\n", 1203 __FUNCTION__, rem, op->floats_per_rect)); 1204 rem = gen6_get_rectangles__flush(sna, op); 1205 if (unlikely(rem == 0)) 1206 goto flush; 1207 } 1208 1209 if (unlikely(sna->render.vertex_offset == 0)) { 1210 
if (!gen6_rectangle_begin(sna, op)) 1211 goto flush; 1212 else 1213 goto start; 1214 } 1215 1216 assert(rem <= vertex_space(sna)); 1217 assert(op->floats_per_rect <= rem); 1218 if (want > 1 && want * op->floats_per_rect > rem) 1219 want = rem / op->floats_per_rect; 1220 1221 assert(want > 0); 1222 sna->render.vertex_index += 3*want; 1223 return want; 1224 1225flush: 1226 if (sna->render.vertex_offset) { 1227 gen4_vertex_flush(sna); 1228 gen6_magic_ca_pass(sna, op); 1229 } 1230 sna_vertex_wait__locked(&sna->render); 1231 _kgem_submit(&sna->kgem); 1232 emit_state(sna, op); 1233 goto start; 1234} 1235 1236inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, 1237 uint16_t *offset) 1238{ 1239 uint32_t *table; 1240 1241 sna->kgem.surface -= 1242 sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1243 /* Clear all surplus entries to zero in case of prefetch */ 1244 table = memset(sna->kgem.batch + sna->kgem.surface, 1245 0, sizeof(struct gen6_surface_state_padded)); 1246 1247 DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1248 1249 *offset = sna->kgem.surface; 1250 return table; 1251} 1252 1253static bool 1254gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) 1255{ 1256 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1257 1258 if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { 1259 DBG(("%s: flushing batch: %d < %d+%d\n", 1260 __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1261 150, 4*8)); 1262 kgem_submit(&sna->kgem); 1263 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1264 } 1265 1266 if (sna->render_state.gen6.needs_invariant) 1267 gen6_emit_invariant(sna); 1268 1269 return kgem_bo_is_dirty(op->dst.bo); 1270} 1271 1272static void gen6_emit_composite_state(struct sna *sna, 1273 const struct sna_composite_op *op) 1274{ 1275 uint32_t *binding_table; 1276 uint16_t offset; 1277 bool dirty; 1278 1279 dirty = gen6_get_batch(sna, op); 1280 1281 binding_table = gen6_composite_get_binding_table(sna, &offset); 1282 
1283 binding_table[0] = 1284 gen6_bind_bo(sna, 1285 op->dst.bo, op->dst.width, op->dst.height, 1286 gen6_get_dest_format(op->dst.format), 1287 true); 1288 binding_table[1] = 1289 gen6_bind_bo(sna, 1290 op->src.bo, op->src.width, op->src.height, 1291 op->src.card_format, 1292 false); 1293 if (op->mask.bo) { 1294 binding_table[2] = 1295 gen6_bind_bo(sna, 1296 op->mask.bo, 1297 op->mask.width, 1298 op->mask.height, 1299 op->mask.card_format, 1300 false); 1301 } 1302 1303 if (sna->kgem.surface == offset && 1304 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && 1305 (op->mask.bo == NULL || 1306 sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { 1307 sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1308 offset = sna->render_state.gen6.surface_table; 1309 } 1310 1311 gen6_emit_state(sna, op, offset | dirty); 1312} 1313 1314static void 1315gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1316{ 1317 assert (sna->render.vertex_offset == 0); 1318 if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { 1319 DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1320 sna->render_state.gen6.floats_per_vertex, 1321 op->floats_per_vertex)); 1322 gen4_vertex_align(sna, op); 1323 sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; 1324 } 1325 assert((sna->render.vertex_used % op->floats_per_vertex) == 0); 1326} 1327 1328fastcall static void 1329gen6_render_composite_blt(struct sna *sna, 1330 const struct sna_composite_op *op, 1331 const struct sna_composite_rectangles *r) 1332{ 1333 gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); 1334 op->prim_emit(sna, op, r); 1335} 1336 1337fastcall static void 1338gen6_render_composite_box(struct sna *sna, 1339 const struct sna_composite_op *op, 1340 const BoxRec *box) 1341{ 1342 struct sna_composite_rectangles r; 1343 1344 gen6_get_rectangles(sna, op, 1, 
gen6_emit_composite_state); 1345 1346 DBG((" %s: (%d, %d), (%d, %d)\n", 1347 __FUNCTION__, 1348 box->x1, box->y1, box->x2, box->y2)); 1349 1350 r.dst.x = box->x1; 1351 r.dst.y = box->y1; 1352 r.width = box->x2 - box->x1; 1353 r.height = box->y2 - box->y1; 1354 r.src = r.mask = r.dst; 1355 1356 op->prim_emit(sna, op, &r); 1357} 1358 1359static void 1360gen6_render_composite_boxes__blt(struct sna *sna, 1361 const struct sna_composite_op *op, 1362 const BoxRec *box, int nbox) 1363{ 1364 DBG(("composite_boxes(%d)\n", nbox)); 1365 1366 do { 1367 int nbox_this_time; 1368 1369 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1370 gen6_emit_composite_state); 1371 nbox -= nbox_this_time; 1372 1373 do { 1374 struct sna_composite_rectangles r; 1375 1376 DBG((" %s: (%d, %d), (%d, %d)\n", 1377 __FUNCTION__, 1378 box->x1, box->y1, box->x2, box->y2)); 1379 1380 r.dst.x = box->x1; 1381 r.dst.y = box->y1; 1382 r.width = box->x2 - box->x1; 1383 r.height = box->y2 - box->y1; 1384 r.src = r.mask = r.dst; 1385 1386 op->prim_emit(sna, op, &r); 1387 box++; 1388 } while (--nbox_this_time); 1389 } while (nbox); 1390} 1391 1392static void 1393gen6_render_composite_boxes(struct sna *sna, 1394 const struct sna_composite_op *op, 1395 const BoxRec *box, int nbox) 1396{ 1397 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1398 1399 do { 1400 int nbox_this_time; 1401 float *v; 1402 1403 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1404 gen6_emit_composite_state); 1405 assert(nbox_this_time); 1406 nbox -= nbox_this_time; 1407 1408 v = sna->render.vertices + sna->render.vertex_used; 1409 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1410 1411 op->emit_boxes(op, box, nbox_this_time, v); 1412 box += nbox_this_time; 1413 } while (nbox); 1414} 1415 1416static void 1417gen6_render_composite_boxes__thread(struct sna *sna, 1418 const struct sna_composite_op *op, 1419 const BoxRec *box, int nbox) 1420{ 1421 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1422 1423 
sna_vertex_lock(&sna->render); 1424 do { 1425 int nbox_this_time; 1426 float *v; 1427 1428 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1429 gen6_emit_composite_state); 1430 assert(nbox_this_time); 1431 nbox -= nbox_this_time; 1432 1433 v = sna->render.vertices + sna->render.vertex_used; 1434 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1435 1436 sna_vertex_acquire__locked(&sna->render); 1437 sna_vertex_unlock(&sna->render); 1438 1439 op->emit_boxes(op, box, nbox_this_time, v); 1440 box += nbox_this_time; 1441 1442 sna_vertex_lock(&sna->render); 1443 sna_vertex_release__locked(&sna->render); 1444 } while (nbox); 1445 sna_vertex_unlock(&sna->render); 1446} 1447 1448#ifndef MAX 1449#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1450#endif 1451 1452static uint32_t 1453gen6_composite_create_blend_state(struct sna_static_stream *stream) 1454{ 1455 char *base, *ptr; 1456 int src, dst; 1457 1458 base = sna_static_stream_map(stream, 1459 GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 1460 64); 1461 1462 ptr = base; 1463 for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { 1464 for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { 1465 struct gen6_blend_state *blend = 1466 (struct gen6_blend_state *)ptr; 1467 1468 blend->blend0.dest_blend_factor = dst; 1469 blend->blend0.source_blend_factor = src; 1470 blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; 1471 blend->blend0.blend_enable = 1472 !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); 1473 1474 blend->blend1.post_blend_clamp_enable = 1; 1475 blend->blend1.pre_blend_clamp_enable = 1; 1476 1477 ptr += GEN6_BLEND_STATE_PADDED_SIZE; 1478 } 1479 } 1480 1481 return sna_static_stream_offsetof(stream, base); 1482} 1483 1484static uint32_t gen6_bind_video_source(struct sna *sna, 1485 struct kgem_bo *src_bo, 1486 uint32_t src_offset, 1487 int src_width, 1488 int src_height, 1489 int src_pitch, 1490 uint32_t src_surf_format) 1491{ 1492 struct gen6_surface_state 
*ss; 1493 1494 sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1495 1496 ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); 1497 ss->ss0.surface_type = GEN6_SURFACE_2D; 1498 ss->ss0.surface_format = src_surf_format; 1499 1500 ss->ss1.base_addr = 1501 kgem_add_reloc(&sna->kgem, 1502 sna->kgem.surface + 1, 1503 src_bo, 1504 I915_GEM_DOMAIN_SAMPLER << 16, 1505 src_offset); 1506 1507 ss->ss2.width = src_width - 1; 1508 ss->ss2.height = src_height - 1; 1509 ss->ss3.pitch = src_pitch - 1; 1510 1511 return sna->kgem.surface * sizeof(uint32_t); 1512} 1513 1514static void gen6_emit_video_state(struct sna *sna, 1515 const struct sna_composite_op *op) 1516{ 1517 struct sna_video_frame *frame = op->priv; 1518 uint32_t src_surf_format; 1519 uint32_t src_surf_base[6]; 1520 int src_width[6]; 1521 int src_height[6]; 1522 int src_pitch[6]; 1523 uint32_t *binding_table; 1524 uint16_t offset; 1525 bool dirty; 1526 int n_src, n; 1527 1528 dirty = gen6_get_batch(sna, op); 1529 1530 src_surf_base[0] = 0; 1531 src_surf_base[1] = 0; 1532 src_surf_base[2] = frame->VBufOffset; 1533 src_surf_base[3] = frame->VBufOffset; 1534 src_surf_base[4] = frame->UBufOffset; 1535 src_surf_base[5] = frame->UBufOffset; 1536 1537 if (is_planar_fourcc(frame->id)) { 1538 src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; 1539 src_width[1] = src_width[0] = frame->width; 1540 src_height[1] = src_height[0] = frame->height; 1541 src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1542 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1543 frame->width / 2; 1544 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1545 frame->height / 2; 1546 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1547 frame->pitch[0]; 1548 n_src = 6; 1549 } else { 1550 if (frame->id == FOURCC_UYVY) 1551 src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; 1552 else 1553 src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; 1554 1555 src_width[0] = frame->width; 1556 
src_height[0] = frame->height; 1557 src_pitch[0] = frame->pitch[0]; 1558 n_src = 1; 1559 } 1560 1561 binding_table = gen6_composite_get_binding_table(sna, &offset); 1562 1563 binding_table[0] = 1564 gen6_bind_bo(sna, 1565 op->dst.bo, op->dst.width, op->dst.height, 1566 gen6_get_dest_format(op->dst.format), 1567 true); 1568 for (n = 0; n < n_src; n++) { 1569 binding_table[1+n] = 1570 gen6_bind_video_source(sna, 1571 frame->bo, 1572 src_surf_base[n], 1573 src_width[n], 1574 src_height[n], 1575 src_pitch[n], 1576 src_surf_format); 1577 } 1578 1579 gen6_emit_state(sna, op, offset | dirty); 1580} 1581 1582static bool 1583gen6_render_video(struct sna *sna, 1584 struct sna_video *video, 1585 struct sna_video_frame *frame, 1586 RegionPtr dstRegion, 1587 PixmapPtr pixmap) 1588{ 1589 struct sna_composite_op tmp; 1590 struct sna_pixmap *priv = sna_pixmap(pixmap); 1591 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1592 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1593 int src_width = frame->src.x2 - frame->src.x1; 1594 int src_height = frame->src.y2 - frame->src.y1; 1595 float src_offset_x, src_offset_y; 1596 float src_scale_x, src_scale_y; 1597 int nbox, pix_xoff, pix_yoff; 1598 unsigned filter; 1599 const BoxRec *box; 1600 1601 DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 1602 __FUNCTION__, 1603 src_width, src_height, dst_width, dst_height, 1604 region_num_rects(dstRegion), 1605 REGION_EXTENTS(NULL, dstRegion)->x1, 1606 REGION_EXTENTS(NULL, dstRegion)->y1, 1607 REGION_EXTENTS(NULL, dstRegion)->x2, 1608 REGION_EXTENTS(NULL, dstRegion)->y2)); 1609 1610 assert(priv->gpu_bo); 1611 memset(&tmp, 0, sizeof(tmp)); 1612 1613 tmp.dst.pixmap = pixmap; 1614 tmp.dst.width = pixmap->drawable.width; 1615 tmp.dst.height = pixmap->drawable.height; 1616 tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 1617 tmp.dst.bo = priv->gpu_bo; 1618 1619 tmp.src.bo = frame->bo; 1620 tmp.mask.bo = NULL; 1621 1622 
tmp.floats_per_vertex = 3; 1623 tmp.floats_per_rect = 9; 1624 1625 if (src_width == dst_width && src_height == dst_height) 1626 filter = SAMPLER_FILTER_NEAREST; 1627 else 1628 filter = SAMPLER_FILTER_BILINEAR; 1629 1630 tmp.u.gen6.flags = 1631 GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 1632 SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 1633 NO_BLEND, 1634 is_planar_fourcc(frame->id) ? 1635 GEN6_WM_KERNEL_VIDEO_PLANAR : 1636 GEN6_WM_KERNEL_VIDEO_PACKED, 1637 2); 1638 tmp.priv = frame; 1639 1640 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 1641 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1642 kgem_submit(&sna->kgem); 1643 assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); 1644 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1645 } 1646 1647 gen6_align_vertex(sna, &tmp); 1648 gen6_emit_video_state(sna, &tmp); 1649 1650 /* Set up the offset for translating from the given region (in screen 1651 * coordinates) to the backing pixmap. 1652 */ 1653#ifdef COMPOSITE 1654 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1655 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1656#else 1657 pix_xoff = 0; 1658 pix_yoff = 0; 1659#endif 1660 1661 src_scale_x = (float)src_width / dst_width / frame->width; 1662 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1663 1664 src_scale_y = (float)src_height / dst_height / frame->height; 1665 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1666 1667 box = region_rects(dstRegion); 1668 nbox = region_num_rects(dstRegion); 1669 while (nbox--) { 1670 BoxRec r; 1671 1672 r.x1 = box->x1 + pix_xoff; 1673 r.x2 = box->x2 + pix_xoff; 1674 r.y1 = box->y1 + pix_yoff; 1675 r.y2 = box->y2 + pix_yoff; 1676 1677 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); 1678 1679 OUT_VERTEX(r.x2, r.y2); 1680 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1681 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1682 1683 
OUT_VERTEX(r.x1, r.y2); 1684 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1685 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1686 1687 OUT_VERTEX(r.x1, r.y1); 1688 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1689 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1690 1691 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1692 sna_damage_add_box(&priv->gpu_damage, &r); 1693 sna_damage_subtract_box(&priv->cpu_damage, &r); 1694 } 1695 box++; 1696 } 1697 1698 gen4_vertex_flush(sna); 1699 return true; 1700} 1701 1702static int 1703gen6_composite_picture(struct sna *sna, 1704 PicturePtr picture, 1705 struct sna_composite_channel *channel, 1706 int x, int y, 1707 int w, int h, 1708 int dst_x, int dst_y, 1709 bool precise) 1710{ 1711 PixmapPtr pixmap; 1712 uint32_t color; 1713 int16_t dx, dy; 1714 1715 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n", 1716 __FUNCTION__, x, y, w, h, dst_x, dst_y, precise)); 1717 1718 channel->is_solid = false; 1719 channel->card_format = -1; 1720 1721 if (sna_picture_is_solid(picture, &color)) 1722 return gen4_channel_init_solid(sna, channel, color); 1723 1724 if (picture->pDrawable == NULL) { 1725 int ret; 1726 1727 if (picture->pSourcePict->type == SourcePictTypeLinear) 1728 return gen4_channel_init_linear(sna, picture, channel, 1729 x, y, 1730 w, h, 1731 dst_x, dst_y); 1732 1733 DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1734 ret = -1; 1735 if (!precise) 1736 ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1737 x, y, w, h, dst_x, dst_y); 1738 if (ret == -1) 1739 ret = sna_render_picture_fixup(sna, picture, channel, 1740 x, y, w, h, dst_x, dst_y); 1741 return ret; 1742 } 1743 1744 if (picture->alphaMap) { 1745 DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); 1746 return sna_render_picture_fixup(sna, picture, channel, 1747 x, y, w, h, dst_x, dst_y); 1748 } 1749 1750 if (!gen6_check_repeat(picture)) 1751 return sna_render_picture_fixup(sna, picture, channel, 1752 x, y, w, h, dst_x, dst_y); 1753 
1754 if (!gen6_check_filter(picture)) 1755 return sna_render_picture_fixup(sna, picture, channel, 1756 x, y, w, h, dst_x, dst_y); 1757 1758 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1759 channel->filter = picture->filter; 1760 1761 pixmap = get_drawable_pixmap(picture->pDrawable); 1762 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1763 1764 x += dx + picture->pDrawable->x; 1765 y += dy + picture->pDrawable->y; 1766 1767 channel->is_affine = sna_transform_is_affine(picture->transform); 1768 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1769 DBG(("%s: integer translation (%d, %d), removing\n", 1770 __FUNCTION__, dx, dy)); 1771 x += dx; 1772 y += dy; 1773 channel->transform = NULL; 1774 channel->filter = PictFilterNearest; 1775 1776 if (channel->repeat && 1777 (x >= 0 && 1778 y >= 0 && 1779 x + w < pixmap->drawable.width && 1780 y + h < pixmap->drawable.height)) { 1781 struct sna_pixmap *priv = sna_pixmap(pixmap); 1782 if (priv && priv->clear) { 1783 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1784 return gen4_channel_init_solid(sna, channel, priv->clear_color); 1785 } 1786 } 1787 } else 1788 channel->transform = picture->transform; 1789 1790 channel->pict_format = picture->format; 1791 channel->card_format = gen6_get_card_format(picture->format); 1792 if (channel->card_format == (unsigned)-1) 1793 return sna_render_picture_convert(sna, picture, channel, pixmap, 1794 x, y, w, h, dst_x, dst_y, 1795 false); 1796 1797 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 1798 DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 1799 pixmap->drawable.width, pixmap->drawable.height)); 1800 return sna_render_picture_extract(sna, picture, channel, 1801 x, y, w, h, dst_x, dst_y); 1802 } 1803 1804 DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? 
%d], format=%08x\n", 1805 __FUNCTION__, 1806 channel->repeat, channel->filter, 1807 channel->transform != NULL, channel->is_affine, 1808 channel->pict_format)); 1809 if (channel->transform) { 1810#define f2d(x) (((double)(x))/65536.) 1811 DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n", 1812 __FUNCTION__, 1813 f2d(channel->transform->matrix[0][0]), 1814 f2d(channel->transform->matrix[0][1]), 1815 f2d(channel->transform->matrix[0][2]), 1816 f2d(channel->transform->matrix[1][0]), 1817 f2d(channel->transform->matrix[1][1]), 1818 f2d(channel->transform->matrix[1][2]), 1819 f2d(channel->transform->matrix[2][0]), 1820 f2d(channel->transform->matrix[2][1]), 1821 f2d(channel->transform->matrix[2][2]), 1822 channel->transform->matrix[0][0], 1823 channel->transform->matrix[0][1], 1824 channel->transform->matrix[0][2], 1825 channel->transform->matrix[1][0], 1826 channel->transform->matrix[1][1], 1827 channel->transform->matrix[1][2], 1828 channel->transform->matrix[2][0], 1829 channel->transform->matrix[2][1], 1830 channel->transform->matrix[2][2])); 1831#undef f2d 1832 } 1833 1834 return sna_render_pixmap_bo(sna, channel, pixmap, 1835 x, y, w, h, dst_x, dst_y); 1836} 1837 1838inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) 1839{ 1840 channel->repeat = gen6_repeat(channel->repeat); 1841 channel->filter = gen6_filter(channel->filter); 1842 if (channel->card_format == (unsigned)-1) 1843 channel->card_format = gen6_get_card_format(channel->pict_format); 1844 assert(channel->card_format != (unsigned)-1); 1845} 1846 1847static void gen6_render_composite_done(struct sna *sna, 1848 const struct sna_composite_op *op) 1849{ 1850 DBG(("%s\n", __FUNCTION__)); 1851 1852 assert(!sna->render.active); 1853 if (sna->render.vertex_offset) { 1854 gen4_vertex_flush(sna); 1855 gen6_magic_ca_pass(sna, op); 1856 } 1857 1858 if (op->mask.bo) 1859 kgem_bo_destroy(&sna->kgem, op->mask.bo); 1860 if (op->src.bo) 1861 
kgem_bo_destroy(&sna->kgem, op->src.bo); 1862 1863 sna_render_composite_redirect_done(sna, op); 1864} 1865 1866inline static bool 1867gen6_composite_set_target(struct sna *sna, 1868 struct sna_composite_op *op, 1869 PicturePtr dst, 1870 int x, int y, int w, int h, 1871 bool partial) 1872{ 1873 BoxRec box; 1874 unsigned int hint; 1875 1876 DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 1877 1878 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1879 op->dst.format = dst->format; 1880 op->dst.width = op->dst.pixmap->drawable.width; 1881 op->dst.height = op->dst.pixmap->drawable.height; 1882 1883 if (w && h) { 1884 box.x1 = x; 1885 box.y1 = y; 1886 box.x2 = x + w; 1887 box.y2 = y + h; 1888 } else 1889 sna_render_picture_extents(dst, &box); 1890 1891 hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1892 if (!partial) { 1893 hint |= IGNORE_DAMAGE; 1894 if (w == op->dst.width && h == op->dst.height) 1895 hint |= REPLACES; 1896 } 1897 1898 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1899 if (op->dst.bo == NULL) 1900 return false; 1901 1902 if (hint & REPLACES) { 1903 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1904 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1905 } 1906 1907 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1908 &op->dst.x, &op->dst.y); 1909 1910 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1911 __FUNCTION__, 1912 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1913 op->dst.width, op->dst.height, 1914 op->dst.bo->pitch, 1915 op->dst.x, op->dst.y, 1916 op->damage ? 
*op->damage : (void *)-1)); 1917 1918 assert(op->dst.bo->proxy == NULL); 1919 1920 if (too_large(op->dst.width, op->dst.height) && 1921 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1922 return false; 1923 1924 return true; 1925} 1926 1927static bool 1928try_blt(struct sna *sna, 1929 PicturePtr dst, PicturePtr src, 1930 int width, int height) 1931{ 1932 struct kgem_bo *bo; 1933 1934 if (sna->kgem.mode == KGEM_BLT) { 1935 DBG(("%s: already performing BLT\n", __FUNCTION__)); 1936 return true; 1937 } 1938 1939 if (too_large(width, height)) { 1940 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 1941 __FUNCTION__, width, height)); 1942 return true; 1943 } 1944 1945 bo = __sna_drawable_peek_bo(dst->pDrawable); 1946 if (bo == NULL) 1947 return true; 1948 if (bo->rq) 1949 return RQ_IS_BLT(bo->rq); 1950 1951 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 1952 return true; 1953 1954 if (src->pDrawable) { 1955 bo = __sna_drawable_peek_bo(src->pDrawable); 1956 if (bo == NULL) 1957 return true; 1958 1959 if (prefer_blt_bo(sna, bo)) 1960 return true; 1961 } 1962 1963 if (sna->kgem.ring == KGEM_BLT) { 1964 DBG(("%s: already performing BLT\n", __FUNCTION__)); 1965 return true; 1966 } 1967 1968 return false; 1969} 1970 1971static bool 1972check_gradient(PicturePtr picture, bool precise) 1973{ 1974 if (picture->pDrawable) 1975 return false; 1976 1977 switch (picture->pSourcePict->type) { 1978 case SourcePictTypeSolidFill: 1979 case SourcePictTypeLinear: 1980 return false; 1981 default: 1982 return precise; 1983 } 1984} 1985 1986static bool 1987has_alphamap(PicturePtr p) 1988{ 1989 return p->alphaMap != NULL; 1990} 1991 1992static bool 1993need_upload(PicturePtr p) 1994{ 1995 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 1996} 1997 1998static bool 1999source_is_busy(PixmapPtr pixmap) 2000{ 2001 struct sna_pixmap *priv = sna_pixmap(pixmap); 2002 if (priv == NULL || priv->clear) 2003 return false; 2004 2005 if 
(priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2006 return true; 2007 2008 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2009 return true; 2010 2011 return priv->gpu_damage && !priv->cpu_damage; 2012} 2013 2014static bool 2015source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2016{ 2017 if (sna_picture_is_solid(p, NULL)) 2018 return false; 2019 2020 if (p->pSourcePict) 2021 return check_gradient(p, precise); 2022 2023 if (!gen6_check_repeat(p) || !gen6_check_format(p->format)) 2024 return true; 2025 2026 if (pixmap && source_is_busy(pixmap)) 2027 return false; 2028 2029 return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p); 2030} 2031 2032static bool 2033gen6_composite_fallback(struct sna *sna, 2034 PicturePtr src, 2035 PicturePtr mask, 2036 PicturePtr dst) 2037{ 2038 PixmapPtr src_pixmap; 2039 PixmapPtr mask_pixmap; 2040 PixmapPtr dst_pixmap; 2041 bool src_fallback, mask_fallback; 2042 2043 if (!gen6_check_dst_format(dst->format)) { 2044 DBG(("%s: unknown destination format: %d\n", 2045 __FUNCTION__, dst->format)); 2046 return true; 2047 } 2048 2049 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2050 2051 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2052 src_fallback = source_fallback(src, src_pixmap, 2053 dst->polyMode == PolyModePrecise); 2054 2055 if (mask) { 2056 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2057 mask_fallback = source_fallback(mask, mask_pixmap, 2058 dst->polyMode == PolyModePrecise); 2059 } else { 2060 mask_pixmap = NULL; 2061 mask_fallback = false; 2062 } 2063 2064 /* If we are using the destination as a source and need to 2065 * readback in order to upload the source, do it all 2066 * on the cpu. 
2067 */ 2068 if (src_pixmap == dst_pixmap && src_fallback) { 2069 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2070 return true; 2071 } 2072 if (mask_pixmap == dst_pixmap && mask_fallback) { 2073 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2074 return true; 2075 } 2076 2077 /* If anything is on the GPU, push everything out to the GPU */ 2078 if (dst_use_gpu(dst_pixmap)) { 2079 DBG(("%s: dst is already on the GPU, try to use GPU\n", 2080 __FUNCTION__)); 2081 return false; 2082 } 2083 2084 if (src_pixmap && !src_fallback) { 2085 DBG(("%s: src is already on the GPU, try to use GPU\n", 2086 __FUNCTION__)); 2087 return false; 2088 } 2089 if (mask_pixmap && !mask_fallback) { 2090 DBG(("%s: mask is already on the GPU, try to use GPU\n", 2091 __FUNCTION__)); 2092 return false; 2093 } 2094 2095 /* However if the dst is not on the GPU and we need to 2096 * render one of the sources using the CPU, we may 2097 * as well do the entire operation in place onthe CPU. 2098 */ 2099 if (src_fallback) { 2100 DBG(("%s: dst is on the CPU and src will fallback\n", 2101 __FUNCTION__)); 2102 return true; 2103 } 2104 2105 if (mask && mask_fallback) { 2106 DBG(("%s: dst is on the CPU and mask will fallback\n", 2107 __FUNCTION__)); 2108 return true; 2109 } 2110 2111 if (too_large(dst_pixmap->drawable.width, 2112 dst_pixmap->drawable.height) && 2113 dst_is_cpu(dst_pixmap)) { 2114 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2115 return true; 2116 } 2117 2118 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2119 __FUNCTION__)); 2120 return dst_use_cpu(dst_pixmap); 2121} 2122 2123static int 2124reuse_source(struct sna *sna, 2125 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2126 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2127{ 2128 uint32_t color; 2129 2130 if (src_x != msk_x || src_y != msk_y) 2131 return false; 2132 2133 if (src == mask) { 2134 DBG(("%s: mask is 
source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	/* The mask can only piggy-back on the source channel when it samples
	 * the very same drawable with identical transform and alpha map. */
	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen6_check_repeat(mask))
		return false;

	if (!gen6_check_filter(mask))
		return false;

	if (!gen6_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Share the source bo, but apply the mask's own sampler state. */
	*mc = *sc;
	mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen6_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen6_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

/* Prepare a RENDER composite (op: src IN mask -> dst) on the gen6 3D
 * pipeline.  On success fills in tmp with the emitters/vfuncs and the
 * packed hardware state flags; returns false so the caller can take a
 * different path (BLT or software) otherwise. */
static bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* Unmasked operations may be cheaper on the BLT ring. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Split OVER into two passes; see the magic
				 * CA pass handling in the emitters. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, WM kernel and vertex-emitter selection into
	 * the single per-op state word used by gen6_emit_composite_state. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt = gen6_render_composite_blt;
	tmp->box = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done = gen6_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		/* Batch full: flush and retry once before giving up. */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, tmp);
	gen6_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

#if !NO_COMPOSITE_SPANS
/* Emit a single span rectangle with the given opacity. */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}

/* Emit a batch of span rectangles, chunked by available vertex space. */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	}
	while (nbox);
}

/* Threaded span emission: reserves vertex space under the vertex lock,
 * then releases the lock while filling the vertices so other threads can
 * reserve their own space concurrently. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve our slice of the vertex buffer while locked... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...and fill it outside the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Flush pending vertices and release the span operation's resources. */
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

/* Cheap predicate: can gen6_render_composite_spans() handle this request,
 * and is the GPU likely to be the faster path for it? */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* Only worthwhile on the GPU if the CPU/GPU bo is busy. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}

/* Prepare a spans (variable per-box opacity) composite on the 3D pipeline;
 * mirrors gen6_render_composite but with an opacity WM kernel and no mask
 * channel. */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}

	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		/* Batch full: flush and retry once before failing. */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp->base);
	gen6_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif

/* Bind the dst/src surfaces and (re)emit pipeline state for a copy op. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Reuse the previous surface table if the bindings are unchanged. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}

/* Heuristic: should this copy run on the BLT ring rather than RENDER? */
static inline bool prefer_blt_copy(struct sna *sna,
				   struct kgem_bo *src_bo,
				   struct kgem_bo *dst_bo,
				   unsigned flags)
{
	if (flags & COPY_SYNC)
		return false;

	if (PREFER_RENDER)
		return PREFER_RENDER > 0;

	if (sna->kgem.ring == KGEM_BLT)
		return true;

	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	if (force_blt_ring(sna))
		return true;

	if (kgem_bo_is_render(dst_bo) ||
	    kgem_bo_is_render(src_bo))
		return false;

	if (prefer_render_ring(sna, dst_bo))
		return false;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
}

/* Copy a list of boxes from src to dst, picking between the BLT ring,
 * the 3D pipeline (with redirection/partial extraction for oversized
 * surfaces) and tiled software fallbacks. */
static bool
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;
	BoxRec extents;

	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
	     src_bo == dst_bo,
	     overlaps(sna,
		      src_bo, src_dx, src_dy,
		      dst_bo, dst_dx, dst_dy,
		      box, n, flags, &extents)));

	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	    sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	/* The 3D pipe only handles GXcopy/GXclear; everything else falls
	 * back to the BLT/software path below. */
	if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Overlapping self-copies cannot be streamed through the sampler. */
	if (overlaps(sna,
		     src_bo, src_dx, src_dy,
		     dst_bo, dst_dx, dst_dy,
		     box, n, flags,
		     &extents)) {
		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
		    sna_blt_copy_boxes(sna, alu,
				       src_bo, src_dx, src_dy,
				       dst_bo, dst_dx, dst_dy,
				       dst->bitsPerPixel,
				       box, n))
			return true;

		if (big)
			goto fallback_blt;

		assert(src_bo == dst_bo);
		assert(src->depth == dst->depth);
		assert(src->width == dst->width);
		assert(src->height == dst->height);
		return sna_render_copy_boxes__overlap(sna, alu,
						      src, src_bo,
						      src_dx, src_dy,
						      dst_dx, dst_dy,
						      box, n, &extents);
	}

	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen6_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Destination exceeds the render target limit: redirect rendering
	 * through a proxy covering just the bounding box of all boxes. */
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
	/* Source exceeds the sampler limit: extract the used sub-region. */
	if (too_large(src->width, src->height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1)) {
			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
			goto fallback_tiled_dst;
		}
	} else {
		tmp.src.bo = src_bo;
		tmp.src.width  = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
	}

	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;

	tmp.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
			DBG(("%s: too large for a single operation\n",
			     __FUNCTION__));
			/* Undo the partial-src/redirect setup before bailing. */
			if (tmp.src.bo != src_bo)
				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
			goto fallback_blt;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* Fold the extraction/redirection offsets into the deltas. */
	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;

	tmp.dst.x = tmp.dst.y = 0;

	gen6_align_vertex(sna, &tmp);
	gen6_emit_copy_state(sna, &tmp);

	do {
		int16_t *v;
		int n_this_time;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_copy_state);
		n -= n_this_time;

		/* Three int16 coordinate pairs (dst,src interleaved) per
		 * corner, three corners per rectangle => 12 shorts/box. */
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {

			DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			v[0] = box->x2 + dst_dx;
			v[2] = box->x2 + src_dx;
			v[1] = v[5] = box->y2 + dst_dy;
			v[3] = v[7] = box->y2 + src_dy;
			v[8] = v[4] = box->x1 + dst_dx;
			v[10] = v[6] = box->x1 + src_dx;
			v[9] = box->y1 + dst_dy;
			v[11] = box->y1 + src_dy;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	if (tmp.src.bo != src_bo)
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

/* Emit one rectangle of an ongoing gen6_render_copy() operation. */
static void
gen6_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	int16_t *v;

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dx+w; v[1] = dy+h;
	v[2] = sx+w; v[3] = sy+h;
	v[4] = dx; v[5] = dy+h;
	v[6] = sx; v[7] = sy+h;
	v[8] = dx; v[9] = dy;
	v[10] = sx; v[11] = sy;
}

/* Finish a gen6_render_copy() stream: flush any queued vertices. */
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
}

/* Prepare a streaming copy (caller then invokes op->blt per rectangle). */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable,
					   &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen6_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen6_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		/* Batch full: flush and retry once before falling back. */
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_copy_state(sna, &op->base);

	op->blt  = gen6_render_copy_blt;
	op->done = gen6_render_copy_done;
	return true;
}

/* Bind the destination and a 1x1 solid-colour source, then (re)emit
 * pipeline state for a fill operation. */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Reuse the previous surface table if the bindings are unchanged. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}

/* Fill a list of boxes with a solid colour using the blend op `op`,
 * preferring the BLT ring for simple cases and redirecting oversized
 * destinations through a proxy. */
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		/* Only Clear/Src reduce to a plain raster-op fill. */
		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen6_check_dst_format(format))
			return false;
	}

	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG((" (%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* Three corners per box, dst xy + 1x1 texcoord. */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}

/* Emit one (x, y, w, h) rectangle of an ongoing fill operation. */
static void
gen6_render_op_fill_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *v;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x+w;
	v[4] = v[8] = x;
	v[1] = v[5] = y+h;
	v[9] = y;

	v[2] = v[3] = v[7] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit one BoxRec rectangle of an ongoing fill operation. */
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *v;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[8] = v[4] = box->x1;
	v[5] = v[1] = box->y2;
	v[9] = box->y1;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit a batch of fill rectangles, chunked by available vertex space. */
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, 
%d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a fill stream: flush vertices and drop the solid-colour bo. */
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Prepare a streaming solid fill (caller invokes op->blt/box/boxes). */
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* A cached 1x1 solid-colour bo serves as the texture source. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_fill_state(sna, &op->base);

	op->blt    = gen6_render_op_fill_blt;
	op->box    = gen6_render_op_fill_box;
	op->boxes  = gen6_render_op_fill_boxes;
	op->points = NULL;
	op->done   = gen6_render_op_fill_done;
	return true;
}

/* Attempt a single-rectangle fill via the BLT ring. */
static bool
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* Fill a single rectangle with a solid colour, BLT or 3D as appropriate. */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Attempt a whole-pixmap clear via the BLT ring. */
static bool
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

/* Clear an entire pixmap to zero, BLT or 3D as appropriate. */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Invalidate all cached gen6 pipeline state (e.g. after a batch flush). */
static void gen6_render_reset(struct sna *sna)
{
	sna->render_state.gen6.needs_invariant = true;
	sna->render_state.gen6.first_state_packet = true;
	sna->render_state.gen6.ve_id = 3 << 2;
	sna->render_state.gen6.last_primitive = -1;

	sna->render_state.gen6.num_sf_outputs = 0;
	sna->render_state.gen6.samplers = -1;
	sna->render_state.gen6.blend = -1;
	sna->render_state.gen6.kernel = -1;
	sna->render_state.gen6.drawrect_offset = -1;
	sna->render_state.gen6.drawrect_limit = -1;
	sna->render_state.gen6.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static-state bo holding kernels/samplers/etc. */
static void gen6_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}

/* PCI-id decode: GT2 parts have bits set in the 0x30 mask.
 * NOTE(review): based solely on the devid bit test here — confirm against
 * the Sandybridge PCI-id tables. */
static bool is_gt2(struct sna *sna, int devid)
{
	return devid & 0x30;
}

/* PCI-id decode: mobile parts have low nibble == 0x6 (same caveat). */
static bool is_mobile(struct sna *sna, int devid)
{
	return (devid & 0xf) == 0x6;
}

static bool gen6_render_setup(struct sna *sna, int devid)
{
	struct gen6_render_state *state = &sna->render_state.gen6;
	struct sna_static_stream general;
	struct gen6_sampler_state *ss;
	int i, j, k, l, m;

	state->info = &gt1_info;
	if (is_gt2(sna, devid))
		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);
	scratch_create(&general);

	/* wm_kernel[m][i] receives the stream offset of kernel m compiled
	 * for 8/16/32 pixel dispatch (i = 0/1/2). Prebuilt binaries
	 * (size != 0) are only provided as SIMD16, hence slot [1].
	 */
	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* If every dispatch width was compiled out, force SIMD16.
		 * A zero offset means "not compiled": the null/scratch
		 * padding above guarantees real kernels are never at 0.
		 */
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
			state->wm_kernel[m][1] =
				sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data, 16);
		}
	}

	/* Sampler table: pairs of (src, mask) sampler states — two fixed
	 * pairs (copy, fill) followed by the full filter x extend matrix
	 * for both source and mask.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen6_composite_create_blend_state(&general);

	/* Upload everything into one bo; NULL on allocation failure. */
	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Entry point: hook the gen6 render paths into sna->render / sna->kgem.
 * Returns the gt-specific backend name on success, or the unchanged
 * `backend` argument if the static state could not be created.
 */
const char *gen6_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->scrn);

	if (!gen6_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen6_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif

#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen6_check_composite_spans;
	sna->render.composite_spans = gen6_render_composite_spans;
	if (is_mobile(sna, devid))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen6_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen6_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen6_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen6.info->name;
}