gen6_render.c revision 03b705cf
/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen6_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0
#define PREFER_RENDER 0

#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

#define GEN6_MAX_SIZE 8192

struct gt_info {
	const char *name;
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};

static const struct gt_info gt1_info = {
	.name = "Sandybridge (gen6, gt1)",
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
};

static const struct gt_info gt2_info = {
	.name = "Sandybridge (gen6, gt2)",
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
};

static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};

#define NOKERNEL(kernel_enum, func, ns) \
	[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
	[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	unsigned int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

static inline bool too_large(int width, int height)
{
	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
}

static uint32_t gen6_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen6_blend_op[op].src_blend;
	dst = gen6_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ONE;
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

static uint32_t gen6_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen6_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen6_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen6_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen6_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

static int
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen6_blend_op[op].src_alpha)
				base = GEN6_WM_KERNEL_MASKSA;
			else
				base = GEN6_WM_KERNEL_MASKCA;
		} else
			base = GEN6_WM_KERNEL_MASK;
	} else
		base = GEN6_WM_KERNEL_NOMASK;

	return base + !is_affine;
}

static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}

static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}

static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}

static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}

static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}

static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}

static bool
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return blend != NO_BLEND;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);
	if (render->blend == (unsigned)-1) {
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
	return blend != NO_BLEND;
}

static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen6.samplers == state)
		return;

	sna->render_state.gen6.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
}

static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}

static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}

static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen6.surface_table == offset)
		return false;

	/* Binding table pointers */
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* vs */
	OUT_BATCH(0); /* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);

	sna->render_state.gen6.surface_table = offset;
	return true;
}

static bool
gen6_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen6.drawrect_limit == limit &&
	    sna->render_state.gen6.drawrect_offset == offset)
		return false;

	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
	 * produced by non-pipelined state commands), software needs to first
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
	 * 0.
	 *
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
	 * BEFORE the pipe-control with a post-sync op and no write-cache
	 * flushes.
	 */
	if (!sna->render_state.gen6.first_state_packet) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
				 I915_GEM_DOMAIN_INSTRUCTION,
				 64));
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);

	sna->render_state.gen6.drawrect_offset = offset;
	sna->render_state.gen6.drawrect_limit = limit;
	return true;
}

static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		  ((2 * (3 + has_mask)) + 1 - 2));

	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}

static void
gen6_emit_flush(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Emit the per-operation pipeline state, skipping any packets that already
 * match the current hardware state, and flushing the render caches if a
 * source was written earlier in this batch.
 */
static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_stall = wm_binding_table & 1;

	assert(op->dst.bo->exec);

	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
		need_stall = false;
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);

	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
	if (gen6_emit_drawing_rectangle(sna, op))
		need_stall = false;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		gen6_emit_flush(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);
		need_stall = false;
	}
	if (need_stall) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}
	sna->render_state.gen6.first_state_packet = false;
}

/* Component-alpha fixup: replay the rectangles already queued in the vertex
 * buffer a second time with an ADD blend to complete the two-pass composite.
 */
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen6_emit_flush(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		break;
	}
}

static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN6_SURFACE_TILED;
	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static int
gen6_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	if (is_dst) {
		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = is_scanout ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}

static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}

static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen6_magic_ca_pass(sna, op)) {
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}

/* Reserve vertex-buffer space for up to 'want' rectangles, flushing the vbo
 * and resubmitting the batch when space runs out.
 */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen6_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}

static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}

static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}

fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			blend->blend0.blend_enable =
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}

static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	unsigned filter;
	BoxPtr box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     (long)REGION_NUM_RECTS(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_video_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}

static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen6_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}

static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable,
					 PREFER_GPU | FORCE_GPU | RENDER_GPU,
					 &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

inline static bool can_switch_to_blt(struct sna *sna,
				     struct kgem_bo *bo,
				     unsigned flags)
{
	if (sna->kgem.ring != KGEM_RENDER)
		return true;

	if (NO_RING_SWITCH)
		return false;

	if (!sna->kgem.has_semaphores)
		return false;

	if (flags & COPY_LAST)
		return true;

	if (bo && RQ_IS_BLT(bo->rq))
		return true;

	return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
}

static inline bool untiled_tlb_miss(struct kgem_bo *bo)
{
	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
}

static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
{
	if (RQ_IS_BLT(bo->rq))
		return true;

	return bo->tiling == I915_TILING_NONE || bo->scanout;
}

inline static bool prefer_blt_ring(struct sna *sna,
				   struct kgem_bo *bo,
				   unsigned flags)
{
	return can_switch_to_blt(sna, bo, flags);
}

static bool
try_blt(struct sna *sna,
	PicturePtr dst, PicturePtr src,
	int width, int height)
{
	struct kgem_bo *bo;

	bo = __sna_drawable_peek_bo(dst->pDrawable);
	if (bo == NULL)
		return true;

	if (bo->rq)
		return RQ_IS_BLT(bo->rq);

	if (sna->kgem.ring == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}

	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0))
		return true;

	return false;
}

static bool
check_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL || priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen6_check_repeat(p) || !gen6_check_format(p->format))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p);
!gen6_check_filter(p) || need_upload(p); 2002} 2003 2004static bool 2005gen6_composite_fallback(struct sna *sna, 2006 PicturePtr src, 2007 PicturePtr mask, 2008 PicturePtr dst) 2009{ 2010 PixmapPtr src_pixmap; 2011 PixmapPtr mask_pixmap; 2012 PixmapPtr dst_pixmap; 2013 bool src_fallback, mask_fallback; 2014 2015 if (!gen6_check_dst_format(dst->format)) { 2016 DBG(("%s: unknown destination format: %d\n", 2017 __FUNCTION__, dst->format)); 2018 return true; 2019 } 2020 2021 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2022 2023 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2024 src_fallback = source_fallback(src, src_pixmap, 2025 dst->polyMode == PolyModePrecise); 2026 2027 if (mask) { 2028 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2029 mask_fallback = source_fallback(mask, mask_pixmap, 2030 dst->polyMode == PolyModePrecise); 2031 } else { 2032 mask_pixmap = NULL; 2033 mask_fallback = false; 2034 } 2035 2036 /* If we are using the destination as a source and need to 2037 * read back in order to upload the source, do it all 2038 * on the cpu. 2039 */ 2040 if (src_pixmap == dst_pixmap && src_fallback) { 2041 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2042 return true; 2043 } 2044 if (mask_pixmap == dst_pixmap && mask_fallback) { 2045 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2046 return true; 2047 } 2048 2049 /* If anything is on the GPU, push everything out to the GPU */ 2050 if (dst_use_gpu(dst_pixmap)) { 2051 DBG(("%s: dst is already on the GPU, try to use GPU\n", 2052 __FUNCTION__)); 2053 return false; 2054 } 2055 2056 if (src_pixmap && !src_fallback) { 2057 DBG(("%s: src is already on the GPU, try to use GPU\n", 2058 __FUNCTION__)); 2059 return false; 2060 } 2061 if (mask_pixmap && !mask_fallback) { 2062 DBG(("%s: mask is already on the GPU, try to use GPU\n", 2063 __FUNCTION__)); 2064 return false; 2065 } 2066 2067 /* However if the dst is not on the GPU and we need to 2068 * render one of the sources using the CPU, we may 2069 * as well do the entire operation in place on the CPU.
2070 */ 2071 if (src_fallback) { 2072 DBG(("%s: dst is on the CPU and src will fallback\n", 2073 __FUNCTION__)); 2074 return true; 2075 } 2076 2077 if (mask && mask_fallback) { 2078 DBG(("%s: dst is on the CPU and mask will fallback\n", 2079 __FUNCTION__)); 2080 return true; 2081 } 2082 2083 if (too_large(dst_pixmap->drawable.width, 2084 dst_pixmap->drawable.height) && 2085 dst_is_cpu(dst_pixmap)) { 2086 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2087 return true; 2088 } 2089 2090 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2091 __FUNCTION__)); 2092 return dst_use_cpu(dst_pixmap); 2093} 2094 2095static int 2096reuse_source(struct sna *sna, 2097 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2098 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2099{ 2100 uint32_t color; 2101 2102 if (src_x != msk_x || src_y != msk_y) 2103 return false; 2104 2105 if (src == mask) { 2106 DBG(("%s: mask is source\n", __FUNCTION__)); 2107 *mc = *sc; 2108 mc->bo = kgem_bo_reference(mc->bo); 2109 return true; 2110 } 2111 2112 if (sna_picture_is_solid(mask, &color)) 2113 return gen4_channel_init_solid(sna, mc, color); 2114 2115 if (sc->is_solid) 2116 return false; 2117 2118 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2119 return false; 2120 2121 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2122 2123 if (!sna_transform_equal(src->transform, mask->transform)) 2124 return false; 2125 2126 if (!sna_picture_alphamap_equal(src, mask)) 2127 return false; 2128 2129 if (!gen6_check_repeat(mask)) 2130 return false; 2131 2132 if (!gen6_check_filter(mask)) 2133 return false; 2134 2135 if (!gen6_check_format(mask->format)) 2136 return false; 2137 2138 DBG(("%s: reusing source channel for mask with a twist\n", 2139 __FUNCTION__)); 2140 2141 *mc = *sc; 2142 mc->repeat = gen6_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 2143 mc->filter = gen6_filter(mask->filter); 2144 mc->pict_format = mask->format; 2145 mc->card_format = gen6_get_card_format(mask->format); 2146 mc->bo = kgem_bo_reference(mc->bo); 2147 return true; 2148} 2149 2150static bool 2151prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) 2152{ 2153 if (untiled_tlb_miss(tmp->dst.bo) || 2154 untiled_tlb_miss(tmp->src.bo)) 2155 return true; 2156 2157 if (kgem_bo_is_render(tmp->dst.bo) || 2158 kgem_bo_is_render(tmp->src.bo)) 2159 return false; 2160 2161 if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) 2162 return false; 2163 2164 return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo); 2165} 2166 2167static bool 2168gen6_render_composite(struct sna *sna, 2169 uint8_t op, 2170 PicturePtr src, 2171 PicturePtr mask, 2172 PicturePtr dst, 2173 int16_t src_x, int16_t src_y, 2174 int16_t msk_x, int16_t msk_y, 2175 int16_t dst_x, int16_t dst_y, 2176 int16_t width, int16_t height, 2177 struct sna_composite_op *tmp) 2178{ 2179 if (op >= ARRAY_SIZE(gen6_blend_op)) 2180 return false; 2181 2182 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 2183 width, height, sna->kgem.ring)); 2184 2185 if (mask == NULL && 2186 try_blt(sna, dst, src, width, height) && 2187 sna_blt_composite(sna, op, 2188 src, dst, 2189 src_x, src_y, 2190 dst_x, dst_y, 2191 width, height, 2192 tmp, false)) 2193 return true; 2194 2195 if (gen6_composite_fallback(sna, src, mask, dst)) 2196 return false; 2197 2198 if (need_tiling(sna, width, height)) 2199 return sna_tiling_composite(op, src, mask, dst, 2200 src_x, src_y, 2201 msk_x, msk_y, 2202 dst_x, dst_y, 2203 width, height, 2204 tmp); 2205 2206 if (op == PictOpClear) 2207 op = PictOpSrc; 2208 tmp->op = op; 2209 if (!gen6_composite_set_target(sna, tmp, dst, 2210 dst_x, dst_y, width, height, 2211 op > PictOpSrc || dst->pCompositeClip->data)) 2212 return false; 2213 2214 switch (gen6_composite_picture(sna, src, &tmp->src, 2215 src_x, src_y, 2216 width, height, 2217 dst_x, dst_y, 2218 dst->polyMode == PolyModePrecise)) { 2219 case -1: 2220 goto cleanup_dst; 2221 case 0: 2222 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2223 goto cleanup_dst; 2224 /* fall through to fixup */ 2225 case 1: 2226 /* Did we just switch rings to prepare the source? */ 2227 if (mask == NULL && 2228 prefer_blt_composite(sna, tmp) && 2229 sna_blt_composite__convert(sna, 2230 dst_x, dst_y, width, height, 2231 tmp)) 2232 return true; 2233 2234 gen6_composite_channel_convert(&tmp->src); 2235 break; 2236 } 2237 2238 tmp->is_affine = tmp->src.is_affine; 2239 tmp->has_component_alpha = false; 2240 tmp->need_magic_ca_pass = false; 2241 2242 tmp->mask.bo = NULL; 2243 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2244 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2245 2246 if (mask) { 2247 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2248 tmp->has_component_alpha = true; 2249 2250 /* Check if it's component alpha that relies on a source alpha and on 2251 * the source value. We can only get one of those into the single 2252 * source value that we get to blend with. 
2253 */ 2254 if (gen6_blend_op[op].src_alpha && 2255 (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { 2256 if (op != PictOpOver) 2257 goto cleanup_src; 2258 2259 tmp->need_magic_ca_pass = true; 2260 tmp->op = PictOpOutReverse; 2261 } 2262 } 2263 2264 if (!reuse_source(sna, 2265 src, &tmp->src, src_x, src_y, 2266 mask, &tmp->mask, msk_x, msk_y)) { 2267 switch (gen6_composite_picture(sna, mask, &tmp->mask, 2268 msk_x, msk_y, 2269 width, height, 2270 dst_x, dst_y, 2271 dst->polyMode == PolyModePrecise)) { 2272 case -1: 2273 goto cleanup_src; 2274 case 0: 2275 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2276 goto cleanup_src; 2277 /* fall through to fixup */ 2278 case 1: 2279 gen6_composite_channel_convert(&tmp->mask); 2280 break; 2281 } 2282 } 2283 2284 tmp->is_affine &= tmp->mask.is_affine; 2285 } 2286 2287 tmp->u.gen6.flags = 2288 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2289 tmp->src.repeat, 2290 tmp->mask.filter, 2291 tmp->mask.repeat), 2292 gen6_get_blend(tmp->op, 2293 tmp->has_component_alpha, 2294 tmp->dst.format), 2295 gen6_choose_composite_kernel(tmp->op, 2296 tmp->mask.bo != NULL, 2297 tmp->has_component_alpha, 2298 tmp->is_affine), 2299 gen4_choose_composite_emitter(sna, tmp)); 2300 2301 tmp->blt = gen6_render_composite_blt; 2302 tmp->box = gen6_render_composite_box; 2303 tmp->boxes = gen6_render_composite_boxes__blt; 2304 if (tmp->emit_boxes) { 2305 tmp->boxes = gen6_render_composite_boxes; 2306 tmp->thread_boxes = gen6_render_composite_boxes__thread; 2307 } 2308 tmp->done = gen6_render_composite_done; 2309 2310 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2311 if (!kgem_check_bo(&sna->kgem, 2312 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2313 NULL)) { 2314 kgem_submit(&sna->kgem); 2315 if (!kgem_check_bo(&sna->kgem, 2316 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2317 NULL)) 2318 goto cleanup_mask; 2319 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2320 } 2321 2322 gen6_emit_composite_state(sna, tmp); 2323 gen6_align_vertex(sna, tmp); 2324 return true; 2325 2326cleanup_mask: 2327 if (tmp->mask.bo) 2328 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2329cleanup_src: 2330 if (tmp->src.bo) 2331 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2332cleanup_dst: 2333 if (tmp->redirect.real_bo) 2334 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2335 return false; 2336} 2337 2338#if !NO_COMPOSITE_SPANS 2339fastcall static void 2340gen6_render_composite_spans_box(struct sna *sna, 2341 const struct sna_composite_spans_op *op, 2342 const BoxRec *box, float opacity) 2343{ 2344 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2345 __FUNCTION__, 2346 op->base.src.offset[0], op->base.src.offset[1], 2347 opacity, 2348 op->base.dst.x, op->base.dst.y, 2349 box->x1, box->y1, 2350 box->x2 - box->x1, 2351 box->y2 - box->y1)); 2352 2353 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); 2354 op->prim_emit(sna, op, box, opacity); 2355} 2356 2357static void 2358gen6_render_composite_spans_boxes(struct sna *sna, 2359 const struct sna_composite_spans_op *op, 2360 const BoxRec *box, int nbox, 2361 float opacity) 2362{ 2363 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2364 __FUNCTION__, nbox, 2365 op->base.src.offset[0], op->base.src.offset[1], 2366 opacity, 2367 op->base.dst.x, op->base.dst.y)); 2368 2369 do { 2370 int nbox_this_time; 2371 2372 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2373 gen6_emit_composite_state); 2374 nbox -= nbox_this_time; 2375 2376 do { 2377 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2378 
box->x1, box->y1, 2379 box->x2 - box->x1, 2380 box->y2 - box->y1)); 2381 2382 op->prim_emit(sna, op, box++, opacity); 2383 } while (--nbox_this_time); 2384 } while (nbox); 2385} 2386 2387fastcall static void 2388gen6_render_composite_spans_boxes__thread(struct sna *sna, 2389 const struct sna_composite_spans_op *op, 2390 const struct sna_opacity_box *box, 2391 int nbox) 2392{ 2393 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2394 __FUNCTION__, nbox, 2395 op->base.src.offset[0], op->base.src.offset[1], 2396 op->base.dst.x, op->base.dst.y)); 2397 2398 sna_vertex_lock(&sna->render); 2399 do { 2400 int nbox_this_time; 2401 float *v; 2402 2403 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2404 gen6_emit_composite_state); 2405 assert(nbox_this_time); 2406 nbox -= nbox_this_time; 2407 2408 v = sna->render.vertices + sna->render.vertex_used; 2409 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2410 2411 sna_vertex_acquire__locked(&sna->render); 2412 sna_vertex_unlock(&sna->render); 2413 2414 op->emit_boxes(op, box, nbox_this_time, v); 2415 box += nbox_this_time; 2416 2417 sna_vertex_lock(&sna->render); 2418 sna_vertex_release__locked(&sna->render); 2419 } while (nbox); 2420 sna_vertex_unlock(&sna->render); 2421} 2422 2423fastcall static void 2424gen6_render_composite_spans_done(struct sna *sna, 2425 const struct sna_composite_spans_op *op) 2426{ 2427 DBG(("%s()\n", __FUNCTION__)); 2428 assert(!sna->render.active); 2429 2430 if (sna->render.vertex_offset) 2431 gen4_vertex_flush(sna); 2432 2433 if (op->base.src.bo) 2434 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2435 2436 sna_render_composite_redirect_done(sna, &op->base); 2437} 2438 2439static bool 2440gen6_check_composite_spans(struct sna *sna, 2441 uint8_t op, PicturePtr src, PicturePtr dst, 2442 int16_t width, int16_t height, 2443 unsigned flags) 2444{ 2445 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2446 __FUNCTION__, op, width, height, flags)); 2447 2448 if (op >= ARRAY_SIZE(gen6_blend_op)) 2449 return false; 2450 2451 if (gen6_composite_fallback(sna, src, NULL, dst)) { 2452 DBG(("%s: operation would fallback\n", __FUNCTION__)); 2453 return false; 2454 } 2455 2456 if (need_tiling(sna, width, height) && 2457 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2458 DBG(("%s: fallback, tiled operation not on GPU\n", 2459 __FUNCTION__)); 2460 return false; 2461 } 2462 2463 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2464 struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); 2465 assert(priv); 2466 2467 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2468 return true; 2469 2470 if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2471 return false; 2472 2473 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2474 } 2475 2476 return true; 2477} 2478 2479static bool 2480gen6_render_composite_spans(struct sna *sna, 2481 uint8_t op, 2482 PicturePtr src, 2483 PicturePtr dst, 2484 int16_t src_x, int16_t src_y, 2485 int16_t dst_x, int16_t dst_y, 2486 int16_t width, int16_t height, 2487 unsigned flags, 2488 struct sna_composite_spans_op *tmp) 2489{ 2490 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2491 width, height, flags, sna->kgem.ring)); 2492 2493 assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); 2494 2495 if (need_tiling(sna, width, height)) { 2496 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2497 __FUNCTION__, width, height)); 2498 return sna_tiling_composite_spans(op, src, dst, 2499 src_x, src_y, dst_x, dst_y, 2500 width, 
height, flags, tmp); 2501 } 2502 2503 tmp->base.op = op; 2504 if (!gen6_composite_set_target(sna, &tmp->base, dst, 2505 dst_x, dst_y, width, height, true)) 2506 return false; 2507 2508 switch (gen6_composite_picture(sna, src, &tmp->base.src, 2509 src_x, src_y, 2510 width, height, 2511 dst_x, dst_y, 2512 dst->polyMode == PolyModePrecise)) { 2513 case -1: 2514 goto cleanup_dst; 2515 case 0: 2516 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2517 goto cleanup_dst; 2518 /* fall through to fixup */ 2519 case 1: 2520 gen6_composite_channel_convert(&tmp->base.src); 2521 break; 2522 } 2523 tmp->base.mask.bo = NULL; 2524 2525 tmp->base.is_affine = tmp->base.src.is_affine; 2526 tmp->base.need_magic_ca_pass = false; 2527 2528 tmp->base.u.gen6.flags = 2529 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2530 tmp->base.src.repeat, 2531 SAMPLER_FILTER_NEAREST, 2532 SAMPLER_EXTEND_PAD), 2533 gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), 2534 GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2535 gen4_choose_spans_emitter(sna, tmp)); 2536 2537 tmp->box = gen6_render_composite_spans_box; 2538 tmp->boxes = gen6_render_composite_spans_boxes; 2539 if (tmp->emit_boxes) 2540 tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; 2541 tmp->done = gen6_render_composite_spans_done; 2542 2543 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2544 if (!kgem_check_bo(&sna->kgem, 2545 tmp->base.dst.bo, tmp->base.src.bo, 2546 NULL)) { 2547 kgem_submit(&sna->kgem); 2548 if (!kgem_check_bo(&sna->kgem, 2549 tmp->base.dst.bo, tmp->base.src.bo, 2550 NULL)) 2551 goto cleanup_src; 2552 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2553 } 2554 2555 gen6_emit_composite_state(sna, &tmp->base); 2556 gen6_align_vertex(sna, &tmp->base); 2557 return true; 2558 2559cleanup_src: 2560 if (tmp->base.src.bo) 2561 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2562cleanup_dst: 2563 if (tmp->base.redirect.real_bo) 2564 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2565 return false; 2566} 2567#endif 2568 2569static void 2570gen6_emit_copy_state(struct sna *sna, 2571 const struct sna_composite_op *op) 2572{ 2573 uint32_t *binding_table; 2574 uint16_t offset; 2575 bool dirty; 2576 2577 dirty = gen6_get_batch(sna, op); 2578 2579 binding_table = gen6_composite_get_binding_table(sna, &offset); 2580 2581 binding_table[0] = 2582 gen6_bind_bo(sna, 2583 op->dst.bo, op->dst.width, op->dst.height, 2584 gen6_get_dest_format(op->dst.format), 2585 true); 2586 binding_table[1] = 2587 gen6_bind_bo(sna, 2588 op->src.bo, op->src.width, op->src.height, 2589 op->src.card_format, 2590 false); 2591 2592 if (sna->kgem.surface == offset && 2593 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 2594 sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 2595 offset = sna->render_state.gen6.surface_table; 2596 } 2597 2598 gen6_emit_state(sna, op, offset | dirty); 2599} 2600 2601static inline bool prefer_blt_copy(struct sna *sna, 2602 struct kgem_bo *src_bo, 2603 struct kgem_bo *dst_bo, 2604 unsigned flags) 2605{ 2606 if (flags & COPY_SYNC) 2607 return false; 2608 2609 if (PREFER_RENDER) 2610 return PREFER_RENDER > 0; 2611 2612 if (sna->kgem.ring == KGEM_BLT) 2613 return true; 2614 2615 if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2616 return true; 2617 2618 if (untiled_tlb_miss(src_bo) || 2619 untiled_tlb_miss(dst_bo)) 2620 return true; 2621 2622 if (kgem_bo_is_render(dst_bo) || 2623 kgem_bo_is_render(src_bo)) 2624 return false; 2625 
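	/* Neither bo is tied to the render ring at this point, so whether the
	 * BLT is worthwhile comes down to the generic heuristics below: the
	 * ring switch must look cheap (see can_switch_to_blt(): semaphores
	 * available, BLT already targeted or idle) and at least one bo should
	 * favour the blitter (linear tiling or scanout, see prefer_blt_bo()).
	 */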
2626 if (!prefer_blt_ring(sna, dst_bo, flags)) 2627 return false; 2628 2629 return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2630} 2631 2632inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) 2633{ 2634 *extents = box[0]; 2635 while (--n) { 2636 box++; 2637 2638 if (box->x1 < extents->x1) 2639 extents->x1 = box->x1; 2640 if (box->x2 > extents->x2) 2641 extents->x2 = box->x2; 2642 2643 if (box->y1 < extents->y1) 2644 extents->y1 = box->y1; 2645 if (box->y2 > extents->y2) 2646 extents->y2 = box->y2; 2647 } 2648} 2649 2650static inline bool 2651overlaps(struct sna *sna, 2652 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2653 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2654 const BoxRec *box, int n, BoxRec *extents) 2655{ 2656 if (src_bo != dst_bo) 2657 return false; 2658 2659 boxes_extents(box, n, extents); 2660 return (extents->x2 + src_dx > extents->x1 + dst_dx && 2661 extents->x1 + src_dx < extents->x2 + dst_dx && 2662 extents->y2 + src_dy > extents->y1 + dst_dy && 2663 extents->y1 + src_dy < extents->y2 + dst_dy); 2664} 2665 2666static bool 2667gen6_render_copy_boxes(struct sna *sna, uint8_t alu, 2668 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2669 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2670 const BoxRec *box, int n, unsigned flags) 2671{ 2672 struct sna_composite_op tmp; 2673 BoxRec extents; 2674 2675 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", 2676 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, 2677 src_bo == dst_bo, 2678 overlaps(sna, 2679 src_bo, src_dx, src_dy, 2680 dst_bo, dst_dx, dst_dy, 2681 box, n, &extents))); 2682 2683 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2684 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2685 sna_blt_copy_boxes(sna, alu, 2686 src_bo, src_dx, src_dy, 2687 dst_bo, dst_dx, dst_dy, 2688 dst->drawable.bitsPerPixel, 2689 box, n)) 2690 return true; 2691 2692 if (!(alu == GXcopy || alu == GXclear)) { 2693fallback_blt: 2694 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2695 return false; 2696 2697 return sna_blt_copy_boxes_fallback(sna, alu, 2698 src, src_bo, src_dx, src_dy, 2699 dst, dst_bo, dst_dx, dst_dy, 2700 box, n); 2701 } 2702 2703 if (overlaps(sna, 2704 src_bo, src_dx, src_dy, 2705 dst_bo, dst_dx, dst_dy, 2706 box, n, &extents)) { 2707 if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) 2708 goto fallback_blt; 2709 2710 if (can_switch_to_blt(sna, dst_bo, flags) && 2711 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2712 sna_blt_copy_boxes(sna, alu, 2713 src_bo, src_dx, src_dy, 2714 dst_bo, dst_dx, dst_dy, 2715 dst->drawable.bitsPerPixel, 2716 box, n)) 2717 return true; 2718 2719 return sna_render_copy_boxes__overlap(sna, alu, 2720 src, src_bo, src_dx, src_dy, 2721 dst, dst_bo, dst_dx, dst_dy, 2722 box, n, &extents); 2723 } 2724 2725 if (dst->drawable.depth == src->drawable.depth) { 2726 tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2727 tmp.src.pict_format = tmp.dst.format; 2728 } else { 2729 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 2730 tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); 2731 } 2732 if (!gen6_check_format(tmp.src.pict_format)) 2733 goto fallback_blt; 2734 2735 tmp.dst.pixmap = dst; 2736 tmp.dst.width = dst->drawable.width; 2737 tmp.dst.height = dst->drawable.height; 2738 tmp.dst.bo = dst_bo; 2739 tmp.dst.x = tmp.dst.y = 0; 2740 tmp.damage = NULL; 2741 2742 
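	/* Surfaces the 3D pipe cannot address in one go (too_large(), bounded
	 * by GEN6_MAX_SIZE) are handled below by redirecting the destination
	 * to a proxy covering just the bounding box of the copies and by
	 * mapping an oversized source piecewise with
	 * sna_render_pixmap_partial(); the box offsets are then rebased onto
	 * those proxies.
	 */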
sna_render_composite_redirect_init(&tmp); 2743 if (too_large(tmp.dst.width, tmp.dst.height)) { 2744 int i; 2745 2746 extents = box[0]; 2747 for (i = 1; i < n; i++) { 2748 if (box[i].x1 < extents.x1) 2749 extents.x1 = box[i].x1; 2750 if (box[i].y1 < extents.y1) 2751 extents.y1 = box[i].y1; 2752 2753 if (box[i].x2 > extents.x2) 2754 extents.x2 = box[i].x2; 2755 if (box[i].y2 > extents.y2) 2756 extents.y2 = box[i].y2; 2757 } 2758 2759 if (!sna_render_composite_redirect(sna, &tmp, 2760 extents.x1 + dst_dx, 2761 extents.y1 + dst_dy, 2762 extents.x2 - extents.x1, 2763 extents.y2 - extents.y1, 2764 n > 1)) 2765 goto fallback_tiled; 2766 2767 dst_dx += tmp.dst.x; 2768 dst_dy += tmp.dst.y; 2769 2770 tmp.dst.x = tmp.dst.y = 0; 2771 } 2772 2773 tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); 2774 if (too_large(src->drawable.width, src->drawable.height)) { 2775 int i; 2776 2777 extents = box[0]; 2778 for (i = 1; i < n; i++) { 2779 if (box[i].x1 < extents.x1) 2780 extents.x1 = box[i].x1; 2781 if (box[i].y1 < extents.y1) 2782 extents.y1 = box[i].y1; 2783 2784 if (box[i].x2 > extents.x2) 2785 extents.x2 = box[i].x2; 2786 if (box[i].y2 > extents.y2) 2787 extents.y2 = box[i].y2; 2788 } 2789 2790 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2791 extents.x1 + src_dx, 2792 extents.y1 + src_dy, 2793 extents.x2 - extents.x1, 2794 extents.y2 - extents.y1)) { 2795 DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); 2796 goto fallback_tiled_dst; 2797 } 2798 2799 src_dx += tmp.src.offset[0]; 2800 src_dy += tmp.src.offset[1]; 2801 } else { 2802 tmp.src.bo = src_bo; 2803 tmp.src.width = src->drawable.width; 2804 tmp.src.height = src->drawable.height; 2805 } 2806 2807 tmp.mask.bo = NULL; 2808 2809 tmp.floats_per_vertex = 2; 2810 tmp.floats_per_rect = 6; 2811 tmp.need_magic_ca_pass = 0; 2812 2813 tmp.u.gen6.flags = COPY_FLAGS(alu); 2814 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 2815 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); 2816 assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); 2817 2818 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2819 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2820 kgem_submit(&sna->kgem); 2821 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2822 DBG(("%s: too large for a single operation\n", 2823 __FUNCTION__)); 2824 goto fallback_tiled_src; 2825 } 2826 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2827 } 2828 2829 gen6_emit_copy_state(sna, &tmp); 2830 gen6_align_vertex(sna, &tmp); 2831 2832 do { 2833 int16_t *v; 2834 int n_this_time; 2835 2836 n_this_time = gen6_get_rectangles(sna, &tmp, n, 2837 gen6_emit_copy_state); 2838 n -= n_this_time; 2839 2840 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2841 sna->render.vertex_used += 6 * n_this_time; 2842 assert(sna->render.vertex_used <= sna->render.vertex_size); 2843 do { 2844 2845 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2846 box->x1 + src_dx, box->y1 + src_dy, 2847 box->x1 + dst_dx, box->y1 + dst_dy, 2848 box->x2 - box->x1, box->y2 - box->y1)); 2849 v[0] = box->x2 + dst_dx; 2850 v[2] = box->x2 + src_dx; 2851 v[1] = v[5] = box->y2 + dst_dy; 2852 v[3] = v[7] = box->y2 + src_dy; 2853 v[8] = v[4] = box->x1 + dst_dx; 2854 v[10] = v[6] = box->x1 + src_dx; 2855 v[9] = box->y1 + dst_dy; 2856 v[11] = box->y1 + src_dy; 2857 v += 12; box++; 2858 } while (--n_this_time); 2859 } while (n); 2860 2861 gen4_vertex_flush(sna); 2862 sna_render_composite_redirect_done(sna, &tmp); 2863 if (tmp.src.bo != src_bo) 2864 
kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2865 return true; 2866 2867fallback_tiled_src: 2868 if (tmp.src.bo != src_bo) 2869 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2870fallback_tiled_dst: 2871 if (tmp.redirect.real_bo) 2872 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2873fallback_tiled: 2874 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2875 sna_blt_copy_boxes(sna, alu, 2876 src_bo, src_dx, src_dy, 2877 dst_bo, dst_dx, dst_dy, 2878 dst->drawable.bitsPerPixel, 2879 box, n)) 2880 return true; 2881 2882 return sna_tiling_copy_boxes(sna, alu, 2883 src, src_bo, src_dx, src_dy, 2884 dst, dst_bo, dst_dx, dst_dy, 2885 box, n); 2886} 2887 2888static void 2889gen6_render_copy_blt(struct sna *sna, 2890 const struct sna_copy_op *op, 2891 int16_t sx, int16_t sy, 2892 int16_t w, int16_t h, 2893 int16_t dx, int16_t dy) 2894{ 2895 int16_t *v; 2896 2897 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); 2898 2899 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2900 sna->render.vertex_used += 6; 2901 assert(sna->render.vertex_used <= sna->render.vertex_size); 2902 2903 v[0] = dx+w; v[1] = dy+h; 2904 v[2] = sx+w; v[3] = sy+h; 2905 v[4] = dx; v[5] = dy+h; 2906 v[6] = sx; v[7] = sy+h; 2907 v[8] = dx; v[9] = dy; 2908 v[10] = sx; v[11] = sy; 2909} 2910 2911static void 2912gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2913{ 2914 DBG(("%s()\n", __FUNCTION__)); 2915 2916 assert(!sna->render.active); 2917 if (sna->render.vertex_offset) 2918 gen4_vertex_flush(sna); 2919} 2920 2921static bool 2922gen6_render_copy(struct sna *sna, uint8_t alu, 2923 PixmapPtr src, struct kgem_bo *src_bo, 2924 PixmapPtr dst, struct kgem_bo *dst_bo, 2925 struct sna_copy_op *op) 2926{ 2927 DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 2928 __FUNCTION__, alu, 2929 src->drawable.width, src->drawable.height, 2930 dst->drawable.width, dst->drawable.height)); 2931 2932 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 2933 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2934 sna_blt_copy(sna, alu, 2935 src_bo, dst_bo, 2936 dst->drawable.bitsPerPixel, 2937 op)) 2938 return true; 2939 2940 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2941 too_large(src->drawable.width, src->drawable.height) || 2942 too_large(dst->drawable.width, dst->drawable.height)) { 2943fallback: 2944 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2945 return false; 2946 2947 return sna_blt_copy(sna, alu, src_bo, dst_bo, 2948 dst->drawable.bitsPerPixel, 2949 op); 2950 } 2951 2952 if (dst->drawable.depth == src->drawable.depth) { 2953 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2954 op->base.src.pict_format = op->base.dst.format; 2955 } else { 2956 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2957 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 2958 } 2959 if (!gen6_check_format(op->base.src.pict_format)) 2960 goto fallback; 2961 2962 op->base.dst.pixmap = dst; 2963 op->base.dst.width = dst->drawable.width; 2964 op->base.dst.height = dst->drawable.height; 2965 op->base.dst.bo = dst_bo; 2966 2967 op->base.src.bo = src_bo; 2968 op->base.src.card_format = 2969 gen6_get_card_format(op->base.src.pict_format); 2970 op->base.src.width = src->drawable.width; 2971 op->base.src.height = src->drawable.height; 2972 2973 op->base.mask.bo = NULL; 2974 2975 op->base.floats_per_vertex = 2; 2976 op->base.floats_per_rect = 6; 2977 2978 op->base.u.gen6.flags = COPY_FLAGS(alu); 2979 assert(GEN6_KERNEL(op->base.u.gen6.flags) == 
GEN6_WM_KERNEL_NOMASK); 2980 assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); 2981 assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); 2982 2983 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 2984 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2985 kgem_submit(&sna->kgem); 2986 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 2987 goto fallback; 2988 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2989 } 2990 2991 gen6_emit_copy_state(sna, &op->base); 2992 gen6_align_vertex(sna, &op->base); 2993 2994 op->blt = gen6_render_copy_blt; 2995 op->done = gen6_render_copy_done; 2996 return true; 2997} 2998 2999static void 3000gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3001{ 3002 uint32_t *binding_table; 3003 uint16_t offset; 3004 bool dirty; 3005 3006 dirty = gen6_get_batch(sna, op); 3007 3008 binding_table = gen6_composite_get_binding_table(sna, &offset); 3009 3010 binding_table[0] = 3011 gen6_bind_bo(sna, 3012 op->dst.bo, op->dst.width, op->dst.height, 3013 gen6_get_dest_format(op->dst.format), 3014 true); 3015 binding_table[1] = 3016 gen6_bind_bo(sna, 3017 op->src.bo, 1, 1, 3018 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 3019 false); 3020 3021 if (sna->kgem.surface == offset && 3022 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 3023 sna->kgem.surface += 3024 sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); 3025 offset = sna->render_state.gen6.surface_table; 3026 } 3027 3028 gen6_emit_state(sna, op, offset | dirty); 3029} 3030 3031static inline bool prefer_blt_fill(struct sna *sna, 3032 struct kgem_bo *bo) 3033{ 3034 if (PREFER_RENDER) 3035 return PREFER_RENDER < 0; 3036 3037 if (kgem_bo_is_render(bo)) 3038 return false; 3039 3040 if (untiled_tlb_miss(bo)) 3041 return true; 3042 3043 if (!prefer_blt_ring(sna, bo, 0)) 3044 return false; 3045 3046 return prefer_blt_bo(sna, bo); 3047} 3048 3049static bool 3050gen6_render_fill_boxes(struct sna *sna, 3051 CARD8 op, 3052 PictFormat format, 3053 const xRenderColor *color, 3054 PixmapPtr dst, struct kgem_bo *dst_bo, 3055 const BoxRec *box, int n) 3056{ 3057 struct sna_composite_op tmp; 3058 uint32_t pixel; 3059 3060 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3061 __FUNCTION__, op, 3062 color->red, color->green, color->blue, color->alpha, (int)format)); 3063 3064 if (op >= ARRAY_SIZE(gen6_blend_op)) { 3065 DBG(("%s: fallback due to unhandled blend op: %d\n", 3066 __FUNCTION__, op)); 3067 return false; 3068 } 3069 3070 if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) { 3071 uint8_t alu = GXinvalid; 3072 3073 if (op <= PictOpSrc) { 3074 pixel = 0; 3075 if (op == PictOpClear) 3076 alu = GXclear; 3077 else if (sna_get_pixel_from_rgba(&pixel, 3078 color->red, 3079 color->green, 3080 color->blue, 3081 color->alpha, 3082 format)) 3083 alu = GXcopy; 3084 } 3085 3086 if (alu != GXinvalid && 3087 sna_blt_fill_boxes(sna, alu, 3088 dst_bo, dst->drawable.bitsPerPixel, 3089 pixel, box, n)) 3090 return true; 3091 3092 if (!gen6_check_dst_format(format)) 3093 return false; 3094 } 3095 3096 if (op == PictOpClear) { 3097 pixel = 0; 3098 op = PictOpSrc; 3099 } else if (!sna_get_pixel_from_rgba(&pixel, 3100 color->red, 3101 color->green, 3102 color->blue, 3103 color->alpha, 3104 PICT_a8r8g8b8)) 3105 return false; 3106 3107 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3108 __FUNCTION__, pixel, n, 3109 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3110 3111 tmp.dst.pixmap = dst; 3112 tmp.dst.width = dst->drawable.width; 
3113 tmp.dst.height = dst->drawable.height; 3114 tmp.dst.format = format; 3115 tmp.dst.bo = dst_bo; 3116 tmp.dst.x = tmp.dst.y = 0; 3117 tmp.damage = NULL; 3118 3119 sna_render_composite_redirect_init(&tmp); 3120 if (too_large(dst->drawable.width, dst->drawable.height)) { 3121 BoxRec extents; 3122 3123 boxes_extents(box, n, &extents); 3124 if (!sna_render_composite_redirect(sna, &tmp, 3125 extents.x1, extents.y1, 3126 extents.x2 - extents.x1, 3127 extents.y2 - extents.y1, 3128 n > 1)) 3129 return sna_tiling_fill_boxes(sna, op, format, color, 3130 dst, dst_bo, box, n); 3131 } 3132 3133 tmp.src.bo = sna_render_get_solid(sna, pixel); 3134 tmp.mask.bo = NULL; 3135 3136 tmp.floats_per_vertex = 2; 3137 tmp.floats_per_rect = 6; 3138 tmp.need_magic_ca_pass = false; 3139 3140 tmp.u.gen6.flags = FILL_FLAGS(op, format); 3141 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3142 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3143 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3144 3145 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3146 kgem_submit(&sna->kgem); 3147 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3148 } 3149 3150 gen6_emit_fill_state(sna, &tmp); 3151 gen6_align_vertex(sna, &tmp); 3152 3153 do { 3154 int n_this_time; 3155 int16_t *v; 3156 3157 n_this_time = gen6_get_rectangles(sna, &tmp, n, 3158 gen6_emit_fill_state); 3159 n -= n_this_time; 3160 3161 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3162 sna->render.vertex_used += 6 * n_this_time; 3163 assert(sna->render.vertex_used <= sna->render.vertex_size); 3164 do { 3165 DBG((" (%d, %d), (%d, %d)\n", 3166 box->x1, box->y1, box->x2, box->y2)); 3167 3168 v[0] = box->x2; 3169 v[5] = v[1] = box->y2; 3170 v[8] = v[4] = box->x1; 3171 v[9] = box->y1; 3172 v[2] = v[3] = v[7] = 1; 3173 v[6] = v[10] = v[11] = 0; 3174 v += 12; box++; 3175 } while (--n_this_time); 3176 } while (n); 3177 3178 gen4_vertex_flush(sna); 3179 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3180 sna_render_composite_redirect_done(sna, &tmp); 3181 return true; 3182} 3183 3184static void 3185gen6_render_op_fill_blt(struct sna *sna, 3186 const struct sna_fill_op *op, 3187 int16_t x, int16_t y, int16_t w, int16_t h) 3188{ 3189 int16_t *v; 3190 3191 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3192 3193 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3194 3195 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3196 sna->render.vertex_used += 6; 3197 assert(sna->render.vertex_used <= sna->render.vertex_size); 3198 3199 v[0] = x+w; 3200 v[4] = v[8] = x; 3201 v[1] = v[5] = y+h; 3202 v[9] = y; 3203 3204 v[2] = v[3] = v[7] = 1; 3205 v[6] = v[10] = v[11] = 0; 3206} 3207 3208fastcall static void 3209gen6_render_op_fill_box(struct sna *sna, 3210 const struct sna_fill_op *op, 3211 const BoxRec *box) 3212{ 3213 int16_t *v; 3214 3215 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3216 box->x1, box->y1, box->x2, box->y2)); 3217 3218 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3219 3220 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3221 sna->render.vertex_used += 6; 3222 assert(sna->render.vertex_used <= sna->render.vertex_size); 3223 3224 v[0] = box->x2; 3225 v[8] = v[4] = box->x1; 3226 v[5] = v[1] = box->y2; 3227 v[9] = box->y1; 3228 3229 v[7] = v[2] = v[3] = 1; 3230 v[6] = v[10] = v[11] = 0; 3231} 3232 3233fastcall static void 3234gen6_render_op_fill_boxes(struct sna *sna, 3235 const struct sna_fill_op *op, 3236 const BoxRec *box, 3237 int nbox) 3238{ 3239 DBG(("%s: 
(%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3240 box->x1, box->y1, box->x2, box->y2, nbox)); 3241 3242 do { 3243 int nbox_this_time; 3244 int16_t *v; 3245 3246 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 3247 gen6_emit_fill_state); 3248 nbox -= nbox_this_time; 3249 3250 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3251 sna->render.vertex_used += 6 * nbox_this_time; 3252 assert(sna->render.vertex_used <= sna->render.vertex_size); 3253 3254 do { 3255 v[0] = box->x2; 3256 v[8] = v[4] = box->x1; 3257 v[5] = v[1] = box->y2; 3258 v[9] = box->y1; 3259 v[7] = v[2] = v[3] = 1; 3260 v[6] = v[10] = v[11] = 0; 3261 box++; v += 12; 3262 } while (--nbox_this_time); 3263 } while (nbox); 3264} 3265 3266static void 3267gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) 3268{ 3269 DBG(("%s()\n", __FUNCTION__)); 3270 3271 assert(!sna->render.active); 3272 if (sna->render.vertex_offset) 3273 gen4_vertex_flush(sna); 3274 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3275} 3276 3277static bool 3278gen6_render_fill(struct sna *sna, uint8_t alu, 3279 PixmapPtr dst, struct kgem_bo *dst_bo, 3280 uint32_t color, 3281 struct sna_fill_op *op) 3282{ 3283 DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3284 3285 if (prefer_blt_fill(sna, dst_bo) && 3286 sna_blt_fill(sna, alu, 3287 dst_bo, dst->drawable.bitsPerPixel, 3288 color, 3289 op)) 3290 return true; 3291 3292 if (!(alu == GXcopy || alu == GXclear) || 3293 too_large(dst->drawable.width, dst->drawable.height)) 3294 return sna_blt_fill(sna, alu, 3295 dst_bo, dst->drawable.bitsPerPixel, 3296 color, 3297 op); 3298 3299 if (alu == GXclear) 3300 color = 0; 3301 3302 op->base.dst.pixmap = dst; 3303 op->base.dst.width = dst->drawable.width; 3304 op->base.dst.height = dst->drawable.height; 3305 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3306 op->base.dst.bo = dst_bo; 3307 op->base.dst.x = op->base.dst.y = 0; 3308 3309 op->base.src.bo = 3310 sna_render_get_solid(sna, 3311 sna_rgba_for_color(color, 3312 dst->drawable.depth)); 3313 op->base.mask.bo = NULL; 3314 3315 op->base.need_magic_ca_pass = false; 3316 op->base.floats_per_vertex = 2; 3317 op->base.floats_per_rect = 6; 3318 3319 op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; 3320 assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3321 assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); 3322 assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); 3323 3324 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3325 kgem_submit(&sna->kgem); 3326 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3327 } 3328 3329 gen6_emit_fill_state(sna, &op->base); 3330 gen6_align_vertex(sna, &op->base); 3331 3332 op->blt = gen6_render_op_fill_blt; 3333 op->box = gen6_render_op_fill_box; 3334 op->boxes = gen6_render_op_fill_boxes; 3335 op->done = gen6_render_op_fill_done; 3336 return true; 3337} 3338 3339static bool 3340gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3341 uint32_t color, 3342 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3343 uint8_t alu) 3344{ 3345 BoxRec box; 3346 3347 box.x1 = x1; 3348 box.y1 = y1; 3349 box.x2 = x2; 3350 box.y2 = y2; 3351 3352 return sna_blt_fill_boxes(sna, alu, 3353 bo, dst->drawable.bitsPerPixel, 3354 color, &box, 1); 3355} 3356 3357static bool 3358gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3359 uint32_t color, 3360 int16_t x1, int16_t y1, 3361 int16_t x2, int16_t y2, 3362 uint8_t alu) 3363{ 3364 struct sna_composite_op tmp; 3365 
int16_t *v; 3366 3367 /* Prefer to use the BLT if already engaged */ 3368 if (prefer_blt_fill(sna, bo) && 3369 gen6_render_fill_one_try_blt(sna, dst, bo, color, 3370 x1, y1, x2, y2, alu)) 3371 return true; 3372 3373 /* Must use the BLT if we can't RENDER... */ 3374 if (!(alu == GXcopy || alu == GXclear) || 3375 too_large(dst->drawable.width, dst->drawable.height)) 3376 return gen6_render_fill_one_try_blt(sna, dst, bo, color, 3377 x1, y1, x2, y2, alu); 3378 3379 if (alu == GXclear) 3380 color = 0; 3381 3382 tmp.dst.pixmap = dst; 3383 tmp.dst.width = dst->drawable.width; 3384 tmp.dst.height = dst->drawable.height; 3385 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3386 tmp.dst.bo = bo; 3387 tmp.dst.x = tmp.dst.y = 0; 3388 3389 tmp.src.bo = 3390 sna_render_get_solid(sna, 3391 sna_rgba_for_color(color, 3392 dst->drawable.depth)); 3393 tmp.mask.bo = NULL; 3394 3395 tmp.floats_per_vertex = 2; 3396 tmp.floats_per_rect = 6; 3397 tmp.need_magic_ca_pass = false; 3398 3399 tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3400 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3401 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3402 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3403 3404 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3405 kgem_submit(&sna->kgem); 3406 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3407 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3408 return false; 3409 } 3410 } 3411 3412 gen6_emit_fill_state(sna, &tmp); 3413 gen6_align_vertex(sna, &tmp); 3414 3415 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3416 3417 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3418 3419 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3420 sna->render.vertex_used += 6; 3421 assert(sna->render.vertex_used <= sna->render.vertex_size); 3422 3423 v[0] = x2; 3424 v[8] = v[4] = x1; 3425 v[5] = v[1] = y2; 3426 v[9] = y1; 3427 v[7] = v[2] = v[3] = 1; 3428 v[6] = v[10] = v[11] = 0; 3429 3430 gen4_vertex_flush(sna); 3431 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3432 3433 return true; 3434} 3435 3436static bool 3437gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3438{ 3439 BoxRec box; 3440 3441 box.x1 = 0; 3442 box.y1 = 0; 3443 box.x2 = dst->drawable.width; 3444 box.y2 = dst->drawable.height; 3445 3446 return sna_blt_fill_boxes(sna, GXclear, 3447 bo, dst->drawable.bitsPerPixel, 3448 0, &box, 1); 3449} 3450 3451static bool 3452gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3453{ 3454 struct sna_composite_op tmp; 3455 int16_t *v; 3456 3457 DBG(("%s: %dx%d\n", 3458 __FUNCTION__, 3459 dst->drawable.width, 3460 dst->drawable.height)); 3461 3462 /* Prefer to use the BLT if, and only if, already engaged */ 3463 if (sna->kgem.ring == KGEM_BLT && 3464 gen6_render_clear_try_blt(sna, dst, bo)) 3465 return true; 3466 3467 /* Must use the BLT if we can't RENDER... 
*/ 3468 if (too_large(dst->drawable.width, dst->drawable.height)) 3469 return gen6_render_clear_try_blt(sna, dst, bo); 3470 3471 tmp.dst.pixmap = dst; 3472 tmp.dst.width = dst->drawable.width; 3473 tmp.dst.height = dst->drawable.height; 3474 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3475 tmp.dst.bo = bo; 3476 tmp.dst.x = tmp.dst.y = 0; 3477 3478 tmp.src.bo = sna_render_get_solid(sna, 0); 3479 tmp.mask.bo = NULL; 3480 3481 tmp.floats_per_vertex = 2; 3482 tmp.floats_per_rect = 6; 3483 tmp.need_magic_ca_pass = false; 3484 3485 tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3486 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3487 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3488 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3489 3490 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3491 kgem_submit(&sna->kgem); 3492 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3493 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3494 return false; 3495 } 3496 } 3497 3498 gen6_emit_fill_state(sna, &tmp); 3499 gen6_align_vertex(sna, &tmp); 3500 3501 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3502 3503 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3504 sna->render.vertex_used += 6; 3505 assert(sna->render.vertex_used <= sna->render.vertex_size); 3506 3507 v[0] = dst->drawable.width; 3508 v[5] = v[1] = dst->drawable.height; 3509 v[8] = v[4] = 0; 3510 v[9] = 0; 3511 3512 v[7] = v[2] = v[3] = 1; 3513 v[6] = v[10] = v[11] = 0; 3514 3515 gen4_vertex_flush(sna); 3516 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3517 3518 return true; 3519} 3520 3521static void gen6_render_flush(struct sna *sna) 3522{ 3523 gen4_vertex_close(sna); 3524 3525 assert(sna->render.vb_id == 0); 3526 assert(sna->render.vertex_offset == 0); 3527} 3528 3529static void 3530gen6_render_context_switch(struct kgem *kgem, 3531 int new_mode) 3532{ 3533 if (kgem->nbatch) { 3534 DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); 3535 _kgem_submit(kgem); 3536 } 3537 3538 kgem->ring = new_mode; 3539} 3540 3541static void 3542gen6_render_retire(struct kgem *kgem) 3543{ 3544 struct sna *sna; 3545 3546 if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) 3547 kgem->ring = kgem->mode; 3548 3549 sna = container_of(kgem, struct sna, kgem); 3550 if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { 3551 DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); 3552 sna->render.vertex_used = 0; 3553 sna->render.vertex_index = 0; 3554 } 3555} 3556 3557static void 3558gen6_render_expire(struct kgem *kgem) 3559{ 3560 struct sna *sna; 3561 3562 sna = container_of(kgem, struct sna, kgem); 3563 if (sna->render.vbo && !sna->render.vertex_used) { 3564 DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); 3565 kgem_bo_destroy(kgem, sna->render.vbo); 3566 assert(!sna->render.active); 3567 sna->render.vbo = NULL; 3568 sna->render.vertices = sna->render.vertex_data; 3569 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 3570 sna->render.vertex_used = 0; 3571 sna->render.vertex_index = 0; 3572 } 3573} 3574 3575static void gen6_render_reset(struct sna *sna) 3576{ 3577 sna->render_state.gen6.needs_invariant = true; 3578 sna->render_state.gen6.first_state_packet = true; 3579 sna->render_state.gen6.ve_id = 3 << 2; 3580 sna->render_state.gen6.last_primitive = -1; 3581 3582 sna->render_state.gen6.num_sf_outputs = 0; 3583 sna->render_state.gen6.samplers = -1; 3584 sna->render_state.gen6.blend = -1; 3585 
sna->render_state.gen6.kernel = -1; 3586 sna->render_state.gen6.drawrect_offset = -1; 3587 sna->render_state.gen6.drawrect_limit = -1; 3588 sna->render_state.gen6.surface_table = -1; 3589 3590 sna->render.vertex_offset = 0; 3591 sna->render.nvertex_reloc = 0; 3592 sna->render.vb_id = 0; 3593} 3594 3595static void gen6_render_fini(struct sna *sna) 3596{ 3597 kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); 3598} 3599 3600static bool is_gt2(struct sna *sna) 3601{ 3602 return sna->PciInfo->device_id & 0x30; 3603} 3604 3605static bool is_mobile(struct sna *sna) 3606{ 3607 return (sna->PciInfo->device_id & 0xf) == 0x6; 3608} 3609 3610static bool gen6_render_setup(struct sna *sna) 3611{ 3612 struct gen6_render_state *state = &sna->render_state.gen6; 3613 struct sna_static_stream general; 3614 struct gen6_sampler_state *ss; 3615 int i, j, k, l, m; 3616 3617 state->info = &gt1_info; 3618 if (is_gt2(sna)) 3619 state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */ 3620 3621 sna_static_stream_init(&general); 3622 3623 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3624 * dumps, you know it points to zero. 3625 */ 3626 null_create(&general); 3627 scratch_create(&general); 3628 3629 for (m = 0; m < GEN6_KERNEL_COUNT; m++) { 3630 if (wm_kernels[m].size) { 3631 state->wm_kernel[m][1] = 3632 sna_static_stream_add(&general, 3633 wm_kernels[m].data, 3634 wm_kernels[m].size, 3635 64); 3636 } else { 3637 if (USE_8_PIXEL_DISPATCH) { 3638 state->wm_kernel[m][0] = 3639 sna_static_stream_compile_wm(sna, &general, 3640 wm_kernels[m].data, 8); 3641 } 3642 3643 if (USE_16_PIXEL_DISPATCH) { 3644 state->wm_kernel[m][1] = 3645 sna_static_stream_compile_wm(sna, &general, 3646 wm_kernels[m].data, 16); 3647 } 3648 3649 if (USE_32_PIXEL_DISPATCH) { 3650 state->wm_kernel[m][2] = 3651 sna_static_stream_compile_wm(sna, &general, 3652 wm_kernels[m].data, 32); 3653 } 3654 } 3655 if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { 3656 state->wm_kernel[m][1] = 3657 sna_static_stream_compile_wm(sna, &general, 3658 wm_kernels[m].data, 16); 3659 } 3660 } 3661 3662 ss = sna_static_stream_map(&general, 3663 2 * sizeof(*ss) * 3664 (2 + 3665 FILTER_COUNT * EXTEND_COUNT * 3666 FILTER_COUNT * EXTEND_COUNT), 3667 32); 3668 state->wm_state = sna_static_stream_offsetof(&general, ss); 3669 sampler_copy_init(ss); ss += 2; 3670 sampler_fill_init(ss); ss += 2; 3671 for (i = 0; i < FILTER_COUNT; i++) { 3672 for (j = 0; j < EXTEND_COUNT; j++) { 3673 for (k = 0; k < FILTER_COUNT; k++) { 3674 for (l = 0; l < EXTEND_COUNT; l++) { 3675 sampler_state_init(ss++, i, j); 3676 sampler_state_init(ss++, k, l); 3677 } 3678 } 3679 } 3680 } 3681 3682 state->cc_blend = gen6_composite_create_blend_state(&general); 3683 3684 state->general_bo = sna_static_stream_fini(sna, &general); 3685 return state->general_bo != NULL; 3686} 3687 3688const char *gen6_render_init(struct sna *sna, const char *backend) 3689{ 3690 if (!gen6_render_setup(sna)) 3691 return backend; 3692 3693 sna->kgem.context_switch = gen6_render_context_switch; 3694 sna->kgem.retire = gen6_render_retire; 3695 sna->kgem.expire = gen6_render_expire; 3696 3697#if !NO_COMPOSITE 3698 sna->render.composite = gen6_render_composite; 3699 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3700#endif 3701 3702#if !NO_COMPOSITE_SPANS 3703 sna->render.check_composite_spans = gen6_check_composite_spans; 3704 sna->render.composite_spans = gen6_render_composite_spans; 3705 if (is_mobile(sna)) 3706 sna->render.prefer_gpu |= PREFER_GPU_SPANS;
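	/* GPU span compositing is only preferred on mobile parts; is_mobile()
	 * above keys this off the low nibble of the PCI device id.
	 */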
3707#endif 3708 sna->render.video = gen6_render_video; 3709 3710#if !NO_COPY_BOXES 3711 sna->render.copy_boxes = gen6_render_copy_boxes; 3712#endif 3713#if !NO_COPY 3714 sna->render.copy = gen6_render_copy; 3715#endif 3716 3717#if !NO_FILL_BOXES 3718 sna->render.fill_boxes = gen6_render_fill_boxes; 3719#endif 3720#if !NO_FILL 3721 sna->render.fill = gen6_render_fill; 3722#endif 3723#if !NO_FILL_ONE 3724 sna->render.fill_one = gen6_render_fill_one; 3725#endif 3726#if !NO_FILL_CLEAR 3727 sna->render.clear = gen6_render_clear; 3728#endif 3729 3730 sna->render.flush = gen6_render_flush; 3731 sna->render.reset = gen6_render_reset; 3732 sna->render.fini = gen6_render_fini; 3733 3734 sna->render.max_3d_size = GEN6_MAX_SIZE; 3735 sna->render.max_3d_pitch = 1 << 18; 3736 return sna->render_state.gen6.info->name; 3737} 3738
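/* Usage sketch: how a caller is expected to hook this backend up. The real
 * dispatch lives elsewhere in the driver (a per-generation probe selects a
 * backend); is_sandybridge() below is a hypothetical stand-in for that
 * generation check, not a helper defined in this driver.
 *
 *	const char *backend = "none";
 *	if (is_sandybridge(sna))
 *		backend = gen6_render_init(sna, backend);
 *
 * On success the returned string names the GT variant ("Sandybridge (gen6,
 * gt1)" or "Sandybridge (gen6, gt2)") and sna->render now carries the
 * gen6_render_* entry points installed above; if gen6_render_setup() fails,
 * the previous backend name is returned and sna->render is left untouched.
 */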