1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen6_render.h" 45#include "gen6_common.h" 46#include "gen4_common.h" 47#include "gen4_source.h" 48#include "gen4_vertex.h" 49 50#define ALWAYS_INVALIDATE 0 51#define ALWAYS_FLUSH 0 52#define ALWAYS_STALL 0 53 54#define NO_COMPOSITE 0 55#define NO_COMPOSITE_SPANS 0 56#define NO_COPY 0 57#define NO_COPY_BOXES 0 58#define NO_FILL 0 59#define NO_FILL_BOXES 0 60#define NO_FILL_ONE 0 61#define NO_FILL_CLEAR 0 62 63#define USE_8_PIXEL_DISPATCH 1 64#define USE_16_PIXEL_DISPATCH 1 65#define USE_32_PIXEL_DISPATCH 0 66 67#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 68#error "Must select at least 8, 16 or 32 pixel dispatch" 69#endif 70 71#define GEN6_MAX_SIZE 8192 72 73struct gt_info { 74 const char *name; 75 int max_vs_threads; 76 int max_gs_threads; 77 int max_wm_threads; 78 struct { 79 int size; 80 int max_vs_entries; 81 int max_gs_entries; 82 } urb; 83 int gt; 84}; 85 86static const struct gt_info gt1_info = { 87 .name = "Sandybridge (gen6, gt1)", 88 .max_vs_threads = 24, 89 .max_gs_threads = 21, 90 .max_wm_threads = 40, 91 .urb = { 32, 256, 256 }, 92 .gt = 1, 93}; 94 95static const struct gt_info gt2_info = { 96 .name = "Sandybridge (gen6, gt2)", 97 .max_vs_threads = 60, 98 .max_gs_threads = 60, 99 .max_wm_threads = 80, 100 .urb = { 64, 256, 256 }, 101 .gt = 2, 102}; 103 104static const uint32_t ps_kernel_packed[][4] = { 105#include "exa_wm_src_affine.g6b" 106#include "exa_wm_src_sample_argb.g6b" 107#include "exa_wm_yuv_rgb.g6b" 108#include "exa_wm_write.g6b" 109}; 110 111static const uint32_t ps_kernel_planar[][4] = { 112#include "exa_wm_src_affine.g6b" 113#include "exa_wm_src_sample_planar.g6b" 114#include "exa_wm_yuv_rgb.g6b" 115#include "exa_wm_write.g6b" 116}; 117 118#define NOKERNEL(kernel_enum, func, ns) \ 119 [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} 120#define KERNEL(kernel_enum, kernel, ns) \ 121 [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} 122 123static const struct wm_kernel_info { 124 const char *name; 125 const void *data; 126 unsigned int size; 127 unsigned int num_surfaces; 128} wm_kernels[] = { 129 NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 130 NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 131 132 NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 133 NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 134 135 NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 136 NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), 137 138 NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 139 NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 140 141 NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 142 NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 143 144 KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 145 KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 146}; 147#undef KERNEL 148 149static const struct blendinfo { 150 bool src_alpha; 151 uint32_t src_blend; 152 uint32_t dst_blend; 153} gen6_blend_op[] = { 154 /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, 155 /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, 156 /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, 157 /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 158 /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, 159 /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 160 /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, 161 /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 162 /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 163 /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 164 /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, 165 /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 166 /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, 167}; 168 169/** 170 * Highest-valued BLENDFACTOR used in gen6_blend_op. 171 * 172 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, 173 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 174 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 175 */ 176#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) 177 178#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 179 180#define BLEND_OFFSET(s, d) \ 181 (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) 182 183#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) 184#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) 185 186#define SAMPLER_OFFSET(sf, se, mf, me) \ 187 (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) 188 189#define VERTEX_2s2s 0 190 191#define COPY_SAMPLER 0 192#define COPY_VERTEX VERTEX_2s2s 193#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) 194 195#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) 196#define FILL_VERTEX VERTEX_2s2s 197#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 198#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 199 200#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) 201#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) 202#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) 203#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) 204#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 205 206#define OUT_BATCH(v) batch_emit(sna, v) 207#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 208#define OUT_VERTEX_F(v) vertex_emit(sna, v) 209 210static inline bool too_large(int width, int height) 211{ 212 return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; 213} 214 215static uint32_t gen6_get_blend(int op, 216 bool has_component_alpha, 217 uint32_t dst_format) 218{ 219 uint32_t src, dst; 220 221 src = gen6_blend_op[op].src_blend; 222 dst = gen6_blend_op[op].dst_blend; 223 224 /* If there's no dst alpha channel, adjust the blend op so that 225 * we'll treat it always as 1. 226 */ 227 if (PICT_FORMAT_A(dst_format) == 0) { 228 if (src == GEN6_BLENDFACTOR_DST_ALPHA) 229 src = GEN6_BLENDFACTOR_ONE; 230 else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) 231 src = GEN6_BLENDFACTOR_ZERO; 232 } 233 234 /* If the source alpha is being used, then we should only be in a 235 * case where the source blend factor is 0, and the source blend 236 * value is the mask channels multiplied by the source picture's alpha. 237 */ 238 if (has_component_alpha && gen6_blend_op[op].src_alpha) { 239 if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) 240 dst = GEN6_BLENDFACTOR_SRC_COLOR; 241 else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) 242 dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; 243 } 244 245 DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 246 op, dst_format, PICT_FORMAT_A(dst_format), 247 src, dst, (int)BLEND_OFFSET(src, dst))); 248 return BLEND_OFFSET(src, dst); 249} 250 251static uint32_t gen6_get_card_format(PictFormat format) 252{ 253 switch (format) { 254 default: 255 return -1; 256 case PICT_a8r8g8b8: 257 return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 258 case PICT_x8r8g8b8: 259 return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; 260 case PICT_a8b8g8r8: 261 return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 262 case PICT_x8b8g8r8: 263 return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; 264#ifdef PICT_a2r10g10b10 265 case PICT_a2r10g10b10: 266 return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 267 case PICT_x2r10g10b10: 268 return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; 269#endif 270 case PICT_r8g8b8: 271 return GEN6_SURFACEFORMAT_R8G8B8_UNORM; 272 case PICT_r5g6b5: 273 return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 274 case PICT_a1r5g5b5: 275 return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 276 case PICT_a8: 277 return GEN6_SURFACEFORMAT_A8_UNORM; 278 case PICT_a4r4g4b4: 279 return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 280 } 281} 282 283static uint32_t gen6_get_dest_format(PictFormat format) 284{ 285 switch (format) { 286 default: 287 return -1; 288 case PICT_a8r8g8b8: 289 case PICT_x8r8g8b8: 290 return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 291 case PICT_a8b8g8r8: 292 case PICT_x8b8g8r8: 293 return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 294#ifdef PICT_a2r10g10b10 295 case PICT_a2r10g10b10: 296 case PICT_x2r10g10b10: 297 return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 298#endif 299 case PICT_r5g6b5: 300 return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 301 case PICT_x1r5g5b5: 302 case PICT_a1r5g5b5: 303 return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 304 case PICT_a8: 305 return GEN6_SURFACEFORMAT_A8_UNORM; 306 case PICT_a4r4g4b4: 307 case PICT_x4r4g4b4: 308 return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 309 } 310} 311 312static bool gen6_check_dst_format(PictFormat format) 313{ 314 if (gen6_get_dest_format(format) != -1) 315 return true; 316 317 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 318 return false; 319} 320 321static bool gen6_check_format(uint32_t format) 322{ 323 if (gen6_get_card_format(format) != -1) 324 return true; 325 326 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 327 return false; 328} 329 330static uint32_t gen6_filter(uint32_t filter) 331{ 332 switch (filter) { 333 default: 334 assert(0); 335 case PictFilterNearest: 336 return SAMPLER_FILTER_NEAREST; 337 case PictFilterBilinear: 338 return SAMPLER_FILTER_BILINEAR; 339 } 340} 341 342static uint32_t gen6_check_filter(PicturePtr picture) 343{ 344 switch (picture->filter) { 345 case PictFilterNearest: 346 case PictFilterBilinear: 347 return true; 348 default: 349 return false; 350 } 351} 352 353static uint32_t gen6_repeat(uint32_t repeat) 354{ 355 switch (repeat) { 356 default: 357 assert(0); 358 case RepeatNone: 359 return SAMPLER_EXTEND_NONE; 360 case RepeatNormal: 361 return SAMPLER_EXTEND_REPEAT; 362 case RepeatPad: 363 return SAMPLER_EXTEND_PAD; 364 case RepeatReflect: 365 return SAMPLER_EXTEND_REFLECT; 366 } 367} 368 369static bool gen6_check_repeat(PicturePtr picture) 370{ 371 if (!picture->repeat) 372 return true; 373 374 switch (picture->repeatType) { 375 case RepeatNone: 376 case RepeatNormal: 377 case RepeatPad: 378 case RepeatReflect: 379 return true; 380 default: 381 return false; 382 } 383} 384 385static int 386gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 387{ 388 int base; 389 390 if (has_mask) { 391 if (is_ca) { 392 if (gen6_blend_op[op].src_alpha) 393 base = GEN6_WM_KERNEL_MASKSA; 394 else 395 base = GEN6_WM_KERNEL_MASKCA; 396 } else 397 base = GEN6_WM_KERNEL_MASK; 398 } else 399 base = GEN6_WM_KERNEL_NOMASK; 400 401 return base + !is_affine; 402} 403 404inline static void 405gen6_emit_pipe_invalidate(struct sna *sna) 406{ 407 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 408 OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | 409 GEN6_PIPE_CONTROL_TC_FLUSH | 410 GEN6_PIPE_CONTROL_CS_STALL); 411 OUT_BATCH(0); 412 OUT_BATCH(0); 413} 414 415inline static void 416gen6_emit_pipe_flush(struct sna *sna, bool need_stall) 417{ 418 unsigned stall; 419 420 stall = 0; 421 if (need_stall) 422 stall = GEN6_PIPE_CONTROL_CS_STALL; 423 424 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 425 OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | stall); 426 OUT_BATCH(0); 427 OUT_BATCH(0); 428} 429 430inline static void 431gen6_emit_pipe_stall(struct sna *sna) 432{ 433 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 434 OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | 435 GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); 436 OUT_BATCH(0); 437 OUT_BATCH(0); 438} 439 440static void 441gen6_emit_urb(struct sna *sna) 442{ 443 OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); 444 OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | 445 (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ 446 OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | 447 (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ 448} 449 450static void 451gen6_emit_state_base_address(struct sna *sna) 452{ 453 OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); 454 OUT_BATCH(0); /* general */ 455 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 456 sna->kgem.nbatch, 457 NULL, 458 I915_GEM_DOMAIN_INSTRUCTION << 16, 459 BASE_ADDRESS_MODIFY)); 460 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 461 sna->kgem.nbatch, 462 sna->render_state.gen6.general_bo, 463 I915_GEM_DOMAIN_INSTRUCTION << 16, 464 BASE_ADDRESS_MODIFY)); 465 OUT_BATCH(0); /* indirect */ 466 OUT_BATCH(kgem_add_reloc(&sna->kgem, 467 sna->kgem.nbatch, 468 sna->render_state.gen6.general_bo, 469 I915_GEM_DOMAIN_INSTRUCTION << 16, 470 BASE_ADDRESS_MODIFY)); 471 472 /* upper bounds, disable */ 473 OUT_BATCH(0); 474 OUT_BATCH(BASE_ADDRESS_MODIFY); 475 OUT_BATCH(0); 476 OUT_BATCH(BASE_ADDRESS_MODIFY); 477} 478 479static void 480gen6_emit_viewports(struct sna *sna) 481{ 482 OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | 483 GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | 484 (4 - 2)); 485 OUT_BATCH(0); 486 OUT_BATCH(0); 487 OUT_BATCH(0); 488} 489 490static void 491gen6_emit_vs(struct sna *sna) 492{ 493 /* disable VS constant buffer */ 494 OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); 495 OUT_BATCH(0); 496 OUT_BATCH(0); 497 OUT_BATCH(0); 498 OUT_BATCH(0); 499 500 OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); 501 OUT_BATCH(0); /* no VS kernel */ 502 OUT_BATCH(0); 503 OUT_BATCH(0); 504 OUT_BATCH(0); 505 OUT_BATCH(0); /* pass-through */ 506} 507 508static void 509gen6_emit_gs(struct sna *sna) 510{ 511 /* disable GS constant buffer */ 512 OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); 513 OUT_BATCH(0); 514 OUT_BATCH(0); 515 OUT_BATCH(0); 516 OUT_BATCH(0); 517 518 OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); 519 OUT_BATCH(0); /* no GS kernel */ 520 OUT_BATCH(0); 521 OUT_BATCH(0); 522 OUT_BATCH(0); 523 OUT_BATCH(0); 524 OUT_BATCH(0); /* pass-through */ 525} 526 527static void 528gen6_emit_clip(struct sna *sna) 529{ 530 OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); 531 OUT_BATCH(0); 532 OUT_BATCH(0); /* pass-through */ 533 OUT_BATCH(0); 534} 535 536static void 537gen6_emit_wm_constants(struct sna *sna) 538{ 539 /* disable WM constant buffer */ 540 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); 541 OUT_BATCH(0); 542 OUT_BATCH(0); 543 OUT_BATCH(0); 544 OUT_BATCH(0); 545} 546 547static void 548gen6_emit_null_depth_buffer(struct sna *sna) 549{ 550 OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); 551 OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 552 GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 553 OUT_BATCH(0); 554 OUT_BATCH(0); 555 OUT_BATCH(0); 556 OUT_BATCH(0); 557 OUT_BATCH(0); 558 559 OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); 560 OUT_BATCH(0); 561} 562 563static void 564gen6_emit_invariant(struct sna *sna) 565{ 566 OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); 567 568 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); 569 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 570 GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 571 OUT_BATCH(0); 572 573 OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); 574 OUT_BATCH(1); 575 576 gen6_emit_urb(sna); 577 578 gen6_emit_state_base_address(sna); 579 580 gen6_emit_viewports(sna); 581 gen6_emit_vs(sna); 582 gen6_emit_gs(sna); 583 gen6_emit_clip(sna); 584 gen6_emit_wm_constants(sna); 585 gen6_emit_null_depth_buffer(sna); 586 587 sna->render_state.gen6.needs_invariant = false; 588} 589 590static void 591gen6_emit_cc(struct sna *sna, int blend) 592{ 593 struct gen6_render_state *render = &sna->render_state.gen6; 594 595 if (render->blend == blend) 596 return; 597 598 DBG(("%s: blend = %x\n", __FUNCTION__, blend)); 599 600 OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); 601 OUT_BATCH((render->cc_blend + blend) | 1); 602 if (render->blend == (unsigned)-1) { 603 OUT_BATCH(1); 604 OUT_BATCH(1); 605 } else { 606 OUT_BATCH(0); 607 OUT_BATCH(0); 608 } 609 610 render->blend = blend; 611} 612 613static void 614gen6_emit_sampler(struct sna *sna, uint32_t state) 615{ 616 if (sna->render_state.gen6.samplers == state) 617 return; 618 619 sna->render_state.gen6.samplers = state; 620 621 DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 622 623 OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | 624 GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | 625 (4 - 2)); 626 OUT_BATCH(0); /* VS */ 627 OUT_BATCH(0); /* GS */ 628 OUT_BATCH(sna->render_state.gen6.wm_state + state); 629} 630 631static void 632gen6_emit_sf(struct sna *sna, bool has_mask) 633{ 634 int num_sf_outputs = has_mask ? 2 : 1; 635 636 if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) 637 return; 638 639 DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 640 __FUNCTION__, num_sf_outputs, 1, 0)); 641 642 sna->render_state.gen6.num_sf_outputs = num_sf_outputs; 643 644 OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); 645 OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | 646 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | 647 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); 648 OUT_BATCH(0); 649 OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); 650 OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ 651 OUT_BATCH(0); 652 OUT_BATCH(0); 653 OUT_BATCH(0); 654 OUT_BATCH(0); 655 OUT_BATCH(0); /* DW9 */ 656 OUT_BATCH(0); 657 OUT_BATCH(0); 658 OUT_BATCH(0); 659 OUT_BATCH(0); 660 OUT_BATCH(0); /* DW14 */ 661 OUT_BATCH(0); 662 OUT_BATCH(0); 663 OUT_BATCH(0); 664 OUT_BATCH(0); 665 OUT_BATCH(0); /* DW19 */ 666} 667 668static void 669gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) 670{ 671 const uint32_t *kernels; 672 673 if (sna->render_state.gen6.kernel == kernel) 674 return; 675 676 sna->render_state.gen6.kernel = kernel; 677 kernels = sna->render_state.gen6.wm_kernel[kernel]; 678 679 DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", 680 __FUNCTION__, 681 wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, 682 kernels[0], kernels[1], kernels[2])); 683 684 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 685 OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 686 OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | 687 wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); 688 OUT_BATCH(0); /* scratch space */ 689 OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | 690 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | 691 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); 692 OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | 693 (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | 694 (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | 695 (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | 696 GEN6_3DSTATE_WM_DISPATCH_ENABLE); 697 OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | 698 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 699 OUT_BATCH(kernels[2]); 700 OUT_BATCH(kernels[1]); 701} 702 703static bool 704gen6_emit_binding_table(struct sna *sna, uint16_t offset) 705{ 706 if (sna->render_state.gen6.surface_table == offset) 707 return false; 708 709 /* Binding table pointers */ 710 OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | 711 GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | 712 (4 - 2)); 713 OUT_BATCH(0); /* vs */ 714 OUT_BATCH(0); /* gs */ 715 /* Only the PS uses the binding table */ 716 OUT_BATCH(offset*4); 717 718 sna->render_state.gen6.surface_table = offset; 719 return true; 720} 721 722static bool 723gen6_emit_drawing_rectangle(struct sna *sna, 724 const struct sna_composite_op *op) 725{ 726 uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 727 uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 728 729 assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 730 assert(!too_large(op->dst.width, op->dst.height)); 731 732 if (sna->render_state.gen6.drawrect_limit == limit && 733 sna->render_state.gen6.drawrect_offset == offset) 734 return true; 735 736 /* [DevSNB-C+{W/A}] Before any depth stall flush (including those 737 * produced by non-pipelined state commands), software needs to first 738 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 739 * 0. 740 * 741 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 742 * BEFORE the pipe-control with a post-sync op and no write-cache 743 * flushes. 744 */ 745 if (!sna->render_state.gen6.first_state_packet) 746 gen6_emit_pipe_stall(sna); 747 748 OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 749 OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); 750 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 751 sna->render_state.gen6.general_bo, 752 I915_GEM_DOMAIN_INSTRUCTION << 16 | 753 I915_GEM_DOMAIN_INSTRUCTION, 754 64)); 755 OUT_BATCH(0); 756 757 DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n", 758 __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.height)); 759 OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 760 OUT_BATCH(0); 761 OUT_BATCH(limit); 762 OUT_BATCH(offset); 763 764 sna->render_state.gen6.drawrect_offset = offset; 765 sna->render_state.gen6.drawrect_limit = limit; 766 return false; 767} 768 769static void 770gen6_emit_vertex_elements(struct sna *sna, 771 const struct sna_composite_op *op) 772{ 773 /* 774 * vertex data in vertex buffer 775 * position: (x, y) 776 * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 777 * texture coordinate 1 if (has_mask is true): same as above 778 */ 779 struct gen6_render_state *render = &sna->render_state.gen6; 780 uint32_t src_format, dw; 781 int id = GEN6_VERTEX(op->u.gen6.flags); 782 bool has_mask; 783 784 DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 785 786 if (render->ve_id == id) 787 return; 788 render->ve_id = id; 789 790 /* The VUE layout 791 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 792 * dword 4-7: position (x, y, 1.0, 1.0), 793 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 794 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 795 * 796 * dword 4-15 are fetched from vertex buffer 797 */ 798 has_mask = (id >> 2) != 0; 799 OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | 800 ((2 * (3 + has_mask)) + 1 - 2)); 801 802 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 803 GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | 804 0 << VE0_OFFSET_SHIFT); 805 OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | 806 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | 807 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 808 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); 809 810 /* x,y */ 811 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 812 GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 813 0 << VE0_OFFSET_SHIFT); 814 OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | 815 GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | 816 GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 817 GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); 818 819 /* u0, v0, w0 */ 820 DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 821 dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 822 switch (id & 3) { 823 default: 824 assert(0); 825 case 0: 826 src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; 827 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 828 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 829 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 830 break; 831 case 1: 832 src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 833 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 834 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 835 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 836 break; 837 case 2: 838 src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 839 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 840 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 841 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 842 break; 843 case 3: 844 src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 845 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 846 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 847 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 848 break; 849 } 850 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 851 src_format << VE0_FORMAT_SHIFT | 852 4 << VE0_OFFSET_SHIFT); 853 OUT_BATCH(dw); 854 855 /* u1, v1, w1 */ 856 if (has_mask) { 857 unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 858 DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 859 dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 860 switch (id >> 2) { 861 case 1: 862 src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 863 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 864 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 865 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 866 break; 867 default: 868 assert(0); 869 case 2: 870 src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 871 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 872 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 873 dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 874 break; 875 case 3: 876 src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 877 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 878 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 879 dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 880 break; 881 } 882 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 883 src_format << VE0_FORMAT_SHIFT | 884 offset << VE0_OFFSET_SHIFT); 885 OUT_BATCH(dw); 886 } 887} 888 889static void 890gen6_emit_state(struct sna *sna, 891 const struct sna_composite_op *op, 892 uint16_t wm_binding_table) 893{ 894 bool need_invalidate; 895 bool need_flush; 896 bool need_stall; 897 898 assert(op->dst.bo->exec); 899 900 need_flush = wm_binding_table & 1; 901 if (ALWAYS_FLUSH) 902 need_flush = true; 903 904 wm_binding_table &= ~1; 905 need_stall = sna->render_state.gen6.surface_table != wm_binding_table; 906 if (ALWAYS_STALL) 907 need_stall = true; 908 909 need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 910 if (ALWAYS_INVALIDATE) 911 need_invalidate = true; 912 913 if (need_invalidate) { 914 gen6_emit_pipe_invalidate(sna); 915 kgem_clear_dirty(&sna->kgem); 916 assert(op->dst.bo->exec); 917 kgem_bo_mark_dirty(op->dst.bo); 918 919 need_flush = false; 920 need_stall = false; 921 sna->render_state.gen6.first_state_packet = true; 922 } 923 if (need_flush) { 924 gen6_emit_pipe_flush(sna, need_stall); 925 need_stall = false; 926 sna->render_state.gen6.first_state_packet = true; 927 } 928 929 need_stall &= gen6_emit_drawing_rectangle(sna, op); 930 if (need_stall) 931 gen6_emit_pipe_stall(sna); 932 933 gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); 934 gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); 935 gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); 936 gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); 937 gen6_emit_vertex_elements(sna, op); 938 gen6_emit_binding_table(sna, wm_binding_table); 939 940 sna->render_state.gen6.first_state_packet = false; 941} 942 943static bool gen6_magic_ca_pass(struct sna *sna, 944 const struct sna_composite_op *op) 945{ 946 struct gen6_render_state *state = &sna->render_state.gen6; 947 948 if (!op->need_magic_ca_pass) 949 return false; 950 951 DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 952 sna->render.vertex_start, sna->render.vertex_index)); 953 954 gen6_emit_pipe_stall(sna); 955 956 gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); 957 gen6_emit_wm(sna, 958 gen6_choose_composite_kernel(PictOpAdd, 959 true, true, 960 op->is_affine), 961 true); 962 963 OUT_BATCH(GEN6_3DPRIMITIVE | 964 GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 965 _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 966 0 << 9 | 967 4); 968 OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 969 OUT_BATCH(sna->render.vertex_start); 970 OUT_BATCH(1); /* single instance */ 971 OUT_BATCH(0); /* start instance location */ 972 OUT_BATCH(0); /* index buffer offset, ignored */ 973 974 state->last_primitive = sna->kgem.nbatch; 975 return true; 976} 977 978typedef struct gen6_surface_state_padded { 979 struct gen6_surface_state state; 980 char pad[32 - sizeof(struct gen6_surface_state)]; 981} gen6_surface_state_padded; 982 983static void null_create(struct sna_static_stream *stream) 984{ 985 /* A bunch of zeros useful for legacy border color and depth-stencil */ 986 sna_static_stream_map(stream, 64, 64); 987} 988 989static void scratch_create(struct sna_static_stream *stream) 990{ 991 /* 64 bytes of scratch space for random writes, such as 992 * the pipe-control w/a. 993 */ 994 sna_static_stream_map(stream, 64, 64); 995} 996 997static void 998sampler_state_init(struct gen6_sampler_state *sampler_state, 999 sampler_filter_t filter, 1000 sampler_extend_t extend) 1001{ 1002 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 1003 1004 /* We use the legacy mode to get the semantics specified by 1005 * the Render extension. */ 1006 sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; 1007 1008 switch (filter) { 1009 default: 1010 case SAMPLER_FILTER_NEAREST: 1011 sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; 1012 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; 1013 break; 1014 case SAMPLER_FILTER_BILINEAR: 1015 sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; 1016 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; 1017 break; 1018 } 1019 1020 switch (extend) { 1021 default: 1022 case SAMPLER_EXTEND_NONE: 1023 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1024 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1025 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1026 break; 1027 case SAMPLER_EXTEND_REPEAT: 1028 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1029 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1030 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1031 break; 1032 case SAMPLER_EXTEND_PAD: 1033 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1034 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1035 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1036 break; 1037 case SAMPLER_EXTEND_REFLECT: 1038 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1039 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1040 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1041 break; 1042 } 1043} 1044 1045static void 1046sampler_copy_init(struct gen6_sampler_state *ss) 1047{ 1048 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1049 ss->ss3.non_normalized_coord = 1; 1050 1051 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1052} 1053 1054static void 1055sampler_fill_init(struct gen6_sampler_state *ss) 1056{ 1057 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1058 ss->ss3.non_normalized_coord = 1; 1059 1060 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1061} 1062 1063static uint32_t 1064gen6_tiling_bits(uint32_t tiling) 1065{ 1066 switch (tiling) { 1067 default: assert(0); 1068 case I915_TILING_NONE: return 0; 1069 case I915_TILING_X: return GEN6_SURFACE_TILED; 1070 case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; 1071 } 1072} 1073 1074/** 1075 * Sets up the common fields for a surface state buffer for the given 1076 * picture in the given surface state buffer. 1077 */ 1078static int 1079gen6_bind_bo(struct sna *sna, 1080 struct kgem_bo *bo, 1081 uint32_t width, 1082 uint32_t height, 1083 uint32_t format, 1084 bool is_dst) 1085{ 1086 uint32_t *ss; 1087 uint32_t domains; 1088 uint16_t offset; 1089 uint32_t is_scanout = is_dst && bo->scanout; 1090 1091 /* After the first bind, we manage the cache domains within the batch */ 1092 offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1093 if (offset) { 1094 DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", 1095 offset, bo->handle, format, 1096 is_dst ? "render" : "sampler")); 1097 assert(offset >= sna->kgem.surface); 1098 if (is_dst) 1099 kgem_bo_mark_dirty(bo); 1100 return offset * sizeof(uint32_t); 1101 } 1102 1103 offset = sna->kgem.surface -= 1104 sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1105 ss = sna->kgem.batch + offset; 1106 ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | 1107 GEN6_SURFACE_BLEND_ENABLED | 1108 format << GEN6_SURFACE_FORMAT_SHIFT); 1109 if (is_dst) { 1110 ss[0] |= GEN6_SURFACE_RC_READ_WRITE; 1111 domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1112 } else 1113 domains = I915_GEM_DOMAIN_SAMPLER << 16; 1114 ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 1115 ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | 1116 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); 1117 assert(bo->pitch <= (1 << 18)); 1118 ss[3] = (gen6_tiling_bits(bo->tiling) | 1119 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); 1120 ss[4] = 0; 1121 ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16; 1122 1123 kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1124 1125 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1126 offset, bo->handle, ss[1], 1127 format, width, height, bo->pitch, bo->tiling, 1128 domains & 0xffff ? "render" : "sampler")); 1129 1130 return offset * sizeof(uint32_t); 1131} 1132 1133static void gen6_emit_vertex_buffer(struct sna *sna, 1134 const struct sna_composite_op *op) 1135{ 1136 int id = GEN6_VERTEX(op->u.gen6.flags); 1137 1138 OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); 1139 OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | 1140 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); 1141 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1142 OUT_BATCH(0); 1143 OUT_BATCH(~0); /* max address: disabled */ 1144 OUT_BATCH(0); 1145 1146 sna->render.vb_id |= 1 << id; 1147} 1148 1149static void gen6_emit_primitive(struct sna *sna) 1150{ 1151 if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { 1152 DBG(("%s: continuing previous primitive, start=%d, index=%d\n", 1153 __FUNCTION__, 1154 sna->render.vertex_start, 1155 sna->render.vertex_index)); 1156 sna->render.vertex_offset = sna->kgem.nbatch - 5; 1157 return; 1158 } 1159 1160 OUT_BATCH(GEN6_3DPRIMITIVE | 1161 GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1162 _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 1163 0 << 9 | 1164 4); 1165 sna->render.vertex_offset = sna->kgem.nbatch; 1166 OUT_BATCH(0); /* vertex count, to be filled in later */ 1167 OUT_BATCH(sna->render.vertex_index); 1168 OUT_BATCH(1); /* single instance */ 1169 OUT_BATCH(0); /* start instance location */ 1170 OUT_BATCH(0); /* index buffer offset, ignored */ 1171 sna->render.vertex_start = sna->render.vertex_index; 1172 DBG(("%s: started new primitive: index=%d\n", 1173 __FUNCTION__, sna->render.vertex_start)); 1174 1175 sna->render_state.gen6.last_primitive = sna->kgem.nbatch; 1176} 1177 1178static bool gen6_rectangle_begin(struct sna *sna, 1179 const struct sna_composite_op *op) 1180{ 1181 int id = 1 << GEN6_VERTEX(op->u.gen6.flags); 1182 int ndwords; 1183 1184 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1185 return true; 1186 1187 ndwords = op->need_magic_ca_pass ? 60 : 6; 1188 if ((sna->render.vb_id & id) == 0) 1189 ndwords += 5; 1190 if (!kgem_check_batch(&sna->kgem, ndwords)) 1191 return false; 1192 1193 if ((sna->render.vb_id & id) == 0) 1194 gen6_emit_vertex_buffer(sna, op); 1195 1196 gen6_emit_primitive(sna); 1197 return true; 1198} 1199 1200static int gen6_get_rectangles__flush(struct sna *sna, 1201 const struct sna_composite_op *op) 1202{ 1203 /* Preventing discarding new vbo after lock contention */ 1204 if (sna_vertex_wait__locked(&sna->render)) { 1205 int rem = vertex_space(sna); 1206 if (rem > op->floats_per_rect) 1207 return rem; 1208 } 1209 1210 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) 1211 return 0; 1212 if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1213 return 0; 1214 1215 if (sna->render.vertex_offset) { 1216 gen4_vertex_flush(sna); 1217 if (gen6_magic_ca_pass(sna, op)) { 1218 gen6_emit_pipe_stall(sna); 1219 gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); 1220 gen6_emit_wm(sna, 1221 GEN6_KERNEL(op->u.gen6.flags), 1222 GEN6_VERTEX(op->u.gen6.flags) >> 2); 1223 } 1224 } 1225 1226 return gen4_vertex_finish(sna); 1227} 1228 1229inline static int gen6_get_rectangles(struct sna *sna, 1230 const struct sna_composite_op *op, 1231 int want, 1232 void (*emit_state)(struct sna *, const struct sna_composite_op *op)) 1233{ 1234 int rem; 1235 1236 assert(want); 1237 1238start: 1239 rem = vertex_space(sna); 1240 if (unlikely(rem < op->floats_per_rect)) { 1241 DBG(("flushing vbo for %s: %d < %d\n", 1242 __FUNCTION__, rem, op->floats_per_rect)); 1243 rem = gen6_get_rectangles__flush(sna, op); 1244 if (unlikely(rem == 0)) 1245 goto flush; 1246 } 1247 1248 if (unlikely(sna->render.vertex_offset == 0)) { 1249 if (!gen6_rectangle_begin(sna, op)) 1250 goto flush; 1251 else 1252 goto start; 1253 } 1254 1255 assert(rem <= vertex_space(sna)); 1256 assert(op->floats_per_rect <= rem); 1257 if (want > 1 && want * op->floats_per_rect > rem) 1258 want = rem / op->floats_per_rect; 1259 1260 assert(want > 0); 1261 sna->render.vertex_index += 3*want; 1262 return want; 1263 1264flush: 1265 if (sna->render.vertex_offset) { 1266 gen4_vertex_flush(sna); 1267 gen6_magic_ca_pass(sna, op); 1268 } 1269 sna_vertex_wait__locked(&sna->render); 1270 _kgem_submit(&sna->kgem); 1271 emit_state(sna, op); 1272 goto start; 1273} 1274 1275inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, 1276 uint16_t *offset) 1277{ 1278 uint32_t *table; 1279 1280 sna->kgem.surface -= 1281 sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1282 /* Clear all surplus entries to zero in case of prefetch */ 1283 table = memset(sna->kgem.batch + sna->kgem.surface, 1284 0, sizeof(struct gen6_surface_state_padded)); 1285 1286 DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1287 1288 *offset = sna->kgem.surface; 1289 return table; 1290} 1291 1292static bool 1293gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) 1294{ 1295 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1296 1297 if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { 1298 DBG(("%s: flushing batch: %d < %d+%d\n", 1299 __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1300 150, 4*8)); 1301 kgem_submit(&sna->kgem); 1302 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1303 } 1304 1305 if (sna->render_state.gen6.needs_invariant) 1306 gen6_emit_invariant(sna); 1307 1308 return kgem_bo_is_dirty(op->dst.bo); 1309} 1310 1311static void gen6_emit_composite_state(struct sna *sna, 1312 const struct sna_composite_op *op) 1313{ 1314 uint32_t *binding_table; 1315 uint16_t offset; 1316 bool dirty; 1317 1318 dirty = gen6_get_batch(sna, op); 1319 1320 binding_table = gen6_composite_get_binding_table(sna, &offset); 1321 1322 binding_table[0] = 1323 gen6_bind_bo(sna, 1324 op->dst.bo, op->dst.width, op->dst.height, 1325 gen6_get_dest_format(op->dst.format), 1326 true); 1327 binding_table[1] = 1328 gen6_bind_bo(sna, 1329 op->src.bo, op->src.width, op->src.height, 1330 op->src.card_format, 1331 false); 1332 if (op->mask.bo) { 1333 binding_table[2] = 1334 gen6_bind_bo(sna, 1335 op->mask.bo, 1336 op->mask.width, 1337 op->mask.height, 1338 op->mask.card_format, 1339 false); 1340 } 1341 1342 if (sna->kgem.surface == offset && 1343 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && 1344 (op->mask.bo == NULL || 1345 sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { 1346 sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1347 offset = sna->render_state.gen6.surface_table; 1348 } 1349 1350 gen6_emit_state(sna, op, offset | dirty); 1351} 1352 1353static void 1354gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1355{ 1356 assert (sna->render.vertex_offset == 0); 1357 if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { 1358 DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1359 sna->render_state.gen6.floats_per_vertex, 1360 op->floats_per_vertex)); 1361 gen4_vertex_align(sna, op); 1362 sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; 1363 } 1364 assert((sna->render.vertex_used % op->floats_per_vertex) == 0); 1365} 1366 1367fastcall static void 1368gen6_render_composite_blt(struct sna *sna, 1369 const struct sna_composite_op *op, 1370 const struct sna_composite_rectangles *r) 1371{ 1372 gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); 1373 op->prim_emit(sna, op, r); 1374} 1375 1376fastcall static void 1377gen6_render_composite_box(struct sna *sna, 1378 const struct sna_composite_op *op, 1379 const BoxRec *box) 1380{ 1381 struct sna_composite_rectangles r; 1382 1383 gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); 1384 1385 DBG((" %s: (%d, %d), (%d, %d)\n", 1386 __FUNCTION__, 1387 box->x1, box->y1, box->x2, box->y2)); 1388 1389 r.dst.x = box->x1; 1390 r.dst.y = box->y1; 1391 r.width = box->x2 - box->x1; 1392 r.height = box->y2 - box->y1; 1393 r.src = r.mask = r.dst; 1394 1395 op->prim_emit(sna, op, &r); 1396} 1397 1398static void 1399gen6_render_composite_boxes__blt(struct sna *sna, 1400 const struct sna_composite_op *op, 1401 const BoxRec *box, int nbox) 1402{ 1403 DBG(("composite_boxes(%d)\n", nbox)); 1404 1405 do { 1406 int nbox_this_time; 1407 1408 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1409 gen6_emit_composite_state); 1410 nbox -= nbox_this_time; 1411 1412 do { 1413 struct sna_composite_rectangles r; 1414 1415 DBG((" %s: (%d, %d), (%d, %d)\n", 1416 __FUNCTION__, 1417 box->x1, box->y1, box->x2, box->y2)); 1418 1419 r.dst.x = box->x1; 1420 r.dst.y = box->y1; 1421 r.width = box->x2 - box->x1; 1422 r.height = box->y2 - box->y1; 1423 r.src = r.mask = r.dst; 1424 1425 op->prim_emit(sna, op, &r); 1426 box++; 1427 } while (--nbox_this_time); 1428 } while (nbox); 1429} 1430 1431static void 1432gen6_render_composite_boxes(struct sna *sna, 1433 const struct sna_composite_op *op, 1434 const BoxRec *box, int nbox) 1435{ 1436 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1437 1438 do { 1439 int nbox_this_time; 1440 float *v; 1441 1442 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1443 gen6_emit_composite_state); 1444 assert(nbox_this_time); 1445 nbox -= nbox_this_time; 1446 1447 v = sna->render.vertices + sna->render.vertex_used; 1448 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1449 1450 op->emit_boxes(op, box, nbox_this_time, v); 1451 box += nbox_this_time; 1452 } while (nbox); 1453} 1454 1455static void 1456gen6_render_composite_boxes__thread(struct sna *sna, 1457 const struct sna_composite_op *op, 1458 const BoxRec *box, int nbox) 1459{ 1460 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1461 1462 sna_vertex_lock(&sna->render); 1463 do { 1464 int nbox_this_time; 1465 float *v; 1466 1467 nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1468 gen6_emit_composite_state); 1469 assert(nbox_this_time); 1470 nbox -= nbox_this_time; 1471 1472 v = sna->render.vertices + sna->render.vertex_used; 1473 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1474 1475 sna_vertex_acquire__locked(&sna->render); 1476 sna_vertex_unlock(&sna->render); 1477 1478 op->emit_boxes(op, box, nbox_this_time, v); 1479 box += nbox_this_time; 1480 1481 sna_vertex_lock(&sna->render); 1482 sna_vertex_release__locked(&sna->render); 1483 } while (nbox); 1484 sna_vertex_unlock(&sna->render); 1485} 1486 1487#ifndef MAX 1488#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1489#endif 1490 1491static uint32_t 1492gen6_composite_create_blend_state(struct sna_static_stream *stream) 1493{ 1494 char *base, *ptr; 1495 int src, dst; 1496 1497 base = sna_static_stream_map(stream, 1498 GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 1499 64); 1500 1501 ptr = base; 1502 for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { 1503 for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { 1504 struct gen6_blend_state *blend = 1505 (struct gen6_blend_state *)ptr; 1506 1507 blend->blend0.dest_blend_factor = dst; 1508 blend->blend0.source_blend_factor = src; 1509 blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; 1510 blend->blend0.blend_enable = 1511 !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); 1512 1513 blend->blend1.post_blend_clamp_enable = 1; 1514 blend->blend1.pre_blend_clamp_enable = 1; 1515 1516 ptr += GEN6_BLEND_STATE_PADDED_SIZE; 1517 } 1518 } 1519 1520 return sna_static_stream_offsetof(stream, base); 1521} 1522 1523static uint32_t gen6_bind_video_source(struct sna *sna, 1524 struct kgem_bo *src_bo, 1525 uint32_t src_offset, 1526 int src_width, 1527 int src_height, 1528 int src_pitch, 1529 uint32_t src_surf_format) 1530{ 1531 struct gen6_surface_state *ss; 1532 1533 sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1534 1535 ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); 1536 ss->ss0.surface_type = GEN6_SURFACE_2D; 1537 ss->ss0.surface_format = src_surf_format; 1538 1539 ss->ss1.base_addr = 1540 kgem_add_reloc(&sna->kgem, 1541 sna->kgem.surface + 1, 1542 src_bo, 1543 I915_GEM_DOMAIN_SAMPLER << 16, 1544 src_offset); 1545 1546 ss->ss2.width = src_width - 1; 1547 ss->ss2.height = src_height - 1; 1548 ss->ss3.pitch = src_pitch - 1; 1549 1550 return sna->kgem.surface * sizeof(uint32_t); 1551} 1552 1553static void gen6_emit_video_state(struct sna *sna, 1554 const struct sna_composite_op *op) 1555{ 1556 struct sna_video_frame *frame = op->priv; 1557 uint32_t src_surf_format; 1558 uint32_t src_surf_base[6]; 1559 int src_width[6]; 1560 int src_height[6]; 1561 int src_pitch[6]; 1562 uint32_t *binding_table; 1563 uint16_t offset; 1564 bool dirty; 1565 int n_src, n; 1566 1567 dirty = gen6_get_batch(sna, op); 1568 1569 src_surf_base[0] = 0; 1570 src_surf_base[1] = 0; 1571 src_surf_base[2] = frame->VBufOffset; 1572 src_surf_base[3] = frame->VBufOffset; 1573 src_surf_base[4] = frame->UBufOffset; 1574 src_surf_base[5] = frame->UBufOffset; 1575 1576 if (is_planar_fourcc(frame->id)) { 1577 src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; 1578 src_width[1] = src_width[0] = frame->width; 1579 src_height[1] = src_height[0] = frame->height; 1580 src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1581 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1582 frame->width / 2; 1583 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1584 frame->height / 2; 1585 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1586 frame->pitch[0]; 1587 n_src = 6; 1588 } else { 1589 if (frame->id == FOURCC_UYVY) 1590 src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; 1591 else 1592 src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; 1593 1594 src_width[0] = frame->width; 1595 src_height[0] = frame->height; 1596 src_pitch[0] = frame->pitch[0]; 1597 n_src = 1; 1598 } 1599 1600 binding_table = gen6_composite_get_binding_table(sna, &offset); 1601 1602 binding_table[0] = 1603 gen6_bind_bo(sna, 1604 op->dst.bo, op->dst.width, op->dst.height, 1605 gen6_get_dest_format(op->dst.format), 1606 true); 1607 for (n = 0; n < n_src; n++) { 1608 binding_table[1+n] = 1609 gen6_bind_video_source(sna, 1610 frame->bo, 1611 src_surf_base[n], 1612 src_width[n], 1613 src_height[n], 1614 src_pitch[n], 1615 src_surf_format); 1616 } 1617 1618 gen6_emit_state(sna, op, offset | dirty); 1619} 1620 1621static bool 1622gen6_render_video(struct sna *sna, 1623 struct sna_video *video, 1624 struct sna_video_frame *frame, 1625 RegionPtr dstRegion, 1626 PixmapPtr pixmap) 1627{ 1628 struct sna_composite_op tmp; 1629 struct sna_pixmap *priv = sna_pixmap(pixmap); 1630 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1631 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1632 int src_width = frame->src.x2 - frame->src.x1; 1633 int src_height = frame->src.y2 - frame->src.y1; 1634 float src_offset_x, src_offset_y; 1635 float src_scale_x, src_scale_y; 1636 int nbox, pix_xoff, pix_yoff; 1637 unsigned filter; 1638 const BoxRec *box; 1639 1640 DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 1641 __FUNCTION__, 1642 src_width, src_height, dst_width, dst_height, 1643 region_num_rects(dstRegion), 1644 REGION_EXTENTS(NULL, dstRegion)->x1, 1645 REGION_EXTENTS(NULL, dstRegion)->y1, 1646 REGION_EXTENTS(NULL, dstRegion)->x2, 1647 REGION_EXTENTS(NULL, dstRegion)->y2)); 1648 1649 assert(priv->gpu_bo); 1650 memset(&tmp, 0, sizeof(tmp)); 1651 1652 tmp.dst.pixmap = pixmap; 1653 tmp.dst.width = pixmap->drawable.width; 1654 tmp.dst.height = pixmap->drawable.height; 1655 tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 1656 tmp.dst.bo = priv->gpu_bo; 1657 1658 tmp.src.bo = frame->bo; 1659 tmp.mask.bo = NULL; 1660 1661 tmp.floats_per_vertex = 3; 1662 tmp.floats_per_rect = 9; 1663 1664 if (src_width == dst_width && src_height == dst_height) 1665 filter = SAMPLER_FILTER_NEAREST; 1666 else 1667 filter = SAMPLER_FILTER_BILINEAR; 1668 1669 tmp.u.gen6.flags = 1670 GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 1671 SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 1672 NO_BLEND, 1673 is_planar_fourcc(frame->id) ? 1674 GEN6_WM_KERNEL_VIDEO_PLANAR : 1675 GEN6_WM_KERNEL_VIDEO_PACKED, 1676 2); 1677 tmp.priv = frame; 1678 1679 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 1680 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1681 kgem_submit(&sna->kgem); 1682 assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); 1683 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1684 } 1685 1686 gen6_align_vertex(sna, &tmp); 1687 gen6_emit_video_state(sna, &tmp); 1688 1689 /* Set up the offset for translating from the given region (in screen 1690 * coordinates) to the backing pixmap. 1691 */ 1692#ifdef COMPOSITE 1693 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1694 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1695#else 1696 pix_xoff = 0; 1697 pix_yoff = 0; 1698#endif 1699 1700 src_scale_x = (float)src_width / dst_width / frame->width; 1701 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1702 1703 src_scale_y = (float)src_height / dst_height / frame->height; 1704 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1705 1706 box = region_rects(dstRegion); 1707 nbox = region_num_rects(dstRegion); 1708 while (nbox--) { 1709 BoxRec r; 1710 1711 r.x1 = box->x1 + pix_xoff; 1712 r.x2 = box->x2 + pix_xoff; 1713 r.y1 = box->y1 + pix_yoff; 1714 r.y2 = box->y2 + pix_yoff; 1715 1716 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); 1717 1718 OUT_VERTEX(r.x2, r.y2); 1719 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1720 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1721 1722 OUT_VERTEX(r.x1, r.y2); 1723 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1724 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1725 1726 OUT_VERTEX(r.x1, r.y1); 1727 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1728 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1729 1730 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1731 sna_damage_add_box(&priv->gpu_damage, &r); 1732 sna_damage_subtract_box(&priv->cpu_damage, &r); 1733 } 1734 box++; 1735 } 1736 1737 gen4_vertex_flush(sna); 1738 return true; 1739} 1740 1741static int 1742gen6_composite_picture(struct sna *sna, 1743 PicturePtr picture, 1744 struct sna_composite_channel *channel, 1745 int x, int y, 1746 int w, int h, 1747 int dst_x, int dst_y, 1748 bool precise) 1749{ 1750 PixmapPtr pixmap; 1751 uint32_t color; 1752 int16_t dx, dy; 1753 1754 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n", 1755 __FUNCTION__, x, y, w, h, dst_x, dst_y, precise)); 1756 1757 channel->is_solid = false; 1758 channel->card_format = -1; 1759 1760 if (sna_picture_is_solid(picture, &color)) 1761 return gen4_channel_init_solid(sna, channel, color); 1762 1763 if (picture->pDrawable == NULL) { 1764 int ret; 1765 1766 if (picture->pSourcePict->type == SourcePictTypeLinear) 1767 return gen4_channel_init_linear(sna, picture, channel, 1768 x, y, 1769 w, h, 1770 dst_x, dst_y); 1771 1772 DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1773 ret = -1; 1774 if (!precise) 1775 ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1776 x, y, w, h, dst_x, dst_y); 1777 if (ret == -1) 1778 ret = sna_render_picture_fixup(sna, picture, channel, 1779 x, y, w, h, dst_x, dst_y); 1780 return ret; 1781 } 1782 1783 if (picture->alphaMap) { 1784 DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); 1785 return sna_render_picture_fixup(sna, picture, channel, 1786 x, y, w, h, dst_x, dst_y); 1787 } 1788 1789 if (!gen6_check_repeat(picture)) 1790 return sna_render_picture_fixup(sna, picture, channel, 1791 x, y, w, h, dst_x, dst_y); 1792 1793 if (!gen6_check_filter(picture)) 1794 return sna_render_picture_fixup(sna, picture, channel, 1795 x, y, w, h, dst_x, dst_y); 1796 1797 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1798 channel->filter = picture->filter; 1799 1800 pixmap = get_drawable_pixmap(picture->pDrawable); 1801 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1802 1803 x += dx + picture->pDrawable->x; 1804 y += dy + picture->pDrawable->y; 1805 1806 channel->is_affine = sna_transform_is_affine(picture->transform); 1807 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1808 DBG(("%s: integer translation (%d, %d), removing\n", 1809 __FUNCTION__, dx, dy)); 1810 x += dx; 1811 y += dy; 1812 channel->transform = NULL; 1813 channel->filter = PictFilterNearest; 1814 1815 if (channel->repeat && 1816 (x >= 0 && 1817 y >= 0 && 1818 x + w < pixmap->drawable.width && 1819 y + h < pixmap->drawable.height)) { 1820 struct sna_pixmap *priv = sna_pixmap(pixmap); 1821 if (priv && priv->clear) { 1822 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1823 return gen4_channel_init_solid(sna, channel, priv->clear_color); 1824 } 1825 } 1826 } else 1827 channel->transform = picture->transform; 1828 1829 channel->pict_format = picture->format; 1830 channel->card_format = gen6_get_card_format(picture->format); 1831 if (channel->card_format == (unsigned)-1) 1832 return sna_render_picture_convert(sna, picture, channel, pixmap, 1833 x, y, w, h, dst_x, dst_y, 1834 false); 1835 1836 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 1837 DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 1838 pixmap->drawable.width, pixmap->drawable.height)); 1839 return sna_render_picture_extract(sna, picture, channel, 1840 x, y, w, h, dst_x, dst_y); 1841 } 1842 1843 DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n", 1844 __FUNCTION__, 1845 channel->repeat, channel->filter, 1846 channel->transform != NULL, channel->is_affine, 1847 channel->pict_format)); 1848 if (channel->transform) { 1849#define f2d(x) (((double)(x))/65536.) 1850 DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n", 1851 __FUNCTION__, 1852 f2d(channel->transform->matrix[0][0]), 1853 f2d(channel->transform->matrix[0][1]), 1854 f2d(channel->transform->matrix[0][2]), 1855 f2d(channel->transform->matrix[1][0]), 1856 f2d(channel->transform->matrix[1][1]), 1857 f2d(channel->transform->matrix[1][2]), 1858 f2d(channel->transform->matrix[2][0]), 1859 f2d(channel->transform->matrix[2][1]), 1860 f2d(channel->transform->matrix[2][2]), 1861 channel->transform->matrix[0][0], 1862 channel->transform->matrix[0][1], 1863 channel->transform->matrix[0][2], 1864 channel->transform->matrix[1][0], 1865 channel->transform->matrix[1][1], 1866 channel->transform->matrix[1][2], 1867 channel->transform->matrix[2][0], 1868 channel->transform->matrix[2][1], 1869 channel->transform->matrix[2][2])); 1870#undef f2d 1871 } 1872 1873 return sna_render_pixmap_bo(sna, channel, pixmap, 1874 x, y, w, h, dst_x, dst_y); 1875} 1876 1877inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) 1878{ 1879 channel->repeat = gen6_repeat(channel->repeat); 1880 channel->filter = gen6_filter(channel->filter); 1881 if (channel->card_format == (unsigned)-1) 1882 channel->card_format = gen6_get_card_format(channel->pict_format); 1883 assert(channel->card_format != (unsigned)-1); 1884} 1885 1886static void gen6_render_composite_done(struct sna *sna, 1887 const struct sna_composite_op *op) 1888{ 1889 DBG(("%s\n", __FUNCTION__)); 1890 1891 assert(!sna->render.active); 1892 if (sna->render.vertex_offset) { 1893 gen4_vertex_flush(sna); 1894 gen6_magic_ca_pass(sna, op); 1895 } 1896 1897 if (op->mask.bo) 1898 kgem_bo_destroy(&sna->kgem, op->mask.bo); 1899 if (op->src.bo) 1900 kgem_bo_destroy(&sna->kgem, op->src.bo); 1901 1902 sna_render_composite_redirect_done(sna, op); 1903} 1904 1905inline static bool 1906gen6_composite_set_target(struct sna *sna, 1907 struct sna_composite_op *op, 1908 PicturePtr dst, 1909 int x, int y, int w, int h, 1910 bool partial) 1911{ 1912 BoxRec box; 1913 unsigned int hint; 1914 1915 DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 1916 1917 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1918 op->dst.format = dst->format; 1919 op->dst.width = op->dst.pixmap->drawable.width; 1920 op->dst.height = op->dst.pixmap->drawable.height; 1921 1922 if (w && h) { 1923 box.x1 = x; 1924 box.y1 = y; 1925 box.x2 = x + w; 1926 box.y2 = y + h; 1927 } else 1928 sna_render_picture_extents(dst, &box); 1929 1930 hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1931 if (!partial) { 1932 hint |= IGNORE_DAMAGE; 1933 if (w == op->dst.width && h == op->dst.height) 1934 hint |= REPLACES; 1935 } 1936 1937 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1938 if (op->dst.bo == NULL) 1939 return false; 1940 1941 if (hint & REPLACES) { 1942 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1943 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1944 } 1945 1946 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1947 &op->dst.x, &op->dst.y); 1948 1949 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1950 __FUNCTION__, 1951 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1952 op->dst.width, op->dst.height, 1953 op->dst.bo->pitch, 1954 op->dst.x, op->dst.y, 1955 op->damage ? *op->damage : (void *)-1)); 1956 1957 assert(op->dst.bo->proxy == NULL); 1958 1959 if (too_large(op->dst.width, op->dst.height) && 1960 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1961 return false; 1962 1963 return true; 1964} 1965 1966static bool 1967try_blt(struct sna *sna, 1968 PicturePtr dst, PicturePtr src, 1969 int width, int height) 1970{ 1971 struct kgem_bo *bo; 1972 1973 if (sna->kgem.mode == KGEM_BLT) { 1974 DBG(("%s: already performing BLT\n", __FUNCTION__)); 1975 return true; 1976 } 1977 1978 if (too_large(width, height)) { 1979 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 1980 __FUNCTION__, width, height)); 1981 return true; 1982 } 1983 1984 bo = __sna_drawable_peek_bo(dst->pDrawable); 1985 if (bo == NULL) 1986 return true; 1987 if (bo->rq) 1988 return RQ_IS_BLT(bo->rq); 1989 1990 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 1991 return true; 1992 1993 if (src->pDrawable) { 1994 bo = __sna_drawable_peek_bo(src->pDrawable); 1995 if (bo == NULL) 1996 return true; 1997 1998 if (prefer_blt_bo(sna, bo)) 1999 return true; 2000 } 2001 2002 if (sna->kgem.ring == KGEM_BLT) { 2003 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2004 return true; 2005 } 2006 2007 return false; 2008} 2009 2010static bool 2011check_gradient(PicturePtr picture, bool precise) 2012{ 2013 if (picture->pDrawable) 2014 return false; 2015 2016 switch (picture->pSourcePict->type) { 2017 case SourcePictTypeSolidFill: 2018 case SourcePictTypeLinear: 2019 return false; 2020 default: 2021 return precise; 2022 } 2023} 2024 2025static bool 2026has_alphamap(PicturePtr p) 2027{ 2028 return p->alphaMap != NULL; 2029} 2030 2031static bool 2032need_upload(PicturePtr p) 2033{ 2034 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2035} 2036 2037static bool 2038source_is_busy(PixmapPtr pixmap) 2039{ 2040 struct sna_pixmap *priv = sna_pixmap(pixmap); 2041 if (priv == NULL || priv->clear) 2042 return false; 2043 2044 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2045 return true; 2046 2047 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2048 return true; 2049 2050 return priv->gpu_damage && !priv->cpu_damage; 2051} 2052 2053static bool 2054source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2055{ 2056 if (sna_picture_is_solid(p, NULL)) 2057 return false; 2058 2059 if (p->pSourcePict) 2060 return check_gradient(p, precise); 2061 2062 if (!gen6_check_repeat(p) || !gen6_check_format(p->format)) 2063 return true; 2064 2065 if (pixmap && source_is_busy(pixmap)) 2066 return false; 2067 2068 return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p); 2069} 2070 2071static bool 2072gen6_composite_fallback(struct sna *sna, 2073 PicturePtr src, 2074 PicturePtr mask, 2075 PicturePtr dst) 2076{ 2077 PixmapPtr src_pixmap; 2078 PixmapPtr mask_pixmap; 2079 PixmapPtr dst_pixmap; 2080 bool src_fallback, mask_fallback; 2081 2082 if (!gen6_check_dst_format(dst->format)) { 2083 DBG(("%s: unknown destination format: %d\n", 2084 __FUNCTION__, dst->format)); 2085 return true; 2086 } 2087 2088 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2089 2090 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2091 src_fallback = source_fallback(src, src_pixmap, 2092 dst->polyMode == PolyModePrecise); 2093 2094 if (mask) { 2095 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2096 mask_fallback = source_fallback(mask, mask_pixmap, 2097 dst->polyMode == PolyModePrecise); 2098 } else { 2099 mask_pixmap = NULL; 2100 mask_fallback = false; 2101 } 2102 2103 /* If we are using the destination as a source and need to 2104 * readback in order to upload the source, do it all 2105 * on the cpu. 2106 */ 2107 if (src_pixmap == dst_pixmap && src_fallback) { 2108 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2109 return true; 2110 } 2111 if (mask_pixmap == dst_pixmap && mask_fallback) { 2112 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2113 return true; 2114 } 2115 2116 /* If anything is on the GPU, push everything out to the GPU */ 2117 if (dst_use_gpu(dst_pixmap)) { 2118 DBG(("%s: dst is already on the GPU, try to use GPU\n", 2119 __FUNCTION__)); 2120 return false; 2121 } 2122 2123 if (src_pixmap && !src_fallback) { 2124 DBG(("%s: src is already on the GPU, try to use GPU\n", 2125 __FUNCTION__)); 2126 return false; 2127 } 2128 if (mask_pixmap && !mask_fallback) { 2129 DBG(("%s: mask is already on the GPU, try to use GPU\n", 2130 __FUNCTION__)); 2131 return false; 2132 } 2133 2134 /* However if the dst is not on the GPU and we need to 2135 * render one of the sources using the CPU, we may 2136 * as well do the entire operation in place onthe CPU. 2137 */ 2138 if (src_fallback) { 2139 DBG(("%s: dst is on the CPU and src will fallback\n", 2140 __FUNCTION__)); 2141 return true; 2142 } 2143 2144 if (mask && mask_fallback) { 2145 DBG(("%s: dst is on the CPU and mask will fallback\n", 2146 __FUNCTION__)); 2147 return true; 2148 } 2149 2150 if (too_large(dst_pixmap->drawable.width, 2151 dst_pixmap->drawable.height) && 2152 dst_is_cpu(dst_pixmap)) { 2153 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2154 return true; 2155 } 2156 2157 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2158 __FUNCTION__)); 2159 return dst_use_cpu(dst_pixmap); 2160} 2161 2162static int 2163reuse_source(struct sna *sna, 2164 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2165 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2166{ 2167 uint32_t color; 2168 2169 if (src_x != msk_x || src_y != msk_y) 2170 return false; 2171 2172 if (src == mask) { 2173 DBG(("%s: mask is source\n", __FUNCTION__)); 2174 *mc = *sc; 2175 mc->bo = kgem_bo_reference(mc->bo); 2176 return true; 2177 } 2178 2179 if (sna_picture_is_solid(mask, &color)) 2180 return gen4_channel_init_solid(sna, mc, color); 2181 2182 if (sc->is_solid) 2183 return false; 2184 2185 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2186 return false; 2187 2188 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2189 2190 if (!sna_transform_equal(src->transform, mask->transform)) 2191 return false; 2192 2193 if (!sna_picture_alphamap_equal(src, mask)) 2194 return false; 2195 2196 if (!gen6_check_repeat(mask)) 2197 return false; 2198 2199 if (!gen6_check_filter(mask)) 2200 return false; 2201 2202 if (!gen6_check_format(mask->format)) 2203 return false; 2204 2205 DBG(("%s: reusing source channel for mask with a twist\n", 2206 __FUNCTION__)); 2207 2208 *mc = *sc; 2209 mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone); 2210 mc->filter = gen6_filter(mask->filter); 2211 mc->pict_format = mask->format; 2212 mc->card_format = gen6_get_card_format(mask->format); 2213 mc->bo = kgem_bo_reference(mc->bo); 2214 return true; 2215} 2216 2217static bool 2218gen6_render_composite(struct sna *sna, 2219 uint8_t op, 2220 PicturePtr src, 2221 PicturePtr mask, 2222 PicturePtr dst, 2223 int16_t src_x, int16_t src_y, 2224 int16_t msk_x, int16_t msk_y, 2225 int16_t dst_x, int16_t dst_y, 2226 int16_t width, int16_t height, 2227 unsigned flags, 2228 struct sna_composite_op *tmp) 2229{ 2230 if (op >= ARRAY_SIZE(gen6_blend_op)) 2231 return false; 2232 2233 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 2234 width, height, sna->kgem.ring)); 2235 2236 if (mask == NULL && 2237 try_blt(sna, dst, src, width, height) && 2238 sna_blt_composite(sna, op, 2239 src, dst, 2240 src_x, src_y, 2241 dst_x, dst_y, 2242 width, height, 2243 flags, tmp)) 2244 return true; 2245 2246 if (gen6_composite_fallback(sna, src, mask, dst)) 2247 goto fallback; 2248 2249 if (need_tiling(sna, width, height)) 2250 return sna_tiling_composite(op, src, mask, dst, 2251 src_x, src_y, 2252 msk_x, msk_y, 2253 dst_x, dst_y, 2254 width, height, 2255 tmp); 2256 2257 if (op == PictOpClear && src == sna->clear) 2258 op = PictOpSrc; 2259 tmp->op = op; 2260 if (!gen6_composite_set_target(sna, tmp, dst, 2261 dst_x, dst_y, width, height, 2262 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2263 goto fallback; 2264 2265 switch (gen6_composite_picture(sna, src, &tmp->src, 2266 src_x, src_y, 2267 width, height, 2268 dst_x, dst_y, 2269 dst->polyMode == PolyModePrecise)) { 2270 case -1: 2271 goto cleanup_dst; 2272 case 0: 2273 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2274 goto cleanup_dst; 2275 /* fall through to fixup */ 2276 case 1: 2277 /* Did we just switch rings to prepare the source? */ 2278 if (mask == NULL && 2279 prefer_blt_composite(sna, tmp) && 2280 sna_blt_composite__convert(sna, 2281 dst_x, dst_y, width, height, 2282 tmp)) 2283 return true; 2284 2285 gen6_composite_channel_convert(&tmp->src); 2286 break; 2287 } 2288 2289 tmp->is_affine = tmp->src.is_affine; 2290 tmp->has_component_alpha = false; 2291 tmp->need_magic_ca_pass = false; 2292 2293 tmp->mask.bo = NULL; 2294 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2295 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2296 2297 if (mask) { 2298 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2299 tmp->has_component_alpha = true; 2300 2301 /* Check if it's component alpha that relies on a source alpha and on 2302 * the source value. We can only get one of those into the single 2303 * source value that we get to blend with. 2304 */ 2305 if (gen6_blend_op[op].src_alpha && 2306 (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { 2307 if (op != PictOpOver) 2308 goto cleanup_src; 2309 2310 tmp->need_magic_ca_pass = true; 2311 tmp->op = PictOpOutReverse; 2312 } 2313 } 2314 2315 if (!reuse_source(sna, 2316 src, &tmp->src, src_x, src_y, 2317 mask, &tmp->mask, msk_x, msk_y)) { 2318 switch (gen6_composite_picture(sna, mask, &tmp->mask, 2319 msk_x, msk_y, 2320 width, height, 2321 dst_x, dst_y, 2322 dst->polyMode == PolyModePrecise)) { 2323 case -1: 2324 goto cleanup_src; 2325 case 0: 2326 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2327 goto cleanup_src; 2328 /* fall through to fixup */ 2329 case 1: 2330 gen6_composite_channel_convert(&tmp->mask); 2331 break; 2332 } 2333 } 2334 2335 tmp->is_affine &= tmp->mask.is_affine; 2336 } 2337 2338 tmp->u.gen6.flags = 2339 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2340 tmp->src.repeat, 2341 tmp->mask.filter, 2342 tmp->mask.repeat), 2343 gen6_get_blend(tmp->op, 2344 tmp->has_component_alpha, 2345 tmp->dst.format), 2346 gen6_choose_composite_kernel(tmp->op, 2347 tmp->mask.bo != NULL, 2348 tmp->has_component_alpha, 2349 tmp->is_affine), 2350 gen4_choose_composite_emitter(sna, tmp)); 2351 2352 tmp->blt = gen6_render_composite_blt; 2353 tmp->box = gen6_render_composite_box; 2354 tmp->boxes = gen6_render_composite_boxes__blt; 2355 if (tmp->emit_boxes) { 2356 tmp->boxes = gen6_render_composite_boxes; 2357 tmp->thread_boxes = gen6_render_composite_boxes__thread; 2358 } 2359 tmp->done = gen6_render_composite_done; 2360 2361 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2362 if (!kgem_check_bo(&sna->kgem, 2363 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2364 NULL)) { 2365 kgem_submit(&sna->kgem); 2366 if (!kgem_check_bo(&sna->kgem, 2367 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2368 NULL)) 2369 goto cleanup_mask; 2370 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2371 } 2372 2373 gen6_align_vertex(sna, tmp); 2374 gen6_emit_composite_state(sna, tmp); 2375 return true; 2376 2377cleanup_mask: 2378 if (tmp->mask.bo) { 2379 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2380 tmp->mask.bo = NULL; 2381 } 2382cleanup_src: 2383 if (tmp->src.bo) { 2384 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2385 tmp->src.bo = NULL; 2386 } 2387cleanup_dst: 2388 if (tmp->redirect.real_bo) { 2389 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2390 tmp->redirect.real_bo = NULL; 2391 } 2392fallback: 2393 return (mask == NULL && 2394 sna_blt_composite(sna, op, 2395 src, dst, 2396 src_x, src_y, 2397 dst_x, dst_y, 2398 width, height, 2399 flags | COMPOSITE_FALLBACK, tmp)); 2400} 2401 2402#if !NO_COMPOSITE_SPANS 2403fastcall static void 2404gen6_render_composite_spans_box(struct sna *sna, 2405 const struct sna_composite_spans_op *op, 2406 const BoxRec *box, float opacity) 2407{ 2408 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2409 __FUNCTION__, 2410 op->base.src.offset[0], op->base.src.offset[1], 2411 opacity, 2412 op->base.dst.x, op->base.dst.y, 2413 box->x1, box->y1, 2414 box->x2 - box->x1, 2415 box->y2 - box->y1)); 2416 2417 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); 2418 op->prim_emit(sna, op, box, opacity); 2419} 2420 2421static void 2422gen6_render_composite_spans_boxes(struct sna *sna, 2423 const struct sna_composite_spans_op *op, 2424 const BoxRec *box, int nbox, 2425 float opacity) 2426{ 2427 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2428 __FUNCTION__, nbox, 2429 op->base.src.offset[0], op->base.src.offset[1], 2430 opacity, 2431 op->base.dst.x, op->base.dst.y)); 2432 2433 do { 2434 int nbox_this_time; 2435 2436 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2437 gen6_emit_composite_state); 2438 nbox -= nbox_this_time; 2439 2440 do { 2441 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2442 box->x1, box->y1, 2443 box->x2 - box->x1, 2444 box->y2 - box->y1)); 2445 2446 op->prim_emit(sna, op, box++, opacity); 2447 } while (--nbox_this_time); 2448 } while (nbox); 2449} 2450 2451fastcall static void 2452gen6_render_composite_spans_boxes__thread(struct sna *sna, 2453 const struct sna_composite_spans_op *op, 2454 const struct sna_opacity_box *box, 2455 int nbox) 2456{ 2457 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2458 __FUNCTION__, nbox, 2459 op->base.src.offset[0], op->base.src.offset[1], 2460 op->base.dst.x, op->base.dst.y)); 2461 2462 sna_vertex_lock(&sna->render); 2463 do { 2464 int nbox_this_time; 2465 float *v; 2466 2467 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2468 gen6_emit_composite_state); 2469 assert(nbox_this_time); 2470 nbox -= nbox_this_time; 2471 2472 v = sna->render.vertices + sna->render.vertex_used; 2473 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2474 2475 sna_vertex_acquire__locked(&sna->render); 2476 sna_vertex_unlock(&sna->render); 2477 2478 op->emit_boxes(op, box, nbox_this_time, v); 2479 box += nbox_this_time; 2480 2481 sna_vertex_lock(&sna->render); 2482 sna_vertex_release__locked(&sna->render); 2483 } while (nbox); 2484 sna_vertex_unlock(&sna->render); 2485} 2486 2487fastcall static void 2488gen6_render_composite_spans_done(struct sna *sna, 2489 const struct sna_composite_spans_op *op) 2490{ 2491 DBG(("%s()\n", __FUNCTION__)); 2492 assert(!sna->render.active); 2493 2494 if (sna->render.vertex_offset) 2495 gen4_vertex_flush(sna); 2496 2497 if (op->base.src.bo) 2498 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2499 2500 sna_render_composite_redirect_done(sna, &op->base); 2501} 2502 2503static bool 2504gen6_check_composite_spans(struct sna *sna, 2505 uint8_t op, PicturePtr src, PicturePtr dst, 2506 int16_t width, int16_t height, 2507 unsigned flags) 2508{ 2509 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2510 __FUNCTION__, op, width, height, flags)); 2511 2512 if (op >= ARRAY_SIZE(gen6_blend_op)) 2513 return false; 2514 2515 if (gen6_composite_fallback(sna, src, NULL, dst)) { 2516 DBG(("%s: operation would fallback\n", __FUNCTION__)); 2517 return false; 2518 } 2519 2520 if (need_tiling(sna, width, height) && 2521 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2522 DBG(("%s: fallback, tiled operation not on GPU\n", 2523 __FUNCTION__)); 2524 return false; 2525 } 2526 2527 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2528 struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); 2529 assert(priv); 2530 2531 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2532 return true; 2533 2534 if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2535 return false; 2536 2537 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2538 } 2539 2540 return true; 2541} 2542 2543static bool 2544gen6_render_composite_spans(struct sna *sna, 2545 uint8_t op, 2546 PicturePtr src, 2547 PicturePtr dst, 2548 int16_t src_x, int16_t src_y, 2549 int16_t dst_x, int16_t dst_y, 2550 int16_t width, int16_t height, 2551 unsigned flags, 2552 struct sna_composite_spans_op *tmp) 2553{ 2554 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2555 width, height, flags, sna->kgem.ring)); 2556 2557 assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); 2558 2559 if (need_tiling(sna, width, height)) { 2560 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2561 __FUNCTION__, width, height)); 2562 return sna_tiling_composite_spans(op, src, dst, 2563 src_x, src_y, dst_x, dst_y, 2564 width, height, flags, tmp); 2565 } 2566 2567 tmp->base.op = op; 2568 if (!gen6_composite_set_target(sna, &tmp->base, dst, 2569 dst_x, dst_y, width, height, true)) 2570 return false; 2571 2572 switch (gen6_composite_picture(sna, src, &tmp->base.src, 2573 src_x, src_y, 2574 width, height, 2575 dst_x, dst_y, 2576 dst->polyMode == PolyModePrecise)) { 2577 case -1: 2578 goto cleanup_dst; 2579 case 0: 2580 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2581 goto cleanup_dst; 2582 /* fall through to fixup */ 2583 case 1: 2584 gen6_composite_channel_convert(&tmp->base.src); 2585 break; 2586 } 2587 tmp->base.mask.bo = NULL; 2588 2589 tmp->base.is_affine = tmp->base.src.is_affine; 2590 tmp->base.need_magic_ca_pass = false; 2591 2592 tmp->base.u.gen6.flags = 2593 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2594 tmp->base.src.repeat, 2595 SAMPLER_FILTER_NEAREST, 2596 SAMPLER_EXTEND_PAD), 2597 gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), 2598 GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2599 gen4_choose_spans_emitter(sna, tmp)); 2600 2601 tmp->box = gen6_render_composite_spans_box; 2602 tmp->boxes = gen6_render_composite_spans_boxes; 2603 if (tmp->emit_boxes) 2604 tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; 2605 tmp->done = gen6_render_composite_spans_done; 2606 2607 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2608 if (!kgem_check_bo(&sna->kgem, 2609 tmp->base.dst.bo, tmp->base.src.bo, 2610 NULL)) { 2611 kgem_submit(&sna->kgem); 2612 if (!kgem_check_bo(&sna->kgem, 2613 tmp->base.dst.bo, tmp->base.src.bo, 2614 NULL)) 2615 goto cleanup_src; 2616 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2617 } 2618 2619 gen6_align_vertex(sna, &tmp->base); 2620 gen6_emit_composite_state(sna, &tmp->base); 2621 return true; 2622 2623cleanup_src: 2624 if (tmp->base.src.bo) 2625 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2626cleanup_dst: 2627 if (tmp->base.redirect.real_bo) 2628 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2629 return false; 2630} 2631#endif 2632 2633static void 2634gen6_emit_copy_state(struct sna *sna, 2635 const struct sna_composite_op *op) 2636{ 2637 uint32_t *binding_table; 2638 uint16_t offset; 2639 bool dirty; 2640 2641 dirty = gen6_get_batch(sna, op); 2642 2643 binding_table = gen6_composite_get_binding_table(sna, &offset); 2644 2645 binding_table[0] = 2646 gen6_bind_bo(sna, 2647 op->dst.bo, op->dst.width, op->dst.height, 2648 gen6_get_dest_format(op->dst.format), 2649 true); 2650 binding_table[1] = 2651 gen6_bind_bo(sna, 2652 op->src.bo, op->src.width, op->src.height, 2653 op->src.card_format, 2654 false); 2655 2656 if (sna->kgem.surface == offset && 2657 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 2658 sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 2659 offset = sna->render_state.gen6.surface_table; 2660 } 2661 2662 gen6_emit_state(sna, op, offset | dirty); 2663} 2664 2665static inline bool prefer_blt_copy(struct sna *sna, 2666 struct kgem_bo *src_bo, 2667 struct kgem_bo *dst_bo, 2668 unsigned flags) 2669{ 2670 if (flags & COPY_SYNC) 2671 return false; 2672 2673 if (PREFER_RENDER) 2674 return PREFER_RENDER > 0; 2675 2676 if (sna->kgem.ring == KGEM_BLT) 2677 return true; 2678 2679 if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2680 return true; 2681 2682 if (untiled_tlb_miss(src_bo) || 2683 untiled_tlb_miss(dst_bo)) 2684 return true; 2685 2686 if (force_blt_ring(sna)) 2687 return true; 2688 2689 if (kgem_bo_is_render(dst_bo) || 2690 kgem_bo_is_render(src_bo)) 2691 return false; 2692 2693 if (prefer_render_ring(sna, dst_bo)) 2694 return false; 2695 2696 if (!prefer_blt_ring(sna, dst_bo, flags)) 2697 return false; 2698 2699 return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2700} 2701 2702static bool 2703gen6_render_copy_boxes(struct sna *sna, uint8_t alu, 2704 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2705 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2706 const BoxRec *box, int n, unsigned flags) 2707{ 2708 struct sna_composite_op tmp; 2709 BoxRec extents; 2710 2711 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", 2712 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, 2713 src_bo == dst_bo, 2714 overlaps(sna, 2715 src_bo, src_dx, src_dy, 2716 dst_bo, dst_dx, dst_dy, 2717 box, n, flags, &extents))); 2718 2719 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2720 sna_blt_compare_depth(src, dst) && 2721 sna_blt_copy_boxes(sna, alu, 2722 src_bo, src_dx, src_dy, 2723 dst_bo, dst_dx, dst_dy, 2724 dst->bitsPerPixel, 2725 box, n)) 2726 return true; 2727 2728 if (!(alu == GXcopy || alu == GXclear)) { 2729fallback_blt: 2730 if (!sna_blt_compare_depth(src, dst)) 2731 return false; 2732 2733 return sna_blt_copy_boxes_fallback(sna, alu, 2734 src, src_bo, src_dx, src_dy, 2735 dst, dst_bo, dst_dx, dst_dy, 2736 box, n); 2737 } 2738 2739 if (overlaps(sna, 2740 src_bo, src_dx, src_dy, 2741 dst_bo, dst_dx, dst_dy, 2742 box, n, flags, 2743 &extents)) { 2744 bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2745 2746 if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2747 sna_blt_copy_boxes(sna, alu, 2748 src_bo, src_dx, src_dy, 2749 dst_bo, dst_dx, dst_dy, 2750 dst->bitsPerPixel, 2751 box, n)) 2752 return true; 2753 2754 if (big) 2755 goto fallback_blt; 2756 2757 assert(src_bo == dst_bo); 2758 assert(src->depth == dst->depth); 2759 assert(src->width == dst->width); 2760 assert(src->height == dst->height); 2761 return sna_render_copy_boxes__overlap(sna, alu, 2762 src, src_bo, 2763 src_dx, src_dy, 2764 dst_dx, dst_dy, 2765 box, n, &extents); 2766 } 2767 2768 if (dst->depth == src->depth) { 2769 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2770 tmp.src.pict_format = tmp.dst.format; 2771 } else { 2772 tmp.dst.format = sna_format_for_depth(dst->depth); 2773 tmp.src.pict_format = sna_format_for_depth(src->depth); 2774 } 2775 if (!gen6_check_format(tmp.src.pict_format)) 2776 goto fallback_blt; 2777 2778 tmp.dst.pixmap = (PixmapPtr)dst; 2779 tmp.dst.width = dst->width; 2780 tmp.dst.height = dst->height; 2781 tmp.dst.bo = dst_bo; 2782 tmp.dst.x = tmp.dst.y = 0; 2783 tmp.damage = NULL; 2784 2785 sna_render_composite_redirect_init(&tmp); 2786 if (too_large(tmp.dst.width, tmp.dst.height)) { 2787 int i; 2788 2789 extents = box[0]; 2790 for (i = 1; i < n; i++) { 2791 if (box[i].x1 < extents.x1) 2792 extents.x1 = box[i].x1; 2793 if (box[i].y1 < extents.y1) 2794 extents.y1 = box[i].y1; 2795 2796 if (box[i].x2 > extents.x2) 2797 extents.x2 = box[i].x2; 2798 if (box[i].y2 > extents.y2) 2799 extents.y2 = box[i].y2; 2800 } 2801 2802 if (!sna_render_composite_redirect(sna, &tmp, 2803 extents.x1 + dst_dx, 2804 extents.y1 + dst_dy, 2805 extents.x2 - extents.x1, 2806 extents.y2 - extents.y1, 2807 n > 1)) 2808 goto fallback_tiled; 2809 } 2810 2811 tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); 2812 if (too_large(src->width, src->height)) { 2813 int i; 2814 2815 extents = box[0]; 2816 for (i = 1; i < n; i++) { 2817 if (box[i].x1 < extents.x1) 2818 extents.x1 = box[i].x1; 2819 if (box[i].y1 < extents.y1) 2820 extents.y1 = box[i].y1; 2821 2822 if (box[i].x2 > extents.x2) 2823 extents.x2 = box[i].x2; 2824 if (box[i].y2 > extents.y2) 2825 extents.y2 = box[i].y2; 2826 } 2827 2828 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2829 extents.x1 + src_dx, 2830 extents.y1 + src_dy, 2831 extents.x2 - extents.x1, 2832 extents.y2 - extents.y1)) { 2833 DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); 2834 goto fallback_tiled_dst; 2835 } 2836 } else { 2837 tmp.src.bo = src_bo; 2838 tmp.src.width = src->width; 2839 tmp.src.height = src->height; 2840 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2841 } 2842 2843 tmp.mask.bo = NULL; 2844 2845 tmp.floats_per_vertex = 2; 2846 tmp.floats_per_rect = 6; 2847 tmp.need_magic_ca_pass = 0; 2848 2849 tmp.u.gen6.flags = COPY_FLAGS(alu); 2850 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 2851 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); 2852 assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); 2853 2854 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2855 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2856 kgem_submit(&sna->kgem); 2857 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2858 DBG(("%s: too large for a single operation\n", 2859 __FUNCTION__)); 2860 if (tmp.src.bo != src_bo) 2861 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2862 if (tmp.redirect.real_bo) 2863 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2864 goto fallback_blt; 2865 } 2866 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2867 } 2868 2869 src_dx += tmp.src.offset[0]; 2870 src_dy += tmp.src.offset[1]; 2871 2872 dst_dx += tmp.dst.x; 2873 dst_dy += tmp.dst.y; 2874 2875 tmp.dst.x = tmp.dst.y = 0; 2876 2877 gen6_align_vertex(sna, &tmp); 2878 gen6_emit_copy_state(sna, &tmp); 2879 2880 do { 2881 int16_t *v; 2882 int n_this_time; 2883 2884 n_this_time = gen6_get_rectangles(sna, &tmp, n, 2885 gen6_emit_copy_state); 2886 n -= n_this_time; 2887 2888 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2889 sna->render.vertex_used += 6 * n_this_time; 2890 assert(sna->render.vertex_used <= sna->render.vertex_size); 2891 do { 2892 2893 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2894 box->x1 + src_dx, box->y1 + src_dy, 2895 box->x1 + dst_dx, box->y1 + dst_dy, 2896 box->x2 - box->x1, box->y2 - box->y1)); 2897 v[0] = box->x2 + dst_dx; 2898 v[2] = box->x2 + src_dx; 2899 v[1] = v[5] = box->y2 + dst_dy; 2900 v[3] = v[7] = box->y2 + src_dy; 2901 v[8] = v[4] = box->x1 + dst_dx; 2902 v[10] = v[6] = box->x1 + src_dx; 2903 v[9] = box->y1 + dst_dy; 2904 v[11] = box->y1 + src_dy; 2905 v += 12; box++; 2906 } while (--n_this_time); 2907 } while (n); 2908 2909 gen4_vertex_flush(sna); 2910 sna_render_composite_redirect_done(sna, &tmp); 2911 if (tmp.src.bo != src_bo) 2912 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2913 return true; 2914 2915fallback_tiled_dst: 2916 if (tmp.redirect.real_bo) 2917 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2918fallback_tiled: 2919 if (sna_blt_compare_depth(src, dst) && 2920 sna_blt_copy_boxes(sna, alu, 2921 src_bo, src_dx, src_dy, 2922 dst_bo, dst_dx, dst_dy, 2923 dst->bitsPerPixel, 2924 box, n)) 2925 return true; 2926 2927 return sna_tiling_copy_boxes(sna, alu, 2928 src, src_bo, src_dx, src_dy, 2929 dst, dst_bo, dst_dx, dst_dy, 2930 box, n); 2931} 2932 2933static void 2934gen6_render_copy_blt(struct sna *sna, 2935 const struct sna_copy_op *op, 2936 int16_t sx, int16_t sy, 2937 int16_t w, int16_t h, 2938 int16_t dx, int16_t dy) 2939{ 2940 int16_t *v; 2941 2942 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); 2943 2944 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2945 sna->render.vertex_used += 6; 2946 assert(sna->render.vertex_used <= sna->render.vertex_size); 2947 2948 v[0] = dx+w; v[1] = dy+h; 2949 v[2] = sx+w; v[3] = sy+h; 2950 v[4] = dx; v[5] = dy+h; 2951 v[6] = sx; v[7] = sy+h; 2952 v[8] = dx; v[9] = dy; 2953 v[10] = sx; v[11] = sy; 2954} 2955 2956static void 2957gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2958{ 2959 DBG(("%s()\n", __FUNCTION__)); 2960 2961 assert(!sna->render.active); 2962 if (sna->render.vertex_offset) 2963 gen4_vertex_flush(sna); 2964} 2965 2966static bool 2967gen6_render_copy(struct sna *sna, uint8_t alu, 2968 PixmapPtr src, struct kgem_bo *src_bo, 2969 PixmapPtr dst, struct kgem_bo *dst_bo, 2970 struct sna_copy_op *op) 2971{ 2972 DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 2973 __FUNCTION__, alu, 2974 src->drawable.width, src->drawable.height, 2975 dst->drawable.width, dst->drawable.height)); 2976 2977 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 2978 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2979 sna_blt_copy(sna, alu, 2980 src_bo, dst_bo, 2981 dst->drawable.bitsPerPixel, 2982 op)) 2983 return true; 2984 2985 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2986 too_large(src->drawable.width, src->drawable.height) || 2987 too_large(dst->drawable.width, dst->drawable.height)) { 2988fallback: 2989 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2990 return false; 2991 2992 return sna_blt_copy(sna, alu, src_bo, dst_bo, 2993 dst->drawable.bitsPerPixel, 2994 op); 2995 } 2996 2997 if (dst->drawable.depth == src->drawable.depth) { 2998 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2999 op->base.src.pict_format = op->base.dst.format; 3000 } else { 3001 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3002 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3003 } 3004 if (!gen6_check_format(op->base.src.pict_format)) 3005 goto fallback; 3006 3007 op->base.dst.pixmap = dst; 3008 op->base.dst.width = dst->drawable.width; 3009 op->base.dst.height = dst->drawable.height; 3010 op->base.dst.bo = dst_bo; 3011 3012 op->base.src.bo = src_bo; 3013 op->base.src.card_format = 3014 gen6_get_card_format(op->base.src.pict_format); 3015 op->base.src.width = src->drawable.width; 3016 op->base.src.height = src->drawable.height; 3017 3018 op->base.mask.bo = NULL; 3019 3020 op->base.floats_per_vertex = 2; 3021 op->base.floats_per_rect = 6; 3022 3023 op->base.u.gen6.flags = COPY_FLAGS(alu); 3024 assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3025 assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); 3026 assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); 3027 3028 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3029 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3030 kgem_submit(&sna->kgem); 3031 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3032 goto fallback; 3033 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3034 } 3035 3036 gen6_align_vertex(sna, &op->base); 3037 gen6_emit_copy_state(sna, &op->base); 3038 3039 op->blt = gen6_render_copy_blt; 3040 op->done = gen6_render_copy_done; 3041 return true; 3042} 3043 3044static void 3045gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3046{ 3047 uint32_t *binding_table; 3048 uint16_t offset; 3049 bool dirty; 3050 3051 dirty = gen6_get_batch(sna, op); 3052 3053 binding_table = gen6_composite_get_binding_table(sna, &offset); 3054 3055 binding_table[0] = 3056 gen6_bind_bo(sna, 3057 op->dst.bo, op->dst.width, op->dst.height, 3058 gen6_get_dest_format(op->dst.format), 3059 true); 3060 binding_table[1] = 3061 gen6_bind_bo(sna, 3062 op->src.bo, 1, 1, 3063 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 3064 false); 3065 3066 if (sna->kgem.surface == offset && 3067 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 3068 sna->kgem.surface += 3069 sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); 3070 offset = sna->render_state.gen6.surface_table; 3071 } 3072 3073 gen6_emit_state(sna, op, offset | dirty); 3074} 3075 3076static bool 3077gen6_render_fill_boxes(struct sna *sna, 3078 CARD8 op, 3079 PictFormat format, 3080 const xRenderColor *color, 3081 const DrawableRec *dst, struct kgem_bo *dst_bo, 3082 const BoxRec *box, int n) 3083{ 3084 struct sna_composite_op tmp; 3085 uint32_t pixel; 3086 3087 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3088 __FUNCTION__, op, 3089 color->red, color->green, color->blue, color->alpha, (int)format)); 3090 3091 if (op >= ARRAY_SIZE(gen6_blend_op)) { 3092 DBG(("%s: fallback due to unhandled blend op: %d\n", 3093 __FUNCTION__, op)); 3094 return false; 3095 } 3096 3097 if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3098 !gen6_check_dst_format(format)) { 3099 uint8_t alu = GXinvalid; 3100 3101 if (op <= PictOpSrc) { 3102 pixel = 0; 3103 if (op == PictOpClear) 3104 alu = GXclear; 3105 else if (sna_get_pixel_from_rgba(&pixel, 3106 color->red, 3107 color->green, 3108 color->blue, 3109 color->alpha, 3110 format)) 3111 alu = GXcopy; 3112 } 3113 3114 if (alu != GXinvalid && 3115 sna_blt_fill_boxes(sna, alu, 3116 dst_bo, dst->bitsPerPixel, 3117 pixel, box, n)) 3118 return true; 3119 3120 if (!gen6_check_dst_format(format)) 3121 return false; 3122 } 3123 3124 if (op == PictOpClear) { 3125 pixel = 0; 3126 op = PictOpSrc; 3127 } else if (!sna_get_pixel_from_rgba(&pixel, 3128 color->red, 3129 color->green, 3130 color->blue, 3131 color->alpha, 3132 PICT_a8r8g8b8)) 3133 return false; 3134 3135 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3136 __FUNCTION__, pixel, n, 3137 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3138 3139 tmp.dst.pixmap = (PixmapPtr)dst; 3140 tmp.dst.width = dst->width; 3141 tmp.dst.height = dst->height; 3142 tmp.dst.format = format; 3143 tmp.dst.bo = dst_bo; 3144 tmp.dst.x = tmp.dst.y = 0; 3145 tmp.damage = NULL; 3146 3147 sna_render_composite_redirect_init(&tmp); 3148 if (too_large(dst->width, dst->height)) { 3149 BoxRec extents; 3150 3151 boxes_extents(box, n, &extents); 3152 if (!sna_render_composite_redirect(sna, &tmp, 3153 extents.x1, extents.y1, 3154 extents.x2 - extents.x1, 3155 extents.y2 - extents.y1, 3156 n > 1)) 3157 return sna_tiling_fill_boxes(sna, op, format, color, 3158 dst, dst_bo, box, n); 3159 } 3160 3161 tmp.src.bo = sna_render_get_solid(sna, pixel); 3162 tmp.mask.bo = NULL; 3163 3164 tmp.floats_per_vertex = 2; 3165 tmp.floats_per_rect = 6; 3166 tmp.need_magic_ca_pass = false; 3167 3168 tmp.u.gen6.flags = FILL_FLAGS(op, format); 3169 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3170 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3171 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3172 3173 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3174 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3175 kgem_submit(&sna->kgem); 3176 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3177 } 3178 3179 gen6_align_vertex(sna, &tmp); 3180 gen6_emit_fill_state(sna, &tmp); 3181 3182 do { 3183 int n_this_time; 3184 int16_t *v; 3185 3186 n_this_time = gen6_get_rectangles(sna, &tmp, n, 3187 gen6_emit_fill_state); 3188 n -= n_this_time; 3189 3190 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3191 sna->render.vertex_used += 6 * n_this_time; 3192 assert(sna->render.vertex_used <= sna->render.vertex_size); 3193 do { 3194 DBG((" (%d, %d), (%d, %d)\n", 3195 box->x1, box->y1, box->x2, box->y2)); 3196 3197 v[0] = box->x2; 3198 v[5] = v[1] = box->y2; 3199 v[8] = v[4] = box->x1; 3200 v[9] = box->y1; 3201 v[2] = v[3] = v[7] = 1; 3202 v[6] = v[10] = v[11] = 0; 3203 v += 12; box++; 3204 } while (--n_this_time); 3205 } while (n); 3206 3207 gen4_vertex_flush(sna); 3208 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3209 sna_render_composite_redirect_done(sna, &tmp); 3210 return true; 3211} 3212 3213static void 3214gen6_render_op_fill_blt(struct sna *sna, 3215 const struct sna_fill_op *op, 3216 int16_t x, int16_t y, int16_t w, int16_t h) 3217{ 3218 int16_t *v; 3219 3220 DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3221 3222 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3223 3224 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3225 sna->render.vertex_used += 6; 3226 assert(sna->render.vertex_used <= sna->render.vertex_size); 3227 3228 v[0] = x+w; 3229 v[4] = v[8] = x; 3230 v[1] = v[5] = y+h; 3231 v[9] = y; 3232 3233 v[2] = v[3] = v[7] = 1; 3234 v[6] = v[10] = v[11] = 0; 3235} 3236 3237fastcall static void 3238gen6_render_op_fill_box(struct sna *sna, 3239 const struct sna_fill_op *op, 3240 const BoxRec *box) 3241{ 3242 int16_t *v; 3243 3244 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3245 box->x1, box->y1, box->x2, box->y2)); 3246 3247 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3248 3249 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3250 sna->render.vertex_used += 6; 3251 assert(sna->render.vertex_used <= sna->render.vertex_size); 3252 3253 v[0] = box->x2; 3254 v[8] = v[4] = box->x1; 3255 v[5] = v[1] = box->y2; 3256 v[9] = box->y1; 3257 3258 v[7] = v[2] = v[3] = 1; 3259 v[6] = v[10] = v[11] = 0; 3260} 3261 3262fastcall static void 3263gen6_render_op_fill_boxes(struct sna *sna, 3264 const struct sna_fill_op *op, 3265 const BoxRec *box, 3266 int nbox) 3267{ 3268 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3269 box->x1, box->y1, box->x2, box->y2, nbox)); 3270 3271 do { 3272 int nbox_this_time; 3273 int16_t *v; 3274 3275 nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 3276 gen6_emit_fill_state); 3277 nbox -= nbox_this_time; 3278 3279 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3280 sna->render.vertex_used += 6 * nbox_this_time; 3281 assert(sna->render.vertex_used <= sna->render.vertex_size); 3282 3283 do { 3284 v[0] = box->x2; 3285 v[8] = v[4] = box->x1; 3286 v[5] = v[1] = box->y2; 3287 v[9] = box->y1; 3288 v[7] = v[2] = v[3] = 1; 3289 v[6] = v[10] = v[11] = 0; 3290 box++; v += 12; 3291 } while (--nbox_this_time); 3292 } while (nbox); 3293} 3294 3295static void 3296gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) 3297{ 3298 DBG(("%s()\n", __FUNCTION__)); 3299 3300 assert(!sna->render.active); 3301 if (sna->render.vertex_offset) 3302 gen4_vertex_flush(sna); 3303 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3304} 3305 3306static bool 3307gen6_render_fill(struct sna *sna, uint8_t alu, 3308 PixmapPtr dst, struct kgem_bo *dst_bo, 3309 uint32_t color, unsigned flags, 3310 struct sna_fill_op *op) 3311{ 3312 DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3313 3314 if (prefer_blt_fill(sna, dst_bo, flags) && 3315 sna_blt_fill(sna, alu, 3316 dst_bo, dst->drawable.bitsPerPixel, 3317 color, 3318 op)) 3319 return true; 3320 3321 if (!(alu == GXcopy || alu == GXclear) || 3322 too_large(dst->drawable.width, dst->drawable.height)) 3323 return sna_blt_fill(sna, alu, 3324 dst_bo, dst->drawable.bitsPerPixel, 3325 color, 3326 op); 3327 3328 if (alu == GXclear) 3329 color = 0; 3330 3331 op->base.dst.pixmap = dst; 3332 op->base.dst.width = dst->drawable.width; 3333 op->base.dst.height = dst->drawable.height; 3334 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3335 op->base.dst.bo = dst_bo; 3336 op->base.dst.x = op->base.dst.y = 0; 3337 3338 op->base.src.bo = 3339 sna_render_get_solid(sna, 3340 sna_rgba_for_color(color, 3341 dst->drawable.depth)); 3342 op->base.mask.bo = NULL; 3343 3344 op->base.need_magic_ca_pass = false; 3345 op->base.floats_per_vertex = 2; 3346 op->base.floats_per_rect = 6; 3347 3348 op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; 3349 assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3350 assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); 3351 assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); 3352 3353 kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3354 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3355 kgem_submit(&sna->kgem); 3356 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3357 } 3358 3359 gen6_align_vertex(sna, &op->base); 3360 gen6_emit_fill_state(sna, &op->base); 3361 3362 op->blt = gen6_render_op_fill_blt; 3363 op->box = gen6_render_op_fill_box; 3364 op->boxes = gen6_render_op_fill_boxes; 3365 op->points = NULL; 3366 op->done = gen6_render_op_fill_done; 3367 return true; 3368} 3369 3370static bool 3371gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3372 uint32_t color, 3373 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3374 uint8_t alu) 3375{ 3376 BoxRec box; 3377 3378 box.x1 = x1; 3379 box.y1 = y1; 3380 box.x2 = x2; 3381 box.y2 = y2; 3382 3383 return sna_blt_fill_boxes(sna, alu, 3384 bo, dst->drawable.bitsPerPixel, 3385 color, &box, 1); 3386} 3387 3388static bool 3389gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3390 uint32_t color, 3391 int16_t x1, int16_t y1, 3392 int16_t x2, int16_t y2, 3393 uint8_t alu) 3394{ 3395 struct sna_composite_op tmp; 3396 int16_t *v; 3397 3398 /* Prefer to use the BLT if already engaged */ 3399 if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3400 gen6_render_fill_one_try_blt(sna, dst, bo, color, 3401 x1, y1, x2, y2, alu)) 3402 return true; 3403 3404 /* Must use the BLT if we can't RENDER... */ 3405 if (!(alu == GXcopy || alu == GXclear) || 3406 too_large(dst->drawable.width, dst->drawable.height)) 3407 return gen6_render_fill_one_try_blt(sna, dst, bo, color, 3408 x1, y1, x2, y2, alu); 3409 3410 if (alu == GXclear) 3411 color = 0; 3412 3413 tmp.dst.pixmap = dst; 3414 tmp.dst.width = dst->drawable.width; 3415 tmp.dst.height = dst->drawable.height; 3416 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3417 tmp.dst.bo = bo; 3418 tmp.dst.x = tmp.dst.y = 0; 3419 3420 tmp.src.bo = 3421 sna_render_get_solid(sna, 3422 sna_rgba_for_color(color, 3423 dst->drawable.depth)); 3424 tmp.mask.bo = NULL; 3425 3426 tmp.floats_per_vertex = 2; 3427 tmp.floats_per_rect = 6; 3428 tmp.need_magic_ca_pass = false; 3429 3430 tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3431 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3432 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3433 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3434 3435 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3436 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3437 kgem_submit(&sna->kgem); 3438 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3439 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3440 return false; 3441 } 3442 } 3443 3444 gen6_align_vertex(sna, &tmp); 3445 gen6_emit_fill_state(sna, &tmp); 3446 3447 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3448 3449 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3450 3451 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3452 sna->render.vertex_used += 6; 3453 assert(sna->render.vertex_used <= sna->render.vertex_size); 3454 3455 v[0] = x2; 3456 v[8] = v[4] = x1; 3457 v[5] = v[1] = y2; 3458 v[9] = y1; 3459 v[7] = v[2] = v[3] = 1; 3460 v[6] = v[10] = v[11] = 0; 3461 3462 gen4_vertex_flush(sna); 3463 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3464 3465 return true; 3466} 3467 3468static bool 3469gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3470{ 3471 BoxRec box; 3472 3473 box.x1 = 0; 3474 box.y1 = 0; 3475 box.x2 = dst->drawable.width; 3476 box.y2 = dst->drawable.height; 3477 3478 return sna_blt_fill_boxes(sna, GXclear, 3479 bo, dst->drawable.bitsPerPixel, 3480 0, &box, 1); 3481} 3482 3483static bool 3484gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3485{ 3486 struct sna_composite_op tmp; 3487 int16_t *v; 3488 3489 DBG(("%s: %dx%d\n", 3490 __FUNCTION__, 3491 dst->drawable.width, 3492 dst->drawable.height)); 3493 3494 /* Prefer to use the BLT if, and only if, already engaged */ 3495 if (sna->kgem.ring == KGEM_BLT && 3496 gen6_render_clear_try_blt(sna, dst, bo)) 3497 return true; 3498 3499 /* Must use the BLT if we can't RENDER... */ 3500 if (too_large(dst->drawable.width, dst->drawable.height)) 3501 return gen6_render_clear_try_blt(sna, dst, bo); 3502 3503 tmp.dst.pixmap = dst; 3504 tmp.dst.width = dst->drawable.width; 3505 tmp.dst.height = dst->drawable.height; 3506 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3507 tmp.dst.bo = bo; 3508 tmp.dst.x = tmp.dst.y = 0; 3509 3510 tmp.src.bo = sna_render_get_solid(sna, 0); 3511 tmp.mask.bo = NULL; 3512 3513 tmp.floats_per_vertex = 2; 3514 tmp.floats_per_rect = 6; 3515 tmp.need_magic_ca_pass = false; 3516 3517 tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3518 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3519 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3520 assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3521 3522 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3523 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3524 kgem_submit(&sna->kgem); 3525 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3526 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3527 return false; 3528 } 3529 } 3530 3531 gen6_align_vertex(sna, &tmp); 3532 gen6_emit_fill_state(sna, &tmp); 3533 3534 gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3535 3536 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3537 sna->render.vertex_used += 6; 3538 assert(sna->render.vertex_used <= sna->render.vertex_size); 3539 3540 v[0] = dst->drawable.width; 3541 v[5] = v[1] = dst->drawable.height; 3542 v[8] = v[4] = 0; 3543 v[9] = 0; 3544 3545 v[7] = v[2] = v[3] = 1; 3546 v[6] = v[10] = v[11] = 0; 3547 3548 gen4_vertex_flush(sna); 3549 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3550 3551 return true; 3552} 3553 3554static void gen6_render_reset(struct sna *sna) 3555{ 3556 sna->render_state.gen6.needs_invariant = true; 3557 sna->render_state.gen6.first_state_packet = true; 3558 sna->render_state.gen6.ve_id = 3 << 2; 3559 sna->render_state.gen6.last_primitive = -1; 3560 3561 sna->render_state.gen6.num_sf_outputs = 0; 3562 sna->render_state.gen6.samplers = -1; 3563 sna->render_state.gen6.blend = -1; 3564 sna->render_state.gen6.kernel = -1; 3565 sna->render_state.gen6.drawrect_offset = -1; 3566 sna->render_state.gen6.drawrect_limit = -1; 3567 sna->render_state.gen6.surface_table = -1; 3568 3569 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3570 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3571 discard_vbo(sna); 3572 } 3573 3574 sna->render.vertex_offset = 0; 3575 sna->render.nvertex_reloc = 0; 3576 sna->render.vb_id = 0; 3577} 3578 3579static void gen6_render_fini(struct sna *sna) 3580{ 3581 kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); 3582} 3583 3584static bool is_gt2(struct sna *sna, int devid) 3585{ 3586 return devid & 0x30; 3587} 3588 3589static bool is_mobile(struct sna *sna, int devid) 3590{ 3591 return (devid & 0xf) == 0x6; 3592} 3593 3594static bool gen6_render_setup(struct sna *sna, int devid) 3595{ 3596 struct gen6_render_state *state = &sna->render_state.gen6; 3597 struct sna_static_stream general; 3598 struct gen6_sampler_state *ss; 3599 int i, j, k, l, m; 3600 3601 state->info = >1_info; 3602 if (is_gt2(sna, devid)) 3603 state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ 3604 state->gt = state->info->gt; 3605 3606 sna_static_stream_init(&general); 3607 3608 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3609 * dumps, you know it points to zero. 3610 */ 3611 null_create(&general); 3612 scratch_create(&general); 3613 3614 for (m = 0; m < GEN6_KERNEL_COUNT; m++) { 3615 if (wm_kernels[m].size) { 3616 state->wm_kernel[m][1] = 3617 sna_static_stream_add(&general, 3618 wm_kernels[m].data, 3619 wm_kernels[m].size, 3620 64); 3621 } else { 3622 if (USE_8_PIXEL_DISPATCH) { 3623 state->wm_kernel[m][0] = 3624 sna_static_stream_compile_wm(sna, &general, 3625 wm_kernels[m].data, 8); 3626 } 3627 3628 if (USE_16_PIXEL_DISPATCH) { 3629 state->wm_kernel[m][1] = 3630 sna_static_stream_compile_wm(sna, &general, 3631 wm_kernels[m].data, 16); 3632 } 3633 3634 if (USE_32_PIXEL_DISPATCH) { 3635 state->wm_kernel[m][2] = 3636 sna_static_stream_compile_wm(sna, &general, 3637 wm_kernels[m].data, 32); 3638 } 3639 } 3640 if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { 3641 state->wm_kernel[m][1] = 3642 sna_static_stream_compile_wm(sna, &general, 3643 wm_kernels[m].data, 16); 3644 } 3645 } 3646 3647 ss = sna_static_stream_map(&general, 3648 2 * sizeof(*ss) * 3649 (2 + 3650 FILTER_COUNT * EXTEND_COUNT * 3651 FILTER_COUNT * EXTEND_COUNT), 3652 32); 3653 state->wm_state = sna_static_stream_offsetof(&general, ss); 3654 sampler_copy_init(ss); ss += 2; 3655 sampler_fill_init(ss); ss += 2; 3656 for (i = 0; i < FILTER_COUNT; i++) { 3657 for (j = 0; j < EXTEND_COUNT; j++) { 3658 for (k = 0; k < FILTER_COUNT; k++) { 3659 for (l = 0; l < EXTEND_COUNT; l++) { 3660 sampler_state_init(ss++, i, j); 3661 sampler_state_init(ss++, k, l); 3662 } 3663 } 3664 } 3665 } 3666 3667 state->cc_blend = gen6_composite_create_blend_state(&general); 3668 3669 state->general_bo = sna_static_stream_fini(sna, &general); 3670 return state->general_bo != NULL; 3671} 3672 3673const char *gen6_render_init(struct sna *sna, const char *backend) 3674{ 3675 int devid = intel_get_device_id(sna->dev); 3676 3677 if (!gen6_render_setup(sna, devid)) 3678 return backend; 3679 3680 sna->kgem.context_switch = gen6_render_context_switch; 3681 sna->kgem.retire = gen6_render_retire; 3682 sna->kgem.expire = gen4_render_expire; 3683 3684#if !NO_COMPOSITE 3685 sna->render.composite = gen6_render_composite; 3686 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3687#endif 3688 3689#if !NO_COMPOSITE_SPANS 3690 sna->render.check_composite_spans = gen6_check_composite_spans; 3691 sna->render.composite_spans = gen6_render_composite_spans; 3692 if (is_mobile(sna, devid)) 3693 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3694#endif 3695 sna->render.video = gen6_render_video; 3696 3697#if !NO_COPY_BOXES 3698 sna->render.copy_boxes = gen6_render_copy_boxes; 3699#endif 3700#if !NO_COPY 3701 sna->render.copy = gen6_render_copy; 3702#endif 3703 3704#if !NO_FILL_BOXES 3705 sna->render.fill_boxes = gen6_render_fill_boxes; 3706#endif 3707#if !NO_FILL 3708 sna->render.fill = gen6_render_fill; 3709#endif 3710#if !NO_FILL_ONE 3711 sna->render.fill_one = gen6_render_fill_one; 3712#endif 3713#if !NO_FILL_CLEAR 3714 sna->render.clear = gen6_render_clear; 3715#endif 3716 3717 sna->render.flush = gen4_render_flush; 3718 sna->render.reset = gen6_render_reset; 3719 sna->render.fini = gen6_render_fini; 3720 3721 sna->render.max_3d_size = GEN6_MAX_SIZE; 3722 sna->render.max_3d_pitch = 1 << 18; 3723 return sna->render_state.gen6.info->name; 3724} 3725