1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen6_render.h"
#include "gen6_common.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* Debug toggles: when set to 1, force the corresponding PIPE_CONTROL
 * (invalidate/flush/stall) before every state emission.  Useful when
 * hunting cache-coherency bugs; all 0 in production.
 */
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

/* Debug toggles: set to 1 to disable an individual accelerated path. */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

/* Which pixel-shader dispatch widths may be enabled in 3DSTATE_WM. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Maximum 2D surface width/height the gen6 3D pipeline can address;
 * larger surfaces take a fallback path (see too_large()).
 */
#define GEN6_MAX_SIZE 8192

/* Hardware limits that differ between the Sandybridge GT1/GT2 variants. */
struct gt_info {
	const char *name;
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;
		int max_vs_entries;
		int max_gs_entries;
	} urb;
	int gt;
};

static const struct gt_info gt1_info = {
	.name = "Sandybridge (gen6, gt1)",
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
	.gt = 1,
};

static const struct gt_info gt2_info = {
	.name = "Sandybridge (gen6, gt2)",
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
	.gt = 2,
};

/* Pre-assembled pixel-shader programs for video YUV->RGB conversion,
 * one table per (sample packing, colorspace) combination.  The .g6b
 * files are generated gen6 shader binaries.
 */
static const uint32_t ps_kernel_packed_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nv12_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_nv12.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_packed_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nv12_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_nv12.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};

/* NOKERNEL entries are compiled at runtime by the brw assembler (size 0
 * here); KERNEL entries reference the static video shaders above.
 * The last argument is the number of binding-table surfaces used.
 */
#define NOKERNEL(kernel_enum, func, ns) \
	[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
	[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	unsigned int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),

	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
};
#undef KERNEL

/* Source/destination blend factors for each Render PictOp, indexed by
 * the op.  src_alpha notes whether the op consumes the source alpha,
 * which matters for the component-alpha fixups in gen6_get_blend().
 */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)

/* Offset of the pre-baked blend-state entry for a (src, dst) factor pair
 * within the static blend-state array.
 */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

/* Offset of the cached sampler pair for (src filter/extend, mask
 * filter/extend).  The "+ 2" skips the two special copy/fill sampler
 * pairs stored at the front of the table (COPY_SAMPLER/FILL_SAMPLER).
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

/* The per-op flags word packs (sampler | kernel) into the high 16 bits
 * and (blend | vertex-id) into the low 16.  The sampler/blend offsets
 * are 16-byte aligned, leaving the low nibble free for the small
 * kernel/vertex ids.
 */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

/* Surfaces exceeding the gen6 3D pipeline limit must use a fallback. */
static inline bool too_large(int width, int height)
{
	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
}

/* Map a Render PictOp (plus destination format and component-alpha
 * state) to the offset of the matching pre-baked blend state.
 */
static uint32_t gen6_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen6_blend_op[op].src_blend;
	dst = gen6_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ONE;
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

/* Map a Render picture format onto a gen6 sampler surface format;
 * returns -1 (as uint32_t) for formats the sampler cannot read.
 */
static uint32_t gen6_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

/* Map a Render picture format onto a gen6 render-target format;
 * x-channel formats render as their alpha-bearing equivalents.
 * Returns -1 (as uint32_t) for unsupported render targets.
 */
static uint32_t gen6_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

/* Translate a Render filter to the sampler-cache filter index. */
static uint32_t gen6_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
		/* fallthrough */
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen6_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

/* Translate a Render repeat mode to the sampler-cache extend index. */
static uint32_t gen6_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
		/* fallthrough */
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return
SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

/* Choose the WM kernel for a composite operation.  Projective variants
 * directly follow their affine counterparts in the kernel enum, hence
 * the "base + !is_affine" at the end.
 */
static int
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen6_blend_op[op].src_alpha)
				base = GEN6_WM_KERNEL_MASKSA;
			else
				base = GEN6_WM_KERNEL_MASKCA;
		} else
			base = GEN6_WM_KERNEL_MASK;
	} else
		base = GEN6_WM_KERNEL_NOMASK;

	return base + !is_affine;
}

/* PIPE_CONTROL flushing both the render and texture caches with a CS
 * stall -- used before sampling from a buffer written earlier in this
 * batch.
 */
inline static void
gen6_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* PIPE_CONTROL flushing the render write cache, optionally stalling. */
inline static void
gen6_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned stall;

	stall = 0;
	if (need_stall)
		stall = GEN6_PIPE_CONTROL_CS_STALL;

	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* PIPE_CONTROL stalling the command streamer at the scoreboard, without
 * any cache flush.
 */
inline static void
gen6_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
		  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Give the whole URB allocation to the (pass-through) VS; the GS is
 * never enabled so it gets no entries.
 */
static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 <<
GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

/* Program the state base addresses: dynamic/instruction state lives in
 * the static general_bo; surface state is relative to the batch itself.
 */
static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}

/* Zero viewport-state pointers: 2D composites need no CC viewport. */
static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}

static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}

static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}

static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Bind a NULL depth buffer -- 2D rendering never depth-tests. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}

/* Emit the pipeline state that never changes between composites; done
 * once per batch, then needs_invariant is cleared.
 */
static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}

/* Point the CC unit at the blend state for this op; skipped when the
 * cached blend offset already matches.  render->blend == -1 marks the
 * first emission after a reset, when the depth-stencil/CC pointers must
 * also be (re)validated.
 */
static void
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);
	if (render->blend == (unsigned)-1) {
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
}

/* Point the PS at the sampler pair for this op (cached). */
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen6.samplers == state)
		return;

	sna->render_state.gen6.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
}

/* Program SF for one (position+source) or two (plus mask) attribute
 * outputs; re-emitted only when the output count changes.
 */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}

/* Switch the pixel shader.  kernels[] holds the kernel-cache offsets for
 * the 8/16/32-pixel dispatch variants (0 if absent); enable whichever
 * variants the chosen kernel provides.
 */
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}

/* Returns true iff the binding-table pointer actually changed (and was
 * re-emitted); callers use this to decide whether a stall is needed.
 */
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen6.surface_table == offset)
		return false;

	/* Binding table pointers */
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);

	sna->render_state.gen6.surface_table = offset;
	return true;
}

/* Returns true when the cached drawing rectangle already matched (no
 * packets emitted); false when it had to be re-emitted.
 */
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen6.drawrect_limit == limit &&
	    sna->render_state.gen6.drawrect_offset == offset)
		return true;

	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
	 * produced by non-pipelined state commands), software needs to first
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
	 * 0.
	 *
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
	 * BEFORE the pipe-control with a post-sync op and no write-cache
	 * flushes.
	 */
	if (!sna->render_state.gen6.first_state_packet)
		gen6_emit_pipe_stall(sna);

	/* Post-sync write (timestamp) into the scratch bo to satisfy the
	 * workaround quoted above.
	 */
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
				 I915_GEM_DOMAIN_INSTRUCTION,
				 64));
	OUT_BATCH(0);

	DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n",
	     __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.height));
	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);

	sna->render_state.gen6.drawrect_offset = offset;
	sna->render_state.gen6.drawrect_limit = limit;
	return false;
}

static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0.
0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* Leading pad element: all four components forced to 0. */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	/* The low two bits of the vertex id encode the number of floats in
	 * the first texcoord channel (0 means a packed short pair).
	 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
		/* fallthrough */
	case 0:
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* Mask channel starts after the first channel's floats
		 * (a packed short pair counts as one float of space).
		 */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough */
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}

static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* The low bit of wm_binding_table flags that a write-cache flush
	 * is required before this operation.
	 */
	need_flush = wm_binding_table & 1;
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;
	need_stall = sna->render_state.gen6.surface_table != wm_binding_table;
	if (ALWAYS_STALL)
		need_stall = true;

	/* Sampling from a bo written earlier in this batch requires a full
	 * invalidate, which subsumes the flush and stall.
	 */
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	if (need_invalidate) {
		gen6_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
		sna->render_state.gen6.first_state_packet = true;
	}
	if (need_flush) {
		gen6_emit_pipe_flush(sna, need_stall);
		need_stall = false;
		sna->render_state.gen6.first_state_packet = true;
	}

	/* gen6_emit_drawing_rectangle() returns true when nothing was
	 * emitted; only then is an explicit stall still required.
	 */
	need_stall &= gen6_emit_drawing_rectangle(sna, op);
	if (need_stall)
		gen6_emit_pipe_stall(sna);

	gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);
	gen6_emit_binding_table(sna, wm_binding_table);

	sna->render_state.gen6.first_state_packet = false;
}

/* Second pass for component-alpha composites: replay the rectangles
 * already queued (vertex_start..vertex_index) with a PictOpAdd blend and
 * the source-alpha kernel.  Returns false if no CA fixup is needed.
 */
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen6_emit_pipe_stall(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

/* Surface-state entries padded to the hardware's 32-byte stride. */
typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}

/* Fill in one sampler-state entry for the given filter/extend pair. */
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension.
*/ 1040 sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; 1041 1042 switch (filter) { 1043 default: 1044 case SAMPLER_FILTER_NEAREST: 1045 sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; 1046 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; 1047 break; 1048 case SAMPLER_FILTER_BILINEAR: 1049 sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; 1050 sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; 1051 break; 1052 } 1053 1054 switch (extend) { 1055 default: 1056 case SAMPLER_EXTEND_NONE: 1057 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1058 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1059 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1060 break; 1061 case SAMPLER_EXTEND_REPEAT: 1062 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1063 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1064 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1065 break; 1066 case SAMPLER_EXTEND_PAD: 1067 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1068 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1069 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1070 break; 1071 case SAMPLER_EXTEND_REFLECT: 1072 sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1073 sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1074 sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1075 break; 1076 } 1077} 1078 1079static void 1080sampler_copy_init(struct gen6_sampler_state *ss) 1081{ 1082 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1083 ss->ss3.non_normalized_coord = 1; 1084 1085 sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1086} 1087 1088static void 1089sampler_fill_init(struct gen6_sampler_state *ss) 1090{ 1091 sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1092 ss->ss3.non_normalized_coord = 1; 1093 1094 sampler_state_init(ss+1, 
			   SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}

/* Translate an I915_TILING_* value into SURFACE_STATE tiling bits. */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN6_SURFACE_TILED;
	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * Returns the byte offset of the surface state within the batch; a
 * cached binding is reused when the bo was already bound with the same
 * format/dst/scanout key in this batch.
 */
static int
gen6_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface states are allocated downwards from the end of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	if (is_dst) {
		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* NOTE(review): ss[5] looks like cacheability control - scanout/io
	 * surfaces left uncached; confirm against the SNB PRM. */
	ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

/* Emit 3DSTATE_VERTEX_BUFFERS for this op's vertex layout, recording the
 * buffer-address slot for later relocation when the vbo is finalised. */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

/* Start (or continue) a RECTLIST 3DPRIMITIVE; the vertex count dword is
 * left at 0 and patched when the primitive is flushed. */
static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		/* The previous 3DPRIMITIVE is still the last packet in the
		 * batch, so we can simply extend its vertex range. */
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}

/* Ensure a primitive is open for emitting rectangles; returns false if
 * the batch lacks room and must be submitted first. */
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Reserve room for the CA fixup pass as well, if needed. */
	ndwords = op->need_magic_ca_pass ?
		  60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}

/* Flush the current vbo/primitive to make room for more vertices.
 * Returns the vertex space now available, or 0 if the batch itself
 * must be submitted. */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen6_magic_ca_pass(sna, op)) {
			/* The CA pass clobbered the cc/wm state; restore it. */
			gen6_emit_pipe_stall(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}

/* Reserve vertex space for up to 'want' rectangles, flushing and
 * re-emitting state (via emit_state) as required.  Returns the number
 * of rectangles the caller may emit (>= 1). */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen6_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	/* 3 vertices per rectangle (RECTLIST). */
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* No room in the batch: close the primitive, submit, then redo
	 * the state emission and retry. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

/* Allocate a fresh (zeroed) binding table at the top of the batch and
 * return a pointer to it; *offset receives its dword offset. */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}

/* Switch the batch to RENDER mode and make sure there is room for a
 * full state emission; returns whether the destination is dirty. */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}

/* Bind dst/src/mask surfaces and emit the full composite state.  If the
 * freshly built binding table matches the previous one, it is discarded
 * and the old table offset reused. */
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if
	    (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If the new binding table is identical to the previous one
	 * (compare dst+src as one 64-bit word, and mask if present),
	 * reclaim the space and reuse the old table offset. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* The low bit of the offset carries the dirty/flush flag. */
	gen6_emit_state(sna, op, offset | dirty);
}

/* Realign the vertex buffer when the vertex size changes between ops. */
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}

/* Emit a single composite rectangle. */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}

/* Emit a single composite box (src/mask offsets equal to dst). */
fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

/* Emit many boxes via the per-rectangle prim_emit callback. */
static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Emit many boxes via the vectorised emit_boxes callback. */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

/* As gen6_render_composite_boxes, but drops the vertex lock while
 * filling the reserved vertices so other threads can make progress. */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Pre-build one BLEND_STATE for every (src, dst) blend-factor pair;
 * lookup at runtime is then a simple offset computation. */
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a no-op: disable blending entirely. */
			blend->blend0.blend_enable =
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

/* Bind one plane of a video frame as a sampler surface; returns the
 * byte offset of the surface state within the batch. */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

/* Emit render state for a video blit: the destination plus one sampler
 * surface per source plane (6 for planar formats, 1 for packed). */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Two full-resolution Y surfaces followed by four
		 * half-resolution chroma surfaces. */
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN6_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN6_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN6_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width,
			     op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen6_emit_state(sna, op, offset | dirty);
}

/* Pick the WM kernel matching the frame's fourcc and the requested
 * colorspace (BT.601 vs BT.709). */
static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			GEN6_WM_KERNEL_VIDEO_PLANAR_BT709 :
			GEN6_WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			GEN6_WM_KERNEL_VIDEO_NV12_BT709 :
			GEN6_WM_KERNEL_VIDEO_NV12_BT601;

	default:
		return video->colorspace ?
			GEN6_WM_KERNEL_VIDEO_PACKED_BT709 :
			GEN6_WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

/* Render a video frame into the given region of the pixmap using the
 * 3D pipe, scaling from frame->src to the region extents. */
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	unsigned filter;
	const BoxRec *box;
	int nbox;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* (x, y, 1/w) per vertex, 3 vertices per rectangle. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       select_video_kernel(video, frame),
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_video_state(sna, &tmp);

	/* Map destination pixel coordinates to normalised source texture
	 * coordinates: tex = dst * scale + offset. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		OUT_VERTEX(box->x2, box->y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		box++;
	}
	gen4_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}

/* Prepare a render channel for the given picture, falling back to
 * fixup/extract/convert paths when it cannot be sampled directly.
 * Returns <= 0 on failure, 1 on success (matching the gen4 helpers). */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y, precise));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		/* Source-only pictures: linear gradients are handled
		 * natively, others are rasterised to a temporary. */
		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* A pure integer translation can be folded into the sample
		 * offsets, avoiding the transform entirely. */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
#define f2d(x) (((double)(x))/65536.)
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n",
		     __FUNCTION__,
		     f2d(channel->transform->matrix[0][0]),
		     f2d(channel->transform->matrix[0][1]),
		     f2d(channel->transform->matrix[0][2]),
		     f2d(channel->transform->matrix[1][0]),
		     f2d(channel->transform->matrix[1][1]),
		     f2d(channel->transform->matrix[1][2]),
		     f2d(channel->transform->matrix[2][0]),
		     f2d(channel->transform->matrix[2][1]),
		     f2d(channel->transform->matrix[2][2]),
		     channel->transform->matrix[0][0],
		     channel->transform->matrix[0][1],
		     channel->transform->matrix[0][2],
		     channel->transform->matrix[1][0],
		     channel->transform->matrix[1][1],
		     channel->transform->matrix[1][2],
		     channel->transform->matrix[2][0],
		     channel->transform->matrix[2][1],
		     channel->transform->matrix[2][2]));
#undef f2d
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Convert Render repeat/filter/format values into their hardware
 * encodings once the channel has been fully resolved. */
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen6_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}

/* Tear down a composite op: flush outstanding vertices (including the
 * CA fixup pass) and release the channel references. */
static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
kgem_bo_destroy(&sna->kgem, op->src.bo); 1942 1943 sna_render_composite_redirect_done(sna, op); 1944} 1945 1946inline static bool 1947gen6_composite_set_target(struct sna *sna, 1948 struct sna_composite_op *op, 1949 PicturePtr dst, 1950 int x, int y, int w, int h, 1951 bool partial) 1952{ 1953 BoxRec box; 1954 unsigned int hint; 1955 1956 DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 1957 1958 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1959 op->dst.format = dst->format; 1960 op->dst.width = op->dst.pixmap->drawable.width; 1961 op->dst.height = op->dst.pixmap->drawable.height; 1962 1963 if (w && h) { 1964 box.x1 = x; 1965 box.y1 = y; 1966 box.x2 = x + w; 1967 box.y2 = y + h; 1968 } else 1969 sna_render_picture_extents(dst, &box); 1970 1971 hint = PREFER_GPU | RENDER_GPU; 1972 if (!need_tiling(sna, op->dst.width, op->dst.height)) 1973 hint |= FORCE_GPU; 1974 if (!partial) { 1975 hint |= IGNORE_DAMAGE; 1976 if (w == op->dst.width && h == op->dst.height) 1977 hint |= REPLACES; 1978 } 1979 1980 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1981 if (op->dst.bo == NULL) 1982 return false; 1983 1984 if (hint & REPLACES) { 1985 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1986 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1987 } 1988 1989 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1990 &op->dst.x, &op->dst.y); 1991 1992 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1993 __FUNCTION__, 1994 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1995 op->dst.width, op->dst.height, 1996 op->dst.bo->pitch, 1997 op->dst.x, op->dst.y, 1998 op->damage ? 
*op->damage : (void *)-1)); 1999 2000 assert(op->dst.bo->proxy == NULL); 2001 2002 if (too_large(op->dst.width, op->dst.height) && 2003 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2004 return false; 2005 2006 return true; 2007} 2008 2009static bool 2010try_blt(struct sna *sna, 2011 uint8_t op, 2012 PicturePtr src, 2013 PicturePtr mask, 2014 PicturePtr dst, 2015 int16_t src_x, int16_t src_y, 2016 int16_t msk_x, int16_t msk_y, 2017 int16_t dst_x, int16_t dst_y, 2018 int16_t width, int16_t height, 2019 unsigned flags, 2020 struct sna_composite_op *tmp) 2021{ 2022 struct kgem_bo *bo; 2023 2024 if (sna->kgem.mode == KGEM_BLT) { 2025 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2026 goto execute; 2027 } 2028 2029 if (too_large(width, height)) { 2030 DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2031 __FUNCTION__, width, height)); 2032 goto execute; 2033 } 2034 2035 bo = __sna_drawable_peek_bo(dst->pDrawable); 2036 if (bo == NULL) 2037 goto execute; 2038 2039 if (untiled_tlb_miss(bo)) 2040 goto execute; 2041 2042 if (bo->rq) { 2043 if (RQ_IS_BLT(bo->rq)) 2044 goto execute; 2045 2046 return false; 2047 } 2048 2049 if (bo->tiling == I915_TILING_Y) 2050 goto upload; 2051 2052 if (src->pDrawable == dst->pDrawable && 2053 can_switch_to_blt(sna, bo, 0)) 2054 goto execute; 2055 2056 if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2057 goto execute; 2058 2059 if (src->pDrawable) { 2060 struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); 2061 if (s == NULL) 2062 goto execute; 2063 2064 if (prefer_blt_bo(sna, s, bo)) 2065 goto execute; 2066 } 2067 2068 if (sna->kgem.ring == KGEM_BLT) { 2069 DBG(("%s: already performing BLT\n", __FUNCTION__)); 2070 goto execute; 2071 } 2072 2073upload: 2074 flags |= COMPOSITE_UPLOAD; 2075execute: 2076 return sna_blt_composite(sna, op, 2077 src, dst, 2078 src_x, src_y, 2079 dst_x, dst_y, 2080 width, height, 2081 flags, tmp); 2082} 2083 2084static bool 2085check_gradient(PicturePtr 
picture, bool precise) 2086{ 2087 if (picture->pDrawable) 2088 return false; 2089 2090 switch (picture->pSourcePict->type) { 2091 case SourcePictTypeSolidFill: 2092 case SourcePictTypeLinear: 2093 return false; 2094 default: 2095 return precise; 2096 } 2097} 2098 2099static bool 2100has_alphamap(PicturePtr p) 2101{ 2102 return p->alphaMap != NULL; 2103} 2104 2105static bool 2106need_upload(PicturePtr p) 2107{ 2108 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2109} 2110 2111static bool 2112source_is_busy(PixmapPtr pixmap) 2113{ 2114 struct sna_pixmap *priv = sna_pixmap(pixmap); 2115 if (priv == NULL || priv->clear) 2116 return false; 2117 2118 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2119 return true; 2120 2121 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2122 return true; 2123 2124 return priv->gpu_damage && !priv->cpu_damage; 2125} 2126 2127static bool 2128source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2129{ 2130 if (sna_picture_is_solid(p, NULL)) 2131 return false; 2132 2133 if (p->pSourcePict) 2134 return check_gradient(p, precise); 2135 2136 if (!gen6_check_repeat(p) || !gen6_check_format(p->format)) 2137 return true; 2138 2139 if (pixmap && source_is_busy(pixmap)) 2140 return false; 2141 2142 return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p); 2143} 2144 2145static bool 2146gen6_composite_fallback(struct sna *sna, 2147 PicturePtr src, 2148 PicturePtr mask, 2149 PicturePtr dst) 2150{ 2151 PixmapPtr src_pixmap; 2152 PixmapPtr mask_pixmap; 2153 PixmapPtr dst_pixmap; 2154 bool src_fallback, mask_fallback; 2155 2156 if (!gen6_check_dst_format(dst->format)) { 2157 DBG(("%s: unknown destination format: %d\n", 2158 __FUNCTION__, dst->format)); 2159 return true; 2160 } 2161 2162 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2163 2164 src_pixmap = src->pDrawable ? 
		get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
2211 */ 2212 if (src_fallback) { 2213 DBG(("%s: dst is on the CPU and src will fallback\n", 2214 __FUNCTION__)); 2215 return true; 2216 } 2217 2218 if (mask && mask_fallback) { 2219 DBG(("%s: dst is on the CPU and mask will fallback\n", 2220 __FUNCTION__)); 2221 return true; 2222 } 2223 2224 if (too_large(dst_pixmap->drawable.width, 2225 dst_pixmap->drawable.height) && 2226 dst_is_cpu(dst_pixmap)) { 2227 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2228 return true; 2229 } 2230 2231 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2232 __FUNCTION__)); 2233 return dst_use_cpu(dst_pixmap); 2234} 2235 2236static int 2237reuse_source(struct sna *sna, 2238 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2239 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2240{ 2241 uint32_t color; 2242 2243 if (src_x != msk_x || src_y != msk_y) 2244 return false; 2245 2246 if (src == mask) { 2247 DBG(("%s: mask is source\n", __FUNCTION__)); 2248 *mc = *sc; 2249 mc->bo = kgem_bo_reference(mc->bo); 2250 return true; 2251 } 2252 2253 if (sna_picture_is_solid(mask, &color)) 2254 return gen4_channel_init_solid(sna, mc, color); 2255 2256 if (sc->is_solid) 2257 return false; 2258 2259 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2260 return false; 2261 2262 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2263 2264 if (!sna_transform_equal(src->transform, mask->transform)) 2265 return false; 2266 2267 if (!sna_picture_alphamap_equal(src, mask)) 2268 return false; 2269 2270 if (!gen6_check_repeat(mask)) 2271 return false; 2272 2273 if (!gen6_check_filter(mask)) 2274 return false; 2275 2276 if (!gen6_check_format(mask->format)) 2277 return false; 2278 2279 DBG(("%s: reusing source channel for mask with a twist\n", 2280 __FUNCTION__)); 2281 2282 *mc = *sc; 2283 mc->repeat = gen6_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 2284 mc->filter = gen6_filter(mask->filter); 2285 mc->pict_format = mask->format; 2286 mc->card_format = gen6_get_card_format(mask->format); 2287 mc->bo = kgem_bo_reference(mc->bo); 2288 return true; 2289} 2290 2291static bool 2292gen6_render_composite(struct sna *sna, 2293 uint8_t op, 2294 PicturePtr src, 2295 PicturePtr mask, 2296 PicturePtr dst, 2297 int16_t src_x, int16_t src_y, 2298 int16_t msk_x, int16_t msk_y, 2299 int16_t dst_x, int16_t dst_y, 2300 int16_t width, int16_t height, 2301 unsigned flags, 2302 struct sna_composite_op *tmp) 2303{ 2304 if (op >= ARRAY_SIZE(gen6_blend_op)) 2305 return false; 2306 2307 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 2308 width, height, sna->kgem.ring)); 2309 2310 if (mask == NULL && 2311 try_blt(sna, op, 2312 src, mask, dst, 2313 src_x, src_y, 2314 msk_x, msk_y, 2315 dst_x, dst_y, 2316 width, height, 2317 flags, tmp)) 2318 return true; 2319 2320 if (gen6_composite_fallback(sna, src, mask, dst)) 2321 goto fallback; 2322 2323 if (need_tiling(sna, width, height)) 2324 return sna_tiling_composite(op, src, mask, dst, 2325 src_x, src_y, 2326 msk_x, msk_y, 2327 dst_x, dst_y, 2328 width, height, 2329 tmp); 2330 2331 if (op == PictOpClear && src == sna->clear) 2332 op = PictOpSrc; 2333 tmp->op = op; 2334 if (!gen6_composite_set_target(sna, tmp, dst, 2335 dst_x, dst_y, width, height, 2336 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2337 goto fallback; 2338 2339 switch (gen6_composite_picture(sna, src, &tmp->src, 2340 src_x, src_y, 2341 width, height, 2342 dst_x, dst_y, 2343 dst->polyMode == PolyModePrecise)) { 2344 case -1: 2345 goto cleanup_dst; 2346 case 0: 2347 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2348 goto cleanup_dst; 2349 /* fall through to fixup */ 2350 case 1: 2351 /* Did we just switch rings to prepare the source? 
*/ 2352 if (mask == NULL && 2353 prefer_blt_composite(sna, tmp) && 2354 sna_blt_composite__convert(sna, 2355 dst_x, dst_y, width, height, 2356 tmp)) 2357 return true; 2358 2359 gen6_composite_channel_convert(&tmp->src); 2360 break; 2361 } 2362 2363 tmp->is_affine = tmp->src.is_affine; 2364 tmp->has_component_alpha = false; 2365 tmp->need_magic_ca_pass = false; 2366 2367 tmp->mask.bo = NULL; 2368 tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2369 tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2370 2371 if (mask) { 2372 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2373 tmp->has_component_alpha = true; 2374 2375 /* Check if it's component alpha that relies on a source alpha and on 2376 * the source value. We can only get one of those into the single 2377 * source value that we get to blend with. 2378 */ 2379 if (gen6_blend_op[op].src_alpha && 2380 (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { 2381 if (op != PictOpOver) 2382 goto cleanup_src; 2383 2384 tmp->need_magic_ca_pass = true; 2385 tmp->op = PictOpOutReverse; 2386 } 2387 } 2388 2389 if (!reuse_source(sna, 2390 src, &tmp->src, src_x, src_y, 2391 mask, &tmp->mask, msk_x, msk_y)) { 2392 switch (gen6_composite_picture(sna, mask, &tmp->mask, 2393 msk_x, msk_y, 2394 width, height, 2395 dst_x, dst_y, 2396 dst->polyMode == PolyModePrecise)) { 2397 case -1: 2398 goto cleanup_src; 2399 case 0: 2400 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2401 goto cleanup_src; 2402 /* fall through to fixup */ 2403 case 1: 2404 gen6_composite_channel_convert(&tmp->mask); 2405 break; 2406 } 2407 } 2408 2409 tmp->is_affine &= tmp->mask.is_affine; 2410 } 2411 2412 tmp->u.gen6.flags = 2413 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2414 tmp->src.repeat, 2415 tmp->mask.filter, 2416 tmp->mask.repeat), 2417 gen6_get_blend(tmp->op, 2418 tmp->has_component_alpha, 2419 tmp->dst.format), 2420 gen6_choose_composite_kernel(tmp->op, 2421 tmp->mask.bo != NULL, 2422 tmp->has_component_alpha, 2423 tmp->is_affine), 2424 
gen4_choose_composite_emitter(sna, tmp)); 2425 2426 tmp->blt = gen6_render_composite_blt; 2427 tmp->box = gen6_render_composite_box; 2428 tmp->boxes = gen6_render_composite_boxes__blt; 2429 if (tmp->emit_boxes) { 2430 tmp->boxes = gen6_render_composite_boxes; 2431 tmp->thread_boxes = gen6_render_composite_boxes__thread; 2432 } 2433 tmp->done = gen6_render_composite_done; 2434 2435 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2436 if (!kgem_check_bo(&sna->kgem, 2437 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2438 NULL)) { 2439 kgem_submit(&sna->kgem); 2440 if (!kgem_check_bo(&sna->kgem, 2441 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2442 NULL)) 2443 goto cleanup_mask; 2444 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2445 } 2446 2447 gen6_align_vertex(sna, tmp); 2448 gen6_emit_composite_state(sna, tmp); 2449 return true; 2450 2451cleanup_mask: 2452 if (tmp->mask.bo) { 2453 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2454 tmp->mask.bo = NULL; 2455 } 2456cleanup_src: 2457 if (tmp->src.bo) { 2458 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2459 tmp->src.bo = NULL; 2460 } 2461cleanup_dst: 2462 if (tmp->redirect.real_bo) { 2463 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2464 tmp->redirect.real_bo = NULL; 2465 } 2466fallback: 2467 return (mask == NULL && 2468 sna_blt_composite(sna, op, 2469 src, dst, 2470 src_x, src_y, 2471 dst_x, dst_y, 2472 width, height, 2473 flags | COMPOSITE_FALLBACK, tmp)); 2474} 2475 2476#if !NO_COMPOSITE_SPANS 2477fastcall static void 2478gen6_render_composite_spans_box(struct sna *sna, 2479 const struct sna_composite_spans_op *op, 2480 const BoxRec *box, float opacity) 2481{ 2482 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2483 __FUNCTION__, 2484 op->base.src.offset[0], op->base.src.offset[1], 2485 opacity, 2486 op->base.dst.x, op->base.dst.y, 2487 box->x1, box->y1, 2488 box->x2 - box->x1, 2489 box->y2 - box->y1)); 2490 2491 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); 2492 op->prim_emit(sna, 
			     op, box, opacity);
}

/* Emit a batch of opacity boxes one rectangle at a time via the
 * per-primitive emitter.  gen6_get_rectangles() reserves vertex space
 * and may split the request, hence the outer/inner loop structure.
 */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

/* Threaded variant: vertex space is reserved under the vertex lock,
 * then the lock is dropped (with the buffer pinned via
 * sna_vertex_acquire__locked) while op->emit_boxes() fills the
 * vertices, allowing other threads to reserve concurrently.
 */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

fastcall static void
gen6_render_composite_spans_done(struct sna
				 *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	/* Flush any vertices still pending for this operation. */
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	/* Drop the reference taken on the source bo at setup. */
	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

/* Can the spans fast path handle this operation on the GPU?
 * Checks the blend op, general composite fallbacks, and -- for
 * non-rectilinear spans -- whether the destination is busy enough on
 * the GPU to be worth it.
 */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* A busy CPU bo means we cannot render in place anyway. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}

/* Set up a spans composite operation (op + per-span opacity) on the
 * gen6 render pipeline, filling in the emitter callbacks in tmp.
 */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width,
height)) { 2634 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2635 __FUNCTION__, width, height)); 2636 return sna_tiling_composite_spans(op, src, dst, 2637 src_x, src_y, dst_x, dst_y, 2638 width, height, flags, tmp); 2639 } 2640 2641 tmp->base.op = op; 2642 if (!gen6_composite_set_target(sna, &tmp->base, dst, 2643 dst_x, dst_y, width, height, true)) 2644 return false; 2645 2646 switch (gen6_composite_picture(sna, src, &tmp->base.src, 2647 src_x, src_y, 2648 width, height, 2649 dst_x, dst_y, 2650 dst->polyMode == PolyModePrecise)) { 2651 case -1: 2652 goto cleanup_dst; 2653 case 0: 2654 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2655 goto cleanup_dst; 2656 /* fall through to fixup */ 2657 case 1: 2658 gen6_composite_channel_convert(&tmp->base.src); 2659 break; 2660 } 2661 tmp->base.mask.bo = NULL; 2662 2663 tmp->base.is_affine = tmp->base.src.is_affine; 2664 tmp->base.need_magic_ca_pass = false; 2665 2666 tmp->base.u.gen6.flags = 2667 GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2668 tmp->base.src.repeat, 2669 SAMPLER_FILTER_NEAREST, 2670 SAMPLER_EXTEND_PAD), 2671 gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), 2672 GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2673 gen4_choose_spans_emitter(sna, tmp)); 2674 2675 tmp->box = gen6_render_composite_spans_box; 2676 tmp->boxes = gen6_render_composite_spans_boxes; 2677 if (tmp->emit_boxes) 2678 tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; 2679 tmp->done = gen6_render_composite_spans_done; 2680 2681 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2682 if (!kgem_check_bo(&sna->kgem, 2683 tmp->base.dst.bo, tmp->base.src.bo, 2684 NULL)) { 2685 kgem_submit(&sna->kgem); 2686 if (!kgem_check_bo(&sna->kgem, 2687 tmp->base.dst.bo, tmp->base.src.bo, 2688 NULL)) 2689 goto cleanup_src; 2690 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2691 } 2692 2693 gen6_align_vertex(sna, &tmp->base); 2694 gen6_emit_composite_state(sna, &tmp->base); 2695 return true; 2696 
cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif

/* Emit the binding table and pipeline state for a copy operation
 * (dst as render target, src sampled).  If the freshly-built binding
 * table matches the previous one, reuse it and rewind the surface
 * allocator to avoid duplicating state in the batch.
 */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Identical two-entry table as last time? Reuse it. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}

/* Heuristic: should this copy run on the BLT ring rather than the
 * render ring?  Considers sync/DRI constraints, current ring, tiling
 * TLB behaviour and per-bo preferences.
 */
static inline bool prefer_blt_copy(struct sna *sna,
				   struct kgem_bo *src_bo,
				   struct kgem_bo *dst_bo,
				   unsigned flags)
{
	if (flags & COPY_SYNC)
		return false;

	if (PREFER_RENDER)
		return PREFER_RENDER > 0;

	if (sna->kgem.ring == KGEM_BLT)
		return true;

	/* Without semaphores, cross-ring DRI copies are too costly. */
	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
		return false;

	if ((flags & COPY_SMALL || src_bo == dst_bo) &&
	    can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	if (force_blt_ring(sna, dst_bo))
		return true;

	if (kgem_bo_is_render(dst_bo) ||
	    kgem_bo_is_render(src_bo))
		return false;

	if (flags & COPY_LAST &&
	    can_switch_to_blt(sna,
dst_bo, flags)) 2773 return true; 2774 2775 if (prefer_render_ring(sna, dst_bo)) 2776 return false; 2777 2778 if (!prefer_blt_ring(sna, dst_bo, flags)) 2779 return false; 2780 2781 return prefer_blt_bo(sna, src_bo, dst_bo); 2782} 2783 2784static bool 2785gen6_render_copy_boxes(struct sna *sna, uint8_t alu, 2786 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2787 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2788 const BoxRec *box, int n, unsigned flags) 2789{ 2790 struct sna_composite_op tmp; 2791 BoxRec extents; 2792 2793 DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", 2794 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, 2795 src_bo == dst_bo, 2796 overlaps(sna, 2797 src_bo, src_dx, src_dy, 2798 dst_bo, dst_dx, dst_dy, 2799 box, n, flags, &extents))); 2800 2801 if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2802 sna_blt_compare_depth(src, dst) && 2803 sna_blt_copy_boxes(sna, alu, 2804 src_bo, src_dx, src_dy, 2805 dst_bo, dst_dx, dst_dy, 2806 dst->bitsPerPixel, 2807 box, n)) 2808 return true; 2809 2810 if (!(alu == GXcopy || alu == GXclear)) { 2811fallback_blt: 2812 if (!sna_blt_compare_depth(src, dst)) 2813 return false; 2814 2815 return sna_blt_copy_boxes_fallback(sna, alu, 2816 src, src_bo, src_dx, src_dy, 2817 dst, dst_bo, dst_dx, dst_dy, 2818 box, n); 2819 } 2820 2821 if (overlaps(sna, 2822 src_bo, src_dx, src_dy, 2823 dst_bo, dst_dx, dst_dy, 2824 box, n, flags, 2825 &extents)) { 2826 bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2827 2828 if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2829 sna_blt_copy_boxes(sna, alu, 2830 src_bo, src_dx, src_dy, 2831 dst_bo, dst_dx, dst_dy, 2832 dst->bitsPerPixel, 2833 box, n)) 2834 return true; 2835 2836 if (big) 2837 goto fallback_blt; 2838 2839 assert(src_bo == dst_bo); 2840 assert(src->depth == dst->depth); 2841 assert(src->width == dst->width); 2842 assert(src->height == dst->height); 2843 
return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, 2844 src_dx, src_dy, 2845 dst_dx, dst_dy, 2846 box, n, &extents); 2847 } 2848 2849 if (dst->depth == src->depth) { 2850 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2851 tmp.src.pict_format = tmp.dst.format; 2852 } else { 2853 tmp.dst.format = sna_format_for_depth(dst->depth); 2854 tmp.src.pict_format = sna_format_for_depth(src->depth); 2855 } 2856 if (!gen6_check_format(tmp.src.pict_format)) 2857 goto fallback_blt; 2858 2859 tmp.dst.pixmap = (PixmapPtr)dst; 2860 tmp.dst.width = dst->width; 2861 tmp.dst.height = dst->height; 2862 tmp.dst.bo = dst_bo; 2863 tmp.dst.x = tmp.dst.y = 0; 2864 tmp.damage = NULL; 2865 2866 sna_render_composite_redirect_init(&tmp); 2867 if (too_large(tmp.dst.width, tmp.dst.height)) { 2868 int i; 2869 2870 extents = box[0]; 2871 for (i = 1; i < n; i++) { 2872 if (box[i].x1 < extents.x1) 2873 extents.x1 = box[i].x1; 2874 if (box[i].y1 < extents.y1) 2875 extents.y1 = box[i].y1; 2876 2877 if (box[i].x2 > extents.x2) 2878 extents.x2 = box[i].x2; 2879 if (box[i].y2 > extents.y2) 2880 extents.y2 = box[i].y2; 2881 } 2882 2883 if (!sna_render_composite_redirect(sna, &tmp, 2884 extents.x1 + dst_dx, 2885 extents.y1 + dst_dy, 2886 extents.x2 - extents.x1, 2887 extents.y2 - extents.y1, 2888 n > 1)) 2889 goto fallback_tiled; 2890 } 2891 2892 tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); 2893 if (too_large(src->width, src->height)) { 2894 int i; 2895 2896 extents = box[0]; 2897 for (i = 1; i < n; i++) { 2898 if (box[i].x1 < extents.x1) 2899 extents.x1 = box[i].x1; 2900 if (box[i].y1 < extents.y1) 2901 extents.y1 = box[i].y1; 2902 2903 if (box[i].x2 > extents.x2) 2904 extents.x2 = box[i].x2; 2905 if (box[i].y2 > extents.y2) 2906 extents.y2 = box[i].y2; 2907 } 2908 2909 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2910 extents.x1 + src_dx, 2911 extents.y1 + src_dy, 2912 extents.x2 - extents.x1, 2913 extents.y2 - extents.y1)) { 2914 DBG(("%s: unable to 
extract partial pixmap\n", __FUNCTION__)); 2915 goto fallback_tiled_dst; 2916 } 2917 } else { 2918 tmp.src.bo = src_bo; 2919 tmp.src.width = src->width; 2920 tmp.src.height = src->height; 2921 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2922 } 2923 2924 tmp.mask.bo = NULL; 2925 2926 tmp.floats_per_vertex = 2; 2927 tmp.floats_per_rect = 6; 2928 tmp.need_magic_ca_pass = 0; 2929 2930 tmp.u.gen6.flags = COPY_FLAGS(alu); 2931 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 2932 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); 2933 assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); 2934 2935 kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2936 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2937 kgem_submit(&sna->kgem); 2938 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2939 DBG(("%s: too large for a single operation\n", 2940 __FUNCTION__)); 2941 if (tmp.src.bo != src_bo) 2942 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2943 if (tmp.redirect.real_bo) 2944 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2945 goto fallback_blt; 2946 } 2947 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2948 } 2949 2950 src_dx += tmp.src.offset[0]; 2951 src_dy += tmp.src.offset[1]; 2952 2953 dst_dx += tmp.dst.x; 2954 dst_dy += tmp.dst.y; 2955 2956 tmp.dst.x = tmp.dst.y = 0; 2957 2958 gen6_align_vertex(sna, &tmp); 2959 gen6_emit_copy_state(sna, &tmp); 2960 2961 do { 2962 int16_t *v; 2963 int n_this_time; 2964 2965 n_this_time = gen6_get_rectangles(sna, &tmp, n, 2966 gen6_emit_copy_state); 2967 n -= n_this_time; 2968 2969 v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2970 sna->render.vertex_used += 6 * n_this_time; 2971 assert(sna->render.vertex_used <= sna->render.vertex_size); 2972 do { 2973 2974 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2975 box->x1 + src_dx, box->y1 + src_dy, 2976 box->x1 + dst_dx, box->y1 + dst_dy, 2977 box->x2 - box->x1, box->y2 - box->y1)); 2978 v[0] = box->x2 + dst_dx; 2979 v[2] = box->x2 + src_dx; 
2980 v[1] = v[5] = box->y2 + dst_dy; 2981 v[3] = v[7] = box->y2 + src_dy; 2982 v[8] = v[4] = box->x1 + dst_dx; 2983 v[10] = v[6] = box->x1 + src_dx; 2984 v[9] = box->y1 + dst_dy; 2985 v[11] = box->y1 + src_dy; 2986 v += 12; box++; 2987 } while (--n_this_time); 2988 } while (n); 2989 2990 gen4_vertex_flush(sna); 2991 sna_render_composite_redirect_done(sna, &tmp); 2992 if (tmp.src.bo != src_bo) 2993 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2994 return true; 2995 2996fallback_tiled_dst: 2997 if (tmp.redirect.real_bo) 2998 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2999fallback_tiled: 3000 if (sna_blt_compare_depth(src, dst) && 3001 sna_blt_copy_boxes(sna, alu, 3002 src_bo, src_dx, src_dy, 3003 dst_bo, dst_dx, dst_dy, 3004 dst->bitsPerPixel, 3005 box, n)) 3006 return true; 3007 3008 return sna_tiling_copy_boxes(sna, alu, 3009 src, src_bo, src_dx, src_dy, 3010 dst, dst_bo, dst_dx, dst_dy, 3011 box, n); 3012} 3013 3014static void 3015gen6_render_copy_blt(struct sna *sna, 3016 const struct sna_copy_op *op, 3017 int16_t sx, int16_t sy, 3018 int16_t w, int16_t h, 3019 int16_t dx, int16_t dy) 3020{ 3021 int16_t *v; 3022 3023 gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); 3024 3025 v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3026 sna->render.vertex_used += 6; 3027 assert(sna->render.vertex_used <= sna->render.vertex_size); 3028 3029 v[0] = dx+w; v[1] = dy+h; 3030 v[2] = sx+w; v[3] = sy+h; 3031 v[4] = dx; v[5] = dy+h; 3032 v[6] = sx; v[7] = sy+h; 3033 v[8] = dx; v[9] = dy; 3034 v[10] = sx; v[11] = sy; 3035} 3036 3037static void 3038gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3039{ 3040 DBG(("%s()\n", __FUNCTION__)); 3041 3042 assert(!sna->render.active); 3043 if (sna->render.vertex_offset) 3044 gen4_vertex_flush(sna); 3045} 3046 3047static bool 3048gen6_render_copy(struct sna *sna, uint8_t alu, 3049 PixmapPtr src, struct kgem_bo *src_bo, 3050 PixmapPtr dst, struct kgem_bo *dst_bo, 3051 struct sna_copy_op *op) 3052{ 3053 
DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 3054 __FUNCTION__, alu, 3055 src->drawable.width, src->drawable.height, 3056 dst->drawable.width, dst->drawable.height)); 3057 3058 if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 3059 sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3060 sna_blt_copy(sna, alu, 3061 src_bo, dst_bo, 3062 dst->drawable.bitsPerPixel, 3063 op)) 3064 return true; 3065 3066 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3067 too_large(src->drawable.width, src->drawable.height) || 3068 too_large(dst->drawable.width, dst->drawable.height)) { 3069fallback: 3070 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3071 return false; 3072 3073 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3074 dst->drawable.bitsPerPixel, 3075 op); 3076 } 3077 3078 if (dst->drawable.depth == src->drawable.depth) { 3079 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3080 op->base.src.pict_format = op->base.dst.format; 3081 } else { 3082 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3083 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3084 } 3085 if (!gen6_check_format(op->base.src.pict_format)) 3086 goto fallback; 3087 3088 op->base.dst.pixmap = dst; 3089 op->base.dst.width = dst->drawable.width; 3090 op->base.dst.height = dst->drawable.height; 3091 op->base.dst.bo = dst_bo; 3092 3093 op->base.src.bo = src_bo; 3094 op->base.src.card_format = 3095 gen6_get_card_format(op->base.src.pict_format); 3096 op->base.src.width = src->drawable.width; 3097 op->base.src.height = src->drawable.height; 3098 3099 op->base.mask.bo = NULL; 3100 3101 op->base.floats_per_vertex = 2; 3102 op->base.floats_per_rect = 6; 3103 3104 op->base.u.gen6.flags = COPY_FLAGS(alu); 3105 assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3106 assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); 3107 assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); 3108 3109 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3110 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3111 kgem_submit(&sna->kgem); 3112 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3113 goto fallback; 3114 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3115 } 3116 3117 gen6_align_vertex(sna, &op->base); 3118 gen6_emit_copy_state(sna, &op->base); 3119 3120 op->blt = gen6_render_copy_blt; 3121 op->done = gen6_render_copy_done; 3122 return true; 3123} 3124 3125static void 3126gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3127{ 3128 uint32_t *binding_table; 3129 uint16_t offset; 3130 bool dirty; 3131 3132 dirty = gen6_get_batch(sna, op); 3133 3134 binding_table = gen6_composite_get_binding_table(sna, &offset); 3135 3136 binding_table[0] = 3137 gen6_bind_bo(sna, 3138 op->dst.bo, op->dst.width, op->dst.height, 3139 gen6_get_dest_format(op->dst.format), 3140 true); 3141 binding_table[1] = 3142 gen6_bind_bo(sna, 3143 op->src.bo, 1, 1, 3144 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 3145 false); 3146 3147 if (sna->kgem.surface == offset && 3148 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 3149 sna->kgem.surface += 3150 sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); 3151 offset = sna->render_state.gen6.surface_table; 3152 } 3153 3154 gen6_emit_state(sna, op, offset | dirty); 3155} 3156 3157static bool 3158gen6_render_fill_boxes(struct sna *sna, 3159 CARD8 op, 3160 PictFormat format, 3161 const xRenderColor *color, 3162 const DrawableRec *dst, struct kgem_bo *dst_bo, 3163 const BoxRec *box, int n) 3164{ 3165 struct sna_composite_op tmp; 3166 uint32_t pixel; 3167 3168 DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3169 __FUNCTION__, op, 3170 color->red, color->green, color->blue, color->alpha, (int)format)); 3171 3172 if (op >= ARRAY_SIZE(gen6_blend_op)) { 3173 DBG(("%s: fallback due to unhandled blend op: %d\n", 3174 __FUNCTION__, op)); 3175 return false; 3176 } 
3177 3178 if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3179 !gen6_check_dst_format(format)) { 3180 uint8_t alu = GXinvalid; 3181 3182 if (op <= PictOpSrc) { 3183 pixel = 0; 3184 if (op == PictOpClear) 3185 alu = GXclear; 3186 else if (sna_get_pixel_from_rgba(&pixel, 3187 color->red, 3188 color->green, 3189 color->blue, 3190 color->alpha, 3191 format)) 3192 alu = GXcopy; 3193 } 3194 3195 if (alu != GXinvalid && 3196 sna_blt_fill_boxes(sna, alu, 3197 dst_bo, dst->bitsPerPixel, 3198 pixel, box, n)) 3199 return true; 3200 3201 if (!gen6_check_dst_format(format)) 3202 return false; 3203 } 3204 3205 if (op == PictOpClear) { 3206 pixel = 0; 3207 op = PictOpSrc; 3208 } else if (!sna_get_pixel_from_rgba(&pixel, 3209 color->red, 3210 color->green, 3211 color->blue, 3212 color->alpha, 3213 PICT_a8r8g8b8)) 3214 return false; 3215 3216 DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3217 __FUNCTION__, pixel, n, 3218 box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3219 3220 tmp.dst.pixmap = (PixmapPtr)dst; 3221 tmp.dst.width = dst->width; 3222 tmp.dst.height = dst->height; 3223 tmp.dst.format = format; 3224 tmp.dst.bo = dst_bo; 3225 tmp.dst.x = tmp.dst.y = 0; 3226 tmp.damage = NULL; 3227 3228 sna_render_composite_redirect_init(&tmp); 3229 if (too_large(dst->width, dst->height)) { 3230 BoxRec extents; 3231 3232 boxes_extents(box, n, &extents); 3233 if (!sna_render_composite_redirect(sna, &tmp, 3234 extents.x1, extents.y1, 3235 extents.x2 - extents.x1, 3236 extents.y2 - extents.y1, 3237 n > 1)) 3238 return sna_tiling_fill_boxes(sna, op, format, color, 3239 dst, dst_bo, box, n); 3240 } 3241 3242 tmp.src.bo = sna_render_get_solid(sna, pixel); 3243 tmp.mask.bo = NULL; 3244 3245 tmp.floats_per_vertex = 2; 3246 tmp.floats_per_rect = 6; 3247 tmp.need_magic_ca_pass = false; 3248 3249 tmp.u.gen6.flags = FILL_FLAGS(op, format); 3250 assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3251 assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3252 
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Make sure the destination fits in this batch; if not, flush and
	 * start a fresh one (which must then succeed).
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	/* Emit the boxes in as many batches as the vertex buffer allows. */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG((" (%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* 12 int16s per rect = 3 vertices of packed
			 * (x, y) + (u, v): corners (x2,y2), (x1,y2),
			 * (x1,y1) with texcoords (1,1), (0,1), (0,0).
			 */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}

/* Emit one solid rectangle (x,y)x(w,h) for an already-prepared fill op. */
static void
gen6_render_op_fill_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *v;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Same packed-vertex layout as the fill_boxes loop above. */
	v[0] = x+w;
	v[4] = v[8] = x;
	v[1] = v[5] = y+h;
	v[9] = y;

	v[2] = v[3] = v[7] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit a single box for an already-prepared fill op. */
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *v;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[8] = v[4] = box->x1;
	v[5] = v[1] = box->y2;
	v[9] = box->y1;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;
}

/* Emit a batch of boxes for an already-prepared fill op, splitting the
 * work whenever the vertex buffer fills up.
 */
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2] = v[3] = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a fill op: flush any pending vertices and drop the solid source. */
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}

/* Prepare a reusable solid-fill operation on dst, preferring the BLT when
 * it is a better fit and falling back to it when RENDER cannot cope.
 * On success the op's blt/box/boxes/done vectors are populated.
 */
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER only supports GXcopy/GXclear and size-limited targets;
	 * everything else must go to the BLT.
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	/* Ensure the destination fits into the current batch. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_fill_state(sna, &op->base);

	op->blt = gen6_render_op_fill_blt;
	op->box = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->points = NULL;
	op->done = gen6_render_op_fill_done;
	return true;
}

/* Fill a single rectangle via the BLT engine; helper for fill_one/clear
 * fallback paths.
 */
static bool
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return
	       sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* Fill the single rectangle (x1,y1)-(x2,y2) with a solid colour, using the
 * RENDER ring unless the BLT is preferred or required.
 */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Unlike gen6_render_fill(), submission failure here is recoverable:
	 * release the solid source and report failure to the caller.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Packed rect: 3 vertices of int16 (x,y)+(u,v). */
	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Clear the whole drawable via the BLT engine. */
static bool
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec box;

	box.x1 = 0;
	box.y1 = 0;
	box.x2 = dst->drawable.width;
	box.y2 = dst->drawable.height;

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &box, 1);
}

/* Clear an entire pixmap to zero, choosing between BLT and RENDER. */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER...
	 */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black (0) source; no mask. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Recoverable on submission failure, like fill_one. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Full-drawable rect in the packed int16 vertex layout. */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

/* Invalidate all cached hardware state so the next emission re-programs
 * everything (called after a batch/ring switch or reset).
 */
static void gen6_render_reset(struct sna *sna)
{
	sna->render_state.gen6.needs_invariant = true;
	sna->render_state.gen6.first_state_packet = true;
	sna->render_state.gen6.ve_id = 3 << 2;
	sna->render_state.gen6.last_primitive = -1;

	/* -1 == "unknown", forcing re-emission on next use. */
	sna->render_state.gen6.num_sf_outputs = 0;
	sna->render_state.gen6.samplers = -1;
	sna->render_state.gen6.blend = -1;
	sna->render_state.gen6.kernel = -1;
	sna->render_state.gen6.drawrect_offset = -1;
	sna->render_state.gen6.drawrect_limit = -1;
	sna->render_state.gen6.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

/* Release the static-state buffer allocated by gen6_render_setup(). */
static void gen6_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}

/* Distinguish GT2 from GT1 by PCI device id bits.
 * NOTE(review): assumes bits 4-5 of the SNB device id encode the GT level
 * — confirm against the PCI id table.
 */
static bool is_gt2(struct sna *sna, int devid)
{
	return devid & 0x30;
}

/* Low nibble 0x6 marks the mobile SKUs — TODO confirm against PCI ids. */
static bool is_mobile(struct sna *sna, int devid)
{
	return (devid & 0xf) == 0x6;
}

/* One-time setup: select GT1/GT2 limits, compile/copy the WM kernels and
 * sampler/blend state into a static stream, and pin it in a bo.
 * Returns false if the static state could not be allocated.
 */
static bool gen6_render_setup(struct sna *sna, int devid)
{
	struct gen6_render_state *state = &sna->render_state.gen6;
	struct sna_static_stream general;
	struct gen6_sampler_state *ss;
	int i, j, k, l, m;

	state->info = &gt1_info;
	if (is_gt2(sna, devid))
		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);
	scratch_create(&general);

	/* Pre-built kernels (with .size set) are copied verbatim; the rest
	 * are compiled for each enabled SIMD width. Slot [0]=SIMD8,
	 * [1]=SIMD16, [2]=SIMD32.
	 */
	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* Guarantee at least one dispatch width per kernel. */
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
			state->wm_kernel[m][1] =
				sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data, 16);
		}
	}

	/* Sampler table: 2 fixed pairs (copy, fill) plus one pair for every
	 * (src filter, src extend) x (mask filter, mask extend) combination.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen6_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

/* Entry point: wire up the gen6 render backend vectors, returning the
 * device name on success or the unchanged fallback backend name if the
 * static state could not be built.
 */
const char *gen6_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->dev);

	if (!gen6_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

	/* Each operation can be compiled out via the NO_* knobs above for
	 * debugging; absent entries fall back to generic/BLT paths.
	 */
#if !NO_COMPOSITE
	sna->render.composite = gen6_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif

#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen6_check_composite_spans;
	sna->render.composite_spans = gen6_render_composite_spans;
	/* Only mobile parts prefer GPU span rendering here. */
	if (is_mobile(sna, devid))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen6_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen6_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen6_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	/* Hardware limits for the gen6 3D pipeline. */
	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen6.info->name;
}