/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders: we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define ALWAYS_FLUSH 1

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 32

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CL_ENTRY_SIZE 0
#define URB_CL_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
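
/*
 * A quick sanity check of the static partition above, mirroring the
 * arithmetic in gen4_emit_urb() below: 32*1 (VS) + 0 (GS) + 0 (CLIP) +
 * 64*2 (SF) + 0 (CS) = 160 URB rows, comfortably within the 256 rows
 * asserted there.
 */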

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_packed_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
	[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
	[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT601, ps_kernel_planar_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT601, ps_kernel_nv12_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT601, ps_kernel_packed_bt601_static, false),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT709, ps_kernel_planar_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT709, ps_kernel_nv12_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT709, ps_kernel_packed_bt709_static, false),
};
#undef KERNEL
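
/*
 * Two flavours of entry above: KERNEL() embeds a precompiled .g4b blob
 * (data pointer + size), while NOKERNEL() stores a brw_wm_kernel__*
 * generator with size 0, i.e. the program is assembled at run time by
 * the brw/ assembler when the static render state is created.
 */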

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen4_blend_op[] = {
	/* Clear */	  {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
	/* Src */	  {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
	/* Dst */	  {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
	/* Over */	  {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
	/* In */	  {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* InReverse */	  {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Out */	  {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* OutReverse */  {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	  {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	  {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	  {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
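
/*
 * Both helpers above index tables of pre-baked hardware state laid out at
 * a 64-byte stride: BLEND_OFFSET() treats the colour-calculator states as
 * a 2D [src_factor][dst_factor] array, and SAMPLER_OFFSET() treats the
 * sampler states as a 5D array indexed by the two channels' filter/extend
 * modes and the kernel. For example, PictOpOver resolves to
 * BLEND_OFFSET(GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA).
 */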

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen4_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}

static bool gen4_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen4_render_state *state = &sna->render_state.gen4;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
				     gen4_choose_composite_kernel(PictOpAdd,
								  true, true, op->is_affine));

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
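
/*
 * The "magic" pass implements component-alpha PictOpOver in two rounds,
 * as the blender accepts only a single source value: the main pass runs
 * with the op rewritten to PictOpOutReverse (dst *= 1 - mask*src.alpha)
 * and this fixup then replays the very same vertices with PictOpAdd and a
 * component-alpha kernel (dst += mask*src), which together sum to OVER.
 */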

static uint32_t gen4_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen4_blend_op[op].src_blend;
	dst = gen4_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN4_BLENDFACTOR_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ONE;
		else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen4_blend_op[op].src_alpha) {
		if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

static uint32_t gen4_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}
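
/*
 * Note that the destination map above deliberately collapses the x- and
 * a- variants onto the alpha-bearing render-target formats: writes to the
 * unused channel are harmless, and gen4_get_blend() has already replaced
 * any DST_ALPHA blend factor with a constant one/zero when the Picture
 * format carries no alpha.
 */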

static bool gen4_check_dst_format(PictFormat format)
{
	if (gen4_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen4_check_format(uint32_t format)
{
	if (gen4_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}
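
/*
 * The extend modes map directly onto the Render extension semantics:
 * RepeatNone becomes CLAMP_BORDER sampling the all-zero border colour
 * allocated by null_create() above, RepeatNormal wraps, RepeatPad clamps
 * to the edge texel and RepeatReflect mirrors.
 */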

static uint32_t gen4_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen4_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		DBG(("%s: unknown filter: %s [%d]\n",
		     __FUNCTION__,
		     PictureGetFilterName(picture->filter),
		     picture->filter));
		return false;
	}
}

static uint32_t gen4_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen4_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		DBG(("%s: unknown repeat: %d\n",
		     __FUNCTION__, picture->repeatType));
		return false;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields of a surface state entry for the given bo.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
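
/*
 * The binding cache key packs is_dst into bit 31 alongside the surface
 * format, so a bo used both as a sampler source and as the render target
 * within one batch receives two distinct surface-state entries (the
 * destination entry carrying RC_READ_WRITE and render-domain relocations).
 */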

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

inline static void
gen4_emit_pipe_flush(struct sna *sna)
{
#if 1
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_break(struct sna *sna)
{
#if !ALWAYS_FLUSH
	OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_invalidate(struct sna *sna)
{
#if 0
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH);
#endif
}

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
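
/*
 * Note the rewind trick above: if nothing else has been emitted since the
 * last 3DPRIMITIVE, gen4_emit_primitive() points vertex_offset back into
 * the still-open packet (five dwords before the current batch pointer) so
 * that new rectangles are simply appended to the previous RECTLIST.
 */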

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass ? 19 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 8*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent the new vbo from being discarded after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op)) {
					if (kgem_check_batch(&sna->kgem, 19+6))
						gen4_emit_pipelined_pointers(sna, op, op->op,
									     op->u.gen4.wm_kernel);
				}
			}
			gen4_emit_pipe_break(sna);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
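
/*
 * With FORCE_FLUSH enabled, gen4_get_rectangles() caps every primitive at
 * MAX_FLUSH_VERTICES rectangles (currently a single rectangle, i.e. three
 * vertices) and emits a pipeline break in between; this is the workaround
 * for the shader hang tracked in fdo bug #55500.
 */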

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_end;
	int urb_gs_end;
	int urb_cl_end;
	int urb_sf_end;
	int urb_cs_end;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_end = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE;
	urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
	assert(urb_cs_end <= 256);

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
		  urb_gs_end << UF1_GS_FENCE_SHIFT |
		  urb_vs_end << UF1_VS_FENCE_SHIFT);
	OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
		  urb_sf_end << UF2_SF_FENCE_SHIFT);

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}
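
/*
 * Surface state and binding tables are allocated downwards from the top
 * of the batch (kgem.surface starts at batch_size), which is why the
 * surface base address above is a relocation against the batch itself
 * (bo == NULL) and why entries can be recycled simply by bumping
 * kgem.surface back up.
 */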

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE);	/* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE);	/* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
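
/*
 * gen4_emit_drawing_rectangle() returns true when the cached rectangle
 * was reused; gen4_emit_state() folds that into its flush decision so the
 * extra pipe flush is skipped whenever fresh drawing-rectangle state had
 * to be written anyway.
 */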

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
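
/*
 * The ve_id encodes the vertex layout: the low two bits give the number
 * of floats in the source texcoord channel (0 denoting the packed short2
 * SSCALED special case) and the upper bits the number of floats in the
 * mask channel, from which the element formats and offsets above are
 * derived.
 */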

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	wm_binding_table &= ~1;

	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		gen4_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		gen4_emit_pipe_flush(sna);

	gen4_emit_binding_table(sna, wm_binding_table);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}
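
/*
 * The dirty flag is smuggled through bit 0 of wm_binding_table by the
 * callers below (offset | dirty): the offset is a multiple of the padded
 * surface-state size so the low bit is free, and it signals that the
 * destination may still be hot in the render cache and needs flushing
 * before it is sampled.
 */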

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG((" %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG((" %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif
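
/*
 * The threaded box emitter is only compiled when FORCE_FLUSH is disabled:
 * with the per-rectangle flush workaround active, every rectangle needs
 * access to the batch anyway, so there is little to gain from generating
 * vertices on sibling threads.
 */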

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width;
			src_height[n] = frame->height;
			src_pitch[n] = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n] = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}
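
/*
 * For planar frames six source surfaces are bound: two views of the Y
 * plane, two of the V plane (VBufOffset) and two of the U plane
 * (UBufOffset), matching the sampler layout expected by the planar and
 * NV12 video kernels; packed YUV needs just the single surface.
 */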

static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PLANAR_BT709 :
			WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			WM_KERNEL_VIDEO_NV12_BT709 :
			WM_KERNEL_VIDEO_NV12_BT601;

	default:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PACKED_BT709 :
			WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	const BoxRec *box;
	int nbox;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel = select_video_kernel(video, frame);
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			OUT_VERTEX(box->x2, box->y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}
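
/*
 * The scale/offset pairs above turn destination pixels into normalised
 * source texture coordinates, evaluated at the three emitted corners of
 * each rectangle:
 *
 *   u = (x - dst.x1) * src_width / (dst_width * frame->width)
 *       + src.x1 / frame->width
 *
 * and likewise for v in y.
 */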

static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel,
							       solid_color(picture->format,
									   priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
	     __FUNCTION__,
	     channel->repeat, gen4_repeat(channel->repeat),
	     channel->filter, gen4_filter(channel->filter)));
	channel->repeat = gen4_repeat(channel->repeat);
	channel->filter = gen4_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen4_get_card_format(channel->pict_format);
}

static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

static bool
gen4_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
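
/*
 * When the composite replaces the destination outright (hint & REPLACES),
 * kgem_bo_pair_undo() above allows any still-queued operations on the
 * pixmap's GPU/CPU buffers to be dropped, as their results are about to
 * be overwritten in full.
 */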

static bool
check_gradient(PicturePtr picture, bool precise)
{
	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(struct sna *sna, PicturePtr p)
{
	return p->pDrawable && untransformed(p) &&
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}

static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the CPU.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n", __FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n", __FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
mask->repeatType : RepeatNone); 1954 mc->filter = gen4_filter(mask->filter); 1955 mc->pict_format = mask->format; 1956 mc->card_format = gen4_get_card_format(mask->format); 1957 mc->bo = kgem_bo_reference(mc->bo); 1958 return true; 1959} 1960 1961static bool 1962gen4_render_composite(struct sna *sna, 1963 uint8_t op, 1964 PicturePtr src, 1965 PicturePtr mask, 1966 PicturePtr dst, 1967 int16_t src_x, int16_t src_y, 1968 int16_t msk_x, int16_t msk_y, 1969 int16_t dst_x, int16_t dst_y, 1970 int16_t width, int16_t height, 1971 unsigned flags, 1972 struct sna_composite_op *tmp) 1973{ 1974 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 1975 width, height, sna->kgem.mode)); 1976 1977 if (op >= ARRAY_SIZE(gen4_blend_op)) 1978 return false; 1979 1980 if (mask == NULL && 1981 sna_blt_composite(sna, op, 1982 src, dst, 1983 src_x, src_y, 1984 dst_x, dst_y, 1985 width, height, 1986 flags, tmp)) 1987 return true; 1988 1989 if (gen4_composite_fallback(sna, src, mask, dst)) 1990 goto fallback; 1991 1992 if (need_tiling(sna, width, height)) 1993 return sna_tiling_composite(op, src, mask, dst, 1994 src_x, src_y, 1995 msk_x, msk_y, 1996 dst_x, dst_y, 1997 width, height, 1998 tmp); 1999 2000 if (!gen4_composite_set_target(sna, tmp, dst, 2001 dst_x, dst_y, width, height, 2002 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 2003 DBG(("%s: failed to set composite target\n", __FUNCTION__)); 2004 goto fallback; 2005 } 2006 2007 tmp->op = op; 2008 switch (gen4_composite_picture(sna, src, &tmp->src, 2009 src_x, src_y, 2010 width, height, 2011 dst_x, dst_y, 2012 dst->polyMode == PolyModePrecise)) { 2013 case -1: 2014 DBG(("%s: failed to prepare source\n", __FUNCTION__)); 2015 goto cleanup_dst; 2016 case 0: 2017 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2018 goto cleanup_dst; 2019 /* fall through to fixup */ 2020 case 1: 2021 if (mask == NULL && 2022 sna_blt_composite__convert(sna, 2023 dst_x, dst_y, width, height, 2024 tmp)) 2025 return true; 2026 2027 gen4_composite_channel_convert(&tmp->src); 2028 break; 2029 } 2030 2031 tmp->is_affine = tmp->src.is_affine; 2032 tmp->has_component_alpha = false; 2033 tmp->need_magic_ca_pass = false; 2034 2035 if (mask) { 2036 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2037 tmp->has_component_alpha = true; 2038 2039 /* Check if it's component alpha that relies on a source alpha and on 2040 * the source value. We can only get one of those into the single 2041 * source value that we get to blend with. 
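 *
 * For PictOpOver there is a two-pass workaround (need_magic_ca_pass,
 * set just below): a first pass with PictOpOutReverse knocks out the
 * destination per channel using srcA * mask, and a second pass adds
 * src * mask back in, which together compute
 * dst = src * mask + (1 - srcA * mask) * dst. Any other operator
 * needing both terms is sent down the fallback path.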
2042 */ 2043 if (gen4_blend_op[op].src_alpha && 2044 (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { 2045 if (op != PictOpOver) { 2046 DBG(("%s -- fallback: unhandled component alpha blend\n", 2047 __FUNCTION__)); 2048 2049 goto cleanup_src; 2050 } 2051 2052 tmp->need_magic_ca_pass = true; 2053 tmp->op = PictOpOutReverse; 2054 } 2055 } 2056 2057 if (!reuse_source(sna, 2058 src, &tmp->src, src_x, src_y, 2059 mask, &tmp->mask, msk_x, msk_y)) { 2060 switch (gen4_composite_picture(sna, mask, &tmp->mask, 2061 msk_x, msk_y, 2062 width, height, 2063 dst_x, dst_y, 2064 dst->polyMode == PolyModePrecise)) { 2065 case -1: 2066 DBG(("%s: failed to prepare mask\n", __FUNCTION__)); 2067 goto cleanup_src; 2068 case 0: 2069 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2070 goto cleanup_src; 2071 /* fall through to fixup */ 2072 case 1: 2073 gen4_composite_channel_convert(&tmp->mask); 2074 break; 2075 } 2076 } 2077 2078 tmp->is_affine &= tmp->mask.is_affine; 2079 } 2080 2081 tmp->u.gen4.wm_kernel = 2082 gen4_choose_composite_kernel(tmp->op, 2083 tmp->mask.bo != NULL, 2084 tmp->has_component_alpha, 2085 tmp->is_affine); 2086 tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); 2087 2088 tmp->blt = gen4_render_composite_blt; 2089 tmp->box = gen4_render_composite_box; 2090 tmp->boxes = gen4_render_composite_boxes__blt; 2091 if (tmp->emit_boxes) { 2092 tmp->boxes = gen4_render_composite_boxes; 2093#if !FORCE_FLUSH 2094 tmp->thread_boxes = gen4_render_composite_boxes__thread; 2095#endif 2096 } 2097 tmp->done = gen4_render_composite_done; 2098 2099 if (!kgem_check_bo(&sna->kgem, 2100 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2101 NULL)) { 2102 kgem_submit(&sna->kgem); 2103 if (!kgem_check_bo(&sna->kgem, 2104 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2105 NULL)) 2106 goto cleanup_mask; 2107 } 2108 2109 gen4_align_vertex(sna, tmp); 2110 gen4_bind_surfaces(sna, tmp); 2111 return true; 2112 2113cleanup_mask: 2114 if (tmp->mask.bo) { 2115 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2116 tmp->mask.bo = NULL; 2117 } 2118cleanup_src: 2119 if (tmp->src.bo) { 2120 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2121 tmp->src.bo = NULL; 2122 } 2123cleanup_dst: 2124 if (tmp->redirect.real_bo) { 2125 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2126 tmp->redirect.real_bo = NULL; 2127 } 2128fallback: 2129 return (mask == NULL && 2130 sna_blt_composite(sna, op, 2131 src, dst, 2132 src_x, src_y, 2133 dst_x, dst_y, 2134 width, height, 2135 flags | COMPOSITE_FALLBACK, tmp)); 2136} 2137 2138#if !NO_COMPOSITE_SPANS 2139fastcall static void 2140gen4_render_composite_spans_box(struct sna *sna, 2141 const struct sna_composite_spans_op *op, 2142 const BoxRec *box, float opacity) 2143{ 2144 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2145 __FUNCTION__, 2146 op->base.src.offset[0], op->base.src.offset[1], 2147 opacity, 2148 op->base.dst.x, op->base.dst.y, 2149 box->x1, box->y1, 2150 box->x2 - box->x1, 2151 box->y2 - box->y1)); 2152 2153 gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces); 2154 op->prim_emit(sna, op, box, opacity); 2155} 2156 2157static void 2158gen4_render_composite_spans_boxes(struct sna *sna, 2159 const struct sna_composite_spans_op *op, 2160 const BoxRec *box, int nbox, 2161 float opacity) 2162{ 2163 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2164 __FUNCTION__, nbox, 2165 op->base.src.offset[0], op->base.src.offset[1], 2166 opacity, 2167 op->base.dst.x, op->base.dst.y)); 2168 2169 do { 2170 int nbox_this_time; 2171 2172 nbox_this_time = 
gen4_get_rectangles(sna, &op->base, nbox, 2173 gen4_bind_surfaces); 2174 nbox -= nbox_this_time; 2175 2176 do { 2177 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2178 box->x1, box->y1, 2179 box->x2 - box->x1, 2180 box->y2 - box->y1)); 2181 2182 op->prim_emit(sna, op, box++, opacity); 2183 } while (--nbox_this_time); 2184 } while (nbox); 2185} 2186 2187fastcall static void 2188gen4_render_composite_spans_boxes__thread(struct sna *sna, 2189 const struct sna_composite_spans_op *op, 2190 const struct sna_opacity_box *box, 2191 int nbox) 2192{ 2193 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2194 __FUNCTION__, nbox, 2195 op->base.src.offset[0], op->base.src.offset[1], 2196 op->base.dst.x, op->base.dst.y)); 2197 assert(nbox); 2198 2199 sna_vertex_lock(&sna->render); 2200 do { 2201 int nbox_this_time; 2202 float *v; 2203 2204 nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, 2205 gen4_bind_surfaces); 2206 assert(nbox_this_time); 2207 nbox -= nbox_this_time; 2208 2209 v = sna->render.vertices + sna->render.vertex_used; 2210 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2211 2212 sna_vertex_acquire__locked(&sna->render); 2213 sna_vertex_unlock(&sna->render); 2214 2215 op->emit_boxes(op, box, nbox_this_time, v); 2216 box += nbox_this_time; 2217 2218 sna_vertex_lock(&sna->render); 2219 sna_vertex_release__locked(&sna->render); 2220 } while (nbox); 2221 sna_vertex_unlock(&sna->render); 2222} 2223 2224fastcall static void 2225gen4_render_composite_spans_done(struct sna *sna, 2226 const struct sna_composite_spans_op *op) 2227{ 2228 if (sna->render.vertex_offset) 2229 gen4_vertex_flush(sna); 2230 2231 DBG(("%s()\n", __FUNCTION__)); 2232 2233 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2234 sna_render_composite_redirect_done(sna, &op->base); 2235} 2236 2237static bool 2238gen4_check_composite_spans(struct sna *sna, 2239 uint8_t op, PicturePtr src, PicturePtr dst, 2240 int16_t width, int16_t height, 2241 unsigned flags) 2242{ 2243 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2244 __FUNCTION__, op, width, height, flags)); 2245 2246 if (op >= ARRAY_SIZE(gen4_blend_op)) 2247 return false; 2248 2249 if (gen4_composite_fallback(sna, src, NULL, dst)) { 2250 DBG(("%s: operation would fallback\n", __FUNCTION__)); 2251 return false; 2252 } 2253 2254 if (need_tiling(sna, width, height) && 2255 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2256 DBG(("%s: fallback, tiled operation not on GPU\n", 2257 __FUNCTION__)); 2258 return false; 2259 } 2260 2261 if (FORCE_SPANS) 2262 return FORCE_SPANS > 0; 2263 2264 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2265 struct sna_pixmap *priv; 2266 2267 if (FORCE_NONRECTILINEAR_SPANS) 2268 return FORCE_NONRECTILINEAR_SPANS > 0; 2269 2270 if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0) 2271 return false; 2272 2273 priv = sna_pixmap_from_drawable(dst->pDrawable); 2274 assert(priv); 2275 2276 if (priv->cpu_bo && 2277 __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) 2278 return true; 2279 2280 if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2281 return false; 2282 2283 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2284 } 2285 2286 return true; 2287} 2288 2289static bool 2290gen4_render_composite_spans(struct sna *sna, 2291 uint8_t op, 2292 PicturePtr src, 2293 PicturePtr dst, 2294 int16_t src_x, int16_t src_y, 2295 int16_t dst_x, int16_t dst_y, 2296 int16_t width, int16_t height, 2297 unsigned flags, 2298 struct sna_composite_spans_op *tmp) 2299{ 2300 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2301 
width, height, flags, sna->kgem.ring)); 2302 2303 assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags)); 2304 2305 if (need_tiling(sna, width, height)) { 2306 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2307 __FUNCTION__, width, height)); 2308 return sna_tiling_composite_spans(op, src, dst, 2309 src_x, src_y, dst_x, dst_y, 2310 width, height, flags, tmp); 2311 } 2312 2313 tmp->base.op = op; 2314 if (!gen4_composite_set_target(sna, &tmp->base, dst, 2315 dst_x, dst_y, width, height, true)) 2316 return false; 2317 2318 switch (gen4_composite_picture(sna, src, &tmp->base.src, 2319 src_x, src_y, 2320 width, height, 2321 dst_x, dst_y, 2322 dst->polyMode == PolyModePrecise)) { 2323 case -1: 2324 goto cleanup_dst; 2325 case 0: 2326 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2327 goto cleanup_dst; 2328 /* fall through to fixup */ 2329 case 1: 2330 gen4_composite_channel_convert(&tmp->base.src); 2331 break; 2332 } 2333 2334 tmp->base.mask.bo = NULL; 2335 tmp->base.mask.filter = SAMPLER_FILTER_NEAREST; 2336 tmp->base.mask.repeat = SAMPLER_EXTEND_NONE; 2337 2338 tmp->base.is_affine = tmp->base.src.is_affine; 2339 tmp->base.has_component_alpha = false; 2340 tmp->base.need_magic_ca_pass = false; 2341 2342 tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp); 2343 tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; 2344 2345 tmp->box = gen4_render_composite_spans_box; 2346 tmp->boxes = gen4_render_composite_spans_boxes; 2347 if (tmp->emit_boxes) 2348 tmp->thread_boxes = gen4_render_composite_spans_boxes__thread; 2349 tmp->done = gen4_render_composite_spans_done; 2350 2351 if (!kgem_check_bo(&sna->kgem, 2352 tmp->base.dst.bo, tmp->base.src.bo, 2353 NULL)) { 2354 kgem_submit(&sna->kgem); 2355 if (!kgem_check_bo(&sna->kgem, 2356 tmp->base.dst.bo, tmp->base.src.bo, 2357 NULL)) 2358 goto cleanup_src; 2359 } 2360 2361 gen4_align_vertex(sna, &tmp->base); 2362 gen4_bind_surfaces(sna, &tmp->base); 2363 return true; 2364 2365cleanup_src: 2366 if (tmp->base.src.bo) 2367 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2368cleanup_dst: 2369 if (tmp->base.redirect.real_bo) 2370 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2371 return false; 2372} 2373#endif 2374 2375static void 2376gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) 2377{ 2378 uint32_t *binding_table; 2379 uint16_t offset, dirty; 2380 2381 gen4_get_batch(sna, op); 2382 dirty = kgem_bo_is_dirty(op->dst.bo); 2383 2384 binding_table = gen4_composite_get_binding_table(sna, &offset); 2385 2386 binding_table[0] = 2387 gen4_bind_bo(sna, 2388 op->dst.bo, op->dst.width, op->dst.height, 2389 gen4_get_dest_format(op->dst.format), 2390 true); 2391 binding_table[1] = 2392 gen4_bind_bo(sna, 2393 op->src.bo, op->src.width, op->src.height, 2394 op->src.card_format, 2395 false); 2396 2397 if (sna->kgem.surface == offset && 2398 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) { 2399 sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 2400 offset = sna->render_state.gen4.surface_table; 2401 } 2402 2403 if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 2404 dirty = 0; 2405 2406 gen4_emit_state(sna, op, offset | dirty); 2407} 2408 2409static void 2410gen4_render_copy_one(struct sna *sna, 2411 const struct sna_composite_op *op, 2412 int sx, int sy, 2413 int w, int h, 2414 int dx, int dy) 2415{ 2416 gen4_get_rectangles(sna, op, 1, 
gen4_copy_bind_surfaces); 2417 2418 OUT_VERTEX(dx+w, dy+h); 2419 OUT_VERTEX_F((sx+w)*op->src.scale[0]); 2420 OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2421 2422 OUT_VERTEX(dx, dy+h); 2423 OUT_VERTEX_F(sx*op->src.scale[0]); 2424 OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2425 2426 OUT_VERTEX(dx, dy); 2427 OUT_VERTEX_F(sx*op->src.scale[0]); 2428 OUT_VERTEX_F(sy*op->src.scale[1]); 2429} 2430 2431static bool 2432gen4_render_copy_boxes(struct sna *sna, uint8_t alu, 2433 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2434 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2435 const BoxRec *box, int n, unsigned flags) 2436{ 2437 struct sna_composite_op tmp; 2438 2439 DBG(("%s x %d\n", __FUNCTION__, n)); 2440 2441 if (sna_blt_compare_depth(src, dst) && 2442 sna_blt_copy_boxes(sna, alu, 2443 src_bo, src_dx, src_dy, 2444 dst_bo, dst_dx, dst_dy, 2445 dst->bitsPerPixel, 2446 box, n)) 2447 return true; 2448 2449 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { 2450fallback_blt: 2451 if (!sna_blt_compare_depth(src, dst)) 2452 return false; 2453 2454 return sna_blt_copy_boxes_fallback(sna, alu, 2455 src, src_bo, src_dx, src_dy, 2456 dst, dst_bo, dst_dx, dst_dy, 2457 box, n); 2458 } 2459 2460 memset(&tmp, 0, sizeof(tmp)); 2461 2462 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 2463 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 2464 2465 if (dst->depth == src->depth) { 2466 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2467 tmp.src.pict_format = tmp.dst.format; 2468 } else { 2469 tmp.dst.format = sna_format_for_depth(dst->depth); 2470 tmp.src.pict_format = sna_format_for_depth(src->depth); 2471 } 2472 if (!gen4_check_format(tmp.src.pict_format)) 2473 goto fallback_blt; 2474 2475 tmp.op = alu == GXcopy ? 
PictOpSrc : PictOpClear; 2476 2477 tmp.dst.pixmap = (PixmapPtr)dst; 2478 tmp.dst.width = dst->width; 2479 tmp.dst.height = dst->height; 2480 tmp.dst.x = tmp.dst.y = 0; 2481 tmp.dst.bo = dst_bo; 2482 tmp.damage = NULL; 2483 2484 sna_render_composite_redirect_init(&tmp); 2485 if (too_large(tmp.dst.width, tmp.dst.height)) { 2486 BoxRec extents = box[0]; 2487 int i; 2488 2489 for (i = 1; i < n; i++) { 2490 if (box[i].x1 < extents.x1) 2491 extents.x1 = box[i].x1; 2492 if (box[i].y1 < extents.y1) 2493 extents.y1 = box[i].y1; 2494 2495 if (box[i].x2 > extents.x2) 2496 extents.x2 = box[i].x2; 2497 if (box[i].y2 > extents.y2) 2498 extents.y2 = box[i].y2; 2499 } 2500 if (!sna_render_composite_redirect(sna, &tmp, 2501 extents.x1 + dst_dx, 2502 extents.y1 + dst_dy, 2503 extents.x2 - extents.x1, 2504 extents.y2 - extents.y1, 2505 n > 1)) 2506 goto fallback_tiled; 2507 } 2508 2509 tmp.src.filter = SAMPLER_FILTER_NEAREST; 2510 tmp.src.repeat = SAMPLER_EXTEND_NONE; 2511 tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format); 2512 if (too_large(src->width, src->height)) { 2513 BoxRec extents = box[0]; 2514 int i; 2515 2516 for (i = 1; i < n; i++) { 2517 if (box[i].x1 < extents.x1) 2518 extents.x1 = box[i].x1; 2519 if (box[i].y1 < extents.y1) 2520 extents.y1 = box[i].y1; 2521 2522 if (box[i].x2 > extents.x2) 2523 extents.x2 = box[i].x2; 2524 if (box[i].y2 > extents.y2) 2525 extents.y2 = box[i].y2; 2526 } 2527 2528 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2529 extents.x1 + src_dx, 2530 extents.y1 + src_dy, 2531 extents.x2 - extents.x1, 2532 extents.y2 - extents.y1)) 2533 goto fallback_tiled_dst; 2534 } else { 2535 tmp.src.bo = kgem_bo_reference(src_bo); 2536 tmp.src.width = src->width; 2537 tmp.src.height = src->height; 2538 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2539 tmp.src.scale[0] = 1.f/src->width; 2540 tmp.src.scale[1] = 1.f/src->height; 2541 } 2542 2543 tmp.is_affine = true; 2544 tmp.floats_per_vertex = 3; 2545 tmp.floats_per_rect = 9; 2546 tmp.u.gen4.wm_kernel = WM_KERNEL; 2547 tmp.u.gen4.ve_id = 2; 2548 2549 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2550 kgem_submit(&sna->kgem); 2551 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2552 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2553 if (tmp.redirect.real_bo) 2554 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2555 2556 goto fallback_blt; 2557 } 2558 } 2559 2560 dst_dx += tmp.dst.x; 2561 dst_dy += tmp.dst.y; 2562 tmp.dst.x = tmp.dst.y = 0; 2563 2564 src_dx += tmp.src.offset[0]; 2565 src_dy += tmp.src.offset[1]; 2566 2567 gen4_align_vertex(sna, &tmp); 2568 gen4_copy_bind_surfaces(sna, &tmp); 2569 2570 do { 2571 gen4_render_copy_one(sna, &tmp, 2572 box->x1 + src_dx, box->y1 + src_dy, 2573 box->x2 - box->x1, box->y2 - box->y1, 2574 box->x1 + dst_dx, box->y1 + dst_dy); 2575 box++; 2576 } while (--n); 2577 2578 gen4_vertex_flush(sna); 2579 sna_render_composite_redirect_done(sna, &tmp); 2580 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2581 return true; 2582 2583fallback_tiled_dst: 2584 if (tmp.redirect.real_bo) 2585 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2586fallback_tiled: 2587 if (sna_blt_compare_depth(src, dst) && 2588 sna_blt_copy_boxes(sna, alu, 2589 src_bo, src_dx, src_dy, 2590 dst_bo, dst_dx, dst_dy, 2591 dst->bitsPerPixel, 2592 box, n)) 2593 return true; 2594 2595 return sna_tiling_copy_boxes(sna, alu, 2596 src, src_bo, src_dx, src_dy, 2597 dst, dst_bo, dst_dx, dst_dy, 2598 box, n); 2599} 2600 2601static void 2602gen4_render_copy_blt(struct sna *sna, 2603 const struct sna_copy_op *op, 2604 int16_t sx, 
int16_t sy, 2605 int16_t w, int16_t h, 2606 int16_t dx, int16_t dy) 2607{ 2608 gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy); 2609} 2610 2611static void 2612gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2613{ 2614 if (sna->render.vertex_offset) 2615 gen4_vertex_flush(sna); 2616} 2617 2618static bool 2619gen4_render_copy(struct sna *sna, uint8_t alu, 2620 PixmapPtr src, struct kgem_bo *src_bo, 2621 PixmapPtr dst, struct kgem_bo *dst_bo, 2622 struct sna_copy_op *op) 2623{ 2624 DBG(("%s: src=%ld, dst=%ld, alu=%d\n", 2625 __FUNCTION__, 2626 src->drawable.serialNumber, 2627 dst->drawable.serialNumber, 2628 alu)); 2629 2630 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2631 sna_blt_copy(sna, alu, 2632 src_bo, dst_bo, 2633 dst->drawable.bitsPerPixel, 2634 op)) 2635 return true; 2636 2637 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2638 too_large(src->drawable.width, src->drawable.height) || 2639 too_large(dst->drawable.width, dst->drawable.height)) { 2640fallback: 2641 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2642 return false; 2643 2644 return sna_blt_copy(sna, alu, src_bo, dst_bo, 2645 dst->drawable.bitsPerPixel, 2646 op); 2647 } 2648 2649 if (dst->drawable.depth == src->drawable.depth) { 2650 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2651 op->base.src.pict_format = op->base.dst.format; 2652 } else { 2653 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2654 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 2655 } 2656 if (!gen4_check_format(op->base.src.pict_format)) 2657 goto fallback; 2658 2659 op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; 2660 2661 op->base.dst.pixmap = dst; 2662 op->base.dst.width = dst->drawable.width; 2663 op->base.dst.height = dst->drawable.height; 2664 op->base.dst.bo = dst_bo; 2665 2666 op->base.src.bo = src_bo; 2667 op->base.src.card_format = 2668 gen4_get_card_format(op->base.src.pict_format); 2669 op->base.src.width = src->drawable.width; 2670 op->base.src.height = src->drawable.height; 2671 op->base.src.scale[0] = 1.f/src->drawable.width; 2672 op->base.src.scale[1] = 1.f/src->drawable.height; 2673 op->base.src.filter = SAMPLER_FILTER_NEAREST; 2674 op->base.src.repeat = SAMPLER_EXTEND_NONE; 2675 2676 op->base.is_affine = true; 2677 op->base.floats_per_vertex = 3; 2678 op->base.floats_per_rect = 9; 2679 op->base.u.gen4.wm_kernel = WM_KERNEL; 2680 op->base.u.gen4.ve_id = 2; 2681 2682 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2683 kgem_submit(&sna->kgem); 2684 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 2685 goto fallback; 2686 } 2687 2688 if (kgem_bo_is_dirty(src_bo)) { 2689 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2690 sna_blt_copy(sna, alu, 2691 src_bo, dst_bo, 2692 dst->drawable.bitsPerPixel, 2693 op)) 2694 return true; 2695 } 2696 2697 gen4_align_vertex(sna, &op->base); 2698 gen4_copy_bind_surfaces(sna, &op->base); 2699 2700 op->blt = gen4_render_copy_blt; 2701 op->done = gen4_render_copy_done; 2702 return true; 2703} 2704 2705static void 2706gen4_render_fill_rectangle(struct sna *sna, 2707 const struct sna_composite_op *op, 2708 int x, int y, int w, int h) 2709{ 2710 gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 2711 2712 OUT_VERTEX(x+w, y+h); 2713 OUT_VERTEX_F(.5); 2714 2715 OUT_VERTEX(x, y+h); 2716 OUT_VERTEX_F(.5); 2717 2718 OUT_VERTEX(x, y); 2719 OUT_VERTEX_F(.5); 2720} 2721 2722static bool 2723gen4_render_fill_boxes(struct sna *sna, 2724 CARD8 op, 
2725 PictFormat format, 2726 const xRenderColor *color, 2727 const DrawableRec *dst, struct kgem_bo *dst_bo, 2728 const BoxRec *box, int n) 2729{ 2730 struct sna_composite_op tmp; 2731 uint32_t pixel; 2732 2733 if (op >= ARRAY_SIZE(gen4_blend_op)) { 2734 DBG(("%s: fallback due to unhandled blend op: %d\n", 2735 __FUNCTION__, op)); 2736 return false; 2737 } 2738 2739 if (op <= PictOpSrc) { 2740 uint8_t alu = GXinvalid; 2741 2742 pixel = 0; 2743 if (op == PictOpClear) 2744 alu = GXclear; 2745 else if (sna_get_pixel_from_rgba(&pixel, 2746 color->red, 2747 color->green, 2748 color->blue, 2749 color->alpha, 2750 format)) 2751 alu = GXcopy; 2752 2753 if (alu != GXinvalid && 2754 sna_blt_fill_boxes(sna, alu, 2755 dst_bo, dst->bitsPerPixel, 2756 pixel, box, n)) 2757 return true; 2758 2759 if (!gen4_check_dst_format(format)) 2760 return false; 2761 2762 if (too_large(dst->width, dst->height)) 2763 return sna_tiling_fill_boxes(sna, op, format, color, 2764 dst, dst_bo, box, n); 2765 } 2766 2767 if (op == PictOpClear) { 2768 pixel = 0; 2769 op = PictOpSrc; 2770 } else if (!sna_get_pixel_from_rgba(&pixel, 2771 color->red, 2772 color->green, 2773 color->blue, 2774 color->alpha, 2775 PICT_a8r8g8b8)) 2776 return false; 2777 2778 DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n)); 2779 2780 memset(&tmp, 0, sizeof(tmp)); 2781 2782 tmp.op = op; 2783 2784 tmp.dst.pixmap = (PixmapPtr)dst; 2785 tmp.dst.width = dst->width; 2786 tmp.dst.height = dst->height; 2787 tmp.dst.format = format; 2788 tmp.dst.bo = dst_bo; 2789 2790 sna_render_composite_redirect_init(&tmp); 2791 if (too_large(dst->width, dst->height)) { 2792 BoxRec extents; 2793 2794 boxes_extents(box, n, &extents); 2795 if (!sna_render_composite_redirect(sna, &tmp, 2796 extents.x1, extents.y1, 2797 extents.x2 - extents.x1, 2798 extents.y2 - extents.y1, 2799 n > 1)) 2800 return sna_tiling_fill_boxes(sna, op, format, color, 2801 dst, dst_bo, box, n); 2802 } 2803 2804 gen4_channel_init_solid(sna, &tmp.src, pixel); 2805 2806 tmp.is_affine = true; 2807 tmp.floats_per_vertex = 2; 2808 tmp.floats_per_rect = 6; 2809 tmp.u.gen4.wm_kernel = WM_KERNEL; 2810 tmp.u.gen4.ve_id = 1; 2811 2812 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2813 kgem_submit(&sna->kgem); 2814 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2815 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2816 return false; 2817 } 2818 } 2819 2820 gen4_align_vertex(sna, &tmp); 2821 gen4_bind_surfaces(sna, &tmp); 2822 2823 do { 2824 gen4_render_fill_rectangle(sna, &tmp, 2825 box->x1, box->y1, 2826 box->x2 - box->x1, 2827 box->y2 - box->y1); 2828 box++; 2829 } while (--n); 2830 2831 gen4_vertex_flush(sna); 2832 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2833 sna_render_composite_redirect_done(sna, &tmp); 2834 return true; 2835} 2836 2837static void 2838gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op, 2839 int16_t x, int16_t y, int16_t w, int16_t h) 2840{ 2841 gen4_render_fill_rectangle(sna, &op->base, x, y, w, h); 2842} 2843 2844fastcall static void 2845gen4_render_fill_op_box(struct sna *sna, 2846 const struct sna_fill_op *op, 2847 const BoxRec *box) 2848{ 2849 gen4_render_fill_rectangle(sna, &op->base, 2850 box->x1, box->y1, 2851 box->x2-box->x1, box->y2-box->y1); 2852} 2853 2854fastcall static void 2855gen4_render_fill_op_boxes(struct sna *sna, 2856 const struct sna_fill_op *op, 2857 const BoxRec *box, 2858 int nbox) 2859{ 2860 do { 2861 gen4_render_fill_rectangle(sna, &op->base, 2862 box->x1, box->y1, 2863 box->x2-box->x1, box->y2-box->y1); 2864 box++; 2865 } while (--nbox); 2866} 2867 
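/* All three emitters above funnel into gen4_render_fill_rectangle().
 * A caller that obtained this op via sna->render.fill() is expected to
 * drive it roughly like this (a sketch only; real callers also cope
 * with the setup failing):
 *
 *	struct sna_fill_op fill;
 *
 *	if (sna->render.fill(sna, GXcopy, pixmap, bo, color, 0, &fill)) {
 *		fill.blt(sna, &fill, x, y, w, h);
 *		fill.boxes(sna, &fill, box, nbox);
 *		fill.done(sna, &fill);
 *	}
 */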
2868static void 2869gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2870{ 2871 if (sna->render.vertex_offset) 2872 gen4_vertex_flush(sna); 2873 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2874} 2875 2876static bool 2877gen4_render_fill(struct sna *sna, uint8_t alu, 2878 PixmapPtr dst, struct kgem_bo *dst_bo, 2879 uint32_t color, unsigned flags, 2880 struct sna_fill_op *op) 2881{ 2882 if (sna_blt_fill(sna, alu, 2883 dst_bo, dst->drawable.bitsPerPixel, 2884 color, 2885 op)) 2886 return true; 2887 2888 if (!(alu == GXcopy || alu == GXclear) || 2889 too_large(dst->drawable.width, dst->drawable.height)) 2890 return sna_blt_fill(sna, alu, 2891 dst_bo, dst->drawable.bitsPerPixel, 2892 color, 2893 op); 2894 2895 if (alu == GXclear) 2896 color = 0; 2897 2898 op->base.op = color == 0 ? PictOpClear : PictOpSrc; 2899 2900 op->base.dst.pixmap = dst; 2901 op->base.dst.width = dst->drawable.width; 2902 op->base.dst.height = dst->drawable.height; 2903 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2904 op->base.dst.bo = dst_bo; 2905 op->base.dst.x = op->base.dst.y = 0; 2906 2907 op->base.need_magic_ca_pass = 0; 2908 op->base.has_component_alpha = 0; 2909 2910 gen4_channel_init_solid(sna, &op->base.src, 2911 sna_rgba_for_color(color, 2912 dst->drawable.depth)); 2913 op->base.mask.bo = NULL; 2914 2915 op->base.is_affine = true; 2916 op->base.floats_per_vertex = 2; 2917 op->base.floats_per_rect = 6; 2918 op->base.u.gen4.wm_kernel = WM_KERNEL; 2919 op->base.u.gen4.ve_id = 1; 2920 2921 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2922 kgem_submit(&sna->kgem); 2923 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2924 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2925 return false; 2926 } 2927 } 2928 2929 gen4_align_vertex(sna, &op->base); 2930 gen4_bind_surfaces(sna, &op->base); 2931 2932 op->blt = gen4_render_fill_op_blt; 2933 op->box = gen4_render_fill_op_box; 2934 op->boxes = gen4_render_fill_op_boxes; 2935 op->points = NULL; 2936 op->done = gen4_render_fill_op_done; 2937 return true; 2938} 2939 2940static bool 2941gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2942 uint32_t color, 2943 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 2944 uint8_t alu) 2945{ 2946 BoxRec box; 2947 2948 box.x1 = x1; 2949 box.y1 = y1; 2950 box.x2 = x2; 2951 box.y2 = y2; 2952 2953 return sna_blt_fill_boxes(sna, alu, 2954 bo, dst->drawable.bitsPerPixel, 2955 color, &box, 1); 2956} 2957 2958static bool 2959gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2960 uint32_t color, 2961 int16_t x1, int16_t y1, 2962 int16_t x2, int16_t y2, 2963 uint8_t alu) 2964{ 2965 struct sna_composite_op tmp; 2966 2967 DBG(("%s: color=%08x\n", __FUNCTION__, color)); 2968 2969 if (gen4_render_fill_one_try_blt(sna, dst, bo, color, 2970 x1, y1, x2, y2, alu)) 2971 return true; 2972 2973 /* Must use the BLT if we can't RENDER... */ 2974 if (!(alu == GXcopy || alu == GXclear) || 2975 too_large(dst->drawable.width, dst->drawable.height)) 2976 return false; 2977 2978 if (alu == GXclear) 2979 color = 0; 2980 2981 tmp.op = color == 0 ? 
PictOpClear : PictOpSrc; 2982 2983 tmp.dst.pixmap = dst; 2984 tmp.dst.width = dst->drawable.width; 2985 tmp.dst.height = dst->drawable.height; 2986 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 2987 tmp.dst.bo = bo; 2988 tmp.dst.x = tmp.dst.y = 0; 2989 2990 gen4_channel_init_solid(sna, &tmp.src, 2991 sna_rgba_for_color(color, 2992 dst->drawable.depth)); 2993 tmp.mask.bo = NULL; 2994 tmp.mask.filter = SAMPLER_FILTER_NEAREST; 2995 tmp.mask.repeat = SAMPLER_EXTEND_NONE; 2996 2997 tmp.is_affine = true; 2998 tmp.floats_per_vertex = 2; 2999 tmp.floats_per_rect = 6; 3000 tmp.has_component_alpha = false; 3001 tmp.need_magic_ca_pass = false; 3002 3003 tmp.u.gen4.wm_kernel = WM_KERNEL; 3004 tmp.u.gen4.ve_id = 1; 3005 3006 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3007 kgem_submit(&sna->kgem); 3008 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3009 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3010 return false; 3011 } 3012 } 3013 3014 gen4_align_vertex(sna, &tmp); 3015 gen4_bind_surfaces(sna, &tmp); 3016 3017 gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); 3018 3019 gen4_vertex_flush(sna); 3020 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3021 3022 return true; 3023} 3024 3025static void gen4_render_reset(struct sna *sna) 3026{ 3027 sna->render_state.gen4.needs_invariant = true; 3028 sna->render_state.gen4.needs_urb = true; 3029 sna->render_state.gen4.ve_id = -1; 3030 sna->render_state.gen4.last_primitive = -1; 3031 sna->render_state.gen4.last_pipelined_pointers = -1; 3032 3033 sna->render_state.gen4.drawrect_offset = -1; 3034 sna->render_state.gen4.drawrect_limit = -1; 3035 sna->render_state.gen4.surface_table = 0; 3036 3037 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3038 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3039 discard_vbo(sna); 3040 } 3041 3042 sna->render.vertex_offset = 0; 3043 sna->render.nvertex_reloc = 0; 3044 sna->render.vb_id = 0; 3045} 3046 3047static void gen4_render_fini(struct sna *sna) 3048{ 3049 kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo); 3050} 3051 3052static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) 3053{ 3054 struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32); 3055 3056 /* Set up the vertex shader to be disabled (passthrough) */ 3057 vs->thread4.nr_urb_entries = URB_VS_ENTRIES; 3058 vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 3059 vs->vs6.vs_enable = 0; 3060 vs->vs6.vert_cache_disable = 1; 3061 3062 return sna_static_stream_offsetof(stream, vs); 3063} 3064 3065static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, 3066 uint32_t kernel) 3067{ 3068 struct gen4_sf_unit_state *sf; 3069 3070 sf = sna_static_stream_map(stream, sizeof(*sf), 32); 3071 3072 sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 3073 sf->thread0.kernel_start_pointer = kernel >> 6; 3074 sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 3075 sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 3076 sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 3077 /* don't smash vertex header, read start from dw8 */ 3078 sf->thread3.urb_entry_read_offset = 1; 3079 sf->thread3.dispatch_grf_start_reg = 3; 3080 sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1; 3081 sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 3082 sf->thread4.nr_urb_entries = URB_SF_ENTRIES; 3083 sf->sf5.viewport_transform = false; /* skip viewport */ 3084 sf->sf6.cull_mode = GEN4_CULLMODE_NONE; 3085 
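	/* The remaining fields below leave scissoring disabled, choose
	 * the provoking vertex for fans, and bias the destination origin
	 * by half a pixel (0x8 in a 4-bit fixed-point field) so that the
	 * rasteriser addresses pixel centres rather than corners.
	 */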
sf->sf6.scissor = 0; 3086 sf->sf7.trifan_pv = 2; 3087 sf->sf6.dest_org_vbias = 0x8; 3088 sf->sf6.dest_org_hbias = 0x8; 3089 3090 return sna_static_stream_offsetof(stream, sf); 3091} 3092 3093static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, 3094 sampler_filter_t src_filter, 3095 sampler_extend_t src_extend, 3096 sampler_filter_t mask_filter, 3097 sampler_extend_t mask_extend) 3098{ 3099 struct gen4_sampler_state *sampler_state; 3100 3101 sampler_state = sna_static_stream_map(stream, 3102 sizeof(struct gen4_sampler_state) * 2, 3103 32); 3104 sampler_state_init(&sampler_state[0], src_filter, src_extend); 3105 sampler_state_init(&sampler_state[1], mask_filter, mask_extend); 3106 3107 return sna_static_stream_offsetof(stream, sampler_state); 3108} 3109 3110static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, 3111 int gen, 3112 bool has_mask, 3113 uint32_t kernel, 3114 uint32_t sampler) 3115{ 3116 assert((kernel & 63) == 0); 3117 wm->thread0.kernel_start_pointer = kernel >> 6; 3118 wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 3119 3120 wm->thread1.single_program_flow = 0; 3121 3122 wm->thread3.const_urb_entry_read_length = 0; 3123 wm->thread3.const_urb_entry_read_offset = 0; 3124 3125 wm->thread3.urb_entry_read_offset = 0; 3126 wm->thread3.dispatch_grf_start_reg = 3; 3127 3128 assert((sampler & 31) == 0); 3129 wm->wm4.sampler_state_pointer = sampler >> 5; 3130 wm->wm4.sampler_count = 1; 3131 3132 wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1; 3133 wm->wm5.transposed_urb_read = 0; 3134 wm->wm5.thread_dispatch_enable = 1; 3135 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 3136 * start point 3137 */ 3138 wm->wm5.enable_16_pix = 1; 3139 wm->wm5.enable_8_pix = 0; 3140 wm->wm5.early_depth_test = 1; 3141 3142 /* Each pair of attributes (src/mask coords) is two URB entries */ 3143 if (has_mask) { 3144 wm->thread1.binding_table_entry_count = 3; 3145 wm->thread3.urb_entry_read_length = 4; 3146 } else { 3147 wm->thread1.binding_table_entry_count = 2; 3148 wm->thread3.urb_entry_read_length = 2; 3149 } 3150} 3151 3152static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) 3153{ 3154 uint8_t *ptr, *base; 3155 int i, j; 3156 3157 base = ptr = 3158 sna_static_stream_map(stream, 3159 GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64, 3160 64); 3161 3162 for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) { 3163 for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) { 3164 struct gen4_cc_unit_state *state = 3165 (struct gen4_cc_unit_state *)ptr; 3166 3167 state->cc3.blend_enable = 3168 !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE); 3169 3170 state->cc5.logicop_func = 0xc; /* COPY */ 3171 state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD; 3172 3173 /* Fill in alpha blend factors same as color, for the future. 
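 * Should the independent alpha blend ever be enabled, the ia_*
 * factors and function set here already mirror the colour pipeline
 * configured in cc6 below, so separate alpha blending would behave
 * identically to the colour blend.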
*/ 3174 state->cc5.ia_src_blend_factor = i; 3175 state->cc5.ia_dest_blend_factor = j; 3176 3177 state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD; 3178 state->cc6.clamp_post_alpha_blend = 1; 3179 state->cc6.clamp_pre_alpha_blend = 1; 3180 state->cc6.src_blend_factor = i; 3181 state->cc6.dest_blend_factor = j; 3182 3183 ptr += 64; 3184 } 3185 } 3186 3187 return sna_static_stream_offsetof(stream, base); 3188} 3189 3190static bool gen4_render_setup(struct sna *sna) 3191{ 3192 struct gen4_render_state *state = &sna->render_state.gen4; 3193 struct sna_static_stream general; 3194 struct gen4_wm_unit_state_padded *wm_state; 3195 uint32_t sf, wm[KERNEL_COUNT]; 3196 int i, j, k, l, m; 3197 3198 sna_static_stream_init(&general); 3199 3200 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3201 * dumps, you know it points to zero. 3202 */ 3203 null_create(&general); 3204 3205 sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask); 3206 for (m = 0; m < KERNEL_COUNT; m++) { 3207 if (wm_kernels[m].size) { 3208 wm[m] = sna_static_stream_add(&general, 3209 wm_kernels[m].data, 3210 wm_kernels[m].size, 3211 64); 3212 } else { 3213 wm[m] = sna_static_stream_compile_wm(sna, &general, 3214 wm_kernels[m].data, 3215 16); 3216 } 3217 } 3218 3219 state->vs = gen4_create_vs_unit_state(&general); 3220 state->sf = gen4_create_sf_state(&general, sf); 3221 3222 wm_state = sna_static_stream_map(&general, 3223 sizeof(*wm_state) * KERNEL_COUNT * 3224 FILTER_COUNT * EXTEND_COUNT * 3225 FILTER_COUNT * EXTEND_COUNT, 3226 64); 3227 state->wm = sna_static_stream_offsetof(&general, wm_state); 3228 for (i = 0; i < FILTER_COUNT; i++) { 3229 for (j = 0; j < EXTEND_COUNT; j++) { 3230 for (k = 0; k < FILTER_COUNT; k++) { 3231 for (l = 0; l < EXTEND_COUNT; l++) { 3232 uint32_t sampler_state; 3233 3234 sampler_state = 3235 gen4_create_sampler_state(&general, 3236 i, j, 3237 k, l); 3238 3239 for (m = 0; m < KERNEL_COUNT; m++) { 3240 gen4_init_wm_state(&wm_state->state, 3241 sna->kgem.gen, 3242 wm_kernels[m].has_mask, 3243 wm[m], sampler_state); 3244 wm_state++; 3245 } 3246 } 3247 } 3248 } 3249 } 3250 3251 state->cc = gen4_create_cc_unit_state(&general); 3252 3253 state->general_bo = sna_static_stream_fini(sna, &general); 3254 return state->general_bo != NULL; 3255} 3256 3257const char *gen4_render_init(struct sna *sna, const char *backend) 3258{ 3259 if (!gen4_render_setup(sna)) 3260 return backend; 3261 3262 sna->kgem.retire = gen4_render_retire; 3263 sna->kgem.expire = gen4_render_expire; 3264 3265#if !NO_COMPOSITE 3266 sna->render.composite = gen4_render_composite; 3267 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3268#endif 3269#if !NO_COMPOSITE_SPANS 3270 sna->render.check_composite_spans = gen4_check_composite_spans; 3271 sna->render.composite_spans = gen4_render_composite_spans; 3272 if (0) 3273 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3274#endif 3275 3276#if !NO_VIDEO 3277 sna->render.video = gen4_render_video; 3278#endif 3279 3280#if !NO_COPY_BOXES 3281 sna->render.copy_boxes = gen4_render_copy_boxes; 3282#endif 3283#if !NO_COPY 3284 sna->render.copy = gen4_render_copy; 3285#endif 3286 3287#if !NO_FILL_BOXES 3288 sna->render.fill_boxes = gen4_render_fill_boxes; 3289#endif 3290#if !NO_FILL 3291 sna->render.fill = gen4_render_fill; 3292#endif 3293#if !NO_FILL_ONE 3294 sna->render.fill_one = gen4_render_fill_one; 3295#endif 3296 3297 sna->render.flush = gen4_render_flush; 3298 sna->render.reset = gen4_render_reset; 3299 sna->render.fini = gen4_render_fini; 3300 3301 sna->render.max_3d_size = 
GEN4_MAX_3D_SIZE; 3302 sna->render.max_3d_pitch = 1 << 18; 3303 return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)"; 3304} 3305
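/* Callers should note that gen4_render_init() reports the backend name
 * actually in use: on setup failure the caller's fallback name is
 * returned unchanged. A sketch of the expected per-generation dispatch
 * (the real logic lives with the generic render initialisation):
 *
 *	if (sna->kgem.gen >= 040 && sna->kgem.gen < 050)
 *		backend = gen4_render_init(sna, backend);
 */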