1/* 2 * Copyright © 2006,2008,2011 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@sna.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * Chris Wilson <chris@chris-wilson.co.uk> 30 * 31 */ 32 33#ifdef HAVE_CONFIG_H 34#include "config.h" 35#endif 36 37#include "sna.h" 38#include "sna_reg.h" 39#include "sna_render.h" 40#include "sna_render_inline.h" 41#include "sna_video.h" 42 43#include "brw/brw.h" 44#include "gen4_common.h" 45#include "gen4_render.h" 46#include "gen4_source.h" 47#include "gen4_vertex.h" 48 49/* gen4 has a serious issue with its shaders that we need to flush 50 * after every rectangle... So until that is resolved, prefer 51 * the BLT engine. 52 */ 53#define FORCE_SPANS 0 54#define FORCE_NONRECTILINEAR_SPANS -1 55#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ 56 57#define ALWAYS_FLUSH 1 58 59#define NO_COMPOSITE 0 60#define NO_COMPOSITE_SPANS 0 61#define NO_COPY 0 62#define NO_COPY_BOXES 0 63#define NO_FILL 0 64#define NO_FILL_ONE 0 65#define NO_FILL_BOXES 0 66#define NO_VIDEO 0 67 68#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ 69 70#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 71 72/* Set up a default static partitioning of the URB, which is supposed to 73 * allow anything we would want to do, at potentially lower performance. 74 */ 75#define URB_CS_ENTRY_SIZE 1 76#define URB_CS_ENTRIES 0 77 78#define URB_VS_ENTRY_SIZE 1 79#define URB_VS_ENTRIES 32 80 81#define URB_GS_ENTRY_SIZE 0 82#define URB_GS_ENTRIES 0 83 84#define URB_CL_ENTRY_SIZE 0 85#define URB_CL_ENTRIES 0 86 87#define URB_SF_ENTRY_SIZE 2 88#define URB_SF_ENTRIES 64 89 90/* 91 * this program computes dA/dx and dA/dy for the texture coordinates along 92 * with the base texture coordinate. It was extracted from the Mesa driver 93 */ 94 95#define SF_KERNEL_NUM_GRF 16 96#define PS_KERNEL_NUM_GRF 32 97 98#define GEN4_MAX_SF_THREADS 24 99#define GEN4_MAX_WM_THREADS 32 100#define G4X_MAX_WM_THREADS 50 101 102static const uint32_t ps_kernel_packed_static[][4] = { 103#include "exa_wm_xy.g4b" 104#include "exa_wm_src_affine.g4b" 105#include "exa_wm_src_sample_argb.g4b" 106#include "exa_wm_yuv_rgb.g4b" 107#include "exa_wm_write.g4b" 108}; 109 110static const uint32_t ps_kernel_planar_static[][4] = { 111#include "exa_wm_xy.g4b" 112#include "exa_wm_src_affine.g4b" 113#include "exa_wm_src_sample_planar.g4b" 114#include "exa_wm_yuv_rgb.g4b" 115#include "exa_wm_write.g4b" 116}; 117 118#define NOKERNEL(kernel_enum, func, masked) \ 119 [kernel_enum] = {func, 0, masked} 120#define KERNEL(kernel_enum, kernel, masked) \ 121 [kernel_enum] = {&kernel, sizeof(kernel), masked} 122static const struct wm_kernel_info { 123 const void *data; 124 unsigned int size; 125 bool has_mask; 126} wm_kernels[] = { 127 NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false), 128 NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false), 129 130 NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true), 131 NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true), 132 133 NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true), 134 NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true), 135 136 NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true), 137 NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true), 138 139 NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true), 140 NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true), 141 142 KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false), 143 KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false), 144}; 145#undef KERNEL 146 147static const struct blendinfo { 148 bool src_alpha; 149 uint32_t src_blend; 150 uint32_t dst_blend; 151} gen4_blend_op[] = { 152 /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO}, 153 /* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO}, 154 /* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE}, 155 /* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 156 /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE}, 157 /* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, 158 /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA}, 159 /* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, 160 /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 161 /* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 162 /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA}, 163 /* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 164 /* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE}, 165}; 166 167/** 168 * Highest-valued BLENDFACTOR used in gen4_blend_op. 169 * 170 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR, 171 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 172 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 173 */ 174#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1) 175 176#define BLEND_OFFSET(s, d) \ 177 (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64) 178 179#define SAMPLER_OFFSET(sf, se, mf, me, k) \ 180 ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64) 181 182static void 183gen4_emit_pipelined_pointers(struct sna *sna, 184 const struct sna_composite_op *op, 185 int blend, int kernel); 186 187#define OUT_BATCH(v) batch_emit(sna, v) 188#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 189#define OUT_VERTEX_F(v) vertex_emit(sna, v) 190 191#define GEN4_MAX_3D_SIZE 8192 192 193static inline bool too_large(int width, int height) 194{ 195 return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE; 196} 197 198static int 199gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 200{ 201 int base; 202 203 if (has_mask) { 204 if (is_ca) { 205 if (gen4_blend_op[op].src_alpha) 206 base = WM_KERNEL_MASKSA; 207 else 208 base = WM_KERNEL_MASKCA; 209 } else 210 base = WM_KERNEL_MASK; 211 } else 212 base = WM_KERNEL; 213 214 return base + !is_affine; 215} 216 217static bool gen4_magic_ca_pass(struct sna *sna, 218 const struct sna_composite_op *op) 219{ 220 struct gen4_render_state *state = &sna->render_state.gen4; 221 222 if (!op->need_magic_ca_pass) 223 return false; 224 225 assert(sna->render.vertex_index > sna->render.vertex_start); 226 227 DBG(("%s: CA fixup\n", __FUNCTION__)); 228 assert(op->mask.bo != NULL); 229 assert(op->has_component_alpha); 230 231 gen4_emit_pipelined_pointers(sna, op, PictOpAdd, 232 gen4_choose_composite_kernel(PictOpAdd, 233 true, true, op->is_affine)); 234 235 OUT_BATCH(GEN4_3DPRIMITIVE | 236 GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | 237 (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | 238 (0 << 9) | 239 4); 240 OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 241 OUT_BATCH(sna->render.vertex_start); 242 OUT_BATCH(1); /* single instance */ 243 OUT_BATCH(0); /* start instance location */ 244 OUT_BATCH(0); /* index buffer offset, ignored */ 245 246 state->last_primitive = sna->kgem.nbatch; 247 return true; 248} 249 250static uint32_t gen4_get_blend(int op, 251 bool has_component_alpha, 252 uint32_t dst_format) 253{ 254 uint32_t src, dst; 255 256 src = gen4_blend_op[op].src_blend; 257 dst = gen4_blend_op[op].dst_blend; 258 259 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 260 * it as always 1. 261 */ 262 if (PICT_FORMAT_A(dst_format) == 0) { 263 if (src == GEN4_BLENDFACTOR_DST_ALPHA) 264 src = GEN4_BLENDFACTOR_ONE; 265 else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA) 266 src = GEN4_BLENDFACTOR_ZERO; 267 } 268 269 /* If the source alpha is being used, then we should only be in a 270 * case where the source blend factor is 0, and the source blend 271 * value is the mask channels multiplied by the source picture's alpha. 272 */ 273 if (has_component_alpha && gen4_blend_op[op].src_alpha) { 274 if (dst == GEN4_BLENDFACTOR_SRC_ALPHA) 275 dst = GEN4_BLENDFACTOR_SRC_COLOR; 276 else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA) 277 dst = GEN4_BLENDFACTOR_INV_SRC_COLOR; 278 } 279 280 DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 281 op, dst_format, PICT_FORMAT_A(dst_format), 282 src, dst, BLEND_OFFSET(src, dst))); 283 return BLEND_OFFSET(src, dst); 284} 285 286static uint32_t gen4_get_card_format(PictFormat format) 287{ 288 switch (format) { 289 default: 290 return -1; 291 case PICT_a8r8g8b8: 292 return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; 293 case PICT_x8r8g8b8: 294 return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM; 295 case PICT_a8b8g8r8: 296 return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; 297 case PICT_x8b8g8r8: 298 return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM; 299#ifdef PICT_a2r10g10b10 300 case PICT_a2r10g10b10: 301 return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; 302 case PICT_x2r10g10b10: 303 return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM; 304#endif 305 case PICT_r8g8b8: 306 return GEN4_SURFACEFORMAT_R8G8B8_UNORM; 307 case PICT_r5g6b5: 308 return GEN4_SURFACEFORMAT_B5G6R5_UNORM; 309 case PICT_a1r5g5b5: 310 return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; 311 case PICT_a8: 312 return GEN4_SURFACEFORMAT_A8_UNORM; 313 case PICT_a4r4g4b4: 314 return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; 315 } 316} 317 318static uint32_t gen4_get_dest_format(PictFormat format) 319{ 320 switch (format) { 321 default: 322 return -1; 323 case PICT_a8r8g8b8: 324 case PICT_x8r8g8b8: 325 return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; 326 case PICT_a8b8g8r8: 327 case PICT_x8b8g8r8: 328 return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; 329#ifdef PICT_a2r10g10b10 330 case PICT_a2r10g10b10: 331 case PICT_x2r10g10b10: 332 return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; 333#endif 334 case PICT_r5g6b5: 335 return GEN4_SURFACEFORMAT_B5G6R5_UNORM; 336 case PICT_x1r5g5b5: 337 case PICT_a1r5g5b5: 338 return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; 339 case PICT_a8: 340 return GEN4_SURFACEFORMAT_A8_UNORM; 341 case PICT_a4r4g4b4: 342 case PICT_x4r4g4b4: 343 return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; 344 } 345} 346 347static bool gen4_check_dst_format(PictFormat format) 348{ 349 if (gen4_get_dest_format(format) != -1) 350 return true; 351 352 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 353 return false; 354} 355 356static bool gen4_check_format(uint32_t format) 357{ 358 if (gen4_get_card_format(format) != -1) 359 return true; 360 361 DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 362 return false; 363} 364 365typedef struct gen4_surface_state_padded { 366 struct gen4_surface_state state; 367 char pad[32 - sizeof(struct gen4_surface_state)]; 368} gen4_surface_state_padded; 369 370static void null_create(struct sna_static_stream *stream) 371{ 372 /* A bunch of zeros useful for legacy border color and depth-stencil */ 373 sna_static_stream_map(stream, 64, 64); 374} 375 376static void 377sampler_state_init(struct gen4_sampler_state *sampler_state, 378 sampler_filter_t filter, 379 sampler_extend_t extend) 380{ 381 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 382 383 /* We use the legacy mode to get the semantics specified by 384 * the Render extension. */ 385 sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY; 386 387 switch (filter) { 388 default: 389 case SAMPLER_FILTER_NEAREST: 390 sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST; 391 sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST; 392 break; 393 case SAMPLER_FILTER_BILINEAR: 394 sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR; 395 sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR; 396 break; 397 } 398 399 switch (extend) { 400 default: 401 case SAMPLER_EXTEND_NONE: 402 sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 403 sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 404 sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 405 break; 406 case SAMPLER_EXTEND_REPEAT: 407 sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 408 sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 409 sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 410 break; 411 case SAMPLER_EXTEND_PAD: 412 sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 413 sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 414 sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 415 break; 416 case SAMPLER_EXTEND_REFLECT: 417 sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 418 sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 419 sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 420 break; 421 } 422} 423 424static uint32_t gen4_filter(uint32_t filter) 425{ 426 switch (filter) { 427 default: 428 assert(0); 429 case PictFilterNearest: 430 return SAMPLER_FILTER_NEAREST; 431 case PictFilterBilinear: 432 return SAMPLER_FILTER_BILINEAR; 433 } 434} 435 436static uint32_t gen4_check_filter(PicturePtr picture) 437{ 438 switch (picture->filter) { 439 case PictFilterNearest: 440 case PictFilterBilinear: 441 return true; 442 default: 443 DBG(("%s: unknown filter: %s [%d]\n", 444 __FUNCTION__, 445 PictureGetFilterName(picture->filter), 446 picture->filter)); 447 return false; 448 } 449} 450 451static uint32_t gen4_repeat(uint32_t repeat) 452{ 453 switch (repeat) { 454 default: 455 assert(0); 456 case RepeatNone: 457 return SAMPLER_EXTEND_NONE; 458 case RepeatNormal: 459 return SAMPLER_EXTEND_REPEAT; 460 case RepeatPad: 461 return SAMPLER_EXTEND_PAD; 462 case RepeatReflect: 463 return SAMPLER_EXTEND_REFLECT; 464 } 465} 466 467static bool gen4_check_repeat(PicturePtr picture) 468{ 469 if (!picture->repeat) 470 return true; 471 472 switch (picture->repeatType) { 473 case RepeatNone: 474 case RepeatNormal: 475 case RepeatPad: 476 case RepeatReflect: 477 return true; 478 default: 479 DBG(("%s: unknown repeat: %d\n", 480 __FUNCTION__, picture->repeatType)); 481 return false; 482 } 483} 484 485static uint32_t 486gen4_tiling_bits(uint32_t tiling) 487{ 488 switch (tiling) { 489 default: assert(0); 490 case I915_TILING_NONE: return 0; 491 case I915_TILING_X: return GEN4_SURFACE_TILED; 492 case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y; 493 } 494} 495 496/** 497 * Sets up the common fields for a surface state buffer for the given 498 * picture in the given surface state buffer. 499 */ 500static uint32_t 501gen4_bind_bo(struct sna *sna, 502 struct kgem_bo *bo, 503 uint32_t width, 504 uint32_t height, 505 uint32_t format, 506 bool is_dst) 507{ 508 uint32_t domains; 509 uint16_t offset; 510 uint32_t *ss; 511 512 assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo)); 513 514 /* After the first bind, we manage the cache domains within the batch */ 515 offset = kgem_bo_get_binding(bo, format | is_dst << 31); 516 if (offset) { 517 assert(offset >= sna->kgem.surface); 518 if (is_dst) 519 kgem_bo_mark_dirty(bo); 520 return offset * sizeof(uint32_t); 521 } 522 523 offset = sna->kgem.surface -= 524 sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 525 ss = sna->kgem.batch + offset; 526 527 ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT | 528 GEN4_SURFACE_BLEND_ENABLED | 529 format << GEN4_SURFACE_FORMAT_SHIFT); 530 531 if (is_dst) { 532 ss[0] |= GEN4_SURFACE_RC_READ_WRITE; 533 domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; 534 } else 535 domains = I915_GEM_DOMAIN_SAMPLER << 16; 536 ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 537 538 ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT | 539 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT); 540 ss[3] = (gen4_tiling_bits(bo->tiling) | 541 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT); 542 ss[4] = 0; 543 ss[5] = 0; 544 545 kgem_bo_set_binding(bo, format | is_dst << 31, offset); 546 547 DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 548 offset, bo->handle, ss[1], 549 format, width, height, bo->pitch, bo->tiling, 550 domains & 0xffff ? "render" : "sampler")); 551 552 return offset * sizeof(uint32_t); 553} 554 555static void gen4_emit_vertex_buffer(struct sna *sna, 556 const struct sna_composite_op *op) 557{ 558 int id = op->u.gen4.ve_id; 559 560 assert((sna->render.vb_id & (1 << id)) == 0); 561 562 OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3); 563 OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | 564 (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 565 assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc)); 566 sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 567 OUT_BATCH(0); 568 OUT_BATCH(0); 569 OUT_BATCH(0); 570 571 sna->render.vb_id |= 1 << id; 572} 573 574inline static void 575gen4_emit_pipe_flush(struct sna *sna) 576{ 577#if 1 578 OUT_BATCH(GEN4_PIPE_CONTROL | 579 GEN4_PIPE_CONTROL_WC_FLUSH | 580 (4 - 2)); 581 OUT_BATCH(0); 582 OUT_BATCH(0); 583 OUT_BATCH(0); 584#else 585 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 586#endif 587} 588 589inline static void 590gen4_emit_pipe_break(struct sna *sna) 591{ 592#if !ALWAYS_FLUSH 593 OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2)); 594 OUT_BATCH(0); 595 OUT_BATCH(0); 596 OUT_BATCH(0); 597#else 598 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 599#endif 600} 601 602inline static void 603gen4_emit_pipe_invalidate(struct sna *sna) 604{ 605#if 0 606 OUT_BATCH(GEN4_PIPE_CONTROL | 607 GEN4_PIPE_CONTROL_WC_FLUSH | 608 (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) | 609 (4 - 2)); 610 OUT_BATCH(0); 611 OUT_BATCH(0); 612 OUT_BATCH(0); 613#else 614 OUT_BATCH(MI_FLUSH); 615#endif 616} 617 618static void gen4_emit_primitive(struct sna *sna) 619{ 620 if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) { 621 sna->render.vertex_offset = sna->kgem.nbatch - 5; 622 return; 623 } 624 625 OUT_BATCH(GEN4_3DPRIMITIVE | 626 GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | 627 (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | 628 (0 << 9) | 629 4); 630 sna->render.vertex_offset = sna->kgem.nbatch; 631 OUT_BATCH(0); /* vertex count, to be filled in later */ 632 OUT_BATCH(sna->render.vertex_index); 633 OUT_BATCH(1); /* single instance */ 634 OUT_BATCH(0); /* start instance location */ 635 OUT_BATCH(0); /* index buffer offset, ignored */ 636 sna->render.vertex_start = sna->render.vertex_index; 637 638 sna->render_state.gen4.last_primitive = sna->kgem.nbatch; 639} 640 641static bool gen4_rectangle_begin(struct sna *sna, 642 const struct sna_composite_op *op) 643{ 644 unsigned int id = 1 << op->u.gen4.ve_id; 645 int ndwords; 646 647 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 648 return true; 649 650 /* 7xpipelined pointers + 6xprimitive + 1xflush */ 651 ndwords = op->need_magic_ca_pass? 19 : 6; 652 if ((sna->render.vb_id & id) == 0) 653 ndwords += 5; 654 ndwords += 8*FORCE_FLUSH; 655 656 if (!kgem_check_batch(&sna->kgem, ndwords)) 657 return false; 658 659 if ((sna->render.vb_id & id) == 0) 660 gen4_emit_vertex_buffer(sna, op); 661 if (sna->render.vertex_offset == 0) 662 gen4_emit_primitive(sna); 663 664 return true; 665} 666 667static int gen4_get_rectangles__flush(struct sna *sna, 668 const struct sna_composite_op *op) 669{ 670 /* Preventing discarding new vbo after lock contention */ 671 if (sna_vertex_wait__locked(&sna->render)) { 672 int rem = vertex_space(sna); 673 if (rem > op->floats_per_rect) 674 return rem; 675 } 676 677 if (!kgem_check_batch(&sna->kgem, 678 8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6))) 679 return 0; 680 if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 681 return 0; 682 683 if (sna->render.vertex_offset) { 684 gen4_vertex_flush(sna); 685 if (gen4_magic_ca_pass(sna, op)) 686 gen4_emit_pipelined_pointers(sna, op, op->op, 687 op->u.gen4.wm_kernel); 688 } 689 690 return gen4_vertex_finish(sna); 691} 692 693inline static int gen4_get_rectangles(struct sna *sna, 694 const struct sna_composite_op *op, 695 int want, 696 void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 697{ 698 int rem; 699 700 assert(want); 701#if FORCE_FLUSH 702 rem = sna->render.vertex_offset; 703 if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) 704 rem = sna->kgem.nbatch - 5; 705 if (rem) { 706 rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3; 707 if (rem <= 0) { 708 if (sna->render.vertex_offset) { 709 gen4_vertex_flush(sna); 710 if (gen4_magic_ca_pass(sna, op)) { 711 if (kgem_check_batch(&sna->kgem, 19+6)) 712 gen4_emit_pipelined_pointers(sna, op, op->op, 713 op->u.gen4.wm_kernel); 714 } 715 } 716 gen4_emit_pipe_break(sna); 717 rem = MAX_FLUSH_VERTICES; 718 } 719 } else 720 rem = MAX_FLUSH_VERTICES; 721 if (want > rem) 722 want = rem; 723#endif 724 725start: 726 rem = vertex_space(sna); 727 if (unlikely(rem < op->floats_per_rect)) { 728 DBG(("flushing vbo for %s: %d < %d\n", 729 __FUNCTION__, rem, op->floats_per_rect)); 730 rem = gen4_get_rectangles__flush(sna, op); 731 if (unlikely(rem == 0)) 732 goto flush; 733 } 734 735 if (unlikely(sna->render.vertex_offset == 0)) { 736 if (!gen4_rectangle_begin(sna, op)) 737 goto flush; 738 else 739 goto start; 740 } 741 742 assert(rem <= vertex_space(sna)); 743 assert(op->floats_per_rect <= rem); 744 if (want > 1 && want * op->floats_per_rect > rem) 745 want = rem / op->floats_per_rect; 746 747 sna->render.vertex_index += 3*want; 748 return want; 749 750flush: 751 if (sna->render.vertex_offset) { 752 gen4_vertex_flush(sna); 753 gen4_magic_ca_pass(sna, op); 754 } 755 sna_vertex_wait__locked(&sna->render); 756 _kgem_submit(&sna->kgem); 757 emit_state(sna, op); 758 goto start; 759} 760 761static uint32_t * 762gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset) 763{ 764 sna->kgem.surface -= 765 sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 766 767 DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 768 769 /* Clear all surplus entries to zero in case of prefetch */ 770 *offset = sna->kgem.surface; 771 return memset(sna->kgem.batch + sna->kgem.surface, 772 0, sizeof(struct gen4_surface_state_padded)); 773} 774 775static void 776gen4_emit_urb(struct sna *sna) 777{ 778 int urb_vs_end; 779 int urb_gs_end; 780 int urb_cl_end; 781 int urb_sf_end; 782 int urb_cs_end; 783 784 if (!sna->render_state.gen4.needs_urb) 785 return; 786 787 urb_vs_end = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 788 urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 789 urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE; 790 urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 791 urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 792 assert(urb_cs_end <= 256); 793 794 while ((sna->kgem.nbatch & 15) > 12) 795 OUT_BATCH(MI_NOOP); 796 797 OUT_BATCH(GEN4_URB_FENCE | 798 UF0_CS_REALLOC | 799 UF0_SF_REALLOC | 800 UF0_CLIP_REALLOC | 801 UF0_GS_REALLOC | 802 UF0_VS_REALLOC | 803 1); 804 OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT | 805 urb_gs_end << UF1_GS_FENCE_SHIFT | 806 urb_vs_end << UF1_VS_FENCE_SHIFT); 807 OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT | 808 urb_sf_end << UF2_SF_FENCE_SHIFT); 809 810 /* Constant buffer state */ 811 OUT_BATCH(GEN4_CS_URB_STATE | 0); 812 OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0); 813 814 sna->render_state.gen4.needs_urb = false; 815} 816 817static void 818gen4_emit_state_base_address(struct sna *sna) 819{ 820 assert(sna->render_state.gen4.general_bo->proxy == NULL); 821 OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4); 822 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */ 823 sna->kgem.nbatch, 824 sna->render_state.gen4.general_bo, 825 I915_GEM_DOMAIN_INSTRUCTION << 16, 826 BASE_ADDRESS_MODIFY)); 827 OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 828 sna->kgem.nbatch, 829 NULL, 830 I915_GEM_DOMAIN_INSTRUCTION << 16, 831 BASE_ADDRESS_MODIFY)); 832 OUT_BATCH(0); /* media */ 833 834 /* upper bounds, all disabled */ 835 OUT_BATCH(BASE_ADDRESS_MODIFY); 836 OUT_BATCH(0); 837} 838 839static void 840gen4_emit_invariant(struct sna *sna) 841{ 842 assert(sna->kgem.surface == sna->kgem.batch_size); 843 844 if (sna->kgem.gen >= 045) 845 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 846 else 847 OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D); 848 849 gen4_emit_state_base_address(sna); 850 851 sna->render_state.gen4.needs_invariant = false; 852} 853 854static void 855gen4_get_batch(struct sna *sna, const struct sna_composite_op *op) 856{ 857 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 858 859 if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) { 860 DBG(("%s: flushing batch: %d < %d+%d\n", 861 __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 862 150, 4*8)); 863 kgem_submit(&sna->kgem); 864 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 865 } 866 867 if (sna->render_state.gen4.needs_invariant) 868 gen4_emit_invariant(sna); 869} 870 871static void 872gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) 873{ 874 assert(op->floats_per_rect == 3*op->floats_per_vertex); 875 if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { 876 DBG(("aligning vertex: was %d, now %d floats per vertex\n", 877 sna->render_state.gen4.floats_per_vertex, 878 op->floats_per_vertex)); 879 gen4_vertex_align(sna, op); 880 sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex; 881 } 882} 883 884static void 885gen4_emit_binding_table(struct sna *sna, uint16_t offset) 886{ 887 if (sna->render_state.gen4.surface_table == offset) 888 return; 889 890 sna->render_state.gen4.surface_table = offset; 891 892 /* Binding table pointers */ 893 OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4); 894 OUT_BATCH(0); /* vs */ 895 OUT_BATCH(0); /* gs */ 896 OUT_BATCH(0); /* clip */ 897 OUT_BATCH(0); /* sf */ 898 /* Only the PS uses the binding table */ 899 OUT_BATCH(offset*4); 900} 901 902static void 903gen4_emit_pipelined_pointers(struct sna *sna, 904 const struct sna_composite_op *op, 905 int blend, int kernel) 906{ 907 uint16_t sp, bp; 908 uint32_t key; 909 910 DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n", 911 __FUNCTION__, op->u.gen4.ve_id & 2, 912 op->src.filter, op->src.repeat, 913 op->mask.filter, op->mask.repeat, 914 kernel, blend, op->has_component_alpha, (int)op->dst.format)); 915 916 sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat, 917 op->mask.filter, op->mask.repeat, 918 kernel); 919 bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format); 920 921 DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp)); 922 key = sp | (uint32_t)bp << 16; 923 if (key == sna->render_state.gen4.last_pipelined_pointers) 924 return; 925 926 OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5); 927 OUT_BATCH(sna->render_state.gen4.vs); 928 OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */ 929 OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */ 930 OUT_BATCH(sna->render_state.gen4.sf); 931 OUT_BATCH(sna->render_state.gen4.wm + sp); 932 OUT_BATCH(sna->render_state.gen4.cc + bp); 933 934 sna->render_state.gen4.last_pipelined_pointers = key; 935 gen4_emit_urb(sna); 936} 937 938static bool 939gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) 940{ 941 uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 942 uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 943 944 assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 945 assert(!too_large(op->dst.width, op->dst.height)); 946 947 if (sna->render_state.gen4.drawrect_limit == limit && 948 sna->render_state.gen4.drawrect_offset == offset) 949 return true; 950 951 sna->render_state.gen4.drawrect_offset = offset; 952 sna->render_state.gen4.drawrect_limit = limit; 953 954 OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 955 OUT_BATCH(0); 956 OUT_BATCH(limit); 957 OUT_BATCH(offset); 958 return false; 959} 960 961static void 962gen4_emit_vertex_elements(struct sna *sna, 963 const struct sna_composite_op *op) 964{ 965 /* 966 * vertex data in vertex buffer 967 * position: (x, y) 968 * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 969 * texture coordinate 1 if (has_mask is true): same as above 970 */ 971 struct gen4_render_state *render = &sna->render_state.gen4; 972 uint32_t src_format, dw; 973 int id = op->u.gen4.ve_id; 974 975 if (render->ve_id == id) 976 return; 977 render->ve_id = id; 978 979 /* The VUE layout 980 * dword 0-3: position (x, y, 1.0, 1.0), 981 * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0) 982 * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0) 983 */ 984 OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1)); 985 986 /* x,y */ 987 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 988 GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 989 0 << VE0_OFFSET_SHIFT); 990 OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | 991 VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | 992 VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | 993 VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | 994 (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 995 996 /* u0, v0, w0 */ 997 /* u0, v0, w0 */ 998 DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 999 dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 1000 switch (id & 3) { 1001 default: 1002 assert(0); 1003 case 0: 1004 src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED; 1005 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1006 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1007 dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1008 break; 1009 case 1: 1010 src_format = GEN4_SURFACEFORMAT_R32_FLOAT; 1011 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1012 dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 1013 dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1014 break; 1015 case 2: 1016 src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; 1017 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1018 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1019 dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1020 break; 1021 case 3: 1022 src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; 1023 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1024 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1025 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 1026 break; 1027 } 1028 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1029 src_format << VE0_FORMAT_SHIFT | 1030 4 << VE0_OFFSET_SHIFT); 1031 OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1032 1033 /* u1, v1, w1 */ 1034 if (id >> 2) { 1035 unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1036 DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, 1037 id >> 2, src_offset)); 1038 dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 1039 switch (id >> 2) { 1040 case 1: 1041 src_format = GEN4_SURFACEFORMAT_R32_FLOAT; 1042 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1043 dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 1044 dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1045 break; 1046 default: 1047 assert(0); 1048 case 2: 1049 src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; 1050 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1051 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1052 dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1053 break; 1054 case 3: 1055 src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; 1056 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1057 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1058 dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 1059 break; 1060 } 1061 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1062 src_format << VE0_FORMAT_SHIFT | 1063 src_offset << VE0_OFFSET_SHIFT); 1064 OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1065 } else { 1066 OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1067 GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 1068 0 << VE0_OFFSET_SHIFT); 1069 OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | 1070 VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | 1071 VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 1072 VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | 1073 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1074 } 1075} 1076 1077static void 1078gen4_emit_state(struct sna *sna, 1079 const struct sna_composite_op *op, 1080 uint16_t wm_binding_table) 1081{ 1082 bool flush; 1083 1084 assert(op->dst.bo->exec); 1085 1086 flush = wm_binding_table & 1; 1087 wm_binding_table &= ~1; 1088 1089 if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 1090 DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__, 1091 kgem_bo_is_dirty(op->src.bo), 1092 kgem_bo_is_dirty(op->mask.bo), 1093 flush)); 1094 gen4_emit_pipe_invalidate(sna); 1095 kgem_clear_dirty(&sna->kgem); 1096 kgem_bo_mark_dirty(op->dst.bo); 1097 flush = false; 1098 } 1099 flush &= gen4_emit_drawing_rectangle(sna, op); 1100 if (flush && op->op > PictOpSrc) 1101 gen4_emit_pipe_flush(sna); 1102 1103 gen4_emit_binding_table(sna, wm_binding_table); 1104 gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel); 1105 gen4_emit_vertex_elements(sna, op); 1106} 1107 1108static void 1109gen4_bind_surfaces(struct sna *sna, 1110 const struct sna_composite_op *op) 1111{ 1112 uint32_t *binding_table; 1113 uint16_t offset, dirty; 1114 1115 gen4_get_batch(sna, op); 1116 dirty = kgem_bo_is_dirty(op->dst.bo); 1117 1118 binding_table = gen4_composite_get_binding_table(sna, &offset); 1119 1120 binding_table[0] = 1121 gen4_bind_bo(sna, 1122 op->dst.bo, op->dst.width, op->dst.height, 1123 gen4_get_dest_format(op->dst.format), 1124 true); 1125 binding_table[1] = 1126 gen4_bind_bo(sna, 1127 op->src.bo, op->src.width, op->src.height, 1128 op->src.card_format, 1129 false); 1130 if (op->mask.bo) { 1131 assert(op->u.gen4.ve_id >> 2); 1132 binding_table[2] = 1133 gen4_bind_bo(sna, 1134 op->mask.bo, 1135 op->mask.width, 1136 op->mask.height, 1137 op->mask.card_format, 1138 false); 1139 } 1140 1141 if (sna->kgem.surface == offset && 1142 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table && 1143 (op->mask.bo == NULL || 1144 sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) { 1145 sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 1146 offset = sna->render_state.gen4.surface_table; 1147 } 1148 1149 if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 1150 dirty = 0; 1151 1152 gen4_emit_state(sna, op, offset | dirty); 1153} 1154 1155fastcall static void 1156gen4_render_composite_blt(struct sna *sna, 1157 const struct sna_composite_op *op, 1158 const struct sna_composite_rectangles *r) 1159{ 1160 DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", 1161 __FUNCTION__, 1162 r->src.x, r->src.y, op->src.offset[0], op->src.offset[1], 1163 r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1], 1164 r->dst.x, r->dst.y, op->dst.x, op->dst.y, 1165 r->width, r->height)); 1166 1167 gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 1168 op->prim_emit(sna, op, r); 1169} 1170 1171fastcall static void 1172gen4_render_composite_box(struct sna *sna, 1173 const struct sna_composite_op *op, 1174 const BoxRec *box) 1175{ 1176 struct sna_composite_rectangles r; 1177 1178 DBG((" %s: (%d, %d), (%d, %d)\n", 1179 __FUNCTION__, 1180 box->x1, box->y1, box->x2, box->y2)); 1181 1182 gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 1183 1184 r.dst.x = box->x1; 1185 r.dst.y = box->y1; 1186 r.width = box->x2 - box->x1; 1187 r.height = box->y2 - box->y1; 1188 r.mask = r.src = r.dst; 1189 1190 op->prim_emit(sna, op, &r); 1191} 1192 1193static void 1194gen4_render_composite_boxes__blt(struct sna *sna, 1195 const struct sna_composite_op *op, 1196 const BoxRec *box, int nbox) 1197{ 1198 DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", 1199 __FUNCTION__, nbox, op->dst.x, op->dst.y, 1200 op->src.offset[0], op->src.offset[1], 1201 op->src.width, op->src.height, 1202 op->mask.offset[0], op->mask.offset[1], 1203 op->mask.width, op->mask.height)); 1204 1205 do { 1206 int nbox_this_time; 1207 1208 nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1209 gen4_bind_surfaces); 1210 nbox -= nbox_this_time; 1211 1212 do { 1213 struct sna_composite_rectangles r; 1214 1215 DBG((" %s: (%d, %d), (%d, %d)\n", 1216 __FUNCTION__, 1217 box->x1, box->y1, box->x2, box->y2)); 1218 1219 r.dst.x = box->x1; 1220 r.dst.y = box->y1; 1221 r.width = box->x2 - box->x1; 1222 r.height = box->y2 - box->y1; 1223 r.mask = r.src = r.dst; 1224 op->prim_emit(sna, op, &r); 1225 box++; 1226 } while (--nbox_this_time); 1227 } while (nbox); 1228} 1229 1230static void 1231gen4_render_composite_boxes(struct sna *sna, 1232 const struct sna_composite_op *op, 1233 const BoxRec *box, int nbox) 1234{ 1235 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1236 1237 do { 1238 int nbox_this_time; 1239 float *v; 1240 1241 nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1242 gen4_bind_surfaces); 1243 assert(nbox_this_time); 1244 nbox -= nbox_this_time; 1245 1246 v = sna->render.vertices + sna->render.vertex_used; 1247 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1248 1249 op->emit_boxes(op, box, nbox_this_time, v); 1250 box += nbox_this_time; 1251 } while (nbox); 1252} 1253 1254#if !FORCE_FLUSH 1255static void 1256gen4_render_composite_boxes__thread(struct sna *sna, 1257 const struct sna_composite_op *op, 1258 const BoxRec *box, int nbox) 1259{ 1260 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1261 1262 sna_vertex_lock(&sna->render); 1263 do { 1264 int nbox_this_time; 1265 float *v; 1266 1267 nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1268 gen4_bind_surfaces); 1269 assert(nbox_this_time); 1270 nbox -= nbox_this_time; 1271 1272 v = sna->render.vertices + sna->render.vertex_used; 1273 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1274 1275 sna_vertex_acquire__locked(&sna->render); 1276 sna_vertex_unlock(&sna->render); 1277 1278 op->emit_boxes(op, box, nbox_this_time, v); 1279 box += nbox_this_time; 1280 1281 sna_vertex_lock(&sna->render); 1282 sna_vertex_release__locked(&sna->render); 1283 } while (nbox); 1284 sna_vertex_unlock(&sna->render); 1285} 1286#endif 1287 1288#ifndef MAX 1289#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1290#endif 1291 1292static uint32_t gen4_bind_video_source(struct sna *sna, 1293 struct kgem_bo *src_bo, 1294 uint32_t src_offset, 1295 int src_width, 1296 int src_height, 1297 int src_pitch, 1298 uint32_t src_surf_format) 1299{ 1300 struct gen4_surface_state *ss; 1301 1302 sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 1303 1304 ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); 1305 ss->ss0.surface_type = GEN4_SURFACE_2D; 1306 ss->ss0.surface_format = src_surf_format; 1307 ss->ss0.color_blend = 1; 1308 1309 ss->ss1.base_addr = 1310 kgem_add_reloc(&sna->kgem, 1311 sna->kgem.surface + 1, 1312 src_bo, 1313 I915_GEM_DOMAIN_SAMPLER << 16, 1314 src_offset); 1315 1316 ss->ss2.width = src_width - 1; 1317 ss->ss2.height = src_height - 1; 1318 ss->ss3.pitch = src_pitch - 1; 1319 1320 return sna->kgem.surface * sizeof(uint32_t); 1321} 1322 1323static void gen4_video_bind_surfaces(struct sna *sna, 1324 const struct sna_composite_op *op) 1325{ 1326 struct sna_video_frame *frame = op->priv; 1327 uint32_t src_surf_format; 1328 uint32_t src_surf_base[6]; 1329 int src_width[6]; 1330 int src_height[6]; 1331 int src_pitch[6]; 1332 uint32_t *binding_table; 1333 uint16_t offset, dirty; 1334 int n_src, n; 1335 1336 src_surf_base[0] = 0; 1337 src_surf_base[1] = 0; 1338 src_surf_base[2] = frame->VBufOffset; 1339 src_surf_base[3] = frame->VBufOffset; 1340 src_surf_base[4] = frame->UBufOffset; 1341 src_surf_base[5] = frame->UBufOffset; 1342 1343 if (is_planar_fourcc(frame->id)) { 1344 src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM; 1345 src_width[1] = src_width[0] = frame->width; 1346 src_height[1] = src_height[0] = frame->height; 1347 src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1348 src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1349 frame->width / 2; 1350 src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1351 frame->height / 2; 1352 src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1353 frame->pitch[0]; 1354 n_src = 6; 1355 } else { 1356 if (frame->id == FOURCC_UYVY) 1357 src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY; 1358 else 1359 src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL; 1360 1361 src_width[0] = frame->width; 1362 src_height[0] = frame->height; 1363 src_pitch[0] = frame->pitch[0]; 1364 n_src = 1; 1365 } 1366 1367 gen4_get_batch(sna, op); 1368 dirty = kgem_bo_is_dirty(op->dst.bo); 1369 1370 binding_table = gen4_composite_get_binding_table(sna, &offset); 1371 binding_table[0] = 1372 gen4_bind_bo(sna, 1373 op->dst.bo, op->dst.width, op->dst.height, 1374 gen4_get_dest_format(op->dst.format), 1375 true); 1376 for (n = 0; n < n_src; n++) { 1377 binding_table[1+n] = 1378 gen4_bind_video_source(sna, 1379 frame->bo, 1380 src_surf_base[n], 1381 src_width[n], 1382 src_height[n], 1383 src_pitch[n], 1384 src_surf_format); 1385 } 1386 1387 if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 1388 dirty = 0; 1389 1390 gen4_emit_state(sna, op, offset | dirty); 1391} 1392 1393static bool 1394gen4_render_video(struct sna *sna, 1395 struct sna_video *video, 1396 struct sna_video_frame *frame, 1397 RegionPtr dstRegion, 1398 PixmapPtr pixmap) 1399{ 1400 struct sna_composite_op tmp; 1401 struct sna_pixmap *priv = sna_pixmap(pixmap); 1402 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1403 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1404 int src_width = frame->src.x2 - frame->src.x1; 1405 int src_height = frame->src.y2 - frame->src.y1; 1406 float src_offset_x, src_offset_y; 1407 float src_scale_x, src_scale_y; 1408 int nbox, pix_xoff, pix_yoff; 1409 const BoxRec *box; 1410 1411 DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, 1412 src_width, src_height, dst_width, dst_height)); 1413 1414 assert(priv->gpu_bo); 1415 memset(&tmp, 0, sizeof(tmp)); 1416 1417 tmp.op = PictOpSrc; 1418 tmp.dst.pixmap = pixmap; 1419 tmp.dst.width = pixmap->drawable.width; 1420 tmp.dst.height = pixmap->drawable.height; 1421 tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); 1422 tmp.dst.bo = priv->gpu_bo; 1423 1424 if (src_width == dst_width && src_height == dst_height) 1425 tmp.src.filter = SAMPLER_FILTER_NEAREST; 1426 else 1427 tmp.src.filter = SAMPLER_FILTER_BILINEAR; 1428 tmp.src.repeat = SAMPLER_EXTEND_PAD; 1429 tmp.src.bo = frame->bo; 1430 tmp.mask.bo = NULL; 1431 tmp.u.gen4.wm_kernel = 1432 is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; 1433 tmp.u.gen4.ve_id = 2; 1434 tmp.is_affine = true; 1435 tmp.floats_per_vertex = 3; 1436 tmp.floats_per_rect = 9; 1437 tmp.priv = frame; 1438 1439 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1440 kgem_submit(&sna->kgem); 1441 if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 1442 return false; 1443 } 1444 1445 gen4_align_vertex(sna, &tmp); 1446 gen4_video_bind_surfaces(sna, &tmp); 1447 1448 /* Set up the offset for translating from the given region (in screen 1449 * coordinates) to the backing pixmap. 1450 */ 1451#ifdef COMPOSITE 1452 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1453 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1454#else 1455 pix_xoff = 0; 1456 pix_yoff = 0; 1457#endif 1458 1459 src_scale_x = (float)src_width / dst_width / frame->width; 1460 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1461 1462 src_scale_y = (float)src_height / dst_height / frame->height; 1463 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1464 1465 box = region_rects(dstRegion); 1466 nbox = region_num_rects(dstRegion); 1467 do { 1468 int n; 1469 1470 n = gen4_get_rectangles(sna, &tmp, nbox, 1471 gen4_video_bind_surfaces); 1472 assert(n); 1473 nbox -= n; 1474 1475 do { 1476 BoxRec r; 1477 1478 r.x1 = box->x1 + pix_xoff; 1479 r.x2 = box->x2 + pix_xoff; 1480 r.y1 = box->y1 + pix_yoff; 1481 r.y2 = box->y2 + pix_yoff; 1482 1483 OUT_VERTEX(r.x2, r.y2); 1484 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1485 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1486 1487 OUT_VERTEX(r.x1, r.y2); 1488 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1489 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1490 1491 OUT_VERTEX(r.x1, r.y1); 1492 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1493 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1494 1495 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1496 sna_damage_add_box(&priv->gpu_damage, &r); 1497 sna_damage_subtract_box(&priv->cpu_damage, &r); 1498 } 1499 box++; 1500 } while (--n); 1501 } while (nbox); 1502 gen4_vertex_flush(sna); 1503 1504 return true; 1505} 1506 1507static int 1508gen4_composite_picture(struct sna *sna, 1509 PicturePtr picture, 1510 struct sna_composite_channel *channel, 1511 int x, int y, 1512 int w, int h, 1513 int dst_x, int dst_y, 1514 bool precise) 1515{ 1516 PixmapPtr pixmap; 1517 uint32_t color; 1518 int16_t dx, dy; 1519 1520 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1521 __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1522 1523 channel->is_solid = false; 1524 channel->card_format = -1; 1525 1526 if (sna_picture_is_solid(picture, &color)) 1527 return gen4_channel_init_solid(sna, channel, color); 1528 1529 if (picture->pDrawable == NULL) { 1530 int ret; 1531 1532 if (picture->pSourcePict->type == SourcePictTypeLinear) 1533 return gen4_channel_init_linear(sna, picture, channel, 1534 x, y, 1535 w, h, 1536 dst_x, dst_y); 1537 1538 DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1539 ret = -1; 1540 if (!precise) 1541 ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1542 x, y, w, h, dst_x, dst_y); 1543 if (ret == -1) 1544 ret = sna_render_picture_fixup(sna, picture, channel, 1545 x, y, w, h, dst_x, dst_y); 1546 return ret; 1547 } 1548 1549 if (picture->alphaMap) { 1550 DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1551 return sna_render_picture_fixup(sna, picture, channel, 1552 x, y, w, h, dst_x, dst_y); 1553 } 1554 1555 if (!gen4_check_repeat(picture)) { 1556 DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__)); 1557 return sna_render_picture_fixup(sna, picture, channel, 1558 x, y, w, h, dst_x, dst_y); 1559 } 1560 1561 if (!gen4_check_filter(picture)) { 1562 DBG(("%s: unhandled filter fixup\n", __FUNCTION__)); 1563 return sna_render_picture_fixup(sna, picture, channel, 1564 x, y, w, h, dst_x, dst_y); 1565 } 1566 1567 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1568 channel->filter = picture->filter; 1569 1570 pixmap = get_drawable_pixmap(picture->pDrawable); 1571 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1572 1573 x += dx + picture->pDrawable->x; 1574 y += dy + picture->pDrawable->y; 1575 1576 channel->is_affine = sna_transform_is_affine(picture->transform); 1577 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1578 DBG(("%s: integer translation (%d, %d), removing\n", 1579 __FUNCTION__, dx, dy)); 1580 x += dx; 1581 y += dy; 1582 channel->transform = NULL; 1583 channel->filter = PictFilterNearest; 1584 1585 if (channel->repeat && 1586 (x >= 0 && 1587 y >= 0 && 1588 x + w < pixmap->drawable.width && 1589 y + h < pixmap->drawable.height)) { 1590 struct sna_pixmap *priv = sna_pixmap(pixmap); 1591 if (priv && priv->clear) { 1592 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1593 return gen4_channel_init_solid(sna, channel, priv->clear_color); 1594 } 1595 } 1596 } else 1597 channel->transform = picture->transform; 1598 1599 channel->pict_format = picture->format; 1600 channel->card_format = gen4_get_card_format(picture->format); 1601 if (channel->card_format == -1) 1602 return sna_render_picture_convert(sna, picture, channel, pixmap, 1603 x, y, w, h, dst_x, dst_y, 1604 false); 1605 1606 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) 1607 return sna_render_picture_extract(sna, picture, channel, 1608 x, y, w, h, dst_x, dst_y); 1609 1610 return sna_render_pixmap_bo(sna, channel, pixmap, 1611 x, y, w, h, dst_x, dst_y); 1612} 1613 1614static void gen4_composite_channel_convert(struct sna_composite_channel *channel) 1615{ 1616 DBG(("%s: repeat %d -> %d, filter %d -> %d\n", 1617 __FUNCTION__, 1618 channel->repeat, gen4_repeat(channel->repeat), 1619 channel->filter, gen4_repeat(channel->filter))); 1620 channel->repeat = gen4_repeat(channel->repeat); 1621 channel->filter = gen4_filter(channel->filter); 1622 if (channel->card_format == (unsigned)-1) 1623 channel->card_format = gen4_get_card_format(channel->pict_format); 1624} 1625 1626static void 1627gen4_render_composite_done(struct sna *sna, 1628 const struct sna_composite_op *op) 1629{ 1630 DBG(("%s()\n", __FUNCTION__)); 1631 1632 if (sna->render.vertex_offset) { 1633 gen4_vertex_flush(sna); 1634 gen4_magic_ca_pass(sna, op); 1635 } 1636 1637 if (op->mask.bo) 1638 kgem_bo_destroy(&sna->kgem, op->mask.bo); 1639 if (op->src.bo) 1640 kgem_bo_destroy(&sna->kgem, op->src.bo); 1641 1642 sna_render_composite_redirect_done(sna, op); 1643} 1644 1645static bool 1646gen4_composite_set_target(struct sna *sna, 1647 struct sna_composite_op *op, 1648 PicturePtr dst, 1649 int x, int y, int w, int h, 1650 bool partial) 1651{ 1652 BoxRec box; 1653 unsigned hint; 1654 1655 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1656 op->dst.width = op->dst.pixmap->drawable.width; 1657 op->dst.height = op->dst.pixmap->drawable.height; 1658 op->dst.format = dst->format; 1659 if (w && h) { 1660 box.x1 = x; 1661 box.y1 = y; 1662 box.x2 = x + w; 1663 box.y2 = y + h; 1664 } else 1665 sna_render_picture_extents(dst, &box); 1666 1667 hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1668 if (!partial) { 1669 hint |= IGNORE_DAMAGE; 1670 if (w == op->dst.width && h == op->dst.height) 1671 hint |= REPLACES; 1672 } 1673 1674 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1675 if (op->dst.bo == NULL) 1676 return false; 1677 1678 if (hint & REPLACES) { 1679 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1680 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1681 } 1682 1683 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1684 &op->dst.x, &op->dst.y); 1685 1686 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1687 __FUNCTION__, 1688 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1689 op->dst.width, op->dst.height, 1690 op->dst.bo->pitch, 1691 op->dst.x, op->dst.y, 1692 op->damage ? *op->damage : (void *)-1)); 1693 1694 assert(op->dst.bo->proxy == NULL); 1695 1696 if (too_large(op->dst.width, op->dst.height) && 1697 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1698 return false; 1699 1700 return true; 1701} 1702 1703static bool 1704check_gradient(PicturePtr picture, bool precise) 1705{ 1706 switch (picture->pSourcePict->type) { 1707 case SourcePictTypeSolidFill: 1708 case SourcePictTypeLinear: 1709 return false; 1710 default: 1711 return precise; 1712 } 1713} 1714 1715static bool 1716has_alphamap(PicturePtr p) 1717{ 1718 return p->alphaMap != NULL; 1719} 1720 1721static bool 1722need_upload(struct sna *sna, PicturePtr p) 1723{ 1724 return p->pDrawable && untransformed(p) && 1725 !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER); 1726} 1727 1728static bool 1729source_is_busy(PixmapPtr pixmap) 1730{ 1731 struct sna_pixmap *priv = sna_pixmap(pixmap); 1732 if (priv == NULL) 1733 return false; 1734 1735 if (priv->clear) 1736 return false; 1737 1738 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 1739 return true; 1740 1741 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 1742 return true; 1743 1744 return priv->gpu_damage && !priv->cpu_damage; 1745} 1746 1747static bool 1748source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise) 1749{ 1750 if (sna_picture_is_solid(p, NULL)) 1751 return false; 1752 1753 if (p->pSourcePict) 1754 return check_gradient(p, precise); 1755 1756 if (!gen4_check_repeat(p) || !gen4_check_format(p->format)) 1757 return true; 1758 1759 /* soft errors: perfer to upload/compute rather than readback */ 1760 if (pixmap && source_is_busy(pixmap)) 1761 return false; 1762 1763 return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p); 1764} 1765 1766static bool 1767gen4_composite_fallback(struct sna *sna, 1768 PicturePtr src, 1769 PicturePtr mask, 1770 PicturePtr dst) 1771{ 1772 PixmapPtr src_pixmap; 1773 PixmapPtr mask_pixmap; 1774 PixmapPtr dst_pixmap; 1775 bool src_fallback, mask_fallback; 1776 1777 if (!gen4_check_dst_format(dst->format)) { 1778 DBG(("%s: unknown destination format: %d\n", 1779 __FUNCTION__, dst->format)); 1780 return true; 1781 } 1782 1783 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 1784 1785 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 1786 src_fallback = source_fallback(sna, src, src_pixmap, 1787 dst->polyMode == PolyModePrecise); 1788 1789 if (mask) { 1790 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 1791 mask_fallback = source_fallback(sna, mask, mask_pixmap, 1792 dst->polyMode == PolyModePrecise); 1793 } else { 1794 mask_pixmap = NULL; 1795 mask_fallback = false; 1796 } 1797 1798 /* If we are using the destination as a source and need to 1799 * readback in order to upload the source, do it all 1800 * on the cpu. 1801 */ 1802 if (src_pixmap == dst_pixmap && src_fallback) { 1803 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 1804 return true; 1805 } 1806 if (mask_pixmap == dst_pixmap && mask_fallback) { 1807 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 1808 return true; 1809 } 1810 1811 /* If anything is on the GPU, push everything out to the GPU */ 1812 if (dst_use_gpu(dst_pixmap)) { 1813 DBG(("%s: dst is already on the GPU, try to use GPU\n", 1814 __FUNCTION__)); 1815 return false; 1816 } 1817 1818 if (src_pixmap && !src_fallback) { 1819 DBG(("%s: src is already on the GPU, try to use GPU\n", 1820 __FUNCTION__)); 1821 return false; 1822 } 1823 if (mask_pixmap && !mask_fallback) { 1824 DBG(("%s: mask is already on the GPU, try to use GPU\n", 1825 __FUNCTION__)); 1826 return false; 1827 } 1828 1829 /* However if the dst is not on the GPU and we need to 1830 * render one of the sources using the CPU, we may 1831 * as well do the entire operation in place onthe CPU. 1832 */ 1833 if (src_fallback) { 1834 DBG(("%s: dst is on the CPU and src will fallback\n", 1835 __FUNCTION__)); 1836 return true; 1837 } 1838 1839 if (mask_fallback) { 1840 DBG(("%s: dst is on the CPU and mask will fallback\n", 1841 __FUNCTION__)); 1842 return true; 1843 } 1844 1845 if (too_large(dst_pixmap->drawable.width, 1846 dst_pixmap->drawable.height) && 1847 dst_is_cpu(dst_pixmap)) { 1848 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 1849 return true; 1850 } 1851 1852 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 1853 __FUNCTION__)); 1854 return dst_use_cpu(dst_pixmap); 1855} 1856 1857static int 1858reuse_source(struct sna *sna, 1859 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 1860 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 1861{ 1862 uint32_t color; 1863 1864 if (src_x != msk_x || src_y != msk_y) 1865 return false; 1866 1867 if (src == mask) { 1868 DBG(("%s: mask is source\n", __FUNCTION__)); 1869 *mc = *sc; 1870 mc->bo = kgem_bo_reference(mc->bo); 1871 return true; 1872 } 1873 1874 if (sna_picture_is_solid(mask, &color)) 1875 return gen4_channel_init_solid(sna, mc, color); 1876 1877 if (sc->is_solid) 1878 return false; 1879 1880 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 1881 return false; 1882 1883 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 1884 1885 if (!sna_transform_equal(src->transform, mask->transform)) 1886 return false; 1887 1888 if (!sna_picture_alphamap_equal(src, mask)) 1889 return false; 1890 1891 if (!gen4_check_repeat(mask)) 1892 return false; 1893 1894 if (!gen4_check_filter(mask)) 1895 return false; 1896 1897 if (!gen4_check_format(mask->format)) 1898 return false; 1899 1900 DBG(("%s: reusing source channel for mask with a twist\n", 1901 __FUNCTION__)); 1902 1903 *mc = *sc; 1904 mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone); 1905 mc->filter = gen4_filter(mask->filter); 1906 mc->pict_format = mask->format; 1907 mc->card_format = gen4_get_card_format(mask->format); 1908 mc->bo = kgem_bo_reference(mc->bo); 1909 return true; 1910} 1911 1912static bool 1913gen4_render_composite(struct sna *sna, 1914 uint8_t op, 1915 PicturePtr src, 1916 PicturePtr mask, 1917 PicturePtr dst, 1918 int16_t src_x, int16_t src_y, 1919 int16_t msk_x, int16_t msk_y, 1920 int16_t dst_x, int16_t dst_y, 1921 int16_t width, int16_t height, 1922 unsigned flags, 1923 struct sna_composite_op *tmp) 1924{ 1925 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 1926 width, height, sna->kgem.mode)); 1927 1928 if (op >= ARRAY_SIZE(gen4_blend_op)) 1929 return false; 1930 1931 if (mask == NULL && 1932 sna_blt_composite(sna, op, 1933 src, dst, 1934 src_x, src_y, 1935 dst_x, dst_y, 1936 width, height, 1937 flags, tmp)) 1938 return true; 1939 1940 if (gen4_composite_fallback(sna, src, mask, dst)) 1941 goto fallback; 1942 1943 if (need_tiling(sna, width, height)) 1944 return sna_tiling_composite(op, src, mask, dst, 1945 src_x, src_y, 1946 msk_x, msk_y, 1947 dst_x, dst_y, 1948 width, height, 1949 tmp); 1950 1951 if (!gen4_composite_set_target(sna, tmp, dst, 1952 dst_x, dst_y, width, height, 1953 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 1954 DBG(("%s: failed to set composite target\n", __FUNCTION__)); 1955 goto fallback; 1956 } 1957 1958 tmp->op = op; 1959 switch (gen4_composite_picture(sna, src, &tmp->src, 1960 src_x, src_y, 1961 width, height, 1962 dst_x, dst_y, 1963 dst->polyMode == PolyModePrecise)) { 1964 case -1: 1965 DBG(("%s: failed to prepare source\n", __FUNCTION__)); 1966 goto cleanup_dst; 1967 case 0: 1968 if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 1969 goto cleanup_dst; 1970 /* fall through to fixup */ 1971 case 1: 1972 if (mask == NULL && 1973 sna_blt_composite__convert(sna, 1974 dst_x, dst_y, width, height, 1975 tmp)) 1976 return true; 1977 1978 gen4_composite_channel_convert(&tmp->src); 1979 break; 1980 } 1981 1982 tmp->is_affine = tmp->src.is_affine; 1983 tmp->has_component_alpha = false; 1984 tmp->need_magic_ca_pass = false; 1985 1986 if (mask) { 1987 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 1988 tmp->has_component_alpha = true; 1989 1990 /* Check if it's component alpha that relies on a source alpha and on 1991 * the source value. We can only get one of those into the single 1992 * source value that we get to blend with. 1993 */ 1994 if (gen4_blend_op[op].src_alpha && 1995 (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { 1996 if (op != PictOpOver) { 1997 DBG(("%s -- fallback: unhandled component alpha blend\n", 1998 __FUNCTION__)); 1999 2000 goto cleanup_src; 2001 } 2002 2003 tmp->need_magic_ca_pass = true; 2004 tmp->op = PictOpOutReverse; 2005 } 2006 } 2007 2008 if (!reuse_source(sna, 2009 src, &tmp->src, src_x, src_y, 2010 mask, &tmp->mask, msk_x, msk_y)) { 2011 switch (gen4_composite_picture(sna, mask, &tmp->mask, 2012 msk_x, msk_y, 2013 width, height, 2014 dst_x, dst_y, 2015 dst->polyMode == PolyModePrecise)) { 2016 case -1: 2017 DBG(("%s: failed to prepare mask\n", __FUNCTION__)); 2018 goto cleanup_src; 2019 case 0: 2020 if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2021 goto cleanup_src; 2022 /* fall through to fixup */ 2023 case 1: 2024 gen4_composite_channel_convert(&tmp->mask); 2025 break; 2026 } 2027 } 2028 2029 tmp->is_affine &= tmp->mask.is_affine; 2030 } 2031 2032 tmp->u.gen4.wm_kernel = 2033 gen4_choose_composite_kernel(tmp->op, 2034 tmp->mask.bo != NULL, 2035 tmp->has_component_alpha, 2036 tmp->is_affine); 2037 tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); 2038 2039 tmp->blt = gen4_render_composite_blt; 2040 tmp->box = gen4_render_composite_box; 2041 tmp->boxes = gen4_render_composite_boxes__blt; 2042 if (tmp->emit_boxes) { 2043 tmp->boxes = gen4_render_composite_boxes; 2044#if !FORCE_FLUSH 2045 tmp->thread_boxes = gen4_render_composite_boxes__thread; 2046#endif 2047 } 2048 tmp->done = gen4_render_composite_done; 2049 2050 if (!kgem_check_bo(&sna->kgem, 2051 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2052 NULL)) { 2053 kgem_submit(&sna->kgem); 2054 if (!kgem_check_bo(&sna->kgem, 2055 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2056 NULL)) 2057 goto cleanup_mask; 2058 } 2059 2060 gen4_align_vertex(sna, tmp); 2061 gen4_bind_surfaces(sna, tmp); 2062 return true; 2063 2064cleanup_mask: 2065 if (tmp->mask.bo) { 2066 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2067 tmp->mask.bo = NULL; 2068 } 2069cleanup_src: 2070 if (tmp->src.bo) { 2071 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2072 tmp->src.bo = NULL; 2073 } 2074cleanup_dst: 2075 if (tmp->redirect.real_bo) { 2076 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2077 tmp->redirect.real_bo = NULL; 2078 } 2079fallback: 2080 return (mask == NULL && 2081 sna_blt_composite(sna, op, 2082 src, dst, 2083 src_x, src_y, 2084 dst_x, dst_y, 2085 width, height, 2086 flags | COMPOSITE_FALLBACK, tmp)); 2087} 2088 2089#if !NO_COMPOSITE_SPANS 2090fastcall static void 2091gen4_render_composite_spans_box(struct sna *sna, 2092 const struct sna_composite_spans_op *op, 2093 const BoxRec *box, float opacity) 2094{ 2095 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2096 __FUNCTION__, 2097 op->base.src.offset[0], op->base.src.offset[1], 2098 opacity, 2099 op->base.dst.x, op->base.dst.y, 2100 box->x1, box->y1, 2101 box->x2 - box->x1, 2102 box->y2 - box->y1)); 2103 2104 gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces); 2105 op->prim_emit(sna, op, box, opacity); 2106} 2107 2108static void 2109gen4_render_composite_spans_boxes(struct sna *sna, 2110 const struct sna_composite_spans_op *op, 2111 const BoxRec *box, int nbox, 2112 float opacity) 2113{ 2114 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2115 __FUNCTION__, nbox, 2116 op->base.src.offset[0], op->base.src.offset[1], 2117 opacity, 2118 op->base.dst.x, op->base.dst.y)); 2119 2120 do { 2121 int nbox_this_time; 2122 2123 nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, 2124 gen4_bind_surfaces); 2125 nbox -= nbox_this_time; 2126 2127 do { 2128 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2129 box->x1, box->y1, 2130 box->x2 - box->x1, 2131 box->y2 - box->y1)); 2132 2133 op->prim_emit(sna, op, box++, opacity); 2134 } while (--nbox_this_time); 2135 } while (nbox); 2136} 2137 2138fastcall static void 2139gen4_render_composite_spans_boxes__thread(struct sna *sna, 2140 const struct sna_composite_spans_op *op, 2141 const struct sna_opacity_box *box, 2142 int nbox) 2143{ 2144 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2145 __FUNCTION__, nbox, 2146 op->base.src.offset[0], op->base.src.offset[1], 2147 op->base.dst.x, op->base.dst.y)); 2148 assert(nbox); 2149 2150 sna_vertex_lock(&sna->render); 2151 do { 2152 int nbox_this_time; 2153 float *v; 2154 2155 nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, 2156 gen4_bind_surfaces); 2157 assert(nbox_this_time); 2158 nbox -= nbox_this_time; 2159 2160 v = sna->render.vertices + sna->render.vertex_used; 2161 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2162 2163 sna_vertex_acquire__locked(&sna->render); 2164 sna_vertex_unlock(&sna->render); 2165 2166 op->emit_boxes(op, box, nbox_this_time, v); 2167 box += nbox_this_time; 2168 2169 sna_vertex_lock(&sna->render); 2170 sna_vertex_release__locked(&sna->render); 2171 } while (nbox); 2172 sna_vertex_unlock(&sna->render); 2173} 2174 2175fastcall static void 2176gen4_render_composite_spans_done(struct sna *sna, 2177 const struct sna_composite_spans_op *op) 2178{ 2179 if (sna->render.vertex_offset) 2180 gen4_vertex_flush(sna); 2181 2182 DBG(("%s()\n", __FUNCTION__)); 2183 2184 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2185 sna_render_composite_redirect_done(sna, &op->base); 2186} 2187 2188static bool 2189gen4_check_composite_spans(struct sna *sna, 2190 uint8_t op, PicturePtr src, PicturePtr dst, 2191 int16_t width, int16_t height, 2192 unsigned flags) 2193{ 2194 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2195 __FUNCTION__, op, width, height, flags)); 2196 2197 if (op >= ARRAY_SIZE(gen4_blend_op)) 2198 return false; 2199 2200 if (gen4_composite_fallback(sna, src, NULL, dst)) { 2201 DBG(("%s: operation would fallback\n", __FUNCTION__)); 2202 return false; 2203 } 2204 2205 if (need_tiling(sna, width, height) && 2206 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2207 DBG(("%s: fallback, tiled operation not on GPU\n", 2208 __FUNCTION__)); 2209 return false; 2210 } 2211 2212 if (FORCE_SPANS) 2213 return FORCE_SPANS > 0; 2214 2215 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2216 struct sna_pixmap *priv; 2217 2218 if (FORCE_NONRECTILINEAR_SPANS) 2219 return FORCE_NONRECTILINEAR_SPANS > 0; 2220 2221 if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0) 2222 return false; 2223 2224 priv = sna_pixmap_from_drawable(dst->pDrawable); 2225 assert(priv); 2226 2227 if (priv->cpu_bo && 2228 __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) 2229 return true; 2230 2231 if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2232 return false; 2233 2234 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2235 } 2236 2237 return true; 2238} 2239 2240static bool 2241gen4_render_composite_spans(struct sna *sna, 2242 uint8_t op, 2243 PicturePtr src, 2244 PicturePtr dst, 2245 int16_t src_x, int16_t src_y, 2246 int16_t dst_x, int16_t dst_y, 2247 int16_t width, int16_t height, 2248 unsigned flags, 2249 struct sna_composite_spans_op *tmp) 2250{ 2251 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2252 width, height, flags, sna->kgem.ring)); 2253 2254 assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags)); 2255 2256 if (need_tiling(sna, width, height)) { 2257 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2258 __FUNCTION__, width, height)); 2259 return sna_tiling_composite_spans(op, src, dst, 2260 src_x, src_y, dst_x, dst_y, 2261 width, height, flags, tmp); 2262 } 2263 2264 tmp->base.op = op; 2265 if (!gen4_composite_set_target(sna, &tmp->base, dst, 2266 dst_x, dst_y, width, height, true)) 2267 return false; 2268 2269 switch (gen4_composite_picture(sna, src, &tmp->base.src, 2270 src_x, src_y, 2271 width, height, 2272 dst_x, dst_y, 2273 dst->polyMode == PolyModePrecise)) { 2274 case -1: 2275 goto cleanup_dst; 2276 case 0: 2277 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2278 goto cleanup_dst; 2279 /* fall through to fixup */ 2280 case 1: 2281 gen4_composite_channel_convert(&tmp->base.src); 2282 break; 2283 } 2284 2285 tmp->base.mask.bo = NULL; 2286 tmp->base.mask.filter = SAMPLER_FILTER_NEAREST; 2287 tmp->base.mask.repeat = SAMPLER_EXTEND_NONE; 2288 2289 tmp->base.is_affine = tmp->base.src.is_affine; 2290 tmp->base.has_component_alpha = false; 2291 tmp->base.need_magic_ca_pass = false; 2292 2293 tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp); 2294 tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; 2295 2296 tmp->box = gen4_render_composite_spans_box; 2297 tmp->boxes = gen4_render_composite_spans_boxes; 2298 if (tmp->emit_boxes) 2299 tmp->thread_boxes = gen4_render_composite_spans_boxes__thread; 2300 tmp->done = gen4_render_composite_spans_done; 2301 2302 if (!kgem_check_bo(&sna->kgem, 2303 tmp->base.dst.bo, tmp->base.src.bo, 2304 NULL)) { 2305 kgem_submit(&sna->kgem); 2306 if (!kgem_check_bo(&sna->kgem, 2307 tmp->base.dst.bo, tmp->base.src.bo, 2308 NULL)) 2309 goto cleanup_src; 2310 } 2311 2312 gen4_align_vertex(sna, &tmp->base); 2313 gen4_bind_surfaces(sna, &tmp->base); 2314 return true; 2315 2316cleanup_src: 2317 if (tmp->base.src.bo) 2318 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2319cleanup_dst: 2320 if (tmp->base.redirect.real_bo) 2321 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2322 return false; 2323} 2324#endif 2325 2326static void 2327gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) 2328{ 2329 uint32_t *binding_table; 2330 uint16_t offset, dirty; 2331 2332 gen4_get_batch(sna, op); 2333 dirty = kgem_bo_is_dirty(op->dst.bo); 2334 2335 binding_table = gen4_composite_get_binding_table(sna, &offset); 2336 2337 binding_table[0] = 2338 gen4_bind_bo(sna, 2339 op->dst.bo, op->dst.width, op->dst.height, 2340 gen4_get_dest_format(op->dst.format), 2341 true); 2342 binding_table[1] = 2343 gen4_bind_bo(sna, 2344 op->src.bo, op->src.width, op->src.height, 2345 op->src.card_format, 2346 false); 2347 2348 if (sna->kgem.surface == offset && 2349 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) { 2350 sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 2351 offset = sna->render_state.gen4.surface_table; 2352 } 2353 2354 if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 2355 dirty = 0; 2356 2357 gen4_emit_state(sna, op, offset | dirty); 2358} 2359 2360static void 2361gen4_render_copy_one(struct sna *sna, 2362 const struct sna_composite_op *op, 2363 int sx, int sy, 2364 int w, int h, 2365 int dx, int dy) 2366{ 2367 gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces); 2368 2369 OUT_VERTEX(dx+w, dy+h); 2370 OUT_VERTEX_F((sx+w)*op->src.scale[0]); 2371 OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2372 2373 OUT_VERTEX(dx, dy+h); 2374 OUT_VERTEX_F(sx*op->src.scale[0]); 2375 OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2376 2377 OUT_VERTEX(dx, dy); 2378 OUT_VERTEX_F(sx*op->src.scale[0]); 2379 OUT_VERTEX_F(sy*op->src.scale[1]); 2380} 2381 2382static bool 2383gen4_render_copy_boxes(struct sna *sna, uint8_t alu, 2384 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2385 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2386 const BoxRec *box, int n, unsigned flags) 2387{ 2388 struct sna_composite_op tmp; 2389 2390 DBG(("%s x %d\n", __FUNCTION__, n)); 2391 2392 if (sna_blt_compare_depth(src, dst) && 2393 sna_blt_copy_boxes(sna, alu, 2394 src_bo, src_dx, src_dy, 2395 dst_bo, dst_dx, dst_dy, 2396 dst->bitsPerPixel, 2397 box, n)) 2398 return true; 2399 2400 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { 2401fallback_blt: 2402 if (!sna_blt_compare_depth(src, dst)) 2403 return false; 2404 2405 return sna_blt_copy_boxes_fallback(sna, alu, 2406 src, src_bo, src_dx, src_dy, 2407 dst, dst_bo, dst_dx, dst_dy, 2408 box, n); 2409 } 2410 2411 memset(&tmp, 0, sizeof(tmp)); 2412 2413 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 2414 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 2415 2416 if (dst->depth == src->depth) { 2417 tmp.dst.format = sna_render_format_for_depth(dst->depth); 2418 tmp.src.pict_format = tmp.dst.format; 2419 } else { 2420 tmp.dst.format = sna_format_for_depth(dst->depth); 2421 tmp.src.pict_format = sna_format_for_depth(src->depth); 2422 } 2423 if (!gen4_check_format(tmp.src.pict_format)) 2424 goto fallback_blt; 2425 2426 tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; 2427 2428 tmp.dst.pixmap = (PixmapPtr)dst; 2429 tmp.dst.width = dst->width; 2430 tmp.dst.height = dst->height; 2431 tmp.dst.x = tmp.dst.y = 0; 2432 tmp.dst.bo = dst_bo; 2433 tmp.damage = NULL; 2434 2435 sna_render_composite_redirect_init(&tmp); 2436 if (too_large(tmp.dst.width, tmp.dst.height)) { 2437 BoxRec extents = box[0]; 2438 int i; 2439 2440 for (i = 1; i < n; i++) { 2441 if (box[i].x1 < extents.x1) 2442 extents.x1 = box[i].x1; 2443 if (box[i].y1 < extents.y1) 2444 extents.y1 = box[i].y1; 2445 2446 if (box[i].x2 > extents.x2) 2447 extents.x2 = box[i].x2; 2448 if (box[i].y2 > extents.y2) 2449 extents.y2 = box[i].y2; 2450 } 2451 if (!sna_render_composite_redirect(sna, &tmp, 2452 extents.x1 + dst_dx, 2453 extents.y1 + dst_dy, 2454 extents.x2 - extents.x1, 2455 extents.y2 - extents.y1, 2456 n > 1)) 2457 goto fallback_tiled; 2458 } 2459 2460 tmp.src.filter = SAMPLER_FILTER_NEAREST; 2461 tmp.src.repeat = SAMPLER_EXTEND_NONE; 2462 tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format); 2463 if (too_large(src->width, src->height)) { 2464 BoxRec extents = box[0]; 2465 int i; 2466 2467 for (i = 1; i < n; i++) { 2468 if (box[i].x1 < extents.x1) 2469 extents.x1 = box[i].x1; 2470 if (box[i].y1 < extents.y1) 2471 extents.y1 = box[i].y1; 2472 2473 if (box[i].x2 > extents.x2) 2474 extents.x2 = box[i].x2; 2475 if (box[i].y2 > extents.y2) 2476 extents.y2 = box[i].y2; 2477 } 2478 2479 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2480 extents.x1 + src_dx, 2481 extents.y1 + src_dy, 2482 extents.x2 - extents.x1, 2483 extents.y2 - extents.y1)) 2484 goto fallback_tiled_dst; 2485 } else { 2486 tmp.src.bo = kgem_bo_reference(src_bo); 2487 tmp.src.width = src->width; 2488 tmp.src.height = src->height; 2489 tmp.src.offset[0] = tmp.src.offset[1] = 0; 2490 tmp.src.scale[0] = 1.f/src->width; 2491 tmp.src.scale[1] = 1.f/src->height; 2492 } 2493 2494 tmp.is_affine = true; 2495 tmp.floats_per_vertex = 3; 2496 tmp.floats_per_rect = 9; 2497 tmp.u.gen4.wm_kernel = WM_KERNEL; 2498 tmp.u.gen4.ve_id = 2; 2499 2500 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2501 kgem_submit(&sna->kgem); 2502 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2503 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2504 if (tmp.redirect.real_bo) 2505 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2506 2507 goto fallback_blt; 2508 } 2509 } 2510 2511 dst_dx += tmp.dst.x; 2512 dst_dy += tmp.dst.y; 2513 tmp.dst.x = tmp.dst.y = 0; 2514 2515 src_dx += tmp.src.offset[0]; 2516 src_dy += tmp.src.offset[1]; 2517 2518 gen4_align_vertex(sna, &tmp); 2519 gen4_copy_bind_surfaces(sna, &tmp); 2520 2521 do { 2522 gen4_render_copy_one(sna, &tmp, 2523 box->x1 + src_dx, box->y1 + src_dy, 2524 box->x2 - box->x1, box->y2 - box->y1, 2525 box->x1 + dst_dx, box->y1 + dst_dy); 2526 box++; 2527 } while (--n); 2528 2529 gen4_vertex_flush(sna); 2530 sna_render_composite_redirect_done(sna, &tmp); 2531 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2532 return true; 2533 2534fallback_tiled_dst: 2535 if (tmp.redirect.real_bo) 2536 kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2537fallback_tiled: 2538 if (sna_blt_compare_depth(src, dst) && 2539 sna_blt_copy_boxes(sna, alu, 2540 src_bo, src_dx, src_dy, 2541 dst_bo, dst_dx, dst_dy, 2542 dst->bitsPerPixel, 2543 box, n)) 2544 return true; 2545 2546 return sna_tiling_copy_boxes(sna, alu, 2547 src, src_bo, src_dx, src_dy, 2548 dst, dst_bo, dst_dx, dst_dy, 2549 box, n); 2550} 2551 2552static void 2553gen4_render_copy_blt(struct sna *sna, 2554 const struct sna_copy_op *op, 2555 int16_t sx, int16_t sy, 2556 int16_t w, int16_t h, 2557 int16_t dx, int16_t dy) 2558{ 2559 gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy); 2560} 2561 2562static void 2563gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2564{ 2565 if (sna->render.vertex_offset) 2566 gen4_vertex_flush(sna); 2567} 2568 2569static bool 2570gen4_render_copy(struct sna *sna, uint8_t alu, 2571 PixmapPtr src, struct kgem_bo *src_bo, 2572 PixmapPtr dst, struct kgem_bo *dst_bo, 2573 struct sna_copy_op *op) 2574{ 2575 DBG(("%s: src=%ld, dst=%ld, alu=%d\n", 2576 __FUNCTION__, 2577 src->drawable.serialNumber, 2578 dst->drawable.serialNumber, 2579 alu)); 2580 2581 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2582 sna_blt_copy(sna, alu, 2583 src_bo, dst_bo, 2584 dst->drawable.bitsPerPixel, 2585 op)) 2586 return true; 2587 2588 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2589 too_large(src->drawable.width, src->drawable.height) || 2590 too_large(dst->drawable.width, dst->drawable.height)) { 2591fallback: 2592 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2593 return false; 2594 2595 return sna_blt_copy(sna, alu, src_bo, dst_bo, 2596 dst->drawable.bitsPerPixel, 2597 op); 2598 } 2599 2600 if (dst->drawable.depth == src->drawable.depth) { 2601 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2602 op->base.src.pict_format = op->base.dst.format; 2603 } else { 2604 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2605 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 2606 } 2607 if (!gen4_check_format(op->base.src.pict_format)) 2608 goto fallback; 2609 2610 op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; 2611 2612 op->base.dst.pixmap = dst; 2613 op->base.dst.width = dst->drawable.width; 2614 op->base.dst.height = dst->drawable.height; 2615 op->base.dst.bo = dst_bo; 2616 2617 op->base.src.bo = src_bo; 2618 op->base.src.card_format = 2619 gen4_get_card_format(op->base.src.pict_format); 2620 op->base.src.width = src->drawable.width; 2621 op->base.src.height = src->drawable.height; 2622 op->base.src.scale[0] = 1.f/src->drawable.width; 2623 op->base.src.scale[1] = 1.f/src->drawable.height; 2624 op->base.src.filter = SAMPLER_FILTER_NEAREST; 2625 op->base.src.repeat = SAMPLER_EXTEND_NONE; 2626 2627 op->base.is_affine = true; 2628 op->base.floats_per_vertex = 3; 2629 op->base.floats_per_rect = 9; 2630 op->base.u.gen4.wm_kernel = WM_KERNEL; 2631 op->base.u.gen4.ve_id = 2; 2632 2633 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2634 kgem_submit(&sna->kgem); 2635 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 2636 goto fallback; 2637 } 2638 2639 if (kgem_bo_is_dirty(src_bo)) { 2640 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2641 sna_blt_copy(sna, alu, 2642 src_bo, dst_bo, 2643 dst->drawable.bitsPerPixel, 2644 op)) 2645 return true; 2646 } 2647 2648 gen4_align_vertex(sna, &op->base); 2649 gen4_copy_bind_surfaces(sna, &op->base); 2650 2651 op->blt = gen4_render_copy_blt; 2652 op->done = gen4_render_copy_done; 2653 return true; 2654} 2655 2656static void 2657gen4_render_fill_rectangle(struct sna *sna, 2658 const struct sna_composite_op *op, 2659 int x, int y, int w, int h) 2660{ 2661 gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 2662 2663 OUT_VERTEX(x+w, y+h); 2664 OUT_VERTEX_F(.5); 2665 2666 OUT_VERTEX(x, y+h); 2667 OUT_VERTEX_F(.5); 2668 2669 OUT_VERTEX(x, y); 2670 OUT_VERTEX_F(.5); 2671} 2672 2673static bool 2674gen4_render_fill_boxes(struct sna *sna, 2675 CARD8 op, 2676 PictFormat format, 2677 const xRenderColor *color, 2678 const DrawableRec *dst, struct kgem_bo *dst_bo, 2679 const BoxRec *box, int n) 2680{ 2681 struct sna_composite_op tmp; 2682 uint32_t pixel; 2683 2684 if (op >= ARRAY_SIZE(gen4_blend_op)) { 2685 DBG(("%s: fallback due to unhandled blend op: %d\n", 2686 __FUNCTION__, op)); 2687 return false; 2688 } 2689 2690 if (op <= PictOpSrc) { 2691 uint8_t alu = GXinvalid; 2692 2693 pixel = 0; 2694 if (op == PictOpClear) 2695 alu = GXclear; 2696 else if (sna_get_pixel_from_rgba(&pixel, 2697 color->red, 2698 color->green, 2699 color->blue, 2700 color->alpha, 2701 format)) 2702 alu = GXcopy; 2703 2704 if (alu != GXinvalid && 2705 sna_blt_fill_boxes(sna, alu, 2706 dst_bo, dst->bitsPerPixel, 2707 pixel, box, n)) 2708 return true; 2709 2710 if (!gen4_check_dst_format(format)) 2711 return false; 2712 2713 if (too_large(dst->width, dst->height)) 2714 return sna_tiling_fill_boxes(sna, op, format, color, 2715 dst, dst_bo, box, n); 2716 } 2717 2718 if (op == PictOpClear) { 2719 pixel = 0; 2720 op = PictOpSrc; 2721 } else if (!sna_get_pixel_from_rgba(&pixel, 2722 color->red, 2723 color->green, 2724 color->blue, 2725 color->alpha, 2726 PICT_a8r8g8b8)) 2727 return false; 2728 2729 DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n)); 2730 2731 memset(&tmp, 0, sizeof(tmp)); 2732 2733 tmp.op = op; 2734 2735 tmp.dst.pixmap = (PixmapPtr)dst; 2736 tmp.dst.width = dst->width; 2737 tmp.dst.height = dst->height; 2738 tmp.dst.format = format; 2739 tmp.dst.bo = dst_bo; 2740 2741 gen4_channel_init_solid(sna, &tmp.src, pixel); 2742 2743 tmp.is_affine = true; 2744 tmp.floats_per_vertex = 2; 2745 tmp.floats_per_rect = 6; 2746 tmp.u.gen4.wm_kernel = WM_KERNEL; 2747 tmp.u.gen4.ve_id = 1; 2748 2749 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2750 kgem_submit(&sna->kgem); 2751 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) 2752 return false; 2753 } 2754 2755 gen4_align_vertex(sna, &tmp); 2756 gen4_bind_surfaces(sna, &tmp); 2757 2758 do { 2759 gen4_render_fill_rectangle(sna, &tmp, 2760 box->x1, box->y1, 2761 box->x2 - box->x1, 2762 box->y2 - box->y1); 2763 box++; 2764 } while (--n); 2765 2766 gen4_vertex_flush(sna); 2767 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2768 return true; 2769} 2770 2771static void 2772gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op, 2773 int16_t x, int16_t y, int16_t w, int16_t h) 2774{ 2775 gen4_render_fill_rectangle(sna, &op->base, x, y, w, h); 2776} 2777 2778fastcall static void 2779gen4_render_fill_op_box(struct sna *sna, 2780 const struct sna_fill_op *op, 2781 const BoxRec *box) 2782{ 2783 gen4_render_fill_rectangle(sna, &op->base, 2784 box->x1, box->y1, 2785 box->x2-box->x1, box->y2-box->y1); 2786} 2787 2788fastcall static void 2789gen4_render_fill_op_boxes(struct sna *sna, 2790 const struct sna_fill_op *op, 2791 const BoxRec *box, 2792 int nbox) 2793{ 2794 do { 2795 gen4_render_fill_rectangle(sna, &op->base, 2796 box->x1, box->y1, 2797 box->x2-box->x1, box->y2-box->y1); 2798 box++; 2799 } while (--nbox); 2800} 2801 2802static void 2803gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2804{ 2805 if (sna->render.vertex_offset) 2806 gen4_vertex_flush(sna); 2807 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2808} 2809 2810static bool 2811gen4_render_fill(struct sna *sna, uint8_t alu, 2812 PixmapPtr dst, struct kgem_bo *dst_bo, 2813 uint32_t color, unsigned flags, 2814 struct sna_fill_op *op) 2815{ 2816 if (sna_blt_fill(sna, alu, 2817 dst_bo, dst->drawable.bitsPerPixel, 2818 color, 2819 op)) 2820 return true; 2821 2822 if (!(alu == GXcopy || alu == GXclear) || 2823 too_large(dst->drawable.width, dst->drawable.height)) 2824 return sna_blt_fill(sna, alu, 2825 dst_bo, dst->drawable.bitsPerPixel, 2826 color, 2827 op); 2828 2829 if (alu == GXclear) 2830 color = 0; 2831 2832 op->base.op = color == 0 ? PictOpClear : PictOpSrc; 2833 2834 op->base.dst.pixmap = dst; 2835 op->base.dst.width = dst->drawable.width; 2836 op->base.dst.height = dst->drawable.height; 2837 op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2838 op->base.dst.bo = dst_bo; 2839 op->base.dst.x = op->base.dst.y = 0; 2840 2841 op->base.need_magic_ca_pass = 0; 2842 op->base.has_component_alpha = 0; 2843 2844 gen4_channel_init_solid(sna, &op->base.src, 2845 sna_rgba_for_color(color, 2846 dst->drawable.depth)); 2847 op->base.mask.bo = NULL; 2848 2849 op->base.is_affine = true; 2850 op->base.floats_per_vertex = 2; 2851 op->base.floats_per_rect = 6; 2852 op->base.u.gen4.wm_kernel = WM_KERNEL; 2853 op->base.u.gen4.ve_id = 1; 2854 2855 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2856 kgem_submit(&sna->kgem); 2857 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2858 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2859 return false; 2860 } 2861 } 2862 2863 gen4_align_vertex(sna, &op->base); 2864 gen4_bind_surfaces(sna, &op->base); 2865 2866 op->blt = gen4_render_fill_op_blt; 2867 op->box = gen4_render_fill_op_box; 2868 op->boxes = gen4_render_fill_op_boxes; 2869 op->points = NULL; 2870 op->done = gen4_render_fill_op_done; 2871 return true; 2872} 2873 2874static bool 2875gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2876 uint32_t color, 2877 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 2878 uint8_t alu) 2879{ 2880 BoxRec box; 2881 2882 box.x1 = x1; 2883 box.y1 = y1; 2884 box.x2 = x2; 2885 box.y2 = y2; 2886 2887 return sna_blt_fill_boxes(sna, alu, 2888 bo, dst->drawable.bitsPerPixel, 2889 color, &box, 1); 2890} 2891 2892static bool 2893gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2894 uint32_t color, 2895 int16_t x1, int16_t y1, 2896 int16_t x2, int16_t y2, 2897 uint8_t alu) 2898{ 2899 struct sna_composite_op tmp; 2900 2901 DBG(("%s: color=%08x\n", __FUNCTION__, color)); 2902 2903 if (gen4_render_fill_one_try_blt(sna, dst, bo, color, 2904 x1, y1, x2, y2, alu)) 2905 return true; 2906 2907 /* Must use the BLT if we can't RENDER... */ 2908 if (!(alu == GXcopy || alu == GXclear) || 2909 too_large(dst->drawable.width, dst->drawable.height)) 2910 return false; 2911 2912 if (alu == GXclear) 2913 color = 0; 2914 2915 tmp.op = color == 0 ? PictOpClear : PictOpSrc; 2916 2917 tmp.dst.pixmap = dst; 2918 tmp.dst.width = dst->drawable.width; 2919 tmp.dst.height = dst->drawable.height; 2920 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 2921 tmp.dst.bo = bo; 2922 tmp.dst.x = tmp.dst.y = 0; 2923 2924 gen4_channel_init_solid(sna, &tmp.src, 2925 sna_rgba_for_color(color, 2926 dst->drawable.depth)); 2927 tmp.mask.bo = NULL; 2928 tmp.mask.filter = SAMPLER_FILTER_NEAREST; 2929 tmp.mask.repeat = SAMPLER_EXTEND_NONE; 2930 2931 tmp.is_affine = true; 2932 tmp.floats_per_vertex = 2; 2933 tmp.floats_per_rect = 6; 2934 tmp.has_component_alpha = false; 2935 tmp.need_magic_ca_pass = false; 2936 2937 tmp.u.gen4.wm_kernel = WM_KERNEL; 2938 tmp.u.gen4.ve_id = 1; 2939 2940 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 2941 kgem_submit(&sna->kgem); 2942 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 2943 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2944 return false; 2945 } 2946 } 2947 2948 gen4_align_vertex(sna, &tmp); 2949 gen4_bind_surfaces(sna, &tmp); 2950 2951 gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); 2952 2953 gen4_vertex_flush(sna); 2954 kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2955 2956 return true; 2957} 2958 2959static void gen4_render_reset(struct sna *sna) 2960{ 2961 sna->render_state.gen4.needs_invariant = true; 2962 sna->render_state.gen4.needs_urb = true; 2963 sna->render_state.gen4.ve_id = -1; 2964 sna->render_state.gen4.last_primitive = -1; 2965 sna->render_state.gen4.last_pipelined_pointers = -1; 2966 2967 sna->render_state.gen4.drawrect_offset = -1; 2968 sna->render_state.gen4.drawrect_limit = -1; 2969 sna->render_state.gen4.surface_table = 0; 2970 2971 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 2972 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 2973 discard_vbo(sna); 2974 } 2975 2976 sna->render.vertex_offset = 0; 2977 sna->render.nvertex_reloc = 0; 2978 sna->render.vb_id = 0; 2979} 2980 2981static void gen4_render_fini(struct sna *sna) 2982{ 2983 kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo); 2984} 2985 2986static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) 2987{ 2988 struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32); 2989 2990 /* Set up the vertex shader to be disabled (passthrough) */ 2991 vs->thread4.nr_urb_entries = URB_VS_ENTRIES; 2992 vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 2993 vs->vs6.vs_enable = 0; 2994 vs->vs6.vert_cache_disable = 1; 2995 2996 return sna_static_stream_offsetof(stream, vs); 2997} 2998 2999static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, 3000 uint32_t kernel) 3001{ 3002 struct gen4_sf_unit_state *sf; 3003 3004 sf = sna_static_stream_map(stream, sizeof(*sf), 32); 3005 3006 sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 3007 sf->thread0.kernel_start_pointer = kernel >> 6; 3008 sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 3009 sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 3010 sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 3011 /* don't smash vertex header, read start from dw8 */ 3012 sf->thread3.urb_entry_read_offset = 1; 3013 sf->thread3.dispatch_grf_start_reg = 3; 3014 sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1; 3015 sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 3016 sf->thread4.nr_urb_entries = URB_SF_ENTRIES; 3017 sf->sf5.viewport_transform = false; /* skip viewport */ 3018 sf->sf6.cull_mode = GEN4_CULLMODE_NONE; 3019 sf->sf6.scissor = 0; 3020 sf->sf7.trifan_pv = 2; 3021 sf->sf6.dest_org_vbias = 0x8; 3022 sf->sf6.dest_org_hbias = 0x8; 3023 3024 return sna_static_stream_offsetof(stream, sf); 3025} 3026 3027static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, 3028 sampler_filter_t src_filter, 3029 sampler_extend_t src_extend, 3030 sampler_filter_t mask_filter, 3031 sampler_extend_t mask_extend) 3032{ 3033 struct gen4_sampler_state *sampler_state; 3034 3035 sampler_state = sna_static_stream_map(stream, 3036 sizeof(struct gen4_sampler_state) * 2, 3037 32); 3038 sampler_state_init(&sampler_state[0], src_filter, src_extend); 3039 sampler_state_init(&sampler_state[1], mask_filter, mask_extend); 3040 3041 return sna_static_stream_offsetof(stream, sampler_state); 3042} 3043 3044static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, 3045 int gen, 3046 bool has_mask, 3047 uint32_t kernel, 3048 uint32_t sampler) 3049{ 3050 assert((kernel & 63) == 0); 3051 wm->thread0.kernel_start_pointer = kernel >> 6; 3052 wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 3053 3054 wm->thread1.single_program_flow = 0; 3055 3056 wm->thread3.const_urb_entry_read_length = 0; 3057 wm->thread3.const_urb_entry_read_offset = 0; 3058 3059 wm->thread3.urb_entry_read_offset = 0; 3060 wm->thread3.dispatch_grf_start_reg = 3; 3061 3062 assert((sampler & 31) == 0); 3063 wm->wm4.sampler_state_pointer = sampler >> 5; 3064 wm->wm4.sampler_count = 1; 3065 3066 wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1; 3067 wm->wm5.transposed_urb_read = 0; 3068 wm->wm5.thread_dispatch_enable = 1; 3069 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 3070 * start point 3071 */ 3072 wm->wm5.enable_16_pix = 1; 3073 wm->wm5.enable_8_pix = 0; 3074 wm->wm5.early_depth_test = 1; 3075 3076 /* Each pair of attributes (src/mask coords) is two URB entries */ 3077 if (has_mask) { 3078 wm->thread1.binding_table_entry_count = 3; 3079 wm->thread3.urb_entry_read_length = 4; 3080 } else { 3081 wm->thread1.binding_table_entry_count = 2; 3082 wm->thread3.urb_entry_read_length = 2; 3083 } 3084} 3085 3086static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) 3087{ 3088 uint8_t *ptr, *base; 3089 int i, j; 3090 3091 base = ptr = 3092 sna_static_stream_map(stream, 3093 GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64, 3094 64); 3095 3096 for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) { 3097 for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) { 3098 struct gen4_cc_unit_state *state = 3099 (struct gen4_cc_unit_state *)ptr; 3100 3101 state->cc3.blend_enable = 3102 !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE); 3103 3104 state->cc5.logicop_func = 0xc; /* COPY */ 3105 state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD; 3106 3107 /* Fill in alpha blend factors same as color, for the future. */ 3108 state->cc5.ia_src_blend_factor = i; 3109 state->cc5.ia_dest_blend_factor = j; 3110 3111 state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD; 3112 state->cc6.clamp_post_alpha_blend = 1; 3113 state->cc6.clamp_pre_alpha_blend = 1; 3114 state->cc6.src_blend_factor = i; 3115 state->cc6.dest_blend_factor = j; 3116 3117 ptr += 64; 3118 } 3119 } 3120 3121 return sna_static_stream_offsetof(stream, base); 3122} 3123 3124static bool gen4_render_setup(struct sna *sna) 3125{ 3126 struct gen4_render_state *state = &sna->render_state.gen4; 3127 struct sna_static_stream general; 3128 struct gen4_wm_unit_state_padded *wm_state; 3129 uint32_t sf, wm[KERNEL_COUNT]; 3130 int i, j, k, l, m; 3131 3132 sna_static_stream_init(&general); 3133 3134 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3135 * dumps, you know it points to zero. 3136 */ 3137 null_create(&general); 3138 3139 sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask); 3140 for (m = 0; m < KERNEL_COUNT; m++) { 3141 if (wm_kernels[m].size) { 3142 wm[m] = sna_static_stream_add(&general, 3143 wm_kernels[m].data, 3144 wm_kernels[m].size, 3145 64); 3146 } else { 3147 wm[m] = sna_static_stream_compile_wm(sna, &general, 3148 wm_kernels[m].data, 3149 16); 3150 } 3151 } 3152 3153 state->vs = gen4_create_vs_unit_state(&general); 3154 state->sf = gen4_create_sf_state(&general, sf); 3155 3156 wm_state = sna_static_stream_map(&general, 3157 sizeof(*wm_state) * KERNEL_COUNT * 3158 FILTER_COUNT * EXTEND_COUNT * 3159 FILTER_COUNT * EXTEND_COUNT, 3160 64); 3161 state->wm = sna_static_stream_offsetof(&general, wm_state); 3162 for (i = 0; i < FILTER_COUNT; i++) { 3163 for (j = 0; j < EXTEND_COUNT; j++) { 3164 for (k = 0; k < FILTER_COUNT; k++) { 3165 for (l = 0; l < EXTEND_COUNT; l++) { 3166 uint32_t sampler_state; 3167 3168 sampler_state = 3169 gen4_create_sampler_state(&general, 3170 i, j, 3171 k, l); 3172 3173 for (m = 0; m < KERNEL_COUNT; m++) { 3174 gen4_init_wm_state(&wm_state->state, 3175 sna->kgem.gen, 3176 wm_kernels[m].has_mask, 3177 wm[m], sampler_state); 3178 wm_state++; 3179 } 3180 } 3181 } 3182 } 3183 } 3184 3185 state->cc = gen4_create_cc_unit_state(&general); 3186 3187 state->general_bo = sna_static_stream_fini(sna, &general); 3188 return state->general_bo != NULL; 3189} 3190 3191const char *gen4_render_init(struct sna *sna, const char *backend) 3192{ 3193 if (!gen4_render_setup(sna)) 3194 return backend; 3195 3196 sna->kgem.retire = gen4_render_retire; 3197 sna->kgem.expire = gen4_render_expire; 3198 3199#if !NO_COMPOSITE 3200 sna->render.composite = gen4_render_composite; 3201 sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3202#endif 3203#if !NO_COMPOSITE_SPANS 3204 sna->render.check_composite_spans = gen4_check_composite_spans; 3205 sna->render.composite_spans = gen4_render_composite_spans; 3206 if (0) 3207 sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3208#endif 3209 3210#if !NO_VIDEO 3211 sna->render.video = gen4_render_video; 3212#endif 3213 3214#if !NO_COPY_BOXES 3215 sna->render.copy_boxes = gen4_render_copy_boxes; 3216#endif 3217#if !NO_COPY 3218 sna->render.copy = gen4_render_copy; 3219#endif 3220 3221#if !NO_FILL_BOXES 3222 sna->render.fill_boxes = gen4_render_fill_boxes; 3223#endif 3224#if !NO_FILL 3225 sna->render.fill = gen4_render_fill; 3226#endif 3227#if !NO_FILL_ONE 3228 sna->render.fill_one = gen4_render_fill_one; 3229#endif 3230 3231 sna->render.flush = gen4_render_flush; 3232 sna->render.reset = gen4_render_reset; 3233 sna->render.fini = gen4_render_fini; 3234 3235 sna->render.max_3d_size = GEN4_MAX_3D_SIZE; 3236 sna->render.max_3d_pitch = 1 << 18; 3237 return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)"; 3238} 3239