1/* 2 * Copyright © 2006,2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Wang Zhenyu <zhenyu.z.wang@intel.com> 25 * Eric Anholt <eric@anholt.net> 26 * Chris Wilson <chris@chris-wilson.co.uk> 27 * 28 */ 29 30#ifdef HAVE_CONFIG_H 31#include "config.h" 32#endif 33 34#include "sna.h" 35#include "sna_reg.h" 36#include "sna_render.h" 37#include "sna_render_inline.h" 38 39#include "gen2_render.h" 40 41#define NO_COMPOSITE 0 42#define NO_COMPOSITE_SPANS 0 43#define NO_COPY 0 44#define NO_COPY_BOXES 0 45#define NO_FILL 0 46#define NO_FILL_ONE 0 47#define NO_FILL_BOXES 0 48 49#define MAX_3D_SIZE 2048 50#define MAX_3D_PITCH 8192 51 52#define BATCH(v) batch_emit(sna, v) 53#define BATCH_F(v) batch_emit_float(sna, v) 54#define VERTEX(v) batch_emit_float(sna, v) 55 56static const struct blendinfo { 57 bool dst_alpha; 58 bool src_alpha; 59 uint32_t src_blend; 60 uint32_t dst_blend; 61} gen2_blend_op[] = { 62 /* Clear */ 63 {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 64 /* Src */ 65 {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 66 /* Dst */ 67 {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 68 /* Over */ 69 {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 70 /* OverReverse */ 71 {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 72 /* In */ 73 {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 74 /* InReverse */ 75 {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 76 /* Out */ 77 {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 78 /* OutReverse */ 79 {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 80 /* Atop */ 81 {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 82 /* AtopReverse */ 83 {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 84 /* Xor */ 85 {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 86 /* Add */ 87 {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 88}; 89 90static const struct formatinfo { 91 unsigned int fmt; 92 uint32_t card_fmt; 93} i8xx_tex_formats[] = { 94 {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, 95 {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, 96 {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, 97 {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, 98 {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, 99 {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, 100}, i85x_tex_formats[] = { 101 {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, 102 {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, 103}; 104 105static inline bool 106too_large(int width, int height) 107{ 108 return width > MAX_3D_SIZE || height > MAX_3D_SIZE; 109} 110 111static inline uint32_t 112gen2_buf_tiling(uint32_t tiling) 113{ 114 uint32_t v = 0; 115 switch (tiling) { 116 default: assert(0); 117 case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; 118 case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; 119 case I915_TILING_NONE: break; 120 } 121 return v; 122} 123 124static uint32_t 125gen2_get_dst_format(uint32_t format) 126{ 127#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) 128 switch (format) { 129 default: 130 assert(0); 131 case PICT_a8r8g8b8: 132 case PICT_x8r8g8b8: 133 return COLR_BUF_ARGB8888 | BIAS; 134 case PICT_r5g6b5: 135 return COLR_BUF_RGB565 | BIAS; 136 case PICT_a1r5g5b5: 137 case PICT_x1r5g5b5: 138 return COLR_BUF_ARGB1555 | BIAS; 139 case PICT_a8: 140 return COLR_BUF_8BIT | BIAS; 141 case PICT_a4r4g4b4: 142 case PICT_x4r4g4b4: 143 return COLR_BUF_ARGB4444 | BIAS; 144 } 145#undef BIAS 146} 147 148static bool 149gen2_check_dst_format(uint32_t format) 150{ 151 switch (format) { 152 case PICT_a8r8g8b8: 153 case PICT_x8r8g8b8: 154 case PICT_r5g6b5: 155 case PICT_a1r5g5b5: 156 case PICT_x1r5g5b5: 157 case PICT_a8: 158 case PICT_a4r4g4b4: 159 case PICT_x4r4g4b4: 160 return true; 161 default: 162 return false; 163 } 164} 165 166static uint32_t 167gen2_get_card_format(struct sna *sna, uint32_t format) 168{ 169 unsigned int i; 170 171 for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 172 if (i8xx_tex_formats[i].fmt == format) 173 return i8xx_tex_formats[i].card_fmt; 174 175 if (sna->kgem.gen < 021) { 176 /* Whilst these are not directly supported on 830/845, 177 * we only enable them when we can implicitly convert 178 * them to a supported variant through the texture 179 * combiners. 180 */ 181 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 182 if (i85x_tex_formats[i].fmt == format) 183 return i8xx_tex_formats[1+i].card_fmt; 184 } else { 185 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 186 if (i85x_tex_formats[i].fmt == format) 187 return i85x_tex_formats[i].card_fmt; 188 } 189 190 assert(0); 191 return 0; 192} 193 194static uint32_t 195gen2_check_format(struct sna *sna, PicturePtr p) 196{ 197 unsigned int i; 198 199 for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 200 if (i8xx_tex_formats[i].fmt == p->format) 201 return true; 202 203 if (sna->kgem.gen > 021) { 204 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 205 if (i85x_tex_formats[i].fmt == p->format) 206 return true; 207 } 208 209 return false; 210} 211 212static uint32_t 213gen2_sampler_tiling_bits(uint32_t tiling) 214{ 215 uint32_t bits = 0; 216 switch (tiling) { 217 default: 218 assert(0); 219 case I915_TILING_Y: 220 bits |= TM0S1_TILE_WALK; 221 case I915_TILING_X: 222 bits |= TM0S1_TILED_SURFACE; 223 case I915_TILING_NONE: 224 break; 225 } 226 return bits; 227} 228 229static bool 230gen2_check_filter(PicturePtr picture) 231{ 232 switch (picture->filter) { 233 case PictFilterNearest: 234 case PictFilterBilinear: 235 return true; 236 default: 237 return false; 238 } 239} 240 241static bool 242gen2_check_repeat(PicturePtr picture) 243{ 244 if (!picture->repeat) 245 return true; 246 247 switch (picture->repeatType) { 248 case RepeatNone: 249 case RepeatNormal: 250 case RepeatPad: 251 case RepeatReflect: 252 return true; 253 default: 254 return false; 255 } 256} 257 258static void 259gen2_emit_texture(struct sna *sna, 260 const struct sna_composite_channel *channel, 261 int unit) 262{ 263 uint32_t wrap_mode_u, wrap_mode_v; 264 uint32_t texcoordtype; 265 uint32_t filter; 266 267 assert(channel->bo); 268 269 if (channel->is_affine) 270 texcoordtype = TEXCOORDTYPE_CARTESIAN; 271 else 272 texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; 273 274 switch (channel->repeat) { 275 default: 276 assert(0); 277 case RepeatNone: 278 wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER; 279 break; 280 case RepeatNormal: 281 wrap_mode_u = TEXCOORDMODE_WRAP; 282 break; 283 case RepeatPad: 284 wrap_mode_u = TEXCOORDMODE_CLAMP; 285 break; 286 case RepeatReflect: 287 wrap_mode_u = TEXCOORDMODE_MIRROR; 288 break; 289 } 290 if (channel->is_linear) 291 wrap_mode_v = TEXCOORDMODE_WRAP; 292 else 293 wrap_mode_v = wrap_mode_u; 294 295 switch (channel->filter) { 296 default: 297 assert(0); 298 case PictFilterNearest: 299 filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | 300 FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | 301 MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 302 break; 303 case PictFilterBilinear: 304 filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | 305 FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | 306 MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 307 break; 308 } 309 310 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4); 311 BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 312 channel->bo, 313 I915_GEM_DOMAIN_SAMPLER << 16, 314 0)); 315 BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) | 316 ((channel->width - 1) << TM0S1_WIDTH_SHIFT) | 317 gen2_get_card_format(sna, channel->pict_format) | 318 gen2_sampler_tiling_bits(channel->bo->tiling)); 319 BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); 320 BATCH(filter); 321 BATCH(0); /* default color */ 322 323 BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | 324 ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype | 325 ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) | 326 ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u)); 327} 328 329static void 330gen2_get_blend_factors(const struct sna_composite_op *op, 331 int blend, 332 uint32_t *c_out, 333 uint32_t *a_out) 334{ 335 uint32_t cblend, ablend; 336 337 /* If component alpha is active in the mask and the blend operation 338 * uses the source alpha, then we know we don't need the source 339 * value (otherwise we would have hit a fallback earlier), so we 340 * provide the source alpha (src.A * mask.X) as output color. 341 * Conversely, if CA is set and we don't need the source alpha, then 342 * we produce the source value (src.X * mask.X) and the source alpha 343 * is unused.. Otherwise, we provide the non-CA source value 344 * (src.X * mask.A). 345 * 346 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8 347 * pictures, but we need to implement it for 830/845 and there's no 348 * harm done in leaving it in. 349 */ 350 cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT; 351 ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT; 352 353 /* Get the source picture's channels into TBx_ARG1 */ 354 if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) || 355 op->dst.format == PICT_a8) { 356 /* Producing source alpha value, so the first set of channels 357 * is src.A instead of src.X. We also do this if the destination 358 * is a8, in which case src.G is what's written, and the other 359 * channels are ignored. 360 */ 361 if (op->src.is_opaque) { 362 ablend |= TB0C_ARG1_SEL_ONE; 363 cblend |= TB0C_ARG1_SEL_ONE; 364 } else if (op->src.is_solid) { 365 ablend |= TB0C_ARG1_SEL_DIFFUSE; 366 cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA; 367 } else { 368 ablend |= TB0C_ARG1_SEL_TEXEL0; 369 cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA; 370 } 371 } else { 372 if (op->src.is_solid) 373 cblend |= TB0C_ARG1_SEL_DIFFUSE; 374 else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 375 cblend |= TB0C_ARG1_SEL_TEXEL0; 376 else 377 cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 378 379 if (op->src.is_opaque) 380 ablend |= TB0A_ARG1_SEL_ONE; 381 else if (op->src.is_solid) 382 ablend |= TB0A_ARG1_SEL_DIFFUSE; 383 else 384 ablend |= TB0A_ARG1_SEL_TEXEL0; 385 } 386 387 if (op->mask.bo) { 388 if (op->src.is_solid) { 389 cblend |= TB0C_ARG2_SEL_TEXEL0; 390 ablend |= TB0A_ARG2_SEL_TEXEL0; 391 } else { 392 cblend |= TB0C_ARG2_SEL_TEXEL1; 393 ablend |= TB0A_ARG2_SEL_TEXEL1; 394 } 395 396 if (op->dst.format == PICT_a8 || !op->has_component_alpha) 397 cblend |= TB0C_ARG2_REPLICATE_ALPHA; 398 399 cblend |= TB0C_OP_MODULATE; 400 ablend |= TB0A_OP_MODULATE; 401 } else if (op->mask.is_solid) { 402 cblend |= TB0C_ARG2_SEL_DIFFUSE; 403 ablend |= TB0A_ARG2_SEL_DIFFUSE; 404 405 if (op->dst.format == PICT_a8 || !op->has_component_alpha) 406 cblend |= TB0C_ARG2_REPLICATE_ALPHA; 407 408 cblend |= TB0C_OP_MODULATE; 409 ablend |= TB0A_OP_MODULATE; 410 } else { 411 cblend |= TB0C_OP_ARG1; 412 ablend |= TB0A_OP_ARG1; 413 } 414 415 *c_out = cblend; 416 *a_out = ablend; 417} 418 419static uint32_t gen2_get_blend_cntl(int op, 420 bool has_component_alpha, 421 uint32_t dst_format) 422{ 423 uint32_t sblend, dblend; 424 425 if (op <= PictOpSrc) 426 return S8_ENABLE_COLOR_BUFFER_WRITE; 427 428 sblend = gen2_blend_op[op].src_blend; 429 dblend = gen2_blend_op[op].dst_blend; 430 431 if (gen2_blend_op[op].dst_alpha) { 432 /* If there's no dst alpha channel, adjust the blend op so that 433 * we'll treat it as always 1. 434 */ 435 if (PICT_FORMAT_A(dst_format) == 0) { 436 if (sblend == BLENDFACTOR_DST_ALPHA) 437 sblend = BLENDFACTOR_ONE; 438 else if (sblend == BLENDFACTOR_INV_DST_ALPHA) 439 sblend = BLENDFACTOR_ZERO; 440 } 441 442 /* gen2 engine reads 8bit color buffer into green channel 443 * in cases like color buffer blending etc., and also writes 444 * back green channel. So with dst_alpha blend we should use 445 * color factor. 446 */ 447 if (dst_format == PICT_a8) { 448 if (sblend == BLENDFACTOR_DST_ALPHA) 449 sblend = BLENDFACTOR_DST_COLR; 450 else if (sblend == BLENDFACTOR_INV_DST_ALPHA) 451 sblend = BLENDFACTOR_INV_DST_COLR; 452 } 453 } 454 455 /* If the source alpha is being used, then we should only be in a case 456 * where the source blend factor is 0, and the source blend value is 457 * the mask channels multiplied by the source picture's alpha. 458 */ 459 if (has_component_alpha && gen2_blend_op[op].src_alpha) { 460 if (dblend == BLENDFACTOR_SRC_ALPHA) 461 dblend = BLENDFACTOR_SRC_COLR; 462 else if (dblend == BLENDFACTOR_INV_SRC_ALPHA) 463 dblend = BLENDFACTOR_INV_SRC_COLR; 464 } 465 466 return (sblend << S8_SRC_BLEND_FACTOR_SHIFT | 467 dblend << S8_DST_BLEND_FACTOR_SHIFT | 468 S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | 469 S8_ENABLE_COLOR_BUFFER_WRITE); 470} 471 472static void gen2_emit_invariant(struct sna *sna) 473{ 474 int i; 475 476 for (i = 0; i < 4; i++) { 477 BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i)); 478 BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) | 479 DISABLE_TEX_STREAM_BUMP | 480 ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) | 481 ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i)); 482 BATCH(_3DSTATE_MAP_COORD_TRANSFORM); 483 BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i)); 484 } 485 486 BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); 487 BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | 488 TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | 489 TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | 490 TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); 491 492 BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); 493 494 BATCH(_3DSTATE_VERTEX_TRANSFORM); 495 BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); 496 497 BATCH(_3DSTATE_W_STATE_CMD); 498 BATCH(MAGIC_W_STATE_DWORD1); 499 BATCH_F(1.0); 500 501 BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | 502 DISABLE_INDPT_ALPHA_BLEND | 503 ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD); 504 505 BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); 506 BATCH(0); 507 508 BATCH(_3DSTATE_MODES_1_CMD | 509 ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD | 510 ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) | 511 ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); 512 513 BATCH(_3DSTATE_ENABLES_1_CMD | 514 DISABLE_LOGIC_OP | 515 DISABLE_STENCIL_TEST | 516 DISABLE_DEPTH_BIAS | 517 DISABLE_SPEC_ADD | 518 DISABLE_FOG | 519 DISABLE_ALPHA_TEST | 520 DISABLE_DEPTH_TEST | 521 ENABLE_COLOR_BLEND); 522 523 BATCH(_3DSTATE_ENABLES_2_CMD | 524 DISABLE_STENCIL_WRITE | 525 DISABLE_DITHER | 526 DISABLE_DEPTH_WRITE | 527 ENABLE_COLOR_MASK | 528 ENABLE_COLOR_WRITE | 529 ENABLE_TEX_CACHE); 530 531 BATCH(_3DSTATE_STIPPLE); 532 BATCH(0); 533 534 BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | 535 TEXPIPE_COLOR | 536 ENABLE_TEXOUTPUT_WRT_SEL | 537 TEXOP_OUTPUT_CURRENT | 538 DISABLE_TEX_CNTRL_STAGE | 539 TEXOP_SCALE_1X | 540 TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE | 541 TEXBLENDOP_ARG1); 542 BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | 543 TEXPIPE_ALPHA | 544 ENABLE_TEXOUTPUT_WRT_SEL | 545 TEXOP_OUTPUT_CURRENT | 546 TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | 547 TEXBLENDOP_ARG1); 548 BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | 549 TEXPIPE_COLOR | 550 TEXBLEND_ARG1 | 551 TEXBLENDARG_MODIFY_PARMS | 552 TEXBLENDARG_DIFFUSE); 553 BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | 554 TEXPIPE_ALPHA | 555 TEXBLEND_ARG1 | 556 TEXBLENDARG_MODIFY_PARMS | 557 TEXBLENDARG_DIFFUSE); 558 559#define INVARIANT_SIZE 35 560 561 sna->render_state.gen2.need_invariant = false; 562} 563 564static void 565gen2_get_batch(struct sna *sna, const struct sna_composite_op *op) 566{ 567 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 568 569 if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) { 570 DBG(("%s: flushing batch: size %d > %d\n", 571 __FUNCTION__, INVARIANT_SIZE+40, 572 sna->kgem.surface-sna->kgem.nbatch)); 573 kgem_submit(&sna->kgem); 574 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 575 } 576 577 if (!kgem_check_reloc(&sna->kgem, 3)) { 578 DBG(("%s: flushing batch: reloc %d >= %d\n", 579 __FUNCTION__, 580 sna->kgem.nreloc + 3, 581 (int)KGEM_RELOC_SIZE(&sna->kgem))); 582 kgem_submit(&sna->kgem); 583 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 584 } 585 586 if (!kgem_check_exec(&sna->kgem, 3)) { 587 DBG(("%s: flushing batch: exec %d >= %d\n", 588 __FUNCTION__, 589 sna->kgem.nexec + 1, 590 (int)KGEM_EXEC_SIZE(&sna->kgem))); 591 kgem_submit(&sna->kgem); 592 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 593 } 594 595 if (sna->render_state.gen2.need_invariant) 596 gen2_emit_invariant(sna); 597} 598 599static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) 600{ 601 assert(!too_large(op->dst.width, op->dst.height)); 602 assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); 603 assert(sna->render.vertex_offset == 0); 604 605 assert(op->dst.bo->unique_id); 606 if (sna->render_state.gen2.target == op->dst.bo->unique_id) { 607 kgem_bo_mark_dirty(op->dst.bo); 608 return; 609 } 610 611 BATCH(_3DSTATE_BUF_INFO_CMD); 612 BATCH(BUF_3D_ID_COLOR_BACK | 613 gen2_buf_tiling(op->dst.bo->tiling) | 614 BUF_3D_PITCH(op->dst.bo->pitch)); 615 BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 616 op->dst.bo, 617 I915_GEM_DOMAIN_RENDER << 16 | 618 I915_GEM_DOMAIN_RENDER, 619 0)); 620 621 BATCH(_3DSTATE_DST_BUF_VARS_CMD); 622 BATCH(gen2_get_dst_format(op->dst.format)); 623 624 BATCH(_3DSTATE_DRAW_RECT_CMD); 625 BATCH(0); 626 BATCH(0); /* ymin, xmin */ 627 BATCH(DRAW_YMAX(op->dst.height - 1) | 628 DRAW_XMAX(op->dst.width - 1)); 629 BATCH(0); /* yorig, xorig */ 630 631 sna->render_state.gen2.target = op->dst.bo->unique_id; 632} 633 634static void gen2_disable_logic_op(struct sna *sna) 635{ 636 if (!sna->render_state.gen2.logic_op_enabled) 637 return; 638 639 DBG(("%s\n", __FUNCTION__)); 640 641 BATCH(_3DSTATE_ENABLES_1_CMD | 642 DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND); 643 644 sna->render_state.gen2.logic_op_enabled = 0; 645} 646 647static void gen2_enable_logic_op(struct sna *sna, int op) 648{ 649 static const uint8_t logic_op[] = { 650 LOGICOP_CLEAR, /* GXclear */ 651 LOGICOP_AND, /* GXand */ 652 LOGICOP_AND_RVRSE, /* GXandReverse */ 653 LOGICOP_COPY, /* GXcopy */ 654 LOGICOP_AND_INV, /* GXandInverted */ 655 LOGICOP_NOOP, /* GXnoop */ 656 LOGICOP_XOR, /* GXxor */ 657 LOGICOP_OR, /* GXor */ 658 LOGICOP_NOR, /* GXnor */ 659 LOGICOP_EQUIV, /* GXequiv */ 660 LOGICOP_INV, /* GXinvert */ 661 LOGICOP_OR_RVRSE, /* GXorReverse */ 662 LOGICOP_COPY_INV, /* GXcopyInverted */ 663 LOGICOP_OR_INV, /* GXorInverted */ 664 LOGICOP_NAND, /* GXnand */ 665 LOGICOP_SET /* GXset */ 666 }; 667 668 if (sna->render_state.gen2.logic_op_enabled != op+1) { 669 if (!sna->render_state.gen2.logic_op_enabled) { 670 if (op == GXclear || op == GXcopy) 671 return; 672 673 DBG(("%s\n", __FUNCTION__)); 674 675 BATCH(_3DSTATE_ENABLES_1_CMD | 676 ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND); 677 } 678 679 BATCH(_3DSTATE_MODES_4_CMD | 680 ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op])); 681 sna->render_state.gen2.logic_op_enabled = op+1; 682 } 683} 684 685static void gen2_emit_composite_state(struct sna *sna, 686 const struct sna_composite_op *op) 687{ 688 uint32_t texcoordfmt, v, unwind; 689 uint32_t cblend, ablend; 690 int tex; 691 692 gen2_get_batch(sna, op); 693 694 if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 695 if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) 696 BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 697 else 698 BATCH(_3DSTATE_MODES_5_CMD | 699 PIPELINE_FLUSH_RENDER_CACHE | 700 PIPELINE_FLUSH_TEXTURE_CACHE); 701 kgem_clear_dirty(&sna->kgem); 702 } 703 704 gen2_emit_target(sna, op); 705 706 unwind = sna->kgem.nbatch; 707 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 708 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 709 BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12); 710 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 711 BATCH(gen2_get_blend_cntl(op->op, 712 op->has_component_alpha, 713 op->dst.format)); 714 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 715 sna->kgem.batch + unwind + 1, 716 3 * sizeof(uint32_t)) == 0) 717 sna->kgem.nbatch = unwind; 718 else 719 sna->render_state.gen2.ls1 = unwind; 720 721 gen2_disable_logic_op(sna); 722 723 gen2_get_blend_factors(op, op->op, &cblend, &ablend); 724 unwind = sna->kgem.nbatch; 725 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 726 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 727 BATCH(cblend); 728 BATCH(ablend); 729 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 730 sna->kgem.batch + unwind + 1, 731 2 * sizeof(uint32_t)) == 0) 732 sna->kgem.nbatch = unwind; 733 else 734 sna->render_state.gen2.ls2 = unwind; 735 736 tex = texcoordfmt = 0; 737 if (!op->src.is_solid) { 738 if (op->src.is_affine) 739 texcoordfmt |= TEXCOORDFMT_2D << (2*tex); 740 else 741 texcoordfmt |= TEXCOORDFMT_3D << (2*tex); 742 gen2_emit_texture(sna, &op->src, tex++); 743 } else { 744 if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 745 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 746 BATCH(op->src.u.gen2.pixel); 747 sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 748 } 749 } 750 if (op->mask.bo) { 751 if (op->mask.is_affine) 752 texcoordfmt |= TEXCOORDFMT_2D << (2*tex); 753 else 754 texcoordfmt |= TEXCOORDFMT_3D << (2*tex); 755 gen2_emit_texture(sna, &op->mask, tex++); 756 } else if (op->mask.is_solid) { 757 if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) { 758 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 759 BATCH(op->mask.u.gen2.pixel); 760 sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel; 761 } 762 } 763 764 v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt; 765 if (sna->render_state.gen2.vft != v) { 766 BATCH(v); 767 sna->render_state.gen2.vft = v; 768 } 769} 770 771static inline void 772gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY) 773{ 774 VERTEX(dstX); 775 VERTEX(dstY); 776} 777 778inline static void 779gen2_emit_composite_linear(struct sna *sna, 780 const struct sna_composite_channel *channel, 781 int16_t x, int16_t y) 782{ 783 float v; 784 785 v = (x * channel->u.linear.dx + 786 y * channel->u.linear.dy + 787 channel->u.linear.offset); 788 DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v)); 789 VERTEX(v); 790 VERTEX(v); 791} 792 793static void 794gen2_emit_composite_texcoord(struct sna *sna, 795 const struct sna_composite_channel *channel, 796 int16_t x, int16_t y) 797{ 798 float s = 0, t = 0, w = 1; 799 800 x += channel->offset[0]; 801 y += channel->offset[1]; 802 803 if (channel->is_affine) { 804 sna_get_transformed_coordinates(x, y, 805 channel->transform, 806 &s, &t); 807 VERTEX(s * channel->scale[0]); 808 VERTEX(t * channel->scale[1]); 809 } else { 810 sna_get_transformed_coordinates_3d(x, y, 811 channel->transform, 812 &s, &t, &w); 813 VERTEX(s * channel->scale[0]); 814 VERTEX(t * channel->scale[1]); 815 VERTEX(w); 816 } 817} 818 819static void 820gen2_emit_composite_vertex(struct sna *sna, 821 const struct sna_composite_op *op, 822 int16_t srcX, int16_t srcY, 823 int16_t mskX, int16_t mskY, 824 int16_t dstX, int16_t dstY) 825{ 826 gen2_emit_composite_dstcoord(sna, dstX, dstY); 827 if (op->src.is_linear) 828 gen2_emit_composite_linear(sna, &op->src, srcX, srcY); 829 else if (!op->src.is_solid) 830 gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY); 831 832 if (op->mask.is_linear) 833 gen2_emit_composite_linear(sna, &op->mask, mskX, mskY); 834 else if (op->mask.bo) 835 gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY); 836} 837 838fastcall static void 839gen2_emit_composite_primitive(struct sna *sna, 840 const struct sna_composite_op *op, 841 const struct sna_composite_rectangles *r) 842{ 843 gen2_emit_composite_vertex(sna, op, 844 r->src.x + r->width, 845 r->src.y + r->height, 846 r->mask.x + r->width, 847 r->mask.y + r->height, 848 op->dst.x + r->dst.x + r->width, 849 op->dst.y + r->dst.y + r->height); 850 gen2_emit_composite_vertex(sna, op, 851 r->src.x, 852 r->src.y + r->height, 853 r->mask.x, 854 r->mask.y + r->height, 855 op->dst.x + r->dst.x, 856 op->dst.y + r->dst.y + r->height); 857 gen2_emit_composite_vertex(sna, op, 858 r->src.x, 859 r->src.y, 860 r->mask.x, 861 r->mask.y, 862 op->dst.x + r->dst.x, 863 op->dst.y + r->dst.y); 864} 865 866fastcall static void 867gen2_emit_composite_primitive_constant(struct sna *sna, 868 const struct sna_composite_op *op, 869 const struct sna_composite_rectangles *r) 870{ 871 int16_t dst_x = r->dst.x + op->dst.x; 872 int16_t dst_y = r->dst.y + op->dst.y; 873 874 gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 875 gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 876 gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 877} 878 879fastcall static void 880gen2_emit_composite_primitive_linear(struct sna *sna, 881 const struct sna_composite_op *op, 882 const struct sna_composite_rectangles *r) 883{ 884 int16_t dst_x = r->dst.x + op->dst.x; 885 int16_t dst_y = r->dst.y + op->dst.y; 886 887 gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 888 gen2_emit_composite_linear(sna, &op->src, 889 r->src.x + r->width, r->src.y + r->height); 890 891 gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 892 gen2_emit_composite_linear(sna, &op->src, 893 r->src.x, r->src.y + r->height); 894 895 gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 896 gen2_emit_composite_linear(sna, &op->src, 897 r->src.x, r->src.y); 898} 899 900fastcall static void 901gen2_emit_composite_primitive_identity(struct sna *sna, 902 const struct sna_composite_op *op, 903 const struct sna_composite_rectangles *r) 904{ 905 float w = r->width; 906 float h = r->height; 907 float *v; 908 909 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 910 sna->kgem.nbatch += 12; 911 912 v[8] = v[4] = r->dst.x + op->dst.x; 913 v[0] = v[4] + w; 914 915 v[9] = r->dst.y + op->dst.y; 916 v[5] = v[1] = v[9] + h; 917 918 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 919 v[2] = v[6] + w * op->src.scale[0]; 920 921 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 922 v[7] = v[3] = v[11] + h * op->src.scale[1]; 923} 924 925fastcall static void 926gen2_emit_composite_primitive_affine(struct sna *sna, 927 const struct sna_composite_op *op, 928 const struct sna_composite_rectangles *r) 929{ 930 PictTransform *transform = op->src.transform; 931 int src_x = r->src.x + (int)op->src.offset[0]; 932 int src_y = r->src.y + (int)op->src.offset[1]; 933 float *v; 934 935 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 936 sna->kgem.nbatch += 12; 937 938 v[8] = v[4] = r->dst.x + op->dst.x; 939 v[0] = v[4] + r->width; 940 941 v[9] = r->dst.y + op->dst.y; 942 v[5] = v[1] = v[9] + r->height; 943 944 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 945 transform, op->src.scale, 946 &v[2], &v[3]); 947 948 _sna_get_transformed_scaled(src_x, src_y + r->height, 949 transform, op->src.scale, 950 &v[6], &v[7]); 951 952 _sna_get_transformed_scaled(src_x, src_y, 953 transform, op->src.scale, 954 &v[10], &v[11]); 955} 956 957fastcall static void 958gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna, 959 const struct sna_composite_op *op, 960 const struct sna_composite_rectangles *r) 961{ 962 float w = r->width; 963 float h = r->height; 964 float *v; 965 966 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 967 sna->kgem.nbatch += 12; 968 969 v[8] = v[4] = r->dst.x + op->dst.x; 970 v[0] = v[4] + w; 971 972 v[9] = r->dst.y + op->dst.y; 973 v[5] = v[1] = v[9] + h; 974 975 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 976 v[2] = v[6] + w * op->mask.scale[0]; 977 978 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 979 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 980} 981 982#if defined(sse2) && !defined(__x86_64__) 983sse2 fastcall static void 984gen2_emit_composite_primitive_constant__sse2(struct sna *sna, 985 const struct sna_composite_op *op, 986 const struct sna_composite_rectangles *r) 987{ 988 int16_t dst_x = r->dst.x + op->dst.x; 989 int16_t dst_y = r->dst.y + op->dst.y; 990 991 gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 992 gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 993 gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 994} 995 996sse2 fastcall static void 997gen2_emit_composite_primitive_linear__sse2(struct sna *sna, 998 const struct sna_composite_op *op, 999 const struct sna_composite_rectangles *r) 1000{ 1001 int16_t dst_x = r->dst.x + op->dst.x; 1002 int16_t dst_y = r->dst.y + op->dst.y; 1003 1004 gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 1005 gen2_emit_composite_linear(sna, &op->src, 1006 r->src.x + r->width, r->src.y + r->height); 1007 1008 gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 1009 gen2_emit_composite_linear(sna, &op->src, 1010 r->src.x, r->src.y + r->height); 1011 1012 gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 1013 gen2_emit_composite_linear(sna, &op->src, 1014 r->src.x, r->src.y); 1015} 1016 1017sse2 fastcall static void 1018gen2_emit_composite_primitive_identity__sse2(struct sna *sna, 1019 const struct sna_composite_op *op, 1020 const struct sna_composite_rectangles *r) 1021{ 1022 float w = r->width; 1023 float h = r->height; 1024 float *v; 1025 1026 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1027 sna->kgem.nbatch += 12; 1028 1029 v[8] = v[4] = r->dst.x + op->dst.x; 1030 v[0] = v[4] + w; 1031 1032 v[9] = r->dst.y + op->dst.y; 1033 v[5] = v[1] = v[9] + h; 1034 1035 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1036 v[2] = v[6] + w * op->src.scale[0]; 1037 1038 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1039 v[7] = v[3] = v[11] + h * op->src.scale[1]; 1040} 1041 1042sse2 fastcall static void 1043gen2_emit_composite_primitive_affine__sse2(struct sna *sna, 1044 const struct sna_composite_op *op, 1045 const struct sna_composite_rectangles *r) 1046{ 1047 PictTransform *transform = op->src.transform; 1048 int src_x = r->src.x + (int)op->src.offset[0]; 1049 int src_y = r->src.y + (int)op->src.offset[1]; 1050 float *v; 1051 1052 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1053 sna->kgem.nbatch += 12; 1054 1055 v[8] = v[4] = r->dst.x + op->dst.x; 1056 v[0] = v[4] + r->width; 1057 1058 v[9] = r->dst.y + op->dst.y; 1059 v[5] = v[1] = v[9] + r->height; 1060 1061 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 1062 transform, op->src.scale, 1063 &v[2], &v[3]); 1064 1065 _sna_get_transformed_scaled(src_x, src_y + r->height, 1066 transform, op->src.scale, 1067 &v[6], &v[7]); 1068 1069 _sna_get_transformed_scaled(src_x, src_y, 1070 transform, op->src.scale, 1071 &v[10], &v[11]); 1072} 1073 1074sse2 fastcall static void 1075gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, 1076 const struct sna_composite_op *op, 1077 const struct sna_composite_rectangles *r) 1078{ 1079 float w = r->width; 1080 float h = r->height; 1081 float *v; 1082 1083 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1084 sna->kgem.nbatch += 12; 1085 1086 v[8] = v[4] = r->dst.x + op->dst.x; 1087 v[0] = v[4] + w; 1088 1089 v[9] = r->dst.y + op->dst.y; 1090 v[5] = v[1] = v[9] + h; 1091 1092 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 1093 v[2] = v[6] + w * op->mask.scale[0]; 1094 1095 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 1096 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 1097} 1098#endif 1099 1100static void gen2_magic_ca_pass(struct sna *sna, 1101 const struct sna_composite_op *op) 1102{ 1103 uint32_t ablend, cblend, *src, *dst; 1104 int n; 1105 1106 if (!op->need_magic_ca_pass) 1107 return; 1108 1109 DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__, 1110 sna->kgem.nbatch, sna->render.vertex_offset)); 1111 1112 assert(op->mask.bo); 1113 assert(op->has_component_alpha); 1114 1115 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0); 1116 BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT | 1117 BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT | 1118 S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | 1119 S8_ENABLE_COLOR_BUFFER_WRITE); 1120 sna->render_state.gen2.ls1 = 0; 1121 1122 gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend); 1123 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 1124 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 1125 BATCH(cblend); 1126 BATCH(ablend); 1127 sna->render_state.gen2.ls2 = 0; 1128 1129 src = sna->kgem.batch + sna->render.vertex_offset; 1130 dst = sna->kgem.batch + sna->kgem.nbatch; 1131 n = 1 + sna->render.vertex_index; 1132 sna->kgem.nbatch += n; 1133 assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 1134 while (n--) 1135 *dst++ = *src++; 1136} 1137 1138static void gen2_vertex_flush(struct sna *sna, 1139 const struct sna_composite_op *op) 1140{ 1141 if (sna->render.vertex_index == 0) 1142 return; 1143 1144 sna->kgem.batch[sna->render.vertex_offset] |= 1145 sna->render.vertex_index - 1; 1146 1147 gen2_magic_ca_pass(sna, op); 1148 1149 sna->render.vertex_offset = 0; 1150 sna->render.vertex_index = 0; 1151} 1152 1153inline static int gen2_get_rectangles(struct sna *sna, 1154 const struct sna_composite_op *op, 1155 int want) 1156{ 1157 int rem = batch_space(sna), size, need; 1158 1159 DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n", 1160 __FUNCTION__, want, op->floats_per_vertex, rem)); 1161 1162 assert(op->floats_per_vertex); 1163 assert(op->floats_per_rect == 3 * op->floats_per_vertex); 1164 1165 need = 1; 1166 size = op->floats_per_rect; 1167 if (op->need_magic_ca_pass) 1168 need += 6 + size*sna->render.vertex_index, size *= 2; 1169 1170 DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n", 1171 __FUNCTION__, want, need, size, rem)); 1172 if (rem < need + size) { 1173 gen2_vertex_flush(sna, op); 1174 kgem_submit(&sna->kgem); 1175 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1176 return 0; 1177 } 1178 1179 rem -= need; 1180 if (sna->render.vertex_offset == 0) { 1181 if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) == 1182 (PRIM3D_INLINE | PRIM3D_RECTLIST)) { 1183 uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1]; 1184 assert(*b & 0xffff); 1185 sna->render.vertex_index = 1 + (*b & 0xffff); 1186 *b = PRIM3D_INLINE | PRIM3D_RECTLIST; 1187 sna->render.vertex_offset = sna->kgem.nbatch - 1; 1188 assert(!op->need_magic_ca_pass); 1189 } else { 1190 sna->render.vertex_offset = sna->kgem.nbatch; 1191 BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); 1192 } 1193 } 1194 1195 if (want > 1 && want * size > rem) 1196 want = rem / size; 1197 1198 assert(want); 1199 sna->render.vertex_index += want*op->floats_per_rect; 1200 return want; 1201} 1202 1203fastcall static void 1204gen2_render_composite_blt(struct sna *sna, 1205 const struct sna_composite_op *op, 1206 const struct sna_composite_rectangles *r) 1207{ 1208 if (!gen2_get_rectangles(sna, op, 1)) { 1209 gen2_emit_composite_state(sna, op); 1210 gen2_get_rectangles(sna, op, 1); 1211 } 1212 1213 op->prim_emit(sna, op, r); 1214} 1215 1216fastcall static void 1217gen2_render_composite_box(struct sna *sna, 1218 const struct sna_composite_op *op, 1219 const BoxRec *box) 1220{ 1221 struct sna_composite_rectangles r; 1222 1223 if (!gen2_get_rectangles(sna, op, 1)) { 1224 gen2_emit_composite_state(sna, op); 1225 gen2_get_rectangles(sna, op, 1); 1226 } 1227 1228 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1229 box->x1, box->y1, 1230 box->x2 - box->x1, 1231 box->y2 - box->y1)); 1232 1233 r.dst.x = box->x1; r.dst.y = box->y1; 1234 r.width = box->x2 - box->x1; 1235 r.height = box->y2 - box->y1; 1236 r.src = r.mask = r.dst; 1237 1238 op->prim_emit(sna, op, &r); 1239} 1240 1241static void 1242gen2_render_composite_boxes(struct sna *sna, 1243 const struct sna_composite_op *op, 1244 const BoxRec *box, int nbox) 1245{ 1246 do { 1247 int nbox_this_time; 1248 1249 nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1250 if (nbox_this_time == 0) { 1251 gen2_emit_composite_state(sna, op); 1252 nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1253 } 1254 nbox -= nbox_this_time; 1255 1256 do { 1257 struct sna_composite_rectangles r; 1258 1259 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1260 box->x1, box->y1, 1261 box->x2 - box->x1, 1262 box->y2 - box->y1)); 1263 1264 r.dst.x = box->x1; r.dst.y = box->y1; 1265 r.width = box->x2 - box->x1; 1266 r.height = box->y2 - box->y1; 1267 r.src = r.mask = r.dst; 1268 1269 op->prim_emit(sna, op, &r); 1270 box++; 1271 } while (--nbox_this_time); 1272 } while (nbox); 1273} 1274 1275static void gen2_render_composite_done(struct sna *sna, 1276 const struct sna_composite_op *op) 1277{ 1278 gen2_vertex_flush(sna, op); 1279 1280 if (op->mask.bo) 1281 kgem_bo_destroy(&sna->kgem, op->mask.bo); 1282 if (op->src.bo) 1283 kgem_bo_destroy(&sna->kgem, op->src.bo); 1284 sna_render_composite_redirect_done(sna, op); 1285} 1286 1287static bool 1288gen2_composite_solid_init(struct sna *sna, 1289 struct sna_composite_channel *channel, 1290 uint32_t color) 1291{ 1292 channel->filter = PictFilterNearest; 1293 channel->repeat = RepeatNormal; 1294 channel->is_solid = true; 1295 channel->is_affine = true; 1296 channel->width = 1; 1297 channel->height = 1; 1298 channel->pict_format = PICT_a8r8g8b8; 1299 1300 channel->bo = NULL; 1301 channel->u.gen2.pixel = color; 1302 1303 channel->scale[0] = channel->scale[1] = 1; 1304 channel->offset[0] = channel->offset[1] = 0; 1305 return true; 1306} 1307 1308#define xFixedToDouble(f) pixman_fixed_to_double(f) 1309 1310static bool 1311gen2_composite_linear_init(struct sna *sna, 1312 PicturePtr picture, 1313 struct sna_composite_channel *channel, 1314 int x, int y, 1315 int w, int h, 1316 int dst_x, int dst_y) 1317{ 1318 PictLinearGradient *linear = 1319 (PictLinearGradient *)picture->pSourcePict; 1320 pixman_fixed_t tx, ty; 1321 float x0, y0, sf; 1322 float dx, dy; 1323 1324 DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n", 1325 __FUNCTION__, 1326 xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y), 1327 xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y))); 1328 1329 if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) 1330 return 0; 1331 1332 if (!sna_transform_is_affine(picture->transform)) { 1333 DBG(("%s: fallback due to projective transform\n", 1334 __FUNCTION__)); 1335 return sna_render_picture_fixup(sna, picture, channel, 1336 x, y, w, h, dst_x, dst_y); 1337 } 1338 1339 channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); 1340 if (!channel->bo) 1341 return 0; 1342 1343 channel->filter = PictFilterNearest; 1344 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1345 channel->is_linear = true; 1346 channel->width = channel->bo->pitch / 4; 1347 channel->height = 1; 1348 channel->pict_format = PICT_a8r8g8b8; 1349 1350 channel->scale[0] = channel->scale[1] = 1; 1351 channel->offset[0] = channel->offset[1] = 0; 1352 1353 if (sna_transform_is_translation(picture->transform, &tx, &ty)) { 1354 dx = xFixedToDouble(linear->p2.x - linear->p1.x); 1355 dy = xFixedToDouble(linear->p2.y - linear->p1.y); 1356 1357 x0 = xFixedToDouble(linear->p1.x); 1358 y0 = xFixedToDouble(linear->p1.y); 1359 1360 if (tx | ty) { 1361 x0 -= pixman_fixed_to_double(tx); 1362 y0 -= pixman_fixed_to_double(ty); 1363 } 1364 } else { 1365 struct pixman_f_vector p1, p2; 1366 struct pixman_f_transform m, inv; 1367 1368 pixman_f_transform_from_pixman_transform(&m, picture->transform); 1369 DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", 1370 __FUNCTION__, 1371 m.m[0][0], m.m[0][1], m.m[0][2], 1372 m.m[1][0], m.m[1][1], m.m[1][2], 1373 m.m[2][0], m.m[2][1], m.m[2][2])); 1374 if (!pixman_f_transform_invert(&inv, &m)) 1375 return 0; 1376 1377 p1.v[0] = pixman_fixed_to_double(linear->p1.x); 1378 p1.v[1] = pixman_fixed_to_double(linear->p1.y); 1379 p1.v[2] = 1.; 1380 pixman_f_transform_point(&inv, &p1); 1381 1382 p2.v[0] = pixman_fixed_to_double(linear->p2.x); 1383 p2.v[1] = pixman_fixed_to_double(linear->p2.y); 1384 p2.v[2] = 1.; 1385 pixman_f_transform_point(&inv, &p2); 1386 1387 DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", 1388 __FUNCTION__, 1389 p1.v[0], p1.v[1], p1.v[2], 1390 p2.v[0], p2.v[1], p2.v[2])); 1391 1392 dx = p2.v[0] - p1.v[0]; 1393 dy = p2.v[1] - p1.v[1]; 1394 1395 x0 = p1.v[0]; 1396 y0 = p1.v[1]; 1397 } 1398 1399 sf = dx*dx + dy*dy; 1400 dx /= sf; 1401 dy /= sf; 1402 1403 channel->u.linear.dx = dx; 1404 channel->u.linear.dy = dy; 1405 channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y); 1406 1407 DBG(("%s: dx=%f, dy=%f, offset=%f\n", 1408 __FUNCTION__, dx, dy, channel->u.linear.offset)); 1409 1410 return channel->bo != NULL; 1411} 1412 1413static bool source_is_covered(PicturePtr picture, 1414 int x, int y, 1415 int width, int height) 1416{ 1417 int x1, y1, x2, y2; 1418 1419 if (picture->repeat && picture->repeatType != RepeatNone) 1420 return true; 1421 1422 if (picture->pDrawable == NULL) 1423 return false; 1424 1425 if (picture->transform) { 1426 pixman_box16_t sample; 1427 1428 sample.x1 = x; 1429 sample.y1 = y; 1430 sample.x2 = x + width; 1431 sample.y2 = y + height; 1432 1433 pixman_transform_bounds(picture->transform, &sample); 1434 1435 x1 = sample.x1; 1436 x2 = sample.x2; 1437 y1 = sample.y1; 1438 y2 = sample.y2; 1439 } else { 1440 x1 = x; 1441 y1 = y; 1442 x2 = x + width; 1443 y2 = y + height; 1444 } 1445 1446 return 1447 x1 >= 0 && y1 >= 0 && 1448 x2 <= picture->pDrawable->width && 1449 y2 <= picture->pDrawable->height; 1450} 1451 1452static bool 1453gen2_check_card_format(struct sna *sna, 1454 PicturePtr picture, 1455 struct sna_composite_channel *channel, 1456 int x, int y, int w, int h, 1457 bool *fixup_alpha) 1458{ 1459 uint32_t format = picture->format; 1460 unsigned int i; 1461 1462 for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { 1463 if (i8xx_tex_formats[i].fmt == format) 1464 return true; 1465 } 1466 1467 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { 1468 if (i85x_tex_formats[i].fmt == format) { 1469 if (sna->kgem.gen >= 021) 1470 return true; 1471 1472 if (source_is_covered(picture, x, y, w,h)) { 1473 channel->is_opaque = true; 1474 return true; 1475 } 1476 1477 *fixup_alpha = true; 1478 return false; 1479 } 1480 } 1481 1482 *fixup_alpha = false; 1483 return false; 1484} 1485 1486static int 1487gen2_composite_picture(struct sna *sna, 1488 PicturePtr picture, 1489 struct sna_composite_channel *channel, 1490 int x, int y, 1491 int w, int h, 1492 int dst_x, int dst_y, 1493 bool precise) 1494{ 1495 PixmapPtr pixmap; 1496 uint32_t color; 1497 int16_t dx, dy; 1498 bool fixup_alpha; 1499 1500 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1501 __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1502 1503 channel->is_solid = false; 1504 channel->is_linear = false; 1505 channel->is_opaque = false; 1506 channel->is_affine = true; 1507 channel->transform = NULL; 1508 channel->card_format = -1; 1509 1510 if (sna_picture_is_solid(picture, &color)) 1511 return gen2_composite_solid_init(sna, channel, color); 1512 1513 if (!gen2_check_repeat(picture)) { 1514 DBG(("%s -- fallback, unhandled repeat %d\n", 1515 __FUNCTION__, picture->repeat)); 1516 return sna_render_picture_fixup(sna, picture, channel, 1517 x, y, w, h, dst_x, dst_y); 1518 } 1519 1520 if (!gen2_check_filter(picture)) { 1521 DBG(("%s -- fallback, unhandled filter %d\n", 1522 __FUNCTION__, picture->filter)); 1523 return sna_render_picture_fixup(sna, picture, channel, 1524 x, y, w, h, dst_x, dst_y); 1525 } 1526 1527 if (picture->pDrawable == NULL) { 1528 int ret; 1529 1530 if (picture->pSourcePict->type == SourcePictTypeLinear) 1531 return gen2_composite_linear_init(sna, picture, channel, 1532 x, y, 1533 w, h, 1534 dst_x, dst_y); 1535 1536 DBG(("%s -- fallback, unhandled source %d\n", 1537 __FUNCTION__, picture->pSourcePict->type)); 1538 ret = -1; 1539 if (!precise) 1540 ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1541 x, y, w, h, dst_x, dst_y); 1542 if (ret == -1) 1543 ret = sna_render_picture_fixup(sna, picture, channel, 1544 x, y, w, h, dst_x, dst_y); 1545 return ret; 1546 } 1547 1548 if (picture->alphaMap) { 1549 DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1550 return sna_render_picture_fixup(sna, picture, channel, 1551 x, y, w, h, dst_x, dst_y); 1552 } 1553 1554 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1555 channel->filter = picture->filter; 1556 1557 pixmap = get_drawable_pixmap(picture->pDrawable); 1558 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1559 1560 x += dx + picture->pDrawable->x; 1561 y += dy + picture->pDrawable->y; 1562 1563 channel->is_affine = sna_transform_is_affine(picture->transform); 1564 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1565 DBG(("%s: integer translation (%d, %d), removing\n", 1566 __FUNCTION__, dx, dy)); 1567 x += dx; 1568 y += dy; 1569 channel->transform = NULL; 1570 channel->filter = PictFilterNearest; 1571 1572 if (channel->repeat && 1573 (x >= 0 && 1574 y >= 0 && 1575 x + w < pixmap->drawable.width && 1576 y + h < pixmap->drawable.height)) { 1577 struct sna_pixmap *priv = sna_pixmap(pixmap); 1578 if (priv && priv->clear) { 1579 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1580 return gen2_composite_solid_init(sna, channel, priv->clear_color); 1581 } 1582 } 1583 } else 1584 channel->transform = picture->transform; 1585 1586 if (!gen2_check_card_format(sna, picture, channel, x, y, w ,h, &fixup_alpha)) 1587 return sna_render_picture_convert(sna, picture, channel, pixmap, 1588 x, y, w, h, dst_x, dst_y, fixup_alpha); 1589 1590 channel->pict_format = picture->format; 1591 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) 1592 return sna_render_picture_extract(sna, picture, channel, 1593 x, y, w, h, dst_x, dst_y); 1594 1595 return sna_render_pixmap_bo(sna, channel, pixmap, 1596 x, y, w, h, dst_x, dst_y); 1597} 1598 1599static bool 1600gen2_composite_set_target(struct sna *sna, 1601 struct sna_composite_op *op, 1602 PicturePtr dst, 1603 int x, int y, int w, int h, 1604 bool partial) 1605{ 1606 BoxRec box; 1607 unsigned hint; 1608 1609 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1610 op->dst.format = dst->format; 1611 op->dst.width = op->dst.pixmap->drawable.width; 1612 op->dst.height = op->dst.pixmap->drawable.height; 1613 1614 if (w && h) { 1615 box.x1 = x; 1616 box.y1 = y; 1617 box.x2 = x + w; 1618 box.y2 = y + h; 1619 } else 1620 sna_render_picture_extents(dst, &box); 1621 1622 hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1623 if (!partial) { 1624 hint |= IGNORE_DAMAGE; 1625 if (w == op->dst.width && h == op->dst.height) 1626 hint |= REPLACES; 1627 } 1628 1629 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1630 if (op->dst.bo == NULL) 1631 return false; 1632 1633 if (hint & REPLACES) { 1634 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1635 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1636 } 1637 1638 assert((op->dst.bo->pitch & 7) == 0); 1639 1640 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1641 &op->dst.x, &op->dst.y); 1642 1643 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1644 __FUNCTION__, 1645 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1646 op->dst.width, op->dst.height, 1647 op->dst.bo->pitch, 1648 op->dst.x, op->dst.y, 1649 op->damage ? *op->damage : (void *)-1)); 1650 1651 assert(op->dst.bo->proxy == NULL); 1652 1653 if (((too_large(op->dst.width, op->dst.height) || 1654 op->dst.bo->pitch > MAX_3D_PITCH)) && 1655 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1656 return false; 1657 1658 return true; 1659} 1660 1661static bool 1662is_unhandled_gradient(PicturePtr picture, bool precise) 1663{ 1664 if (picture->pDrawable) 1665 return false; 1666 1667 switch (picture->pSourcePict->type) { 1668 case SourcePictTypeSolidFill: 1669 case SourcePictTypeLinear: 1670 return false; 1671 default: 1672 return precise; 1673 } 1674} 1675 1676static bool 1677has_alphamap(PicturePtr p) 1678{ 1679 return p->alphaMap != NULL; 1680} 1681 1682static bool 1683need_upload(PicturePtr p) 1684{ 1685 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 1686} 1687 1688static bool 1689source_is_busy(PixmapPtr pixmap) 1690{ 1691 struct sna_pixmap *priv = sna_pixmap(pixmap); 1692 if (priv == NULL) 1693 return false; 1694 1695 if (priv->clear) 1696 return false; 1697 1698 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 1699 return true; 1700 1701 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 1702 return true; 1703 1704 return priv->gpu_damage && !priv->cpu_damage; 1705} 1706 1707static bool 1708source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 1709{ 1710 if (sna_picture_is_solid(p, NULL)) 1711 return false; 1712 1713 if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p)) 1714 return true; 1715 1716 if (pixmap && source_is_busy(pixmap)) 1717 return false; 1718 1719 return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p); 1720} 1721 1722static bool 1723gen2_composite_fallback(struct sna *sna, 1724 PicturePtr src, 1725 PicturePtr mask, 1726 PicturePtr dst) 1727{ 1728 PixmapPtr src_pixmap; 1729 PixmapPtr mask_pixmap; 1730 PixmapPtr dst_pixmap; 1731 bool src_fallback, mask_fallback; 1732 1733 if (!gen2_check_dst_format(dst->format)) { 1734 DBG(("%s: unknown destination format: %d\n", 1735 __FUNCTION__, dst->format)); 1736 return true; 1737 } 1738 1739 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 1740 1741 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 1742 src_fallback = source_fallback(src, src_pixmap, 1743 dst->polyMode == PolyModePrecise); 1744 1745 if (mask) { 1746 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 1747 mask_fallback = source_fallback(mask, mask_pixmap, 1748 dst->polyMode == PolyModePrecise); 1749 } else { 1750 mask_pixmap = NULL; 1751 mask_fallback = NULL; 1752 } 1753 1754 /* If we are using the destination as a source and need to 1755 * readback in order to upload the source, do it all 1756 * on the cpu. 1757 */ 1758 if (src_pixmap == dst_pixmap && src_fallback) { 1759 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 1760 return true; 1761 } 1762 if (mask_pixmap == dst_pixmap && mask_fallback) { 1763 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 1764 return true; 1765 } 1766 1767 /* If anything is on the GPU, push everything out to the GPU */ 1768 if (dst_use_gpu(dst_pixmap)) { 1769 DBG(("%s: dst is already on the GPU, try to use GPU\n", 1770 __FUNCTION__)); 1771 return false; 1772 } 1773 1774 if (src_pixmap && !src_fallback) { 1775 DBG(("%s: src is already on the GPU, try to use GPU\n", 1776 __FUNCTION__)); 1777 return false; 1778 } 1779 if (mask_pixmap && !mask_fallback) { 1780 DBG(("%s: mask is already on the GPU, try to use GPU\n", 1781 __FUNCTION__)); 1782 return false; 1783 } 1784 1785 /* However if the dst is not on the GPU and we need to 1786 * render one of the sources using the CPU, we may 1787 * as well do the entire operation in place onthe CPU. 1788 */ 1789 if (src_fallback) { 1790 DBG(("%s: dst is on the CPU and src will fallback\n", 1791 __FUNCTION__)); 1792 return true; 1793 } 1794 1795 if (mask && mask_fallback) { 1796 DBG(("%s: dst is on the CPU and mask will fallback\n", 1797 __FUNCTION__)); 1798 return true; 1799 } 1800 1801 if (too_large(dst_pixmap->drawable.width, 1802 dst_pixmap->drawable.height) && 1803 dst_is_cpu(dst_pixmap)) { 1804 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 1805 return true; 1806 } 1807 1808 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 1809 __FUNCTION__)); 1810 return dst_use_cpu(dst_pixmap); 1811} 1812 1813static int 1814reuse_source(struct sna *sna, 1815 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 1816 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 1817{ 1818 uint32_t color; 1819 1820 if (src_x != msk_x || src_y != msk_y) 1821 return false; 1822 1823 if (sna_picture_is_solid(mask, &color)) 1824 return gen2_composite_solid_init(sna, mc, color); 1825 1826 if (sc->is_solid) 1827 return false; 1828 1829 if (src == mask) { 1830 DBG(("%s: mask is source\n", __FUNCTION__)); 1831 *mc = *sc; 1832 mc->bo = kgem_bo_reference(mc->bo); 1833 return true; 1834 } 1835 1836 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 1837 return false; 1838 1839 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 1840 1841 if (!sna_transform_equal(src->transform, mask->transform)) 1842 return false; 1843 1844 if (!sna_picture_alphamap_equal(src, mask)) 1845 return false; 1846 1847 if (!gen2_check_repeat(mask)) 1848 return false; 1849 1850 if (!gen2_check_filter(mask)) 1851 return false; 1852 1853 if (!gen2_check_format(sna, mask)) 1854 return false; 1855 1856 DBG(("%s: reusing source channel for mask with a twist\n", 1857 __FUNCTION__)); 1858 1859 *mc = *sc; 1860 mc->repeat = mask->repeat ? mask->repeatType : RepeatNone; 1861 mc->filter = mask->filter; 1862 mc->pict_format = mask->format; 1863 mc->bo = kgem_bo_reference(mc->bo); 1864 return true; 1865} 1866 1867static bool 1868gen2_render_composite(struct sna *sna, 1869 uint8_t op, 1870 PicturePtr src, 1871 PicturePtr mask, 1872 PicturePtr dst, 1873 int16_t src_x, int16_t src_y, 1874 int16_t mask_x, int16_t mask_y, 1875 int16_t dst_x, int16_t dst_y, 1876 int16_t width, int16_t height, 1877 unsigned flags, 1878 struct sna_composite_op *tmp) 1879{ 1880 DBG(("%s()\n", __FUNCTION__)); 1881 1882 if (op >= ARRAY_SIZE(gen2_blend_op)) { 1883 DBG(("%s: fallback due to unhandled blend op: %d\n", 1884 __FUNCTION__, op)); 1885 return false; 1886 } 1887 1888 if (mask == NULL && 1889 sna_blt_composite(sna, op, src, dst, 1890 src_x, src_y, 1891 dst_x, dst_y, 1892 width, height, 1893 flags, tmp)) 1894 return true; 1895 1896 if (gen2_composite_fallback(sna, src, mask, dst)) 1897 goto fallback; 1898 1899 if (need_tiling(sna, width, height)) 1900 return sna_tiling_composite(op, src, mask, dst, 1901 src_x, src_y, 1902 mask_x, mask_y, 1903 dst_x, dst_y, 1904 width, height, 1905 tmp); 1906 1907 tmp->op = op; 1908 sna_render_composite_redirect_init(tmp); 1909 1910 if (!gen2_composite_set_target(sna, tmp, dst, 1911 dst_x, dst_y, width, height, 1912 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 1913 DBG(("%s: unable to set render target\n", 1914 __FUNCTION__)); 1915 goto fallback; 1916 } 1917 1918 switch (gen2_composite_picture(sna, src, &tmp->src, 1919 src_x, src_y, 1920 width, height, 1921 dst_x, dst_y, 1922 dst->polyMode == PolyModePrecise)) { 1923 case -1: 1924 DBG(("%s: fallback -- unable to prepare source\n", 1925 __FUNCTION__)); 1926 goto cleanup_dst; 1927 case 0: 1928 gen2_composite_solid_init(sna, &tmp->src, 0); 1929 break; 1930 case 1: 1931 if (mask == NULL && tmp->src.bo && 1932 sna_blt_composite__convert(sna, 1933 dst_x, dst_y, width, height, 1934 tmp)) 1935 return true; 1936 break; 1937 } 1938 1939 if (mask) { 1940 if (!reuse_source(sna, 1941 src, &tmp->src, src_x, src_y, 1942 mask, &tmp->mask, mask_x, mask_y)) { 1943 switch (gen2_composite_picture(sna, mask, &tmp->mask, 1944 mask_x, mask_y, 1945 width, height, 1946 dst_x, dst_y, 1947 dst->polyMode == PolyModePrecise)) { 1948 case -1: 1949 DBG(("%s: fallback -- unable to prepare mask\n", 1950 __FUNCTION__)); 1951 goto cleanup_src; 1952 case 0: 1953 gen2_composite_solid_init(sna, &tmp->mask, 0); 1954 case 1: 1955 break; 1956 } 1957 } 1958 1959 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 1960 /* Check if it's component alpha that relies on a source alpha 1961 * and on the source value. We can only get one of those 1962 * into the single source value that we get to blend with. 1963 */ 1964 tmp->has_component_alpha = true; 1965 if (gen2_blend_op[op].src_alpha && 1966 (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 1967 if (op != PictOpOver) { 1968 DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n", 1969 __FUNCTION__, 1970 gen2_blend_op[op].src_blend)); 1971 goto cleanup_src; 1972 } 1973 1974 tmp->need_magic_ca_pass = true; 1975 tmp->op = PictOpOutReverse; 1976 } 1977 } 1978 1979 /* convert solid to a texture (pure convenience) */ 1980 if (tmp->mask.is_solid && tmp->src.is_solid) { 1981 assert(tmp->mask.is_affine); 1982 tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel); 1983 if (!tmp->mask.bo) 1984 goto cleanup_src; 1985 } 1986 } 1987 1988 tmp->floats_per_vertex = 2; 1989 if (!tmp->src.is_solid) 1990 tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3; 1991 if (tmp->mask.bo) 1992 tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3; 1993 tmp->floats_per_rect = 3*tmp->floats_per_vertex; 1994 1995 tmp->prim_emit = gen2_emit_composite_primitive; 1996 if (tmp->mask.bo) { 1997 if (tmp->mask.transform == NULL) { 1998 if (tmp->src.is_solid) { 1999 assert(tmp->floats_per_rect == 12); 2000#if defined(sse2) && !defined(__x86_64__) 2001 if (sna->cpu_features & SSE2) { 2002 tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2; 2003 } else 2004#endif 2005 { 2006 tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask; 2007 } 2008 } 2009 } 2010 } else { 2011 if (tmp->src.is_solid) { 2012 assert(tmp->floats_per_rect == 6); 2013#if defined(sse2) && !defined(__x86_64__) 2014 if (sna->cpu_features & SSE2) { 2015 tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2; 2016 } else 2017#endif 2018 { 2019 tmp->prim_emit = gen2_emit_composite_primitive_constant; 2020 } 2021 } else if (tmp->src.is_linear) { 2022 assert(tmp->floats_per_rect == 12); 2023#if defined(sse2) && !defined(__x86_64__) 2024 if (sna->cpu_features & SSE2) { 2025 tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2; 2026 } else 2027#endif 2028 { 2029 tmp->prim_emit = gen2_emit_composite_primitive_linear; 2030 } 2031 } else if (tmp->src.transform == NULL) { 2032 assert(tmp->floats_per_rect == 12); 2033#if defined(sse2) && !defined(__x86_64__) 2034 if (sna->cpu_features & SSE2) { 2035 tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2; 2036 } else 2037#endif 2038 { 2039 tmp->prim_emit = gen2_emit_composite_primitive_identity; 2040 } 2041 } else if (tmp->src.is_affine) { 2042 assert(tmp->floats_per_rect == 12); 2043 tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 2044 tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 2045#if defined(sse2) && !defined(__x86_64__) 2046 if (sna->cpu_features & SSE2) { 2047 tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2; 2048 } else 2049#endif 2050 { 2051 tmp->prim_emit = gen2_emit_composite_primitive_affine; 2052 } 2053 } 2054 } 2055 2056 tmp->blt = gen2_render_composite_blt; 2057 tmp->box = gen2_render_composite_box; 2058 tmp->boxes = gen2_render_composite_boxes; 2059 tmp->done = gen2_render_composite_done; 2060 2061 if (!kgem_check_bo(&sna->kgem, 2062 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2063 NULL)) { 2064 kgem_submit(&sna->kgem); 2065 if (!kgem_check_bo(&sna->kgem, 2066 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2067 NULL)) { 2068 DBG(("%s: fallback, operation does not fit into GTT\n", 2069 __FUNCTION__)); 2070 goto cleanup_mask; 2071 } 2072 } 2073 2074 gen2_emit_composite_state(sna, tmp); 2075 return true; 2076 2077cleanup_mask: 2078 if (tmp->mask.bo) { 2079 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2080 tmp->mask.bo = NULL; 2081 } 2082cleanup_src: 2083 if (tmp->src.bo) { 2084 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2085 tmp->src.bo = NULL; 2086 } 2087cleanup_dst: 2088 if (tmp->redirect.real_bo) { 2089 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2090 tmp->redirect.real_bo = NULL; 2091 } 2092fallback: 2093 return (mask == NULL && 2094 sna_blt_composite(sna, op, src, dst, 2095 src_x, src_y, 2096 dst_x, dst_y, 2097 width, height, 2098 flags | COMPOSITE_FALLBACK, tmp)); 2099} 2100 2101fastcall static void 2102gen2_emit_composite_spans_primitive_constant(struct sna *sna, 2103 const struct sna_composite_spans_op *op, 2104 const BoxRec *box, 2105 float opacity) 2106{ 2107 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2108 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2109 sna->kgem.nbatch += 9; 2110 2111 v[0] = op->base.dst.x + box->x2; 2112 v[1] = op->base.dst.y + box->y2; 2113 *((uint32_t *)v + 2) = alpha; 2114 2115 v[3] = op->base.dst.x + box->x1; 2116 v[4] = v[1]; 2117 *((uint32_t *)v + 5) = alpha; 2118 2119 v[6] = v[3]; 2120 v[7] = op->base.dst.y + box->y1; 2121 *((uint32_t *)v + 8) = alpha; 2122} 2123 2124fastcall static void 2125gen2_emit_composite_spans_primitive_linear(struct sna *sna, 2126 const struct sna_composite_spans_op *op, 2127 const BoxRec *box, 2128 float opacity) 2129{ 2130 union { 2131 float f; 2132 uint32_t u; 2133 } alpha; 2134 2135 alpha.u = (uint8_t)(255 * opacity) << 24; 2136 2137 gen2_emit_composite_dstcoord(sna, 2138 op->base.dst.x + box->x2, 2139 op->base.dst.y + box->y2); 2140 VERTEX(alpha.f); 2141 gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2142 2143 gen2_emit_composite_dstcoord(sna, 2144 op->base.dst.x + box->x1, 2145 op->base.dst.y + box->y2); 2146 VERTEX(alpha.f); 2147 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2148 2149 gen2_emit_composite_dstcoord(sna, 2150 op->base.dst.x + box->x1, 2151 op->base.dst.y + box->y1); 2152 VERTEX(alpha.f); 2153 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2154} 2155 2156fastcall static void 2157gen2_emit_composite_spans_primitive_identity_source(struct sna *sna, 2158 const struct sna_composite_spans_op *op, 2159 const BoxRec *box, 2160 float opacity) 2161{ 2162 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2163 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2164 sna->kgem.nbatch += 15; 2165 2166 v[0] = op->base.dst.x + box->x2; 2167 v[1] = op->base.dst.y + box->y2; 2168 *((uint32_t *)v + 2) = alpha; 2169 v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2170 v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2171 2172 v[5] = op->base.dst.x + box->x1; 2173 v[6] = v[1]; 2174 *((uint32_t *)v + 7) = alpha; 2175 v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2176 v[9] = v[4]; 2177 2178 v[10] = v[5]; 2179 v[11] = op->base.dst.y + box->y1; 2180 *((uint32_t *)v + 12) = alpha; 2181 v[13] = v[8]; 2182 v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2183} 2184 2185fastcall static void 2186gen2_emit_composite_spans_primitive_affine_source(struct sna *sna, 2187 const struct sna_composite_spans_op *op, 2188 const BoxRec *box, 2189 float opacity) 2190{ 2191 PictTransform *transform = op->base.src.transform; 2192 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2193 float *v; 2194 2195 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2196 sna->kgem.nbatch += 15; 2197 2198 v[0] = op->base.dst.x + box->x2; 2199 v[6] = v[1] = op->base.dst.y + box->y2; 2200 v[10] = v[5] = op->base.dst.x + box->x1; 2201 v[11] = op->base.dst.y + box->y1; 2202 *((uint32_t *)v + 2) = alpha; 2203 *((uint32_t *)v + 7) = alpha; 2204 *((uint32_t *)v + 12) = alpha; 2205 2206 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 2207 (int)op->base.src.offset[1] + box->y2, 2208 transform, op->base.src.scale, 2209 &v[3], &v[4]); 2210 2211 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2212 (int)op->base.src.offset[1] + box->y2, 2213 transform, op->base.src.scale, 2214 &v[8], &v[9]); 2215 2216 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2217 (int)op->base.src.offset[1] + box->y1, 2218 transform, op->base.src.scale, 2219 &v[13], &v[14]); 2220} 2221 2222#if defined(sse2) && !defined(__x86_64__) 2223sse2 fastcall static void 2224gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna, 2225 const struct sna_composite_spans_op *op, 2226 const BoxRec *box, 2227 float opacity) 2228{ 2229 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2230 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2231 sna->kgem.nbatch += 9; 2232 2233 v[0] = op->base.dst.x + box->x2; 2234 v[1] = op->base.dst.y + box->y2; 2235 *((uint32_t *)v + 2) = alpha; 2236 2237 v[3] = op->base.dst.x + box->x1; 2238 v[4] = v[1]; 2239 *((uint32_t *)v + 5) = alpha; 2240 2241 v[6] = v[3]; 2242 v[7] = op->base.dst.y + box->y1; 2243 *((uint32_t *)v + 8) = alpha; 2244} 2245 2246sse2 fastcall static void 2247gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna, 2248 const struct sna_composite_spans_op *op, 2249 const BoxRec *box, 2250 float opacity) 2251{ 2252 union { 2253 float f; 2254 uint32_t u; 2255 } alpha; 2256 2257 alpha.u = (uint8_t)(255 * opacity) << 24; 2258 2259 gen2_emit_composite_dstcoord(sna, 2260 op->base.dst.x + box->x2, 2261 op->base.dst.y + box->y2); 2262 VERTEX(alpha.f); 2263 gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2264 2265 gen2_emit_composite_dstcoord(sna, 2266 op->base.dst.x + box->x1, 2267 op->base.dst.y + box->y2); 2268 VERTEX(alpha.f); 2269 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2270 2271 gen2_emit_composite_dstcoord(sna, 2272 op->base.dst.x + box->x1, 2273 op->base.dst.y + box->y1); 2274 VERTEX(alpha.f); 2275 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2276} 2277 2278sse2 fastcall static void 2279gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, 2280 const struct sna_composite_spans_op *op, 2281 const BoxRec *box, 2282 float opacity) 2283{ 2284 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2285 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2286 sna->kgem.nbatch += 15; 2287 2288 v[0] = op->base.dst.x + box->x2; 2289 v[1] = op->base.dst.y + box->y2; 2290 *((uint32_t *)v + 2) = alpha; 2291 v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2292 v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2293 2294 v[5] = op->base.dst.x + box->x1; 2295 v[6] = v[1]; 2296 *((uint32_t *)v + 7) = alpha; 2297 v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2298 v[9] = v[4]; 2299 2300 v[10] = v[5]; 2301 v[11] = op->base.dst.y + box->y1; 2302 *((uint32_t *)v + 12) = alpha; 2303 v[13] = v[8]; 2304 v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2305} 2306 2307sse2 fastcall static void 2308gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, 2309 const struct sna_composite_spans_op *op, 2310 const BoxRec *box, 2311 float opacity) 2312{ 2313 PictTransform *transform = op->base.src.transform; 2314 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2315 float *v; 2316 2317 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2318 sna->kgem.nbatch += 15; 2319 2320 v[0] = op->base.dst.x + box->x2; 2321 v[6] = v[1] = op->base.dst.y + box->y2; 2322 v[10] = v[5] = op->base.dst.x + box->x1; 2323 v[11] = op->base.dst.y + box->y1; 2324 *((uint32_t *)v + 2) = alpha; 2325 *((uint32_t *)v + 7) = alpha; 2326 *((uint32_t *)v + 12) = alpha; 2327 2328 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 2329 (int)op->base.src.offset[1] + box->y2, 2330 transform, op->base.src.scale, 2331 &v[3], &v[4]); 2332 2333 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2334 (int)op->base.src.offset[1] + box->y2, 2335 transform, op->base.src.scale, 2336 &v[8], &v[9]); 2337 2338 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2339 (int)op->base.src.offset[1] + box->y1, 2340 transform, op->base.src.scale, 2341 &v[13], &v[14]); 2342} 2343#endif 2344 2345static void 2346gen2_emit_composite_spans_vertex(struct sna *sna, 2347 const struct sna_composite_spans_op *op, 2348 int16_t x, int16_t y, 2349 float opacity) 2350{ 2351 gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); 2352 BATCH((uint8_t)(opacity * 255) << 24); 2353 assert(!op->base.src.is_solid); 2354 if (op->base.src.is_linear) 2355 gen2_emit_composite_linear(sna, &op->base.src, x, y); 2356 else 2357 gen2_emit_composite_texcoord(sna, &op->base.src, x, y); 2358} 2359 2360fastcall static void 2361gen2_emit_composite_spans_primitive(struct sna *sna, 2362 const struct sna_composite_spans_op *op, 2363 const BoxRec *box, 2364 float opacity) 2365{ 2366 gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity); 2367 gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity); 2368 gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity); 2369} 2370 2371static void 2372gen2_emit_spans_pipeline(struct sna *sna, 2373 const struct sna_composite_spans_op *op) 2374{ 2375 uint32_t cblend, ablend; 2376 uint32_t unwind; 2377 2378 cblend = 2379 TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE | 2380 TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA | 2381 TB0C_OUTPUT_WRITE_CURRENT; 2382 ablend = 2383 TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE | 2384 TB0A_ARG1_SEL_DIFFUSE | 2385 TB0A_OUTPUT_WRITE_CURRENT; 2386 2387 if (op->base.src.is_solid) { 2388 ablend |= TB0A_ARG2_SEL_SPECULAR; 2389 cblend |= TB0C_ARG2_SEL_SPECULAR; 2390 if (op->base.dst.format == PICT_a8) 2391 cblend |= TB0C_ARG2_REPLICATE_ALPHA; 2392 } else if (op->base.dst.format == PICT_a8) { 2393 ablend |= TB0A_ARG2_SEL_TEXEL0; 2394 cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA; 2395 } else { 2396 if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0) 2397 cblend |= TB0C_ARG2_SEL_TEXEL0; 2398 else 2399 cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT; 2400 2401 if (op->base.src.is_opaque) 2402 ablend |= TB0A_ARG2_SEL_ONE; 2403 else 2404 ablend |= TB0A_ARG2_SEL_TEXEL0; 2405 } 2406 2407 unwind = sna->kgem.nbatch; 2408 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2409 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2410 BATCH(cblend); 2411 BATCH(ablend); 2412 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2413 sna->kgem.batch + unwind + 1, 2414 2 * sizeof(uint32_t)) == 0) 2415 sna->kgem.nbatch = unwind; 2416 else 2417 sna->render_state.gen2.ls2 = unwind; 2418} 2419 2420static void gen2_emit_composite_spans_state(struct sna *sna, 2421 const struct sna_composite_spans_op *op) 2422{ 2423 uint32_t unwind; 2424 2425 gen2_get_batch(sna, &op->base); 2426 gen2_emit_target(sna, &op->base); 2427 2428 unwind = sna->kgem.nbatch; 2429 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2430 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2431 BATCH(!op->base.src.is_solid << 12); 2432 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT); 2433 BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format)); 2434 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2435 sna->kgem.batch + unwind + 1, 2436 3 * sizeof(uint32_t)) == 0) 2437 sna->kgem.nbatch = unwind; 2438 else 2439 sna->render_state.gen2.ls1 = unwind; 2440 2441 gen2_disable_logic_op(sna); 2442 gen2_emit_spans_pipeline(sna, op); 2443 2444 if (op->base.src.is_solid) { 2445 if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) { 2446 BATCH(_3DSTATE_DFLT_SPECULAR_CMD); 2447 BATCH(op->base.src.u.gen2.pixel); 2448 sna->render_state.gen2.specular = op->base.src.u.gen2.pixel; 2449 } 2450 } else { 2451 uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD | 2452 (op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D); 2453 if (sna->render_state.gen2.vft != v) { 2454 BATCH(v); 2455 sna->render_state.gen2.vft = v; 2456 } 2457 gen2_emit_texture(sna, &op->base.src, 0); 2458 } 2459} 2460 2461fastcall static void 2462gen2_render_composite_spans_box(struct sna *sna, 2463 const struct sna_composite_spans_op *op, 2464 const BoxRec *box, float opacity) 2465{ 2466 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2467 __FUNCTION__, 2468 op->base.src.offset[0], op->base.src.offset[1], 2469 opacity, 2470 op->base.dst.x, op->base.dst.y, 2471 box->x1, box->y1, 2472 box->x2 - box->x1, 2473 box->y2 - box->y1)); 2474 2475 if (gen2_get_rectangles(sna, &op->base, 1) == 0) { 2476 gen2_emit_composite_spans_state(sna, op); 2477 gen2_get_rectangles(sna, &op->base, 1); 2478 } 2479 2480 op->prim_emit(sna, op, box, opacity); 2481} 2482 2483static void 2484gen2_render_composite_spans_boxes(struct sna *sna, 2485 const struct sna_composite_spans_op *op, 2486 const BoxRec *box, int nbox, 2487 float opacity) 2488{ 2489 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2490 __FUNCTION__, nbox, 2491 op->base.src.offset[0], op->base.src.offset[1], 2492 opacity, 2493 op->base.dst.x, op->base.dst.y)); 2494 2495 do { 2496 int nbox_this_time; 2497 2498 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2499 if (nbox_this_time == 0) { 2500 gen2_emit_composite_spans_state(sna, op); 2501 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2502 } 2503 nbox -= nbox_this_time; 2504 2505 do { 2506 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2507 box->x1, box->y1, 2508 box->x2 - box->x1, 2509 box->y2 - box->y1)); 2510 2511 op->prim_emit(sna, op, box++, opacity); 2512 } while (--nbox_this_time); 2513 } while (nbox); 2514} 2515 2516fastcall static void 2517gen2_render_composite_spans_done(struct sna *sna, 2518 const struct sna_composite_spans_op *op) 2519{ 2520 DBG(("%s()\n", __FUNCTION__)); 2521 2522 gen2_vertex_flush(sna, &op->base); 2523 2524 if (op->base.src.bo) 2525 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2526 2527 sna_render_composite_redirect_done(sna, &op->base); 2528} 2529 2530static bool 2531gen2_check_composite_spans(struct sna *sna, 2532 uint8_t op, PicturePtr src, PicturePtr dst, 2533 int16_t width, int16_t height, unsigned flags) 2534{ 2535 if (op >= ARRAY_SIZE(gen2_blend_op)) 2536 return false; 2537 2538 if (gen2_composite_fallback(sna, src, NULL, dst)) 2539 return false; 2540 2541 if (need_tiling(sna, width, height)) { 2542 if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2543 DBG(("%s: fallback, tiled operation not on GPU\n", 2544 __FUNCTION__)); 2545 return false; 2546 } 2547 } 2548 2549 return true; 2550} 2551 2552static bool 2553gen2_render_composite_spans(struct sna *sna, 2554 uint8_t op, 2555 PicturePtr src, 2556 PicturePtr dst, 2557 int16_t src_x, int16_t src_y, 2558 int16_t dst_x, int16_t dst_y, 2559 int16_t width, int16_t height, 2560 unsigned flags, 2561 struct sna_composite_spans_op *tmp) 2562{ 2563 DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, 2564 src_x, src_y, dst_x, dst_y, width, height)); 2565 2566 assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags)); 2567 if (need_tiling(sna, width, height)) { 2568 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2569 __FUNCTION__, width, height)); 2570 return sna_tiling_composite_spans(op, src, dst, 2571 src_x, src_y, dst_x, dst_y, 2572 width, height, flags, tmp); 2573 } 2574 2575 tmp->base.op = op; 2576 sna_render_composite_redirect_init(&tmp->base); 2577 if (!gen2_composite_set_target(sna, &tmp->base, dst, 2578 dst_x, dst_y, width, height, 2579 true)) { 2580 DBG(("%s: unable to set render target\n", 2581 __FUNCTION__)); 2582 return false; 2583 } 2584 2585 switch (gen2_composite_picture(sna, src, &tmp->base.src, 2586 src_x, src_y, 2587 width, height, 2588 dst_x, dst_y, 2589 dst->polyMode == PolyModePrecise)) { 2590 case -1: 2591 goto cleanup_dst; 2592 case 0: 2593 gen2_composite_solid_init(sna, &tmp->base.src, 0); 2594 case 1: 2595 break; 2596 } 2597 assert(tmp->base.src.bo || tmp->base.src.is_solid); 2598 2599 tmp->prim_emit = gen2_emit_composite_spans_primitive; 2600 tmp->base.floats_per_vertex = 3; 2601 if (tmp->base.src.is_solid) { 2602#if defined(sse2) && !defined(__x86_64__) 2603 if (sna->cpu_features & SSE2) { 2604 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2; 2605 } else 2606#endif 2607 { 2608 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant; 2609 } 2610 } else if (tmp->base.src.is_linear) { 2611 tmp->base.floats_per_vertex += 2; 2612#if defined(sse2) && !defined(__x86_64__) 2613 if (sna->cpu_features & SSE2) { 2614 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2; 2615 } else 2616#endif 2617 { 2618 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear; 2619 } 2620 } else { 2621 assert(tmp->base.src.bo); 2622 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3; 2623 if (tmp->base.src.transform == NULL) { 2624#if defined(sse2) && !defined(__x86_64__) 2625 if (sna->cpu_features & SSE2) { 2626 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2; 2627 } else 2628#endif 2629 { 2630 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source; 2631 } 2632 } else if (tmp->base.src.is_affine) { 2633 tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; 2634 tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; 2635#if defined(sse2) && !defined(__x86_64__) 2636 if (sna->cpu_features & SSE2) { 2637 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2; 2638 } else 2639#endif 2640 { 2641 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source; 2642 } 2643 } 2644 } 2645 tmp->base.mask.bo = NULL; 2646 tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex; 2647 2648 tmp->box = gen2_render_composite_spans_box; 2649 tmp->boxes = gen2_render_composite_spans_boxes; 2650 tmp->done = gen2_render_composite_spans_done; 2651 2652 if (!kgem_check_bo(&sna->kgem, 2653 tmp->base.dst.bo, tmp->base.src.bo, 2654 NULL)) { 2655 kgem_submit(&sna->kgem); 2656 if (!kgem_check_bo(&sna->kgem, 2657 tmp->base.dst.bo, tmp->base.src.bo, 2658 NULL)) 2659 goto cleanup_src; 2660 } 2661 2662 gen2_emit_composite_spans_state(sna, tmp); 2663 return true; 2664 2665cleanup_src: 2666 if (tmp->base.src.bo) 2667 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2668cleanup_dst: 2669 if (tmp->base.redirect.real_bo) 2670 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2671 return false; 2672} 2673 2674static void 2675gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op) 2676{ 2677 uint32_t blend, unwind; 2678 2679 unwind = sna->kgem.nbatch; 2680 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2681 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2682 2683 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 2684 TB0C_ARG1_SEL_DIFFUSE | 2685 TB0C_OUTPUT_WRITE_CURRENT; 2686 if (op->dst.format == PICT_a8) 2687 blend |= TB0C_ARG1_REPLICATE_ALPHA; 2688 BATCH(blend); 2689 2690 BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 2691 TB0A_ARG1_SEL_DIFFUSE | 2692 TB0A_OUTPUT_WRITE_CURRENT); 2693 2694 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2695 sna->kgem.batch + unwind + 1, 2696 2 * sizeof(uint32_t)) == 0) 2697 sna->kgem.nbatch = unwind; 2698 else 2699 sna->render_state.gen2.ls2 = unwind; 2700} 2701 2702static void gen2_emit_fill_composite_state(struct sna *sna, 2703 const struct sna_composite_op *op, 2704 uint32_t pixel) 2705{ 2706 uint32_t ls1; 2707 2708 gen2_get_batch(sna, op); 2709 gen2_emit_target(sna, op); 2710 2711 ls1 = sna->kgem.nbatch; 2712 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2713 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2714 BATCH(0); 2715 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2716 BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format)); 2717 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2718 sna->kgem.batch + ls1 + 1, 2719 3 * sizeof(uint32_t)) == 0) 2720 sna->kgem.nbatch = ls1; 2721 else 2722 sna->render_state.gen2.ls1 = ls1; 2723 2724 gen2_emit_fill_pipeline(sna, op); 2725 2726 if (pixel != sna->render_state.gen2.diffuse) { 2727 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2728 BATCH(pixel); 2729 sna->render_state.gen2.diffuse = pixel; 2730 } 2731} 2732 2733static bool 2734gen2_render_fill_boxes_try_blt(struct sna *sna, 2735 CARD8 op, PictFormat format, 2736 const xRenderColor *color, 2737 const DrawableRec *dst, struct kgem_bo *dst_bo, 2738 const BoxRec *box, int n) 2739{ 2740 uint8_t alu; 2741 uint32_t pixel; 2742 2743 if (op > PictOpSrc) 2744 return false; 2745 2746 if (op == PictOpClear) { 2747 alu = GXclear; 2748 pixel = 0; 2749 } else if (!sna_get_pixel_from_rgba(&pixel, 2750 color->red, 2751 color->green, 2752 color->blue, 2753 color->alpha, 2754 format)) 2755 return false; 2756 else 2757 alu = GXcopy; 2758 2759 return sna_blt_fill_boxes(sna, alu, 2760 dst_bo, dst->bitsPerPixel, 2761 pixel, box, n); 2762} 2763 2764static bool 2765gen2_render_fill_boxes(struct sna *sna, 2766 CARD8 op, 2767 PictFormat format, 2768 const xRenderColor *color, 2769 const DrawableRec *dst, struct kgem_bo *dst_bo, 2770 const BoxRec *box, int n) 2771{ 2772 struct sna_composite_op tmp; 2773 uint32_t pixel; 2774 2775 if (op >= ARRAY_SIZE(gen2_blend_op)) { 2776 DBG(("%s: fallback due to unhandled blend op: %d\n", 2777 __FUNCTION__, op)); 2778 return false; 2779 } 2780 2781#if NO_FILL_BOXES 2782 return gen2_render_fill_boxes_try_blt(sna, op, format, color, 2783 dst, dst_bo, 2784 box, n); 2785#endif 2786 if (gen2_render_fill_boxes_try_blt(sna, op, format, color, 2787 dst, dst_bo, 2788 box, n)) 2789 return true; 2790 2791 2792 DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", 2793 __FUNCTION__, op, (int)format, 2794 color->red, color->green, color->blue, color->alpha)); 2795 2796 if (too_large(dst->width, dst->height) || 2797 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH || 2798 !gen2_check_dst_format(format)) { 2799 DBG(("%s: try blt, too large or incompatible destination\n", 2800 __FUNCTION__)); 2801 if (!gen2_check_dst_format(format)) 2802 return false; 2803 2804 assert(dst_bo->pitch >= 8); 2805 return sna_tiling_fill_boxes(sna, op, format, color, 2806 dst, dst_bo, box, n); 2807 } 2808 2809 if (op == PictOpClear) 2810 pixel = 0; 2811 else if (!sna_get_pixel_from_rgba(&pixel, 2812 color->red, 2813 color->green, 2814 color->blue, 2815 color->alpha, 2816 PICT_a8r8g8b8)) 2817 return false; 2818 2819 DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n", 2820 __FUNCTION__, op, (int)format, pixel)); 2821 2822 memset(&tmp, 0, sizeof(tmp)); 2823 tmp.op = op; 2824 tmp.dst.pixmap = (PixmapPtr)dst; 2825 tmp.dst.width = dst->width; 2826 tmp.dst.height = dst->height; 2827 tmp.dst.format = format; 2828 tmp.dst.bo = dst_bo; 2829 tmp.floats_per_vertex = 2; 2830 tmp.floats_per_rect = 6; 2831 2832 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2833 kgem_submit(&sna->kgem); 2834 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) 2835 return false; 2836 } 2837 2838 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2839 2840 do { 2841 int n_this_time = gen2_get_rectangles(sna, &tmp, n); 2842 if (n_this_time == 0) { 2843 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2844 n_this_time = gen2_get_rectangles(sna, &tmp, n); 2845 } 2846 n -= n_this_time; 2847 2848 do { 2849 DBG((" (%d, %d), (%d, %d): %x\n", 2850 box->x1, box->y1, box->x2, box->y2, pixel)); 2851 VERTEX(box->x2); 2852 VERTEX(box->y2); 2853 VERTEX(box->x1); 2854 VERTEX(box->y2); 2855 VERTEX(box->x1); 2856 VERTEX(box->y1); 2857 box++; 2858 } while (--n_this_time); 2859 } while (n); 2860 2861 gen2_vertex_flush(sna, &tmp); 2862 return true; 2863} 2864 2865static void gen2_emit_fill_state(struct sna *sna, 2866 const struct sna_composite_op *op) 2867{ 2868 uint32_t ls1; 2869 2870 gen2_get_batch(sna, op); 2871 gen2_emit_target(sna, op); 2872 2873 ls1 = sna->kgem.nbatch; 2874 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2875 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2876 BATCH(0); 2877 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2878 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 2879 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2880 sna->kgem.batch + ls1 + 1, 2881 3 * sizeof(uint32_t)) == 0) 2882 sna->kgem.nbatch = ls1; 2883 else 2884 sna->render_state.gen2.ls1 = ls1; 2885 2886 gen2_enable_logic_op(sna, op->op); 2887 gen2_emit_fill_pipeline(sna, op); 2888 2889 if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 2890 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2891 BATCH(op->src.u.gen2.pixel); 2892 sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 2893 } 2894} 2895 2896static void 2897gen2_render_fill_op_blt(struct sna *sna, 2898 const struct sna_fill_op *op, 2899 int16_t x, int16_t y, int16_t w, int16_t h) 2900{ 2901 if (!gen2_get_rectangles(sna, &op->base, 1)) { 2902 gen2_emit_fill_state(sna, &op->base); 2903 gen2_get_rectangles(sna, &op->base, 1); 2904 } 2905 2906 VERTEX(x+w); 2907 VERTEX(y+h); 2908 VERTEX(x); 2909 VERTEX(y+h); 2910 VERTEX(x); 2911 VERTEX(y); 2912} 2913 2914fastcall static void 2915gen2_render_fill_op_box(struct sna *sna, 2916 const struct sna_fill_op *op, 2917 const BoxRec *box) 2918{ 2919 if (!gen2_get_rectangles(sna, &op->base, 1)) { 2920 gen2_emit_fill_state(sna, &op->base); 2921 gen2_get_rectangles(sna, &op->base, 1); 2922 } 2923 2924 VERTEX(box->x2); 2925 VERTEX(box->y2); 2926 VERTEX(box->x1); 2927 VERTEX(box->y2); 2928 VERTEX(box->x1); 2929 VERTEX(box->y1); 2930} 2931 2932fastcall static void 2933gen2_render_fill_op_boxes(struct sna *sna, 2934 const struct sna_fill_op *op, 2935 const BoxRec *box, 2936 int nbox) 2937{ 2938 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 2939 box->x1, box->y1, box->x2, box->y2, nbox)); 2940 2941 do { 2942 int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2943 if (nbox_this_time == 0) { 2944 gen2_emit_fill_state(sna, &op->base); 2945 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2946 } 2947 nbox -= nbox_this_time; 2948 2949 do { 2950 VERTEX(box->x2); 2951 VERTEX(box->y2); 2952 VERTEX(box->x1); 2953 VERTEX(box->y2); 2954 VERTEX(box->x1); 2955 VERTEX(box->y1); 2956 box++; 2957 } while (--nbox_this_time); 2958 } while (nbox); 2959} 2960 2961static void 2962gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2963{ 2964 gen2_vertex_flush(sna, &op->base); 2965} 2966 2967static bool 2968gen2_render_fill(struct sna *sna, uint8_t alu, 2969 PixmapPtr dst, struct kgem_bo *dst_bo, 2970 uint32_t color, unsigned flags, 2971 struct sna_fill_op *tmp) 2972{ 2973#if NO_FILL 2974 return sna_blt_fill(sna, alu, 2975 dst_bo, dst->drawable.bitsPerPixel, 2976 color, 2977 tmp); 2978#endif 2979 2980 /* Prefer to use the BLT if already engaged */ 2981 if (sna_blt_fill(sna, alu, 2982 dst_bo, dst->drawable.bitsPerPixel, 2983 color, 2984 tmp)) 2985 return true; 2986 2987 /* Must use the BLT if we can't RENDER... */ 2988 if (too_large(dst->drawable.width, dst->drawable.height) || 2989 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) 2990 return false; 2991 2992 tmp->base.op = alu; 2993 tmp->base.dst.pixmap = dst; 2994 tmp->base.dst.width = dst->drawable.width; 2995 tmp->base.dst.height = dst->drawable.height; 2996 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2997 tmp->base.dst.bo = dst_bo; 2998 tmp->base.dst.x = tmp->base.dst.y = 0; 2999 tmp->base.floats_per_vertex = 2; 3000 tmp->base.floats_per_rect = 6; 3001 3002 tmp->base.src.u.gen2.pixel = 3003 sna_rgba_for_color(color, dst->drawable.depth); 3004 3005 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3006 kgem_submit(&sna->kgem); 3007 return sna_blt_fill(sna, alu, 3008 dst_bo, dst->drawable.bitsPerPixel, 3009 color, 3010 tmp); 3011 } 3012 3013 tmp->blt = gen2_render_fill_op_blt; 3014 tmp->box = gen2_render_fill_op_box; 3015 tmp->boxes = gen2_render_fill_op_boxes; 3016 tmp->points = NULL; 3017 tmp->done = gen2_render_fill_op_done; 3018 3019 gen2_emit_fill_state(sna, &tmp->base); 3020 return true; 3021} 3022 3023static bool 3024gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3025 uint32_t color, 3026 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3027 uint8_t alu) 3028{ 3029 BoxRec box; 3030 3031 box.x1 = x1; 3032 box.y1 = y1; 3033 box.x2 = x2; 3034 box.y2 = y2; 3035 3036 return sna_blt_fill_boxes(sna, alu, 3037 bo, dst->drawable.bitsPerPixel, 3038 color, &box, 1); 3039} 3040 3041static bool 3042gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3043 uint32_t color, 3044 int16_t x1, int16_t y1, 3045 int16_t x2, int16_t y2, 3046 uint8_t alu) 3047{ 3048 struct sna_composite_op tmp; 3049 3050#if NO_FILL_ONE 3051 return gen2_render_fill_one_try_blt(sna, dst, bo, color, 3052 x1, y1, x2, y2, alu); 3053#endif 3054 3055 /* Prefer to use the BLT if already engaged */ 3056 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3057 x1, y1, x2, y2, alu)) 3058 return true; 3059 3060 /* Must use the BLT if we can't RENDER... */ 3061 if (too_large(dst->drawable.width, dst->drawable.height) || 3062 bo->pitch < 8 || bo->pitch > MAX_3D_PITCH) 3063 return false; 3064 3065 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3066 kgem_submit(&sna->kgem); 3067 3068 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3069 x1, y1, x2, y2, alu)) 3070 return true; 3071 3072 if (!kgem_check_bo(&sna->kgem, bo, NULL)) 3073 return false; 3074 } 3075 3076 tmp.op = alu; 3077 tmp.dst.pixmap = dst; 3078 tmp.dst.width = dst->drawable.width; 3079 tmp.dst.height = dst->drawable.height; 3080 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3081 tmp.dst.bo = bo; 3082 tmp.floats_per_vertex = 2; 3083 tmp.floats_per_rect = 6; 3084 tmp.need_magic_ca_pass = false; 3085 3086 tmp.src.u.gen2.pixel = 3087 sna_rgba_for_color(color, dst->drawable.depth); 3088 3089 gen2_emit_fill_state(sna, &tmp); 3090 gen2_get_rectangles(sna, &tmp, 1); 3091 DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__, 3092 x1, y1, x2, y2, tmp.src.u.gen2.pixel)); 3093 VERTEX(x2); 3094 VERTEX(y2); 3095 VERTEX(x1); 3096 VERTEX(y2); 3097 VERTEX(x1); 3098 VERTEX(y1); 3099 gen2_vertex_flush(sna, &tmp); 3100 3101 return true; 3102} 3103 3104static void 3105gen2_render_copy_setup_source(struct sna_composite_channel *channel, 3106 const DrawableRec *draw, 3107 struct kgem_bo *bo) 3108{ 3109 assert(draw->width && draw->height); 3110 3111 channel->filter = PictFilterNearest; 3112 channel->repeat = RepeatNone; 3113 channel->width = draw->width; 3114 channel->height = draw->height; 3115 channel->scale[0] = 1.f/draw->width; 3116 channel->scale[1] = 1.f/draw->height; 3117 channel->offset[0] = 0; 3118 channel->offset[1] = 0; 3119 channel->pict_format = sna_format_for_depth(draw->depth); 3120 channel->bo = bo; 3121 channel->is_affine = 1; 3122 3123 DBG(("%s: source=%d, (%dx%d), format=%08x\n", 3124 __FUNCTION__, bo->handle, 3125 channel->width, channel->height, 3126 channel->pict_format)); 3127} 3128 3129static void 3130gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op) 3131{ 3132 uint32_t blend, unwind; 3133 3134 unwind = sna->kgem.nbatch; 3135 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 3136 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 3137 3138 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 3139 TB0C_OUTPUT_WRITE_CURRENT; 3140 if (op->dst.format == PICT_a8) 3141 blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0; 3142 else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 3143 blend |= TB0C_ARG1_SEL_TEXEL0; 3144 else 3145 blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 3146 BATCH(blend); 3147 3148 blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 3149 TB0A_OUTPUT_WRITE_CURRENT; 3150 if (PICT_FORMAT_A(op->src.pict_format) == 0) 3151 blend |= TB0A_ARG1_SEL_ONE; 3152 else 3153 blend |= TB0A_ARG1_SEL_TEXEL0; 3154 BATCH(blend); 3155 3156 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 3157 sna->kgem.batch + unwind + 1, 3158 2 * sizeof(uint32_t)) == 0) 3159 sna->kgem.nbatch = unwind; 3160 else 3161 sna->render_state.gen2.ls2 = unwind; 3162} 3163 3164static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op) 3165{ 3166 uint32_t ls1, v; 3167 3168 gen2_get_batch(sna, op); 3169 3170 if (kgem_bo_is_dirty(op->src.bo)) { 3171 if (op->src.bo == op->dst.bo) 3172 BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 3173 else 3174 BATCH(_3DSTATE_MODES_5_CMD | 3175 PIPELINE_FLUSH_RENDER_CACHE | 3176 PIPELINE_FLUSH_TEXTURE_CACHE); 3177 kgem_clear_dirty(&sna->kgem); 3178 } 3179 gen2_emit_target(sna, op); 3180 3181 ls1 = sna->kgem.nbatch; 3182 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 3183 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 3184 BATCH(1<<12); 3185 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 3186 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 3187 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 3188 sna->kgem.batch + ls1 + 1, 3189 3 * sizeof(uint32_t)) == 0) 3190 sna->kgem.nbatch = ls1; 3191 else 3192 sna->render_state.gen2.ls1 = ls1; 3193 3194 gen2_enable_logic_op(sna, op->op); 3195 gen2_emit_copy_pipeline(sna, op); 3196 3197 v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; 3198 if (sna->render_state.gen2.vft != v) { 3199 BATCH(v); 3200 sna->render_state.gen2.vft = v; 3201 } 3202 3203 gen2_emit_texture(sna, &op->src, 0); 3204} 3205 3206static bool 3207gen2_render_copy_boxes(struct sna *sna, uint8_t alu, 3208 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3209 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3210 const BoxRec *box, int n, unsigned flags) 3211{ 3212 struct sna_composite_op tmp; 3213 3214#if NO_COPY_BOXES 3215 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3216 return false; 3217 3218 return sna_blt_copy_boxes(sna, alu, 3219 src_bo, src_dx, src_dy, 3220 dst_bo, dst_dx, dst_dy, 3221 dst->drawable.bitsPerPixel, 3222 box, n); 3223#endif 3224 3225 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 3226 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 3227 3228 if (sna_blt_compare_depth(src, dst) && 3229 sna_blt_copy_boxes(sna, alu, 3230 src_bo, src_dx, src_dy, 3231 dst_bo, dst_dx, dst_dy, 3232 dst->bitsPerPixel, 3233 box, n)) 3234 return true; 3235 3236 if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */ 3237 too_large(src->width, src->height) || 3238 src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) { 3239fallback: 3240 return sna_blt_copy_boxes_fallback(sna, alu, 3241 src, src_bo, src_dx, src_dy, 3242 dst, dst_bo, dst_dx, dst_dy, 3243 box, n); 3244 } 3245 3246 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3247 kgem_submit(&sna->kgem); 3248 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3249 goto fallback; 3250 } 3251 3252 assert(dst_bo->pitch >= 8); 3253 3254 memset(&tmp, 0, sizeof(tmp)); 3255 tmp.op = alu; 3256 3257 tmp.dst.pixmap = (PixmapPtr)dst; 3258 tmp.dst.width = dst->width; 3259 tmp.dst.height = dst->height; 3260 tmp.dst.format = sna_format_for_depth(dst->depth); 3261 tmp.dst.bo = dst_bo; 3262 tmp.dst.x = tmp.dst.y = 0; 3263 tmp.damage = NULL; 3264 3265 DBG(("%s: target=%d, format=%08x, size=%dx%d\n", 3266 __FUNCTION__, dst_bo->handle, 3267 (unsigned)tmp.dst.format, 3268 tmp.dst.width, 3269 tmp.dst.height)); 3270 3271 sna_render_composite_redirect_init(&tmp); 3272 if (too_large(tmp.dst.width, tmp.dst.height) || 3273 dst_bo->pitch > MAX_3D_PITCH) { 3274 BoxRec extents = box[0]; 3275 int i; 3276 3277 for (i = 1; i < n; i++) { 3278 if (box[i].x1 < extents.x1) 3279 extents.x1 = box[i].x1; 3280 if (box[i].y1 < extents.y1) 3281 extents.y1 = box[i].y1; 3282 3283 if (box[i].x2 > extents.x2) 3284 extents.x2 = box[i].x2; 3285 if (box[i].y2 > extents.y2) 3286 extents.y2 = box[i].y2; 3287 } 3288 if (!sna_render_composite_redirect(sna, &tmp, 3289 extents.x1 + dst_dx, 3290 extents.y1 + dst_dy, 3291 extents.x2 - extents.x1, 3292 extents.y2 - extents.y1, 3293 alu != GXcopy || n > 1)) 3294 goto fallback_tiled; 3295 } 3296 3297 tmp.floats_per_vertex = 4; 3298 tmp.floats_per_rect = 12; 3299 3300 dst_dx += tmp.dst.x; 3301 dst_dy += tmp.dst.y; 3302 tmp.dst.x = tmp.dst.y = 0; 3303 3304 gen2_render_copy_setup_source(&tmp.src, src, src_bo); 3305 gen2_emit_copy_state(sna, &tmp); 3306 do { 3307 int n_this_time; 3308 3309 n_this_time = gen2_get_rectangles(sna, &tmp, n); 3310 if (n_this_time == 0) { 3311 gen2_emit_copy_state(sna, &tmp); 3312 n_this_time = gen2_get_rectangles(sna, &tmp, n); 3313 } 3314 n -= n_this_time; 3315 3316 do { 3317 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3318 box->x1 + src_dx, box->y1 + src_dy, 3319 box->x1 + dst_dx, box->y1 + dst_dy, 3320 box->x2 - box->x1, box->y2 - box->y1)); 3321 VERTEX(box->x2 + dst_dx); 3322 VERTEX(box->y2 + dst_dy); 3323 VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); 3324 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3325 3326 VERTEX(box->x1 + dst_dx); 3327 VERTEX(box->y2 + dst_dy); 3328 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3329 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3330 3331 VERTEX(box->x1 + dst_dx); 3332 VERTEX(box->y1 + dst_dy); 3333 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3334 VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); 3335 3336 box++; 3337 } while (--n_this_time); 3338 } while (n); 3339 3340 gen2_vertex_flush(sna, &tmp); 3341 sna_render_composite_redirect_done(sna, &tmp); 3342 return true; 3343 3344fallback_tiled: 3345 return sna_tiling_copy_boxes(sna, alu, 3346 src, src_bo, src_dx, src_dy, 3347 dst, dst_bo, dst_dx, dst_dy, 3348 box, n); 3349} 3350 3351static void 3352gen2_render_copy_blt(struct sna *sna, 3353 const struct sna_copy_op *op, 3354 int16_t sx, int16_t sy, 3355 int16_t w, int16_t h, 3356 int16_t dx, int16_t dy) 3357{ 3358 if (!gen2_get_rectangles(sna, &op->base, 1)) { 3359 gen2_emit_copy_state(sna, &op->base); 3360 gen2_get_rectangles(sna, &op->base, 1); 3361 } 3362 3363 VERTEX(dx+w); 3364 VERTEX(dy+h); 3365 VERTEX((sx+w)*op->base.src.scale[0]); 3366 VERTEX((sy+h)*op->base.src.scale[1]); 3367 3368 VERTEX(dx); 3369 VERTEX(dy+h); 3370 VERTEX(sx*op->base.src.scale[0]); 3371 VERTEX((sy+h)*op->base.src.scale[1]); 3372 3373 VERTEX(dx); 3374 VERTEX(dy); 3375 VERTEX(sx*op->base.src.scale[0]); 3376 VERTEX(sy*op->base.src.scale[1]); 3377} 3378 3379static void 3380gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3381{ 3382 gen2_vertex_flush(sna, &op->base); 3383} 3384 3385static bool 3386gen2_render_copy(struct sna *sna, uint8_t alu, 3387 PixmapPtr src, struct kgem_bo *src_bo, 3388 PixmapPtr dst, struct kgem_bo *dst_bo, 3389 struct sna_copy_op *tmp) 3390{ 3391#if NO_COPY 3392 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3393 return false; 3394 3395 return sna_blt_copy(sna, alu, 3396 src_bo, dst_bo, 3397 dst->drawable.bitsPerPixel, 3398 tmp); 3399#endif 3400 3401 /* Prefer to use the BLT */ 3402 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3403 sna_blt_copy(sna, alu, 3404 src_bo, dst_bo, 3405 dst->drawable.bitsPerPixel, 3406 tmp)) 3407 return true; 3408 3409 /* Must use the BLT if we can't RENDER... */ 3410 if (too_large(src->drawable.width, src->drawable.height) || 3411 too_large(dst->drawable.width, dst->drawable.height) || 3412 src_bo->pitch > MAX_3D_PITCH || 3413 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) { 3414fallback: 3415 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3416 return false; 3417 3418 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3419 dst->drawable.bitsPerPixel, 3420 tmp); 3421 } 3422 3423 tmp->base.op = alu; 3424 3425 tmp->base.dst.pixmap = dst; 3426 tmp->base.dst.width = dst->drawable.width; 3427 tmp->base.dst.height = dst->drawable.height; 3428 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3429 tmp->base.dst.bo = dst_bo; 3430 3431 gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo); 3432 tmp->base.mask.bo = NULL; 3433 3434 tmp->base.floats_per_vertex = 4; 3435 tmp->base.floats_per_rect = 12; 3436 3437 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3438 kgem_submit(&sna->kgem); 3439 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3440 goto fallback; 3441 } 3442 3443 tmp->blt = gen2_render_copy_blt; 3444 tmp->done = gen2_render_copy_done; 3445 3446 gen2_emit_composite_state(sna, &tmp->base); 3447 return true; 3448} 3449 3450static void 3451gen2_render_reset(struct sna *sna) 3452{ 3453 sna->render_state.gen2.need_invariant = true; 3454 sna->render_state.gen2.logic_op_enabled = 0; 3455 sna->render_state.gen2.target = 0; 3456 3457 sna->render_state.gen2.ls1 = 0; 3458 sna->render_state.gen2.ls2 = 0; 3459 sna->render_state.gen2.vft = 0; 3460 3461 sna->render_state.gen2.diffuse = 0x0c0ffee0; 3462 sna->render_state.gen2.specular = 0x0c0ffee0; 3463} 3464 3465static void 3466gen2_render_flush(struct sna *sna) 3467{ 3468 assert(sna->render.vertex_index == 0); 3469 assert(sna->render.vertex_offset == 0); 3470} 3471 3472static void 3473gen2_render_context_switch(struct kgem *kgem, 3474 int new_mode) 3475{ 3476 struct sna *sna = container_of(kgem, struct sna, kgem); 3477 3478 if (!kgem->nbatch) 3479 return; 3480 3481 /* Reload BLT registers following a lost context */ 3482 sna->blt_state.fill_bo = 0; 3483 3484 if (kgem_ring_is_idle(kgem, kgem->ring)) { 3485 DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); 3486 _kgem_submit(kgem); 3487 } 3488} 3489 3490const char *gen2_render_init(struct sna *sna, const char *backend) 3491{ 3492 struct sna_render *render = &sna->render; 3493 3494 sna->kgem.context_switch = gen2_render_context_switch; 3495 3496 /* Use the BLT (and overlay) for everything except when forced to 3497 * use the texture combiners. 3498 */ 3499#if !NO_COMPOSITE 3500 render->composite = gen2_render_composite; 3501 render->prefer_gpu |= PREFER_GPU_RENDER; 3502#endif 3503#if !NO_COMPOSITE_SPANS 3504 render->check_composite_spans = gen2_check_composite_spans; 3505 render->composite_spans = gen2_render_composite_spans; 3506 render->prefer_gpu |= PREFER_GPU_SPANS; 3507#endif 3508 render->fill_boxes = gen2_render_fill_boxes; 3509 render->fill = gen2_render_fill; 3510 render->fill_one = gen2_render_fill_one; 3511 render->copy = gen2_render_copy; 3512 render->copy_boxes = gen2_render_copy_boxes; 3513 3514 /* XXX YUV color space conversion for video? */ 3515 3516 render->reset = gen2_render_reset; 3517 render->flush = gen2_render_flush; 3518 3519 render->max_3d_size = MAX_3D_SIZE; 3520 render->max_3d_pitch = MAX_3D_PITCH; 3521 return "Almador (gen2)"; 3522} 3523