gen2_render.c revision 03b705cf
/*
 * Copyright © 2006,2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"

#include "gen2_render.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

#define BATCH(v) batch_emit(sna, v)
#define BATCH_F(v) batch_emit_float(sna, v)
#define VERTEX(v) batch_emit_float(sna, v)

static const struct blendinfo {
	bool dst_alpha;
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen2_blend_op[] = {
	/* Clear */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */
	{0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */
	{1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */
	{1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};

static const struct formatinfo {
	unsigned int fmt;
	uint32_t card_fmt;
} i8xx_tex_formats[] = {
	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
}, i85x_tex_formats[] = {
	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
};
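/*
 * For reference (a worked example, not part of the original comments):
 * the table above feeds the fixed-function blend equation
 *	dst = src * src_blend + dst * dst_blend
 * so the Over entry {BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA} realises
 * dst = src + (1 - src.A) * dst. A hypothetical bounds-checked lookup
 * (illustrative only, not used below) might read:
 */
static inline bool
gen2_blend_factors_example(int op, uint32_t *src, uint32_t *dst)
{
	if (op >= (int)ARRAY_SIZE(gen2_blend_op))
		return false; /* unhandled operator, caller must fall back */
	*src = gen2_blend_op[op].src_blend;
	*dst = gen2_blend_op[op].dst_blend;
	return true;
}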
static inline bool
too_large(int width, int height)
{
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}

static inline uint32_t
gen2_buf_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	default: assert(0);
	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; /* fall through */
	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; /* fall through */
	case I915_TILING_NONE: break;
	}
	return v;
}

static uint32_t
gen2_get_dst_format(uint32_t format)
{
#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
	switch (format) {
	default:
		assert(0);
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return COLR_BUF_ARGB8888 | BIAS;
	case PICT_r5g6b5:
		return COLR_BUF_RGB565 | BIAS;
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
		return COLR_BUF_ARGB1555 | BIAS;
	case PICT_a8:
		return COLR_BUF_8BIT | BIAS;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return COLR_BUF_ARGB4444 | BIAS;
	}
#undef BIAS
}

static bool
gen2_check_dst_format(uint32_t format)
{
	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_r5g6b5:
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
	case PICT_a8:
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return true;
	default:
		return false;
	}
}

static uint32_t
gen2_get_card_format(struct sna *sna, uint32_t format)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
		if (i8xx_tex_formats[i].fmt == format)
			return i8xx_tex_formats[i].card_fmt;

	if (sna->kgem.gen < 021) {
		/* Whilst these are not directly supported on 830/845,
		 * we only enable them when we can implicitly convert
		 * them to a supported variant through the texture
		 * combiners.
		 *
		 * i8xx_tex_formats[] is laid out so that entry 1+i is the
		 * alpha-carrying twin of i85x_tex_formats[i] (ARGB8888 for
		 * XRGB8888, ABGR8888 for XBGR8888).
		 */
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				return i8xx_tex_formats[1+i].card_fmt;
	} else {
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				return i85x_tex_formats[i].card_fmt;
	}

	assert(0);
	return 0;
}

static bool
gen2_check_format(struct sna *sna, PicturePtr p)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
		if (i8xx_tex_formats[i].fmt == p->format)
			return true;

	if (sna->kgem.gen >= 021) {
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == p->format)
				return true;
	}

	return false;
}

static uint32_t
gen2_sampler_tiling_bits(uint32_t tiling)
{
	uint32_t bits = 0;
	switch (tiling) {
	default:
		assert(0);
	case I915_TILING_Y:
		bits |= TM0S1_TILE_WALK;
		/* fall through */
	case I915_TILING_X:
		bits |= TM0S1_TILED_SURFACE;
		/* fall through */
	case I915_TILING_NONE:
		break;
	}
	return bits;
}

static bool
gen2_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static bool
gen2_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}
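/*
 * Taken together, a render target is usable by this backend only if its
 * format, size and pitch all fit the gen2 limits. A hypothetical combined
 * check (illustrative only; the real code asserts these separately):
 */
static inline bool
gen2_check_dst_example(uint32_t format, int width, int height, int pitch)
{
	return gen2_check_dst_format(format) &&
		!too_large(width, height) &&
		pitch >= 8 && pitch <= MAX_3D_PITCH;
}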
static void
gen2_emit_texture(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int unit)
{
	uint32_t wrap_mode_u, wrap_mode_v;
	uint32_t texcoordtype;
	uint32_t filter;

	if (channel->is_affine)
		texcoordtype = TEXCOORDTYPE_CARTESIAN;
	else
		texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;

	switch (channel->repeat) {
	default:
		assert(0);
	case RepeatNone:
		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case RepeatNormal:
		wrap_mode_u = TEXCOORDMODE_WRAP;
		break;
	case RepeatPad:
		wrap_mode_u = TEXCOORDMODE_CLAMP;
		break;
	case RepeatReflect:
		wrap_mode_u = TEXCOORDMODE_MIRROR;
		break;
	}
	if (channel->is_linear)
		wrap_mode_v = TEXCOORDMODE_WRAP;
	else
		wrap_mode_v = wrap_mode_u;

	switch (channel->filter) {
	default:
		assert(0);
	case PictFilterNearest:
		filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	case PictFilterBilinear:
		filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	}

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     channel->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((channel->width - 1) << TM0S1_WIDTH_SHIFT) |
	      gen2_get_card_format(sna, channel->pict_format) |
	      gen2_sampler_tiling_bits(channel->bo->tiling));
	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	BATCH(filter);
	BATCH(0);	/* default color */

	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
}
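/*
 * To make the TM0S1 packing above concrete: a hypothetical 64x64
 * PICT_a8r8g8b8 linear (untiled) map would load its first sampler dword
 * as below (illustrative helper, not used elsewhere):
 */
static inline uint32_t
gen2_tm0s1_example(struct sna *sna)
{
	return (63 << TM0S1_HEIGHT_SHIFT) |	/* height - 1 */
	       (63 << TM0S1_WIDTH_SHIFT) |	/* width - 1 */
	       gen2_get_card_format(sna, PICT_a8r8g8b8) |
	       gen2_sampler_tiling_bits(I915_TILING_NONE);
}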
static void
gen2_get_blend_factors(const struct sna_composite_op *op,
		       int blend,
		       uint32_t *c_out,
		       uint32_t *a_out)
{
	uint32_t cblend, ablend;

	/* If component alpha is active in the mask and the blend operation
	 * uses the source alpha, then we know we don't need the source
	 * value (otherwise we would have hit a fallback earlier), so we
	 * provide the source alpha (src.A * mask.X) as output color.
	 * Conversely, if CA is set and we don't need the source alpha, then
	 * we produce the source value (src.X * mask.X) and the source alpha
	 * is unused. Otherwise, we provide the non-CA source value
	 * (src.X * mask.A).
	 *
	 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
	 * pictures, but we need to implement it for 830/845 and there's no
	 * harm done in leaving it in.
	 */
	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;

	/* Get the source picture's channels into TBx_ARG1 */
	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
	    op->dst.format == PICT_a8) {
		/* Producing source alpha value, so the first set of channels
		 * is src.A instead of src.X. We also do this if the destination
		 * is a8, in which case src.G is what's written, and the other
		 * channels are ignored.
		 */
		if (op->src.is_solid) {
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
		} else {
			ablend |= TB0A_ARG1_SEL_TEXEL0;
			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
		}
	} else {
		if (op->src.is_solid)
			cblend |= TB0C_ARG1_SEL_DIFFUSE;
		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
			cblend |= TB0C_ARG1_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
		if (op->src.is_solid)
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
		else if (op->src.is_opaque)
			ablend |= TB0A_ARG1_SEL_ONE;
		else
			ablend |= TB0A_ARG1_SEL_TEXEL0;
	}

	if (op->mask.bo) {
		if (op->src.is_solid) {
			cblend |= TB0C_ARG2_SEL_TEXEL0;
			ablend |= TB0A_ARG2_SEL_TEXEL0;
		} else {
			cblend |= TB0C_ARG2_SEL_TEXEL1;
			ablend |= TB0A_ARG2_SEL_TEXEL1;
		}

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else if (op->mask.is_solid) {
		cblend |= TB0C_ARG2_SEL_DIFFUSE;
		ablend |= TB0A_ARG2_SEL_DIFFUSE;

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else {
		cblend |= TB0C_OP_ARG1;
		ablend |= TB0A_OP_ARG1;
	}

	*c_out = cblend;
	*a_out = ablend;
}
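/*
 * Resolved by hand for the simplest case (solid source, no mask), the
 * texture-blend stage degenerates to forwarding the diffuse colour
 * (sketch only, never compiled):
 */
#if 0
	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X |
		 TB0C_OUTPUT_WRITE_CURRENT |
		 TB0C_ARG1_SEL_DIFFUSE | TB0C_OP_ARG1;
	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT |
		 TB0A_ARG1_SEL_DIFFUSE | TB0A_OP_ARG1;
#endif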
static uint32_t gen2_get_blend_cntl(int op,
				    bool has_component_alpha,
				    uint32_t dst_format)
{
	uint32_t sblend, dblend;

	if (op <= PictOpSrc)
		return S8_ENABLE_COLOR_BUFFER_WRITE;

	sblend = gen2_blend_op[op].src_blend;
	dblend = gen2_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) {
		if (sblend == BLENDFACTOR_DST_ALPHA)
			sblend = BLENDFACTOR_ONE;
		else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
			sblend = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's
	 * alpha.
	 */
	if (has_component_alpha && gen2_blend_op[op].src_alpha) {
		if (dblend == BLENDFACTOR_SRC_ALPHA)
			dblend = BLENDFACTOR_SRC_COLR;
		else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
			dblend = BLENDFACTOR_INV_SRC_COLR;
	}

	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
		dblend << S8_DST_BLEND_FACTOR_SHIFT |
		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
		S8_ENABLE_COLOR_BUFFER_WRITE);
}

static void gen2_emit_invariant(struct sna *sna)
{
	int i;

	for (i = 0; i < 4; i++) {
		BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
		BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
		      DISABLE_TEX_STREAM_BUMP |
		      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
		      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
		BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
		BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
	}

	BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
	BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
	      TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
	      TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
	      TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));

	BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);

	BATCH(_3DSTATE_VERTEX_TRANSFORM);
	BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);

	BATCH(_3DSTATE_W_STATE_CMD);
	BATCH(MAGIC_W_STATE_DWORD1);
	BATCH_F(1.0);

	BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
	      DISABLE_INDPT_ALPHA_BLEND |
	      ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);

	BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
	BATCH(0);

	BATCH(_3DSTATE_MODES_1_CMD |
	      ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
	      ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
	      ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));

	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP |
	      DISABLE_STENCIL_TEST |
	      DISABLE_DEPTH_BIAS |
	      DISABLE_SPEC_ADD |
	      DISABLE_FOG |
	      DISABLE_ALPHA_TEST |
	      DISABLE_DEPTH_TEST |
	      ENABLE_COLOR_BLEND);

	BATCH(_3DSTATE_ENABLES_2_CMD |
	      DISABLE_STENCIL_WRITE |
	      DISABLE_DITHER |
	      DISABLE_DEPTH_WRITE |
	      ENABLE_COLOR_MASK |
	      ENABLE_COLOR_WRITE |
	      ENABLE_TEX_CACHE);

	BATCH(_3DSTATE_STIPPLE);
	BATCH(0);

	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_COLOR |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      DISABLE_TEX_CNTRL_STAGE |
	      TEXOP_SCALE_1X |
	      TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_ALPHA |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_COLOR |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_ALPHA |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);

#define INVARIANT_SIZE 35

	sna->render_state.gen2.need_invariant = false;
}
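/*
 * Worked example: gen2_get_blend_cntl(PictOpOver, false, PICT_a8r8g8b8)
 * resolves to
 *	BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
 *	BLENDFACTOR_INV_SRC_ALPHA << S8_DST_BLEND_FACTOR_SHIFT |
 *	S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
 *	S8_ENABLE_COLOR_BUFFER_WRITE
 * i.e. the familiar dst = src + (1 - src.A) * dst.
 */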
static void
gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) {
		DBG(("%s: flushing batch: size %d > %d\n",
		     __FUNCTION__, INVARIANT_SIZE+40,
		     sna->kgem.surface-sna->kgem.nbatch));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_reloc(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: reloc %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nreloc + 3,
		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_exec(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: exec %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nexec + 3,
		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen2.need_invariant)
		gen2_emit_invariant(sna);
}

static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
{
	assert(!too_large(op->dst.width, op->dst.height));
	assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH);
	assert(sna->render.vertex_offset == 0);

	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
		kgem_bo_mark_dirty(op->dst.bo);
		return;
	}

	BATCH(_3DSTATE_BUF_INFO_CMD);
	BATCH(BUF_3D_ID_COLOR_BACK |
	      gen2_buf_tiling(op->dst.bo->tiling) |
	      BUF_3D_PITCH(op->dst.bo->pitch));
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     op->dst.bo,
			     I915_GEM_DOMAIN_RENDER << 16 |
			     I915_GEM_DOMAIN_RENDER,
			     0));

	BATCH(_3DSTATE_DST_BUF_VARS_CMD);
	BATCH(gen2_get_dst_format(op->dst.format));

	BATCH(_3DSTATE_DRAW_RECT_CMD);
	BATCH(0);
	BATCH(0);	/* ymin, xmin */
	BATCH(DRAW_YMAX(op->dst.height - 1) |
	      DRAW_XMAX(op->dst.width - 1));
	BATCH(0);	/* yorig, xorig */

	sna->render_state.gen2.target = op->dst.bo->unique_id;
}

static void gen2_disable_logic_op(struct sna *sna)
{
	if (!sna->render_state.gen2.logic_op_enabled)
		return;

	DBG(("%s\n", __FUNCTION__));

	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);

	sna->render_state.gen2.logic_op_enabled = 0;
}

static void gen2_enable_logic_op(struct sna *sna, int op)
{
	static const uint8_t logic_op[] = {
		LOGICOP_CLEAR,		/* GXclear */
		LOGICOP_AND,		/* GXand */
		LOGICOP_AND_RVRSE,	/* GXandReverse */
		LOGICOP_COPY,		/* GXcopy */
		LOGICOP_AND_INV,	/* GXandInverted */
		LOGICOP_NOOP,		/* GXnoop */
		LOGICOP_XOR,		/* GXxor */
		LOGICOP_OR,		/* GXor */
		LOGICOP_NOR,		/* GXnor */
		LOGICOP_EQUIV,		/* GXequiv */
		LOGICOP_INV,		/* GXinvert */
		LOGICOP_OR_RVRSE,	/* GXorReverse */
		LOGICOP_COPY_INV,	/* GXcopyInverted */
		LOGICOP_OR_INV,		/* GXorInverted */
		LOGICOP_NAND,		/* GXnand */
		LOGICOP_SET		/* GXset */
	};

	if (sna->render_state.gen2.logic_op_enabled != op+1) {
		if (!sna->render_state.gen2.logic_op_enabled) {
			if (op == GXclear || op == GXcopy)
				return;

			DBG(("%s\n", __FUNCTION__));

			BATCH(_3DSTATE_ENABLES_1_CMD |
			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
		}

		BATCH(_3DSTATE_MODES_4_CMD |
		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
		sna->render_state.gen2.logic_op_enabled = op+1;
	}
}
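/*
 * The logic_op[] table above is indexed directly by the X core GX* ALU
 * codes (GXclear == 0 ... GXset == 0xf), so a caller brackets a run of
 * rectangles roughly like this (sketch only, never compiled):
 */
#if 0
	gen2_enable_logic_op(sna, GXxor);	/* LOGICOP_XOR */
	/* ... emit rectangles ... */
	gen2_disable_logic_op(sna);		/* restore blending */
#endif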
static void gen2_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t texcoordfmt, v, unwind;
	uint32_t cblend, ablend;
	int tex;

	gen2_get_batch(sna, op);

	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen2_emit_target(sna, op);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op,
				  op->has_component_alpha,
				  op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	gen2_get_blend_factors(op, op->op, &cblend, &ablend);
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	tex = texcoordfmt = 0;
	if (!op->src.is_solid) {
		if (op->src.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->src, tex++);
	} else {
		if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->src.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
		}
	}
	if (op->mask.bo) {
		if (op->mask.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->mask, tex++);
	} else if (op->mask.is_solid) {
		if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->mask.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
		}
	}

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}

static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
	VERTEX(dstX);
	VERTEX(dstY);
}

inline static void
gen2_emit_composite_linear(struct sna *sna,
			   const struct sna_composite_channel *channel,
			   int16_t x, int16_t y)
{
	float v;

	v = (x * channel->u.linear.dx +
	     y * channel->u.linear.dy +
	     channel->u.linear.offset);
	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
	VERTEX(v);
	VERTEX(v);
}

static void
gen2_emit_composite_texcoord(struct sna *sna,
			     const struct sna_composite_channel *channel,
			     int16_t x, int16_t y)
{
	float s = 0, t = 0, w = 1;

	x += channel->offset[0];
	y += channel->offset[1];

	if (channel->is_affine) {
		sna_get_transformed_coordinates(x, y,
						channel->transform,
						&s, &t);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
	} else {
		sna_get_transformed_coordinates_3d(x, y,
						   channel->transform,
						   &s, &t, &w);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
		VERTEX(w);
	}
}

static void
gen2_emit_composite_vertex(struct sna *sna,
			   const struct sna_composite_op *op,
			   int16_t srcX, int16_t srcY,
			   int16_t mskX, int16_t mskY,
			   int16_t dstX, int16_t dstY)
{
	gen2_emit_composite_dstcoord(sna, dstX, dstY);
	if (op->src.is_linear)
		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
	else if (!op->src.is_solid)
		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);

	if (op->mask.is_linear)
		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
	else if (op->mask.bo)
		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
}
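/*
 * Per-vertex layout emitted above, matching the S2/vertex-format setup
 * in gen2_emit_composite_state(): position (x, y), then (s, t) for each
 * affine channel or (s, t, w) for a projective one, in src-then-mask
 * order. Solid channels contribute no coordinates at all.
 */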
fastcall static void
gen2_emit_composite_primitive(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	gen2_emit_composite_vertex(sna, op,
				   r->src.x + r->width,
				   r->src.y + r->height,
				   r->mask.x + r->width,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x + r->width,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y + r->height,
				   r->mask.x,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y,
				   r->mask.x,
				   r->mask.y,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y);
}

fastcall static void
gen2_emit_composite_primitive_constant(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

fastcall static void
gen2_emit_composite_primitive_linear(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}

fastcall static void
gen2_emit_composite_primitive_identity(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
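/*
 * All of the primitive emitters write one RECTLIST rectangle as three
 * vertices in the order bottom-right, bottom-left, top-left; the
 * hardware infers the fourth corner. In the identity case above that is
 * v[0..3], v[4..7] and v[8..11], each (x, y, s, t).
 */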
fastcall static void
gen2_emit_composite_primitive_affine(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

fastcall static void
gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}

#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen2_emit_composite_primitive_constant__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_linear__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_identity__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
sse2 fastcall static void
gen2_emit_composite_primitive_affine__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

sse2 fastcall static void
gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
							   const struct sna_composite_op *op,
							   const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif

/* The single texture-blend stage cannot apply both halves of a
 * component-alpha Over, so the operation is split: the main pass runs
 * PictOpOutReverse (dst *= 1 - src.A * mask) and this second pass
 * replays the very same vertices with an ADD (ONE, ONE) blend to
 * accumulate src * mask.
 */
static void gen2_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	uint32_t ablend, cblend, *src, *dst;
	int n;

	if (!op->need_magic_ca_pass)
		return;

	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
	     sna->kgem.nbatch, sna->render.vertex_offset));

	assert(op->mask.bo);
	assert(op->has_component_alpha);

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
	      S8_ENABLE_COLOR_BUFFER_WRITE);
	sna->render_state.gen2.ls1 = 0;

	gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	sna->render_state.gen2.ls2 = 0;

	src = sna->kgem.batch + sna->render.vertex_offset;
	dst = sna->kgem.batch + sna->kgem.nbatch;
	n = 1 + sna->render.vertex_index;
	sna->kgem.nbatch += n;
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	while (n--)
		*dst++ = *src++;
}

static void gen2_vertex_flush(struct sna *sna,
			      const struct sna_composite_op *op)
{
	if (sna->render.vertex_index == 0)
		return;

	sna->kgem.batch[sna->render.vertex_offset] |=
		sna->render.vertex_index - 1;

	gen2_magic_ca_pass(sna, op);

	sna->render.vertex_offset = 0;
	sna->render.vertex_index = 0;
}
inline static int gen2_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem = batch_space(sna), size, need;

	DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
	     __FUNCTION__, want, op->floats_per_vertex, rem));

	assert(op->floats_per_vertex);
	assert(op->floats_per_rect == 3 * op->floats_per_vertex);

	need = 1;
	size = op->floats_per_rect;
	if (op->need_magic_ca_pass)
		need += 6 + size*sna->render.vertex_index, size *= 2;

	DBG(("%s: want=%d, need=%d, size=%d, rem=%d\n",
	     __FUNCTION__, want, need, size, rem));
	if (rem < need + size) {
		gen2_vertex_flush(sna, op);
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
		return 0;
	}

	rem -= need;
	if (sna->render.vertex_offset == 0) {
		if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
		    (PRIM3D_INLINE | PRIM3D_RECTLIST)) {
			uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
			assert(*b & 0xffff);
			sna->render.vertex_index = 1 + (*b & 0xffff);
			*b = PRIM3D_INLINE | PRIM3D_RECTLIST;
			sna->render.vertex_offset = sna->kgem.nbatch - 1;
			assert(!op->need_magic_ca_pass);
		} else {
			sna->render.vertex_offset = sna->kgem.nbatch;
			BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
		}
	}

	if (want > 1 && want * size > rem)
		want = rem / size;

	assert(want);
	sna->render.vertex_index += want*op->floats_per_rect;
	return want;
}

fastcall static void
gen2_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	if (!gen2_get_rectangles(sna, op, 1)) {
		gen2_emit_composite_state(sna, op);
		gen2_get_rectangles(sna, op, 1);
	}

	op->prim_emit(sna, op, r);
}

fastcall static void
gen2_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	if (!gen2_get_rectangles(sna, op, 1)) {
		gen2_emit_composite_state(sna, op);
		gen2_get_rectangles(sna, op, 1);
	}

	DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	r.dst.x = box->x1; r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen2_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	do {
		int nbox_this_time;

		nbox_this_time = gen2_get_rectangles(sna, op, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_composite_state(sna, op);
			nbox_this_time = gen2_get_rectangles(sna, op, nbox);
		}
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			r.dst.x = box->x1; r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void gen2_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	gen2_vertex_flush(sna, op);

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);
	sna_render_composite_redirect_done(sna, op);
}

static bool
gen2_composite_solid_init(struct sna *sna,
			  struct sna_composite_channel *channel,
			  uint32_t color)
{
	channel->filter = PictFilterNearest;
	channel->repeat = RepeatNormal;
	channel->is_solid = true;
	channel->is_affine = true;
	channel->width = 1;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->bo = NULL;
	channel->u.gen2.pixel = color;

	channel->scale[0] = channel->scale[1] = 1;
	channel->offset[0] = channel->offset[1] = 0;
	return true;
}
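/*
 * Usage sketch: a solid channel carries no bo at all, just the
 * premultiplied ARGB pixel for _3DSTATE_DFLT_DIFFUSE_CMD, e.g. an
 * opaque red source would be set up as
 *	gen2_composite_solid_init(sna, &tmp->src, 0xffff0000);
 */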
#define xFixedToDouble(f) pixman_fixed_to_double(f)

static bool
gen2_composite_linear_init(struct sna *sna,
			   PicturePtr picture,
			   struct sna_composite_channel *channel,
			   int x, int y,
			   int w, int h,
			   int dst_x, int dst_y)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	pixman_fixed_t tx, ty;
	float x0, y0, sf;
	float dx, dy;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	if (!sna_transform_is_affine(picture->transform)) {
		DBG(("%s: fallback due to projective transform\n",
		     __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
	if (!channel->bo)
		return 0;

	channel->filter = PictFilterNearest;
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->is_linear = true;
	channel->width = channel->bo->pitch / 4;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->scale[0] = channel->scale[1] = 1;
	channel->offset[0] = channel->offset[1] = 0;

	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
		dy = xFixedToDouble(linear->p2.y - linear->p1.y);

		x0 = xFixedToDouble(linear->p1.x);
		y0 = xFixedToDouble(linear->p1.y);

		if (tx | ty) {
			x0 -= pixman_fixed_to_double(tx);
			y0 -= pixman_fixed_to_double(ty);
		}
	} else {
		struct pixman_f_vector p1, p2;
		struct pixman_f_transform m, inv;

		pixman_f_transform_from_pixman_transform(&m, picture->transform);
		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     m.m[0][0], m.m[0][1], m.m[0][2],
		     m.m[1][0], m.m[1][1], m.m[1][2],
		     m.m[2][0], m.m[2][1], m.m[2][2]));
		if (!pixman_f_transform_invert(&inv, &m))
			return 0;

		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
		p1.v[2] = 1.;
		pixman_f_transform_point(&inv, &p1);

		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
		p2.v[2] = 1.;
		pixman_f_transform_point(&inv, &p2);

		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
		     __FUNCTION__,
		     p1.v[0], p1.v[1], p1.v[2],
		     p2.v[0], p2.v[1], p2.v[2]));

		dx = p2.v[0] - p1.v[0];
		dy = p2.v[1] - p1.v[1];

		x0 = p1.v[0];
		y0 = p1.v[1];
	}

	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	channel->u.linear.dx = dx;
	channel->u.linear.dy = dy;
	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);

	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
	     __FUNCTION__, dx, dy, channel->u.linear.offset));

	return channel->bo != NULL;
}
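/*
 * In other words, gen2_emit_composite_linear() later evaluates
 *	t(x, y) = dx*x + dy*y + offset
 * with (dx, dy) = (p2 - p1) / |p2 - p1|^2, so t ramps from 0 at p1 to 1
 * at p2 and indexes the 1-pixel-high gradient texture created above.
 */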
static bool source_is_covered(PicturePtr picture,
			      int x, int y,
			      int width, int height)
{
	int x1, y1, x2, y2;

	if (picture->repeat && picture->repeatType != RepeatNone)
		return true;

	if (picture->pDrawable == NULL)
		return false;

	if (picture->transform) {
		pixman_box16_t sample;

		sample.x1 = x;
		sample.y1 = y;
		sample.x2 = x + width;
		sample.y2 = y + height;

		pixman_transform_bounds(picture->transform, &sample);

		x1 = sample.x1;
		x2 = sample.x2;
		y1 = sample.y1;
		y2 = sample.y2;
	} else {
		x1 = x;
		y1 = y;
		x2 = x + width;
		y2 = y + height;
	}

	return
		x1 >= 0 && y1 >= 0 &&
		x2 <= picture->pDrawable->width &&
		y2 <= picture->pDrawable->height;
}

static bool
gen2_check_card_format(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y, int w, int h,
		       bool *fixup_alpha)
{
	uint32_t format = picture->format;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
		if (i8xx_tex_formats[i].fmt == format)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
		if (i85x_tex_formats[i].fmt == format) {
			if (sna->kgem.gen >= 021)
				return true;

			if (source_is_covered(picture, x, y, w, h)) {
				channel->is_opaque = true;
				return true;
			}

			*fixup_alpha = true;
			return false;
		}
	}

	*fixup_alpha = false;
	return false;
}
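/*
 * Note the interplay with gen2_get_blend_factors(): when an x8 format
 * fully covers the sampled extent on 830/845, channel->is_opaque lets
 * the combiner substitute constant 1.0 (TB0A_ARG1_SEL_ONE) for the
 * undefined alpha channel instead of forcing a fixup copy.
 */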
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;
	bool fixup_alpha;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->is_linear = false;
	channel->is_opaque = false;
	channel->is_affine = true;
	channel->transform = NULL;

	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	if (!gen2_check_repeat(picture)) {
		DBG(("%s -- fallback, unhandled repeat %d\n",
		     __FUNCTION__, picture->repeat));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen2_check_filter(picture)) {
		DBG(("%s -- fallback, unhandled filter %d\n",
		     __FUNCTION__, picture->filter));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen2_composite_linear_init(sna, picture, channel,
							  x, y,
							  w, h,
							  dst_x, dst_y);

		DBG(("%s -- fallback, unhandled source %d\n",
		     __FUNCTION__, picture->pSourcePict->type));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	if (!gen2_check_card_format(sna, picture, channel, x, y, w, h, &fixup_alpha))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y, fixup_alpha);

	channel->pict_format = picture->format;
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable,
					 PREFER_GPU | FORCE_GPU | RENDER_GPU,
					 &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (op->dst.bo->pitch < 8) {
		struct sna_pixmap *priv;
		struct kgem_bo *bo;

		priv = sna_pixmap_move_to_gpu(op->dst.pixmap,
					      MOVE_READ | MOVE_WRITE);
		if (priv == NULL || priv->pinned)
			return false;

		assert(op->dst.bo == priv->gpu_bo);
		bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
				     op->dst.width, op->dst.height, 8,
				     op->dst.pixmap->drawable.bitsPerPixel);
		if (bo == NULL)
			return false;

		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;

		op->dst.bo = priv->gpu_bo;
		op->damage = &priv->gpu_damage;
		if (sna_damage_is_all(op->damage,
				      op->dst.width, op->dst.height))
			op->damage = NULL;
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);
	return true;
}
static bool
is_unhandled_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p);
}
static bool
gen2_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen2_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However, if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (sna_picture_is_solid(mask, &color))
		return gen2_composite_solid_init(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen2_check_repeat(mask))
		return false;

	if (!gen2_check_filter(mask))
		return false;

	if (!gen2_check_format(sna, mask))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = mask->repeat ? mask->repeatType : RepeatNone;
	mc->filter = mask->filter;
	mc->pict_format = mask->format;
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
static bool
gen2_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (mask == NULL &&
	    sna_blt_composite(sna, op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen2_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    mask_x, mask_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen2_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->op = op;

	sna_render_composite_redirect_init(tmp);
	if (too_large(tmp->dst.width, tmp->dst.height) ||
	    tmp->dst.bo->pitch > MAX_3D_PITCH) {
		if (!sna_render_composite_redirect(sna, tmp,
						   dst_x, dst_y, width, height,
						   op > PictOpSrc || dst->pCompositeClip->data != NULL))
			return false;
	}

	switch (gen2_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: fallback -- unable to prepare source\n",
		     __FUNCTION__));
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->src, 0);
		break;
	case 1:
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;
		break;
	}

	if (mask) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			switch (gen2_composite_picture(sna, mask, &tmp->mask,
						       mask_x, mask_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: fallback -- unable to prepare mask\n",
				     __FUNCTION__));
				goto cleanup_src;
			case 0:
				gen2_composite_solid_init(sna, &tmp->mask, 0);
				/* fall through */
			case 1:
				break;
			}
		}

		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value. We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = true;
			if (gen2_blend_op[op].src_alpha &&
			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
					     __FUNCTION__,
					     gen2_blend_op[op].src_blend));
					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* convert solid to a texture (pure convenience) */
		if (tmp->mask.is_solid && tmp->src.is_solid) {
			assert(tmp->mask.is_affine);
			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
			if (!tmp->mask.bo)
				goto cleanup_src;
		}
	}

	tmp->floats_per_vertex = 2;
	if (!tmp->src.is_solid)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.bo)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	tmp->floats_per_rect = 3*tmp->floats_per_vertex;

	tmp->prim_emit = gen2_emit_composite_primitive;
	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
				}
			}
		}
	} else {
		if (tmp->src.is_solid) {
			assert(tmp->floats_per_rect == 6);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_constant;
			}
		} else if (tmp->src.is_linear) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_linear;
			}
		} else if (tmp->src.transform == NULL) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_identity;
			}
		} else if (tmp->src.is_affine) {
			assert(tmp->floats_per_rect == 12);
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_affine;
			}
		}
	}

	tmp->blt = gen2_render_composite_blt;
	tmp->box = gen2_render_composite_box;
	tmp->boxes = gen2_render_composite_boxes;
	tmp->done = gen2_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: fallback, operation does not fit into GTT\n",
			     __FUNCTION__));
			goto cleanup_mask;
		}
	}

	gen2_emit_composite_state(sna, tmp);
	return true;
kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2059cleanup_src: 2060 if (tmp->src.bo) 2061 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2062cleanup_dst: 2063 if (tmp->redirect.real_bo) 2064 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2065 return false; 2066} 2067 2068fastcall static void 2069gen2_emit_composite_spans_primitive_constant(struct sna *sna, 2070 const struct sna_composite_spans_op *op, 2071 const BoxRec *box, 2072 float opacity) 2073{ 2074 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2075 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2076 sna->kgem.nbatch += 9; 2077 2078 v[0] = op->base.dst.x + box->x2; 2079 v[1] = op->base.dst.y + box->y2; 2080 *((uint32_t *)v + 2) = alpha; 2081 2082 v[3] = op->base.dst.x + box->x1; 2083 v[4] = v[1]; 2084 *((uint32_t *)v + 5) = alpha; 2085 2086 v[6] = v[3]; 2087 v[7] = op->base.dst.y + box->y1; 2088 *((uint32_t *)v + 8) = alpha; 2089} 2090 2091fastcall static void 2092gen2_emit_composite_spans_primitive_linear(struct sna *sna, 2093 const struct sna_composite_spans_op *op, 2094 const BoxRec *box, 2095 float opacity) 2096{ 2097 union { 2098 float f; 2099 uint32_t u; 2100 } alpha; 2101 2102 alpha.u = (uint8_t)(255 * opacity) << 24; 2103 2104 gen2_emit_composite_dstcoord(sna, 2105 op->base.dst.x + box->x2, 2106 op->base.dst.y + box->y2); 2107 VERTEX(alpha.f); 2108 gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2109 2110 gen2_emit_composite_dstcoord(sna, 2111 op->base.dst.x + box->x1, 2112 op->base.dst.y + box->y2); 2113 VERTEX(alpha.f); 2114 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2115 2116 gen2_emit_composite_dstcoord(sna, 2117 op->base.dst.x + box->x1, 2118 op->base.dst.y + box->y1); 2119 VERTEX(alpha.f); 2120 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2121} 2122 2123fastcall static void 2124gen2_emit_composite_spans_primitive_identity_source(struct sna *sna, 2125 const struct sna_composite_spans_op *op, 2126 const BoxRec *box, 2127 float opacity) 2128{ 2129 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2130 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2131 sna->kgem.nbatch += 15; 2132 2133 v[0] = op->base.dst.x + box->x2; 2134 v[1] = op->base.dst.y + box->y2; 2135 *((uint32_t *)v + 2) = alpha; 2136 v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2137 v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2138 2139 v[5] = op->base.dst.x + box->x1; 2140 v[6] = v[1]; 2141 *((uint32_t *)v + 7) = alpha; 2142 v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2143 v[9] = v[4]; 2144 2145 v[10] = v[5]; 2146 v[11] = op->base.dst.y + box->y1; 2147 *((uint32_t *)v + 12) = alpha; 2148 v[13] = v[8]; 2149 v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2150} 2151 2152fastcall static void 2153gen2_emit_composite_spans_primitive_affine_source(struct sna *sna, 2154 const struct sna_composite_spans_op *op, 2155 const BoxRec *box, 2156 float opacity) 2157{ 2158 PictTransform *transform = op->base.src.transform; 2159 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2160 float *v; 2161 2162 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2163 sna->kgem.nbatch += 15; 2164 2165 v[0] = op->base.dst.x + box->x2; 2166 v[6] = v[1] = op->base.dst.y + box->y2; 2167 v[10] = v[5] = op->base.dst.x + box->x1; 2168 v[11] = op->base.dst.y + box->y1; 2169 *((uint32_t *)v + 2) = alpha; 2170 *((uint32_t *)v + 7) = alpha; 2171 *((uint32_t *)v + 12) = alpha; 2172 2173 _sna_get_transformed_scaled((int)op->base.src.offset[0] + 
box->x2, 2174 (int)op->base.src.offset[1] + box->y2, 2175 transform, op->base.src.scale, 2176 &v[3], &v[4]); 2177 2178 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2179 (int)op->base.src.offset[1] + box->y2, 2180 transform, op->base.src.scale, 2181 &v[8], &v[9]); 2182 2183 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2184 (int)op->base.src.offset[1] + box->y1, 2185 transform, op->base.src.scale, 2186 &v[13], &v[14]); 2187} 2188 2189#if defined(sse2) && !defined(__x86_64__) 2190sse2 fastcall static void 2191gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna, 2192 const struct sna_composite_spans_op *op, 2193 const BoxRec *box, 2194 float opacity) 2195{ 2196 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2197 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2198 sna->kgem.nbatch += 9; 2199 2200 v[0] = op->base.dst.x + box->x2; 2201 v[1] = op->base.dst.y + box->y2; 2202 *((uint32_t *)v + 2) = alpha; 2203 2204 v[3] = op->base.dst.x + box->x1; 2205 v[4] = v[1]; 2206 *((uint32_t *)v + 5) = alpha; 2207 2208 v[6] = v[3]; 2209 v[7] = op->base.dst.y + box->y1; 2210 *((uint32_t *)v + 8) = alpha; 2211} 2212 2213sse2 fastcall static void 2214gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna, 2215 const struct sna_composite_spans_op *op, 2216 const BoxRec *box, 2217 float opacity) 2218{ 2219 union { 2220 float f; 2221 uint32_t u; 2222 } alpha; 2223 2224 alpha.u = (uint8_t)(255 * opacity) << 24; 2225 2226 gen2_emit_composite_dstcoord(sna, 2227 op->base.dst.x + box->x2, 2228 op->base.dst.y + box->y2); 2229 VERTEX(alpha.f); 2230 gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2231 2232 gen2_emit_composite_dstcoord(sna, 2233 op->base.dst.x + box->x1, 2234 op->base.dst.y + box->y2); 2235 VERTEX(alpha.f); 2236 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2237 2238 gen2_emit_composite_dstcoord(sna, 2239 op->base.dst.x + box->x1, 2240 op->base.dst.y + box->y1); 2241 VERTEX(alpha.f); 2242 gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2243} 2244 2245sse2 fastcall static void 2246gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, 2247 const struct sna_composite_spans_op *op, 2248 const BoxRec *box, 2249 float opacity) 2250{ 2251 float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2252 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2253 sna->kgem.nbatch += 15; 2254 2255 v[0] = op->base.dst.x + box->x2; 2256 v[1] = op->base.dst.y + box->y2; 2257 *((uint32_t *)v + 2) = alpha; 2258 v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2259 v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2260 2261 v[5] = op->base.dst.x + box->x1; 2262 v[6] = v[1]; 2263 *((uint32_t *)v + 7) = alpha; 2264 v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2265 v[9] = v[4]; 2266 2267 v[10] = v[5]; 2268 v[11] = op->base.dst.y + box->y1; 2269 *((uint32_t *)v + 12) = alpha; 2270 v[13] = v[8]; 2271 v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2272} 2273 2274sse2 fastcall static void 2275gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, 2276 const struct sna_composite_spans_op *op, 2277 const BoxRec *box, 2278 float opacity) 2279{ 2280 PictTransform *transform = op->base.src.transform; 2281 uint32_t alpha = (uint8_t)(255 * opacity) << 24; 2282 float *v; 2283 2284 v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2285 sna->kgem.nbatch += 15; 2286 2287 v[0] = op->base.dst.x + box->x2; 
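/* Three vertices per rectangle, five floats apiece: x, y, the opacity
 * packed into the top byte of the diffuse dword, then the transformed
 * texture coordinates computed below -- matching the advance of
 * 15 dwords above. */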
2288 v[6] = v[1] = op->base.dst.y + box->y2; 2289 v[10] = v[5] = op->base.dst.x + box->x1; 2290 v[11] = op->base.dst.y + box->y1; 2291 *((uint32_t *)v + 2) = alpha; 2292 *((uint32_t *)v + 7) = alpha; 2293 *((uint32_t *)v + 12) = alpha; 2294 2295 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 2296 (int)op->base.src.offset[1] + box->y2, 2297 transform, op->base.src.scale, 2298 &v[3], &v[4]); 2299 2300 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2301 (int)op->base.src.offset[1] + box->y2, 2302 transform, op->base.src.scale, 2303 &v[8], &v[9]); 2304 2305 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2306 (int)op->base.src.offset[1] + box->y1, 2307 transform, op->base.src.scale, 2308 &v[13], &v[14]); 2309} 2310#endif 2311 2312static void 2313gen2_emit_composite_spans_vertex(struct sna *sna, 2314 const struct sna_composite_spans_op *op, 2315 int16_t x, int16_t y, 2316 float opacity) 2317{ 2318 gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); 2319 BATCH((uint8_t)(opacity * 255) << 24); 2320 assert(!op->base.src.is_solid); 2321 if (op->base.src.is_linear) 2322 gen2_emit_composite_linear(sna, &op->base.src, x, y); 2323 else 2324 gen2_emit_composite_texcoord(sna, &op->base.src, x, y); 2325} 2326 2327fastcall static void 2328gen2_emit_composite_spans_primitive(struct sna *sna, 2329 const struct sna_composite_spans_op *op, 2330 const BoxRec *box, 2331 float opacity) 2332{ 2333 gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity); 2334 gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity); 2335 gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity); 2336} 2337 2338static void 2339gen2_emit_spans_pipeline(struct sna *sna, 2340 const struct sna_composite_spans_op *op) 2341{ 2342 uint32_t cblend, ablend; 2343 uint32_t unwind; 2344 2345 cblend = 2346 TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE | 2347 TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA | 2348 TB0C_OUTPUT_WRITE_CURRENT; 2349 ablend = 2350 TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE | 2351 TB0A_ARG1_SEL_DIFFUSE | 2352 TB0A_OUTPUT_WRITE_CURRENT; 2353 2354 if (op->base.src.is_solid) { 2355 ablend |= TB0A_ARG2_SEL_SPECULAR; 2356 cblend |= TB0C_ARG2_SEL_SPECULAR; 2357 if (op->base.dst.format == PICT_a8) 2358 cblend |= TB0C_ARG2_REPLICATE_ALPHA; 2359 } else if (op->base.dst.format == PICT_a8) { 2360 ablend |= TB0A_ARG2_SEL_TEXEL0; 2361 cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA; 2362 } else { 2363 if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0) 2364 cblend |= TB0C_ARG2_SEL_TEXEL0; 2365 else 2366 cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT; 2367 2368 if (op->base.src.is_opaque) 2369 ablend |= TB0A_ARG2_SEL_ONE; 2370 else 2371 ablend |= TB0A_ARG2_SEL_TEXEL0; 2372 } 2373 2374 unwind = sna->kgem.nbatch; 2375 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2376 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2377 BATCH(cblend); 2378 BATCH(ablend); 2379 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2380 sna->kgem.batch + unwind + 1, 2381 2 * sizeof(uint32_t)) == 0) 2382 sna->kgem.nbatch = unwind; 2383 else 2384 sna->render_state.gen2.ls2 = unwind; 2385} 2386 2387static void gen2_emit_composite_spans_state(struct sna *sna, 2388 const struct sna_composite_spans_op *op) 2389{ 2390 uint32_t unwind; 2391 2392 gen2_get_batch(sna, &op->base); 2393 gen2_emit_target(sna, &op->base); 2394 2395 unwind = sna->kgem.nbatch; 2396 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2397 I1_LOAD_S(2) | I1_LOAD_S(3) | 
I1_LOAD_S(8) | 2); 2398 BATCH(!op->base.src.is_solid << 12); 2399 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT); 2400 BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format)); 2401 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2402 sna->kgem.batch + unwind + 1, 2403 3 * sizeof(uint32_t)) == 0) 2404 sna->kgem.nbatch = unwind; 2405 else 2406 sna->render_state.gen2.ls1 = unwind; 2407 2408 gen2_disable_logic_op(sna); 2409 gen2_emit_spans_pipeline(sna, op); 2410 2411 if (op->base.src.is_solid) { 2412 if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) { 2413 BATCH(_3DSTATE_DFLT_SPECULAR_CMD); 2414 BATCH(op->base.src.u.gen2.pixel); 2415 sna->render_state.gen2.specular = op->base.src.u.gen2.pixel; 2416 } 2417 } else { 2418 uint32_t v = _3DSTATE_VERTEX_FORMAT_2_CMD | 2419 (op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D); 2420 if (sna->render_state.gen2.vft != v) { 2421 BATCH(v); 2422 sna->render_state.gen2.vft = v; 2423 } 2424 gen2_emit_texture(sna, &op->base.src, 0); 2425 } 2426} 2427 2428fastcall static void 2429gen2_render_composite_spans_box(struct sna *sna, 2430 const struct sna_composite_spans_op *op, 2431 const BoxRec *box, float opacity) 2432{ 2433 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2434 __FUNCTION__, 2435 op->base.src.offset[0], op->base.src.offset[1], 2436 opacity, 2437 op->base.dst.x, op->base.dst.y, 2438 box->x1, box->y1, 2439 box->x2 - box->x1, 2440 box->y2 - box->y1)); 2441 2442 if (gen2_get_rectangles(sna, &op->base, 1) == 0) { 2443 gen2_emit_composite_spans_state(sna, op); 2444 gen2_get_rectangles(sna, &op->base, 1); 2445 } 2446 2447 op->prim_emit(sna, op, box, opacity); 2448} 2449 2450static void 2451gen2_render_composite_spans_boxes(struct sna *sna, 2452 const struct sna_composite_spans_op *op, 2453 const BoxRec *box, int nbox, 2454 float opacity) 2455{ 2456 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2457 __FUNCTION__, nbox, 2458 op->base.src.offset[0], op->base.src.offset[1], 2459 opacity, 2460 op->base.dst.x, op->base.dst.y)); 2461 2462 do { 2463 int nbox_this_time; 2464 2465 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2466 if (nbox_this_time == 0) { 2467 gen2_emit_composite_spans_state(sna, op); 2468 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2469 } 2470 nbox -= nbox_this_time; 2471 2472 do { 2473 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2474 box->x1, box->y1, 2475 box->x2 - box->x1, 2476 box->y2 - box->y1)); 2477 2478 op->prim_emit(sna, op, box++, opacity); 2479 } while (--nbox_this_time); 2480 } while (nbox); 2481} 2482 2483fastcall static void 2484gen2_render_composite_spans_done(struct sna *sna, 2485 const struct sna_composite_spans_op *op) 2486{ 2487 DBG(("%s()\n", __FUNCTION__)); 2488 2489 gen2_vertex_flush(sna, &op->base); 2490 2491 if (op->base.src.bo) 2492 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2493 2494 sna_render_composite_redirect_done(sna, &op->base); 2495} 2496 2497static bool 2498gen2_check_composite_spans(struct sna *sna, 2499 uint8_t op, PicturePtr src, PicturePtr dst, 2500 int16_t width, int16_t height, unsigned flags) 2501{ 2502 if (op >= ARRAY_SIZE(gen2_blend_op)) 2503 return false; 2504 2505 if (gen2_composite_fallback(sna, src, NULL, dst)) 2506 return false; 2507 2508 if (need_tiling(sna, width, height)) { 2509 if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2510 DBG(("%s: fallback, tiled operation not on GPU\n", 2511 __FUNCTION__)); 2512 return false; 2513 } 2514 } 2515
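/* No fallback condition applies: the blend op is supported and, if
 * the operation needs tiling, it will still run on the GPU. */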
2516 return true; 2517} 2518 2519static bool 2520gen2_render_composite_spans(struct sna *sna, 2521 uint8_t op, 2522 PicturePtr src, 2523 PicturePtr dst, 2524 int16_t src_x, int16_t src_y, 2525 int16_t dst_x, int16_t dst_y, 2526 int16_t width, int16_t height, 2527 unsigned flags, 2528 struct sna_composite_spans_op *tmp) 2529{ 2530 DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, 2531 src_x, src_y, dst_x, dst_y, width, height)); 2532 2533 assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags)); 2534 if (need_tiling(sna, width, height)) { 2535 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2536 __FUNCTION__, width, height)); 2537 return sna_tiling_composite_spans(op, src, dst, 2538 src_x, src_y, dst_x, dst_y, 2539 width, height, flags, tmp); 2540 } 2541 2542 if (!gen2_composite_set_target(sna, &tmp->base, dst, 2543 dst_x, dst_y, width, height)) { 2544 DBG(("%s: unable to set render target\n", 2545 __FUNCTION__)); 2546 return false; 2547 } 2548 2549 tmp->base.op = op; 2550 2551 sna_render_composite_redirect_init(&tmp->base); 2552 if (too_large(tmp->base.dst.width, tmp->base.dst.height) || 2553 tmp->base.dst.bo->pitch > MAX_3D_PITCH) { 2554 if (!sna_render_composite_redirect(sna, &tmp->base, 2555 dst_x, dst_y, width, height, 2556 true)) 2557 return false; 2558 } 2559 2560 switch (gen2_composite_picture(sna, src, &tmp->base.src, 2561 src_x, src_y, 2562 width, height, 2563 dst_x, dst_y, 2564 dst->polyMode == PolyModePrecise)) { 2565 case -1: 2566 goto cleanup_dst; 2567 case 0: 2568 gen2_composite_solid_init(sna, &tmp->base.src, 0); 2569 case 1: 2570 break; 2571 } 2572 2573 tmp->prim_emit = gen2_emit_composite_spans_primitive; 2574 tmp->base.floats_per_vertex = 3; 2575 if (tmp->base.src.is_solid) { 2576#if defined(sse2) && !defined(__x86_64__) 2577 if (sna->cpu_features & SSE2) { 2578 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2; 2579 } else 2580#endif 2581 { 2582 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant; 2583 } 2584 } else if (tmp->base.src.is_linear) { 2585 tmp->base.floats_per_vertex += 2; 2586#if defined(sse2) && !defined(__x86_64__) 2587 if (sna->cpu_features & SSE2) { 2588 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2; 2589 } else 2590#endif 2591 { 2592 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear; 2593 } 2594 } else { 2595 assert(tmp->base.src.bo); 2596 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 
2 : 3; 2597 if (tmp->base.src.transform == NULL) { 2598#if defined(sse2) && !defined(__x86_64__) 2599 if (sna->cpu_features & SSE2) { 2600 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2; 2601 } else 2602#endif 2603 { 2604 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source; 2605 } 2606 } else if (tmp->base.src.is_affine) { 2607 tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; 2608 tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; 2609#if defined(sse2) && !defined(__x86_64__) 2610 if (sna->cpu_features & SSE2) { 2611 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2; 2612 } else 2613#endif 2614 { 2615 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source; 2616 } 2617 } 2618 } 2619 tmp->base.mask.bo = NULL; 2620 tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex; 2621 2622 tmp->box = gen2_render_composite_spans_box; 2623 tmp->boxes = gen2_render_composite_spans_boxes; 2624 tmp->done = gen2_render_composite_spans_done; 2625 2626 if (!kgem_check_bo(&sna->kgem, 2627 tmp->base.dst.bo, tmp->base.src.bo, 2628 NULL)) { 2629 kgem_submit(&sna->kgem); 2630 if (!kgem_check_bo(&sna->kgem, 2631 tmp->base.dst.bo, tmp->base.src.bo, 2632 NULL)) 2633 goto cleanup_src; 2634 } 2635 2636 gen2_emit_composite_spans_state(sna, tmp); 2637 return true; 2638 2639cleanup_src: 2640 if (tmp->base.src.bo) 2641 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2642cleanup_dst: 2643 if (tmp->base.redirect.real_bo) 2644 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2645 return false; 2646} 2647 2648static void 2649gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op) 2650{ 2651 uint32_t blend, unwind; 2652 2653 unwind = sna->kgem.nbatch; 2654 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2655 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2656 2657 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 2658 TB0C_ARG1_SEL_DIFFUSE | 2659 TB0C_OUTPUT_WRITE_CURRENT; 2660 if (op->dst.format == PICT_a8) 2661 blend |= TB0C_ARG1_REPLICATE_ALPHA; 2662 BATCH(blend); 2663 2664 BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 2665 TB0A_ARG1_SEL_DIFFUSE | 2666 TB0A_OUTPUT_WRITE_CURRENT); 2667 2668 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2669 sna->kgem.batch + unwind + 1, 2670 2 * sizeof(uint32_t)) == 0) 2671 sna->kgem.nbatch = unwind; 2672 else 2673 sna->render_state.gen2.ls2 = unwind; 2674} 2675 2676static void gen2_emit_fill_composite_state(struct sna *sna, 2677 const struct sna_composite_op *op, 2678 uint32_t pixel) 2679{ 2680 uint32_t ls1; 2681 2682 gen2_get_batch(sna, op); 2683 gen2_emit_target(sna, op); 2684 2685 ls1 = sna->kgem.nbatch; 2686 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2687 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2688 BATCH(0); 2689 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2690 BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format)); 2691 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2692 sna->kgem.batch + ls1 + 1, 2693 3 * sizeof(uint32_t)) == 0) 2694 sna->kgem.nbatch = ls1; 2695 else 2696 sna->render_state.gen2.ls1 = ls1; 2697 2698 gen2_emit_fill_pipeline(sna, op); 2699 2700 if (pixel != sna->render_state.gen2.diffuse) { 2701 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2702 BATCH(pixel); 2703 sna->render_state.gen2.diffuse = pixel; 2704 } 2705} 2706 2707static bool 2708gen2_render_fill_boxes_try_blt(struct sna *sna, 2709 CARD8 op, PictFormat format, 2710 const xRenderColor *color, 2711 PixmapPtr dst, struct kgem_bo *dst_bo, 2712 const BoxRec *box, int n) 
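/* A solid fill reduces to a BLT fill only for the non-blending ops:
 * Clear maps to GXclear, Src to GXcopy with the colour pre-converted
 * into the destination format. */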
2713{ 2714 uint8_t alu; 2715 uint32_t pixel; 2716 2717 if (op > PictOpSrc) 2718 return false; 2719 2720 if (op == PictOpClear) { 2721 alu = GXclear; 2722 pixel = 0; 2723 } else if (!sna_get_pixel_from_rgba(&pixel, 2724 color->red, 2725 color->green, 2726 color->blue, 2727 color->alpha, 2728 format)) 2729 return false; 2730 else 2731 alu = GXcopy; 2732 2733 return sna_blt_fill_boxes(sna, alu, 2734 dst_bo, dst->drawable.bitsPerPixel, 2735 pixel, box, n); 2736} 2737 2738static bool 2739gen2_render_fill_boxes(struct sna *sna, 2740 CARD8 op, 2741 PictFormat format, 2742 const xRenderColor *color, 2743 PixmapPtr dst, struct kgem_bo *dst_bo, 2744 const BoxRec *box, int n) 2745{ 2746 struct sna_composite_op tmp; 2747 uint32_t pixel; 2748 2749 if (op >= ARRAY_SIZE(gen2_blend_op)) { 2750 DBG(("%s: fallback due to unhandled blend op: %d\n", 2751 __FUNCTION__, op)); 2752 return false; 2753 } 2754 2755#if NO_FILL_BOXES 2756 return gen2_render_fill_boxes_try_blt(sna, op, format, color, 2757 dst, dst_bo, 2758 box, n); 2759#endif 2760 if (gen2_render_fill_boxes_try_blt(sna, op, format, color, 2761 dst, dst_bo, 2762 box, n)) 2763 return true; 2764 2765 2766 DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", 2767 __FUNCTION__, op, (int)format, 2768 color->red, color->green, color->blue, color->alpha)); 2769 2770 if (too_large(dst->drawable.width, dst->drawable.height) || 2771 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH || 2772 !gen2_check_dst_format(format)) { 2773 DBG(("%s: try blt, too large or incompatible destination\n", 2774 __FUNCTION__)); 2775 if (!gen2_check_dst_format(format)) 2776 return false; 2777 2778 assert(dst_bo->pitch >= 8); 2779 return sna_tiling_fill_boxes(sna, op, format, color, 2780 dst, dst_bo, box, n); 2781 } 2782 2783 if (op == PictOpClear) 2784 pixel = 0; 2785 else if (!sna_get_pixel_from_rgba(&pixel, 2786 color->red, 2787 color->green, 2788 color->blue, 2789 color->alpha, 2790 PICT_a8r8g8b8)) 2791 return false; 2792 2793 DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n", 2794 __FUNCTION__, op, (int)format, pixel)); 2795 2796 memset(&tmp, 0, sizeof(tmp)); 2797 tmp.op = op; 2798 tmp.dst.pixmap = dst; 2799 tmp.dst.width = dst->drawable.width; 2800 tmp.dst.height = dst->drawable.height; 2801 tmp.dst.format = format; 2802 tmp.dst.bo = dst_bo; 2803 tmp.floats_per_vertex = 2; 2804 tmp.floats_per_rect = 6; 2805 2806 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2807 kgem_submit(&sna->kgem); 2808 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 2809 } 2810 2811 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2812 2813 do { 2814 int n_this_time = gen2_get_rectangles(sna, &tmp, n); 2815 if (n_this_time == 0) { 2816 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2817 n_this_time = gen2_get_rectangles(sna, &tmp, n); 2818 } 2819 n -= n_this_time; 2820 2821 do { 2822 DBG((" (%d, %d), (%d, %d): %x\n", 2823 box->x1, box->y1, box->x2, box->y2, pixel)); 2824 VERTEX(box->x2); 2825 VERTEX(box->y2); 2826 VERTEX(box->x1); 2827 VERTEX(box->y2); 2828 VERTEX(box->x1); 2829 VERTEX(box->y1); 2830 box++; 2831 } while (--n_this_time); 2832 } while (n); 2833 2834 gen2_vertex_flush(sna, &tmp); 2835 return true; 2836} 2837 2838static void gen2_emit_fill_state(struct sna *sna, 2839 const struct sna_composite_op *op) 2840{ 2841 uint32_t ls1; 2842 2843 gen2_get_batch(sna, op); 2844 gen2_emit_target(sna, op); 2845 2846 ls1 = sna->kgem.nbatch; 2847 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2848 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2849 BATCH(0); 2850 BATCH(S3_CULLMODE_NONE | 
S3_VERTEXHAS_XY); 2851 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 2852 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2853 sna->kgem.batch + ls1 + 1, 2854 3 * sizeof(uint32_t)) == 0) 2855 sna->kgem.nbatch = ls1; 2856 else 2857 sna->render_state.gen2.ls1 = ls1; 2858 2859 gen2_enable_logic_op(sna, op->op); 2860 gen2_emit_fill_pipeline(sna, op); 2861 2862 if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 2863 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2864 BATCH(op->src.u.gen2.pixel); 2865 sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 2866 } 2867} 2868 2869static void 2870gen2_render_fill_op_blt(struct sna *sna, 2871 const struct sna_fill_op *op, 2872 int16_t x, int16_t y, int16_t w, int16_t h) 2873{ 2874 if (!gen2_get_rectangles(sna, &op->base, 1)) { 2875 gen2_emit_fill_state(sna, &op->base); 2876 gen2_get_rectangles(sna, &op->base, 1); 2877 } 2878 2879 VERTEX(x+w); 2880 VERTEX(y+h); 2881 VERTEX(x); 2882 VERTEX(y+h); 2883 VERTEX(x); 2884 VERTEX(y); 2885} 2886 2887fastcall static void 2888gen2_render_fill_op_box(struct sna *sna, 2889 const struct sna_fill_op *op, 2890 const BoxRec *box) 2891{ 2892 if (!gen2_get_rectangles(sna, &op->base, 1)) { 2893 gen2_emit_fill_state(sna, &op->base); 2894 gen2_get_rectangles(sna, &op->base, 1); 2895 } 2896 2897 VERTEX(box->x2); 2898 VERTEX(box->y2); 2899 VERTEX(box->x1); 2900 VERTEX(box->y2); 2901 VERTEX(box->x1); 2902 VERTEX(box->y1); 2903} 2904 2905fastcall static void 2906gen2_render_fill_op_boxes(struct sna *sna, 2907 const struct sna_fill_op *op, 2908 const BoxRec *box, 2909 int nbox) 2910{ 2911 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 2912 box->x1, box->y1, box->x2, box->y2, nbox)); 2913 2914 do { 2915 int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2916 if (nbox_this_time == 0) { 2917 gen2_emit_fill_state(sna, &op->base); 2918 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2919 } 2920 nbox -= nbox_this_time; 2921 2922 do { 2923 VERTEX(box->x2); 2924 VERTEX(box->y2); 2925 VERTEX(box->x1); 2926 VERTEX(box->y2); 2927 VERTEX(box->x1); 2928 VERTEX(box->y1); 2929 box++; 2930 } while (--nbox_this_time); 2931 } while (nbox); 2932} 2933 2934static void 2935gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2936{ 2937 gen2_vertex_flush(sna, &op->base); 2938} 2939 2940static bool 2941gen2_render_fill(struct sna *sna, uint8_t alu, 2942 PixmapPtr dst, struct kgem_bo *dst_bo, 2943 uint32_t color, 2944 struct sna_fill_op *tmp) 2945{ 2946#if NO_FILL 2947 return sna_blt_fill(sna, alu, 2948 dst_bo, dst->drawable.bitsPerPixel, 2949 color, 2950 tmp); 2951#endif 2952 2953 /* Prefer to use the BLT if already engaged */ 2954 if (sna_blt_fill(sna, alu, 2955 dst_bo, dst->drawable.bitsPerPixel, 2956 color, 2957 tmp)) 2958 return true; 2959 2960 /* Must use the BLT if we can't RENDER... 
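 * i.e. if the target is too large for the 3D pipe, or its pitch lies
 * outside the renderable range (at least 8 bytes, no more than
 * MAX_3D_PITCH).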
*/ 2961 if (too_large(dst->drawable.width, dst->drawable.height) || 2962 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) 2963 return false; 2964 2965 tmp->base.op = alu; 2966 tmp->base.dst.pixmap = dst; 2967 tmp->base.dst.width = dst->drawable.width; 2968 tmp->base.dst.height = dst->drawable.height; 2969 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2970 tmp->base.dst.bo = dst_bo; 2971 tmp->base.dst.x = tmp->base.dst.y = 0; 2972 tmp->base.floats_per_vertex = 2; 2973 tmp->base.floats_per_rect = 6; 2974 2975 tmp->base.src.u.gen2.pixel = 2976 sna_rgba_for_color(color, dst->drawable.depth); 2977 2978 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2979 kgem_submit(&sna->kgem); 2980 return sna_blt_fill(sna, alu, 2981 dst_bo, dst->drawable.bitsPerPixel, 2982 color, 2983 tmp); 2984 } 2985 2986 tmp->blt = gen2_render_fill_op_blt; 2987 tmp->box = gen2_render_fill_op_box; 2988 tmp->boxes = gen2_render_fill_op_boxes; 2989 tmp->done = gen2_render_fill_op_done; 2990 2991 gen2_emit_fill_state(sna, &tmp->base); 2992 return true; 2993} 2994 2995static bool 2996gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2997 uint32_t color, 2998 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 2999 uint8_t alu) 3000{ 3001 BoxRec box; 3002 3003 box.x1 = x1; 3004 box.y1 = y1; 3005 box.x2 = x2; 3006 box.y2 = y2; 3007 3008 return sna_blt_fill_boxes(sna, alu, 3009 bo, dst->drawable.bitsPerPixel, 3010 color, &box, 1); 3011} 3012 3013static bool 3014gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3015 uint32_t color, 3016 int16_t x1, int16_t y1, 3017 int16_t x2, int16_t y2, 3018 uint8_t alu) 3019{ 3020 struct sna_composite_op tmp; 3021 3022#if NO_FILL_ONE 3023 return gen2_render_fill_one_try_blt(sna, dst, bo, color, 3024 x1, y1, x2, y2, alu); 3025#endif 3026 3027 /* Prefer to use the BLT if already engaged */ 3028 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3029 x1, y1, x2, y2, alu)) 3030 return true; 3031 3032 /* Must use the BLT if we can't RENDER... 
*/ 3033 if (too_large(dst->drawable.width, dst->drawable.height) || 3034 bo->pitch < 8 || bo->pitch > MAX_3D_PITCH) 3035 return false; 3036 3037 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3038 kgem_submit(&sna->kgem); 3039 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3040 x1, y1, x2, y2, alu)) 3041 return true; 3042 assert(kgem_check_bo(&sna->kgem, bo, NULL)); 3043 } 3044 3045 tmp.op = alu; 3046 tmp.dst.pixmap = dst; 3047 tmp.dst.width = dst->drawable.width; 3048 tmp.dst.height = dst->drawable.height; 3049 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3050 tmp.dst.bo = bo; 3051 tmp.floats_per_vertex = 2; 3052 tmp.floats_per_rect = 6; 3053 tmp.need_magic_ca_pass = false; 3054 3055 tmp.src.u.gen2.pixel = 3056 sna_rgba_for_color(color, dst->drawable.depth); 3057 3058 gen2_emit_fill_state(sna, &tmp); 3059 gen2_get_rectangles(sna, &tmp, 1); 3060 DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__, 3061 x1, y1, x2, y2, tmp.src.u.gen2.pixel)); 3062 VERTEX(x2); 3063 VERTEX(y2); 3064 VERTEX(x1); 3065 VERTEX(y2); 3066 VERTEX(x1); 3067 VERTEX(y1); 3068 gen2_vertex_flush(sna, &tmp); 3069 3070 return true; 3071} 3072 3073static void 3074gen2_render_copy_setup_source(struct sna_composite_channel *channel, 3075 PixmapPtr pixmap, 3076 struct kgem_bo *bo) 3077{ 3078 assert(pixmap->drawable.width && pixmap->drawable.height); 3079 3080 channel->filter = PictFilterNearest; 3081 channel->repeat = RepeatNone; 3082 channel->width = pixmap->drawable.width; 3083 channel->height = pixmap->drawable.height; 3084 channel->scale[0] = 1.f/pixmap->drawable.width; 3085 channel->scale[1] = 1.f/pixmap->drawable.height; 3086 channel->offset[0] = 0; 3087 channel->offset[1] = 0; 3088 channel->pict_format = sna_format_for_depth(pixmap->drawable.depth); 3089 channel->bo = bo; 3090 channel->is_affine = 1; 3091 3092 DBG(("%s: source=%d, (%dx%d), format=%08x\n", 3093 __FUNCTION__, bo->handle, 3094 channel->width, channel->height, 3095 channel->pict_format)); 3096} 3097 3098static void 3099gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op) 3100{ 3101 uint32_t blend, unwind; 3102 3103 unwind = sna->kgem.nbatch; 3104 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 3105 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 3106 3107 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 3108 TB0C_OUTPUT_WRITE_CURRENT; 3109 if (op->dst.format == PICT_a8) 3110 blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0; 3111 else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 3112 blend |= TB0C_ARG1_SEL_TEXEL0; 3113 else 3114 blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 3115 BATCH(blend); 3116 3117 blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 3118 TB0A_OUTPUT_WRITE_CURRENT; 3119 if (PICT_FORMAT_A(op->src.pict_format) == 0) 3120 blend |= TB0A_ARG1_SEL_ONE; 3121 else 3122 blend |= TB0A_ARG1_SEL_TEXEL0; 3123 BATCH(blend); 3124 3125 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 3126 sna->kgem.batch + unwind + 1, 3127 2 * sizeof(uint32_t)) == 0) 3128 sna->kgem.nbatch = unwind; 3129 else 3130 sna->render_state.gen2.ls2 = unwind; 3131} 3132 3133static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op) 3134{ 3135 uint32_t ls1, v; 3136 3137 gen2_get_batch(sna, op); 3138 3139 if (kgem_bo_is_dirty(op->src.bo)) { 3140 if (op->src.bo == op->dst.bo) 3141 BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 3142 else 3143 BATCH(_3DSTATE_MODES_5_CMD | 3144 PIPELINE_FLUSH_RENDER_CACHE | 3145 PIPELINE_FLUSH_TEXTURE_CACHE); 3146 kgem_clear_dirty(&sna->kgem); 3147 } 3148 
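/* A copy within the same bo must also invalidate the sampler's map
 * cache (MI_FLUSH above); between distinct bo it suffices to flush
 * the render and texture caches before sampling the freshly written
 * source. */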
gen2_emit_target(sna, op); 3149 3150 ls1 = sna->kgem.nbatch; 3151 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 3152 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 3153 BATCH(1<<12); 3154 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 3155 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 3156 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 3157 sna->kgem.batch + ls1 + 1, 3158 3 * sizeof(uint32_t)) == 0) 3159 sna->kgem.nbatch = ls1; 3160 else 3161 sna->render_state.gen2.ls1 = ls1; 3162 3163 gen2_enable_logic_op(sna, op->op); 3164 gen2_emit_copy_pipeline(sna, op); 3165 3166 v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; 3167 if (sna->render_state.gen2.vft != v) { 3168 BATCH(v); 3169 sna->render_state.gen2.vft = v; 3170 } 3171 3172 gen2_emit_texture(sna, &op->src, 0); 3173} 3174 3175static bool 3176gen2_render_copy_boxes(struct sna *sna, uint8_t alu, 3177 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3178 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3179 const BoxRec *box, int n, unsigned flags) 3180{ 3181 struct sna_composite_op tmp; 3182 3183#if NO_COPY_BOXES 3184 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3185 return false; 3186 3187 return sna_blt_copy_boxes(sna, alu, 3188 src_bo, src_dx, src_dy, 3189 dst_bo, dst_dx, dst_dy, 3190 dst->drawable.bitsPerPixel, 3191 box, n); 3192#endif 3193 3194 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 3195 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 3196 3197 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3198 sna_blt_copy_boxes(sna, alu, 3199 src_bo, src_dx, src_dy, 3200 dst_bo, dst_dx, dst_dy, 3201 dst->drawable.bitsPerPixel, 3202 box, n)) 3203 return true; 3204 3205 if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */ 3206 too_large(src->drawable.width, src->drawable.height) || 3207 src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) { 3208fallback: 3209 return sna_blt_copy_boxes_fallback(sna, alu, 3210 src, src_bo, src_dx, src_dy, 3211 dst, dst_bo, dst_dx, dst_dy, 3212 box, n); 3213 } 3214 3215 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3216 kgem_submit(&sna->kgem); 3217 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3218 goto fallback; 3219 } 3220 3221 assert(dst_bo->pitch >= 8); 3222 3223 memset(&tmp, 0, sizeof(tmp)); 3224 tmp.op = alu; 3225 3226 tmp.dst.pixmap = dst; 3227 tmp.dst.width = dst->drawable.width; 3228 tmp.dst.height = dst->drawable.height; 3229 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3230 tmp.dst.bo = dst_bo; 3231 tmp.dst.x = tmp.dst.y = 0; 3232 tmp.damage = NULL; 3233 3234 DBG(("%s: target=%d, format=%08x, size=%dx%d\n", 3235 __FUNCTION__, dst_bo->handle, 3236 (unsigned)tmp.dst.format, 3237 tmp.dst.width, 3238 tmp.dst.height)); 3239 3240 sna_render_composite_redirect_init(&tmp); 3241 if (too_large(tmp.dst.width, tmp.dst.height) || 3242 dst_bo->pitch > MAX_3D_PITCH) { 3243 BoxRec extents = box[0]; 3244 int i; 3245 3246 for (i = 1; i < n; i++) { 3247 if (box[i].x1 < extents.x1) 3248 extents.x1 = box[i].x1; 3249 if (box[i].y1 < extents.y1) 3250 extents.y1 = box[i].y1; 3251 3252 if (box[i].x2 > extents.x2) 3253 extents.x2 = box[i].x2; 3254 if (box[i].y2 > extents.y2) 3255 extents.y2 = box[i].y2; 3256 } 3257 if (!sna_render_composite_redirect(sna, &tmp, 3258 extents.x1 + dst_dx, 3259 extents.y1 + dst_dy, 3260 extents.x2 - extents.x1, 3261 extents.y2 - extents.y1, 3262 alu != GXcopy || n > 1)) 3263 goto fallback_tiled; 3264 } 3265 3266 tmp.floats_per_vertex = 4; 3267 tmp.floats_per_rect = 12; 3268 3269 dst_dx += 
tmp.dst.x; 3270 dst_dy += tmp.dst.y; 3271 tmp.dst.x = tmp.dst.y = 0; 3272 3273 gen2_render_copy_setup_source(&tmp.src, src, src_bo); 3274 gen2_emit_copy_state(sna, &tmp); 3275 do { 3276 int n_this_time; 3277 3278 n_this_time = gen2_get_rectangles(sna, &tmp, n); 3279 if (n_this_time == 0) { 3280 gen2_emit_copy_state(sna, &tmp); 3281 n_this_time = gen2_get_rectangles(sna, &tmp, n); 3282 } 3283 n -= n_this_time; 3284 3285 do { 3286 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3287 box->x1 + src_dx, box->y1 + src_dy, 3288 box->x1 + dst_dx, box->y1 + dst_dy, 3289 box->x2 - box->x1, box->y2 - box->y1)); 3290 VERTEX(box->x2 + dst_dx); 3291 VERTEX(box->y2 + dst_dy); 3292 VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); 3293 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3294 3295 VERTEX(box->x1 + dst_dx); 3296 VERTEX(box->y2 + dst_dy); 3297 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3298 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3299 3300 VERTEX(box->x1 + dst_dx); 3301 VERTEX(box->y1 + dst_dy); 3302 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3303 VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); 3304 3305 box++; 3306 } while (--n_this_time); 3307 } while (n); 3308 3309 gen2_vertex_flush(sna, &tmp); 3310 sna_render_composite_redirect_done(sna, &tmp); 3311 return true; 3312 3313fallback_tiled: 3314 return sna_tiling_copy_boxes(sna, alu, 3315 src, src_bo, src_dx, src_dy, 3316 dst, dst_bo, dst_dx, dst_dy, 3317 box, n); 3318} 3319 3320static void 3321gen2_render_copy_blt(struct sna *sna, 3322 const struct sna_copy_op *op, 3323 int16_t sx, int16_t sy, 3324 int16_t w, int16_t h, 3325 int16_t dx, int16_t dy) 3326{ 3327 if (!gen2_get_rectangles(sna, &op->base, 1)) { 3328 gen2_emit_copy_state(sna, &op->base); 3329 gen2_get_rectangles(sna, &op->base, 1); 3330 } 3331 3332 VERTEX(dx+w); 3333 VERTEX(dy+h); 3334 VERTEX((sx+w)*op->base.src.scale[0]); 3335 VERTEX((sy+h)*op->base.src.scale[1]); 3336 3337 VERTEX(dx); 3338 VERTEX(dy+h); 3339 VERTEX(sx*op->base.src.scale[0]); 3340 VERTEX((sy+h)*op->base.src.scale[1]); 3341 3342 VERTEX(dx); 3343 VERTEX(dy); 3344 VERTEX(sx*op->base.src.scale[0]); 3345 VERTEX(sy*op->base.src.scale[1]); 3346} 3347 3348static void 3349gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3350{ 3351 gen2_vertex_flush(sna, &op->base); 3352} 3353 3354static bool 3355gen2_render_copy(struct sna *sna, uint8_t alu, 3356 PixmapPtr src, struct kgem_bo *src_bo, 3357 PixmapPtr dst, struct kgem_bo *dst_bo, 3358 struct sna_copy_op *tmp) 3359{ 3360#if NO_COPY 3361 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3362 return false; 3363 3364 return sna_blt_copy(sna, alu, 3365 src_bo, dst_bo, 3366 dst->drawable.bitsPerPixel, 3367 tmp); 3368#endif 3369 3370 /* Prefer to use the BLT */ 3371 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3372 sna_blt_copy(sna, alu, 3373 src_bo, dst_bo, 3374 dst->drawable.bitsPerPixel, 3375 tmp)) 3376 return true; 3377 3378 /* Must use the BLT if we can't RENDER... 
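 * for a copy both surfaces matter: the source is sampled as a texture
 * and the destination is the render target, so each must respect the
 * 3D pipe's size and pitch limits.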
*/ 3379 if (too_large(src->drawable.width, src->drawable.height) || 3380 too_large(dst->drawable.width, dst->drawable.height) || 3381 src_bo->pitch > MAX_3D_PITCH || 3382 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) { 3383fallback: 3384 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3385 return false; 3386 3387 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3388 dst->drawable.bitsPerPixel, 3389 tmp); 3390 } 3391 3392 tmp->base.op = alu; 3393 3394 tmp->base.dst.pixmap = dst; 3395 tmp->base.dst.width = dst->drawable.width; 3396 tmp->base.dst.height = dst->drawable.height; 3397 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3398 tmp->base.dst.bo = dst_bo; 3399 3400 gen2_render_copy_setup_source(&tmp->base.src, src, src_bo); 3401 tmp->base.mask.bo = NULL; 3402 3403 tmp->base.floats_per_vertex = 4; 3404 tmp->base.floats_per_rect = 12; 3405 3406 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3407 kgem_submit(&sna->kgem); 3408 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3409 goto fallback; 3410 } 3411 3412 tmp->blt = gen2_render_copy_blt; 3413 tmp->done = gen2_render_copy_done; 3414 3415 gen2_emit_copy_state(sna, &tmp->base); 3416 return true; 3417} 3418 3419static void 3420gen2_render_reset(struct sna *sna) 3421{ 3422 sna->render_state.gen2.need_invariant = true; 3423 sna->render_state.gen2.logic_op_enabled = 0; 3424 sna->render_state.gen2.target = 0; 3425 3426 sna->render_state.gen2.ls1 = 0; 3427 sna->render_state.gen2.ls2 = 0; 3428 sna->render_state.gen2.vft = 0; 3429 3430 sna->render_state.gen2.diffuse = 0x0c0ffee0; 3431 sna->render_state.gen2.specular = 0x0c0ffee0; 3432} 3433 3434static void 3435gen2_render_flush(struct sna *sna) 3436{ 3437 assert(sna->render.vertex_index == 0); 3438 assert(sna->render.vertex_offset == 0); 3439} 3440 3441static void 3442gen2_render_context_switch(struct kgem *kgem, 3443 int new_mode) 3444{ 3445 struct sna *sna = container_of(kgem, struct sna, kgem); 3446 3447 if (!kgem->nbatch) 3448 return; 3449 3450 /* Reload BLT registers following a lost context */ 3451 sna->blt_state.fill_bo = 0; 3452 3453 if (kgem_ring_is_idle(kgem, kgem->ring)) { 3454 DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); 3455 _kgem_submit(kgem); 3456 } 3457} 3458 3459const char *gen2_render_init(struct sna *sna, const char *backend) 3460{ 3461 struct sna_render *render = &sna->render; 3462 3463 sna->kgem.context_switch = gen2_render_context_switch; 3464 3465 /* Use the BLT (and overlay) for everything except when forced to 3466 * use the texture combiners. 3467 */ 3468#if !NO_COMPOSITE 3469 render->composite = gen2_render_composite; 3470 render->prefer_gpu |= PREFER_GPU_RENDER; 3471#endif 3472#if !NO_COMPOSITE_SPANS 3473 render->check_composite_spans = gen2_check_composite_spans; 3474 render->composite_spans = gen2_render_composite_spans; 3475 render->prefer_gpu |= PREFER_GPU_SPANS; 3476#endif 3477 render->fill_boxes = gen2_render_fill_boxes; 3478 render->fill = gen2_render_fill; 3479 render->fill_one = gen2_render_fill_one; 3480 render->copy = gen2_render_copy; 3481 render->copy_boxes = gen2_render_copy_boxes; 3482 3483 /* XXX YUV color space conversion for video? */ 3484 3485 render->reset = gen2_render_reset; 3486 render->flush = gen2_render_flush; 3487 3488 render->max_3d_size = MAX_3D_SIZE; 3489 render->max_3d_pitch = MAX_3D_PITCH; 3490 return "Almador (gen2)"; 3491} 3492