1/* 2 * Copyright © 2006,2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Wang Zhenyu <zhenyu.z.wang@intel.com> 25 * Eric Anholt <eric@anholt.net> 26 * Chris Wilson <chris@chris-wilson.co.uk> 27 * 28 */ 29 30#ifdef HAVE_CONFIG_H 31#include "config.h" 32#endif 33 34#include "sna.h" 35#include "sna_reg.h" 36#include "sna_render.h" 37#include "sna_render_inline.h" 38#include "sna_video.h" 39 40#include "gen2_render.h" 41 42#define NO_COMPOSITE 0 43#define NO_COMPOSITE_SPANS 0 44#define NO_COPY 0 45#define NO_COPY_BOXES 0 46#define NO_FILL 0 47#define NO_FILL_ONE 0 48#define NO_FILL_BOXES 0 49 50#define MAX_3D_SIZE 2048 51#define MAX_3D_PITCH 8192 52#define MAX_INLINE (1 << 18) 53 54#define BATCH(v) batch_emit(sna, v) 55#define BATCH_ALIGNED(v, a) batch_emit_aligned(sna, v, a) 56#define BATCH_F(v) batch_emit_float(sna, v) 57#define VERTEX(v) batch_emit_float(sna, v) 58 59static const struct blendinfo { 60 bool dst_alpha; 61 bool src_alpha; 62 uint32_t src_blend; 63 uint32_t dst_blend; 64} gen2_blend_op[] = { 65 /* Clear */ 66 {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 67 /* Src */ 68 {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 69 /* Dst */ 70 {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 71 /* Over */ 72 {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 73 /* OverReverse */ 74 {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 75 /* In */ 76 {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 77 /* InReverse */ 78 {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 79 /* Out */ 80 {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 81 /* OutReverse */ 82 {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 83 /* Atop */ 84 {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 85 /* AtopReverse */ 86 {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 87 /* Xor */ 88 {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 89 /* Add */ 90 {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 91}; 92 93static const struct formatinfo { 94 unsigned int fmt; 95 uint32_t card_fmt; 96} i8xx_tex_formats[] = { 97 {PICT_a8, 
MAPSURF_8BIT | MT_8BIT_A8}, 98 {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, 99 {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, 100 {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, 101 {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, 102 {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, 103}, i85x_tex_formats[] = { 104 {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, 105 {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, 106}; 107 108static inline bool 109too_large(int width, int height) 110{ 111 return width > MAX_3D_SIZE || height > MAX_3D_SIZE; 112} 113 114static inline uint32_t 115gen2_buf_tiling(uint32_t tiling) 116{ 117 uint32_t v = 0; 118 switch (tiling) { 119 default: assert(0); 120 case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; 121 case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; 122 case I915_TILING_NONE: break; 123 } 124 return v; 125} 126 127static uint32_t 128gen2_get_dst_format(uint32_t format) 129{ 130#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) 131 switch (format) { 132 default: 133 assert(0); 134 case PICT_a8r8g8b8: 135 case PICT_x8r8g8b8: 136 return COLR_BUF_ARGB8888 | BIAS; 137 case PICT_r5g6b5: 138 return COLR_BUF_RGB565 | BIAS; 139 case PICT_a1r5g5b5: 140 case PICT_x1r5g5b5: 141 return COLR_BUF_ARGB1555 | BIAS; 142 case PICT_a8: 143 return COLR_BUF_8BIT | BIAS; 144 case PICT_a4r4g4b4: 145 case PICT_x4r4g4b4: 146 return COLR_BUF_ARGB4444 | BIAS; 147 } 148#undef BIAS 149} 150 151static bool 152gen2_check_dst_format(uint32_t format) 153{ 154 switch (format) { 155 case PICT_a8r8g8b8: 156 case PICT_x8r8g8b8: 157 case PICT_r5g6b5: 158 case PICT_a1r5g5b5: 159 case PICT_x1r5g5b5: 160 case PICT_a8: 161 case PICT_a4r4g4b4: 162 case PICT_x4r4g4b4: 163 return true; 164 default: 165 return false; 166 } 167} 168 169static uint32_t 170gen2_get_card_format(struct sna *sna, uint32_t format) 171{ 172 unsigned int i; 173 174 for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 175 if (i8xx_tex_formats[i].fmt == format) 176 return 
i8xx_tex_formats[i].card_fmt; 177 178 if (sna->kgem.gen < 021) { 179 /* Whilst these are not directly supported on 830/845, 180 * we only enable them when we can implicitly convert 181 * them to a supported variant through the texture 182 * combiners. 183 */ 184 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 185 if (i85x_tex_formats[i].fmt == format) 186 return i8xx_tex_formats[1+i].card_fmt; 187 } else { 188 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 189 if (i85x_tex_formats[i].fmt == format) 190 return i85x_tex_formats[i].card_fmt; 191 } 192 193 assert(0); 194 return 0; 195} 196 197static uint32_t 198gen2_check_format(struct sna *sna, PicturePtr p) 199{ 200 unsigned int i; 201 202 for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 203 if (i8xx_tex_formats[i].fmt == p->format) 204 return true; 205 206 if (sna->kgem.gen > 021) { 207 for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 208 if (i85x_tex_formats[i].fmt == p->format) 209 return true; 210 } 211 212 return false; 213} 214 215static uint32_t 216gen2_sampler_tiling_bits(uint32_t tiling) 217{ 218 uint32_t bits = 0; 219 switch (tiling) { 220 default: 221 assert(0); 222 case I915_TILING_Y: 223 bits |= TM0S1_TILE_WALK; 224 case I915_TILING_X: 225 bits |= TM0S1_TILED_SURFACE; 226 case I915_TILING_NONE: 227 break; 228 } 229 return bits; 230} 231 232static bool 233gen2_check_filter(PicturePtr picture) 234{ 235 switch (picture->filter) { 236 case PictFilterNearest: 237 case PictFilterBilinear: 238 return true; 239 default: 240 return false; 241 } 242} 243 244static bool 245gen2_check_repeat(PicturePtr picture) 246{ 247 if (!picture->repeat) 248 return true; 249 250 switch (picture->repeatType) { 251 case RepeatNone: 252 case RepeatNormal: 253 case RepeatPad: 254 case RepeatReflect: 255 return true; 256 default: 257 return false; 258 } 259} 260 261static void 262gen2_emit_texture(struct sna *sna, 263 const struct sna_composite_channel *channel, 264 int unit) 265{ 266 uint32_t wrap_mode_u, wrap_mode_v; 267 
	uint32_t texcoordtype;
	uint32_t filter;

	assert(channel->bo);

	/* Affine sources need only (s, t); projective ones carry w too. */
	if (channel->is_affine)
		texcoordtype = TEXCOORDTYPE_CARTESIAN;
	else
		texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;

	/* Render repeat mode -> hardware U wrap mode. */
	switch (channel->repeat) {
	default:
		assert(0);
	case RepeatNone:
		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case RepeatNormal:
		wrap_mode_u = TEXCOORDMODE_WRAP;
		break;
	case RepeatPad:
		wrap_mode_u = TEXCOORDMODE_CLAMP;
		break;
	case RepeatReflect:
		wrap_mode_u = TEXCOORDMODE_MIRROR;
		break;
	}
	/* Linear gradients always wrap in V; otherwise V follows U. */
	if (channel->is_linear)
		wrap_mode_v = TEXCOORDMODE_WRAP;
	else
		wrap_mode_v = wrap_mode_u;

	switch (channel->filter) {
	default:
		assert(0);
	case PictFilterNearest:
		filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	case PictFilterBilinear:
		filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	}

	/* TM0S0..TM0S4: base address, size/format/tiling, pitch,
	 * filtering, border colour -- dword order is fixed by hardware.
	 */
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     channel->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((channel->width - 1) << TM0S1_WIDTH_SHIFT) |
	      gen2_get_card_format(sna, channel->pict_format) |
	      gen2_sampler_tiling_bits(channel->bo->tiling));
	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	BATCH(filter);
	BATCH(0);	/* default (border) color */

	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
}

/* Build the colour (TB0C) and alpha (TB0A) texture-blend stage dwords
 * for combining source and mask under the given blend op.
 */
static void
gen2_get_blend_factors(const struct sna_composite_op *op,
		       int blend,
		       uint32_t *c_out,
		       uint32_t *a_out)
{
	uint32_t cblend, ablend;

	/* If component alpha is active in the mask and the blend operation
	 * uses the source alpha, then we know we don't need the source
	 * value (otherwise we would have hit a fallback earlier), so we
	 * provide the source alpha (src.A * mask.X) as output color.
	 * Conversely, if CA is set and we don't need the source alpha, then
	 * we produce the source value (src.X * mask.X) and the source alpha
	 * is unused..  Otherwise, we provide the non-CA source value
	 * (src.X * mask.A).
	 *
	 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
	 * pictures, but we need to implement it for 830/845 and there's no
	 * harm done in leaving it in.
	 */
	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;

	/* Get the source picture's channels into TBx_ARG1 */
	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
	    op->dst.format == PICT_a8) {
		/* Producing source alpha value, so the first set of channels
		 * is src.A instead of src.X.  We also do this if the destination
		 * is a8, in which case src.G is what's written, and the other
		 * channels are ignored.
		 */
		if (op->src.is_opaque) {
			ablend |= TB0C_ARG1_SEL_ONE;
			cblend |= TB0C_ARG1_SEL_ONE;
		} else if (op->src.is_solid) {
			ablend |= TB0C_ARG1_SEL_DIFFUSE;
			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
		} else {
			ablend |= TB0C_ARG1_SEL_TEXEL0;
			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
		}
	} else {
		if (op->src.is_solid)
			cblend |= TB0C_ARG1_SEL_DIFFUSE;
		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
			cblend |= TB0C_ARG1_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */

		if (op->src.is_opaque)
			ablend |= TB0A_ARG1_SEL_ONE;
		else if (op->src.is_solid)
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
		else
			ablend |= TB0A_ARG1_SEL_TEXEL0;
	}

	/* Modulate by the mask in ARG2, if any.  When the source is solid
	 * it lives in the diffuse colour, so the mask takes texel slot 0.
	 */
	if (op->mask.bo) {
		if (op->src.is_solid) {
			cblend |= TB0C_ARG2_SEL_TEXEL0;
			ablend |= TB0A_ARG2_SEL_TEXEL0;
		} else {
			cblend |= TB0C_ARG2_SEL_TEXEL1;
			ablend |= TB0A_ARG2_SEL_TEXEL1;
		}

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else if (op->mask.is_solid) {
		cblend |= TB0C_ARG2_SEL_DIFFUSE;
		ablend |= TB0A_ARG2_SEL_DIFFUSE;

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else {
		/* No mask: pass the source straight through. */
		cblend |= TB0C_OP_ARG1;
		ablend |= TB0A_OP_ARG1;
	}

	*c_out = cblend;
	*a_out = ablend;
}

/* Compute the S8 pixel-pipeline blend dword for a PictOp, applying the
 * fixups needed when the destination lacks a real alpha channel.
 */
static uint32_t gen2_get_blend_cntl(int op,
				    bool has_component_alpha,
				    uint32_t dst_format)
{
	uint32_t sblend, dblend;

	/* Clear and Src don't blend at all; just write the colour buffer. */
	if (op <= PictOpSrc)
		return S8_ENABLE_COLOR_BUFFER_WRITE;

	sblend = gen2_blend_op[op].src_blend;
	dblend = gen2_blend_op[op].dst_blend;

	if (gen2_blend_op[op].dst_alpha) {
		/* If there's no dst alpha channel, adjust the blend op so that
		 * we'll treat it as always 1.
		 */
		if (PICT_FORMAT_A(dst_format) == 0) {
			if (sblend == BLENDFACTOR_DST_ALPHA)
				sblend = BLENDFACTOR_ONE;
			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
				sblend = BLENDFACTOR_ZERO;
		}

		/* gen2 engine reads 8bit color buffer into green channel
		 * in cases like color buffer blending etc., and also writes
		 * back green channel.  So with dst_alpha blend we should use
		 * color factor.
		 */
		if (dst_format == PICT_a8) {
			if (sblend == BLENDFACTOR_DST_ALPHA)
				sblend = BLENDFACTOR_DST_COLR;
			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
				sblend = BLENDFACTOR_INV_DST_COLR;
		}
	}

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is
	 * the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen2_blend_op[op].src_alpha) {
		if (dblend == BLENDFACTOR_SRC_ALPHA)
			dblend = BLENDFACTOR_SRC_COLR;
		else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
			dblend = BLENDFACTOR_INV_SRC_COLR;
	}

	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
		dblend << S8_DST_BLEND_FACTOR_SHIFT |
		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
		S8_ENABLE_COLOR_BUFFER_WRITE);
}

/* Emit the pipeline state that never changes between operations
 * (coordinate routing, disabled tests, default blend stage, ...).
 * The dword count must stay in sync with INVARIANT_SIZE below, which
 * gen2_get_batch() uses when reserving batch space.
 */
static void gen2_emit_invariant(struct sna *sna)
{
	int i;

	/* Route texture coordinate set i to map unit i, untransformed. */
	for (i = 0; i < 4; i++) {
		BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
		BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
		      DISABLE_TEX_STREAM_BUMP |
		      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
		      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
		BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
		BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
	}

	BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
	BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
	      TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
	      TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
	      TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));

	BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);

	/* Vertices arrive in screen space; no viewport or perspective. */
	BATCH(_3DSTATE_VERTEX_TRANSFORM);
	BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);

	BATCH(_3DSTATE_W_STATE_CMD);
	BATCH(MAGIC_W_STATE_DWORD1);
	BATCH_F(1.0);

	BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
	      DISABLE_INDPT_ALPHA_BLEND |
	      ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);

	BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
	BATCH(0);

	/* Default blend state: straight copy (ONE, ZERO). */
	BATCH(_3DSTATE_MODES_1_CMD |
	      ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
	      ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
	      ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));

	/* Turn off everything we never use: stencil, depth, fog, ... */
	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP |
	      DISABLE_STENCIL_TEST |
	      DISABLE_DEPTH_BIAS |
	      DISABLE_SPEC_ADD |
	      DISABLE_FOG |
	      DISABLE_ALPHA_TEST |
	      DISABLE_DEPTH_TEST |
	      ENABLE_COLOR_BLEND);

	BATCH(_3DSTATE_ENABLES_2_CMD |
	      DISABLE_STENCIL_WRITE |
	      DISABLE_DITHER |
	      DISABLE_DEPTH_WRITE |
	      ENABLE_COLOR_MASK |
	      ENABLE_COLOR_WRITE |
	      ENABLE_TEX_CACHE);

	BATCH(_3DSTATE_STIPPLE);
	BATCH(0);

	/* Blend stage 0 defaults to passing the diffuse colour through. */
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_COLOR |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      DISABLE_TEX_CNTRL_STAGE |
	      TEXOP_SCALE_1X |
	      TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_ALPHA |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_COLOR |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_ALPHA |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);

/* Number of dwords emitted by gen2_emit_invariant() above. */
#define INVARIANT_SIZE 35

	sna->render_state.gen2.need_invariant = false;
}

/* Ensure the batch has room (space, relocs, execs) for a full state
 * emission, flushing first if not, and emit the invariant state when a
 * fresh batch needs it.
 */
static void
gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	/* +7 for i830 3DSTATE_BUFFER_INFO w/a */
	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40+7)) {
		DBG(("%s: flushing batch: size %d > %d\n",
		     __FUNCTION__, INVARIANT_SIZE+40,
		     sna->kgem.surface-sna->kgem.nbatch));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_reloc(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: reloc %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nreloc + 3,
		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* NOTE(review): reserves 3 exec slots but the DBG reports +1 --
	 * harmless, but confirm which count is intended.
	 */
	if (!kgem_check_exec(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: exec %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nexec + 1,
		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen2.need_invariant)
		gen2_emit_invariant(sna);
}

/* Point the 3D pipe at the destination buffer and set up the draw
 * rectangle.  Skipped (bar a dirty mark) when the target is unchanged.
 */
static void gen2_emit_target(struct sna *sna,
			     struct kgem_bo *bo,
			     int width,
			     int height,
			     int format)
{
	assert(!too_large(width, height));
	assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH);
	assert(sna->render.vertex_offset == 0);

	assert(bo->unique_id);
	if (sna->render_state.gen2.target == bo->unique_id) {
		kgem_bo_mark_dirty(bo);
		return;
	}

	/*
	 * i830 w/a: 3DSTATE_BUFFER_INFO
	 * must not straddle two cachelines.
	 */
	if (intel_get_device_id(sna->dev) == 0x3577) /* 0x3577 == i830M */
		BATCH_ALIGNED(_3DSTATE_BUF_INFO_CMD, 8);
	else
		BATCH(_3DSTATE_BUF_INFO_CMD);
	BATCH(BUF_3D_ID_COLOR_BACK |
	      gen2_buf_tiling(bo->tiling) |
	      BUF_3D_PITCH(bo->pitch));
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     bo,
			     I915_GEM_DOMAIN_RENDER << 16 |
			     I915_GEM_DOMAIN_RENDER,
			     0));

	BATCH(_3DSTATE_DST_BUF_VARS_CMD);
	BATCH(gen2_get_dst_format(format));

	BATCH(_3DSTATE_DRAW_RECT_CMD);
	BATCH(0);
	BATCH(0);	/* ymin, xmin */
	BATCH(DRAW_YMAX(height - 1) |
	      DRAW_XMAX(width - 1));
	BATCH(0);	/* yorig, xorig */

	sna->render_state.gen2.target = bo->unique_id;
}

/* Switch back to colour blending if a logic op was left enabled. */
static void gen2_disable_logic_op(struct sna *sna)
{
	if (!sna->render_state.gen2.logic_op_enabled)
		return;

	DBG(("%s\n", __FUNCTION__));

	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);

	sna->render_state.gen2.logic_op_enabled = 0;
}

/* Enable the raster logic op for an X GC alu; logic_op_enabled caches
 * op+1 (0 meaning disabled) so redundant state is skipped.
 */
static void gen2_enable_logic_op(struct sna *sna, int op)
{
	/* X GC alu -> hardware LOGICOP code, indexed by GX* value. */
	static const uint8_t logic_op[] = {
		LOGICOP_CLEAR,		/* GXclear */
		LOGICOP_AND,		/* GXand */
		LOGICOP_AND_RVRSE,	/* GXandReverse */
		LOGICOP_COPY,		/* GXcopy */
		LOGICOP_AND_INV,	/* GXandInverted */
		LOGICOP_NOOP,		/* GXnoop */
		LOGICOP_XOR,		/* GXxor */
		LOGICOP_OR,		/* GXor */
		LOGICOP_NOR,		/* GXnor */
		LOGICOP_EQUIV,		/* GXequiv */
		LOGICOP_INV,		/* GXinvert */
		LOGICOP_OR_RVRSE,	/* GXorReverse */
		LOGICOP_COPY_INV,	/* GXcopyInverted */
		LOGICOP_OR_INV,		/* GXorInverted */
		LOGICOP_NAND,		/* GXnand */
		LOGICOP_SET		/* GXset */
	};

	if (sna->render_state.gen2.logic_op_enabled != op+1) {
		if (!sna->render_state.gen2.logic_op_enabled) {
			/* GXclear/GXcopy behave identically with plain
			 * blending, so don't bother enabling the unit.
			 */
			if (op == GXclear || op == GXcopy)
				return;

			DBG(("%s\n", __FUNCTION__));

			BATCH(_3DSTATE_ENABLES_1_CMD |
			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
		}

		BATCH(_3DSTATE_MODES_4_CMD |
		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
		sna->render_state.gen2.logic_op_enabled = op+1;
	}
}

/* Emit (or re-validate) all state needed for a composite operation:
 * caches (ls1/ls2/diffuse/vft) are compared so unchanged state is
 * unwound from the batch rather than re-emitted.
 */
static void gen2_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t texcoordfmt, v, unwind;
	uint32_t cblend, ablend;
	int tex;

	gen2_get_batch(sna, op);

	/* Flush caches if we are sampling from buffers written this batch;
	 * a full MI_FLUSH is needed when reading back the render target.
	 */
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	/* S2/S3/S8: texcoord count, vertex format, blend control.  If the
	 * three dwords match what was last emitted (at ls1), unwind them.
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op,
				  op->has_component_alpha,
				  op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	/* Texture blend stage 0, same unwind trick via ls2. */
	gen2_get_blend_factors(op, op->op, &cblend, &ablend);
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	/* Bind textures: non-solid source in unit 0, mask in the next.
	 * Solid channels are loaded as the default diffuse colour instead.
	 */
	tex = texcoordfmt = 0;
	if (!op->src.is_solid) {
		if (op->src.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->src, tex++);
	} else {
		if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->src.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
		}
	}
	if (op->mask.bo) {
		if (op->mask.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->mask, tex++);
	} else if (op->mask.is_solid) {
		if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->mask.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
		}
	}

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}

/* Emit the (x, y) destination coordinate of one vertex. */
static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
	VERTEX(dstX);
	VERTEX(dstY);
}

/* Emit the texture coordinate for a linear gradient: the gradient
 * position is duplicated into both s and t.
 */
inline static void
gen2_emit_composite_linear(struct sna *sna,
			   const struct sna_composite_channel *channel,
			   int16_t x, int16_t y)
{
	float v;

	v = (x * channel->u.linear.dx +
	     y * channel->u.linear.dy +
	     channel->u.linear.offset);
	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
	VERTEX(v);
	VERTEX(v);
}

/* Emit the transformed, normalised texture coordinate for one vertex;
 * projective transforms additionally carry w.
 */
static void
gen2_emit_composite_texcoord(struct sna *sna,
			     const struct sna_composite_channel *channel,
			     int16_t x, int16_t y)
{
	float s = 0, t = 0, w = 1;

	x += channel->offset[0];
	y += channel->offset[1];

	if (channel->is_affine) {
		sna_get_transformed_coordinates(x, y,
						channel->transform,
						&s, &t);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
	} else {
		sna_get_transformed_coordinates_3d(x, y,
						   channel->transform,
						   &s, &t, &w);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
		VERTEX(w);
	}
}

static void
gen2_emit_composite_vertex(struct sna *sna,
			   const struct sna_composite_op *op,
			   int16_t srcX, int16_t srcY,
			   int16_t mskX, int16_t mskY,
			   int16_t dstX, int16_t dstY)
{
	/* General-purpose vertex: dst coords plus whatever src/mask
	 * coordinates the bound channels require.
	 */
	gen2_emit_composite_dstcoord(sna, dstX, dstY);
	if (op->src.is_linear)
		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
	else if (!op->src.is_solid)
		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);

	if (op->mask.is_linear)
		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
	else if (op->mask.bo)
		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
}

/* Generic rectangle emission: three vertices of a RECTLIST in the
 * hardware's expected order (bottom-right, bottom-left, top-left).
 */
fastcall static void
gen2_emit_composite_primitive(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	gen2_emit_composite_vertex(sna, op,
				   r->src.x + r->width,
				   r->src.y + r->height,
				   r->mask.x + r->width,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x + r->width,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y + r->height,
				   r->mask.x,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y,
				   r->mask.x,
				   r->mask.y,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y);
}

/* Fast path: solid source, no mask -- destination coordinates only. */
fastcall static void
gen2_emit_composite_primitive_constant(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

/* Fast path: linear-gradient source, no mask. */
fastcall static void
gen2_emit_composite_primitive_linear(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}

/* Fast path: identity-transformed texture source, no mask.  Writes the
 * 12 floats (3 vertices x [x, y, s, t]) straight into the batch; the
 * slot indices follow the vertex order of the generic path above.
 */
fastcall static void
gen2_emit_composite_primitive_identity(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

/* Fast path: affine-transformed texture source, no mask. */
fastcall static void
gen2_emit_composite_primitive_affine(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

/* Fast path: solid source with an identity-transformed mask texture;
 * same 12-float layout as the identity path, but the texture
 * coordinates come from the mask channel.
 */
fastcall static void
gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}

/* SSE2-compiled copies of the emitters above for 32-bit builds (on
 * x86-64 SSE2 is baseline, so the plain versions already use it).
 * Bodies are intentionally identical to their non-sse2 twins.
 */
#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen2_emit_composite_primitive_constant__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_linear__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_identity__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

sse2 fastcall static void
gen2_emit_composite_primitive_affine__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

sse2 fastcall static void
gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
							   const struct sna_composite_op *op,
							   const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif

/* Second pass for component-alpha Over: after the IN_REVERSE pass,
 * switch the blend to ONE/ONE and texture stage to PictOpAdd, then
 * replay the already-emitted vertex packet verbatim to add the source.
 */
static void gen2_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	uint32_t ablend, cblend, *src, *dst;
	int n;

	if (!op->need_magic_ca_pass)
		return;

	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
	     sna->kgem.nbatch, sna->render.vertex_offset));

	assert(op->mask.bo);
	assert(op->has_component_alpha);

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
	      S8_ENABLE_COLOR_BUFFER_WRITE);
	/* Invalidate the ls1/ls2 caches -- the batch no longer matches. */
	sna->render_state.gen2.ls1 = 0;

	gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	sna->render_state.gen2.ls2 = 0;

	/* Copy the whole primitive packet (header + vertices). */
	src = sna->kgem.batch + sna->render.vertex_offset;
	dst = sna->kgem.batch + sna->kgem.nbatch;
	n = 1 + sna->render.vertex_index;
	sna->kgem.nbatch += n;
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	while (n--)
		*dst++ = *src++;
}

/* Patch the pending PRIM3D packet's dword length, run the CA second
 * pass if needed, and reset the vertex accounting.
 */
static void gen2_vertex_flush(struct sna *sna,
			      const struct sna_composite_op *op)
{
	if (sna->render.vertex_index == 0)
		return;

	sna->kgem.batch[sna->render.vertex_offset] |=
sna->render.vertex_index - 1; 1165 1166 gen2_magic_ca_pass(sna, op); 1167 1168 sna->render.vertex_offset = 0; 1169 sna->render.vertex_index = 0; 1170} 1171 1172inline static int gen2_get_rectangles(struct sna *sna, 1173 const struct sna_composite_op *op, 1174 int want) 1175{ 1176 int rem = batch_space(sna), size, need; 1177 1178 DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n", 1179 __FUNCTION__, want, op->floats_per_vertex, rem)); 1180 1181 assert(op->floats_per_vertex); 1182 assert(op->floats_per_rect == 3 * op->floats_per_vertex); 1183 1184 need = 1; 1185 size = op->floats_per_rect; 1186 if (op->need_magic_ca_pass) 1187 need += 6 + size*sna->render.vertex_index, size *= 2; 1188 1189 DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n", 1190 __FUNCTION__, want, need, size, rem)); 1191 if (rem < need + size) { 1192 gen2_vertex_flush(sna, op); 1193 kgem_submit(&sna->kgem); 1194 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1195 return 0; 1196 } 1197 1198 rem -= need; 1199 if (sna->render.vertex_offset == 0) { 1200 if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) == 1201 (PRIM3D_INLINE | PRIM3D_RECTLIST)) { 1202 uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1]; 1203 assert(*b & 0xffff); 1204 sna->render.vertex_index = 1 + (*b & 0xffff); 1205 *b = PRIM3D_INLINE | PRIM3D_RECTLIST; 1206 sna->render.vertex_offset = sna->kgem.nbatch - 1; 1207 assert(!op->need_magic_ca_pass); 1208 } else { 1209 sna->render.vertex_offset = sna->kgem.nbatch; 1210 BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); 1211 } 1212 1213 need = 0; 1214 } else 1215 need = sna->kgem.nbatch - sna->render.vertex_offset; 1216 1217 if (rem > MAX_INLINE - need) 1218 rem = MAX_INLINE -need; 1219 1220 if (want > 1 && want * size > rem) 1221 want = rem / size; 1222 1223 assert(want); 1224 sna->render.vertex_index += want*op->floats_per_rect; 1225 return want; 1226} 1227 1228fastcall static void 1229gen2_render_composite_blt(struct sna *sna, 1230 const struct sna_composite_op *op, 1231 const struct sna_composite_rectangles 
*r) 1232{ 1233 if (!gen2_get_rectangles(sna, op, 1)) { 1234 gen2_emit_composite_state(sna, op); 1235 gen2_get_rectangles(sna, op, 1); 1236 } 1237 1238 op->prim_emit(sna, op, r); 1239} 1240 1241fastcall static void 1242gen2_render_composite_box(struct sna *sna, 1243 const struct sna_composite_op *op, 1244 const BoxRec *box) 1245{ 1246 struct sna_composite_rectangles r; 1247 1248 if (!gen2_get_rectangles(sna, op, 1)) { 1249 gen2_emit_composite_state(sna, op); 1250 gen2_get_rectangles(sna, op, 1); 1251 } 1252 1253 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1254 box->x1, box->y1, 1255 box->x2 - box->x1, 1256 box->y2 - box->y1)); 1257 1258 r.dst.x = box->x1; r.dst.y = box->y1; 1259 r.width = box->x2 - box->x1; 1260 r.height = box->y2 - box->y1; 1261 r.src = r.mask = r.dst; 1262 1263 op->prim_emit(sna, op, &r); 1264} 1265 1266static void 1267gen2_render_composite_boxes(struct sna *sna, 1268 const struct sna_composite_op *op, 1269 const BoxRec *box, int nbox) 1270{ 1271 do { 1272 int nbox_this_time; 1273 1274 nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1275 if (nbox_this_time == 0) { 1276 gen2_emit_composite_state(sna, op); 1277 nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1278 } 1279 nbox -= nbox_this_time; 1280 1281 do { 1282 struct sna_composite_rectangles r; 1283 1284 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1285 box->x1, box->y1, 1286 box->x2 - box->x1, 1287 box->y2 - box->y1)); 1288 1289 r.dst.x = box->x1; r.dst.y = box->y1; 1290 r.width = box->x2 - box->x1; 1291 r.height = box->y2 - box->y1; 1292 r.src = r.mask = r.dst; 1293 1294 op->prim_emit(sna, op, &r); 1295 box++; 1296 } while (--nbox_this_time); 1297 } while (nbox); 1298} 1299 1300static void gen2_render_composite_done(struct sna *sna, 1301 const struct sna_composite_op *op) 1302{ 1303 gen2_vertex_flush(sna, op); 1304 1305 if (op->mask.bo) 1306 kgem_bo_destroy(&sna->kgem, op->mask.bo); 1307 if (op->src.bo) 1308 kgem_bo_destroy(&sna->kgem, op->src.bo); 1309 
	sna_render_composite_redirect_done(sna, op);
}

/* Describe a solid-colour channel: no bo, the ARGB value is stored in
 * u.gen2.pixel and folded into the pipeline as a diffuse/specular colour. */
static bool
gen2_composite_solid_init(struct sna *sna,
			  struct sna_composite_channel *channel,
			  uint32_t color)
{
	channel->filter = PictFilterNearest;
	channel->repeat = RepeatNormal;
	channel->is_solid = true;
	channel->is_affine = true;
	channel->width = 1;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->bo = NULL;
	channel->u.gen2.pixel = color;

	channel->scale[0] = channel->scale[1] = 1;
	channel->offset[0] = channel->offset[1] = 0;
	return true;
}

#define xFixedToDouble(f) pixman_fixed_to_double(f)

/* Set up a linear gradient source: the gradient ramp is rendered into a
 * 1D texture and (dx, dy, offset) project a destination pixel onto the
 * [0, 1] gradient axis.  Returns 0 to ask the caller for a solid
 * fallback, or delegates to picture_fixup for projective transforms. */
static bool
gen2_composite_linear_init(struct sna *sna,
			   PicturePtr picture,
			   struct sna_composite_channel *channel,
			   int x, int y,
			   int w, int h,
			   int dst_x, int dst_y)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	pixman_fixed_t tx, ty;
	float x0, y0, sf;
	float dx, dy;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	/* Degenerate gradient: no axis to project onto. */
	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	if (!sna_transform_is_affine(picture->transform)) {
		DBG(("%s: fallback due to projective transform\n",
		     __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
	if (!channel->bo)
		return 0;

	channel->filter = PictFilterNearest;
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->is_linear = true;
	channel->width = channel->bo->pitch / 4;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->scale[0] = channel->scale[1] = 1;
	channel->offset[0] = channel->offset[1] = 0;

	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
		dy = xFixedToDouble(linear->p2.y - linear->p1.y);

		x0 = xFixedToDouble(linear->p1.x);
		y0 = xFixedToDouble(linear->p1.y);

		if (tx | ty) {
			x0 -= pixman_fixed_to_double(tx);
			y0 -= pixman_fixed_to_double(ty);
		}
	} else {
		/* General affine case: map the gradient endpoints back
		 * into destination space through the inverse transform. */
		struct pixman_f_vector p1, p2;
		struct pixman_f_transform m, inv;

		pixman_f_transform_from_pixman_transform(&m, picture->transform);
		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     m.m[0][0], m.m[0][1], m.m[0][2],
		     m.m[1][0], m.m[1][1], m.m[1][2],
		     m.m[2][0], m.m[2][1], m.m[2][2]));
		if (!pixman_f_transform_invert(&inv, &m))
			return 0;

		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
		p1.v[2] = 1.;
		pixman_f_transform_point(&inv, &p1);

		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
		p2.v[2] = 1.;
		pixman_f_transform_point(&inv, &p2);

		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
		     __FUNCTION__,
		     p1.v[0], p1.v[1], p1.v[2],
		     p2.v[0], p2.v[1], p2.v[2]));

		dx = p2.v[0] - p1.v[0];
		dy = p2.v[1] - p1.v[1];

		x0 = p1.v[0];
		y0 = p1.v[1];
	}

	/* Normalise (dx, dy) so the dot product spans [0, 1] along p1->p2. */
	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	channel->u.linear.dx = dx;
	channel->u.linear.dy = dy;
	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);

	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
	     __FUNCTION__, dx, dy, channel->u.linear.offset));

	return channel->bo != NULL;
}

/* Does the sampled region lie entirely within the picture, or does the
 * repeat mode make the question moot? */
static bool source_is_covered(PicturePtr picture,
			      int x, int y,
			      int width, int height)
{
	int x1, y1, x2, y2;

	if (picture->repeat && picture->repeatType != RepeatNone)
		return true;

	if (picture->pDrawable == NULL)
		return false;

	if (picture->transform) {
		pixman_box16_t sample;

		sample.x1 = x;
		sample.y1 = y;
		sample.x2 = x + width;
		sample.y2 = y + height;

		pixman_transform_bounds(picture->transform, &sample);

		x1 = sample.x1;
		x2 = sample.x2;
		y1 = sample.y1;
		y2 = sample.y2;
	} else {
		x1 = x;
		y1 = y;
		x2 = x + width;
		y2 = y + height;
	}

	return
		x1 >= 0 && y1 >= 0 &&
		x2 <= picture->pDrawable->width &&
		y2 <= picture->pDrawable->height;
}

/* Can the hardware sample this picture format directly?  On gen < 021
 * the i85x-only formats are usable only when sampling cannot stray
 * outside the drawable; otherwise request an alpha fixup. */
static bool
gen2_check_card_format(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y, int w, int h,
		       bool *fixup_alpha)
{
	uint32_t format = picture->format;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
		if (i8xx_tex_formats[i].fmt == format)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
		if (i85x_tex_formats[i].fmt == format) {
			if (sna->kgem.gen >= 021)
				return true;

			if (source_is_covered(picture, x, y, w, h)) {
				channel->is_opaque = true;
				return true;
			}

			*fixup_alpha = true;
			return false;
		}
	}

	*fixup_alpha = false;
	return false;
}

/* Prepare a picture as a composite channel.  Returns 1 on success, 0 to
 * request a solid substitute, -1 on failure (caller falls back). */
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;
	bool fixup_alpha;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->is_linear = false;
	channel->is_opaque = false;
	channel->is_affine = true;
	channel->transform = NULL;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	if (!gen2_check_repeat(picture)) {
		DBG(("%s -- fallback, unhandled repeat %d\n",
		     __FUNCTION__, picture->repeat));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen2_check_filter(picture)) {
		DBG(("%s -- fallback, unhandled filter %d\n",
		     __FUNCTION__, picture->filter));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	/* Source-only pictures (gradients, solids without a drawable). */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen2_composite_linear_init(sna, picture, channel,
							  x, y,
							  w, h,
							  dst_x, dst_y);

		DBG(("%s -- fallback, unhandled source %d\n",
		     __FUNCTION__, picture->pSourcePict->type));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* Fold an integer translation into (x, y) and drop the
		 * transform entirely; nearest filtering then suffices. */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	if (!gen2_check_card_format(sna, picture, channel, x, y, w, h, &fixup_alpha))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y, fixup_alpha);

	channel->pict_format = picture->format;
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Bind the destination picture to a GPU bo, redirecting through a proxy
 * if the target exceeds the gen2 size/pitch limits. */
static bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
1636 op->dst.width = op->dst.pixmap->drawable.width; 1637 op->dst.height = op->dst.pixmap->drawable.height; 1638 1639 if (w && h) { 1640 box.x1 = x; 1641 box.y1 = y; 1642 box.x2 = x + w; 1643 box.y2 = y + h; 1644 } else 1645 sna_render_picture_extents(dst, &box); 1646 1647 hint = PREFER_GPU | RENDER_GPU; 1648 if (!need_tiling(sna, op->dst.width, op->dst.height)) 1649 hint |= FORCE_GPU; 1650 if (!partial) { 1651 hint |= IGNORE_DAMAGE; 1652 if (w == op->dst.width && h == op->dst.height) 1653 hint |= REPLACES; 1654 } 1655 1656 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1657 if (op->dst.bo == NULL) 1658 return false; 1659 1660 if (hint & REPLACES) { 1661 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1662 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1663 } 1664 1665 assert((op->dst.bo->pitch & 7) == 0); 1666 1667 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1668 &op->dst.x, &op->dst.y); 1669 1670 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1671 __FUNCTION__, 1672 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1673 op->dst.width, op->dst.height, 1674 op->dst.bo->pitch, 1675 op->dst.x, op->dst.y, 1676 op->damage ? 
*op->damage : (void *)-1)); 1677 1678 assert(op->dst.bo->proxy == NULL); 1679 1680 if (((too_large(op->dst.width, op->dst.height) || 1681 op->dst.bo->pitch > MAX_3D_PITCH)) && 1682 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1683 return false; 1684 1685 return true; 1686} 1687 1688static bool 1689is_unhandled_gradient(PicturePtr picture, bool precise) 1690{ 1691 if (picture->pDrawable) 1692 return false; 1693 1694 switch (picture->pSourcePict->type) { 1695 case SourcePictTypeSolidFill: 1696 case SourcePictTypeLinear: 1697 return false; 1698 default: 1699 return precise; 1700 } 1701} 1702 1703static bool 1704has_alphamap(PicturePtr p) 1705{ 1706 return p->alphaMap != NULL; 1707} 1708 1709static bool 1710need_upload(PicturePtr p) 1711{ 1712 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 1713} 1714 1715static bool 1716source_is_busy(PixmapPtr pixmap) 1717{ 1718 struct sna_pixmap *priv = sna_pixmap(pixmap); 1719 if (priv == NULL) 1720 return false; 1721 1722 if (priv->clear) 1723 return false; 1724 1725 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 1726 return true; 1727 1728 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 1729 return true; 1730 1731 return priv->gpu_damage && !priv->cpu_damage; 1732} 1733 1734static bool 1735source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 1736{ 1737 if (sna_picture_is_solid(p, NULL)) 1738 return false; 1739 1740 if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p)) 1741 return true; 1742 1743 if (pixmap && source_is_busy(pixmap)) 1744 return false; 1745 1746 return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p); 1747} 1748 1749static bool 1750gen2_composite_fallback(struct sna *sna, 1751 PicturePtr src, 1752 PicturePtr mask, 1753 PicturePtr dst) 1754{ 1755 PixmapPtr src_pixmap; 1756 PixmapPtr mask_pixmap; 1757 PixmapPtr dst_pixmap; 1758 bool src_fallback, mask_fallback; 1759 1760 if (!gen2_check_dst_format(dst->format)) { 1761 DBG(("%s: unknown 
destination format: %d\n", 1762 __FUNCTION__, dst->format)); 1763 return true; 1764 } 1765 1766 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 1767 1768 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 1769 src_fallback = source_fallback(src, src_pixmap, 1770 dst->polyMode == PolyModePrecise); 1771 1772 if (mask) { 1773 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 1774 mask_fallback = source_fallback(mask, mask_pixmap, 1775 dst->polyMode == PolyModePrecise); 1776 } else { 1777 mask_pixmap = NULL; 1778 mask_fallback = NULL; 1779 } 1780 1781 /* If we are using the destination as a source and need to 1782 * readback in order to upload the source, do it all 1783 * on the cpu. 1784 */ 1785 if (src_pixmap == dst_pixmap && src_fallback) { 1786 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 1787 return true; 1788 } 1789 if (mask_pixmap == dst_pixmap && mask_fallback) { 1790 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 1791 return true; 1792 } 1793 1794 /* If anything is on the GPU, push everything out to the GPU */ 1795 if (dst_use_gpu(dst_pixmap)) { 1796 DBG(("%s: dst is already on the GPU, try to use GPU\n", 1797 __FUNCTION__)); 1798 return false; 1799 } 1800 1801 if (src_pixmap && !src_fallback) { 1802 DBG(("%s: src is already on the GPU, try to use GPU\n", 1803 __FUNCTION__)); 1804 return false; 1805 } 1806 if (mask_pixmap && !mask_fallback) { 1807 DBG(("%s: mask is already on the GPU, try to use GPU\n", 1808 __FUNCTION__)); 1809 return false; 1810 } 1811 1812 /* However if the dst is not on the GPU and we need to 1813 * render one of the sources using the CPU, we may 1814 * as well do the entire operation in place onthe CPU. 
1815 */ 1816 if (src_fallback) { 1817 DBG(("%s: dst is on the CPU and src will fallback\n", 1818 __FUNCTION__)); 1819 return true; 1820 } 1821 1822 if (mask && mask_fallback) { 1823 DBG(("%s: dst is on the CPU and mask will fallback\n", 1824 __FUNCTION__)); 1825 return true; 1826 } 1827 1828 if (too_large(dst_pixmap->drawable.width, 1829 dst_pixmap->drawable.height) && 1830 dst_is_cpu(dst_pixmap)) { 1831 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 1832 return true; 1833 } 1834 1835 DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 1836 __FUNCTION__)); 1837 return dst_use_cpu(dst_pixmap); 1838} 1839 1840static int 1841reuse_source(struct sna *sna, 1842 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 1843 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 1844{ 1845 uint32_t color; 1846 1847 if (src_x != msk_x || src_y != msk_y) 1848 return false; 1849 1850 if (sna_picture_is_solid(mask, &color)) 1851 return gen2_composite_solid_init(sna, mc, color); 1852 1853 if (sc->is_solid) 1854 return false; 1855 1856 if (src == mask) { 1857 DBG(("%s: mask is source\n", __FUNCTION__)); 1858 *mc = *sc; 1859 mc->bo = kgem_bo_reference(mc->bo); 1860 return true; 1861 } 1862 1863 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 1864 return false; 1865 1866 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 1867 1868 if (!sna_transform_equal(src->transform, mask->transform)) 1869 return false; 1870 1871 if (!sna_picture_alphamap_equal(src, mask)) 1872 return false; 1873 1874 if (!gen2_check_repeat(mask)) 1875 return false; 1876 1877 if (!gen2_check_filter(mask)) 1878 return false; 1879 1880 if (!gen2_check_format(sna, mask)) 1881 return false; 1882 1883 DBG(("%s: reusing source channel for mask with a twist\n", 1884 __FUNCTION__)); 1885 1886 *mc = *sc; 1887 mc->repeat = mask->repeat ? 
		mask->repeatType : RepeatNone;
	mc->filter = mask->filter;
	mc->pict_format = mask->format;
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

/* Prepare a Render composite operation for the gen2 3D pipeline.
 * Fills in 'tmp' with the vertex layout, the primitive emitter and the
 * blt/box/boxes/done callbacks; returns false to trigger a fallback. */
static bool
gen2_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try the 2D blitter first for simple unmasked operations. */
	if (mask == NULL &&
	    sna_blt_composite(sna, op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen2_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    mask_x, mask_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	tmp->op = op;
	sna_render_composite_redirect_init(tmp);

	if (!gen2_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		goto fallback;
	}

	switch (gen2_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: fallback -- unable to prepare source\n",
		     __FUNCTION__));
		goto cleanup_dst;
	case 0:
		/* Substitute transparent black for an unusable source. */
		gen2_composite_solid_init(sna, &tmp->src, 0);
		break;
	case 1:
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;
		break;
	}

	if (mask) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			switch (gen2_composite_picture(sna, mask, &tmp->mask,
						       mask_x, mask_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: fallback -- unable to prepare mask\n",
				     __FUNCTION__));
				goto cleanup_src;
			case 0:
				gen2_composite_solid_init(sna, &tmp->mask, 0);
				/* fallthrough */
			case 1:
				break;
			}
		}

		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value. We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = true;
			if (gen2_blend_op[op].src_alpha &&
			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
					     __FUNCTION__,
					     gen2_blend_op[op].src_blend));
					goto cleanup_src;
				}

				/* OVER is split into OutReverse + an
				 * additive second pass (magic_ca_pass). */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* convert solid to a texture (pure convenience) */
		if (tmp->mask.is_solid && tmp->src.is_solid) {
			assert(tmp->mask.is_affine);
			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
			if (!tmp->mask.bo)
				goto cleanup_src;
		}
	}

	/* Vertex layout: xy always, plus 2 (affine) or 3 (projective)
	 * texcoords per non-solid channel; 3 vertices per rectangle. */
	tmp->floats_per_vertex = 2;
	if (!tmp->src.is_solid)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.bo)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	tmp->floats_per_rect = 3*tmp->floats_per_vertex;

	/* Select the fastest emitter matching the channel configuration,
	 * preferring the SSE2 variants on capable 32-bit hosts. */
	tmp->prim_emit = gen2_emit_composite_primitive;
	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
				}
			}
		}
	} else {
		if (tmp->src.is_solid) {
			assert(tmp->floats_per_rect == 6);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_constant;
			}
		} else if (tmp->src.is_linear) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_linear;
			}
		} else if (tmp->src.transform == NULL) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_identity;
			}
		} else if (tmp->src.is_affine) {
			assert(tmp->floats_per_rect == 12);
			/* Pre-divide by w so the emitter can treat the
			 * affine transform as 2x3. */
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_affine;
			}
		}
	}

	tmp->blt = gen2_render_composite_blt;
	tmp->box = gen2_render_composite_box;
	tmp->boxes = gen2_render_composite_boxes;
	tmp->done = gen2_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: fallback, operation does not fit into GTT\n",
			     __FUNCTION__));
			goto cleanup_mask;
		}
	}

	gen2_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op, src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

/* Span emitter, solid source: 3 vertices of (x, y, diffuse-alpha).
 * The opacity is packed into the alpha byte of the diffuse colour. */
fastcall static void
gen2_emit_composite_spans_primitive_constant(struct sna *sna,
					     const struct sna_composite_spans_op *op,
					     const BoxRec *box,
					     float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}

/* Span emitter, linear-gradient source: (x, y, alpha, linear-coord). */
fastcall static void
gen2_emit_composite_spans_primitive_linear(struct sna *sna,
					   const struct sna_composite_spans_op *op,
					   const BoxRec *box,
					   float
					   opacity)
{
	union {
		float f;
		uint32_t u;
	} alpha;

	/* Pack opacity into the alpha byte, type-punned via a union so
	 * it can be emitted through the float VERTEX() path. */
	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}

/* Span emitter, untransformed texture source: 3 vertices of
 * (x, y, alpha, u, v) with texcoords scaled into [0, 1]. */
fastcall static void
gen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}

/* Span emitter, affine-transformed texture source: texcoords are run
 * through the source transform per corner. */
fastcall static void
gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const BoxRec *box,
						  float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}

/* SSE2-compiled clones of the span emitters above, selected at runtime
 * on 32-bit builds when the CPU supports SSE2. */
#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
						   const struct sna_composite_spans_op *op,
						   const BoxRec *box,
						   float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna,
						 const struct sna_composite_spans_op *op,
						 const BoxRec *box,
						 float opacity)
{
	union {
		float f;
		uint32_t u;
	} alpha;

	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
							const struct sna_composite_spans_op *op,
							const BoxRec *box,
							float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
#endif

/* Generic (slow-path) span vertex: dst xy, packed alpha, then either a
 * linear-gradient coordinate or a texture coordinate. */
static void
gen2_emit_composite_spans_vertex(struct sna *sna,
				 const struct sna_composite_spans_op *op,
				 int16_t x, int16_t y,
				 float opacity)
{
	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
	BATCH((uint8_t)(opacity * 255) << 24);
	assert(!op->base.src.is_solid);
	if (op->base.src.is_linear)
		gen2_emit_composite_linear(sna, &op->base.src, x, y);
	else
		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
}

/* Generic span rectangle: three corners (x2,y2), (x1,y2), (x1,y1) as
 * required by PRIM3D_RECTLIST. */
fastcall static void
gen2_emit_composite_spans_primitive(struct sna *sna,
				    const struct sna_composite_spans_op *op,
				    const BoxRec *box,
				    float opacity)
{
	gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity);
	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity);
	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity);
}

/* Program texture-blend stage 0 for span rendering: modulate the
 * source (texel, specular colour, or constant) by the per-span diffuse
 * alpha.  The emit is unwound if it matches the cached ls2 state. */
static void
gen2_emit_spans_pipeline(struct sna *sna,
			 const struct sna_composite_spans_op *op)
{
	uint32_t cblend, ablend;
	uint32_t unwind;

	cblend =
		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
		TB0C_OUTPUT_WRITE_CURRENT;
	ablend =
		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
		TB0A_ARG1_SEL_DIFFUSE |
		TB0A_OUTPUT_WRITE_CURRENT;

	if (op->base.src.is_solid) {
		ablend |= TB0A_ARG2_SEL_SPECULAR;
		cblend |= TB0C_ARG2_SEL_SPECULAR;
		if (op->base.dst.format == PICT_a8)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
	} else if (op->base.dst.format == PICT_a8) {
		ablend |= TB0A_ARG2_SEL_TEXEL0;
		cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA;
	} else {
		if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0)
			cblend |= TB0C_ARG2_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT;

		if (op->base.src.is_opaque)
			ablend |= TB0A_ARG2_SEL_ONE;
		else
			ablend |= TB0A_ARG2_SEL_TEXEL0;
	}

	/* Skip the state emit entirely if it is identical to the last
	 * LOAD_STATE_IMMEDIATE_2 we issued. */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}

static void gen2_emit_composite_spans_state(struct sna *sna,
					    const struct sna_composite_spans_op *op)
{
	uint32_t unwind;

	gen2_get_batch(sna, &op->base);
	gen2_emit_target(sna,
			 op->base.dst.bo,
			 op->base.dst.width,
			 op->base.dst.height,
			 op->base.dst.format);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(!op->base.src.is_solid << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
	BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
sna->kgem.batch + unwind + 1, 2467 3 * sizeof(uint32_t)) == 0) 2468 sna->kgem.nbatch = unwind; 2469 else 2470 sna->render_state.gen2.ls1 = unwind; 2471 2472 gen2_disable_logic_op(sna); 2473 gen2_emit_spans_pipeline(sna, op); 2474 2475 if (op->base.src.is_solid) { 2476 if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) { 2477 BATCH(_3DSTATE_DFLT_SPECULAR_CMD); 2478 BATCH(op->base.src.u.gen2.pixel); 2479 sna->render_state.gen2.specular = op->base.src.u.gen2.pixel; 2480 } 2481 } else { 2482 uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD | 2483 (op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D); 2484 if (sna->render_state.gen2.vft != v) { 2485 BATCH(v); 2486 sna->render_state.gen2.vft = v; 2487 } 2488 gen2_emit_texture(sna, &op->base.src, 0); 2489 } 2490} 2491 2492fastcall static void 2493gen2_render_composite_spans_box(struct sna *sna, 2494 const struct sna_composite_spans_op *op, 2495 const BoxRec *box, float opacity) 2496{ 2497 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2498 __FUNCTION__, 2499 op->base.src.offset[0], op->base.src.offset[1], 2500 opacity, 2501 op->base.dst.x, op->base.dst.y, 2502 box->x1, box->y1, 2503 box->x2 - box->x1, 2504 box->y2 - box->y1)); 2505 2506 if (gen2_get_rectangles(sna, &op->base, 1) == 0) { 2507 gen2_emit_composite_spans_state(sna, op); 2508 gen2_get_rectangles(sna, &op->base, 1); 2509 } 2510 2511 op->prim_emit(sna, op, box, opacity); 2512} 2513 2514static void 2515gen2_render_composite_spans_boxes(struct sna *sna, 2516 const struct sna_composite_spans_op *op, 2517 const BoxRec *box, int nbox, 2518 float opacity) 2519{ 2520 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2521 __FUNCTION__, nbox, 2522 op->base.src.offset[0], op->base.src.offset[1], 2523 opacity, 2524 op->base.dst.x, op->base.dst.y)); 2525 2526 do { 2527 int nbox_this_time; 2528 2529 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2530 if (nbox_this_time == 0) { 2531 
gen2_emit_composite_spans_state(sna, op); 2532 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2533 } 2534 nbox -= nbox_this_time; 2535 2536 do { 2537 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2538 box->x1, box->y1, 2539 box->x2 - box->x1, 2540 box->y2 - box->y1)); 2541 2542 op->prim_emit(sna, op, box++, opacity); 2543 } while (--nbox_this_time); 2544 } while (nbox); 2545} 2546 2547fastcall static void 2548gen2_render_composite_spans_done(struct sna *sna, 2549 const struct sna_composite_spans_op *op) 2550{ 2551 DBG(("%s()\n", __FUNCTION__)); 2552 2553 gen2_vertex_flush(sna, &op->base); 2554 2555 if (op->base.src.bo) 2556 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2557 2558 sna_render_composite_redirect_done(sna, &op->base); 2559} 2560 2561static bool 2562gen2_check_composite_spans(struct sna *sna, 2563 uint8_t op, PicturePtr src, PicturePtr dst, 2564 int16_t width, int16_t height, unsigned flags) 2565{ 2566 if (op >= ARRAY_SIZE(gen2_blend_op)) 2567 return false; 2568 2569 if (gen2_composite_fallback(sna, src, NULL, dst)) 2570 return false; 2571 2572 if (need_tiling(sna, width, height)) { 2573 if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2574 DBG(("%s: fallback, tiled operation not on GPU\n", 2575 __FUNCTION__)); 2576 return false; 2577 } 2578 } 2579 2580 return true; 2581} 2582 2583static bool 2584gen2_render_composite_spans(struct sna *sna, 2585 uint8_t op, 2586 PicturePtr src, 2587 PicturePtr dst, 2588 int16_t src_x, int16_t src_y, 2589 int16_t dst_x, int16_t dst_y, 2590 int16_t width, int16_t height, 2591 unsigned flags, 2592 struct sna_composite_spans_op *tmp) 2593{ 2594 DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, 2595 src_x, src_y, dst_x, dst_y, width, height)); 2596 2597 assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags)); 2598 if (need_tiling(sna, width, height)) { 2599 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2600 __FUNCTION__, width, height)); 2601 return 
sna_tiling_composite_spans(op, src, dst, 2602 src_x, src_y, dst_x, dst_y, 2603 width, height, flags, tmp); 2604 } 2605 2606 tmp->base.op = op; 2607 sna_render_composite_redirect_init(&tmp->base); 2608 if (!gen2_composite_set_target(sna, &tmp->base, dst, 2609 dst_x, dst_y, width, height, 2610 true)) { 2611 DBG(("%s: unable to set render target\n", 2612 __FUNCTION__)); 2613 return false; 2614 } 2615 2616 switch (gen2_composite_picture(sna, src, &tmp->base.src, 2617 src_x, src_y, 2618 width, height, 2619 dst_x, dst_y, 2620 dst->polyMode == PolyModePrecise)) { 2621 case -1: 2622 goto cleanup_dst; 2623 case 0: 2624 gen2_composite_solid_init(sna, &tmp->base.src, 0); 2625 case 1: 2626 break; 2627 } 2628 assert(tmp->base.src.bo || tmp->base.src.is_solid); 2629 2630 tmp->prim_emit = gen2_emit_composite_spans_primitive; 2631 tmp->base.floats_per_vertex = 3; 2632 if (tmp->base.src.is_solid) { 2633#if defined(sse2) && !defined(__x86_64__) 2634 if (sna->cpu_features & SSE2) { 2635 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2; 2636 } else 2637#endif 2638 { 2639 tmp->prim_emit = gen2_emit_composite_spans_primitive_constant; 2640 } 2641 } else if (tmp->base.src.is_linear) { 2642 tmp->base.floats_per_vertex += 2; 2643#if defined(sse2) && !defined(__x86_64__) 2644 if (sna->cpu_features & SSE2) { 2645 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2; 2646 } else 2647#endif 2648 { 2649 tmp->prim_emit = gen2_emit_composite_spans_primitive_linear; 2650 } 2651 } else { 2652 assert(tmp->base.src.bo); 2653 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 
2 : 3; 2654 if (tmp->base.src.transform == NULL) { 2655#if defined(sse2) && !defined(__x86_64__) 2656 if (sna->cpu_features & SSE2) { 2657 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2; 2658 } else 2659#endif 2660 { 2661 tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source; 2662 } 2663 } else if (tmp->base.src.is_affine) { 2664 tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; 2665 tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; 2666#if defined(sse2) && !defined(__x86_64__) 2667 if (sna->cpu_features & SSE2) { 2668 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2; 2669 } else 2670#endif 2671 { 2672 tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source; 2673 } 2674 } 2675 } 2676 tmp->base.mask.bo = NULL; 2677 tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex; 2678 2679 tmp->box = gen2_render_composite_spans_box; 2680 tmp->boxes = gen2_render_composite_spans_boxes; 2681 tmp->done = gen2_render_composite_spans_done; 2682 2683 if (!kgem_check_bo(&sna->kgem, 2684 tmp->base.dst.bo, tmp->base.src.bo, 2685 NULL)) { 2686 kgem_submit(&sna->kgem); 2687 if (!kgem_check_bo(&sna->kgem, 2688 tmp->base.dst.bo, tmp->base.src.bo, 2689 NULL)) 2690 goto cleanup_src; 2691 } 2692 2693 gen2_emit_composite_spans_state(sna, tmp); 2694 return true; 2695 2696cleanup_src: 2697 if (tmp->base.src.bo) 2698 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2699cleanup_dst: 2700 if (tmp->base.redirect.real_bo) 2701 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2702 return false; 2703} 2704 2705static void 2706gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op) 2707{ 2708 uint32_t blend, unwind; 2709 2710 unwind = sna->kgem.nbatch; 2711 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2712 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2713 2714 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 2715 TB0C_ARG1_SEL_DIFFUSE | 2716 TB0C_OUTPUT_WRITE_CURRENT; 2717 if 
(op->dst.format == PICT_a8) 2718 blend |= TB0C_ARG1_REPLICATE_ALPHA; 2719 BATCH(blend); 2720 2721 BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 2722 TB0A_ARG1_SEL_DIFFUSE | 2723 TB0A_OUTPUT_WRITE_CURRENT); 2724 2725 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2726 sna->kgem.batch + unwind + 1, 2727 2 * sizeof(uint32_t)) == 0) 2728 sna->kgem.nbatch = unwind; 2729 else 2730 sna->render_state.gen2.ls2 = unwind; 2731} 2732 2733static void gen2_emit_fill_composite_state(struct sna *sna, 2734 const struct sna_composite_op *op, 2735 uint32_t pixel) 2736{ 2737 uint32_t ls1; 2738 2739 gen2_get_batch(sna, op); 2740 gen2_emit_target(sna, 2741 op->dst.bo, 2742 op->dst.width, 2743 op->dst.height, 2744 op->dst.format); 2745 2746 ls1 = sna->kgem.nbatch; 2747 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2748 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2749 BATCH(0); 2750 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2751 BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format)); 2752 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2753 sna->kgem.batch + ls1 + 1, 2754 3 * sizeof(uint32_t)) == 0) 2755 sna->kgem.nbatch = ls1; 2756 else 2757 sna->render_state.gen2.ls1 = ls1; 2758 2759 gen2_emit_fill_pipeline(sna, op); 2760 2761 if (pixel != sna->render_state.gen2.diffuse) { 2762 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2763 BATCH(pixel); 2764 sna->render_state.gen2.diffuse = pixel; 2765 } 2766} 2767 2768static bool 2769gen2_render_fill_boxes_try_blt(struct sna *sna, 2770 CARD8 op, PictFormat format, 2771 const xRenderColor *color, 2772 const DrawableRec *dst, struct kgem_bo *dst_bo, 2773 const BoxRec *box, int n) 2774{ 2775 uint8_t alu; 2776 uint32_t pixel; 2777 2778 if (op > PictOpSrc) 2779 return false; 2780 2781 if (op == PictOpClear) { 2782 alu = GXclear; 2783 pixel = 0; 2784 } else if (!sna_get_pixel_from_rgba(&pixel, 2785 color->red, 2786 color->green, 2787 color->blue, 2788 color->alpha, 2789 format)) 2790 return false; 2791 else 2792 alu = GXcopy; 2793 2794 
return sna_blt_fill_boxes(sna, alu, 2795 dst_bo, dst->bitsPerPixel, 2796 pixel, box, n); 2797} 2798 2799static bool 2800gen2_render_fill_boxes(struct sna *sna, 2801 CARD8 op, 2802 PictFormat format, 2803 const xRenderColor *color, 2804 const DrawableRec *dst, struct kgem_bo *dst_bo, 2805 const BoxRec *box, int n) 2806{ 2807 struct sna_composite_op tmp; 2808 uint32_t pixel; 2809 2810 if (op >= ARRAY_SIZE(gen2_blend_op)) { 2811 DBG(("%s: fallback due to unhandled blend op: %d\n", 2812 __FUNCTION__, op)); 2813 return false; 2814 } 2815 2816#if NO_FILL_BOXES 2817 return gen2_render_fill_boxes_try_blt(sna, op, format, color, 2818 dst, dst_bo, 2819 box, n); 2820#endif 2821 if (gen2_render_fill_boxes_try_blt(sna, op, format, color, 2822 dst, dst_bo, 2823 box, n)) 2824 return true; 2825 2826 2827 DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", 2828 __FUNCTION__, op, (int)format, 2829 color->red, color->green, color->blue, color->alpha)); 2830 2831 if (too_large(dst->width, dst->height) || 2832 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH || 2833 !gen2_check_dst_format(format)) { 2834 DBG(("%s: try blt, too large or incompatible destination\n", 2835 __FUNCTION__)); 2836 if (!gen2_check_dst_format(format)) 2837 return false; 2838 2839 assert(dst_bo->pitch >= 8); 2840 return sna_tiling_fill_boxes(sna, op, format, color, 2841 dst, dst_bo, box, n); 2842 } 2843 2844 if (op == PictOpClear) 2845 pixel = 0; 2846 else if (!sna_get_pixel_from_rgba(&pixel, 2847 color->red, 2848 color->green, 2849 color->blue, 2850 color->alpha, 2851 PICT_a8r8g8b8)) 2852 return false; 2853 2854 DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n", 2855 __FUNCTION__, op, (int)format, pixel)); 2856 2857 memset(&tmp, 0, sizeof(tmp)); 2858 tmp.op = op; 2859 tmp.dst.pixmap = (PixmapPtr)dst; 2860 tmp.dst.width = dst->width; 2861 tmp.dst.height = dst->height; 2862 tmp.dst.format = format; 2863 tmp.dst.bo = dst_bo; 2864 tmp.floats_per_vertex = 2; 2865 tmp.floats_per_rect = 6; 2866 2867 
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2868 kgem_submit(&sna->kgem); 2869 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) 2870 return false; 2871 } 2872 2873 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2874 2875 do { 2876 int n_this_time = gen2_get_rectangles(sna, &tmp, n); 2877 if (n_this_time == 0) { 2878 gen2_emit_fill_composite_state(sna, &tmp, pixel); 2879 n_this_time = gen2_get_rectangles(sna, &tmp, n); 2880 } 2881 n -= n_this_time; 2882 2883 do { 2884 DBG((" (%d, %d), (%d, %d): %x\n", 2885 box->x1, box->y1, box->x2, box->y2, pixel)); 2886 VERTEX(box->x2); 2887 VERTEX(box->y2); 2888 VERTEX(box->x1); 2889 VERTEX(box->y2); 2890 VERTEX(box->x1); 2891 VERTEX(box->y1); 2892 box++; 2893 } while (--n_this_time); 2894 } while (n); 2895 2896 gen2_vertex_flush(sna, &tmp); 2897 return true; 2898} 2899 2900static void gen2_emit_fill_state(struct sna *sna, 2901 const struct sna_composite_op *op) 2902{ 2903 uint32_t ls1; 2904 2905 gen2_get_batch(sna, op); 2906 gen2_emit_target(sna, 2907 op->dst.bo, 2908 op->dst.width, 2909 op->dst.height, 2910 op->dst.format); 2911 2912 ls1 = sna->kgem.nbatch; 2913 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2914 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2915 BATCH(0); 2916 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2917 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 2918 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2919 sna->kgem.batch + ls1 + 1, 2920 3 * sizeof(uint32_t)) == 0) 2921 sna->kgem.nbatch = ls1; 2922 else 2923 sna->render_state.gen2.ls1 = ls1; 2924 2925 gen2_enable_logic_op(sna, op->op); 2926 gen2_emit_fill_pipeline(sna, op); 2927 2928 if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 2929 BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2930 BATCH(op->src.u.gen2.pixel); 2931 sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 2932 } 2933} 2934 2935static void 2936gen2_render_fill_op_blt(struct sna *sna, 2937 const struct sna_fill_op *op, 2938 int16_t x, int16_t y, int16_t w, int16_t h) 2939{ 2940 if 
(!gen2_get_rectangles(sna, &op->base, 1)) { 2941 gen2_emit_fill_state(sna, &op->base); 2942 gen2_get_rectangles(sna, &op->base, 1); 2943 } 2944 2945 VERTEX(x+w); 2946 VERTEX(y+h); 2947 VERTEX(x); 2948 VERTEX(y+h); 2949 VERTEX(x); 2950 VERTEX(y); 2951} 2952 2953fastcall static void 2954gen2_render_fill_op_box(struct sna *sna, 2955 const struct sna_fill_op *op, 2956 const BoxRec *box) 2957{ 2958 if (!gen2_get_rectangles(sna, &op->base, 1)) { 2959 gen2_emit_fill_state(sna, &op->base); 2960 gen2_get_rectangles(sna, &op->base, 1); 2961 } 2962 2963 VERTEX(box->x2); 2964 VERTEX(box->y2); 2965 VERTEX(box->x1); 2966 VERTEX(box->y2); 2967 VERTEX(box->x1); 2968 VERTEX(box->y1); 2969} 2970 2971fastcall static void 2972gen2_render_fill_op_boxes(struct sna *sna, 2973 const struct sna_fill_op *op, 2974 const BoxRec *box, 2975 int nbox) 2976{ 2977 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 2978 box->x1, box->y1, box->x2, box->y2, nbox)); 2979 2980 do { 2981 int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2982 if (nbox_this_time == 0) { 2983 gen2_emit_fill_state(sna, &op->base); 2984 nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2985 } 2986 nbox -= nbox_this_time; 2987 2988 do { 2989 VERTEX(box->x2); 2990 VERTEX(box->y2); 2991 VERTEX(box->x1); 2992 VERTEX(box->y2); 2993 VERTEX(box->x1); 2994 VERTEX(box->y1); 2995 box++; 2996 } while (--nbox_this_time); 2997 } while (nbox); 2998} 2999 3000static void 3001gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3002{ 3003 gen2_vertex_flush(sna, &op->base); 3004} 3005 3006static bool 3007gen2_render_fill(struct sna *sna, uint8_t alu, 3008 PixmapPtr dst, struct kgem_bo *dst_bo, 3009 uint32_t color, unsigned flags, 3010 struct sna_fill_op *tmp) 3011{ 3012#if NO_FILL 3013 return sna_blt_fill(sna, alu, 3014 dst_bo, dst->drawable.bitsPerPixel, 3015 color, 3016 tmp); 3017#endif 3018 3019 /* Prefer to use the BLT if already engaged */ 3020 if (sna_blt_fill(sna, alu, 3021 dst_bo, 
dst->drawable.bitsPerPixel, 3022 color, 3023 tmp)) 3024 return true; 3025 3026 /* Must use the BLT if we can't RENDER... */ 3027 if (too_large(dst->drawable.width, dst->drawable.height) || 3028 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) 3029 return false; 3030 3031 tmp->base.op = alu; 3032 tmp->base.dst.pixmap = dst; 3033 tmp->base.dst.width = dst->drawable.width; 3034 tmp->base.dst.height = dst->drawable.height; 3035 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3036 tmp->base.dst.bo = dst_bo; 3037 tmp->base.dst.x = tmp->base.dst.y = 0; 3038 tmp->base.floats_per_vertex = 2; 3039 tmp->base.floats_per_rect = 6; 3040 3041 tmp->base.src.u.gen2.pixel = 3042 sna_rgba_for_color(color, dst->drawable.depth); 3043 3044 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3045 kgem_submit(&sna->kgem); 3046 return sna_blt_fill(sna, alu, 3047 dst_bo, dst->drawable.bitsPerPixel, 3048 color, 3049 tmp); 3050 } 3051 3052 tmp->blt = gen2_render_fill_op_blt; 3053 tmp->box = gen2_render_fill_op_box; 3054 tmp->boxes = gen2_render_fill_op_boxes; 3055 tmp->points = NULL; 3056 tmp->done = gen2_render_fill_op_done; 3057 3058 gen2_emit_fill_state(sna, &tmp->base); 3059 return true; 3060} 3061 3062static bool 3063gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3064 uint32_t color, 3065 int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3066 uint8_t alu) 3067{ 3068 BoxRec box; 3069 3070 box.x1 = x1; 3071 box.y1 = y1; 3072 box.x2 = x2; 3073 box.y2 = y2; 3074 3075 return sna_blt_fill_boxes(sna, alu, 3076 bo, dst->drawable.bitsPerPixel, 3077 color, &box, 1); 3078} 3079 3080static bool 3081gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3082 uint32_t color, 3083 int16_t x1, int16_t y1, 3084 int16_t x2, int16_t y2, 3085 uint8_t alu) 3086{ 3087 struct sna_composite_op tmp; 3088 3089#if NO_FILL_ONE 3090 return gen2_render_fill_one_try_blt(sna, dst, bo, color, 3091 x1, y1, x2, y2, alu); 3092#endif 3093 3094 /* Prefer to 
use the BLT if already engaged */ 3095 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3096 x1, y1, x2, y2, alu)) 3097 return true; 3098 3099 /* Must use the BLT if we can't RENDER... */ 3100 if (too_large(dst->drawable.width, dst->drawable.height) || 3101 bo->pitch < 8 || bo->pitch > MAX_3D_PITCH) 3102 return false; 3103 3104 if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3105 kgem_submit(&sna->kgem); 3106 3107 if (gen2_render_fill_one_try_blt(sna, dst, bo, color, 3108 x1, y1, x2, y2, alu)) 3109 return true; 3110 3111 if (!kgem_check_bo(&sna->kgem, bo, NULL)) 3112 return false; 3113 } 3114 3115 tmp.op = alu; 3116 tmp.dst.pixmap = dst; 3117 tmp.dst.width = dst->drawable.width; 3118 tmp.dst.height = dst->drawable.height; 3119 tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3120 tmp.dst.bo = bo; 3121 tmp.floats_per_vertex = 2; 3122 tmp.floats_per_rect = 6; 3123 tmp.need_magic_ca_pass = false; 3124 3125 tmp.src.u.gen2.pixel = 3126 sna_rgba_for_color(color, dst->drawable.depth); 3127 3128 gen2_emit_fill_state(sna, &tmp); 3129 gen2_get_rectangles(sna, &tmp, 1); 3130 DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__, 3131 x1, y1, x2, y2, tmp.src.u.gen2.pixel)); 3132 VERTEX(x2); 3133 VERTEX(y2); 3134 VERTEX(x1); 3135 VERTEX(y2); 3136 VERTEX(x1); 3137 VERTEX(y1); 3138 gen2_vertex_flush(sna, &tmp); 3139 3140 return true; 3141} 3142 3143static void 3144gen2_emit_video_state(struct sna *sna, 3145 struct sna_video *video, 3146 struct sna_video_frame *frame, 3147 PixmapPtr pixmap, 3148 struct kgem_bo *dst_bo, 3149 int width, int height, 3150 bool bilinear) 3151{ 3152 uint32_t ms1, v, unwind; 3153 3154 gen2_emit_target(sna, dst_bo, width, height, 3155 sna_format_for_depth(pixmap->drawable.depth)); 3156 3157 unwind = sna->kgem.nbatch; 3158 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 3159 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 3160 BATCH(1 << 12); 3161 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 3162 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 3163 if 
(memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 3164 sna->kgem.batch + unwind + 1, 3165 3 * sizeof(uint32_t)) == 0) 3166 sna->kgem.nbatch = unwind; 3167 else 3168 sna->render_state.gen2.ls1 = unwind; 3169 3170 gen2_disable_logic_op(sna); 3171 3172 unwind = sna->kgem.nbatch; 3173 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 3174 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 3175 BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT | 3176 TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0); 3177 BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT | 3178 TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE); 3179 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 3180 sna->kgem.batch + unwind + 1, 3181 2 * sizeof(uint32_t)) == 0) 3182 sna->kgem.nbatch = unwind; 3183 else 3184 sna->render_state.gen2.ls2 = unwind; 3185 3186 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4); 3187 BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 3188 frame->bo, 3189 I915_GEM_DOMAIN_SAMPLER << 16, 3190 0)); 3191 ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION; 3192 switch (frame->id) { 3193 case FOURCC_YUY2: 3194 ms1 |= MT_422_YCRCB_NORMAL; 3195 break; 3196 case FOURCC_UYVY: 3197 ms1 |= MT_422_YCRCB_SWAPY; 3198 break; 3199 } 3200 BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) | 3201 ((frame->width - 1) << TM0S1_WIDTH_SHIFT) | 3202 ms1 | 3203 gen2_sampler_tiling_bits(frame->bo->tiling)); 3204 BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); 3205 if (bilinear) 3206 BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | 3207 FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | 3208 MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 3209 else 3210 BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | 3211 FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | 3212 MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 3213 BATCH(0); /* default color */ 3214 3215 BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) | 3216 ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN | 3217 ENABLE_ADDR_V_CNTL | 
TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) | 3218 ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP)); 3219 3220 v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; 3221 if (sna->render_state.gen2.vft != v) { 3222 BATCH(v); 3223 sna->render_state.gen2.vft = v; 3224 } 3225} 3226 3227static void 3228gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo) 3229{ 3230 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3231 3232 if (!kgem_check_batch(&sna->kgem, 120) || 3233 !kgem_check_reloc(&sna->kgem, 4) || 3234 !kgem_check_exec(&sna->kgem, 2)) { 3235 _kgem_submit(&sna->kgem); 3236 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3237 } 3238 3239 if (sna->render_state.gen2.need_invariant) 3240 gen2_emit_invariant(sna); 3241} 3242 3243static int 3244gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) 3245{ 3246 int size = floats_per_vertex * 3; 3247 int rem = batch_space(sna) - 1; 3248 3249 if (rem > MAX_INLINE) 3250 rem = MAX_INLINE; 3251 3252 if (size * want > rem) 3253 want = rem / size; 3254 3255 return want; 3256} 3257 3258static bool 3259gen2_render_video(struct sna *sna, 3260 struct sna_video *video, 3261 struct sna_video_frame *frame, 3262 RegionPtr dstRegion, 3263 PixmapPtr pixmap) 3264{ 3265 struct sna_pixmap *priv = sna_pixmap(pixmap); 3266 const BoxRec *pbox = region_rects(dstRegion); 3267 int nbox = region_num_rects(dstRegion); 3268 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3269 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3270 int src_width = frame->src.x2 - frame->src.x1; 3271 int src_height = frame->src.y2 - frame->src.y1; 3272 float src_offset_x, src_offset_y; 3273 float src_scale_x, src_scale_y; 3274 int pix_xoff, pix_yoff; 3275 struct kgem_bo *dst_bo; 3276 bool bilinear; 3277 int copy = 0; 3278 3279 DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, 3280 src_width, src_height, frame->width, frame->height, dst_width, dst_height)); 3281 3282 assert(priv->gpu_bo); 3283 dst_bo = 
priv->gpu_bo; 3284 3285 bilinear = src_width != dst_width || src_height != dst_height; 3286 3287 src_scale_x = (float)src_width / dst_width / frame->width; 3288 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 3289 3290 src_scale_y = (float)src_height / dst_height / frame->height; 3291 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 3292 DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", 3293 __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); 3294 3295 if (too_large(pixmap->drawable.width, pixmap->drawable.height) || 3296 dst_bo->pitch > MAX_3D_PITCH) { 3297 int bpp = pixmap->drawable.bitsPerPixel; 3298 3299 if (too_large(dst_width, dst_height)) 3300 return false; 3301 3302 dst_bo = kgem_create_2d(&sna->kgem, 3303 dst_width, dst_height, bpp, 3304 kgem_choose_tiling(&sna->kgem, 3305 I915_TILING_X, 3306 dst_width, dst_height, bpp), 3307 0); 3308 if (!dst_bo) 3309 return false; 3310 3311 pix_xoff = -dstRegion->extents.x1; 3312 pix_yoff = -dstRegion->extents.y1; 3313 copy = 1; 3314 } else { 3315 /* Set up the offset for translating from the given region 3316 * (in screen coordinates) to the backing pixmap. 
3317 */ 3318#ifdef COMPOSITE 3319 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 3320 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 3321#else 3322 pix_xoff = 0; 3323 pix_yoff = 0; 3324#endif 3325 3326 dst_width = pixmap->drawable.width; 3327 dst_height = pixmap->drawable.height; 3328 } 3329 3330 gen2_video_get_batch(sna, dst_bo); 3331 gen2_emit_video_state(sna, video, frame, pixmap, 3332 dst_bo, dst_width, dst_height, bilinear); 3333 do { 3334 int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); 3335 if (nbox_this_time == 0) { 3336 gen2_video_get_batch(sna, dst_bo); 3337 gen2_emit_video_state(sna, video, frame, pixmap, 3338 dst_bo, dst_width, dst_height, bilinear); 3339 nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); 3340 assert(nbox_this_time); 3341 } 3342 nbox -= nbox_this_time; 3343 3344 BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | 3345 ((12 * nbox_this_time) - 1)); 3346 do { 3347 int box_x1 = pbox->x1; 3348 int box_y1 = pbox->y1; 3349 int box_x2 = pbox->x2; 3350 int box_y2 = pbox->y2; 3351 3352 pbox++; 3353 3354 DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n", 3355 __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, 3356 box_x1 * src_scale_x + src_offset_x, 3357 box_y1 * src_scale_y + src_offset_y, 3358 box_x2 * src_scale_x + src_offset_x, 3359 box_y2 * src_scale_y + src_offset_y)); 3360 3361 /* bottom right */ 3362 BATCH_F(box_x2 + pix_xoff); 3363 BATCH_F(box_y2 + pix_yoff); 3364 BATCH_F(box_x2 * src_scale_x + src_offset_x); 3365 BATCH_F(box_y2 * src_scale_y + src_offset_y); 3366 3367 /* bottom left */ 3368 BATCH_F(box_x1 + pix_xoff); 3369 BATCH_F(box_y2 + pix_yoff); 3370 BATCH_F(box_x1 * src_scale_x + src_offset_x); 3371 BATCH_F(box_y2 * src_scale_y + src_offset_y); 3372 3373 /* top left */ 3374 BATCH_F(box_x1 + pix_xoff); 3375 BATCH_F(box_y1 + pix_yoff); 3376 BATCH_F(box_x1 * src_scale_x + src_offset_x); 3377 BATCH_F(box_y1 * src_scale_y + src_offset_y); 3378 } while (--nbox_this_time); 3379 } 
while (nbox); 3380 3381 if (copy) { 3382#ifdef COMPOSITE 3383 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 3384 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 3385#else 3386 pix_xoff = 0; 3387 pix_yoff = 0; 3388#endif 3389 sna_blt_copy_boxes(sna, GXcopy, 3390 dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, 3391 priv->gpu_bo, pix_xoff, pix_yoff, 3392 pixmap->drawable.bitsPerPixel, 3393 region_rects(dstRegion), 3394 region_num_rects(dstRegion)); 3395 3396 kgem_bo_destroy(&sna->kgem, dst_bo); 3397 } 3398 3399 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 3400 if ((pix_xoff | pix_yoff) == 0) { 3401 sna_damage_add(&priv->gpu_damage, dstRegion); 3402 } else { 3403 sna_damage_add_boxes(&priv->gpu_damage, 3404 region_rects(dstRegion), 3405 region_num_rects(dstRegion), 3406 pix_xoff, pix_yoff); 3407 } 3408 } 3409 3410 return true; 3411} 3412 3413static void 3414gen2_render_copy_setup_source(struct sna_composite_channel *channel, 3415 const DrawableRec *draw, 3416 struct kgem_bo *bo) 3417{ 3418 assert(draw->width && draw->height); 3419 3420 channel->filter = PictFilterNearest; 3421 channel->repeat = RepeatNone; 3422 channel->width = draw->width; 3423 channel->height = draw->height; 3424 channel->scale[0] = 1.f/draw->width; 3425 channel->scale[1] = 1.f/draw->height; 3426 channel->offset[0] = 0; 3427 channel->offset[1] = 0; 3428 channel->pict_format = sna_format_for_depth(draw->depth); 3429 channel->bo = bo; 3430 channel->is_affine = 1; 3431 3432 DBG(("%s: source=%d, (%dx%d), format=%08x\n", 3433 __FUNCTION__, bo->handle, 3434 channel->width, channel->height, 3435 channel->pict_format)); 3436} 3437 3438static void 3439gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op) 3440{ 3441 uint32_t blend, unwind; 3442 3443 unwind = sna->kgem.nbatch; 3444 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 3445 LOAD_TEXTURE_BLEND_STAGE(0) | 1); 3446 3447 blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 3448 TB0C_OUTPUT_WRITE_CURRENT; 3449 if 
(op->dst.format == PICT_a8) 3450 blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0; 3451 else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 3452 blend |= TB0C_ARG1_SEL_TEXEL0; 3453 else 3454 blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 3455 BATCH(blend); 3456 3457 blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 3458 TB0A_OUTPUT_WRITE_CURRENT; 3459 if (PICT_FORMAT_A(op->src.pict_format) == 0) 3460 blend |= TB0A_ARG1_SEL_ONE; 3461 else 3462 blend |= TB0A_ARG1_SEL_TEXEL0; 3463 BATCH(blend); 3464 3465 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 3466 sna->kgem.batch + unwind + 1, 3467 2 * sizeof(uint32_t)) == 0) 3468 sna->kgem.nbatch = unwind; 3469 else 3470 sna->render_state.gen2.ls2 = unwind; 3471} 3472 3473static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op) 3474{ 3475 uint32_t ls1, v; 3476 3477 gen2_get_batch(sna, op); 3478 3479 if (kgem_bo_is_dirty(op->src.bo)) { 3480 if (op->src.bo == op->dst.bo) 3481 BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 3482 else 3483 BATCH(_3DSTATE_MODES_5_CMD | 3484 PIPELINE_FLUSH_RENDER_CACHE | 3485 PIPELINE_FLUSH_TEXTURE_CACHE); 3486 kgem_clear_dirty(&sna->kgem); 3487 } 3488 gen2_emit_target(sna, 3489 op->dst.bo, 3490 op->dst.width, 3491 op->dst.height, 3492 op->dst.format); 3493 3494 ls1 = sna->kgem.nbatch; 3495 BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 3496 I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 3497 BATCH(1<<12); 3498 BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 3499 BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 3500 if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 3501 sna->kgem.batch + ls1 + 1, 3502 3 * sizeof(uint32_t)) == 0) 3503 sna->kgem.nbatch = ls1; 3504 else 3505 sna->render_state.gen2.ls1 = ls1; 3506 3507 gen2_enable_logic_op(sna, op->op); 3508 gen2_emit_copy_pipeline(sna, op); 3509 3510 v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; 3511 if (sna->render_state.gen2.vft != v) { 3512 BATCH(v); 3513 sna->render_state.gen2.vft = v; 3514 } 3515 3516 
gen2_emit_texture(sna, &op->src, 0); 3517} 3518 3519static bool 3520gen2_render_copy_boxes(struct sna *sna, uint8_t alu, 3521 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3522 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3523 const BoxRec *box, int n, unsigned flags) 3524{ 3525 struct sna_composite_op tmp; 3526 3527#if NO_COPY_BOXES 3528 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3529 return false; 3530 3531 return sna_blt_copy_boxes(sna, alu, 3532 src_bo, src_dx, src_dy, 3533 dst_bo, dst_dx, dst_dy, 3534 dst->drawable.bitsPerPixel, 3535 box, n); 3536#endif 3537 3538 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 3539 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 3540 3541 if (sna_blt_compare_depth(src, dst) && 3542 sna_blt_copy_boxes(sna, alu, 3543 src_bo, src_dx, src_dy, 3544 dst_bo, dst_dx, dst_dy, 3545 dst->bitsPerPixel, 3546 box, n)) 3547 return true; 3548 3549 if (src_bo == dst_bo || /* XXX handle overlap using 3D ? 
*/ 3550 too_large(src->width, src->height) || 3551 src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) { 3552fallback: 3553 return sna_blt_copy_boxes_fallback(sna, alu, 3554 src, src_bo, src_dx, src_dy, 3555 dst, dst_bo, dst_dx, dst_dy, 3556 box, n); 3557 } 3558 3559 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3560 kgem_submit(&sna->kgem); 3561 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3562 goto fallback; 3563 } 3564 3565 assert(dst_bo->pitch >= 8); 3566 3567 memset(&tmp, 0, sizeof(tmp)); 3568 tmp.op = alu; 3569 3570 tmp.dst.pixmap = (PixmapPtr)dst; 3571 tmp.dst.width = dst->width; 3572 tmp.dst.height = dst->height; 3573 tmp.dst.format = sna_format_for_depth(dst->depth); 3574 tmp.dst.bo = dst_bo; 3575 tmp.dst.x = tmp.dst.y = 0; 3576 tmp.damage = NULL; 3577 3578 DBG(("%s: target=%d, format=%08x, size=%dx%d\n", 3579 __FUNCTION__, dst_bo->handle, 3580 (unsigned)tmp.dst.format, 3581 tmp.dst.width, 3582 tmp.dst.height)); 3583 3584 sna_render_composite_redirect_init(&tmp); 3585 if (too_large(tmp.dst.width, tmp.dst.height) || 3586 dst_bo->pitch > MAX_3D_PITCH) { 3587 BoxRec extents = box[0]; 3588 int i; 3589 3590 for (i = 1; i < n; i++) { 3591 if (box[i].x1 < extents.x1) 3592 extents.x1 = box[i].x1; 3593 if (box[i].y1 < extents.y1) 3594 extents.y1 = box[i].y1; 3595 3596 if (box[i].x2 > extents.x2) 3597 extents.x2 = box[i].x2; 3598 if (box[i].y2 > extents.y2) 3599 extents.y2 = box[i].y2; 3600 } 3601 if (!sna_render_composite_redirect(sna, &tmp, 3602 extents.x1 + dst_dx, 3603 extents.y1 + dst_dy, 3604 extents.x2 - extents.x1, 3605 extents.y2 - extents.y1, 3606 alu != GXcopy || n > 1)) 3607 goto fallback_tiled; 3608 } 3609 3610 tmp.floats_per_vertex = 4; 3611 tmp.floats_per_rect = 12; 3612 3613 dst_dx += tmp.dst.x; 3614 dst_dy += tmp.dst.y; 3615 tmp.dst.x = tmp.dst.y = 0; 3616 3617 gen2_render_copy_setup_source(&tmp.src, src, src_bo); 3618 gen2_emit_copy_state(sna, &tmp); 3619 do { 3620 int n_this_time; 3621 3622 n_this_time = gen2_get_rectangles(sna, 
&tmp, n); 3623 if (n_this_time == 0) { 3624 gen2_emit_copy_state(sna, &tmp); 3625 n_this_time = gen2_get_rectangles(sna, &tmp, n); 3626 } 3627 n -= n_this_time; 3628 3629 do { 3630 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3631 box->x1 + src_dx, box->y1 + src_dy, 3632 box->x1 + dst_dx, box->y1 + dst_dy, 3633 box->x2 - box->x1, box->y2 - box->y1)); 3634 VERTEX(box->x2 + dst_dx); 3635 VERTEX(box->y2 + dst_dy); 3636 VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); 3637 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3638 3639 VERTEX(box->x1 + dst_dx); 3640 VERTEX(box->y2 + dst_dy); 3641 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3642 VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3643 3644 VERTEX(box->x1 + dst_dx); 3645 VERTEX(box->y1 + dst_dy); 3646 VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3647 VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); 3648 3649 box++; 3650 } while (--n_this_time); 3651 } while (n); 3652 3653 gen2_vertex_flush(sna, &tmp); 3654 sna_render_composite_redirect_done(sna, &tmp); 3655 return true; 3656 3657fallback_tiled: 3658 return sna_tiling_copy_boxes(sna, alu, 3659 src, src_bo, src_dx, src_dy, 3660 dst, dst_bo, dst_dx, dst_dy, 3661 box, n); 3662} 3663 3664static void 3665gen2_render_copy_blt(struct sna *sna, 3666 const struct sna_copy_op *op, 3667 int16_t sx, int16_t sy, 3668 int16_t w, int16_t h, 3669 int16_t dx, int16_t dy) 3670{ 3671 if (!gen2_get_rectangles(sna, &op->base, 1)) { 3672 gen2_emit_copy_state(sna, &op->base); 3673 gen2_get_rectangles(sna, &op->base, 1); 3674 } 3675 3676 VERTEX(dx+w); 3677 VERTEX(dy+h); 3678 VERTEX((sx+w)*op->base.src.scale[0]); 3679 VERTEX((sy+h)*op->base.src.scale[1]); 3680 3681 VERTEX(dx); 3682 VERTEX(dy+h); 3683 VERTEX(sx*op->base.src.scale[0]); 3684 VERTEX((sy+h)*op->base.src.scale[1]); 3685 3686 VERTEX(dx); 3687 VERTEX(dy); 3688 VERTEX(sx*op->base.src.scale[0]); 3689 VERTEX(sy*op->base.src.scale[1]); 3690} 3691 3692static void 3693gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 
3694{ 3695 gen2_vertex_flush(sna, &op->base); 3696} 3697 3698static bool 3699gen2_render_copy(struct sna *sna, uint8_t alu, 3700 PixmapPtr src, struct kgem_bo *src_bo, 3701 PixmapPtr dst, struct kgem_bo *dst_bo, 3702 struct sna_copy_op *tmp) 3703{ 3704#if NO_COPY 3705 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3706 return false; 3707 3708 return sna_blt_copy(sna, alu, 3709 src_bo, dst_bo, 3710 dst->drawable.bitsPerPixel, 3711 tmp); 3712#endif 3713 3714 /* Prefer to use the BLT */ 3715 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3716 sna_blt_copy(sna, alu, 3717 src_bo, dst_bo, 3718 dst->drawable.bitsPerPixel, 3719 tmp)) 3720 return true; 3721 3722 /* Must use the BLT if we can't RENDER... */ 3723 if (too_large(src->drawable.width, src->drawable.height) || 3724 too_large(dst->drawable.width, dst->drawable.height) || 3725 src_bo->pitch > MAX_3D_PITCH || 3726 dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) { 3727fallback: 3728 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3729 return false; 3730 3731 return sna_blt_copy(sna, alu, src_bo, dst_bo, 3732 dst->drawable.bitsPerPixel, 3733 tmp); 3734 } 3735 3736 tmp->base.op = alu; 3737 3738 tmp->base.dst.pixmap = dst; 3739 tmp->base.dst.width = dst->drawable.width; 3740 tmp->base.dst.height = dst->drawable.height; 3741 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3742 tmp->base.dst.bo = dst_bo; 3743 3744 gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo); 3745 tmp->base.mask.bo = NULL; 3746 3747 tmp->base.floats_per_vertex = 4; 3748 tmp->base.floats_per_rect = 12; 3749 3750 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3751 kgem_submit(&sna->kgem); 3752 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3753 goto fallback; 3754 } 3755 3756 tmp->blt = gen2_render_copy_blt; 3757 tmp->done = gen2_render_copy_done; 3758 3759 gen2_emit_composite_state(sna, &tmp->base); 3760 return true; 3761} 3762 3763static void 
3764gen2_render_reset(struct sna *sna) 3765{ 3766 sna->render_state.gen2.need_invariant = true; 3767 sna->render_state.gen2.logic_op_enabled = 0; 3768 sna->render_state.gen2.target = 0; 3769 3770 sna->render_state.gen2.ls1 = 0; 3771 sna->render_state.gen2.ls2 = 0; 3772 sna->render_state.gen2.vft = 0; 3773 3774 sna->render_state.gen2.diffuse = 0x0c0ffee0; 3775 sna->render_state.gen2.specular = 0x0c0ffee0; 3776} 3777 3778static void 3779gen2_render_flush(struct sna *sna) 3780{ 3781 assert(sna->render.vertex_index == 0); 3782 assert(sna->render.vertex_offset == 0); 3783} 3784 3785static void 3786gen2_render_context_switch(struct kgem *kgem, 3787 int new_mode) 3788{ 3789 struct sna *sna = container_of(kgem, struct sna, kgem); 3790 3791 if (!kgem->nbatch) 3792 return; 3793 3794 /* Reload BLT registers following a lost context */ 3795 sna->blt_state.fill_bo = 0; 3796 3797 if (kgem_ring_is_idle(kgem, kgem->ring)) { 3798 DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); 3799 _kgem_submit(kgem); 3800 } 3801} 3802 3803const char *gen2_render_init(struct sna *sna, const char *backend) 3804{ 3805 struct sna_render *render = &sna->render; 3806 3807 sna->kgem.context_switch = gen2_render_context_switch; 3808 3809 /* Use the BLT (and overlay) for everything except when forced to 3810 * use the texture combiners. 
3811 */ 3812#if !NO_COMPOSITE 3813 render->composite = gen2_render_composite; 3814 render->prefer_gpu |= PREFER_GPU_RENDER; 3815#endif 3816#if !NO_COMPOSITE_SPANS 3817 render->check_composite_spans = gen2_check_composite_spans; 3818 render->composite_spans = gen2_render_composite_spans; 3819 render->prefer_gpu |= PREFER_GPU_SPANS; 3820#endif 3821 render->fill_boxes = gen2_render_fill_boxes; 3822 render->fill = gen2_render_fill; 3823 render->fill_one = gen2_render_fill_one; 3824 render->copy = gen2_render_copy; 3825 render->copy_boxes = gen2_render_copy_boxes; 3826 3827 render->video = gen2_render_video; 3828 3829 render->reset = gen2_render_reset; 3830 render->flush = gen2_render_flush; 3831 3832 render->max_3d_size = MAX_3D_SIZE; 3833 render->max_3d_pitch = MAX_3D_PITCH; 3834 return "Almador (gen2)"; 3835} 3836