1/* 2 * Copyright © 2010-2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_render.h" 34#include "sna_render_inline.h" 35#include "sna_reg.h" 36#include "sna_video.h" 37 38#include "gen3_render.h" 39 40#define NO_COMPOSITE 0 41#define NO_COMPOSITE_SPANS 0 42#define NO_COPY 0 43#define NO_COPY_BOXES 0 44#define NO_FILL 0 45#define NO_FILL_ONE 0 46#define NO_FILL_BOXES 0 47 48#define PREFER_BLT_FILL 1 49 50enum { 51 SHADER_NONE = 0, 52 SHADER_ZERO, 53 SHADER_BLACK, 54 SHADER_WHITE, 55 SHADER_CONSTANT, 56 SHADER_LINEAR, 57 SHADER_RADIAL, 58 SHADER_TEXTURE, 59 SHADER_OPACITY, 60}; 61 62#define MAX_3D_SIZE 2048 63#define MAX_3D_PITCH 8192 64 65#define OUT_BATCH(v) batch_emit(sna, v) 66#define OUT_BATCH_F(v) batch_emit_float(sna, v) 67#define OUT_VERTEX(v) vertex_emit(sna, v) 68 69enum gen3_radial_mode { 70 RADIAL_ONE, 71 RADIAL_TWO 72}; 73 74static const struct blendinfo { 75 bool dst_alpha; 76 bool src_alpha; 77 uint32_t src_blend; 78 uint32_t dst_blend; 79} gen3_blend_op[] = { 80 /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO}, 81 /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO}, 82 /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE}, 83 /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA}, 84 /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE}, 85 /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO}, 86 /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA}, 87 /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO}, 88 /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA}, 89 /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, 90 /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA}, 91 /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, 92 /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE}, 93}; 94 95#define S6_COLOR_WRITE_ONLY \ 96 (S6_COLOR_WRITE_ENABLE | \ 97 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \ 98 BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \ 99 BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT) 100 101static const struct formatinfo { 102 unsigned int fmt, xfmt; 103 uint32_t card_fmt; 104 bool rb_reversed; 105} gen3_tex_formats[] = { 106 {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false}, 107 {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, 
false}, 108 {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false}, 109 {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false}, 110 {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}, 111#ifdef PICT_a2r10g10b10 112 {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false}, 113 {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false}, 114#endif 115 {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false}, 116 {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true}, 117 {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false}, 118 {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true}, 119 {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false}, 120 {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true}, 121}; 122 123#define xFixedToDouble(f) pixman_fixed_to_double(f) 124 125static inline bool too_large(int width, int height) 126{ 127 return width > MAX_3D_SIZE || height > MAX_3D_SIZE; 128} 129 130static inline uint32_t gen3_buf_tiling(uint32_t tiling) 131{ 132 uint32_t v = 0; 133 switch (tiling) { 134 case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; 135 case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; 136 case I915_TILING_NONE: break; 137 } 138 return v; 139} 140 141static inline bool 142gen3_check_pitch_3d(struct kgem_bo *bo) 143{ 144 return bo->pitch <= MAX_3D_PITCH; 145} 146 147static uint32_t gen3_get_blend_cntl(int op, 148 bool has_component_alpha, 149 uint32_t dst_format) 150{ 151 uint32_t sblend = gen3_blend_op[op].src_blend; 152 uint32_t dblend = gen3_blend_op[op].dst_blend; 153 154 if (op <= PictOpSrc) /* for clear and src disable blending */ 155 return S6_COLOR_WRITE_ONLY; 156 157 /* If there's no dst alpha channel, adjust the blend op so that we'll 158 * treat it as always 1. 159 */ 160 if (gen3_blend_op[op].dst_alpha) { 161 if (PICT_FORMAT_A(dst_format) == 0) { 162 if (sblend == BLENDFACT_DST_ALPHA) 163 sblend = BLENDFACT_ONE; 164 else if (sblend == BLENDFACT_INV_DST_ALPHA) 165 sblend = BLENDFACT_ZERO; 166 } 167 168 /* gen3 engine reads 8bit color buffer into green channel 169 * in cases like color buffer blending etc., and also writes 170 * back green channel. So with dst_alpha blend we should use 171 * color factor. See spec on "8-bit rendering". 172 */ 173 if (dst_format == PICT_a8) { 174 if (sblend == BLENDFACT_DST_ALPHA) 175 sblend = BLENDFACT_DST_COLR; 176 else if (sblend == BLENDFACT_INV_DST_ALPHA) 177 sblend = BLENDFACT_INV_DST_COLR; 178 } 179 } 180 181 /* If the source alpha is being used, then we should only be in a case 182 * where the source blend factor is 0, and the source blend value is the 183 * mask channels multiplied by the source picture's alpha. 
184 */ 185 if (has_component_alpha && gen3_blend_op[op].src_alpha) { 186 if (dblend == BLENDFACT_SRC_ALPHA) 187 dblend = BLENDFACT_SRC_COLR; 188 else if (dblend == BLENDFACT_INV_SRC_ALPHA) 189 dblend = BLENDFACT_INV_SRC_COLR; 190 } 191 192 return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | 193 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | 194 sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT | 195 dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); 196} 197 198static bool gen3_check_dst_format(uint32_t format) 199{ 200 switch (format) { 201 case PICT_a8r8g8b8: 202 case PICT_x8r8g8b8: 203 case PICT_a8b8g8r8: 204 case PICT_x8b8g8r8: 205 case PICT_r5g6b5: 206 case PICT_b5g6r5: 207 case PICT_a1r5g5b5: 208 case PICT_x1r5g5b5: 209 case PICT_a1b5g5r5: 210 case PICT_x1b5g5r5: 211#ifdef PICT_a2r10g10b10 212 case PICT_a2r10g10b10: 213 case PICT_x2r10g10b10: 214 case PICT_a2b10g10r10: 215 case PICT_x2b10g10r10: 216#endif 217 case PICT_a8: 218 case PICT_a4r4g4b4: 219 case PICT_x4r4g4b4: 220 case PICT_a4b4g4r4: 221 case PICT_x4b4g4r4: 222 return true; 223 default: 224 return false; 225 } 226} 227 228static bool gen3_dst_rb_reversed(uint32_t format) 229{ 230 switch (format) { 231 case PICT_a8r8g8b8: 232 case PICT_x8r8g8b8: 233 case PICT_r5g6b5: 234 case PICT_a1r5g5b5: 235 case PICT_x1r5g5b5: 236#ifdef PICT_a2r10g10b10 237 case PICT_a2r10g10b10: 238 case PICT_x2r10g10b10: 239#endif 240 case PICT_a8: 241 case PICT_a4r4g4b4: 242 case PICT_x4r4g4b4: 243 return false; 244 default: 245 return true; 246 } 247} 248 249#define DSTORG_HORT_BIAS(x) ((x)<<20) 250#define DSTORG_VERT_BIAS(x) ((x)<<16) 251 252static uint32_t gen3_get_dst_format(uint32_t format) 253{ 254#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)) 255 switch (format) { 256 default: 257 case PICT_a8r8g8b8: 258 case PICT_x8r8g8b8: 259 case PICT_a8b8g8r8: 260 case PICT_x8b8g8r8: 261 return BIAS | COLR_BUF_ARGB8888; 262 case PICT_r5g6b5: 263 case PICT_b5g6r5: 264 return BIAS | COLR_BUF_RGB565; 265 case PICT_a1r5g5b5: 266 case PICT_x1r5g5b5: 267 case PICT_a1b5g5r5: 268 case PICT_x1b5g5r5: 269 return BIAS | COLR_BUF_ARGB1555; 270#ifdef PICT_a2r10g10b10 271 case PICT_a2r10g10b10: 272 case PICT_x2r10g10b10: 273 case PICT_a2b10g10r10: 274 case PICT_x2b10g10r10: 275 return BIAS | COLR_BUF_ARGB2AAA; 276#endif 277 case PICT_a8: 278 return BIAS | COLR_BUF_8BIT; 279 case PICT_a4r4g4b4: 280 case PICT_x4r4g4b4: 281 case PICT_a4b4g4r4: 282 case PICT_x4b4g4r4: 283 return BIAS | COLR_BUF_ARGB4444; 284 } 285#undef BIAS 286} 287 288static bool gen3_check_format(PicturePtr p) 289{ 290 switch (p->format) { 291 case PICT_a8: 292 case PICT_a8r8g8b8: 293 case PICT_x8r8g8b8: 294 case PICT_a8b8g8r8: 295 case PICT_x8b8g8r8: 296#ifdef PICT_a2r10g10b10 297 case PICT_a2r10g10b10: 298 case PICT_a2b10g10r10: 299#endif 300 case PICT_r5g6b5: 301 case PICT_b5g6r5: 302 case PICT_a1r5g5b5: 303 case PICT_a1b5g5r5: 304 case PICT_a4r4g4b4: 305 case PICT_a4b4g4r4: 306 return true; 307 default: 308 return false; 309 } 310} 311 312static bool gen3_check_xformat(PicturePtr p) 313{ 314 switch (p->format) { 315 case PICT_a8r8g8b8: 316 case PICT_x8r8g8b8: 317 case PICT_a8b8g8r8: 318 case PICT_x8b8g8r8: 319 case PICT_r5g6b5: 320 case PICT_b5g6r5: 321 case PICT_a1r5g5b5: 322 case PICT_x1r5g5b5: 323 case PICT_a1b5g5r5: 324 case PICT_x1b5g5r5: 325#ifdef PICT_a2r10g10b10 326 case PICT_a2r10g10b10: 327 case PICT_x2r10g10b10: 328 case PICT_a2b10g10r10: 329 case PICT_x2b10g10r10: 330#endif 331 case PICT_a8: 332 case PICT_a4r4g4b4: 333 case PICT_x4r4g4b4: 334 case PICT_a4b4g4r4: 335 case PICT_x4b4g4r4: 336 return 
true; 337 default: 338 return false; 339 } 340} 341 342static uint32_t gen3_texture_repeat(uint32_t repeat) 343{ 344#define REPEAT(x) \ 345 (SS3_NORMALIZED_COORDS | \ 346 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ 347 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT) 348 switch (repeat) { 349 default: 350 case RepeatNone: 351 return REPEAT(CLAMP_BORDER); 352 case RepeatNormal: 353 return REPEAT(WRAP); 354 case RepeatPad: 355 return REPEAT(CLAMP_EDGE); 356 case RepeatReflect: 357 return REPEAT(MIRROR); 358 } 359#undef REPEAT 360} 361 362static uint32_t gen3_gradient_repeat(uint32_t repeat) 363{ 364#define REPEAT(x) \ 365 (SS3_NORMALIZED_COORDS | \ 366 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ 367 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) 368 switch (repeat) { 369 default: 370 case RepeatNone: 371 return REPEAT(CLAMP_BORDER); 372 case RepeatNormal: 373 return REPEAT(WRAP); 374 case RepeatPad: 375 return REPEAT(CLAMP_EDGE); 376 case RepeatReflect: 377 return REPEAT(MIRROR); 378 } 379#undef REPEAT 380} 381 382static bool gen3_check_repeat(PicturePtr p) 383{ 384 if (!p->repeat) 385 return true; 386 387 switch (p->repeatType) { 388 case RepeatNone: 389 case RepeatNormal: 390 case RepeatPad: 391 case RepeatReflect: 392 return true; 393 default: 394 return false; 395 } 396} 397 398static uint32_t gen3_filter(uint32_t filter) 399{ 400 switch (filter) { 401 default: 402 assert(0); 403 case PictFilterNearest: 404 return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | 405 FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | 406 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); 407 case PictFilterBilinear: 408 return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT | 409 FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | 410 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); 411 } 412} 413 414static bool gen3_check_filter(PicturePtr p) 415{ 416 switch (p->filter) { 417 case PictFilterNearest: 418 case PictFilterBilinear: 419 return true; 420 default: 421 return false; 422 } 423} 424 425static inline void 426gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY) 427{ 428 OUT_VERTEX(dstX); 429 OUT_VERTEX(dstY); 430} 431 432fastcall static void 433gen3_emit_composite_primitive_constant(struct sna *sna, 434 const struct sna_composite_op *op, 435 const struct sna_composite_rectangles *r) 436{ 437 int16_t dst_x = r->dst.x + op->dst.x; 438 int16_t dst_y = r->dst.y + op->dst.y; 439 440 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 441 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 442 gen3_emit_composite_dstcoord(sna, dst_x, dst_y); 443} 444 445fastcall static void 446gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, 447 const BoxRec *box, int nbox, 448 float *v) 449{ 450 do { 451 v[0] = box->x2; 452 v[1] = box->y2; 453 454 v[2] = box->x1; 455 v[3] = box->y2; 456 457 v[4] = box->x1; 458 v[5] = box->y1; 459 460 box++; 461 v += 6; 462 } while (--nbox); 463} 464 465fastcall static void 466gen3_emit_composite_primitive_identity_gradient(struct sna *sna, 467 const struct sna_composite_op *op, 468 const struct sna_composite_rectangles *r) 469{ 470 int16_t dst_x, dst_y; 471 int16_t src_x, src_y; 472 473 dst_x = r->dst.x + op->dst.x; 474 dst_y = r->dst.y + op->dst.y; 475 src_x = r->src.x + op->src.offset[0]; 476 src_y = r->src.y + op->src.offset[1]; 477 478 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 479 OUT_VERTEX(src_x + r->width); 480 OUT_VERTEX(src_y + r->height); 481 482 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 483 OUT_VERTEX(src_x); 
484 OUT_VERTEX(src_y + r->height); 485 486 gen3_emit_composite_dstcoord(sna, dst_x, dst_y); 487 OUT_VERTEX(src_x); 488 OUT_VERTEX(src_y); 489} 490 491fastcall static void 492gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, 493 const BoxRec *box, int nbox, 494 float *v) 495{ 496 do { 497 v[0] = box->x2; 498 v[1] = box->y2; 499 v[2] = box->x2 + op->src.offset[0]; 500 v[3] = box->y2 + op->src.offset[1]; 501 502 v[4] = box->x1; 503 v[5] = box->y2; 504 v[6] = box->x1 + op->src.offset[0]; 505 v[7] = box->y2 + op->src.offset[1]; 506 507 v[8] = box->x1; 508 v[9] = box->y1; 509 v[10] = box->x1 + op->src.offset[0]; 510 v[11] = box->y1 + op->src.offset[1]; 511 512 v += 12; 513 box++; 514 } while (--nbox); 515} 516 517fastcall static void 518gen3_emit_composite_primitive_affine_gradient(struct sna *sna, 519 const struct sna_composite_op *op, 520 const struct sna_composite_rectangles *r) 521{ 522 PictTransform *transform = op->src.transform; 523 int16_t dst_x, dst_y; 524 int16_t src_x, src_y; 525 float *v; 526 527 dst_x = r->dst.x + op->dst.x; 528 dst_y = r->dst.y + op->dst.y; 529 src_x = r->src.x + op->src.offset[0]; 530 src_y = r->src.y + op->src.offset[1]; 531 532 v = sna->render.vertices + sna->render.vertex_used; 533 sna->render.vertex_used += 12; 534 535 v[0] = dst_x + r->width; 536 v[1] = dst_y + r->height; 537 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 538 transform, op->src.scale, 539 &v[2], &v[3]); 540 541 v[4] = dst_x; 542 v[5] = dst_y + r->height; 543 _sna_get_transformed_scaled(src_x, src_y + r->height, 544 transform, op->src.scale, 545 &v[6], &v[7]); 546 547 v[8] = dst_x; 548 v[9] = dst_y; 549 _sna_get_transformed_scaled(src_x, src_y, 550 transform, op->src.scale, 551 &v[10], &v[11]); 552} 553 554fastcall static void 555gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, 556 const BoxRec *box, int nbox, 557 float *v) 558{ 559 const PictTransform *transform = op->src.transform; 560 561 do { 562 v[0] = box->x2; 563 v[1] = box->y2; 564 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 565 box->y2 + op->src.offset[1], 566 transform, op->src.scale, 567 &v[2], &v[3]); 568 569 v[4] = box->x1; 570 v[5] = box->y2; 571 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 572 box->y2 + op->src.offset[1], 573 transform, op->src.scale, 574 &v[6], &v[7]); 575 576 v[8] = box->x1; 577 v[9] = box->y1; 578 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 579 box->y1 + op->src.offset[1], 580 transform, op->src.scale, 581 &v[10], &v[11]); 582 583 box++; 584 v += 12; 585 } while (--nbox); 586} 587 588fastcall static void 589gen3_emit_composite_primitive_identity_source(struct sna *sna, 590 const struct sna_composite_op *op, 591 const struct sna_composite_rectangles *r) 592{ 593 float w = r->width; 594 float h = r->height; 595 float *v; 596 597 v = sna->render.vertices + sna->render.vertex_used; 598 sna->render.vertex_used += 12; 599 600 v[8] = v[4] = r->dst.x + op->dst.x; 601 v[0] = v[4] + w; 602 603 v[9] = r->dst.y + op->dst.y; 604 v[5] = v[1] = v[9] + h; 605 606 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 607 v[2] = v[6] + w * op->src.scale[0]; 608 609 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 610 v[7] = v[3] = v[11] + h * op->src.scale[1]; 611} 612 613fastcall static void 614gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op, 615 const BoxRec *box, int nbox, 616 float *v) 617{ 618 do { 619 v[0] = box->x2 + op->dst.x; 620 v[8] = v[4] = box->x1 + 
op->dst.x; 621 v[5] = v[1] = box->y2 + op->dst.y; 622 v[9] = box->y1 + op->dst.y; 623 624 v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 625 v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 626 627 v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 628 v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 629 630 v += 12; 631 box++; 632 } while (--nbox); 633} 634 635fastcall static void 636gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, 637 const struct sna_composite_op *op, 638 const struct sna_composite_rectangles *r) 639{ 640 float w = r->width; 641 float h = r->height; 642 float *v; 643 644 v = sna->render.vertices + sna->render.vertex_used; 645 sna->render.vertex_used += 12; 646 647 v[8] = v[4] = r->dst.x; 648 v[9] = r->dst.y; 649 650 v[0] = v[4] + w; 651 v[5] = v[1] = v[9] + h; 652 653 v[10] = v[6] = r->src.x * op->src.scale[0]; 654 v[11] = r->src.y * op->src.scale[1]; 655 656 v[2] = v[6] + w * op->src.scale[0]; 657 v[7] = v[3] = v[11] + h * op->src.scale[1]; 658} 659 660fastcall static void 661gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op, 662 const BoxRec *box, int nbox, 663 float *v) 664{ 665 do { 666 v[0] = box->x2; 667 v[8] = v[4] = box->x1; 668 v[5] = v[1] = box->y2; 669 v[9] = box->y1; 670 671 v[10] = v[6] = box->x1 * op->src.scale[0]; 672 v[2] = box->x2 * op->src.scale[0]; 673 674 v[11] = box->y1 * op->src.scale[1]; 675 v[7] = v[3] = box->y2 * op->src.scale[1]; 676 677 v += 12; 678 box++; 679 } while (--nbox); 680} 681 682fastcall static void 683gen3_emit_composite_primitive_affine_source(struct sna *sna, 684 const struct sna_composite_op *op, 685 const struct sna_composite_rectangles *r) 686{ 687 PictTransform *transform = op->src.transform; 688 int16_t dst_x = r->dst.x + op->dst.x; 689 int16_t dst_y = r->dst.y + op->dst.y; 690 int src_x = r->src.x + (int)op->src.offset[0]; 691 int src_y = r->src.y + (int)op->src.offset[1]; 692 float *v; 693 694 v = sna->render.vertices + sna->render.vertex_used; 695 sna->render.vertex_used += 12; 696 697 v[0] = dst_x + r->width; 698 v[5] = v[1] = dst_y + r->height; 699 v[8] = v[4] = dst_x; 700 v[9] = dst_y; 701 702 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 703 transform, op->src.scale, 704 &v[2], &v[3]); 705 706 _sna_get_transformed_scaled(src_x, src_y + r->height, 707 transform, op->src.scale, 708 &v[6], &v[7]); 709 710 _sna_get_transformed_scaled(src_x, src_y, 711 transform, op->src.scale, 712 &v[10], &v[11]); 713} 714 715fastcall static void 716gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, 717 const BoxRec *box, int nbox, 718 float *v) 719{ 720 const PictTransform *transform = op->src.transform; 721 722 do { 723 v[0] = box->x2; 724 v[5] = v[1] = box->y2; 725 v[8] = v[4] = box->x1; 726 v[9] = box->y1; 727 728 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 729 box->y2 + op->src.offset[1], 730 transform, op->src.scale, 731 &v[2], &v[3]); 732 733 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 734 box->y2 + op->src.offset[1], 735 transform, op->src.scale, 736 &v[6], &v[7]); 737 738 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 739 box->y1 + op->src.offset[1], 740 transform, op->src.scale, 741 &v[10], &v[11]); 742 743 v += 12; 744 box++; 745 } while (--nbox); 746} 747 748fastcall static void 749gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, 750 const struct sna_composite_op *op, 751 const struct sna_composite_rectangles *r) 752{ 753 
float w = r->width; 754 float h = r->height; 755 float *v; 756 757 v = sna->render.vertices + sna->render.vertex_used; 758 sna->render.vertex_used += 12; 759 760 v[8] = v[4] = r->dst.x + op->dst.x; 761 v[0] = v[4] + w; 762 763 v[9] = r->dst.y + op->dst.y; 764 v[5] = v[1] = v[9] + h; 765 766 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 767 v[2] = v[6] + w * op->mask.scale[0]; 768 769 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 770 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 771} 772 773fastcall static void 774gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna, 775 const struct sna_composite_op *op, 776 const struct sna_composite_rectangles *r) 777{ 778 float w = r->width; 779 float h = r->height; 780 float *v; 781 782 v = sna->render.vertices + sna->render.vertex_used; 783 sna->render.vertex_used += 12; 784 785 v[8] = v[4] = r->dst.x; 786 v[9] = r->dst.y; 787 788 v[0] = v[4] + w; 789 v[5] = v[1] = v[9] + h; 790 791 v[10] = v[6] = r->mask.x * op->mask.scale[0]; 792 v[11] = r->mask.y * op->mask.scale[1]; 793 794 v[2] = v[6] + w * op->mask.scale[0]; 795 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 796} 797 798fastcall static void 799gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, 800 const struct sna_composite_op *op, 801 const struct sna_composite_rectangles *r) 802{ 803 float dst_x, dst_y; 804 float src_x, src_y; 805 float msk_x, msk_y; 806 float w, h; 807 float *v; 808 809 dst_x = r->dst.x + op->dst.x; 810 dst_y = r->dst.y + op->dst.y; 811 src_x = r->src.x + op->src.offset[0]; 812 src_y = r->src.y + op->src.offset[1]; 813 msk_x = r->mask.x + op->mask.offset[0]; 814 msk_y = r->mask.y + op->mask.offset[1]; 815 w = r->width; 816 h = r->height; 817 818 v = sna->render.vertices + sna->render.vertex_used; 819 sna->render.vertex_used += 18; 820 821 v[0] = dst_x + w; 822 v[1] = dst_y + h; 823 v[2] = (src_x + w) * op->src.scale[0]; 824 v[3] = (src_y + h) * op->src.scale[1]; 825 v[4] = (msk_x + w) * op->mask.scale[0]; 826 v[5] = (msk_y + h) * op->mask.scale[1]; 827 828 v[6] = dst_x; 829 v[7] = v[1]; 830 v[8] = src_x * op->src.scale[0]; 831 v[9] = v[3]; 832 v[10] = msk_x * op->mask.scale[0]; 833 v[11] =v[5]; 834 835 v[12] = v[6]; 836 v[13] = dst_y; 837 v[14] = v[8]; 838 v[15] = src_y * op->src.scale[1]; 839 v[16] = v[10]; 840 v[17] = msk_y * op->mask.scale[1]; 841} 842 843fastcall static void 844gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, 845 const struct sna_composite_op *op, 846 const struct sna_composite_rectangles *r) 847{ 848 int16_t src_x, src_y; 849 float dst_x, dst_y; 850 float msk_x, msk_y; 851 float w, h; 852 float *v; 853 854 dst_x = r->dst.x + op->dst.x; 855 dst_y = r->dst.y + op->dst.y; 856 src_x = r->src.x + op->src.offset[0]; 857 src_y = r->src.y + op->src.offset[1]; 858 msk_x = r->mask.x + op->mask.offset[0]; 859 msk_y = r->mask.y + op->mask.offset[1]; 860 w = r->width; 861 h = r->height; 862 863 v = sna->render.vertices + sna->render.vertex_used; 864 sna->render.vertex_used += 18; 865 866 v[0] = dst_x + w; 867 v[1] = dst_y + h; 868 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 869 op->src.transform, op->src.scale, 870 &v[2], &v[3]); 871 v[4] = (msk_x + w) * op->mask.scale[0]; 872 v[5] = (msk_y + h) * op->mask.scale[1]; 873 874 v[6] = dst_x; 875 v[7] = v[1]; 876 _sna_get_transformed_scaled(src_x, src_y + r->height, 877 op->src.transform, op->src.scale, 878 &v[8], &v[9]); 879 v[10] = msk_x * op->mask.scale[0]; 880 v[11] =v[5]; 881 882 v[12] = v[6]; 883 
v[13] = dst_y; 884 _sna_get_transformed_scaled(src_x, src_y, 885 op->src.transform, op->src.scale, 886 &v[14], &v[15]); 887 v[16] = v[10]; 888 v[17] = msk_y * op->mask.scale[1]; 889} 890 891static void 892gen3_emit_composite_texcoord(struct sna *sna, 893 const struct sna_composite_channel *channel, 894 int16_t x, int16_t y) 895{ 896 float s = 0, t = 0, w = 1; 897 898 switch (channel->u.gen3.type) { 899 case SHADER_OPACITY: 900 case SHADER_NONE: 901 case SHADER_ZERO: 902 case SHADER_BLACK: 903 case SHADER_WHITE: 904 case SHADER_CONSTANT: 905 break; 906 907 case SHADER_LINEAR: 908 case SHADER_RADIAL: 909 case SHADER_TEXTURE: 910 x += channel->offset[0]; 911 y += channel->offset[1]; 912 if (channel->is_affine) { 913 sna_get_transformed_coordinates(x, y, 914 channel->transform, 915 &s, &t); 916 OUT_VERTEX(s * channel->scale[0]); 917 OUT_VERTEX(t * channel->scale[1]); 918 } else { 919 sna_get_transformed_coordinates_3d(x, y, 920 channel->transform, 921 &s, &t, &w); 922 OUT_VERTEX(s * channel->scale[0]); 923 OUT_VERTEX(t * channel->scale[1]); 924 OUT_VERTEX(0); 925 OUT_VERTEX(w); 926 } 927 break; 928 } 929} 930 931static void 932gen3_emit_composite_vertex(struct sna *sna, 933 const struct sna_composite_op *op, 934 int16_t srcX, int16_t srcY, 935 int16_t maskX, int16_t maskY, 936 int16_t dstX, int16_t dstY) 937{ 938 gen3_emit_composite_dstcoord(sna, dstX, dstY); 939 gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY); 940 gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY); 941} 942 943fastcall static void 944gen3_emit_composite_primitive(struct sna *sna, 945 const struct sna_composite_op *op, 946 const struct sna_composite_rectangles *r) 947{ 948 gen3_emit_composite_vertex(sna, op, 949 r->src.x + r->width, 950 r->src.y + r->height, 951 r->mask.x + r->width, 952 r->mask.y + r->height, 953 op->dst.x + r->dst.x + r->width, 954 op->dst.y + r->dst.y + r->height); 955 gen3_emit_composite_vertex(sna, op, 956 r->src.x, 957 r->src.y + r->height, 958 r->mask.x, 959 r->mask.y + r->height, 960 op->dst.x + r->dst.x, 961 op->dst.y + r->dst.y + r->height); 962 gen3_emit_composite_vertex(sna, op, 963 r->src.x, 964 r->src.y, 965 r->mask.x, 966 r->mask.y, 967 op->dst.x + r->dst.x, 968 op->dst.y + r->dst.y); 969} 970 971#if defined(sse2) && !defined(__x86_64__) 972sse2 fastcall static void 973gen3_emit_composite_primitive_constant__sse2(struct sna *sna, 974 const struct sna_composite_op *op, 975 const struct sna_composite_rectangles *r) 976{ 977 float *v; 978 979 v = sna->render.vertices + sna->render.vertex_used; 980 sna->render.vertex_used += 6; 981 982 v[4] = v[2] = r->dst.x + op->dst.x; 983 v[5] = r->dst.y + op->dst.y; 984 985 v[0] = v[2] + r->width; 986 v[3] = v[1] = v[5] + r->height; 987 988} 989 990sse2 fastcall static void 991gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op, 992 const BoxRec *box, int nbox, 993 float *v) 994{ 995 do { 996 v[0] = box->x2; 997 v[3] = v[1] = box->y2; 998 v[4] = v[2] = box->x1; 999 v[5] = box->y1; 1000 1001 box++; 1002 v += 6; 1003 } while (--nbox); 1004} 1005 1006sse2 fastcall static void 1007gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna, 1008 const struct sna_composite_op *op, 1009 const struct sna_composite_rectangles *r) 1010{ 1011 int16_t x, y; 1012 float *v; 1013 1014 v = sna->render.vertices + sna->render.vertex_used; 1015 sna->render.vertex_used += 12; 1016 1017 x = r->dst.x + op->dst.x; 1018 y = r->dst.y + op->dst.y; 1019 v[0] = x + r->width; 1020 v[5] = v[1] = y + r->height; 1021 v[8] = v[4] = x; 1022 v[9] 
= y; 1023 1024 x = r->src.x + op->src.offset[0]; 1025 y = r->src.y + op->src.offset[1]; 1026 v[2] = x + r->width; 1027 v[7] = v[3] = y + r->height; 1028 v[10] = v[6] = x; 1029 v[11] = y; 1030} 1031 1032sse2 fastcall static void 1033gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op, 1034 const BoxRec *box, int nbox, 1035 float *v) 1036{ 1037 do { 1038 v[0] = box->x2; 1039 v[5] = v[1] = box->y2; 1040 v[8] = v[4] = box->x1; 1041 v[9] = box->y1; 1042 1043 v[2] = box->x2 + op->src.offset[0]; 1044 v[7] = v[3] = box->y2 + op->src.offset[1]; 1045 v[10] = v[6] = box->x1 + op->src.offset[0]; 1046 v[11] = box->y1 + op->src.offset[1]; 1047 1048 v += 12; 1049 box++; 1050 } while (--nbox); 1051} 1052 1053sse2 fastcall static void 1054gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna, 1055 const struct sna_composite_op *op, 1056 const struct sna_composite_rectangles *r) 1057{ 1058 PictTransform *transform = op->src.transform; 1059 int16_t dst_x, dst_y; 1060 int16_t src_x, src_y; 1061 float *v; 1062 1063 dst_x = r->dst.x + op->dst.x; 1064 dst_y = r->dst.y + op->dst.y; 1065 src_x = r->src.x + op->src.offset[0]; 1066 src_y = r->src.y + op->src.offset[1]; 1067 1068 v = sna->render.vertices + sna->render.vertex_used; 1069 sna->render.vertex_used += 12; 1070 1071 v[0] = dst_x + r->width; 1072 v[1] = dst_y + r->height; 1073 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 1074 transform, op->src.scale, 1075 &v[2], &v[3]); 1076 1077 v[4] = dst_x; 1078 v[5] = dst_y + r->height; 1079 _sna_get_transformed_scaled(src_x, src_y + r->height, 1080 transform, op->src.scale, 1081 &v[6], &v[7]); 1082 1083 v[8] = dst_x; 1084 v[9] = dst_y; 1085 _sna_get_transformed_scaled(src_x, src_y, 1086 transform, op->src.scale, 1087 &v[10], &v[11]); 1088} 1089 1090sse2 fastcall static void 1091gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op, 1092 const BoxRec *box, int nbox, 1093 float *v) 1094{ 1095 const PictTransform *transform = op->src.transform; 1096 1097 do { 1098 v[0] = box->x2; 1099 v[1] = box->y2; 1100 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 1101 box->y2 + op->src.offset[1], 1102 transform, op->src.scale, 1103 &v[2], &v[3]); 1104 1105 v[4] = box->x1; 1106 v[5] = box->y2; 1107 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 1108 box->y2 + op->src.offset[1], 1109 transform, op->src.scale, 1110 &v[6], &v[7]); 1111 1112 v[8] = box->x1; 1113 v[9] = box->y1; 1114 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 1115 box->y1 + op->src.offset[1], 1116 transform, op->src.scale, 1117 &v[10], &v[11]); 1118 1119 box++; 1120 v += 12; 1121 } while (--nbox); 1122} 1123 1124sse2 fastcall static void 1125gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna, 1126 const struct sna_composite_op *op, 1127 const struct sna_composite_rectangles *r) 1128{ 1129 float w = r->width; 1130 float h = r->height; 1131 float *v; 1132 1133 v = sna->render.vertices + sna->render.vertex_used; 1134 sna->render.vertex_used += 12; 1135 1136 v[8] = v[4] = r->dst.x + op->dst.x; 1137 v[0] = v[4] + w; 1138 1139 v[9] = r->dst.y + op->dst.y; 1140 v[5] = v[1] = v[9] + h; 1141 1142 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1143 v[2] = v[6] + w * op->src.scale[0]; 1144 1145 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1146 v[7] = v[3] = v[11] + h * op->src.scale[1]; 1147} 1148 1149sse2 fastcall static void 1150gen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op 
*op, 1151 const BoxRec *box, int nbox, 1152 float *v) 1153{ 1154 do { 1155 v[0] = box->x2 + op->dst.x; 1156 v[8] = v[4] = box->x1 + op->dst.x; 1157 v[5] = v[1] = box->y2 + op->dst.y; 1158 v[9] = box->y1 + op->dst.y; 1159 1160 v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 1161 v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 1162 1163 v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 1164 v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 1165 1166 v += 12; 1167 box++; 1168 } while (--nbox); 1169} 1170 1171sse2 fastcall static void 1172gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna, 1173 const struct sna_composite_op *op, 1174 const struct sna_composite_rectangles *r) 1175{ 1176 float w = r->width; 1177 float h = r->height; 1178 float *v; 1179 1180 v = sna->render.vertices + sna->render.vertex_used; 1181 sna->render.vertex_used += 12; 1182 1183 v[8] = v[4] = r->dst.x; 1184 v[9] = r->dst.y; 1185 1186 v[0] = v[4] + w; 1187 v[5] = v[1] = v[9] + h; 1188 1189 v[10] = v[6] = r->src.x * op->src.scale[0]; 1190 v[11] = r->src.y * op->src.scale[1]; 1191 1192 v[2] = v[6] + w * op->src.scale[0]; 1193 v[7] = v[3] = v[11] + h * op->src.scale[1]; 1194} 1195 1196sse2 fastcall static void 1197gen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op, 1198 const BoxRec *box, int nbox, 1199 float *v) 1200{ 1201 do { 1202 v[0] = box->x2; 1203 v[8] = v[4] = box->x1; 1204 v[5] = v[1] = box->y2; 1205 v[9] = box->y1; 1206 1207 v[10] = v[6] = box->x1 * op->src.scale[0]; 1208 v[2] = box->x2 * op->src.scale[0]; 1209 1210 v[11] = box->y1 * op->src.scale[1]; 1211 v[7] = v[3] = box->y2 * op->src.scale[1]; 1212 1213 v += 12; 1214 box++; 1215 } while (--nbox); 1216} 1217 1218sse2 fastcall static void 1219gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna, 1220 const struct sna_composite_op *op, 1221 const struct sna_composite_rectangles *r) 1222{ 1223 PictTransform *transform = op->src.transform; 1224 int16_t dst_x = r->dst.x + op->dst.x; 1225 int16_t dst_y = r->dst.y + op->dst.y; 1226 int src_x = r->src.x + (int)op->src.offset[0]; 1227 int src_y = r->src.y + (int)op->src.offset[1]; 1228 float *v; 1229 1230 v = sna->render.vertices + sna->render.vertex_used; 1231 sna->render.vertex_used += 12; 1232 1233 v[0] = dst_x + r->width; 1234 v[5] = v[1] = dst_y + r->height; 1235 v[8] = v[4] = dst_x; 1236 v[9] = dst_y; 1237 1238 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 1239 transform, op->src.scale, 1240 &v[2], &v[3]); 1241 1242 _sna_get_transformed_scaled(src_x, src_y + r->height, 1243 transform, op->src.scale, 1244 &v[6], &v[7]); 1245 1246 _sna_get_transformed_scaled(src_x, src_y, 1247 transform, op->src.scale, 1248 &v[10], &v[11]); 1249} 1250 1251sse2 fastcall static void 1252gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op, 1253 const BoxRec *box, int nbox, 1254 float *v) 1255{ 1256 const PictTransform *transform = op->src.transform; 1257 1258 do { 1259 v[0] = box->x2; 1260 v[5] = v[1] = box->y2; 1261 v[8] = v[4] = box->x1; 1262 v[9] = box->y1; 1263 1264 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 1265 box->y2 + op->src.offset[1], 1266 transform, op->src.scale, 1267 &v[2], &v[3]); 1268 1269 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 1270 box->y2 + op->src.offset[1], 1271 transform, op->src.scale, 1272 &v[6], &v[7]); 1273 1274 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 1275 box->y1 + op->src.offset[1], 1276 
transform, op->src.scale, 1277 &v[10], &v[11]); 1278 1279 v += 12; 1280 box++; 1281 } while (--nbox); 1282} 1283 1284sse2 fastcall static void 1285gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, 1286 const struct sna_composite_op *op, 1287 const struct sna_composite_rectangles *r) 1288{ 1289 float w = r->width; 1290 float h = r->height; 1291 float *v; 1292 1293 v = sna->render.vertices + sna->render.vertex_used; 1294 sna->render.vertex_used += 12; 1295 1296 v[8] = v[4] = r->dst.x + op->dst.x; 1297 v[0] = v[4] + w; 1298 1299 v[9] = r->dst.y + op->dst.y; 1300 v[5] = v[1] = v[9] + h; 1301 1302 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 1303 v[2] = v[6] + w * op->mask.scale[0]; 1304 1305 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 1306 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 1307} 1308 1309sse2 fastcall static void 1310gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna, 1311 const struct sna_composite_op *op, 1312 const struct sna_composite_rectangles *r) 1313{ 1314 float w = r->width; 1315 float h = r->height; 1316 float *v; 1317 1318 v = sna->render.vertices + sna->render.vertex_used; 1319 sna->render.vertex_used += 12; 1320 1321 v[8] = v[4] = r->dst.x; 1322 v[9] = r->dst.y; 1323 1324 v[0] = v[4] + w; 1325 v[5] = v[1] = v[9] + h; 1326 1327 v[10] = v[6] = r->mask.x * op->mask.scale[0]; 1328 v[11] = r->mask.y * op->mask.scale[1]; 1329 1330 v[2] = v[6] + w * op->mask.scale[0]; 1331 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 1332} 1333 1334sse2 fastcall static void 1335gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna, 1336 const struct sna_composite_op *op, 1337 const struct sna_composite_rectangles *r) 1338{ 1339 float dst_x, dst_y; 1340 float src_x, src_y; 1341 float msk_x, msk_y; 1342 float w, h; 1343 float *v; 1344 1345 dst_x = r->dst.x + op->dst.x; 1346 dst_y = r->dst.y + op->dst.y; 1347 src_x = r->src.x + op->src.offset[0]; 1348 src_y = r->src.y + op->src.offset[1]; 1349 msk_x = r->mask.x + op->mask.offset[0]; 1350 msk_y = r->mask.y + op->mask.offset[1]; 1351 w = r->width; 1352 h = r->height; 1353 1354 v = sna->render.vertices + sna->render.vertex_used; 1355 sna->render.vertex_used += 18; 1356 1357 v[0] = dst_x + w; 1358 v[1] = dst_y + h; 1359 v[2] = (src_x + w) * op->src.scale[0]; 1360 v[3] = (src_y + h) * op->src.scale[1]; 1361 v[4] = (msk_x + w) * op->mask.scale[0]; 1362 v[5] = (msk_y + h) * op->mask.scale[1]; 1363 1364 v[6] = dst_x; 1365 v[7] = v[1]; 1366 v[8] = src_x * op->src.scale[0]; 1367 v[9] = v[3]; 1368 v[10] = msk_x * op->mask.scale[0]; 1369 v[11] =v[5]; 1370 1371 v[12] = v[6]; 1372 v[13] = dst_y; 1373 v[14] = v[8]; 1374 v[15] = src_y * op->src.scale[1]; 1375 v[16] = v[10]; 1376 v[17] = msk_y * op->mask.scale[1]; 1377} 1378 1379sse2 fastcall static void 1380gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna, 1381 const struct sna_composite_op *op, 1382 const struct sna_composite_rectangles *r) 1383{ 1384 int16_t src_x, src_y; 1385 float dst_x, dst_y; 1386 float msk_x, msk_y; 1387 float w, h; 1388 float *v; 1389 1390 dst_x = r->dst.x + op->dst.x; 1391 dst_y = r->dst.y + op->dst.y; 1392 src_x = r->src.x + op->src.offset[0]; 1393 src_y = r->src.y + op->src.offset[1]; 1394 msk_x = r->mask.x + op->mask.offset[0]; 1395 msk_y = r->mask.y + op->mask.offset[1]; 1396 w = r->width; 1397 h = r->height; 1398 1399 v = sna->render.vertices + sna->render.vertex_used; 1400 sna->render.vertex_used += 18; 1401 1402 v[0] = dst_x + w; 1403 v[1] 
= dst_y + h; 1404 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 1405 op->src.transform, op->src.scale, 1406 &v[2], &v[3]); 1407 v[4] = (msk_x + w) * op->mask.scale[0]; 1408 v[5] = (msk_y + h) * op->mask.scale[1]; 1409 1410 v[6] = dst_x; 1411 v[7] = v[1]; 1412 _sna_get_transformed_scaled(src_x, src_y + r->height, 1413 op->src.transform, op->src.scale, 1414 &v[8], &v[9]); 1415 v[10] = msk_x * op->mask.scale[0]; 1416 v[11] =v[5]; 1417 1418 v[12] = v[6]; 1419 v[13] = dst_y; 1420 _sna_get_transformed_scaled(src_x, src_y, 1421 op->src.transform, op->src.scale, 1422 &v[14], &v[15]); 1423 v[16] = v[10]; 1424 v[17] = msk_y * op->mask.scale[1]; 1425} 1426#endif 1427 1428static inline void 1429gen3_2d_perspective(struct sna *sna, int in, int out) 1430{ 1431 gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W)); 1432 gen3_fs_mul(out, 1433 gen3_fs_operand(in, X, Y, ZERO, ONE), 1434 gen3_fs_operand_reg(out)); 1435} 1436 1437static inline void 1438gen3_linear_coord(struct sna *sna, 1439 const struct sna_composite_channel *channel, 1440 int in, int out) 1441{ 1442 int c = channel->u.gen3.constants; 1443 1444 if (!channel->is_affine) { 1445 gen3_2d_perspective(sna, in, FS_U0); 1446 in = FS_U0; 1447 } 1448 1449 gen3_fs_mov(out, gen3_fs_operand_zero()); 1450 gen3_fs_dp3(out, MASK_X, 1451 gen3_fs_operand(in, X, Y, ONE, ZERO), 1452 gen3_fs_operand_reg(c)); 1453} 1454 1455static void 1456gen3_radial_coord(struct sna *sna, 1457 const struct sna_composite_channel *channel, 1458 int in, int out) 1459{ 1460 int c = channel->u.gen3.constants; 1461 1462 if (!channel->is_affine) { 1463 gen3_2d_perspective(sna, in, FS_U0); 1464 in = FS_U0; 1465 } 1466 1467 switch (channel->u.gen3.mode) { 1468 case RADIAL_ONE: 1469 /* 1470 pdx = (x - c1x) / dr, pdy = (y - c1y) / dr; 1471 r² = pdx*pdx + pdy*pdy 1472 t = r²/sqrt(r²) - r1/dr; 1473 */ 1474 gen3_fs_mad(FS_U0, MASK_X | MASK_Y, 1475 gen3_fs_operand(in, X, Y, ZERO, ZERO), 1476 gen3_fs_operand(c, Z, Z, ZERO, ZERO), 1477 gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO)); 1478 gen3_fs_dp2add(FS_U0, MASK_X, 1479 gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), 1480 gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), 1481 gen3_fs_operand_zero()); 1482 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X)); 1483 gen3_fs_mad(out, 0, 1484 gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO), 1485 gen3_fs_operand(out, X, ZERO, ZERO, ZERO), 1486 gen3_fs_operand(c, W, ZERO, ZERO, ZERO)); 1487 break; 1488 1489 case RADIAL_TWO: 1490 /* 1491 pdx = x - c1x, pdy = y - c1y; 1492 A = dx² + dy² - dr² 1493 B = -2*(pdx*dx + pdy*dy + r1*dr); 1494 C = pdx² + pdy² - r1²; 1495 det = B*B - 4*A*C; 1496 t = (-B + sqrt (det)) / (2 * A) 1497 */ 1498 1499 /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */ 1500 gen3_fs_add(FS_U0, 1501 gen3_fs_operand(in, X, Y, ZERO, ZERO), 1502 gen3_fs_operand(c, X, Y, Z, ZERO)); 1503 /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */ 1504 gen3_fs_dp3(FS_U0, MASK_W, 1505 gen3_fs_operand(FS_U0, X, Y, ONE, ZERO), 1506 gen3_fs_operand(c+1, X, Y, Z, ZERO)); 1507 /* u1.x = pdx² + pdy² - r1²; [C] */ 1508 gen3_fs_dp3(FS_U1, MASK_X, 1509 gen3_fs_operand(FS_U0, X, Y, Z, ZERO), 1510 gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO)); 1511 /* u1.x = C, u1.y = B, u1.z=-4*A; */ 1512 gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W)); 1513 gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W)); 1514 /* u1.x = B² - 4*A*C */ 1515 gen3_fs_dp2add(FS_U1, MASK_X, 1516 gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO), 1517 gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO), 1518 
gen3_fs_operand_zero()); 1519 /* out.x = -B + sqrt (B² - 4*A*C), */ 1520 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X)); 1521 gen3_fs_mad(out, MASK_X, 1522 gen3_fs_operand(out, X, ZERO, ZERO, ZERO), 1523 gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO), 1524 gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO)); 1525 /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */ 1526 gen3_fs_mul(out, 1527 gen3_fs_operand(out, X, ZERO, ZERO, ZERO), 1528 gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO)); 1529 break; 1530 } 1531} 1532 1533static void 1534gen3_composite_emit_shader(struct sna *sna, 1535 const struct sna_composite_op *op, 1536 uint8_t blend) 1537{ 1538 bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0; 1539 const struct sna_composite_channel *src, *mask; 1540 struct gen3_render_state *state = &sna->render_state.gen3; 1541 uint32_t shader_offset, id; 1542 int src_reg, mask_reg; 1543 int t, length; 1544 1545 src = &op->src; 1546 mask = &op->mask; 1547 if (mask->u.gen3.type == SHADER_NONE) 1548 mask = NULL; 1549 1550 id = (src->u.gen3.type | 1551 src->is_affine << 4 | 1552 src->alpha_fixup << 5 | 1553 src->rb_reversed << 6); 1554 if (mask) { 1555 id |= (mask->u.gen3.type << 8 | 1556 mask->is_affine << 12 | 1557 gen3_blend_op[blend].src_alpha << 13 | 1558 op->has_component_alpha << 14 | 1559 mask->alpha_fixup << 15 | 1560 mask->rb_reversed << 16); 1561 } 1562 id |= dst_is_alpha << 24; 1563 id |= op->rb_reversed << 25; 1564 1565 if (id == state->last_shader) 1566 return; 1567 1568 state->last_shader = id; 1569 1570 shader_offset = sna->kgem.nbatch++; 1571 t = 0; 1572 switch (src->u.gen3.type) { 1573 case SHADER_NONE: 1574 case SHADER_OPACITY: 1575 assert(0); 1576 case SHADER_ZERO: 1577 case SHADER_BLACK: 1578 case SHADER_WHITE: 1579 break; 1580 case SHADER_CONSTANT: 1581 gen3_fs_dcl(FS_T8); 1582 src_reg = FS_T8; 1583 break; 1584 case SHADER_TEXTURE: 1585 case SHADER_RADIAL: 1586 case SHADER_LINEAR: 1587 gen3_fs_dcl(FS_S0); 1588 gen3_fs_dcl(FS_T0); 1589 t++; 1590 break; 1591 } 1592 1593 if (mask == NULL) { 1594 switch (src->u.gen3.type) { 1595 case SHADER_ZERO: 1596 gen3_fs_mov(FS_OC, gen3_fs_operand_zero()); 1597 goto done; 1598 case SHADER_BLACK: 1599 if (dst_is_alpha) 1600 gen3_fs_mov(FS_OC, gen3_fs_operand_one()); 1601 else 1602 gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE)); 1603 goto done; 1604 case SHADER_WHITE: 1605 gen3_fs_mov(FS_OC, gen3_fs_operand_one()); 1606 goto done; 1607 } 1608 if (src->alpha_fixup && dst_is_alpha) { 1609 gen3_fs_mov(FS_OC, gen3_fs_operand_one()); 1610 goto done; 1611 } 1612 /* No mask, so load directly to output color */ 1613 if (src->u.gen3.type != SHADER_CONSTANT) { 1614 if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed) 1615 src_reg = FS_R0; 1616 else 1617 src_reg = FS_OC; 1618 } 1619 switch (src->u.gen3.type) { 1620 case SHADER_LINEAR: 1621 gen3_linear_coord(sna, src, FS_T0, FS_R0); 1622 gen3_fs_texld(src_reg, FS_S0, FS_R0); 1623 break; 1624 1625 case SHADER_RADIAL: 1626 gen3_radial_coord(sna, src, FS_T0, FS_R0); 1627 gen3_fs_texld(src_reg, FS_S0, FS_R0); 1628 break; 1629 1630 case SHADER_TEXTURE: 1631 if (src->is_affine) 1632 gen3_fs_texld(src_reg, FS_S0, FS_T0); 1633 else 1634 gen3_fs_texldp(src_reg, FS_S0, FS_T0); 1635 break; 1636 1637 case SHADER_NONE: 1638 case SHADER_WHITE: 1639 case SHADER_BLACK: 1640 case SHADER_ZERO: 1641 assert(0); 1642 case SHADER_CONSTANT: 1643 break; 1644 } 1645 1646 if (src_reg != FS_OC) { 1647 if (src->alpha_fixup) 1648 gen3_fs_mov(FS_OC, 1649 src->rb_reversed ^ op->rb_reversed ? 
1650 gen3_fs_operand(src_reg, Z, Y, X, ONE) : 1651 gen3_fs_operand(src_reg, X, Y, Z, ONE)); 1652 else if (dst_is_alpha) 1653 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W)); 1654 else if (src->rb_reversed ^ op->rb_reversed) 1655 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W)); 1656 else 1657 gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg)); 1658 } else if (src->alpha_fixup) 1659 gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one()); 1660 } else { 1661 int out_reg = FS_OC; 1662 if (op->rb_reversed) 1663 out_reg = FS_U0; 1664 1665 switch (mask->u.gen3.type) { 1666 case SHADER_CONSTANT: 1667 gen3_fs_dcl(FS_T9); 1668 mask_reg = FS_T9; 1669 break; 1670 case SHADER_TEXTURE: 1671 case SHADER_LINEAR: 1672 case SHADER_RADIAL: 1673 gen3_fs_dcl(FS_S0 + t); 1674 /* fall through */ 1675 case SHADER_OPACITY: 1676 gen3_fs_dcl(FS_T0 + t); 1677 break; 1678 case SHADER_ZERO: 1679 case SHADER_BLACK: 1680 assert(0); 1681 case SHADER_NONE: 1682 case SHADER_WHITE: 1683 break; 1684 } 1685 1686 t = 0; 1687 switch (src->u.gen3.type) { 1688 case SHADER_LINEAR: 1689 gen3_linear_coord(sna, src, FS_T0, FS_R0); 1690 gen3_fs_texld(FS_R0, FS_S0, FS_R0); 1691 src_reg = FS_R0; 1692 t++; 1693 break; 1694 1695 case SHADER_RADIAL: 1696 gen3_radial_coord(sna, src, FS_T0, FS_R0); 1697 gen3_fs_texld(FS_R0, FS_S0, FS_R0); 1698 src_reg = FS_R0; 1699 t++; 1700 break; 1701 1702 case SHADER_TEXTURE: 1703 if (src->is_affine) 1704 gen3_fs_texld(FS_R0, FS_S0, FS_T0); 1705 else 1706 gen3_fs_texldp(FS_R0, FS_S0, FS_T0); 1707 src_reg = FS_R0; 1708 t++; 1709 break; 1710 1711 case SHADER_CONSTANT: 1712 case SHADER_NONE: 1713 case SHADER_ZERO: 1714 case SHADER_BLACK: 1715 case SHADER_WHITE: 1716 break; 1717 } 1718 if (src->alpha_fixup) 1719 gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one()); 1720 if (src->rb_reversed) 1721 gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W)); 1722 1723 switch (mask->u.gen3.type) { 1724 case SHADER_LINEAR: 1725 gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1); 1726 gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1); 1727 mask_reg = FS_R1; 1728 break; 1729 1730 case SHADER_RADIAL: 1731 gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1); 1732 gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1); 1733 mask_reg = FS_R1; 1734 break; 1735 1736 case SHADER_TEXTURE: 1737 if (mask->is_affine) 1738 gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t); 1739 else 1740 gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t); 1741 mask_reg = FS_R1; 1742 break; 1743 1744 case SHADER_OPACITY: 1745 switch (src->u.gen3.type) { 1746 case SHADER_BLACK: 1747 case SHADER_WHITE: 1748 if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) { 1749 gen3_fs_mov(out_reg, 1750 gen3_fs_operand(FS_T0 + t, X, X, X, X)); 1751 } else { 1752 gen3_fs_mov(out_reg, 1753 gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X)); 1754 } 1755 break; 1756 default: 1757 if (dst_is_alpha) { 1758 gen3_fs_mul(out_reg, 1759 gen3_fs_operand(src_reg, W, W, W, W), 1760 gen3_fs_operand(FS_T0 + t, X, X, X, X)); 1761 } else { 1762 gen3_fs_mul(out_reg, 1763 gen3_fs_operand(src_reg, X, Y, Z, W), 1764 gen3_fs_operand(FS_T0 + t, X, X, X, X)); 1765 } 1766 } 1767 goto mask_done; 1768 1769 case SHADER_CONSTANT: 1770 case SHADER_ZERO: 1771 case SHADER_BLACK: 1772 case SHADER_WHITE: 1773 case SHADER_NONE: 1774 break; 1775 } 1776 if (mask->alpha_fixup) 1777 gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one()); 1778 if (mask->rb_reversed) 1779 gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W)); 1780 1781 if (dst_is_alpha) { 1782 switch (src->u.gen3.type) { 1783 case SHADER_BLACK: 1784 
case SHADER_WHITE: 1785 gen3_fs_mov(out_reg, 1786 gen3_fs_operand(mask_reg, W, W, W, W)); 1787 break; 1788 default: 1789 gen3_fs_mul(out_reg, 1790 gen3_fs_operand(src_reg, W, W, W, W), 1791 gen3_fs_operand(mask_reg, W, W, W, W)); 1792 break; 1793 } 1794 } else { 1795 /* If component alpha is active in the mask and the blend 1796 * operation uses the source alpha, then we know we don't 1797 * need the source value (otherwise we would have hit a 1798 * fallback earlier), so we provide the source alpha (src.A * 1799 * mask.X) as output color. 1800 * Conversely, if CA is set and we don't need the source alpha, 1801 * then we produce the source value (src.X * mask.X) and the 1802 * source alpha is unused. Otherwise, we provide the non-CA 1803 * source value (src.X * mask.A). 1804 */ 1805 if (op->has_component_alpha) { 1806 switch (src->u.gen3.type) { 1807 case SHADER_BLACK: 1808 if (gen3_blend_op[blend].src_alpha) 1809 gen3_fs_mov(out_reg, 1810 gen3_fs_operand_reg(mask_reg)); 1811 else 1812 gen3_fs_mov(out_reg, 1813 gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W)); 1814 break; 1815 case SHADER_WHITE: 1816 gen3_fs_mov(out_reg, 1817 gen3_fs_operand_reg(mask_reg)); 1818 break; 1819 default: 1820 if (gen3_blend_op[blend].src_alpha) 1821 gen3_fs_mul(out_reg, 1822 gen3_fs_operand(src_reg, W, W, W, W), 1823 gen3_fs_operand_reg(mask_reg)); 1824 else 1825 gen3_fs_mul(out_reg, 1826 gen3_fs_operand_reg(src_reg), 1827 gen3_fs_operand_reg(mask_reg)); 1828 break; 1829 } 1830 } else { 1831 switch (src->u.gen3.type) { 1832 case SHADER_WHITE: 1833 gen3_fs_mov(out_reg, 1834 gen3_fs_operand(mask_reg, W, W, W, W)); 1835 break; 1836 case SHADER_BLACK: 1837 gen3_fs_mov(out_reg, 1838 gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W)); 1839 break; 1840 default: 1841 gen3_fs_mul(out_reg, 1842 gen3_fs_operand_reg(src_reg), 1843 gen3_fs_operand(mask_reg, W, W, W, W)); 1844 break; 1845 } 1846 } 1847 } 1848mask_done: 1849 if (op->rb_reversed) 1850 gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W)); 1851 } 1852 1853done: 1854 length = sna->kgem.nbatch - shader_offset; 1855 sna->kgem.batch[shader_offset] = 1856 _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2); 1857} 1858 1859static uint32_t gen3_ms_tiling(uint32_t tiling) 1860{ 1861 uint32_t v = 0; 1862 switch (tiling) { 1863 case I915_TILING_Y: v |= MS3_TILE_WALK; 1864 case I915_TILING_X: v |= MS3_TILED_SURFACE; 1865 case I915_TILING_NONE: break; 1866 } 1867 return v; 1868} 1869 1870static void gen3_emit_invariant(struct sna *sna) 1871{ 1872 /* Disable independent alpha blend */ 1873 OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE | 1874 IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT | 1875 IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT | 1876 IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT); 1877 1878 OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | 1879 CSB_TCB(0, 0) | 1880 CSB_TCB(1, 1) | 1881 CSB_TCB(2, 2) | 1882 CSB_TCB(3, 3) | 1883 CSB_TCB(4, 4) | 1884 CSB_TCB(5, 5) | 1885 CSB_TCB(6, 6) | 1886 CSB_TCB(7, 7)); 1887 1888 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); 1889 OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */ 1890 OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | 1891 S4_LINE_WIDTH_ONE | 1892 S4_CULLMODE_NONE | 1893 S4_VFMT_XY); 1894 OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. 
*/ 1895 OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */ 1896 1897 OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); 1898 OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); 1899 1900 OUT_BATCH(_3DSTATE_LOAD_INDIRECT); 1901 OUT_BATCH(0x00000000); 1902 1903 OUT_BATCH(_3DSTATE_STIPPLE); 1904 OUT_BATCH(0x00000000); 1905 1906 sna->render_state.gen3.need_invariant = false; 1907} 1908 1909#define MAX_OBJECTS 3 /* worst case: dst + src + mask */ 1910 1911static void 1912gen3_get_batch(struct sna *sna, const struct sna_composite_op *op) 1913{ 1914 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1915 1916 if (!kgem_check_batch(&sna->kgem, 200)) { 1917 DBG(("%s: flushing batch: size %d > %d\n", 1918 __FUNCTION__, 200, 1919 sna->kgem.surface-sna->kgem.nbatch)); 1920 kgem_submit(&sna->kgem); 1921 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1922 } 1923 1924 if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) { 1925 DBG(("%s: flushing batch: reloc %d >= %d\n", 1926 __FUNCTION__, 1927 sna->kgem.nreloc, 1928 (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS)); 1929 kgem_submit(&sna->kgem); 1930 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1931 } 1932 1933 if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) { 1934 DBG(("%s: flushing batch: exec %d >= %d\n", 1935 __FUNCTION__, 1936 sna->kgem.nexec, 1937 (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1)); 1938 kgem_submit(&sna->kgem); 1939 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1940 } 1941 1942 if (sna->render_state.gen3.need_invariant) 1943 gen3_emit_invariant(sna); 1944#undef MAX_OBJECTS 1945} 1946 1947static void gen3_emit_target(struct sna *sna, 1948 struct kgem_bo *bo, 1949 int width, 1950 int height, 1951 int format) 1952{ 1953 struct gen3_render_state *state = &sna->render_state.gen3; 1954 1955 assert(!too_large(width, height)); 1956 1957 /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */ 1958 assert(bo->unique_id != 0); 1959 if (bo->unique_id != state->current_dst) { 1960 uint32_t v; 1961 1962 DBG(("%s: setting new target id=%d, handle=%d\n", 1963 __FUNCTION__, bo->unique_id, bo->handle)); 1964 1965 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); 1966 OUT_BATCH(BUF_3D_ID_COLOR_BACK | 1967 gen3_buf_tiling(bo->tiling) | 1968 bo->pitch); 1969 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 1970 bo, 1971 I915_GEM_DOMAIN_RENDER << 16 | 1972 I915_GEM_DOMAIN_RENDER, 1973 0)); 1974 1975 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); 1976 OUT_BATCH(gen3_get_dst_format(format)); 1977 1978 v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1); 1979 if (v != state->last_drawrect_limit) { 1980 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); 1981 OUT_BATCH(0); /* XXX dither origin? 
*/ 1982 OUT_BATCH(0); 1983 OUT_BATCH(v); 1984 OUT_BATCH(0); 1985 state->last_drawrect_limit = v; 1986 } 1987 1988 state->current_dst = bo->unique_id; 1989 } 1990 assert(bo->exec); 1991 kgem_bo_mark_dirty(bo); 1992} 1993 1994static void gen3_emit_composite_state(struct sna *sna, 1995 const struct sna_composite_op *op) 1996{ 1997 struct gen3_render_state *state = &sna->render_state.gen3; 1998 uint32_t map[4]; 1999 uint32_t sampler[4]; 2000 struct kgem_bo *bo[2]; 2001 unsigned int tex_count, n; 2002 uint32_t ss2; 2003 2004 gen3_get_batch(sna, op); 2005 2006 if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 2007 if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) 2008 OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 2009 else 2010 OUT_BATCH(_3DSTATE_MODES_5_CMD | 2011 PIPELINE_FLUSH_RENDER_CACHE | 2012 PIPELINE_FLUSH_TEXTURE_CACHE); 2013 kgem_clear_dirty(&sna->kgem); 2014 } 2015 2016 gen3_emit_target(sna, 2017 op->dst.bo, 2018 op->dst.width, 2019 op->dst.height, 2020 op->dst.format); 2021 2022 ss2 = ~0; 2023 tex_count = 0; 2024 switch (op->src.u.gen3.type) { 2025 case SHADER_OPACITY: 2026 case SHADER_NONE: 2027 assert(0); 2028 case SHADER_ZERO: 2029 case SHADER_BLACK: 2030 case SHADER_WHITE: 2031 break; 2032 case SHADER_CONSTANT: 2033 if (op->src.u.gen3.mode != state->last_diffuse) { 2034 OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2035 OUT_BATCH(op->src.u.gen3.mode); 2036 state->last_diffuse = op->src.u.gen3.mode; 2037 } 2038 break; 2039 case SHADER_LINEAR: 2040 case SHADER_RADIAL: 2041 case SHADER_TEXTURE: 2042 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2043 ss2 |= S2_TEXCOORD_FMT(tex_count, 2044 op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); 2045 assert(op->src.card_format); 2046 map[tex_count * 2 + 0] = 2047 op->src.card_format | 2048 gen3_ms_tiling(op->src.bo->tiling) | 2049 (op->src.height - 1) << MS3_HEIGHT_SHIFT | 2050 (op->src.width - 1) << MS3_WIDTH_SHIFT; 2051 map[tex_count * 2 + 1] = 2052 (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT; 2053 2054 sampler[tex_count * 2 + 0] = op->src.filter; 2055 sampler[tex_count * 2 + 1] = 2056 op->src.repeat | 2057 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT; 2058 bo[tex_count] = op->src.bo; 2059 tex_count++; 2060 break; 2061 } 2062 switch (op->mask.u.gen3.type) { 2063 case SHADER_NONE: 2064 case SHADER_ZERO: 2065 case SHADER_BLACK: 2066 case SHADER_WHITE: 2067 break; 2068 case SHADER_CONSTANT: 2069 if (op->mask.u.gen3.mode != state->last_specular) { 2070 OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); 2071 OUT_BATCH(op->mask.u.gen3.mode); 2072 state->last_specular = op->mask.u.gen3.mode; 2073 } 2074 break; 2075 case SHADER_LINEAR: 2076 case SHADER_RADIAL: 2077 case SHADER_TEXTURE: 2078 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2079 ss2 |= S2_TEXCOORD_FMT(tex_count, 2080 op->mask.is_affine ? 
TEXCOORDFMT_2D : TEXCOORDFMT_4D); 2081 assert(op->mask.card_format); 2082 map[tex_count * 2 + 0] = 2083 op->mask.card_format | 2084 gen3_ms_tiling(op->mask.bo->tiling) | 2085 (op->mask.height - 1) << MS3_HEIGHT_SHIFT | 2086 (op->mask.width - 1) << MS3_WIDTH_SHIFT; 2087 map[tex_count * 2 + 1] = 2088 (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT; 2089 2090 sampler[tex_count * 2 + 0] = op->mask.filter; 2091 sampler[tex_count * 2 + 1] = 2092 op->mask.repeat | 2093 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT; 2094 bo[tex_count] = op->mask.bo; 2095 tex_count++; 2096 break; 2097 case SHADER_OPACITY: 2098 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2099 ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D); 2100 break; 2101 } 2102 2103 { 2104 uint32_t blend_offset = sna->kgem.nbatch; 2105 2106 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); 2107 OUT_BATCH(ss2); 2108 OUT_BATCH(gen3_get_blend_cntl(op->op, 2109 op->has_component_alpha, 2110 op->dst.format)); 2111 2112 if (memcmp(sna->kgem.batch + state->last_blend + 1, 2113 sna->kgem.batch + blend_offset + 1, 2114 2 * 4) == 0) 2115 sna->kgem.nbatch = blend_offset; 2116 else 2117 state->last_blend = blend_offset; 2118 } 2119 2120 if (op->u.gen3.num_constants) { 2121 int count = op->u.gen3.num_constants; 2122 if (state->last_constants) { 2123 int last = sna->kgem.batch[state->last_constants+1]; 2124 if (last == (1 << (count >> 2)) - 1 && 2125 memcmp(&sna->kgem.batch[state->last_constants+2], 2126 op->u.gen3.constants, 2127 count * sizeof(uint32_t)) == 0) 2128 count = 0; 2129 } 2130 if (count) { 2131 state->last_constants = sna->kgem.nbatch; 2132 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count); 2133 OUT_BATCH((1 << (count >> 2)) - 1); 2134 2135 memcpy(sna->kgem.batch + sna->kgem.nbatch, 2136 op->u.gen3.constants, 2137 count * sizeof(uint32_t)); 2138 sna->kgem.nbatch += count; 2139 } 2140 } 2141 2142 if (tex_count != 0) { 2143 uint32_t rewind; 2144 2145 n = 0; 2146 if (tex_count == state->tex_count) { 2147 for (; n < tex_count; n++) { 2148 if (map[2*n+0] != state->tex_map[2*n+0] || 2149 map[2*n+1] != state->tex_map[2*n+1] || 2150 state->tex_handle[n] != bo[n]->handle || 2151 state->tex_delta[n] != bo[n]->delta) 2152 break; 2153 } 2154 } 2155 if (n < tex_count) { 2156 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); 2157 OUT_BATCH((1 << tex_count) - 1); 2158 for (n = 0; n < tex_count; n++) { 2159 OUT_BATCH(kgem_add_reloc(&sna->kgem, 2160 sna->kgem.nbatch, 2161 bo[n], 2162 I915_GEM_DOMAIN_SAMPLER<< 16, 2163 0)); 2164 OUT_BATCH(map[2*n + 0]); 2165 OUT_BATCH(map[2*n + 1]); 2166 2167 state->tex_map[2*n+0] = map[2*n+0]; 2168 state->tex_map[2*n+1] = map[2*n+1]; 2169 state->tex_handle[n] = bo[n]->handle; 2170 state->tex_delta[n] = bo[n]->delta; 2171 } 2172 state->tex_count = n; 2173 } 2174 2175 rewind = sna->kgem.nbatch; 2176 OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); 2177 OUT_BATCH((1 << tex_count) - 1); 2178 for (n = 0; n < tex_count; n++) { 2179 OUT_BATCH(sampler[2*n + 0]); 2180 OUT_BATCH(sampler[2*n + 1]); 2181 OUT_BATCH(0); 2182 } 2183 if (state->last_sampler && 2184 memcmp(&sna->kgem.batch[state->last_sampler+1], 2185 &sna->kgem.batch[rewind + 1], 2186 (3*tex_count + 1)*sizeof(uint32_t)) == 0) 2187 sna->kgem.nbatch = rewind; 2188 else 2189 state->last_sampler = rewind; 2190 } 2191 2192 gen3_composite_emit_shader(sna, op, op->op); 2193} 2194 2195static bool gen3_magic_ca_pass(struct sna *sna, 2196 const struct sna_composite_op *op) 2197{ 2198 if (!op->need_magic_ca_pass) 2199 return false; 2200 2201 
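	/* Second half of the component-alpha "magic" two-pass blend: the
	 * rectangles just queued were drawn with PictOpOutReverse (see
	 * gen3_render_composite(), which substitutes that op and sets
	 * need_magic_ca_pass) to knock mask * src.alpha out of the
	 * destination.  Here the very same vertex range is replayed
	 * (PRIM3D_INDIRECT_SEQUENTIAL starting at vertex_start) with an
	 * ADD blend and the shader re-emitted for PictOpAdd, accumulating
	 * the missing src * mask term of the OVER equation.
	 */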
DBG(("%s(%d)\n", __FUNCTION__, 2202 sna->render.vertex_index - sna->render.vertex_start)); 2203 2204 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); 2205 OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format)); 2206 gen3_composite_emit_shader(sna, op, PictOpAdd); 2207 2208 OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | 2209 (sna->render.vertex_index - sna->render.vertex_start)); 2210 OUT_BATCH(sna->render.vertex_start); 2211 2212 sna->render_state.gen3.last_blend = 0; 2213 return true; 2214} 2215 2216static void gen3_vertex_flush(struct sna *sna) 2217{ 2218 assert(sna->render.vertex_offset); 2219 2220 DBG(("%s[%x] = %d\n", __FUNCTION__, 2221 4*sna->render.vertex_offset, 2222 sna->render.vertex_index - sna->render.vertex_start)); 2223 2224 sna->kgem.batch[sna->render.vertex_offset] = 2225 PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | 2226 (sna->render.vertex_index - sna->render.vertex_start); 2227 sna->kgem.batch[sna->render.vertex_offset + 1] = 2228 sna->render.vertex_start; 2229 2230 sna->render.vertex_offset = 0; 2231} 2232 2233static int gen3_vertex_finish(struct sna *sna) 2234{ 2235 struct kgem_bo *bo; 2236 2237 DBG(("%s: used=%d/%d, vbo active? %d\n", 2238 __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, 2239 sna->render.vbo ? sna->render.vbo->handle : 0)); 2240 assert(sna->render.vertex_offset == 0); 2241 assert(sna->render.vertex_used); 2242 assert(sna->render.vertex_used <= sna->render.vertex_size); 2243 2244 sna_vertex_wait__locked(&sna->render); 2245 2246 bo = sna->render.vbo; 2247 if (bo) { 2248 DBG(("%s: reloc = %d\n", __FUNCTION__, 2249 sna->render.vertex_reloc[0])); 2250 2251 if (sna->render.vertex_reloc[0]) { 2252 sna->kgem.batch[sna->render.vertex_reloc[0]] = 2253 kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], 2254 bo, I915_GEM_DOMAIN_VERTEX << 16, 0); 2255 2256 sna->render.vertex_reloc[0] = 0; 2257 } 2258 sna->render.vertex_used = 0; 2259 sna->render.vertex_index = 0; 2260 sna->render.vbo = NULL; 2261 2262 kgem_bo_destroy(&sna->kgem, bo); 2263 } 2264 2265 sna->render.vertices = NULL; 2266 sna->render.vbo = kgem_create_linear(&sna->kgem, 2267 256*1024, CREATE_GTT_MAP); 2268 if (sna->render.vbo) 2269 sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); 2270 if (sna->render.vertices == NULL) { 2271 if (sna->render.vbo) 2272 kgem_bo_destroy(&sna->kgem, sna->render.vbo); 2273 sna->render.vbo = NULL; 2274 return 0; 2275 } 2276 assert(sna->render.vbo->snoop == false); 2277 2278 if (sna->render.vertex_used) { 2279 memcpy(sna->render.vertices, 2280 sna->render.vertex_data, 2281 sizeof(float)*sna->render.vertex_used); 2282 } 2283 sna->render.vertex_size = 64 * 1024 - 1; 2284 return sna->render.vertex_size - sna->render.vertex_used; 2285} 2286 2287static void gen3_vertex_close(struct sna *sna) 2288{ 2289 struct kgem_bo *bo, *free_bo = NULL; 2290 unsigned int delta = 0; 2291 2292 assert(sna->render.vertex_offset == 0); 2293 if (sna->render.vertex_reloc[0] == 0) 2294 return; 2295 2296 DBG(("%s: used=%d/%d, vbo active? %d\n", 2297 __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, 2298 sna->render.vbo ? 
sna->render.vbo->handle : 0)); 2299 2300 bo = sna->render.vbo; 2301 if (bo) { 2302 if (sna->render.vertex_size - sna->render.vertex_used < 64) { 2303 DBG(("%s: discarding full vbo\n", __FUNCTION__)); 2304 sna->render.vbo = NULL; 2305 sna->render.vertices = sna->render.vertex_data; 2306 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 2307 free_bo = bo; 2308 } else if (sna->render.vertices == MAP(bo->map__cpu)) { 2309 DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); 2310 sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo); 2311 if (sna->render.vertices == NULL) { 2312 DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__)); 2313 sna->render.vbo = NULL; 2314 sna->render.vertices = sna->render.vertex_data; 2315 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 2316 free_bo = bo; 2317 } 2318 } 2319 } else { 2320 if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { 2321 DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, 2322 sna->render.vertex_used, sna->kgem.nbatch)); 2323 memcpy(sna->kgem.batch + sna->kgem.nbatch, 2324 sna->render.vertex_data, 2325 sna->render.vertex_used * 4); 2326 delta = sna->kgem.nbatch * 4; 2327 bo = NULL; 2328 sna->kgem.nbatch += sna->render.vertex_used; 2329 } else { 2330 DBG(("%s: new vbo: %d\n", __FUNCTION__, 2331 sna->render.vertex_used)); 2332 bo = kgem_create_linear(&sna->kgem, 2333 4*sna->render.vertex_used, 2334 CREATE_NO_THROTTLE); 2335 if (bo) { 2336 assert(bo->snoop == false); 2337 kgem_bo_write(&sna->kgem, bo, 2338 sna->render.vertex_data, 2339 4*sna->render.vertex_used); 2340 } 2341 free_bo = bo; 2342 } 2343 } 2344 2345 DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0])); 2346 sna->kgem.batch[sna->render.vertex_reloc[0]] = 2347 kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], 2348 bo, I915_GEM_DOMAIN_VERTEX << 16, delta); 2349 sna->render.vertex_reloc[0] = 0; 2350 2351 if (sna->render.vbo == NULL) { 2352 DBG(("%s: resetting vbo\n", __FUNCTION__)); 2353 sna->render.vertex_used = 0; 2354 sna->render.vertex_index = 0; 2355 assert(sna->render.vertices == sna->render.vertex_data); 2356 assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); 2357 } 2358 2359 if (free_bo) 2360 kgem_bo_destroy(&sna->kgem, free_bo); 2361} 2362 2363static bool gen3_rectangle_begin(struct sna *sna, 2364 const struct sna_composite_op *op) 2365{ 2366 struct gen3_render_state *state = &sna->render_state.gen3; 2367 int ndwords, i1_cmd = 0, i1_len = 0; 2368 2369 if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 2370 return true; 2371 2372 ndwords = 2; 2373 if (op->need_magic_ca_pass) 2374 ndwords += 100; 2375 if (sna->render.vertex_reloc[0] == 0) 2376 i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++; 2377 if (state->floats_per_vertex != op->floats_per_vertex) 2378 i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++; 2379 2380 if (!kgem_check_batch(&sna->kgem, ndwords+1)) 2381 return false; 2382 2383 if (i1_cmd) { 2384 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1)); 2385 if (sna->render.vertex_reloc[0] == 0) 2386 sna->render.vertex_reloc[0] = sna->kgem.nbatch++; 2387 if (state->floats_per_vertex != op->floats_per_vertex) { 2388 state->floats_per_vertex = op->floats_per_vertex; 2389 OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT | 2390 state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT); 2391 } 2392 } 2393 2394 if (sna->kgem.nbatch == 2 + state->last_vertex_offset && 2395 !op->need_magic_ca_pass) { 2396 sna->render.vertex_offset = 
state->last_vertex_offset; 2397 } else { 2398 sna->render.vertex_offset = sna->kgem.nbatch; 2399 OUT_BATCH(MI_NOOP); /* to be filled later */ 2400 OUT_BATCH(MI_NOOP); 2401 sna->render.vertex_start = sna->render.vertex_index; 2402 state->last_vertex_offset = sna->render.vertex_offset; 2403 } 2404 2405 return true; 2406} 2407 2408static int gen3_get_rectangles__flush(struct sna *sna, 2409 const struct sna_composite_op *op) 2410{ 2411 /* Preventing discarding new vbo after lock contention */ 2412 if (sna_vertex_wait__locked(&sna->render)) { 2413 int rem = vertex_space(sna); 2414 if (rem > op->floats_per_rect) 2415 return rem; 2416 } 2417 2418 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5)) 2419 return 0; 2420 if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) 2421 return 0; 2422 2423 if (sna->render.vertex_offset) { 2424 gen3_vertex_flush(sna); 2425 if (gen3_magic_ca_pass(sna, op)) { 2426 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); 2427 OUT_BATCH(gen3_get_blend_cntl(op->op, 2428 op->has_component_alpha, 2429 op->dst.format)); 2430 gen3_composite_emit_shader(sna, op, op->op); 2431 } 2432 } 2433 2434 return gen3_vertex_finish(sna); 2435} 2436 2437inline static int gen3_get_rectangles(struct sna *sna, 2438 const struct sna_composite_op *op, 2439 int want) 2440{ 2441 int rem; 2442 2443 DBG(("%s: want=%d, rem=%d\n", 2444 __FUNCTION__, want*op->floats_per_rect, vertex_space(sna))); 2445 2446 assert(want); 2447 assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used); 2448 2449start: 2450 rem = vertex_space(sna); 2451 if (unlikely(op->floats_per_rect > rem)) { 2452 DBG(("flushing vbo for %s: %d < %d\n", 2453 __FUNCTION__, rem, op->floats_per_rect)); 2454 rem = gen3_get_rectangles__flush(sna, op); 2455 if (unlikely(rem == 0)) 2456 goto flush; 2457 } 2458 2459 if (unlikely(sna->render.vertex_offset == 0)) { 2460 if (!gen3_rectangle_begin(sna, op)) 2461 goto flush; 2462 else 2463 goto start; 2464 } 2465 2466 assert(rem <= vertex_space(sna)); 2467 assert(op->floats_per_rect <= rem); 2468 if (want > 1 && want * op->floats_per_rect > rem) 2469 want = rem / op->floats_per_rect; 2470 sna->render.vertex_index += 3*want; 2471 2472 assert(want); 2473 assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size); 2474 return want; 2475 2476flush: 2477 DBG(("%s: flushing batch\n", __FUNCTION__)); 2478 if (sna->render.vertex_offset) { 2479 gen3_vertex_flush(sna); 2480 gen3_magic_ca_pass(sna, op); 2481 } 2482 sna_vertex_wait__locked(&sna->render); 2483 _kgem_submit(&sna->kgem); 2484 gen3_emit_composite_state(sna, op); 2485 assert(sna->render.vertex_offset == 0); 2486 assert(sna->render.vertex_reloc[0] == 0); 2487 goto start; 2488} 2489 2490fastcall static void 2491gen3_render_composite_blt(struct sna *sna, 2492 const struct sna_composite_op *op, 2493 const struct sna_composite_rectangles *r) 2494{ 2495 DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__, 2496 r->src.x, r->src.y, op->src.offset[0], op->src.offset[1], 2497 r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1], 2498 r->dst.x, r->dst.y, op->dst.x, op->dst.y, 2499 r->width, r->height)); 2500 2501 gen3_get_rectangles(sna, op, 1); 2502 2503 op->prim_emit(sna, op, r); 2504} 2505 2506fastcall static void 2507gen3_render_composite_box(struct sna *sna, 2508 const struct sna_composite_op *op, 2509 const BoxRec *box) 2510{ 2511 struct sna_composite_rectangles r; 2512 2513 DBG(("%s: src=+(%d, %d), mask=+(%d, %d), 
dst=+(%d, %d)\n", 2514 __FUNCTION__, 2515 op->src.offset[0], op->src.offset[1], 2516 op->mask.offset[0], op->mask.offset[1], 2517 op->dst.x, op->dst.y)); 2518 2519 gen3_get_rectangles(sna, op, 1); 2520 2521 r.dst.x = box->x1; 2522 r.dst.y = box->y1; 2523 r.width = box->x2 - box->x1; 2524 r.height = box->y2 - box->y1; 2525 r.src = r.mask = r.dst; 2526 2527 op->prim_emit(sna, op, &r); 2528} 2529 2530static void 2531gen3_render_composite_boxes__blt(struct sna *sna, 2532 const struct sna_composite_op *op, 2533 const BoxRec *box, int nbox) 2534{ 2535 DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n", 2536 __FUNCTION__, nbox, 2537 op->src.offset[0], op->src.offset[1], 2538 op->mask.offset[0], op->mask.offset[1], 2539 op->dst.x, op->dst.y)); 2540 2541 do { 2542 int nbox_this_time; 2543 2544 nbox_this_time = gen3_get_rectangles(sna, op, nbox); 2545 nbox -= nbox_this_time; 2546 2547 do { 2548 struct sna_composite_rectangles r; 2549 2550 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2551 box->x1, box->y1, 2552 box->x2 - box->x1, 2553 box->y2 - box->y1)); 2554 2555 r.dst.x = box->x1; r.dst.y = box->y1; 2556 r.width = box->x2 - box->x1; 2557 r.height = box->y2 - box->y1; 2558 r.src = r.mask = r.dst; 2559 2560 op->prim_emit(sna, op, &r); 2561 box++; 2562 } while (--nbox_this_time); 2563 } while (nbox); 2564} 2565 2566static void 2567gen3_render_composite_boxes(struct sna *sna, 2568 const struct sna_composite_op *op, 2569 const BoxRec *box, int nbox) 2570{ 2571 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 2572 2573 do { 2574 int nbox_this_time; 2575 float *v; 2576 2577 nbox_this_time = gen3_get_rectangles(sna, op, nbox); 2578 assert(nbox_this_time); 2579 nbox -= nbox_this_time; 2580 2581 v = sna->render.vertices + sna->render.vertex_used; 2582 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 2583 2584 op->emit_boxes(op, box, nbox_this_time, v); 2585 box += nbox_this_time; 2586 } while (nbox); 2587} 2588 2589static void 2590gen3_render_composite_boxes__thread(struct sna *sna, 2591 const struct sna_composite_op *op, 2592 const BoxRec *box, int nbox) 2593{ 2594 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 2595 2596 sna_vertex_lock(&sna->render); 2597 do { 2598 int nbox_this_time; 2599 float *v; 2600 2601 nbox_this_time = gen3_get_rectangles(sna, op, nbox); 2602 assert(nbox_this_time); 2603 nbox -= nbox_this_time; 2604 2605 v = sna->render.vertices + sna->render.vertex_used; 2606 sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 2607 2608 sna_vertex_acquire__locked(&sna->render); 2609 sna_vertex_unlock(&sna->render); 2610 2611 op->emit_boxes(op, box, nbox_this_time, v); 2612 box += nbox_this_time; 2613 2614 sna_vertex_lock(&sna->render); 2615 sna_vertex_release__locked(&sna->render); 2616 } while (nbox); 2617 sna_vertex_unlock(&sna->render); 2618} 2619 2620static void 2621gen3_render_composite_done(struct sna *sna, 2622 const struct sna_composite_op *op) 2623{ 2624 DBG(("%s()\n", __FUNCTION__)); 2625 2626 if (sna->render.vertex_offset) { 2627 gen3_vertex_flush(sna); 2628 gen3_magic_ca_pass(sna, op); 2629 } 2630 2631 if (op->mask.bo) 2632 kgem_bo_destroy(&sna->kgem, op->mask.bo); 2633 if (op->src.bo) 2634 kgem_bo_destroy(&sna->kgem, op->src.bo); 2635 2636 sna_render_composite_redirect_done(sna, op); 2637} 2638 2639static void 2640discard_vbo(struct sna *sna) 2641{ 2642 kgem_bo_destroy(&sna->kgem, sna->render.vbo); 2643 sna->render.vbo = NULL; 2644 sna->render.vertices = sna->render.vertex_data; 2645 sna->render.vertex_size = 
ARRAY_SIZE(sna->render.vertex_data); 2646 sna->render.vertex_used = 0; 2647 sna->render.vertex_index = 0; 2648} 2649 2650static void 2651gen3_render_reset(struct sna *sna) 2652{ 2653 struct gen3_render_state *state = &sna->render_state.gen3; 2654 2655 state->need_invariant = true; 2656 state->current_dst = 0; 2657 state->tex_count = 0; 2658 state->last_drawrect_limit = ~0U; 2659 state->last_target = 0; 2660 state->last_blend = 0; 2661 state->last_constants = 0; 2662 state->last_sampler = 0; 2663 state->last_shader = 0x7fffffff; 2664 state->last_diffuse = 0xcc00ffee; 2665 state->last_specular = 0xcc00ffee; 2666 2667 state->floats_per_vertex = 0; 2668 state->last_floats_per_vertex = 0; 2669 state->last_vertex_offset = 0; 2670 2671 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 2672 DBG(("%s: discarding vbo as next access will stall: %lx\n", 2673 __FUNCTION__, (long)sna->render.vbo->presumed_offset)); 2674 discard_vbo(sna); 2675 } 2676 2677 sna->render.vertex_reloc[0] = 0; 2678 sna->render.vertex_offset = 0; 2679} 2680 2681static void 2682gen3_render_retire(struct kgem *kgem) 2683{ 2684 struct sna *sna; 2685 2686 sna = container_of(kgem, struct sna, kgem); 2687 if (sna->render.vertex_reloc[0] == 0 && 2688 sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { 2689 DBG(("%s: resetting idle vbo\n", __FUNCTION__)); 2690 sna->render.vertex_used = 0; 2691 sna->render.vertex_index = 0; 2692 } 2693} 2694 2695static void 2696gen3_render_expire(struct kgem *kgem) 2697{ 2698 struct sna *sna; 2699 2700 sna = container_of(kgem, struct sna, kgem); 2701 if (sna->render.vbo && !sna->render.vertex_used) { 2702 DBG(("%s: discarding vbo\n", __FUNCTION__)); 2703 discard_vbo(sna); 2704 } 2705} 2706 2707static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel, 2708 CARD32 format) 2709{ 2710 unsigned int i; 2711 2712 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { 2713 if (gen3_tex_formats[i].fmt == format) { 2714 channel->card_format = gen3_tex_formats[i].card_fmt; 2715 channel->rb_reversed = gen3_tex_formats[i].rb_reversed; 2716 return true; 2717 } 2718 } 2719 return false; 2720} 2721 2722static bool source_is_covered(PicturePtr picture, 2723 int x, int y, 2724 int width, int height) 2725{ 2726 int x1, y1, x2, y2; 2727 2728 if (picture->repeat && picture->repeatType != RepeatNone) 2729 return true; 2730 2731 if (picture->pDrawable == NULL) 2732 return false; 2733 2734 if (picture->transform) { 2735 pixman_box16_t sample; 2736 2737 sample.x1 = x; 2738 sample.y1 = y; 2739 sample.x2 = x + width; 2740 sample.y2 = y + height; 2741 2742 pixman_transform_bounds(picture->transform, &sample); 2743 2744 x1 = sample.x1; 2745 x2 = sample.x2; 2746 y1 = sample.y1; 2747 y2 = sample.y2; 2748 } else { 2749 x1 = x; 2750 y1 = y; 2751 x2 = x + width; 2752 y2 = y + height; 2753 } 2754 2755 return 2756 x1 >= 0 && y1 >= 0 && 2757 x2 <= picture->pDrawable->width && 2758 y2 <= picture->pDrawable->height; 2759} 2760 2761static bool gen3_composite_channel_set_xformat(PicturePtr picture, 2762 struct sna_composite_channel *channel, 2763 int x, int y, 2764 int width, int height) 2765{ 2766 unsigned int i; 2767 2768 if (PICT_FORMAT_A(picture->format) != 0) 2769 return false; 2770 2771 if (width == 0 || height == 0) 2772 return false; 2773 2774 if (!source_is_covered(picture, x, y, width, height)) 2775 return false; 2776 2777 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { 2778 if (gen3_tex_formats[i].xfmt == picture->format) { 2779 channel->card_format = 
gen3_tex_formats[i].card_fmt; 2780 channel->rb_reversed = gen3_tex_formats[i].rb_reversed; 2781 channel->alpha_fixup = true; 2782 return true; 2783 } 2784 } 2785 2786 return false; 2787} 2788 2789static int 2790gen3_init_solid(struct sna_composite_channel *channel, uint32_t color) 2791{ 2792 channel->u.gen3.mode = color; 2793 channel->u.gen3.type = SHADER_CONSTANT; 2794 if (color == 0) 2795 channel->u.gen3.type = SHADER_ZERO; 2796 else if (color == 0xff000000) 2797 channel->u.gen3.type = SHADER_BLACK; 2798 else if (color == 0xffffffff) 2799 channel->u.gen3.type = SHADER_WHITE; 2800 2801 channel->bo = NULL; 2802 channel->is_opaque = (color >> 24) == 0xff; 2803 channel->is_affine = 1; 2804 channel->alpha_fixup = 0; 2805 channel->rb_reversed = 0; 2806 2807 DBG(("%s: color=%08x, is_opaque=%d, type=%d\n", 2808 __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type)); 2809 2810 /* for consistency */ 2811 channel->repeat = RepeatNormal; 2812 channel->filter = PictFilterNearest; 2813 channel->pict_format = PICT_a8r8g8b8; 2814 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; 2815 2816 return 1; 2817} 2818 2819static void gen3_composite_channel_convert(struct sna_composite_channel *channel) 2820{ 2821 if (channel->u.gen3.type == SHADER_TEXTURE) 2822 channel->repeat = gen3_texture_repeat(channel->repeat); 2823 else 2824 channel->repeat = gen3_gradient_repeat(channel->repeat); 2825 2826 channel->filter = gen3_filter(channel->filter); 2827 if (channel->card_format == 0) 2828 gen3_composite_channel_set_format(channel, channel->pict_format); 2829 assert(channel->card_format); 2830} 2831 2832static bool gen3_gradient_setup(struct sna *sna, 2833 PicturePtr picture, 2834 struct sna_composite_channel *channel, 2835 int16_t ox, int16_t oy) 2836{ 2837 int16_t dx, dy; 2838 2839 if (picture->repeat == 0) { 2840 channel->repeat = RepeatNone; 2841 } else switch (picture->repeatType) { 2842 case RepeatNone: 2843 case RepeatNormal: 2844 case RepeatPad: 2845 case RepeatReflect: 2846 channel->repeat = picture->repeatType; 2847 break; 2848 default: 2849 return false; 2850 } 2851 2852 channel->bo = 2853 sna_render_get_gradient(sna, 2854 (PictGradient *)picture->pSourcePict); 2855 if (channel->bo == NULL) 2856 return false; 2857 2858 channel->pict_format = PICT_a8r8g8b8; 2859 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; 2860 channel->filter = PictFilterNearest; 2861 channel->is_affine = sna_transform_is_affine(picture->transform); 2862 if (sna_transform_is_imprecise_integer_translation(picture->transform, PictFilterNearest, false, &dx, &dy)) { 2863 DBG(("%s: integer translation (%d, %d), removing\n", 2864 __FUNCTION__, dx, dy)); 2865 ox += dx; 2866 oy += dy; 2867 channel->transform = NULL; 2868 } else 2869 channel->transform = picture->transform; 2870 channel->width = channel->bo->pitch / 4; 2871 channel->height = 1; 2872 channel->offset[0] = ox; 2873 channel->offset[1] = oy; 2874 channel->scale[0] = channel->scale[1] = 1; 2875 return true; 2876} 2877 2878static int 2879gen3_init_linear(struct sna *sna, 2880 PicturePtr picture, 2881 struct sna_composite_op *op, 2882 struct sna_composite_channel *channel, 2883 int ox, int oy) 2884{ 2885 PictLinearGradient *linear = 2886 (PictLinearGradient *)picture->pSourcePict; 2887 float x0, y0, sf; 2888 float dx, dy, offset; 2889 int n; 2890 2891 DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n", 2892 __FUNCTION__, 2893 xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y), 2894 xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y))); 2895 2896 if 
(linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) 2897 return 0; 2898 2899 dx = xFixedToDouble(linear->p2.x - linear->p1.x); 2900 dy = xFixedToDouble(linear->p2.y - linear->p1.y); 2901 sf = dx*dx + dy*dy; 2902 dx /= sf; 2903 dy /= sf; 2904 2905 x0 = xFixedToDouble(linear->p1.x); 2906 y0 = xFixedToDouble(linear->p1.y); 2907 offset = dx*x0 + dy*y0; 2908 2909 n = op->u.gen3.num_constants; 2910 channel->u.gen3.constants = FS_C0 + n / 4; 2911 op->u.gen3.constants[n++] = dx; 2912 op->u.gen3.constants[n++] = dy; 2913 op->u.gen3.constants[n++] = -offset; 2914 op->u.gen3.constants[n++] = 0; 2915 2916 if (!gen3_gradient_setup(sna, picture, channel, ox, oy)) 2917 return -1; 2918 2919 channel->u.gen3.type = SHADER_LINEAR; 2920 op->u.gen3.num_constants = n; 2921 2922 DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n", 2923 __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0)); 2924 return 1; 2925} 2926 2927static int 2928gen3_init_radial(struct sna *sna, 2929 PicturePtr picture, 2930 struct sna_composite_op *op, 2931 struct sna_composite_channel *channel, 2932 int ox, int oy) 2933{ 2934 PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict; 2935 double dx, dy, dr, r1; 2936 int n; 2937 2938 dx = xFixedToDouble(radial->c2.x - radial->c1.x); 2939 dy = xFixedToDouble(radial->c2.y - radial->c1.y); 2940 dr = xFixedToDouble(radial->c2.radius - radial->c1.radius); 2941 2942 r1 = xFixedToDouble(radial->c1.radius); 2943 2944 n = op->u.gen3.num_constants; 2945 channel->u.gen3.constants = FS_C0 + n / 4; 2946 if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { 2947 if (radial->c2.radius == radial->c1.radius) { 2948 channel->u.gen3.type = SHADER_ZERO; 2949 return 1; 2950 } 2951 2952 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr; 2953 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr; 2954 op->u.gen3.constants[n++] = 1. 
/ dr; 2955 op->u.gen3.constants[n++] = -r1 / dr; 2956 2957 channel->u.gen3.mode = RADIAL_ONE; 2958 } else { 2959 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x); 2960 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y); 2961 op->u.gen3.constants[n++] = r1; 2962 op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr); 2963 2964 op->u.gen3.constants[n++] = -2 * dx; 2965 op->u.gen3.constants[n++] = -2 * dy; 2966 op->u.gen3.constants[n++] = -2 * r1 * dr; 2967 op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); 2968 2969 channel->u.gen3.mode = RADIAL_TWO; 2970 } 2971 2972 if (!gen3_gradient_setup(sna, picture, channel, ox, oy)) 2973 return -1; 2974 2975 channel->u.gen3.type = SHADER_RADIAL; 2976 op->u.gen3.num_constants = n; 2977 return 1; 2978} 2979 2980static bool 2981sna_picture_is_clear(PicturePtr picture, 2982 int x, int y, int w, int h, 2983 uint32_t *color) 2984{ 2985 struct sna_pixmap *priv; 2986 2987 if (!picture->pDrawable) 2988 return false; 2989 2990 priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable)); 2991 if (priv == NULL || !priv->clear) 2992 return false; 2993 2994 if (!source_is_covered(picture, x, y, w, h)) 2995 return false; 2996 2997 *color = priv->clear_color; 2998 return true; 2999} 3000 3001static int 3002gen3_composite_picture(struct sna *sna, 3003 PicturePtr picture, 3004 struct sna_composite_op *op, 3005 struct sna_composite_channel *channel, 3006 int16_t x, int16_t y, 3007 int16_t w, int16_t h, 3008 int16_t dst_x, int16_t dst_y, 3009 bool precise) 3010{ 3011 PixmapPtr pixmap; 3012 uint32_t color; 3013 int16_t dx, dy; 3014 3015 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 3016 __FUNCTION__, x, y, w, h, dst_x, dst_y)); 3017 3018 channel->card_format = 0; 3019 3020 if (picture->pDrawable == NULL) { 3021 SourcePict *source = picture->pSourcePict; 3022 int ret = -1; 3023 3024 switch (source->type) { 3025 case SourcePictTypeSolidFill: 3026 DBG(("%s: solid fill [%08x], format %08x\n", 3027 __FUNCTION__, 3028 (unsigned)source->solidFill.color, 3029 (unsigned)picture->format)); 3030 ret = gen3_init_solid(channel, source->solidFill.color); 3031 break; 3032 3033 case SourcePictTypeLinear: 3034 ret = gen3_init_linear(sna, picture, op, channel, 3035 x - dst_x, y - dst_y); 3036 break; 3037 3038 case SourcePictTypeRadial: 3039 ret = gen3_init_radial(sna, picture, op, channel, 3040 x - dst_x, y - dst_y); 3041 break; 3042 } 3043 3044 if (ret == -1) { 3045 if (!precise) 3046 ret = sna_render_picture_approximate_gradient(sna, picture, channel, 3047 x, y, w, h, dst_x, dst_y); 3048 if (ret == -1) 3049 ret = sna_render_picture_fixup(sna, picture, channel, 3050 x, y, w, h, dst_x, dst_y); 3051 } 3052 return ret; 3053 } 3054 3055 if (picture->alphaMap) { 3056 DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 3057 return sna_render_picture_fixup(sna, picture, channel, 3058 x, y, w, h, dst_x, dst_y); 3059 } 3060 3061 if (sna_picture_is_solid(picture, &color)) { 3062 DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color)); 3063 return gen3_init_solid(channel, color); 3064 } 3065 3066 if (sna_picture_is_clear(picture, x, y, w, h, &color)) { 3067 DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color)); 3068 return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8)); 3069 } 3070 3071 if (!gen3_check_repeat(picture)) 3072 return sna_render_picture_fixup(sna, picture, channel, 3073 x, y, w, h, dst_x, dst_y); 3074 3075 if (!gen3_check_filter(picture)) 3076 return sna_render_picture_fixup(sna, picture, channel, 3077 x, y, w, h, dst_x, 
dst_y); 3078 3079 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 3080 channel->filter = picture->filter; 3081 channel->pict_format = picture->format; 3082 3083 pixmap = get_drawable_pixmap(picture->pDrawable); 3084 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 3085 3086 x += dx + picture->pDrawable->x; 3087 y += dy + picture->pDrawable->y; 3088 3089 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 3090 DBG(("%s: integer translation (%d, %d), removing\n", 3091 __FUNCTION__, dx, dy)); 3092 x += dx; 3093 y += dy; 3094 channel->transform = NULL; 3095 channel->filter = PictFilterNearest; 3096 3097 if (channel->repeat || 3098 (x >= 0 && 3099 y >= 0 && 3100 x + w < pixmap->drawable.width && 3101 y + h < pixmap->drawable.height)) { 3102 struct sna_pixmap *priv = sna_pixmap(pixmap); 3103 if (priv && priv->clear) { 3104 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 3105 return gen3_init_solid(channel, priv->clear_color); 3106 } 3107 } 3108 } else { 3109 channel->transform = picture->transform; 3110 channel->is_affine = sna_transform_is_affine(picture->transform); 3111 } 3112 3113 if (!gen3_composite_channel_set_format(channel, picture->format) && 3114 !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h)) 3115 return sna_render_picture_convert(sna, picture, channel, pixmap, 3116 x, y, w, h, dst_x, dst_y, 3117 false); 3118 assert(channel->card_format); 3119 3120 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 3121 DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n", 3122 __FUNCTION__, 3123 pixmap->drawable.width, pixmap->drawable.height, 3124 x, y, w, h)); 3125 return sna_render_picture_extract(sna, picture, channel, 3126 x, y, w, h, dst_x, dst_y); 3127 } 3128 3129 return sna_render_pixmap_bo(sna, channel, pixmap, 3130 x, y, w, h, dst_x, dst_y); 3131} 3132 3133static void 3134gen3_align_vertex(struct sna *sna, 3135 const struct sna_composite_op *op) 3136{ 3137 int vertex_index; 3138 3139 if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex) 3140 return; 3141 3142 DBG(("aligning vertex: was %d, now %d floats per vertex\n", 3143 sna->render_state.gen3.last_floats_per_vertex, 3144 op->floats_per_vertex)); 3145 3146 assert(op->floats_per_rect == 3*op->floats_per_vertex); 3147 3148 vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 3149 if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { 3150 DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", 3151 __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); 3152 if (gen3_vertex_finish(sna) < 2*op->floats_per_vertex) 3153 kgem_submit(&sna->kgem); 3154 3155 vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 3156 } 3157 3158 sna->render.vertex_index = vertex_index; 3159 sna->render.vertex_used = vertex_index * op->floats_per_vertex; 3160} 3161 3162static bool 3163gen3_composite_set_target(struct sna *sna, 3164 struct sna_composite_op *op, 3165 PicturePtr dst, 3166 int x, int y, int w, int h, 3167 bool partial) 3168{ 3169 BoxRec box; 3170 unsigned hint; 3171 3172 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 3173 op->dst.format = dst->format; 3174 op->dst.width = op->dst.pixmap->drawable.width; 3175 op->dst.height = op->dst.pixmap->drawable.height; 3176 3177 if (w && h) { 3178 box.x1 
= x; 3179 box.y1 = y; 3180 box.x2 = x + w; 3181 box.y2 = y + h; 3182 } else 3183 sna_render_picture_extents(dst, &box); 3184 3185 hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 3186 if (!partial) { 3187 hint |= IGNORE_DAMAGE; 3188 if (w == op->dst.width && h == op->dst.height) 3189 hint |= REPLACES; 3190 } 3191 3192 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 3193 if (op->dst.bo == NULL) 3194 return false; 3195 3196 if (hint & REPLACES) { 3197 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 3198 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 3199 } 3200 3201 assert(op->dst.bo->unique_id); 3202 3203 /* For single-stream mode there should be no minimum alignment 3204 * required, except that the width must be at least 2 elements. 3205 * Furthermore, it appears that the pitch must be a multiple of 3206 * 2 elements. 3207 */ 3208 if (op->dst.bo->pitch & ((2*op->dst.pixmap->drawable.bitsPerPixel >> 3) - 1)) 3209 return false; 3210 3211 get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 3212 &op->dst.x, &op->dst.y); 3213 3214 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 3215 __FUNCTION__, 3216 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 3217 op->dst.width, op->dst.height, 3218 op->dst.bo->pitch, 3219 op->dst.x, op->dst.y, 3220 op->damage ? *op->damage : (void *)-1)); 3221 3222 assert(op->dst.bo->proxy == NULL); 3223 3224 if ((too_large(op->dst.width, op->dst.height) || 3225 !gen3_check_pitch_3d(op->dst.bo)) && 3226 !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 3227 return false; 3228 3229 return true; 3230} 3231 3232static inline uint8_t 3233mul_8_8(uint8_t a, uint8_t b) 3234{ 3235 uint16_t t = a * (uint16_t)b + 0x7f; 3236 return ((t >> 8) + t) >> 8; 3237} 3238 3239static inline uint32_t multa(uint32_t s, uint32_t m, int shift) 3240{ 3241 return mul_8_8((s >> shift) & 0xff, m >> 24) << shift; 3242} 3243 3244static inline bool is_constant_ps(uint32_t type) 3245{ 3246 switch (type) { 3247 case SHADER_NONE: /* be warned! 
*/ 3248 case SHADER_ZERO: 3249 case SHADER_BLACK: 3250 case SHADER_WHITE: 3251 case SHADER_CONSTANT: 3252 return true; 3253 default: 3254 return false; 3255 } 3256} 3257 3258static bool 3259has_alphamap(PicturePtr p) 3260{ 3261 return p->alphaMap != NULL; 3262} 3263 3264static bool 3265need_upload(PicturePtr p) 3266{ 3267 return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 3268} 3269 3270static bool 3271source_is_busy(PixmapPtr pixmap) 3272{ 3273 struct sna_pixmap *priv = sna_pixmap(pixmap); 3274 if (priv == NULL) 3275 return false; 3276 3277 if (priv->clear) 3278 return false; 3279 3280 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 3281 return true; 3282 3283 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 3284 return true; 3285 3286 return priv->gpu_damage && !priv->cpu_damage; 3287} 3288 3289static bool 3290is_unhandled_gradient(PicturePtr picture, bool precise) 3291{ 3292 if (picture->pDrawable) 3293 return false; 3294 3295 switch (picture->pSourcePict->type) { 3296 case SourcePictTypeSolidFill: 3297 case SourcePictTypeLinear: 3298 case SourcePictTypeRadial: 3299 return false; 3300 default: 3301 return precise; 3302 } 3303} 3304 3305static bool 3306source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 3307{ 3308 if (sna_picture_is_solid(p, NULL)) 3309 return false; 3310 3311 if (is_unhandled_gradient(p, precise)) 3312 return true; 3313 3314 if (!gen3_check_xformat(p) || !gen3_check_repeat(p)) 3315 return true; 3316 3317 if (pixmap && source_is_busy(pixmap)) 3318 return false; 3319 3320 return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p); 3321} 3322 3323static bool 3324gen3_composite_fallback(struct sna *sna, 3325 uint8_t op, 3326 PicturePtr src, 3327 PicturePtr mask, 3328 PicturePtr dst) 3329{ 3330 PixmapPtr src_pixmap; 3331 PixmapPtr mask_pixmap; 3332 PixmapPtr dst_pixmap; 3333 bool src_fallback, mask_fallback; 3334 3335 if (!gen3_check_dst_format(dst->format)) { 3336 DBG(("%s: unknown destination format: %d\n", 3337 __FUNCTION__, dst->format)); 3338 return true; 3339 } 3340 3341 dst_pixmap = get_drawable_pixmap(dst->pDrawable); 3342 3343 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 3344 src_fallback = source_fallback(src, src_pixmap, 3345 dst->polyMode == PolyModePrecise); 3346 3347 if (mask) { 3348 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 3349 mask_fallback = source_fallback(mask, mask_pixmap, 3350 dst->polyMode == PolyModePrecise); 3351 } else { 3352 mask_pixmap = NULL; 3353 mask_fallback = false; 3354 } 3355 3356 /* If we are using the destination as a source and need to 3357 * readback in order to upload the source, do it all 3358 * on the cpu. 
3359 */ 3360 if (src_pixmap == dst_pixmap && src_fallback) { 3361 DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 3362 return true; 3363 } 3364 if (mask_pixmap == dst_pixmap && mask_fallback) { 3365 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 3366 return true; 3367 } 3368 3369 if (mask && 3370 mask->componentAlpha && PICT_FORMAT_RGB(mask->format) && 3371 gen3_blend_op[op].src_alpha && 3372 gen3_blend_op[op].src_blend != BLENDFACT_ZERO && 3373 op != PictOpOver) { 3374 DBG(("%s: component-alpha mask with op=%d, should fallback\n", 3375 __FUNCTION__, op)); 3376 return true; 3377 } 3378 3379 /* If anything is on the GPU, push everything out to the GPU */ 3380 if (dst_use_gpu(dst_pixmap)) { 3381 DBG(("%s: dst is already on the GPU, try to use GPU\n", 3382 __FUNCTION__)); 3383 return false; 3384 } 3385 3386 if (src_pixmap && !src_fallback) { 3387 DBG(("%s: src is already on the GPU, try to use GPU\n", 3388 __FUNCTION__)); 3389 return false; 3390 } 3391 if (mask_pixmap && !mask_fallback) { 3392 DBG(("%s: mask is already on the GPU, try to use GPU\n", 3393 __FUNCTION__)); 3394 return false; 3395 } 3396 3397 /* However if the dst is not on the GPU and we need to 3398 * render one of the sources using the CPU, we may 3399 * as well do the entire operation in place onthe CPU. 3400 */ 3401 if (src_fallback) { 3402 DBG(("%s: dst is on the CPU and src will fallback\n", 3403 __FUNCTION__)); 3404 return true; 3405 } 3406 3407 if (mask && mask_fallback) { 3408 DBG(("%s: dst is on the CPU and mask will fallback\n", 3409 __FUNCTION__)); 3410 return true; 3411 } 3412 3413 if (too_large(dst_pixmap->drawable.width, 3414 dst_pixmap->drawable.height) && 3415 dst_is_cpu(dst_pixmap)) { 3416 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 3417 return true; 3418 } 3419 3420 DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n", 3421 __FUNCTION__, dst_use_cpu(dst_pixmap))); 3422 return dst_use_cpu(dst_pixmap); 3423} 3424 3425static int 3426reuse_source(struct sna *sna, 3427 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 3428 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 3429{ 3430 if (src_x != msk_x || src_y != msk_y) 3431 return false; 3432 3433 if (mask == src) { 3434 *mc = *sc; 3435 if (mc->bo) 3436 kgem_bo_reference(mc->bo); 3437 return true; 3438 } 3439 3440 if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable)) 3441 return false; 3442 3443 if (sc->is_solid) 3444 return false; 3445 3446 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 3447 3448 if (!sna_transform_equal(src->transform, mask->transform)) 3449 return false; 3450 3451 if (!sna_picture_alphamap_equal(src, mask)) 3452 return false; 3453 3454 if (!gen3_check_repeat(mask)) 3455 return false; 3456 3457 if (!gen3_check_filter(mask)) 3458 return false; 3459 3460 if (!gen3_check_format(mask)) 3461 return false; 3462 3463 DBG(("%s: reusing source channel for mask with a twist\n", 3464 __FUNCTION__)); 3465 3466 *mc = *sc; 3467 mc->repeat = gen3_texture_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 3468 mc->filter = gen3_filter(mask->filter); 3469 mc->pict_format = mask->format; 3470 gen3_composite_channel_set_format(mc, mask->format); 3471 assert(mc->card_format); 3472 if (mc->bo) 3473 kgem_bo_reference(mc->bo); 3474 return true; 3475} 3476 3477static bool 3478gen3_render_composite(struct sna *sna, 3479 uint8_t op, 3480 PicturePtr src, 3481 PicturePtr mask, 3482 PicturePtr dst, 3483 int16_t src_x, int16_t src_y, 3484 int16_t mask_x, int16_t mask_y, 3485 int16_t dst_x, int16_t dst_y, 3486 int16_t width, int16_t height, 3487 unsigned flags, 3488 struct sna_composite_op *tmp) 3489{ 3490 DBG(("%s()\n", __FUNCTION__)); 3491 3492 if (op >= ARRAY_SIZE(gen3_blend_op)) { 3493 DBG(("%s: fallback due to unhandled blend op: %d\n", 3494 __FUNCTION__, op)); 3495 return false; 3496 } 3497 3498 /* Try to use the BLT engine unless it implies a 3499 * 3D -> 2D context switch. 3500 */ 3501 if (mask == NULL && 3502 sna_blt_composite(sna, 3503 op, src, dst, 3504 src_x, src_y, 3505 dst_x, dst_y, 3506 width, height, 3507 flags, tmp)) 3508 return true; 3509 3510 if (gen3_composite_fallback(sna, op, src, mask, dst)) 3511 goto fallback; 3512 3513 if (need_tiling(sna, width, height)) 3514 return sna_tiling_composite(op, src, mask, dst, 3515 src_x, src_y, 3516 mask_x, mask_y, 3517 dst_x, dst_y, 3518 width, height, 3519 tmp); 3520 3521 if (!gen3_composite_set_target(sna, tmp, dst, 3522 dst_x, dst_y, width, height, 3523 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 3524 DBG(("%s: unable to set render target\n", 3525 __FUNCTION__)); 3526 goto fallback; 3527 } 3528 3529 tmp->op = op; 3530 tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format); 3531 tmp->u.gen3.num_constants = 0; 3532 tmp->src.u.gen3.type = SHADER_TEXTURE; 3533 tmp->src.is_affine = true; 3534 DBG(("%s: preparing source\n", __FUNCTION__)); 3535 switch (gen3_composite_picture(sna, src, tmp, &tmp->src, 3536 src_x, src_y, 3537 width, height, 3538 dst_x, dst_y, 3539 dst->polyMode == PolyModePrecise)) { 3540 case -1: 3541 goto cleanup_dst; 3542 case 0: 3543 tmp->src.u.gen3.type = SHADER_ZERO; 3544 break; 3545 case 1: 3546 if (mask == NULL && tmp->src.bo && 3547 sna_blt_composite__convert(sna, 3548 dst_x, dst_y, width, height, 3549 tmp)) 3550 return true; 3551 3552 gen3_composite_channel_convert(&tmp->src); 3553 break; 3554 } 3555 DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type)); 3556 3557 tmp->mask.u.gen3.type = SHADER_NONE; 3558 tmp->mask.is_affine = true; 3559 tmp->need_magic_ca_pass = false; 3560 tmp->has_component_alpha = false; 3561 if (mask && tmp->src.u.gen3.type != SHADER_ZERO) { 3562 if (!reuse_source(sna, 3563 src, &tmp->src, src_x, src_y, 3564 mask, &tmp->mask, mask_x, mask_y)) { 3565 tmp->mask.u.gen3.type = SHADER_TEXTURE; 3566 DBG(("%s: preparing mask\n", __FUNCTION__)); 3567 switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask, 3568 mask_x, mask_y, 3569 width, height, 3570 dst_x, dst_y, 3571 dst->polyMode == PolyModePrecise)) { 3572 case -1: 3573 goto cleanup_src; 3574 case 0: 3575 tmp->mask.u.gen3.type = SHADER_ZERO; 3576 break; 3577 case 1: 3578 gen3_composite_channel_convert(&tmp->mask); 3579 break; 3580 } 3581 } 3582 DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type)); 3583 if (tmp->mask.u.gen3.type == SHADER_ZERO) { 3584 if (tmp->src.bo) { 3585 kgem_bo_destroy(&sna->kgem, 3586 tmp->src.bo); 3587 tmp->src.bo = NULL; 3588 } 3589 tmp->src.u.gen3.type = SHADER_ZERO; 3590 tmp->mask.u.gen3.type = SHADER_NONE; 3591 } 3592 3593 if (tmp->mask.u.gen3.type != SHADER_NONE) { 
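		/* Reduce the mask where possible before picking an emitter:
		 * a white component-alpha mask or an opaque mask contributes
		 * nothing and is dropped; a constant mask over a constant
		 * source is pre-multiplied (via multa()) into a single
		 * SHADER_CONSTANT source; and a component-alpha mask whose op
		 * needs both source colour and source alpha is only kept for
		 * PictOpOver, by switching to PictOpOutReverse and flagging
		 * need_magic_ca_pass for the ADD pass emitted later.
		 */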
3594 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 3595 /* Check if it's component alpha that relies on a source alpha 3596 * and on the source value. We can only get one of those 3597 * into the single source value that we get to blend with. 3598 */ 3599 DBG(("%s: component-alpha mask: %d\n", 3600 __FUNCTION__, tmp->mask.u.gen3.type)); 3601 tmp->has_component_alpha = true; 3602 if (tmp->mask.u.gen3.type == SHADER_WHITE) { 3603 tmp->mask.u.gen3.type = SHADER_NONE; 3604 tmp->has_component_alpha = false; 3605 } else if (gen3_blend_op[op].src_alpha && 3606 gen3_blend_op[op].src_blend != BLENDFACT_ZERO) { 3607 if (op != PictOpOver) 3608 goto cleanup_mask; 3609 3610 tmp->need_magic_ca_pass = true; 3611 tmp->op = PictOpOutReverse; 3612 } 3613 } else { 3614 if (tmp->mask.is_opaque) { 3615 tmp->mask.u.gen3.type = SHADER_NONE; 3616 } else if (is_constant_ps(tmp->src.u.gen3.type) && 3617 is_constant_ps(tmp->mask.u.gen3.type)) { 3618 uint32_t v; 3619 3620 v = multa(tmp->src.u.gen3.mode, 3621 tmp->mask.u.gen3.mode, 3622 24); 3623 v |= multa(tmp->src.u.gen3.mode, 3624 tmp->mask.u.gen3.mode, 3625 16); 3626 v |= multa(tmp->src.u.gen3.mode, 3627 tmp->mask.u.gen3.mode, 3628 8); 3629 v |= multa(tmp->src.u.gen3.mode, 3630 tmp->mask.u.gen3.mode, 3631 0); 3632 3633 DBG(("%s: combining constant source/mask: %x x %x -> %x\n", 3634 __FUNCTION__, 3635 tmp->src.u.gen3.mode, 3636 tmp->mask.u.gen3.mode, 3637 v)); 3638 3639 tmp->src.u.gen3.type = SHADER_CONSTANT; 3640 tmp->src.u.gen3.mode = v; 3641 tmp->src.is_opaque = false; 3642 3643 tmp->mask.u.gen3.type = SHADER_NONE; 3644 } 3645 } 3646 } 3647 } 3648 DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, 3649 tmp->src.u.gen3.type, tmp->mask.u.gen3.type, 3650 tmp->src.is_affine, tmp->mask.is_affine)); 3651 3652 tmp->prim_emit = gen3_emit_composite_primitive; 3653 if (is_constant_ps(tmp->mask.u.gen3.type)) { 3654 switch (tmp->src.u.gen3.type) { 3655 case SHADER_NONE: 3656 case SHADER_ZERO: 3657 case SHADER_BLACK: 3658 case SHADER_WHITE: 3659 case SHADER_CONSTANT: 3660#if defined(sse2) && !defined(__x86_64__) 3661 if (sna->cpu_features & SSE2) { 3662 tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2; 3663 tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2; 3664 } else 3665#endif 3666 { 3667 tmp->prim_emit = gen3_emit_composite_primitive_constant; 3668 tmp->emit_boxes = gen3_emit_composite_boxes_constant; 3669 } 3670 3671 break; 3672 case SHADER_LINEAR: 3673 case SHADER_RADIAL: 3674 if (tmp->src.transform == NULL) { 3675#if defined(sse2) && !defined(__x86_64__) 3676 if (sna->cpu_features & SSE2) { 3677 tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2; 3678 tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2; 3679 } else 3680#endif 3681 { 3682 tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient; 3683 tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient; 3684 } 3685 } else if (tmp->src.is_affine) { 3686 tmp->src.scale[1] = tmp->src.scale[0] = 1. 
/ tmp->src.transform->matrix[2][2]; 3687#if defined(sse2) && !defined(__x86_64__) 3688 if (sna->cpu_features & SSE2) { 3689 tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2; 3690 tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2; 3691 } else 3692#endif 3693 { 3694 tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient; 3695 tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient; 3696 } 3697 } 3698 break; 3699 case SHADER_TEXTURE: 3700 if (tmp->src.transform == NULL) { 3701 if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { 3702#if defined(sse2) && !defined(__x86_64__) 3703 if (sna->cpu_features & SSE2) { 3704 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2; 3705 tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2; 3706 } else 3707#endif 3708 { 3709 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset; 3710 tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset; 3711 } 3712 } else { 3713#if defined(sse2) && !defined(__x86_64__) 3714 if (sna->cpu_features & SSE2) { 3715 tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2; 3716 tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2; 3717 } else 3718#endif 3719 { 3720 tmp->prim_emit = gen3_emit_composite_primitive_identity_source; 3721 tmp->emit_boxes = gen3_emit_composite_boxes_identity_source; 3722 } 3723 } 3724 } else if (tmp->src.is_affine) { 3725 tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 3726 tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 3727#if defined(sse2) && !defined(__x86_64__) 3728 if (sna->cpu_features & SSE2) { 3729 tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2; 3730 tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2; 3731 } else 3732#endif 3733 { 3734 tmp->prim_emit = gen3_emit_composite_primitive_affine_source; 3735 tmp->emit_boxes = gen3_emit_composite_boxes_affine_source; 3736 } 3737 } 3738 break; 3739 } 3740 } else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) { 3741 if (tmp->mask.transform == NULL) { 3742 if (is_constant_ps(tmp->src.u.gen3.type)) { 3743 if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { 3744#if defined(sse2) && !defined(__x86_64__) 3745 if (sna->cpu_features & SSE2) { 3746 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2; 3747 } else 3748#endif 3749 { 3750 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset; 3751 } 3752 } else { 3753#if defined(sse2) && !defined(__x86_64__) 3754 if (sna->cpu_features & SSE2) { 3755 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2; 3756 } else 3757#endif 3758 { 3759 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask; 3760 } 3761 } 3762 } else if (tmp->src.transform == NULL) { 3763#if defined(sse2) && !defined(__x86_64__) 3764 if (sna->cpu_features & SSE2) { 3765 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2; 3766 } else 3767#endif 3768 { 3769 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask; 3770 } 3771 } else if (tmp->src.is_affine) { 3772 tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 3773 tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 3774#if defined(sse2) && !defined(__x86_64__) 3775 if (sna->cpu_features & SSE2) { 3776 tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2; 3777 } else 3778#endif 3779 { 3780 
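					/* Scalar fallback: the sse2-attributed emitter above is
					 * only built for 32-bit x86 (see the surrounding #if),
					 * so x86-64 and non-SSE2 CPUs always land here; both
					 * variants are expected to produce identical vertices.
					 */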
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask; 3781 } 3782 } 3783 } 3784 } 3785
3786 tmp->floats_per_vertex = 2; 3787 if (!is_constant_ps(tmp->src.u.gen3.type)) 3788 tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4; 3789 if (!is_constant_ps(tmp->mask.u.gen3.type)) 3790 tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
3791 DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__, 3792 !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0, 3793 !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0, 3794 tmp->floats_per_vertex, 3795 tmp->prim_emit != gen3_emit_composite_primitive));
3796 tmp->floats_per_rect = 3 * tmp->floats_per_vertex; 3797
3798 tmp->blt = gen3_render_composite_blt; 3799 tmp->box = gen3_render_composite_box; 3800 tmp->boxes = gen3_render_composite_boxes__blt; 3801 if (tmp->emit_boxes) { 3802 tmp->boxes = gen3_render_composite_boxes; 3803 tmp->thread_boxes = gen3_render_composite_boxes__thread; 3804 } 3805 tmp->done = gen3_render_composite_done; 3806
3807 if (!kgem_check_bo(&sna->kgem, 3808 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 3809 NULL)) { 3810 kgem_submit(&sna->kgem); 3811 if (!kgem_check_bo(&sna->kgem, 3812 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 3813 NULL)) 3814 goto cleanup_mask; 3815 } 3816
3817 gen3_align_vertex(sna, tmp); 3818 gen3_emit_composite_state(sna, tmp); 3819 return true; 3820
3821cleanup_mask: 3822 if (tmp->mask.bo) { 3823 kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 3824 tmp->mask.bo = NULL; 3825 } 3826cleanup_src: 3827 if (tmp->src.bo) { 3828 kgem_bo_destroy(&sna->kgem, tmp->src.bo); 3829 tmp->src.bo = NULL; 3830 } 3831cleanup_dst: 3832 if (tmp->redirect.real_bo) { 3833 kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 3834 tmp->redirect.real_bo = NULL; 3835 }
3836fallback: 3837 return (mask == NULL && 3838 sna_blt_composite(sna, 3839 op, src, dst, 3840 src_x, src_y, 3841 dst_x, dst_y, 3842 width, height, 3843 flags | COMPOSITE_FALLBACK, tmp)); 3844} 3845
3846static void 3847gen3_emit_composite_spans_vertex(struct sna *sna, 3848 const struct sna_composite_spans_op *op, 3849 int16_t x, int16_t y, 3850 float opacity) 3851{ 3852 gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); 3853 gen3_emit_composite_texcoord(sna, &op->base.src, x, y); 3854 OUT_VERTEX(opacity); 3855} 3856
3857fastcall static void 3858gen3_emit_composite_spans_primitive_zero(struct sna *sna, 3859 const struct sna_composite_spans_op *op, 3860 const BoxRec *box, 3861 float opacity) 3862{ 3863 float *v = sna->render.vertices + sna->render.vertex_used; 3864 sna->render.vertex_used += 6; 3865
3866 v[0] = op->base.dst.x + box->x2; 3867 v[1] = op->base.dst.y + box->y2; 3868
3869 v[2] = op->base.dst.x + box->x1; 3870 v[3] = v[1]; 3871
3872 v[4] = v[2]; 3873 v[5] = op->base.dst.y + box->y1; 3874} 3875
3876fastcall static void 3877gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op, 3878 const struct sna_opacity_box *b, 3879 int nbox, float *v) 3880{ 3881 do { 3882 v[0] = op->base.dst.x + b->box.x2; 3883 v[1] = op->base.dst.y + b->box.y2; 3884
3885 v[2] = op->base.dst.x + b->box.x1; 3886 v[3] = v[1]; 3887
3888 v[4] = v[2]; 3889 v[5] = op->base.dst.y + b->box.y1; 3890
3891 v += 6; 3892 b++; 3893 } while (--nbox); 3894} 3895
3896fastcall static void 3897gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, 3898 const struct sna_composite_spans_op *op, 3899 const BoxRec *box, 3900 float opacity) 3901{ 3902 float *v = sna->render.vertices +
sna->render.vertex_used; 3903 sna->render.vertex_used += 6; 3904 3905 v[0] = box->x2; 3906 v[3] = v[1] = box->y2; 3907 v[4] = v[2] = box->x1; 3908 v[5] = box->y1; 3909} 3910 3911fastcall static void 3912gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op, 3913 const struct sna_opacity_box *b, 3914 int nbox, float *v) 3915{ 3916 do { 3917 v[0] = b->box.x2; 3918 v[3] = v[1] = b->box.y2; 3919 v[4] = v[2] = b->box.x1; 3920 v[5] = b->box.y1; 3921 3922 b++; 3923 v += 6; 3924 } while (--nbox); 3925} 3926 3927fastcall static void 3928gen3_emit_composite_spans_primitive_constant(struct sna *sna, 3929 const struct sna_composite_spans_op *op, 3930 const BoxRec *box, 3931 float opacity) 3932{ 3933 float *v = sna->render.vertices + sna->render.vertex_used; 3934 sna->render.vertex_used += 9; 3935 3936 v[0] = op->base.dst.x + box->x2; 3937 v[6] = v[3] = op->base.dst.x + box->x1; 3938 v[4] = v[1] = op->base.dst.y + box->y2; 3939 v[7] = op->base.dst.y + box->y1; 3940 v[8] = v[5] = v[2] = opacity; 3941} 3942 3943fastcall static void 3944gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op, 3945 const struct sna_opacity_box *b, 3946 int nbox, 3947 float *v) 3948{ 3949 do { 3950 v[0] = op->base.dst.x + b->box.x2; 3951 v[6] = v[3] = op->base.dst.x + b->box.x1; 3952 v[4] = v[1] = op->base.dst.y + b->box.y2; 3953 v[7] = op->base.dst.y + b->box.y1; 3954 v[8] = v[5] = v[2] = b->alpha; 3955 3956 v += 9; 3957 b++; 3958 } while (--nbox); 3959} 3960 3961fastcall static void 3962gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, 3963 const struct sna_composite_spans_op *op, 3964 const BoxRec *box, 3965 float opacity) 3966{ 3967 float *v = sna->render.vertices + sna->render.vertex_used; 3968 sna->render.vertex_used += 9; 3969 3970 v[0] = box->x2; 3971 v[6] = v[3] = box->x1; 3972 v[4] = v[1] = box->y2; 3973 v[7] = box->y1; 3974 v[8] = v[5] = v[2] = opacity; 3975} 3976 3977fastcall static void 3978gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op, 3979 const struct sna_opacity_box *b, 3980 int nbox, float *v) 3981{ 3982 do { 3983 v[0] = b->box.x2; 3984 v[6] = v[3] = b->box.x1; 3985 v[4] = v[1] = b->box.y2; 3986 v[7] = b->box.y1; 3987 v[8] = v[5] = v[2] = b->alpha; 3988 3989 v += 9; 3990 b++; 3991 } while (--nbox); 3992} 3993 3994fastcall static void 3995gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, 3996 const struct sna_composite_spans_op *op, 3997 const BoxRec *box, 3998 float opacity) 3999{ 4000 float *v = sna->render.vertices + sna->render.vertex_used; 4001 sna->render.vertex_used += 15; 4002 4003 v[0] = op->base.dst.x + box->x2; 4004 v[1] = op->base.dst.y + box->y2; 4005 v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 4006 v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 4007 v[4] = opacity; 4008 4009 v[5] = op->base.dst.x + box->x1; 4010 v[6] = v[1]; 4011 v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 4012 v[8] = v[3]; 4013 v[9] = opacity; 4014 4015 v[10] = v[5]; 4016 v[11] = op->base.dst.y + box->y1; 4017 v[12] = v[7]; 4018 v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 4019 v[14] = opacity; 4020} 4021 4022fastcall static void 4023gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op, 4024 const struct sna_opacity_box *b, 4025 int nbox, 4026 float *v) 4027{ 4028 do { 4029 v[0] = op->base.dst.x + b->box.x2; 4030 v[1] = 
op->base.dst.y + b->box.y2; 4031 v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; 4032 v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; 4033 v[4] = b->alpha; 4034 4035 v[5] = op->base.dst.x + b->box.x1; 4036 v[6] = v[1]; 4037 v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; 4038 v[8] = v[3]; 4039 v[9] = b->alpha; 4040 4041 v[10] = v[5]; 4042 v[11] = op->base.dst.y + b->box.y1; 4043 v[12] = v[7]; 4044 v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; 4045 v[14] = b->alpha; 4046 4047 v += 15; 4048 b++; 4049 } while (--nbox); 4050} 4051 4052fastcall static void 4053gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, 4054 const struct sna_composite_spans_op *op, 4055 const BoxRec *box, 4056 float opacity) 4057{ 4058 PictTransform *transform = op->base.src.transform; 4059 float *v; 4060 4061 v = sna->render.vertices + sna->render.vertex_used; 4062 sna->render.vertex_used += 15; 4063 4064 v[0] = op->base.dst.x + box->x2; 4065 v[6] = v[1] = op->base.dst.y + box->y2; 4066 v[10] = v[5] = op->base.dst.x + box->x1; 4067 v[11] = op->base.dst.y + box->y1; 4068 v[14] = v[9] = v[4] = opacity; 4069 4070 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 4071 (int)op->base.src.offset[1] + box->y2, 4072 transform, op->base.src.scale, 4073 &v[2], &v[3]); 4074 4075 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 4076 (int)op->base.src.offset[1] + box->y2, 4077 transform, op->base.src.scale, 4078 &v[7], &v[8]); 4079 4080 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 4081 (int)op->base.src.offset[1] + box->y1, 4082 transform, op->base.src.scale, 4083 &v[12], &v[13]); 4084} 4085 4086fastcall static void 4087gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op, 4088 const struct sna_opacity_box *b, 4089 int nbox, 4090 float *v) 4091{ 4092 PictTransform *transform = op->base.src.transform; 4093 4094 do { 4095 v[0] = op->base.dst.x + b->box.x2; 4096 v[6] = v[1] = op->base.dst.y + b->box.y2; 4097 v[10] = v[5] = op->base.dst.x + b->box.x1; 4098 v[11] = op->base.dst.y + b->box.y1; 4099 v[14] = v[9] = v[4] = b->alpha; 4100 4101 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, 4102 (int)op->base.src.offset[1] + b->box.y2, 4103 transform, op->base.src.scale, 4104 &v[2], &v[3]); 4105 4106 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, 4107 (int)op->base.src.offset[1] + b->box.y2, 4108 transform, op->base.src.scale, 4109 &v[7], &v[8]); 4110 4111 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, 4112 (int)op->base.src.offset[1] + b->box.y1, 4113 transform, op->base.src.scale, 4114 &v[12], &v[13]); 4115 v += 15; 4116 b++; 4117 } while (--nbox); 4118} 4119 4120fastcall static void 4121gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, 4122 const struct sna_composite_spans_op *op, 4123 const BoxRec *box, 4124 float opacity) 4125{ 4126 float *v = sna->render.vertices + sna->render.vertex_used; 4127 sna->render.vertex_used += 15; 4128 4129 v[0] = op->base.dst.x + box->x2; 4130 v[1] = op->base.dst.y + box->y2; 4131 v[2] = op->base.src.offset[0] + box->x2; 4132 v[3] = op->base.src.offset[1] + box->y2; 4133 v[4] = opacity; 4134 4135 v[5] = op->base.dst.x + box->x1; 4136 v[6] = v[1]; 4137 v[7] = op->base.src.offset[0] + box->x1; 4138 v[8] = v[3]; 4139 v[9] = opacity; 4140 4141 v[10] = v[5]; 4142 v[11] = op->base.dst.y + box->y1; 4143 v[12] = v[7]; 4144 v[13] = 
op->base.src.offset[1] + box->y1; 4145 v[14] = opacity; 4146} 4147 4148fastcall static void 4149gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op, 4150 const struct sna_opacity_box *b, 4151 int nbox, 4152 float *v) 4153{ 4154 do { 4155 v[0] = op->base.dst.x + b->box.x2; 4156 v[1] = op->base.dst.y + b->box.y2; 4157 v[2] = op->base.src.offset[0] + b->box.x2; 4158 v[3] = op->base.src.offset[1] + b->box.y2; 4159 v[4] = b->alpha; 4160 4161 v[5] = op->base.dst.x + b->box.x1; 4162 v[6] = v[1]; 4163 v[7] = op->base.src.offset[0] + b->box.x1; 4164 v[8] = v[3]; 4165 v[9] = b->alpha; 4166 4167 v[10] = v[5]; 4168 v[11] = op->base.dst.y + b->box.y1; 4169 v[12] = v[7]; 4170 v[13] = op->base.src.offset[1] + b->box.y1; 4171 v[14] = b->alpha; 4172 4173 v += 15; 4174 b++; 4175 } while (--nbox); 4176} 4177 4178#if defined(sse2) && !defined(__x86_64__) 4179sse2 fastcall static void 4180gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna, 4181 const struct sna_composite_spans_op *op, 4182 const BoxRec *box, 4183 float opacity) 4184{ 4185 float *v = sna->render.vertices + sna->render.vertex_used; 4186 sna->render.vertex_used += 9; 4187 4188 v[0] = op->base.dst.x + box->x2; 4189 v[6] = v[3] = op->base.dst.x + box->x1; 4190 v[4] = v[1] = op->base.dst.y + box->y2; 4191 v[7] = op->base.dst.y + box->y1; 4192 v[8] = v[5] = v[2] = opacity; 4193} 4194 4195sse2 fastcall static void 4196gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op, 4197 const struct sna_opacity_box *b, 4198 int nbox, 4199 float *v) 4200{ 4201 do { 4202 v[0] = op->base.dst.x + b->box.x2; 4203 v[6] = v[3] = op->base.dst.x + b->box.x1; 4204 v[4] = v[1] = op->base.dst.y + b->box.y2; 4205 v[7] = op->base.dst.y + b->box.y1; 4206 v[8] = v[5] = v[2] = b->alpha; 4207 4208 v += 9; 4209 b++; 4210 } while (--nbox); 4211} 4212 4213sse2 fastcall static void 4214gen3_render_composite_spans_constant_box__sse2(struct sna *sna, 4215 const struct sna_composite_spans_op *op, 4216 const BoxRec *box, float opacity) 4217{ 4218 float *v; 4219 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 4220 __FUNCTION__, 4221 op->base.src.offset[0], op->base.src.offset[1], 4222 opacity, 4223 op->base.dst.x, op->base.dst.y, 4224 box->x1, box->y1, 4225 box->x2 - box->x1, 4226 box->y2 - box->y1)); 4227 4228 gen3_get_rectangles(sna, &op->base, 1); 4229 4230 v = sna->render.vertices + sna->render.vertex_used; 4231 sna->render.vertex_used += 9; 4232 4233 v[0] = box->x2; 4234 v[6] = v[3] = box->x1; 4235 v[4] = v[1] = box->y2; 4236 v[7] = box->y1; 4237 v[8] = v[5] = v[2] = opacity; 4238} 4239 4240sse2 fastcall static void 4241gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna, 4242 const struct sna_composite_spans_op *op, 4243 const struct sna_opacity_box *box, 4244 int nbox) 4245{ 4246 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 4247 __FUNCTION__, nbox, 4248 op->base.src.offset[0], op->base.src.offset[1], 4249 op->base.dst.x, op->base.dst.y)); 4250 4251 sna_vertex_lock(&sna->render); 4252 do { 4253 int nbox_this_time; 4254 float *v; 4255 4256 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); 4257 assert(nbox_this_time); 4258 nbox -= nbox_this_time; 4259 4260 v = sna->render.vertices + sna->render.vertex_used; 4261 sna->render.vertex_used += nbox_this_time * 9; 4262 4263 sna_vertex_acquire__locked(&sna->render); 4264 sna_vertex_unlock(&sna->render); 4265 4266 do { 4267 v[0] = box->box.x2; 4268 v[6] = v[3] = 
box->box.x1; 4269 v[4] = v[1] = box->box.y2; 4270 v[7] = box->box.y1; 4271 v[8] = v[5] = v[2] = box->alpha; 4272 v += 9; 4273 box++; 4274 } while (--nbox_this_time); 4275 4276 sna_vertex_lock(&sna->render); 4277 sna_vertex_release__locked(&sna->render); 4278 } while (nbox); 4279 sna_vertex_unlock(&sna->render); 4280} 4281 4282sse2 fastcall static void 4283gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna, 4284 const struct sna_composite_spans_op *op, 4285 const BoxRec *box, 4286 float opacity) 4287{ 4288 float *v = sna->render.vertices + sna->render.vertex_used; 4289 sna->render.vertex_used += 9; 4290 4291 v[0] = box->x2; 4292 v[6] = v[3] = box->x1; 4293 v[4] = v[1] = box->y2; 4294 v[7] = box->y1; 4295 v[8] = v[5] = v[2] = opacity; 4296} 4297 4298sse2 fastcall static void 4299gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op, 4300 const struct sna_opacity_box *b, 4301 int nbox, float *v) 4302{ 4303 do { 4304 v[0] = b->box.x2; 4305 v[6] = v[3] = b->box.x1; 4306 v[4] = v[1] = b->box.y2; 4307 v[7] = b->box.y1; 4308 v[8] = v[5] = v[2] = b->alpha; 4309 4310 v += 9; 4311 b++; 4312 } while (--nbox); 4313} 4314 4315sse2 fastcall static void 4316gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, 4317 const struct sna_composite_spans_op *op, 4318 const BoxRec *box, 4319 float opacity) 4320{ 4321 float *v = sna->render.vertices + sna->render.vertex_used; 4322 sna->render.vertex_used += 15; 4323 4324 v[0] = op->base.dst.x + box->x2; 4325 v[1] = op->base.dst.y + box->y2; 4326 v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 4327 v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 4328 v[4] = opacity; 4329 4330 v[5] = op->base.dst.x + box->x1; 4331 v[6] = v[1]; 4332 v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 4333 v[8] = v[3]; 4334 v[9] = opacity; 4335 4336 v[10] = v[5]; 4337 v[11] = op->base.dst.y + box->y1; 4338 v[12] = v[7]; 4339 v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 4340 v[14] = opacity; 4341} 4342 4343sse2 fastcall static void 4344gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op, 4345 const struct sna_opacity_box *b, 4346 int nbox, 4347 float *v) 4348{ 4349 do { 4350 v[0] = op->base.dst.x + b->box.x2; 4351 v[1] = op->base.dst.y + b->box.y2; 4352 v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; 4353 v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; 4354 v[4] = b->alpha; 4355 4356 v[5] = op->base.dst.x + b->box.x1; 4357 v[6] = v[1]; 4358 v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; 4359 v[8] = v[3]; 4360 v[9] = b->alpha; 4361 4362 v[10] = v[5]; 4363 v[11] = op->base.dst.y + b->box.y1; 4364 v[12] = v[7]; 4365 v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; 4366 v[14] = b->alpha; 4367 4368 v += 15; 4369 b++; 4370 } while (--nbox); 4371} 4372sse2 fastcall static void 4373gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, 4374 const struct sna_composite_spans_op *op, 4375 const BoxRec *box, 4376 float opacity) 4377{ 4378 PictTransform *transform = op->base.src.transform; 4379 float *v; 4380 4381 v = sna->render.vertices + sna->render.vertex_used; 4382 sna->render.vertex_used += 15; 4383 4384 v[0] = op->base.dst.x + box->x2; 4385 v[6] = v[1] = op->base.dst.y + box->y2; 4386 v[10] = v[5] = op->base.dst.x + box->x1; 4387 v[11] = op->base.dst.y + box->y1; 
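	/* Five floats per vertex: (x, y) destination position, (s, t)
	 * transformed source texcoord, and the span opacity.  Three
	 * vertices per rectangle (bottom-right, bottom-left, top-left),
	 * as with the other span emitters in this file.
	 */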
4388 v[14] = v[9] = v[4] = opacity; 4389 4390 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 4391 (int)op->base.src.offset[1] + box->y2, 4392 transform, op->base.src.scale, 4393 &v[2], &v[3]); 4394 4395 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 4396 (int)op->base.src.offset[1] + box->y2, 4397 transform, op->base.src.scale, 4398 &v[7], &v[8]); 4399 4400 _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 4401 (int)op->base.src.offset[1] + box->y1, 4402 transform, op->base.src.scale, 4403 &v[12], &v[13]); 4404} 4405 4406sse2 fastcall static void 4407gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op, 4408 const struct sna_opacity_box *b, 4409 int nbox, 4410 float *v) 4411{ 4412 PictTransform *transform = op->base.src.transform; 4413 4414 do { 4415 v[0] = op->base.dst.x + b->box.x2; 4416 v[6] = v[1] = op->base.dst.y + b->box.y2; 4417 v[10] = v[5] = op->base.dst.x + b->box.x1; 4418 v[11] = op->base.dst.y + b->box.y1; 4419 v[14] = v[9] = v[4] = b->alpha; 4420 4421 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, 4422 (int)op->base.src.offset[1] + b->box.y2, 4423 transform, op->base.src.scale, 4424 &v[2], &v[3]); 4425 4426 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, 4427 (int)op->base.src.offset[1] + b->box.y2, 4428 transform, op->base.src.scale, 4429 &v[7], &v[8]); 4430 4431 _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, 4432 (int)op->base.src.offset[1] + b->box.y1, 4433 transform, op->base.src.scale, 4434 &v[12], &v[13]); 4435 v += 15; 4436 b++; 4437 } while (--nbox); 4438} 4439 4440sse2 fastcall static void 4441gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna, 4442 const struct sna_composite_spans_op *op, 4443 const BoxRec *box, 4444 float opacity) 4445{ 4446 float *v = sna->render.vertices + sna->render.vertex_used; 4447 sna->render.vertex_used += 15; 4448 4449 v[0] = op->base.dst.x + box->x2; 4450 v[1] = op->base.dst.y + box->y2; 4451 v[2] = op->base.src.offset[0] + box->x2; 4452 v[3] = op->base.src.offset[1] + box->y2; 4453 v[4] = opacity; 4454 4455 v[5] = op->base.dst.x + box->x1; 4456 v[6] = v[1]; 4457 v[7] = op->base.src.offset[0] + box->x1; 4458 v[8] = v[3]; 4459 v[9] = opacity; 4460 4461 v[10] = v[5]; 4462 v[11] = op->base.dst.y + box->y1; 4463 v[12] = v[7]; 4464 v[13] = op->base.src.offset[1] + box->y1; 4465 v[14] = opacity; 4466} 4467 4468sse2 fastcall static void 4469gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op, 4470 const struct sna_opacity_box *b, 4471 int nbox, 4472 float *v) 4473{ 4474 do { 4475 v[0] = op->base.dst.x + b->box.x2; 4476 v[1] = op->base.dst.y + b->box.y2; 4477 v[2] = op->base.src.offset[0] + b->box.x2; 4478 v[3] = op->base.src.offset[1] + b->box.y2; 4479 v[4] = b->alpha; 4480 4481 v[5] = op->base.dst.x + b->box.x1; 4482 v[6] = v[1]; 4483 v[7] = op->base.src.offset[0] + b->box.x1; 4484 v[8] = v[3]; 4485 v[9] = b->alpha; 4486 4487 v[10] = v[5]; 4488 v[11] = op->base.dst.y + b->box.y1; 4489 v[12] = v[7]; 4490 v[13] = op->base.src.offset[1] + b->box.y1; 4491 v[14] = b->alpha; 4492 4493 v += 15; 4494 b++; 4495 } while (--nbox); 4496} 4497 4498sse2 fastcall static void 4499gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna, 4500 const struct sna_composite_spans_op *op, 4501 const BoxRec *box, 4502 float opacity) 4503{ 4504 PictTransform *transform = op->base.src.transform; 4505 float 
*v = sna->render.vertices + sna->render.vertex_used; 4506 sna->render.vertex_used += 15; 4507 4508 v[0] = op->base.dst.x + box->x2; 4509 v[1] = op->base.dst.y + box->y2; 4510 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, 4511 op->base.src.offset[1] + box->y2, 4512 transform, op->base.src.scale, 4513 &v[2], &v[3]); 4514 v[4] = opacity; 4515 4516 v[5] = op->base.dst.x + box->x1; 4517 v[6] = v[1]; 4518 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 4519 op->base.src.offset[1] + box->y2, 4520 transform, op->base.src.scale, 4521 &v[7], &v[8]); 4522 v[9] = opacity; 4523 4524 v[10] = v[5]; 4525 v[11] = op->base.dst.y + box->y1; 4526 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 4527 op->base.src.offset[1] + box->y1, 4528 transform, op->base.src.scale, 4529 &v[12], &v[13]); 4530 v[14] = opacity; 4531} 4532 4533sse2 fastcall static void 4534gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op, 4535 const struct sna_opacity_box *b, 4536 int nbox, 4537 float *v) 4538{ 4539 PictTransform *transform = op->base.src.transform; 4540 4541 do { 4542 v[0] = op->base.dst.x + b->box.x2; 4543 v[1] = op->base.dst.y + b->box.y2; 4544 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, 4545 op->base.src.offset[1] + b->box.y2, 4546 transform, op->base.src.scale, 4547 &v[2], &v[3]); 4548 v[4] = b->alpha; 4549 4550 v[5] = op->base.dst.x + b->box.x1; 4551 v[6] = v[1]; 4552 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 4553 op->base.src.offset[1] + b->box.y2, 4554 transform, op->base.src.scale, 4555 &v[7], &v[8]); 4556 v[9] = b->alpha; 4557 4558 v[10] = v[5]; 4559 v[11] = op->base.dst.y + b->box.y1; 4560 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 4561 op->base.src.offset[1] + b->box.y1, 4562 transform, op->base.src.scale, 4563 &v[12], &v[13]); 4564 v[14] = b->alpha; 4565 v += 15; 4566 b++; 4567 } while (--nbox); 4568} 4569#endif 4570 4571fastcall static void 4572gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, 4573 const struct sna_composite_spans_op *op, 4574 const BoxRec *box, 4575 float opacity) 4576{ 4577 PictTransform *transform = op->base.src.transform; 4578 float *v = sna->render.vertices + sna->render.vertex_used; 4579 sna->render.vertex_used += 15; 4580 4581 v[0] = op->base.dst.x + box->x2; 4582 v[1] = op->base.dst.y + box->y2; 4583 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, 4584 op->base.src.offset[1] + box->y2, 4585 transform, op->base.src.scale, 4586 &v[2], &v[3]); 4587 v[4] = opacity; 4588 4589 v[5] = op->base.dst.x + box->x1; 4590 v[6] = v[1]; 4591 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 4592 op->base.src.offset[1] + box->y2, 4593 transform, op->base.src.scale, 4594 &v[7], &v[8]); 4595 v[9] = opacity; 4596 4597 v[10] = v[5]; 4598 v[11] = op->base.dst.y + box->y1; 4599 _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 4600 op->base.src.offset[1] + box->y1, 4601 transform, op->base.src.scale, 4602 &v[12], &v[13]); 4603 v[14] = opacity; 4604} 4605 4606fastcall static void 4607gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op, 4608 const struct sna_opacity_box *b, 4609 int nbox, 4610 float *v) 4611{ 4612 PictTransform *transform = op->base.src.transform; 4613 4614 do { 4615 v[0] = op->base.dst.x + b->box.x2; 4616 v[1] = op->base.dst.y + b->box.y2; 4617 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, 4618 op->base.src.offset[1] 
+ b->box.y2, 4619 transform, op->base.src.scale, 4620 &v[2], &v[3]); 4621 v[4] = b->alpha; 4622 4623 v[5] = op->base.dst.x + b->box.x1; 4624 v[6] = v[1]; 4625 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 4626 op->base.src.offset[1] + b->box.y2, 4627 transform, op->base.src.scale, 4628 &v[7], &v[8]); 4629 v[9] = b->alpha; 4630 4631 v[10] = v[5]; 4632 v[11] = op->base.dst.y + b->box.y1; 4633 _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 4634 op->base.src.offset[1] + b->box.y1, 4635 transform, op->base.src.scale, 4636 &v[12], &v[13]); 4637 v[14] = b->alpha; 4638 v += 15; 4639 b++; 4640 } while (--nbox); 4641} 4642 4643fastcall static void 4644gen3_emit_composite_spans_primitive(struct sna *sna, 4645 const struct sna_composite_spans_op *op, 4646 const BoxRec *box, 4647 float opacity) 4648{ 4649 gen3_emit_composite_spans_vertex(sna, op, 4650 box->x2, box->y2, 4651 opacity); 4652 gen3_emit_composite_spans_vertex(sna, op, 4653 box->x1, box->y2, 4654 opacity); 4655 gen3_emit_composite_spans_vertex(sna, op, 4656 box->x1, box->y1, 4657 opacity); 4658} 4659 4660fastcall static void 4661gen3_render_composite_spans_constant_box(struct sna *sna, 4662 const struct sna_composite_spans_op *op, 4663 const BoxRec *box, float opacity) 4664{ 4665 float *v; 4666 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 4667 __FUNCTION__, 4668 op->base.src.offset[0], op->base.src.offset[1], 4669 opacity, 4670 op->base.dst.x, op->base.dst.y, 4671 box->x1, box->y1, 4672 box->x2 - box->x1, 4673 box->y2 - box->y1)); 4674 4675 gen3_get_rectangles(sna, &op->base, 1); 4676 4677 v = sna->render.vertices + sna->render.vertex_used; 4678 sna->render.vertex_used += 9; 4679 4680 v[0] = box->x2; 4681 v[6] = v[3] = box->x1; 4682 v[4] = v[1] = box->y2; 4683 v[7] = box->y1; 4684 v[8] = v[5] = v[2] = opacity; 4685} 4686 4687fastcall static void 4688gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, 4689 const struct sna_composite_spans_op *op, 4690 const struct sna_opacity_box *box, 4691 int nbox) 4692{ 4693 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 4694 __FUNCTION__, nbox, 4695 op->base.src.offset[0], op->base.src.offset[1], 4696 op->base.dst.x, op->base.dst.y)); 4697 4698 sna_vertex_lock(&sna->render); 4699 do { 4700 int nbox_this_time; 4701 float *v; 4702 4703 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); 4704 assert(nbox_this_time); 4705 nbox -= nbox_this_time; 4706 4707 v = sna->render.vertices + sna->render.vertex_used; 4708 sna->render.vertex_used += nbox_this_time * 9; 4709 4710 sna_vertex_acquire__locked(&sna->render); 4711 sna_vertex_unlock(&sna->render); 4712 4713 do { 4714 v[0] = box->box.x2; 4715 v[6] = v[3] = box->box.x1; 4716 v[4] = v[1] = box->box.y2; 4717 v[7] = box->box.y1; 4718 v[8] = v[5] = v[2] = box->alpha; 4719 v += 9; 4720 box++; 4721 } while (--nbox_this_time); 4722 4723 sna_vertex_lock(&sna->render); 4724 sna_vertex_release__locked(&sna->render); 4725 } while (nbox); 4726 sna_vertex_unlock(&sna->render); 4727} 4728 4729fastcall static void 4730gen3_render_composite_spans_box(struct sna *sna, 4731 const struct sna_composite_spans_op *op, 4732 const BoxRec *box, float opacity) 4733{ 4734 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 4735 __FUNCTION__, 4736 op->base.src.offset[0], op->base.src.offset[1], 4737 opacity, 4738 op->base.dst.x, op->base.dst.y, 4739 box->x1, box->y1, 4740 box->x2 - box->x1, 4741 box->y2 - box->y1)); 4742 4743 gen3_get_rectangles(sna, &op->base, 1); 
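	/* One rectangle (three vertices) was reserved above; the actual
	 * vertex layout is produced by the shader-specific emitter
	 * selected in gen3_render_composite_spans().
	 */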
4744 op->prim_emit(sna, op, box, opacity); 4745} 4746 4747static void 4748gen3_render_composite_spans_boxes(struct sna *sna, 4749 const struct sna_composite_spans_op *op, 4750 const BoxRec *box, int nbox, 4751 float opacity) 4752{ 4753 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 4754 __FUNCTION__, nbox, 4755 op->base.src.offset[0], op->base.src.offset[1], 4756 opacity, 4757 op->base.dst.x, op->base.dst.y)); 4758 4759 do { 4760 int nbox_this_time; 4761 4762 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); 4763 nbox -= nbox_this_time; 4764 4765 do { 4766 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 4767 box->x1, box->y1, 4768 box->x2 - box->x1, 4769 box->y2 - box->y1)); 4770 4771 op->prim_emit(sna, op, box++, opacity); 4772 } while (--nbox_this_time); 4773 } while (nbox); 4774} 4775 4776fastcall static void 4777gen3_render_composite_spans_boxes__thread(struct sna *sna, 4778 const struct sna_composite_spans_op *op, 4779 const struct sna_opacity_box *box, 4780 int nbox) 4781{ 4782 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 4783 __FUNCTION__, nbox, 4784 op->base.src.offset[0], op->base.src.offset[1], 4785 op->base.dst.x, op->base.dst.y)); 4786 4787 sna_vertex_lock(&sna->render); 4788 do { 4789 int nbox_this_time; 4790 float *v; 4791 4792 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); 4793 assert(nbox_this_time); 4794 nbox -= nbox_this_time; 4795 4796 v = sna->render.vertices + sna->render.vertex_used; 4797 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 4798 4799 sna_vertex_acquire__locked(&sna->render); 4800 sna_vertex_unlock(&sna->render); 4801 4802 op->emit_boxes(op, box, nbox_this_time, v); 4803 box += nbox_this_time; 4804 4805 sna_vertex_lock(&sna->render); 4806 sna_vertex_release__locked(&sna->render); 4807 } while (nbox); 4808 sna_vertex_unlock(&sna->render); 4809} 4810 4811fastcall static void 4812gen3_render_composite_spans_done(struct sna *sna, 4813 const struct sna_composite_spans_op *op) 4814{ 4815 if (sna->render.vertex_offset) 4816 gen3_vertex_flush(sna); 4817 4818 DBG(("%s()\n", __FUNCTION__)); 4819 4820 if (op->base.src.bo) 4821 kgem_bo_destroy(&sna->kgem, op->base.src.bo); 4822 4823 sna_render_composite_redirect_done(sna, &op->base); 4824} 4825 4826static bool 4827gen3_check_composite_spans(struct sna *sna, 4828 uint8_t op, PicturePtr src, PicturePtr dst, 4829 int16_t width, int16_t height, unsigned flags) 4830{ 4831 if (op >= ARRAY_SIZE(gen3_blend_op)) 4832 return false; 4833 4834 if (gen3_composite_fallback(sna, op, src, NULL, dst)) 4835 return false; 4836 4837 if (need_tiling(sna, width, height) && 4838 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 4839 DBG(("%s: fallback, tiled operation not on GPU\n", 4840 __FUNCTION__)); 4841 return false; 4842 } 4843 4844 return true; 4845} 4846 4847static bool 4848gen3_render_composite_spans(struct sna *sna, 4849 uint8_t op, 4850 PicturePtr src, 4851 PicturePtr dst, 4852 int16_t src_x, int16_t src_y, 4853 int16_t dst_x, int16_t dst_y, 4854 int16_t width, int16_t height, 4855 unsigned flags, 4856 struct sna_composite_spans_op *tmp) 4857{ 4858 bool no_offset; 4859 4860 DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, 4861 src_x, src_y, dst_x, dst_y, width, height)); 4862 4863 assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags)); 4864 4865 if (need_tiling(sna, width, height)) { 4866 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 4867 __FUNCTION__, width, height)); 4868 return 
sna_tiling_composite_spans(op, src, dst, 4869 src_x, src_y, dst_x, dst_y, 4870 width, height, flags, tmp); 4871 } 4872 4873 if (!gen3_composite_set_target(sna, &tmp->base, dst, 4874 dst_x, dst_y, width, height, 4875 true)) { 4876 DBG(("%s: unable to set render target\n", 4877 __FUNCTION__)); 4878 return false; 4879 } 4880 4881 tmp->base.op = op; 4882 tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format); 4883 tmp->base.src.u.gen3.type = SHADER_TEXTURE; 4884 tmp->base.src.is_affine = true; 4885 DBG(("%s: preparing source\n", __FUNCTION__)); 4886 switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src, 4887 src_x, src_y, 4888 width, height, 4889 dst_x, dst_y, 4890 dst->polyMode == PolyModePrecise)) { 4891 case -1: 4892 goto cleanup_dst; 4893 case 0: 4894 tmp->base.src.u.gen3.type = SHADER_ZERO; 4895 break; 4896 case 1: 4897 gen3_composite_channel_convert(&tmp->base.src); 4898 break; 4899 } 4900 DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type)); 4901 4902 if (tmp->base.src.u.gen3.type != SHADER_ZERO) 4903 tmp->base.mask.u.gen3.type = SHADER_OPACITY; 4904 4905 no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0; 4906 tmp->box = gen3_render_composite_spans_box; 4907 tmp->boxes = gen3_render_composite_spans_boxes; 4908 tmp->thread_boxes = gen3_render_composite_spans_boxes__thread; 4909 tmp->done = gen3_render_composite_spans_done; 4910 tmp->prim_emit = gen3_emit_composite_spans_primitive; 4911 switch (tmp->base.src.u.gen3.type) { 4912 case SHADER_NONE: 4913 assert(0); 4914 case SHADER_ZERO: 4915 if (no_offset) { 4916 tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset; 4917 tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes; 4918 } else { 4919 tmp->prim_emit = gen3_emit_composite_spans_primitive_zero; 4920 tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes; 4921 } 4922 break; 4923 case SHADER_BLACK: 4924 case SHADER_WHITE: 4925 case SHADER_CONSTANT: 4926 if (no_offset) { 4927#if defined(sse2) && !defined(__x86_64__) 4928 if (sna->cpu_features & SSE2) { 4929 tmp->box = gen3_render_composite_spans_constant_box__sse2; 4930 tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes; 4931 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset; 4932 tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes; 4933 } else 4934#endif 4935 { 4936 tmp->box = gen3_render_composite_spans_constant_box; 4937 tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes; 4938 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset; 4939 tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes; 4940 } 4941 } else { 4942#if defined(sse2) && !defined(__x86_64__) 4943 if (sna->cpu_features & SSE2) { 4944 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2; 4945 tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes; 4946 } else 4947#endif 4948 { 4949 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant; 4950 tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes; 4951 } 4952 } 4953 break; 4954 case SHADER_LINEAR: 4955 case SHADER_RADIAL: 4956 if (tmp->base.src.transform == NULL) { 4957#if defined(sse2) && !defined(__x86_64__) 4958 if (sna->cpu_features & SSE2) { 4959 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2; 4960 tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes; 
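			/* The __sse2 emitters match the plain C versions
			 * below; they exist so that 32-bit builds can pick
			 * the SSE2 code path at run time via
			 * sna->cpu_features.
			 */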
4961 } else 4962#endif 4963 { 4964 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient; 4965 tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes; 4966 } 4967 } else if (tmp->base.src.is_affine) { 4968 tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2]; 4969#if defined(sse2) && !defined(__x86_64__) 4970 if (sna->cpu_features & SSE2) { 4971 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2; 4972 tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes; 4973 } else 4974#endif 4975 { 4976 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient; 4977 tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes; 4978 } 4979 } 4980 break; 4981 case SHADER_TEXTURE: 4982 if (tmp->base.src.transform == NULL) { 4983#if defined(sse2) && !defined(__x86_64__) 4984 if (sna->cpu_features & SSE2) { 4985 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2; 4986 tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes; 4987 } else 4988#endif 4989 { 4990 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source; 4991 tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes; 4992 } 4993 } else if (tmp->base.src.is_affine) { 4994 tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; 4995 tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; 4996#if defined(sse2) && !defined(__x86_64__) 4997 if (sna->cpu_features & SSE2) { 4998 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2; 4999 tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes; 5000 } else 5001#endif 5002 { 5003 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source; 5004 tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes; 5005 } 5006 } 5007 break; 5008 } 5009 if (tmp->emit_boxes == NULL) 5010 tmp->thread_boxes = NULL; 5011 5012 tmp->base.mask.bo = NULL; 5013 5014 tmp->base.floats_per_vertex = 2; 5015 if (!is_constant_ps(tmp->base.src.u.gen3.type)) 5016 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3; 5017 tmp->base.floats_per_vertex += 5018 tmp->base.mask.u.gen3.type == SHADER_OPACITY; 5019 tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; 5020 5021 if (!kgem_check_bo(&sna->kgem, 5022 tmp->base.dst.bo, tmp->base.src.bo, 5023 NULL)) { 5024 kgem_submit(&sna->kgem); 5025 if (!kgem_check_bo(&sna->kgem, 5026 tmp->base.dst.bo, tmp->base.src.bo, 5027 NULL)) 5028 goto cleanup_src; 5029 } 5030 5031 gen3_align_vertex(sna, &tmp->base); 5032 gen3_emit_composite_state(sna, &tmp->base); 5033 return true; 5034 5035cleanup_src: 5036 if (tmp->base.src.bo) 5037 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 5038cleanup_dst: 5039 if (tmp->base.redirect.real_bo) 5040 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 5041 return false; 5042} 5043 5044static void 5045gen3_emit_video_state(struct sna *sna, 5046 struct sna_video *video, 5047 struct sna_video_frame *frame, 5048 PixmapPtr pixmap, 5049 struct kgem_bo *dst_bo, 5050 int width, int height, 5051 bool bilinear) 5052{ 5053 struct gen3_render_state *state = &sna->render_state.gen3; 5054 uint32_t id, ms3, rewind; 5055 5056 gen3_emit_target(sna, dst_bo, width, height, 5057 sna_format_for_depth(pixmap->drawable.depth)); 5058 5059 /* XXX share with composite? Is it worth the effort? 
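	 * Note: bit 31 of last_shader is used below to mark that the video
	 * pipeline state (S1/S2/S6 and the 4-float vertex layout) has
	 * already been emitted, so it can be skipped on subsequent calls.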
*/ 5060 if ((state->last_shader & (1<<31)) == 0) { 5061 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 5062 I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 5063 2); 5064 OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT)); 5065 OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | 5066 S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | 5067 S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | 5068 S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | 5069 S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | 5070 S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | 5071 S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | 5072 S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); 5073 OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | 5074 (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | 5075 S6_COLOR_WRITE_ENABLE); 5076 5077 state->last_blend = 0; 5078 state->floats_per_vertex = 4; 5079 } 5080 5081 if (!is_planar_fourcc(frame->id)) { 5082 rewind = sna->kgem.nbatch; 5083 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); 5084 OUT_BATCH(0x0000001); /* constant 0 */ 5085 /* constant 0: brightness/contrast */ 5086 OUT_BATCH_F(video->brightness / 128.0); 5087 OUT_BATCH_F(video->contrast / 255.0); 5088 OUT_BATCH_F(0.0); 5089 OUT_BATCH_F(0.0); 5090 if (state->last_constants && 5091 memcmp(&sna->kgem.batch[state->last_constants], 5092 &sna->kgem.batch[rewind], 5093 6*sizeof(uint32_t)) == 0) 5094 sna->kgem.nbatch = rewind; 5095 else 5096 state->last_constants = rewind; 5097 5098 rewind = sna->kgem.nbatch; 5099 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); 5100 OUT_BATCH(0x00000001); 5101 OUT_BATCH(SS2_COLORSPACE_CONVERSION | 5102 (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 5103 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 5104 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 5105 (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 5106 (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | 5107 SS3_NORMALIZED_COORDS); 5108 OUT_BATCH(0x00000000); 5109 if (state->last_sampler && 5110 memcmp(&sna->kgem.batch[state->last_sampler], 5111 &sna->kgem.batch[rewind], 5112 5*sizeof(uint32_t)) == 0) 5113 sna->kgem.nbatch = rewind; 5114 else 5115 state->last_sampler = rewind; 5116 5117 OUT_BATCH(_3DSTATE_MAP_STATE | 3); 5118 OUT_BATCH(0x00000001); /* texture map #1 */ 5119 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 5120 frame->bo, 5121 I915_GEM_DOMAIN_SAMPLER << 16, 5122 0)); 5123 5124 ms3 = MAPSURF_422; 5125 switch (frame->id) { 5126 case FOURCC_YUY2: 5127 ms3 |= MT_422_YCRCB_NORMAL; 5128 break; 5129 case FOURCC_UYVY: 5130 ms3 |= MT_422_YCRCB_SWAPY; 5131 break; 5132 } 5133 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; 5134 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; 5135 OUT_BATCH(ms3); 5136 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); 5137 5138 id = 1<<31 | 1<<1 | !!video->brightness; 5139 if (state->last_shader != id) { 5140 state->last_shader = id; 5141 id = sna->kgem.nbatch++; 5142 5143 gen3_fs_dcl(FS_S0); 5144 gen3_fs_dcl(FS_T0); 5145 gen3_fs_texld(FS_OC, FS_S0, FS_T0); 5146 if (video->brightness != 0) { 5147 gen3_fs_add(FS_OC, 5148 gen3_fs_operand_reg(FS_OC), 5149 gen3_fs_operand(FS_C0, X, X, X, ZERO)); 5150 } 5151 5152 sna->kgem.batch[id] = 5153 _3DSTATE_PIXEL_SHADER_PROGRAM | 5154 (sna->kgem.nbatch - id - 2); 5155 } 5156 } else { 5157 /* For the planar formats, we set up three samplers -- 5158 * one for each plane, in a Y8 format. 
Because I 5159 * couldn't get the special PLANAR_TO_PACKED 5160 * shader setup to work, I did the manual pixel shader: 5161 * 5162 * y' = y - .0625 5163 * u' = u - .5 5164 * v' = v - .5; 5165 * 5166 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 5167 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 5168 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 5169 * 5170 * register assignment: 5171 * r0 = (y',u',v',0) 5172 * r1 = (y,y,y,y) 5173 * r2 = (u,u,u,u) 5174 * r3 = (v,v,v,v) 5175 * OC = (r,g,b,1) 5176 */ 5177 rewind = sna->kgem.nbatch; 5178 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); 5179 OUT_BATCH(0x000001f); /* constants 0-4 */ 5180 /* constant 0: normalization offsets */ 5181 OUT_BATCH_F(-0.0625); 5182 OUT_BATCH_F(-0.5); 5183 OUT_BATCH_F(-0.5); 5184 OUT_BATCH_F(0.0); 5185 /* constant 1: r coefficients */ 5186 OUT_BATCH_F(1.1643); 5187 OUT_BATCH_F(0.0); 5188 OUT_BATCH_F(1.5958); 5189 OUT_BATCH_F(0.0); 5190 /* constant 2: g coefficients */ 5191 OUT_BATCH_F(1.1643); 5192 OUT_BATCH_F(-0.39173); 5193 OUT_BATCH_F(-0.81290); 5194 OUT_BATCH_F(0.0); 5195 /* constant 3: b coefficients */ 5196 OUT_BATCH_F(1.1643); 5197 OUT_BATCH_F(2.017); 5198 OUT_BATCH_F(0.0); 5199 OUT_BATCH_F(0.0); 5200 /* constant 4: brightness/contrast */ 5201 OUT_BATCH_F(video->brightness / 128.0); 5202 OUT_BATCH_F(video->contrast / 255.0); 5203 OUT_BATCH_F(0.0); 5204 OUT_BATCH_F(0.0); 5205 if (state->last_constants && 5206 memcmp(&sna->kgem.batch[state->last_constants], 5207 &sna->kgem.batch[rewind], 5208 22*sizeof(uint32_t)) == 0) 5209 sna->kgem.nbatch = rewind; 5210 else 5211 state->last_constants = rewind; 5212 5213 rewind = sna->kgem.nbatch; 5214 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); 5215 OUT_BATCH(0x00000007); 5216 /* sampler 0 */ 5217 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 5218 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 5219 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 5220 (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 5221 (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | 5222 SS3_NORMALIZED_COORDS); 5223 OUT_BATCH(0x00000000); 5224 /* sampler 1 */ 5225 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 5226 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 5227 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 5228 (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 5229 (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | 5230 SS3_NORMALIZED_COORDS); 5231 OUT_BATCH(0x00000000); 5232 /* sampler 2 */ 5233 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | 5234 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); 5235 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | 5236 (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | 5237 (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | 5238 SS3_NORMALIZED_COORDS); 5239 OUT_BATCH(0x00000000); 5240 if (state->last_sampler && 5241 memcmp(&sna->kgem.batch[state->last_sampler], 5242 &sna->kgem.batch[rewind], 5243 11*sizeof(uint32_t)) == 0) 5244 sna->kgem.nbatch = rewind; 5245 else 5246 state->last_sampler = rewind; 5247 5248 OUT_BATCH(_3DSTATE_MAP_STATE | 9); 5249 OUT_BATCH(0x00000007); 5250 5251 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 5252 frame->bo, 5253 I915_GEM_DOMAIN_SAMPLER << 16, 5254 0)); 5255 5256 ms3 = MAPSURF_8BIT | MT_8BIT_I8; 5257 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; 5258 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; 5259 OUT_BATCH(ms3); 5260 /* check to see if Y has special pitch than normal 5261 * double u/v pitch, e.g i915 XvMC hw requires at 5262 * least 1K alignment, so Y pitch might 5263 * be same as U/V's.*/ 5264 if 
(frame->pitch[1]) 5265 OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT); 5266 else 5267 OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT); 5268 5269 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 5270 frame->bo, 5271 I915_GEM_DOMAIN_SAMPLER << 16, 5272 frame->UBufOffset)); 5273 5274 ms3 = MAPSURF_8BIT | MT_8BIT_I8; 5275 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; 5276 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; 5277 OUT_BATCH(ms3); 5278 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); 5279 5280 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 5281 frame->bo, 5282 I915_GEM_DOMAIN_SAMPLER << 16, 5283 frame->VBufOffset)); 5284 5285 ms3 = MAPSURF_8BIT | MT_8BIT_I8; 5286 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; 5287 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; 5288 OUT_BATCH(ms3); 5289 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); 5290 5291 id = 1<<31 | 2<<1 | !!video->brightness; 5292 if (state->last_shader != id) { 5293 state->last_shader = id; 5294 id = sna->kgem.nbatch++; 5295 5296 /* Declare samplers */ 5297 gen3_fs_dcl(FS_S0); /* Y */ 5298 gen3_fs_dcl(FS_S1); /* U */ 5299 gen3_fs_dcl(FS_S2); /* V */ 5300 gen3_fs_dcl(FS_T0); /* normalized coords */ 5301 5302 /* Load samplers to temporaries. */ 5303 gen3_fs_texld(FS_R1, FS_S0, FS_T0); 5304 gen3_fs_texld(FS_R2, FS_S1, FS_T0); 5305 gen3_fs_texld(FS_R3, FS_S2, FS_T0); 5306 5307 /* Move the sampled YUV data in R[123] to the first 5308 * 3 channels of R0. 5309 */ 5310 gen3_fs_mov_masked(FS_R0, MASK_X, 5311 gen3_fs_operand_reg(FS_R1)); 5312 gen3_fs_mov_masked(FS_R0, MASK_Y, 5313 gen3_fs_operand_reg(FS_R2)); 5314 gen3_fs_mov_masked(FS_R0, MASK_Z, 5315 gen3_fs_operand_reg(FS_R3)); 5316 5317 /* Normalize the YUV data */ 5318 gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0), 5319 gen3_fs_operand_reg(FS_C0)); 5320 /* dot-product the YUV data in R0 by the vectors of 5321 * coefficients for calculating R, G, and B, storing 5322 * the results in the R, G, or B channels of the output 5323 * color. The OC results are implicitly clamped 5324 * at the end of the program. 5325 */ 5326 gen3_fs_dp3(FS_OC, MASK_X, 5327 gen3_fs_operand_reg(FS_R0), 5328 gen3_fs_operand_reg(FS_C1)); 5329 gen3_fs_dp3(FS_OC, MASK_Y, 5330 gen3_fs_operand_reg(FS_R0), 5331 gen3_fs_operand_reg(FS_C2)); 5332 gen3_fs_dp3(FS_OC, MASK_Z, 5333 gen3_fs_operand_reg(FS_R0), 5334 gen3_fs_operand_reg(FS_C3)); 5335 /* Set alpha of the output to 1.0, by wiring W to 1 5336 * and not actually using the source. 
5337 */ 5338 gen3_fs_mov_masked(FS_OC, MASK_W, 5339 gen3_fs_operand_one()); 5340 5341 if (video->brightness != 0) { 5342 gen3_fs_add(FS_OC, 5343 gen3_fs_operand_reg(FS_OC), 5344 gen3_fs_operand(FS_C4, X, X, X, ZERO)); 5345 } 5346 5347 sna->kgem.batch[id] = 5348 _3DSTATE_PIXEL_SHADER_PROGRAM | 5349 (sna->kgem.nbatch - id - 2); 5350 } 5351 } 5352} 5353 5354static void 5355gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo) 5356{ 5357 kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 5358 5359 if (!kgem_check_batch(&sna->kgem, 120) || 5360 !kgem_check_reloc(&sna->kgem, 4) || 5361 !kgem_check_exec(&sna->kgem, 2)) { 5362 _kgem_submit(&sna->kgem); 5363 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 5364 } 5365 5366 if (sna->render_state.gen3.need_invariant) 5367 gen3_emit_invariant(sna); 5368} 5369 5370static int 5371gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) 5372{ 5373 int size = floats_per_vertex * 3; 5374 int rem = batch_space(sna) - 1; 5375 5376 if (size * want > rem) 5377 want = rem / size; 5378 5379 return want; 5380} 5381 5382static bool 5383gen3_render_video(struct sna *sna, 5384 struct sna_video *video, 5385 struct sna_video_frame *frame, 5386 RegionPtr dstRegion, 5387 PixmapPtr pixmap) 5388{ 5389 struct sna_pixmap *priv = sna_pixmap(pixmap); 5390 const BoxRec *pbox = region_rects(dstRegion); 5391 int nbox = region_num_rects(dstRegion); 5392 int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 5393 int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 5394 int src_width = frame->src.x2 - frame->src.x1; 5395 int src_height = frame->src.y2 - frame->src.y1; 5396 float src_offset_x, src_offset_y; 5397 float src_scale_x, src_scale_y; 5398 int pix_xoff, pix_yoff; 5399 struct kgem_bo *dst_bo; 5400 bool bilinear; 5401 int copy = 0; 5402 5403 DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, 5404 src_width, src_height, frame->width, frame->height, dst_width, dst_height)); 5405 5406 assert(priv->gpu_bo); 5407 dst_bo = priv->gpu_bo; 5408 5409 bilinear = src_width != dst_width || src_height != dst_height; 5410 5411 src_scale_x = (float)src_width / dst_width / frame->width; 5412 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 5413 5414 src_scale_y = (float)src_height / dst_height / frame->height; 5415 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 5416 DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", 5417 __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); 5418 5419 if (too_large(pixmap->drawable.width, pixmap->drawable.height) || 5420 !gen3_check_pitch_3d(dst_bo)) { 5421 int bpp = pixmap->drawable.bitsPerPixel; 5422 5423 if (too_large(dst_width, dst_height)) 5424 return false; 5425 5426 dst_bo = kgem_create_2d(&sna->kgem, 5427 dst_width, dst_height, bpp, 5428 kgem_choose_tiling(&sna->kgem, 5429 I915_TILING_X, 5430 dst_width, dst_height, bpp), 5431 0); 5432 if (!dst_bo) 5433 return false; 5434 5435 pix_xoff = -dstRegion->extents.x1; 5436 pix_yoff = -dstRegion->extents.y1; 5437 copy = 1; 5438 } else { 5439 /* Set up the offset for translating from the given region 5440 * (in screen coordinates) to the backing pixmap. 
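	 * With COMPOSITE builds the pixmap may be redirected away from
	 * the screen origin, hence the screen_x/screen_y correction below.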
5441 */ 5442#ifdef COMPOSITE 5443 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 5444 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 5445#else 5446 pix_xoff = 0; 5447 pix_yoff = 0; 5448#endif 5449 5450 dst_width = pixmap->drawable.width; 5451 dst_height = pixmap->drawable.height; 5452 } 5453 5454 gen3_video_get_batch(sna, dst_bo); 5455 gen3_emit_video_state(sna, video, frame, pixmap, 5456 dst_bo, dst_width, dst_height, bilinear); 5457 do { 5458 int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); 5459 if (nbox_this_time == 0) { 5460 gen3_video_get_batch(sna, dst_bo); 5461 gen3_emit_video_state(sna, video, frame, pixmap, 5462 dst_bo, dst_width, dst_height, bilinear); 5463 nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); 5464 assert(nbox_this_time); 5465 } 5466 nbox -= nbox_this_time; 5467 5468 OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); 5469 do { 5470 int box_x1 = pbox->x1; 5471 int box_y1 = pbox->y1; 5472 int box_x2 = pbox->x2; 5473 int box_y2 = pbox->y2; 5474 5475 pbox++; 5476 5477 DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n", 5478 __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, 5479 box_x1 * src_scale_x + src_offset_x, 5480 box_y1 * src_scale_y + src_offset_y, 5481 box_x2 * src_scale_x + src_offset_x, 5482 box_y2 * src_scale_y + src_offset_y)); 5483 5484 /* bottom right */ 5485 OUT_BATCH_F(box_x2 + pix_xoff); 5486 OUT_BATCH_F(box_y2 + pix_yoff); 5487 OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x); 5488 OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y); 5489 5490 /* bottom left */ 5491 OUT_BATCH_F(box_x1 + pix_xoff); 5492 OUT_BATCH_F(box_y2 + pix_yoff); 5493 OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x); 5494 OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y); 5495 5496 /* top left */ 5497 OUT_BATCH_F(box_x1 + pix_xoff); 5498 OUT_BATCH_F(box_y1 + pix_yoff); 5499 OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x); 5500 OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y); 5501 } while (--nbox_this_time); 5502 } while (nbox); 5503 5504 if (copy) { 5505#ifdef COMPOSITE 5506 pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 5507 pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 5508#else 5509 pix_xoff = 0; 5510 pix_yoff = 0; 5511#endif 5512 sna_blt_copy_boxes(sna, GXcopy, 5513 dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, 5514 priv->gpu_bo, pix_xoff, pix_yoff, 5515 pixmap->drawable.bitsPerPixel, 5516 region_rects(dstRegion), 5517 region_num_rects(dstRegion)); 5518 5519 kgem_bo_destroy(&sna->kgem, dst_bo); 5520 } 5521 5522 if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 5523 if ((pix_xoff | pix_yoff) == 0) { 5524 sna_damage_add(&priv->gpu_damage, dstRegion); 5525 sna_damage_subtract(&priv->cpu_damage, dstRegion); 5526 } else { 5527 sna_damage_add_boxes(&priv->gpu_damage, 5528 region_rects(dstRegion), 5529 region_num_rects(dstRegion), 5530 pix_xoff, pix_yoff); 5531 sna_damage_subtract_boxes(&priv->cpu_damage, 5532 region_rects(dstRegion), 5533 region_num_rects(dstRegion), 5534 pix_xoff, pix_yoff); 5535 } 5536 } 5537 5538 return true; 5539} 5540 5541static void 5542gen3_render_copy_setup_source(struct sna_composite_channel *channel, 5543 const DrawableRec *draw, 5544 struct kgem_bo *bo) 5545{ 5546 int i; 5547 5548 channel->u.gen3.type = SHADER_TEXTURE; 5549 channel->filter = gen3_filter(PictFilterNearest); 5550 channel->repeat = gen3_texture_repeat(RepeatNone); 5551 channel->width = draw->width; 5552 channel->height = draw->height; 5553 channel->scale[0] = 1.f/draw->width; 5554 channel->scale[1] = 
1.f/draw->height; 5555 channel->offset[0] = 0; 5556 channel->offset[1] = 0; 5557 5558 channel->pict_format = sna_format_for_depth(draw->depth); 5559 if (!gen3_composite_channel_set_format(channel, channel->pict_format)) { 5560 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { 5561 if (gen3_tex_formats[i].xfmt == channel->pict_format) { 5562 channel->card_format = gen3_tex_formats[i].card_fmt; 5563 channel->rb_reversed = gen3_tex_formats[i].rb_reversed; 5564 channel->alpha_fixup = true; 5565 break; 5566 } 5567 } 5568 } 5569 assert(channel->card_format); 5570 5571 channel->bo = bo; 5572 channel->is_affine = 1; 5573} 5574 5575static bool 5576gen3_render_copy_boxes(struct sna *sna, uint8_t alu, 5577 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 5578 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 5579 const BoxRec *box, int n, unsigned flags) 5580{ 5581 struct sna_composite_op tmp; 5582 5583#if NO_COPY_BOXES 5584 if (!sna_blt_compare_depth(src, dst)) 5585 return false; 5586 5587 return sna_blt_copy_boxes(sna, alu, 5588 src_bo, src_dx, src_dy, 5589 dst_bo, dst_dx, dst_dy, 5590 dst->bitsPerPixel, 5591 box, n); 5592#endif 5593 5594 DBG(("%s (%d, %d)->(%d, %d) x %d\n", 5595 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 5596 5597 if (sna_blt_compare_depth(src, dst) && 5598 sna_blt_copy_boxes(sna, alu, 5599 src_bo, src_dx, src_dy, 5600 dst_bo, dst_dx, dst_dy, 5601 dst->bitsPerPixel, 5602 box, n)) 5603 return true; 5604 5605 if (!(alu == GXcopy || alu == GXclear) || 5606 src_bo == dst_bo || /* XXX handle overlap using 3D ? */ 5607 src_bo->pitch > MAX_3D_PITCH || 5608 too_large(src->width, src->height)) { 5609fallback_blt: 5610 if (!kgem_bo_can_blt(&sna->kgem, src_bo) || 5611 !kgem_bo_can_blt(&sna->kgem, dst_bo)) 5612 return false; 5613 5614 return sna_blt_copy_boxes_fallback(sna, alu, 5615 src, src_bo, src_dx, src_dy, 5616 dst, dst_bo, dst_dx, dst_dy, 5617 box, n); 5618 } 5619 5620 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 5621 kgem_submit(&sna->kgem); 5622 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 5623 goto fallback_blt; 5624 } 5625 5626 memset(&tmp, 0, sizeof(tmp)); 5627 tmp.op = alu == GXcopy ? 
PictOpSrc : PictOpClear; 5628 5629 tmp.dst.pixmap = (PixmapPtr)dst; 5630 tmp.dst.width = dst->width; 5631 tmp.dst.height = dst->height; 5632 tmp.dst.format = sna_format_for_depth(dst->depth); 5633 tmp.dst.bo = dst_bo; 5634 tmp.dst.x = tmp.dst.y = 0; 5635 tmp.damage = NULL; 5636 5637 sna_render_composite_redirect_init(&tmp); 5638 if (too_large(tmp.dst.width, tmp.dst.height) || 5639 dst_bo->pitch > MAX_3D_PITCH) { 5640 BoxRec extents = box[0]; 5641 int i; 5642 5643 for (i = 1; i < n; i++) { 5644 if (box[i].x1 < extents.x1) 5645 extents.x1 = box[i].x1; 5646 if (box[i].y1 < extents.y1) 5647 extents.y1 = box[i].y1; 5648 5649 if (box[i].x2 > extents.x2) 5650 extents.x2 = box[i].x2; 5651 if (box[i].y2 > extents.y2) 5652 extents.y2 = box[i].y2; 5653 } 5654 if (!sna_render_composite_redirect(sna, &tmp, 5655 extents.x1 + dst_dx, 5656 extents.y1 + dst_dy, 5657 extents.x2 - extents.x1, 5658 extents.y2 - extents.y1, 5659 n > 1)) 5660 goto fallback_tiled; 5661 } 5662 5663 gen3_render_copy_setup_source(&tmp.src, src, src_bo); 5664 5665 tmp.floats_per_vertex = 4; 5666 tmp.floats_per_rect = 12; 5667 tmp.mask.bo = NULL; 5668 tmp.mask.u.gen3.type = SHADER_NONE; 5669 5670 dst_dx += tmp.dst.x; 5671 dst_dy += tmp.dst.y; 5672 tmp.dst.x = tmp.dst.y = 0; 5673 5674 gen3_align_vertex(sna, &tmp); 5675 gen3_emit_composite_state(sna, &tmp); 5676 5677 do { 5678 int n_this_time; 5679 5680 n_this_time = gen3_get_rectangles(sna, &tmp, n); 5681 n -= n_this_time; 5682 5683 do { 5684 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 5685 box->x1 + src_dx, box->y1 + src_dy, 5686 box->x1 + dst_dx, box->y1 + dst_dy, 5687 box->x2 - box->x1, box->y2 - box->y1)); 5688 OUT_VERTEX(box->x2 + dst_dx); 5689 OUT_VERTEX(box->y2 + dst_dy); 5690 OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); 5691 OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 5692 5693 OUT_VERTEX(box->x1 + dst_dx); 5694 OUT_VERTEX(box->y2 + dst_dy); 5695 OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 5696 OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 5697 5698 OUT_VERTEX(box->x1 + dst_dx); 5699 OUT_VERTEX(box->y1 + dst_dy); 5700 OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 5701 OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); 5702 5703 box++; 5704 } while (--n_this_time); 5705 } while (n); 5706 5707 gen3_vertex_flush(sna); 5708 sna_render_composite_redirect_done(sna, &tmp); 5709 return true; 5710 5711fallback_tiled: 5712 return sna_tiling_copy_boxes(sna, alu, 5713 src, src_bo, src_dx, src_dy, 5714 dst, dst_bo, dst_dx, dst_dy, 5715 box, n); 5716} 5717 5718static void 5719gen3_render_copy_blt(struct sna *sna, 5720 const struct sna_copy_op *op, 5721 int16_t sx, int16_t sy, 5722 int16_t w, int16_t h, 5723 int16_t dx, int16_t dy) 5724{ 5725 gen3_get_rectangles(sna, &op->base, 1); 5726 5727 OUT_VERTEX(dx+w); 5728 OUT_VERTEX(dy+h); 5729 OUT_VERTEX((sx+w)*op->base.src.scale[0]); 5730 OUT_VERTEX((sy+h)*op->base.src.scale[1]); 5731 5732 OUT_VERTEX(dx); 5733 OUT_VERTEX(dy+h); 5734 OUT_VERTEX(sx*op->base.src.scale[0]); 5735 OUT_VERTEX((sy+h)*op->base.src.scale[1]); 5736 5737 OUT_VERTEX(dx); 5738 OUT_VERTEX(dy); 5739 OUT_VERTEX(sx*op->base.src.scale[0]); 5740 OUT_VERTEX(sy*op->base.src.scale[1]); 5741} 5742 5743static void 5744gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 5745{ 5746 if (sna->render.vertex_offset) 5747 gen3_vertex_flush(sna); 5748} 5749 5750static bool 5751gen3_render_copy(struct sna *sna, uint8_t alu, 5752 PixmapPtr src, struct kgem_bo *src_bo, 5753 PixmapPtr dst, struct kgem_bo *dst_bo, 5754 struct sna_copy_op *tmp) 5755{ 
5756#if NO_COPY 5757 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 5758 return false; 5759 5760 return sna_blt_copy(sna, alu, 5761 src_bo, dst_bo, 5762 dst->drawable.bitsPerPixel, 5763 tmp); 5764#endif 5765 5766 /* Prefer to use the BLT */ 5767 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 5768 sna_blt_copy(sna, alu, 5769 src_bo, dst_bo, 5770 dst->drawable.bitsPerPixel, 5771 tmp)) 5772 return true; 5773 5774 /* Must use the BLT if we can't RENDER... */ 5775 if (!(alu == GXcopy || alu == GXclear) || 5776 too_large(src->drawable.width, src->drawable.height) || 5777 too_large(dst->drawable.width, dst->drawable.height) || 5778 src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) { 5779fallback: 5780 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 5781 return false; 5782 5783 return sna_blt_copy(sna, alu, src_bo, dst_bo, 5784 dst->drawable.bitsPerPixel, 5785 tmp); 5786 } 5787 5788 tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; 5789 5790 tmp->base.dst.pixmap = dst; 5791 tmp->base.dst.width = dst->drawable.width; 5792 tmp->base.dst.height = dst->drawable.height; 5793 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 5794 tmp->base.dst.bo = dst_bo; 5795 5796 gen3_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo); 5797 5798 tmp->base.floats_per_vertex = 4; 5799 tmp->base.floats_per_rect = 12; 5800 tmp->base.mask.bo = NULL; 5801 tmp->base.mask.u.gen3.type = SHADER_NONE; 5802 5803 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 5804 kgem_submit(&sna->kgem); 5805 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 5806 goto fallback; 5807 } 5808 5809 tmp->blt = gen3_render_copy_blt; 5810 tmp->done = gen3_render_copy_done; 5811 5812 gen3_align_vertex(sna, &tmp->base); 5813 gen3_emit_composite_state(sna, &tmp->base); 5814 return true; 5815} 5816 5817static bool 5818gen3_render_fill_boxes_try_blt(struct sna *sna, 5819 CARD8 op, PictFormat format, 5820 const xRenderColor *color, 5821 const DrawableRec *dst, struct kgem_bo *dst_bo, 5822 const BoxRec *box, int n) 5823{ 5824 uint8_t alu; 5825 uint32_t pixel; 5826 5827 if (dst_bo->tiling == I915_TILING_Y) { 5828 DBG(("%s: y-tiling, can't blit\n", __FUNCTION__)); 5829 assert(!too_large(dst->width, dst->height)); 5830 return false; 5831 } 5832 5833 if (op > PictOpSrc) 5834 return false; 5835 5836 if (op == PictOpClear) { 5837 alu = GXclear; 5838 pixel = 0; 5839 } else if (!sna_get_pixel_from_rgba(&pixel, 5840 color->red, 5841 color->green, 5842 color->blue, 5843 color->alpha, 5844 format)) 5845 return false; 5846 else 5847 alu = GXcopy; 5848 5849 return sna_blt_fill_boxes(sna, alu, 5850 dst_bo, dst->bitsPerPixel, 5851 pixel, box, n); 5852} 5853 5854static inline bool prefer_fill_blt(struct sna *sna) 5855{ 5856#if PREFER_BLT_FILL 5857 return true; 5858#else 5859 return sna->kgem.mode != KGEM_RENDER; 5860#endif 5861} 5862 5863static bool 5864gen3_render_fill_boxes(struct sna *sna, 5865 CARD8 op, 5866 PictFormat format, 5867 const xRenderColor *color, 5868 const DrawableRec *dst, struct kgem_bo *dst_bo, 5869 const BoxRec *box, int n) 5870{ 5871 struct sna_composite_op tmp; 5872 uint32_t pixel; 5873 5874 if (op >= ARRAY_SIZE(gen3_blend_op)) { 5875 DBG(("%s: fallback due to unhandled blend op: %d\n", 5876 __FUNCTION__, op)); 5877 return false; 5878 } 5879 5880#if NO_FILL_BOXES 5881 return gen3_render_fill_boxes_try_blt(sna, op, format, color, 5882 dst, dst_bo, 5883 box, n); 5884#endif 5885 5886 DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", 5887 
	if (too_large(dst->width, dst->height) ||
	    dst_bo->pitch > MAX_3D_PITCH ||
	    !gen3_check_dst_format(format)) {
		DBG(("%s: try blt, too large or incompatible destination\n",
		     __FUNCTION__));
		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
						   dst, dst_bo,
						   box, n))
			return true;

		if (!gen3_check_dst_format(format))
			return false;

		return sna_tiling_fill_boxes(sna, op, format, color,
					     dst, dst_bo, box, n);
	}

	if (prefer_fill_blt(sna) &&
	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
					   dst, dst_bo,
					   box, n))
		return true;

	if (op == PictOpClear) {
		pixel = 0;
	} else {
		if (!sna_get_pixel_from_rgba(&pixel,
					     color->red,
					     color->green,
					     color->blue,
					     color->alpha,
					     PICT_a8r8g8b8)) {
			assert(0);
			return false;
		}
	}
	DBG(("%s: using shader for op=%d, format=%08x, pixel=%08x\n",
	     __FUNCTION__, op, (int)format, pixel));

	tmp.op = op;
	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.damage = NULL;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.rb_reversed = 0;
	tmp.has_component_alpha = 0;
	tmp.need_magic_ca_pass = false;

	gen3_init_solid(&tmp.src, pixel);
	tmp.mask.bo = NULL;
	tmp.mask.u.gen3.type = SHADER_NONE;
	tmp.u.gen3.num_constants = 0;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	gen3_align_vertex(sna, &tmp);
	gen3_emit_composite_state(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen3_get_rectangles(sna, &tmp, n);
		n -= n_this_time;

		do {
			DBG(("	(%d, %d), (%d, %d): %x\n",
			     box->x1, box->y1, box->x2, box->y2, pixel));
			OUT_VERTEX(box->x2);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y1);
			box++;
		} while (--n_this_time);
	} while (n);

	gen3_vertex_flush(sna);
	return true;
}

static void
gen3_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	gen3_get_rectangles(sna, &op->base, 1);

	OUT_VERTEX(x+w);
	OUT_VERTEX(y+h);
	OUT_VERTEX(x);
	OUT_VERTEX(y+h);
	OUT_VERTEX(x);
	OUT_VERTEX(y);
}

fastcall static void
gen3_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	gen3_get_rectangles(sna, &op->base, 1);

	OUT_VERTEX(box->x2);
	OUT_VERTEX(box->y2);
	OUT_VERTEX(box->x1);
	OUT_VERTEX(box->y2);
	OUT_VERTEX(box->x1);
	OUT_VERTEX(box->y1);
}

fastcall static void
gen3_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));
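	/* Emit the boxes in chunks: gen3_get_rectangles() reserves room in the
	 * current vertex buffer for up to nbox rectangles and returns how many
	 * actually fit, so the outer loop repeats until every box has been
	 * written (three two-float vertices per rectangle). */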
	do {
		int nbox_this_time;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		nbox -= nbox_this_time;

		do {
			OUT_VERTEX(box->x2);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen3_vertex_flush(sna);
}

static bool
gen3_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *tmp)
{
#if NO_FILL
	return sna_blt_fill(sna, alu,
			    dst_bo, dst->drawable.bitsPerPixel,
			    color,
			    tmp);
#endif

	/* Prefer to use the BLT if already engaged */
	if (prefer_fill_blt(sna) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch > MAX_3D_PITCH)
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    tmp);

	if (alu == GXclear)
		color = 0;

	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;
	tmp->base.floats_per_vertex = 2;
	tmp->base.floats_per_rect = 6;
	tmp->base.need_magic_ca_pass = 0;
	tmp->base.has_component_alpha = 0;
	tmp->base.rb_reversed = 0;

	gen3_init_solid(&tmp->base.src,
			sna_rgba_for_color(color, dst->drawable.depth));
	tmp->base.mask.bo = NULL;
	tmp->base.mask.u.gen3.type = SHADER_NONE;
	tmp->base.u.gen3.num_constants = 0;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	tmp->blt   = gen3_render_fill_op_blt;
	tmp->box   = gen3_render_fill_op_box;
	tmp->boxes = gen3_render_fill_op_boxes;
	tmp->points = NULL;
	tmp->done  = gen3_render_fill_op_done;

	gen3_align_vertex(sna, &tmp->base);
	gen3_emit_composite_state(sna, &tmp->base);
	return true;
}

static bool
gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

static bool
gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (prefer_fill_blt(sna) &&
	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    bo->pitch > MAX_3D_PITCH)
		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;
	tmp.has_component_alpha = 0;
	tmp.rb_reversed = 0;

	gen3_init_solid(&tmp.src,
			sna_rgba_for_color(color, dst->drawable.depth));
	tmp.mask.bo = NULL;
	tmp.mask.u.gen3.type = SHADER_NONE;
	tmp.u.gen3.num_constants = 0;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);

		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;

		if (!kgem_check_bo(&sna->kgem, bo, NULL))
			return false;
	}

	gen3_align_vertex(sna, &tmp);
	gen3_emit_composite_state(sna, &tmp);
	gen3_get_rectangles(sna, &tmp, 1);
	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
	OUT_VERTEX(x2);
	OUT_VERTEX(y2);
	OUT_VERTEX(x1);
	OUT_VERTEX(y2);
	OUT_VERTEX(x1);
	OUT_VERTEX(y1);
	gen3_vertex_flush(sna);

	return true;
}

static void gen3_render_flush(struct sna *sna)
{
	gen3_vertex_close(sna);

	assert(sna->render.vertex_reloc[0] == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
gen3_render_fini(struct sna *sna)
{
}

const char *gen3_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *render = &sna->render;

#if !NO_COMPOSITE
	render->composite = gen3_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen3_check_composite_spans;
	render->composite_spans = gen3_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif

	render->video = gen3_render_video;

	render->copy_boxes = gen3_render_copy_boxes;
	render->copy = gen3_render_copy;

	render->fill_boxes = gen3_render_fill_boxes;
	render->fill = gen3_render_fill;
	render->fill_one = gen3_render_fill_one;

	render->reset = gen3_render_reset;
	render->flush = gen3_render_flush;
	render->fini = gen3_render_fini;

	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;

	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;
	return "Alviso (gen3)";
}