1/* 2 * Copyright © 2010-2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_render.h" 34#include "sna_render_inline.h" 35#include "sna_reg.h" 36#include "sna_video.h" 37 38#include "gen3_render.h" 39 40#define NO_COMPOSITE 0 41#define NO_COMPOSITE_SPANS 0 42#define NO_COPY 0 43#define NO_COPY_BOXES 0 44#define NO_FILL 0 45#define NO_FILL_ONE 0 46#define NO_FILL_BOXES 0 47 48#define PREFER_BLT_FILL 1 49 50enum { 51 SHADER_NONE = 0, 52 SHADER_ZERO, 53 SHADER_BLACK, 54 SHADER_WHITE, 55 SHADER_CONSTANT, 56 SHADER_LINEAR, 57 SHADER_RADIAL, 58 SHADER_TEXTURE, 59 SHADER_OPACITY, 60}; 61 62#define MAX_3D_SIZE 2048 63#define MAX_3D_PITCH 8192 64 65#define OUT_BATCH(v) batch_emit(sna, v) 66#define OUT_BATCH_F(v) batch_emit_float(sna, v) 67#define OUT_VERTEX(v) vertex_emit(sna, v) 68 69enum gen3_radial_mode { 70 RADIAL_ONE, 71 RADIAL_TWO 72}; 73 74static const struct blendinfo { 75 bool dst_alpha; 76 bool src_alpha; 77 uint32_t src_blend; 78 uint32_t dst_blend; 79} gen3_blend_op[] = { 80 /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO}, 81 /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO}, 82 /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE}, 83 /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA}, 84 /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE}, 85 /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO}, 86 /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA}, 87 /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO}, 88 /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA}, 89 /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, 90 /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA}, 91 /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, 92 /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE}, 93}; 94 95#define S6_COLOR_WRITE_ONLY \ 96 (S6_COLOR_WRITE_ENABLE | \ 97 BLENDFUNC_ADD << 
S6_CBUF_BLEND_FUNC_SHIFT | \ 98 BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \ 99 BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT) 100 101static const struct formatinfo { 102 unsigned int fmt, xfmt; 103 uint32_t card_fmt; 104 bool rb_reversed; 105} gen3_tex_formats[] = { 106 {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false}, 107 {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false}, 108 {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false}, 109 {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false}, 110 {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}, 111#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 112 {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false}, 113 {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false}, 114#endif 115 {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false}, 116 {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true}, 117 {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false}, 118 {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true}, 119 {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false}, 120 {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true}, 121}; 122 123#define xFixedToDouble(f) pixman_fixed_to_double(f) 124 125static inline bool too_large(int width, int height) 126{ 127 return width > MAX_3D_SIZE || height > MAX_3D_SIZE; 128} 129 130static inline uint32_t gen3_buf_tiling(uint32_t tiling) 131{ 132 uint32_t v = 0; 133 switch (tiling) { 134 case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; 135 case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; 136 case I915_TILING_NONE: break; 137 } 138 return v; 139} 140 141static inline bool 142gen3_check_pitch_3d(struct kgem_bo *bo) 143{ 144 return bo->pitch <= MAX_3D_PITCH; 145} 146 147static uint32_t gen3_get_blend_cntl(int op, 148 bool has_component_alpha, 149 uint32_t dst_format) 150{ 151 uint32_t sblend = 
gen3_blend_op[op].src_blend; 152 uint32_t dblend = gen3_blend_op[op].dst_blend; 153 154 if (op <= PictOpSrc) /* for clear and src disable blending */ 155 return S6_COLOR_WRITE_ONLY; 156 157 /* If there's no dst alpha channel, adjust the blend op so that we'll 158 * treat it as always 1. 159 */ 160 if (gen3_blend_op[op].dst_alpha) { 161 if (PICT_FORMAT_A(dst_format) == 0) { 162 if (sblend == BLENDFACT_DST_ALPHA) 163 sblend = BLENDFACT_ONE; 164 else if (sblend == BLENDFACT_INV_DST_ALPHA) 165 sblend = BLENDFACT_ZERO; 166 } 167 168 /* gen3 engine reads 8bit color buffer into green channel 169 * in cases like color buffer blending etc., and also writes 170 * back green channel. So with dst_alpha blend we should use 171 * color factor. See spec on "8-bit rendering". 172 */ 173 if (dst_format == PICT_a8) { 174 if (sblend == BLENDFACT_DST_ALPHA) 175 sblend = BLENDFACT_DST_COLR; 176 else if (sblend == BLENDFACT_INV_DST_ALPHA) 177 sblend = BLENDFACT_INV_DST_COLR; 178 } 179 } 180 181 /* If the source alpha is being used, then we should only be in a case 182 * where the source blend factor is 0, and the source blend value is the 183 * mask channels multiplied by the source picture's alpha. 
184 */ 185 if (has_component_alpha && gen3_blend_op[op].src_alpha) { 186 if (dblend == BLENDFACT_SRC_ALPHA) 187 dblend = BLENDFACT_SRC_COLR; 188 else if (dblend == BLENDFACT_INV_SRC_ALPHA) 189 dblend = BLENDFACT_INV_SRC_COLR; 190 } 191 192 return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | 193 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | 194 sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT | 195 dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); 196} 197 198static bool gen3_check_dst_format(uint32_t format) 199{ 200 switch (format) { 201 case PICT_a8r8g8b8: 202 case PICT_x8r8g8b8: 203 case PICT_a8b8g8r8: 204 case PICT_x8b8g8r8: 205 case PICT_r5g6b5: 206 case PICT_b5g6r5: 207 case PICT_a1r5g5b5: 208 case PICT_x1r5g5b5: 209 case PICT_a1b5g5r5: 210 case PICT_x1b5g5r5: 211#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 212 case PICT_a2r10g10b10: 213 case PICT_x2r10g10b10: 214 case PICT_a2b10g10r10: 215 case PICT_x2b10g10r10: 216#endif 217 case PICT_a8: 218 case PICT_a4r4g4b4: 219 case PICT_x4r4g4b4: 220 case PICT_a4b4g4r4: 221 case PICT_x4b4g4r4: 222 return true; 223 default: 224 return false; 225 } 226} 227 228static bool gen3_dst_rb_reversed(uint32_t format) 229{ 230 switch (format) { 231 case PICT_a8r8g8b8: 232 case PICT_x8r8g8b8: 233 case PICT_r5g6b5: 234 case PICT_a1r5g5b5: 235 case PICT_x1r5g5b5: 236#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 237 case PICT_a2r10g10b10: 238 case PICT_x2r10g10b10: 239#endif 240 case PICT_a8: 241 case PICT_a4r4g4b4: 242 case PICT_x4r4g4b4: 243 return false; 244 default: 245 return true; 246 } 247} 248 249#define DSTORG_HORT_BIAS(x) ((x)<<20) 250#define DSTORG_VERT_BIAS(x) ((x)<<16) 251 252static uint32_t gen3_get_dst_format(uint32_t format) 253{ 254#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)) 255 switch (format) { 256 default: 257 case PICT_a8r8g8b8: 258 case PICT_x8r8g8b8: 259 case PICT_a8b8g8r8: 260 case PICT_x8b8g8r8: 261 return BIAS | COLR_BUF_ARGB8888; 262 case PICT_r5g6b5: 263 case PICT_b5g6r5: 
264 return BIAS | COLR_BUF_RGB565; 265 case PICT_a1r5g5b5: 266 case PICT_x1r5g5b5: 267 case PICT_a1b5g5r5: 268 case PICT_x1b5g5r5: 269 return BIAS | COLR_BUF_ARGB1555; 270#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 271 case PICT_a2r10g10b10: 272 case PICT_x2r10g10b10: 273 case PICT_a2b10g10r10: 274 case PICT_x2b10g10r10: 275 return BIAS | COLR_BUF_ARGB2AAA; 276#endif 277 case PICT_a8: 278 return BIAS | COLR_BUF_8BIT; 279 case PICT_a4r4g4b4: 280 case PICT_x4r4g4b4: 281 case PICT_a4b4g4r4: 282 case PICT_x4b4g4r4: 283 return BIAS | COLR_BUF_ARGB4444; 284 } 285#undef BIAS 286} 287 288static bool gen3_check_format(PicturePtr p) 289{ 290 switch (p->format) { 291 case PICT_a8: 292 case PICT_a8r8g8b8: 293 case PICT_x8r8g8b8: 294 case PICT_a8b8g8r8: 295 case PICT_x8b8g8r8: 296#ifdef PICT_a2r10g10b10 297 case PICT_a2r10g10b10: 298 case PICT_a2b10g10r10: 299#endif 300 case PICT_r5g6b5: 301 case PICT_b5g6r5: 302 case PICT_a1r5g5b5: 303 case PICT_a1b5g5r5: 304 case PICT_a4r4g4b4: 305 case PICT_a4b4g4r4: 306 return true; 307 default: 308 return false; 309 } 310} 311 312static bool gen3_check_xformat(PicturePtr p) 313{ 314 switch (p->format) { 315 case PICT_a8r8g8b8: 316 case PICT_x8r8g8b8: 317 case PICT_a8b8g8r8: 318 case PICT_x8b8g8r8: 319 case PICT_r5g6b5: 320 case PICT_b5g6r5: 321 case PICT_a1r5g5b5: 322 case PICT_x1r5g5b5: 323 case PICT_a1b5g5r5: 324 case PICT_x1b5g5r5: 325#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 326 case PICT_a2r10g10b10: 327 case PICT_x2r10g10b10: 328 case PICT_a2b10g10r10: 329 case PICT_x2b10g10r10: 330#endif 331 case PICT_a8: 332 case PICT_a4r4g4b4: 333 case PICT_x4r4g4b4: 334 case PICT_a4b4g4r4: 335 case PICT_x4b4g4r4: 336 return true; 337 default: 338 return false; 339 } 340} 341 342static uint32_t gen3_texture_repeat(uint32_t repeat) 343{ 344#define REPEAT(x) \ 345 (SS3_NORMALIZED_COORDS | \ 346 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ 347 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT) 348 switch (repeat) 
{ 349 default: 350 case RepeatNone: 351 return REPEAT(CLAMP_BORDER); 352 case RepeatNormal: 353 return REPEAT(WRAP); 354 case RepeatPad: 355 return REPEAT(CLAMP_EDGE); 356 case RepeatReflect: 357 return REPEAT(MIRROR); 358 } 359#undef REPEAT 360} 361 362static uint32_t gen3_gradient_repeat(uint32_t repeat) 363{ 364#define REPEAT(x) \ 365 (SS3_NORMALIZED_COORDS | \ 366 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ 367 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) 368 switch (repeat) { 369 default: 370 case RepeatNone: 371 return REPEAT(CLAMP_BORDER); 372 case RepeatNormal: 373 return REPEAT(WRAP); 374 case RepeatPad: 375 return REPEAT(CLAMP_EDGE); 376 case RepeatReflect: 377 return REPEAT(MIRROR); 378 } 379#undef REPEAT 380} 381 382static bool gen3_check_repeat(PicturePtr p) 383{ 384 if (!p->repeat) 385 return true; 386 387 switch (p->repeatType) { 388 case RepeatNone: 389 case RepeatNormal: 390 case RepeatPad: 391 case RepeatReflect: 392 return true; 393 default: 394 return false; 395 } 396} 397 398static uint32_t gen3_filter(uint32_t filter) 399{ 400 switch (filter) { 401 default: 402 assert(0); 403 case PictFilterNearest: 404 return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | 405 FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | 406 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); 407 case PictFilterBilinear: 408 return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT | 409 FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | 410 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); 411 } 412} 413 414static bool gen3_check_filter(PicturePtr p) 415{ 416 switch (p->filter) { 417 case PictFilterNearest: 418 case PictFilterBilinear: 419 return true; 420 default: 421 return false; 422 } 423} 424 425static inline void 426gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY) 427{ 428 OUT_VERTEX(dstX); 429 OUT_VERTEX(dstY); 430} 431 432fastcall static void 433gen3_emit_composite_primitive_constant(struct sna *sna, 434 const struct sna_composite_op *op, 435 const struct sna_composite_rectangles *r) 436{ 
437 int16_t dst_x = r->dst.x + op->dst.x; 438 int16_t dst_y = r->dst.y + op->dst.y; 439 440 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 441 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 442 gen3_emit_composite_dstcoord(sna, dst_x, dst_y); 443} 444 445fastcall static void 446gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, 447 const BoxRec *box, int nbox, 448 float *v) 449{ 450 do { 451 v[0] = box->x2 + op->dst.x; 452 v[1] = box->y2 + op->dst.y; 453 454 v[2] = box->x1 + op->dst.x; 455 v[3] = box->y2 + op->dst.y; 456 457 v[4] = box->x1 + op->dst.x; 458 v[5] = box->y1 + op->dst.y; 459 460 box++; 461 v += 6; 462 } while (--nbox); 463} 464 465fastcall static void 466gen3_emit_composite_primitive_identity_gradient(struct sna *sna, 467 const struct sna_composite_op *op, 468 const struct sna_composite_rectangles *r) 469{ 470 int16_t dst_x, dst_y; 471 int16_t src_x, src_y; 472 473 dst_x = r->dst.x + op->dst.x; 474 dst_y = r->dst.y + op->dst.y; 475 src_x = r->src.x + op->src.offset[0]; 476 src_y = r->src.y + op->src.offset[1]; 477 478 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 479 OUT_VERTEX(src_x + r->width); 480 OUT_VERTEX(src_y + r->height); 481 482 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 483 OUT_VERTEX(src_x); 484 OUT_VERTEX(src_y + r->height); 485 486 gen3_emit_composite_dstcoord(sna, dst_x, dst_y); 487 OUT_VERTEX(src_x); 488 OUT_VERTEX(src_y); 489} 490 491fastcall static void 492gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, 493 const BoxRec *box, int nbox, 494 float *v) 495{ 496 do { 497 v[0] = box->x2 + op->dst.x; 498 v[1] = box->y2 + op->dst.y; 499 v[2] = box->x2 + op->src.offset[0]; 500 v[3] = box->y2 + op->src.offset[1]; 501 502 v[4] = box->x1 + op->dst.x; 503 v[5] = box->y2 + op->dst.y; 504 v[6] = box->x1 + op->src.offset[0]; 505 v[7] = box->y2 + op->src.offset[1]; 506 507 v[8] = box->x1 + op->dst.x; 508 v[9] = box->y1 
+ op->dst.y; 509 v[10] = box->x1 + op->src.offset[0]; 510 v[11] = box->y1 + op->src.offset[1]; 511 512 v += 12; 513 box++; 514 } while (--nbox); 515} 516 517fastcall static void 518gen3_emit_composite_primitive_affine_gradient(struct sna *sna, 519 const struct sna_composite_op *op, 520 const struct sna_composite_rectangles *r) 521{ 522 PictTransform *transform = op->src.transform; 523 int16_t dst_x, dst_y; 524 int16_t src_x, src_y; 525 float *v; 526 527 dst_x = r->dst.x + op->dst.x; 528 dst_y = r->dst.y + op->dst.y; 529 src_x = r->src.x + op->src.offset[0]; 530 src_y = r->src.y + op->src.offset[1]; 531 532 v = sna->render.vertices + sna->render.vertex_used; 533 sna->render.vertex_used += 12; 534 assert(sna->render.vertex_used <= sna->render.vertex_size); 535 536 v[0] = dst_x + r->width; 537 v[1] = dst_y + r->height; 538 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 539 transform, op->src.scale, 540 &v[2], &v[3]); 541 542 v[4] = dst_x; 543 v[5] = dst_y + r->height; 544 _sna_get_transformed_scaled(src_x, src_y + r->height, 545 transform, op->src.scale, 546 &v[6], &v[7]); 547 548 v[8] = dst_x; 549 v[9] = dst_y; 550 _sna_get_transformed_scaled(src_x, src_y, 551 transform, op->src.scale, 552 &v[10], &v[11]); 553} 554 555fastcall static void 556gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, 557 const BoxRec *box, int nbox, 558 float *v) 559{ 560 const PictTransform *transform = op->src.transform; 561 562 do { 563 v[0] = box->x2 + op->dst.x; 564 v[1] = box->y2 + op->dst.y; 565 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 566 box->y2 + op->src.offset[1], 567 transform, op->src.scale, 568 &v[2], &v[3]); 569 570 v[4] = box->x1 + op->dst.x; 571 v[5] = box->y2 + op->dst.y; 572 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 573 box->y2 + op->src.offset[1], 574 transform, op->src.scale, 575 &v[6], &v[7]); 576 577 v[8] = box->x1 + op->dst.x; 578 v[9] = box->y1 + op->dst.y; 579 
_sna_get_transformed_scaled(box->x1 + op->src.offset[0], 580 box->y1 + op->src.offset[1], 581 transform, op->src.scale, 582 &v[10], &v[11]); 583 584 box++; 585 v += 12; 586 } while (--nbox); 587} 588 589fastcall static void 590gen3_emit_composite_primitive_identity_source(struct sna *sna, 591 const struct sna_composite_op *op, 592 const struct sna_composite_rectangles *r) 593{ 594 float w = r->width; 595 float h = r->height; 596 float *v; 597 598 v = sna->render.vertices + sna->render.vertex_used; 599 sna->render.vertex_used += 12; 600 assert(sna->render.vertex_used <= sna->render.vertex_size); 601 602 v[8] = v[4] = r->dst.x + op->dst.x; 603 v[0] = v[4] + w; 604 605 v[9] = r->dst.y + op->dst.y; 606 v[5] = v[1] = v[9] + h; 607 608 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 609 v[2] = v[6] + w * op->src.scale[0]; 610 611 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 612 v[7] = v[3] = v[11] + h * op->src.scale[1]; 613} 614 615fastcall static void 616gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op, 617 const BoxRec *box, int nbox, 618 float *v) 619{ 620 do { 621 v[0] = box->x2 + op->dst.x; 622 v[8] = v[4] = box->x1 + op->dst.x; 623 v[5] = v[1] = box->y2 + op->dst.y; 624 v[9] = box->y1 + op->dst.y; 625 626 v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 627 v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 628 629 v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 630 v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 631 632 v += 12; 633 box++; 634 } while (--nbox); 635} 636 637fastcall static void 638gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, 639 const struct sna_composite_op *op, 640 const struct sna_composite_rectangles *r) 641{ 642 float w = r->width; 643 float h = r->height; 644 float *v; 645 646 v = sna->render.vertices + sna->render.vertex_used; 647 sna->render.vertex_used += 12; 648 assert(sna->render.vertex_used <= 
sna->render.vertex_size); 649 650 v[8] = v[4] = r->dst.x; 651 v[9] = r->dst.y; 652 653 v[0] = v[4] + w; 654 v[5] = v[1] = v[9] + h; 655 656 v[10] = v[6] = r->src.x * op->src.scale[0]; 657 v[11] = r->src.y * op->src.scale[1]; 658 659 v[2] = v[6] + w * op->src.scale[0]; 660 v[7] = v[3] = v[11] + h * op->src.scale[1]; 661} 662 663fastcall static void 664gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op, 665 const BoxRec *box, int nbox, 666 float *v) 667{ 668 do { 669 v[0] = box->x2; 670 v[8] = v[4] = box->x1; 671 v[5] = v[1] = box->y2; 672 v[9] = box->y1; 673 674 v[10] = v[6] = box->x1 * op->src.scale[0]; 675 v[2] = box->x2 * op->src.scale[0]; 676 677 v[11] = box->y1 * op->src.scale[1]; 678 v[7] = v[3] = box->y2 * op->src.scale[1]; 679 680 v += 12; 681 box++; 682 } while (--nbox); 683} 684 685fastcall static void 686gen3_emit_composite_primitive_affine_source(struct sna *sna, 687 const struct sna_composite_op *op, 688 const struct sna_composite_rectangles *r) 689{ 690 PictTransform *transform = op->src.transform; 691 int16_t dst_x = r->dst.x + op->dst.x; 692 int16_t dst_y = r->dst.y + op->dst.y; 693 int src_x = r->src.x + (int)op->src.offset[0]; 694 int src_y = r->src.y + (int)op->src.offset[1]; 695 float *v; 696 697 v = sna->render.vertices + sna->render.vertex_used; 698 sna->render.vertex_used += 12; 699 assert(sna->render.vertex_used <= sna->render.vertex_size); 700 701 v[0] = dst_x + r->width; 702 v[5] = v[1] = dst_y + r->height; 703 v[8] = v[4] = dst_x; 704 v[9] = dst_y; 705 706 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 707 transform, op->src.scale, 708 &v[2], &v[3]); 709 710 _sna_get_transformed_scaled(src_x, src_y + r->height, 711 transform, op->src.scale, 712 &v[6], &v[7]); 713 714 _sna_get_transformed_scaled(src_x, src_y, 715 transform, op->src.scale, 716 &v[10], &v[11]); 717} 718 719fastcall static void 720gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, 721 const BoxRec 
*box, int nbox, 722 float *v) 723{ 724 const PictTransform *transform = op->src.transform; 725 726 do { 727 v[0] = box->x2 + op->dst.x; 728 v[5] = v[1] = box->y2 + op->dst.y; 729 v[8] = v[4] = box->x1 + op->dst.x; 730 v[9] = box->y1 + op->dst.y; 731 732 _sna_get_transformed_scaled(box->x2 + op->src.offset[0], 733 box->y2 + op->src.offset[1], 734 transform, op->src.scale, 735 &v[2], &v[3]); 736 737 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 738 box->y2 + op->src.offset[1], 739 transform, op->src.scale, 740 &v[6], &v[7]); 741 742 _sna_get_transformed_scaled(box->x1 + op->src.offset[0], 743 box->y1 + op->src.offset[1], 744 transform, op->src.scale, 745 &v[10], &v[11]); 746 747 v += 12; 748 box++; 749 } while (--nbox); 750} 751 752fastcall static void 753gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, 754 const struct sna_composite_op *op, 755 const struct sna_composite_rectangles *r) 756{ 757 float w = r->width; 758 float h = r->height; 759 float *v; 760 761 v = sna->render.vertices + sna->render.vertex_used; 762 sna->render.vertex_used += 12; 763 assert(sna->render.vertex_used <= sna->render.vertex_size); 764 765 v[8] = v[4] = r->dst.x + op->dst.x; 766 v[0] = v[4] + w; 767 768 v[9] = r->dst.y + op->dst.y; 769 v[5] = v[1] = v[9] + h; 770 771 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 772 v[2] = v[6] + w * op->mask.scale[0]; 773 774 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 775 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 776} 777 778fastcall static void 779gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna, 780 const struct sna_composite_op *op, 781 const struct sna_composite_rectangles *r) 782{ 783 float w = r->width; 784 float h = r->height; 785 float *v; 786 787 v = sna->render.vertices + sna->render.vertex_used; 788 sna->render.vertex_used += 12; 789 assert(sna->render.vertex_used <= sna->render.vertex_size); 790 791 v[8] = v[4] = r->dst.x; 792 v[9] = 
r->dst.y; 793 794 v[0] = v[4] + w; 795 v[5] = v[1] = v[9] + h; 796 797 v[10] = v[6] = r->mask.x * op->mask.scale[0]; 798 v[11] = r->mask.y * op->mask.scale[1]; 799 800 v[2] = v[6] + w * op->mask.scale[0]; 801 v[7] = v[3] = v[11] + h * op->mask.scale[1]; 802} 803 804fastcall static void 805gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, 806 const struct sna_composite_op *op, 807 const struct sna_composite_rectangles *r) 808{ 809 float dst_x, dst_y; 810 float src_x, src_y; 811 float msk_x, msk_y; 812 float w, h; 813 float *v; 814 815 dst_x = r->dst.x + op->dst.x; 816 dst_y = r->dst.y + op->dst.y; 817 src_x = r->src.x + op->src.offset[0]; 818 src_y = r->src.y + op->src.offset[1]; 819 msk_x = r->mask.x + op->mask.offset[0]; 820 msk_y = r->mask.y + op->mask.offset[1]; 821 w = r->width; 822 h = r->height; 823 824 v = sna->render.vertices + sna->render.vertex_used; 825 sna->render.vertex_used += 18; 826 assert(sna->render.vertex_used <= sna->render.vertex_size); 827 828 v[0] = dst_x + w; 829 v[1] = dst_y + h; 830 v[2] = (src_x + w) * op->src.scale[0]; 831 v[3] = (src_y + h) * op->src.scale[1]; 832 v[4] = (msk_x + w) * op->mask.scale[0]; 833 v[5] = (msk_y + h) * op->mask.scale[1]; 834 835 v[6] = dst_x; 836 v[7] = v[1]; 837 v[8] = src_x * op->src.scale[0]; 838 v[9] = v[3]; 839 v[10] = msk_x * op->mask.scale[0]; 840 v[11] =v[5]; 841 842 v[12] = v[6]; 843 v[13] = dst_y; 844 v[14] = v[8]; 845 v[15] = src_y * op->src.scale[1]; 846 v[16] = v[10]; 847 v[17] = msk_y * op->mask.scale[1]; 848} 849 850fastcall static void 851gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, 852 const struct sna_composite_op *op, 853 const struct sna_composite_rectangles *r) 854{ 855 int16_t src_x, src_y; 856 float dst_x, dst_y; 857 float msk_x, msk_y; 858 float w, h; 859 float *v; 860 861 dst_x = r->dst.x + op->dst.x; 862 dst_y = r->dst.y + op->dst.y; 863 src_x = r->src.x + op->src.offset[0]; 864 src_y = r->src.y + op->src.offset[1]; 865 msk_x = r->mask.x + 
op->mask.offset[0]; 866 msk_y = r->mask.y + op->mask.offset[1]; 867 w = r->width; 868 h = r->height; 869 870 v = sna->render.vertices + sna->render.vertex_used; 871 sna->render.vertex_used += 18; 872 assert(sna->render.vertex_used <= sna->render.vertex_size); 873 874 v[0] = dst_x + w; 875 v[1] = dst_y + h; 876 _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 877 op->src.transform, op->src.scale, 878 &v[2], &v[3]); 879 v[4] = (msk_x + w) * op->mask.scale[0]; 880 v[5] = (msk_y + h) * op->mask.scale[1]; 881 882 v[6] = dst_x; 883 v[7] = v[1]; 884 _sna_get_transformed_scaled(src_x, src_y + r->height, 885 op->src.transform, op->src.scale, 886 &v[8], &v[9]); 887 v[10] = msk_x * op->mask.scale[0]; 888 v[11] =v[5]; 889 890 v[12] = v[6]; 891 v[13] = dst_y; 892 _sna_get_transformed_scaled(src_x, src_y, 893 op->src.transform, op->src.scale, 894 &v[14], &v[15]); 895 v[16] = v[10]; 896 v[17] = msk_y * op->mask.scale[1]; 897} 898 899static void 900gen3_emit_composite_texcoord(struct sna *sna, 901 const struct sna_composite_channel *channel, 902 int16_t x, int16_t y) 903{ 904 float s = 0, t = 0, w = 1; 905 906 switch (channel->u.gen3.type) { 907 case SHADER_OPACITY: 908 case SHADER_NONE: 909 case SHADER_ZERO: 910 case SHADER_BLACK: 911 case SHADER_WHITE: 912 case SHADER_CONSTANT: 913 break; 914 915 case SHADER_LINEAR: 916 case SHADER_RADIAL: 917 case SHADER_TEXTURE: 918 x += channel->offset[0]; 919 y += channel->offset[1]; 920 if (channel->is_affine) { 921 sna_get_transformed_coordinates(x, y, 922 channel->transform, 923 &s, &t); 924 OUT_VERTEX(s * channel->scale[0]); 925 OUT_VERTEX(t * channel->scale[1]); 926 } else { 927 sna_get_transformed_coordinates_3d(x, y, 928 channel->transform, 929 &s, &t, &w); 930 OUT_VERTEX(s * channel->scale[0]); 931 OUT_VERTEX(t * channel->scale[1]); 932 OUT_VERTEX(0); 933 OUT_VERTEX(w); 934 } 935 break; 936 } 937} 938 939static void 940gen3_emit_composite_vertex(struct sna *sna, 941 const struct sna_composite_op *op, 942 int16_t srcX, 
int16_t srcY, 943 int16_t maskX, int16_t maskY, 944 int16_t dstX, int16_t dstY) 945{ 946 gen3_emit_composite_dstcoord(sna, dstX, dstY); 947 gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY); 948 gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY); 949} 950 951fastcall static void 952gen3_emit_composite_primitive(struct sna *sna, 953 const struct sna_composite_op *op, 954 const struct sna_composite_rectangles *r) 955{ 956 gen3_emit_composite_vertex(sna, op, 957 r->src.x + r->width, 958 r->src.y + r->height, 959 r->mask.x + r->width, 960 r->mask.y + r->height, 961 op->dst.x + r->dst.x + r->width, 962 op->dst.y + r->dst.y + r->height); 963 gen3_emit_composite_vertex(sna, op, 964 r->src.x, 965 r->src.y + r->height, 966 r->mask.x, 967 r->mask.y + r->height, 968 op->dst.x + r->dst.x, 969 op->dst.y + r->dst.y + r->height); 970 gen3_emit_composite_vertex(sna, op, 971 r->src.x, 972 r->src.y, 973 r->mask.x, 974 r->mask.y, 975 op->dst.x + r->dst.x, 976 op->dst.y + r->dst.y); 977} 978 979#if defined(sse2) && !defined(__x86_64__) 980sse2 fastcall static void 981gen3_emit_composite_primitive_constant__sse2(struct sna *sna, 982 const struct sna_composite_op *op, 983 const struct sna_composite_rectangles *r) 984{ 985 float *v; 986 987 v = sna->render.vertices + sna->render.vertex_used; 988 sna->render.vertex_used += 6; 989 assert(sna->render.vertex_used <= sna->render.vertex_size); 990 991 v[4] = v[2] = r->dst.x + op->dst.x; 992 v[5] = r->dst.y + op->dst.y; 993 994 v[0] = v[2] + r->width; 995 v[3] = v[1] = v[5] + r->height; 996 997} 998 999sse2 fastcall static void 1000gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op, 1001 const BoxRec *box, int nbox, 1002 float *v) 1003{ 1004 do { 1005 v[0] = box->x2 + op->dst.x; 1006 v[3] = v[1] = box->y2 + op->dst.y; 1007 v[4] = v[2] = box->x1 + op->dst.x; 1008 v[5] = box->y1 + op->dst.y; 1009 1010 box++; 1011 v += 6; 1012 } while (--nbox); 1013} 1014 1015sse2 fastcall static void 
gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
						      const struct sna_composite_op *op,
						      const struct sna_composite_rectangles *r)
{
	/* Emit one rectangle (3 vertices x 4 floats: x, y, u, v) for a
	 * gradient source with an identity transform: texcoords are the
	 * untransformed source coordinates.
	 */
	int16_t x, y;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	x = r->dst.x + op->dst.x;
	y = r->dst.y + op->dst.y;
	/* RECTLIST order: bottom-right, bottom-left, top-left */
	v[0] = x + r->width;
	v[5] = v[1] = y + r->height;
	v[8] = v[4] = x;
	v[9] = y;

	x = r->src.x + op->src.offset[0];
	y = r->src.y + op->src.offset[1];
	v[2] = x + r->width;
	v[7] = v[3] = y + r->height;
	v[10] = v[6] = x;
	v[11] = y;
}

/* Batched form of the above: one 12-float rectangle per box, writing
 * directly into the caller-supplied vertex pointer.
 */
sse2 fastcall static void
gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op,
						  const BoxRec *box, int nbox,
						  float *v)
{
	do {
		v[0] = box->x2 + op->dst.x;
		v[5] = v[1] = box->y2 + op->dst.y;
		v[8] = v[4] = box->x1 + op->dst.x;
		v[9] = box->y1 + op->dst.y;

		v[2] = box->x2 + op->src.offset[0];
		v[7] = v[3] = box->y2 + op->src.offset[1];
		v[10] = v[6] = box->x1 + op->src.offset[0];
		v[11] = box->y1 + op->src.offset[1];

		v += 12;
		box++;
	} while (--nbox);
}

/* Gradient source with an affine transform: each source coordinate is
 * pushed through the PictTransform (and scaled) before being emitted.
 */
sse2 fastcall static void
gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
						    const struct sna_composite_op *op,
						    const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int16_t dst_x, dst_y;
	int16_t src_x, src_y;
	float *v;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst_x + r->width;
	v[1] = dst_y + r->height;
	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	v[4] = dst_x;
	v[5] = dst_y + r->height;
	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	v[8] = dst_x;
	v[9] = dst_y;
	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

/* Box-batched variant of the affine-gradient emitter. */
sse2 fastcall static void
gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op,
						const BoxRec *box, int nbox,
						float *v)
{
	const PictTransform *transform = op->src.transform;

	do {
		v[0] = box->x2 + op->dst.x;
		v[1] = box->y2 + op->dst.y;
		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[2], &v[3]);

		v[4] = box->x1 + op->dst.x;
		v[5] = box->y2 + op->dst.y;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[6], &v[7]);

		v[8] = box->x1 + op->dst.x;
		v[9] = box->y1 + op->dst.y;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y1 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[10], &v[11]);

		box++;
		v += 12;
	} while (--nbox);
}

/* Texture source, identity transform: texcoords are the source position
 * normalised by op->src.scale[].
 */
sse2 fastcall static void
gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
						    const struct sna_composite_op *op,
						    const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

/* Box-batched variant of the identity-source emitter. */
sse2 fastcall static void
gen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op *op,
						const BoxRec *box, int nbox,
						float *v)
{
	do {
		v[0] = box->x2 + op->dst.x;
		v[8] = v[4] = box->x1 + op->dst.x;
		v[5] = v[1] = box->y2 + op->dst.y;
		v[9] = box->y1 + op->dst.y;

		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 12;
		box++;
	} while (--nbox);
}

/* Fast path: identity source with zero dst/src offsets folded away. */
sse2 fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
							      const struct sna_composite_op *op,
							      const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[8] = v[4] = r->dst.x;
	v[9] = r->dst.y;

	v[0] = v[4] + w;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = r->src.x * op->src.scale[0];
	v[11] = r->src.y * op->src.scale[1];

	v[2] = v[6] + w * op->src.scale[0];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

/* Box-batched variant of the no-offset identity-source emitter. */
sse2 fastcall static void
gen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op,
							  const BoxRec *box, int nbox,
							  float *v)
{
	do {
		v[0] = box->x2;
		v[8] = v[4] = box->x1;
		v[5] = v[1] = box->y2;
		v[9] = box->y1;

		v[10] = v[6] = box->x1 * op->src.scale[0];
		v[2] = box->x2 * op->src.scale[0];

		v[11] = box->y1 * op->src.scale[1];
		v[7] = v[3] = box->y2 * op->src.scale[1];

		v += 12;
		box++;
	} while (--nbox);
}

/* Texture source with an affine transform: destination corners are
 * emitted directly, source corners go through the PictTransform.
 */
sse2 fastcall static void
gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
						  const struct sna_composite_op *op,
						  const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, r->width, r->height));

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst_x + r->width;
	v[5] = v[1] = dst_y + r->height;
	v[8] = v[4] = dst_x;
	v[9] = dst_y;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

/* Box-batched variant of the affine-source emitter. */
sse2 fastcall static void
gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op,
					      const BoxRec *box, int nbox,
					      float *v)
{
	const PictTransform *transform = op->src.transform;

	do {
		DBG(("%s: box=(%d, %d), (%d, %d), src.offset=(%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, op->src.offset[0], op->src.offset[1]));

		v[0] = box->x2 + op->dst.x;
		v[5] = v[1] = box->y2 + op->dst.y;
		v[8] = v[4] = box->x1 + op->dst.x;
		v[9] = box->y1 + op->dst.y;

		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[2], &v[3]);

		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[6], &v[7]);

		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y1 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[10], &v[11]);

		v += 12;
		box++;
	} while (--nbox);
}

/* Constant (solid) source modulated by an identity-transformed mask:
 * the single texcoord pair carries the mask coordinates.
 */
sse2 fastcall static void
gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
							   const struct sna_composite_op *op,
							   const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}

/* As above, with zero dst/mask offsets folded away. */
sse2 fastcall static void
gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna,
								     const struct sna_composite_op *op,
								     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[8] = v[4] = r->dst.x;
	v[9] = r->dst.y;

	v[0] = v[4] + w;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = r->mask.x * op->mask.scale[0];
	v[11] = r->mask.y * op->mask.scale[1];

	v[2] = v[6] + w * op->mask.scale[0];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}

/* Identity source AND identity mask: 6 floats per vertex
 * (x, y, src u/v, mask u/v), 18 per rectangle.
 */
sse2 fastcall static void
gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
							 const struct sna_composite_op *op,
							 const struct sna_composite_rectangles *r)
{
	float dst_x, dst_y;
	float src_x, src_y;
	float msk_x, msk_y;
	float w, h;
	float *v;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 18;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst_x + w;
	v[1] = dst_y + h;
	v[2] = (src_x + w) * op->src.scale[0];
	v[3] = (src_y + h) * op->src.scale[1];
	v[4] = (msk_x + w) * op->mask.scale[0];
	v[5] = (msk_y + h) * op->mask.scale[1];

	v[6] = dst_x;
	v[7] = v[1];
	v[8] = src_x * op->src.scale[0];
	v[9] = v[3];
	v[10] = msk_x * op->mask.scale[0];
	v[11] =v[5];

	v[12] = v[6];
	v[13] = dst_y;
	v[14] = v[8];
	v[15] = src_y * op->src.scale[1];
	v[16] = v[10];
	v[17] = msk_y * op->mask.scale[1];
}

/* Affine source plus identity mask: 18 floats per rectangle, with the
 * source texcoords transformed per corner.
 */
sse2 fastcall static void
gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
						       const struct sna_composite_op *op,
						       const struct sna_composite_rectangles *r)
{
	int16_t src_x, src_y;
	float dst_x, dst_y;
	float msk_x, msk_y;
	float w, h;
	float *v;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 18;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst_x + w;
	v[1] = dst_y + h;
	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    op->src.transform, op->src.scale,
				    &v[2], &v[3]);
	v[4] = (msk_x + w) * op->mask.scale[0];
	v[5] = (msk_y + h) * op->mask.scale[1];

	v[6] = dst_x;
	v[7] = v[1];
	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    op->src.transform, op->src.scale,
				    &v[8], &v[9]);
	v[10] = msk_x * op->mask.scale[0];
	v[11] =v[5];

	v[12] = v[6];
	v[13] = dst_y;
	_sna_get_transformed_scaled(src_x, src_y,
				    op->src.transform, op->src.scale,
				    &v[14], &v[15]);
	v[16] = v[10];
	v[17] = msk_y * op->mask.scale[1];
}
#endif

/* Divide the interpolated (x, y) by w for projective transforms:
 * out = (in.x/in.w, in.y/in.w, 0, 1).
 */
static inline void
gen3_2d_perspective(struct sna *sna, int in, int out)
{
	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
	gen3_fs_mul(out,
		    gen3_fs_operand(in, X, Y, ZERO, ONE),
		    gen3_fs_operand_reg(out));
}

/* Compute the linear-gradient parameter t = dot((x, y, 1), constants)
 * into out.x, handling the projective case via gen3_2d_perspective.
 */
static inline void
gen3_linear_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	gen3_fs_mov(out, gen3_fs_operand_zero());
	gen3_fs_dp3(out, MASK_X,
		    gen3_fs_operand(in, X, Y, ONE, ZERO),
		    gen3_fs_operand_reg(c));
}

/* Compute the radial-gradient parameter t into out.x, for both the
 * degenerate single-circle case and the general two-circle case.  The
 * per-mode constants (centres, radii, precomputed A terms) live in the
 * shader constant registers starting at channel->u.gen3.constants.
 */
static void
gen3_radial_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	switch (channel->u.gen3.mode) {
	case RADIAL_ONE:
		/*
		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
		   r² = pdx*pdx + pdy*pdy
		   t = r²/sqrt(r²) - r1/dr;
		 */
		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
		gen3_fs_dp2add(FS_U0, MASK_X,
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
		gen3_fs_mad(out, 0,
			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
		break;

	case RADIAL_TWO:
		/*
		   pdx = x - c1x, pdy = y - c1y;
		   A = dx² + dy² - dr²
		   B = -2*(pdx*dx + pdy*dy + r1*dr);
		   C = pdx² + pdy² - r1²;
		   det = B*B - 4*A*C;
		   t = (-B + sqrt (det)) / (2 * A)
		 */

		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
		gen3_fs_add(FS_U0,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, X, Y, Z, ZERO));
		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
		gen3_fs_dp3(FS_U0, MASK_W,
			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
		/* u1.x = pdx² + pdy² - r1²; [C] */
		gen3_fs_dp3(FS_U1, MASK_X,
			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
		/* u1.x = C, u1.y = B, u1.z=-4*A; */
		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
		/* u1.x = B² - 4*A*C */
		gen3_fs_dp2add(FS_U1, MASK_X,
			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		/* out.x = -B + sqrt (B² - 4*A*C), */
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
		gen3_fs_mad(out, MASK_X,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
		gen3_fs_mul(out,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
		break;
	}
}

/* Generate and emit the pixel-shader program for the composite op into
 * the batch.  The shader is keyed by a packed id (source/mask types,
 * affine-ness, alpha fixup, rb swizzle, CA mode, dst-alpha-only) and
 * skipped entirely when it matches the previously emitted shader.  The
 * command header is patched afterwards with the final program length.
 */
static void
gen3_composite_emit_shader(struct sna *sna,
			   const struct sna_composite_op *op,
			   uint8_t blend)
{
	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
	const struct sna_composite_channel *src, *mask;
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t shader_offset, id;
	int src_reg, mask_reg;
	int t, length;

	src = &op->src;
	mask = &op->mask;
	if (mask->u.gen3.type == SHADER_NONE)
		mask = NULL;

	/* Pack every input that affects codegen into the cache key. */
	id = (src->u.gen3.type |
	      src->is_affine << 4 |
	      src->alpha_fixup << 5 |
	      src->rb_reversed << 6);
	if (mask) {
		id |= (mask->u.gen3.type << 8 |
		       mask->is_affine << 12 |
		       gen3_blend_op[blend].src_alpha << 13 |
		       op->has_component_alpha << 14 |
		       mask->alpha_fixup << 15 |
		       mask->rb_reversed << 16);
	}
	id |= dst_is_alpha << 24;
	id |= op->rb_reversed << 25;

	if (id == state->last_shader)
		return;

	state->last_shader = id;

	/* Reserve the command dword; length is patched in at 'done'. */
	shader_offset = sna->kgem.nbatch++;
	t = 0;
	switch (src->u.gen3.type) {
	case SHADER_NONE:
	case SHADER_OPACITY:
		assert(0);
		/* fall through */
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		gen3_fs_dcl(FS_T8);
		src_reg = FS_T8;
		break;
	case SHADER_TEXTURE:
	case SHADER_RADIAL:
	case SHADER_LINEAR:
		gen3_fs_dcl(FS_S0);
		gen3_fs_dcl(FS_T0);
		t++;
		break;
	}

	if (mask == NULL) {
		switch (src->u.gen3.type) {
		case SHADER_ZERO:
			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
			goto done;
		case SHADER_BLACK:
			if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
			goto done;
		case SHADER_WHITE:
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		if (src->alpha_fixup && dst_is_alpha) {
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		/* No mask, so load directly to output color */
		if (src->u.gen3.type != SHADER_CONSTANT) {
			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
				src_reg = FS_R0;
			else
				src_reg = FS_OC;
		}
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(src_reg, FS_S0, FS_T0);
			else
				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
			break;

		case SHADER_NONE:
		case SHADER_WHITE:
		case SHADER_BLACK:
		case SHADER_ZERO:
			assert(0);
			/* fall through */
		case SHADER_CONSTANT:
			break;
		}

		if (src_reg != FS_OC) {
			/* Apply swizzles/fixups while moving into the
			 * output color register.
			 */
			if (src->alpha_fixup)
				gen3_fs_mov(FS_OC,
					    src->rb_reversed ^ op->rb_reversed ?
					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
			else if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
			else if (src->rb_reversed ^ op->rb_reversed)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
		} else if (src->alpha_fixup)
			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
	} else {
		int out_reg = FS_OC;
		if (op->rb_reversed)
			out_reg = FS_U0;

		switch (mask->u.gen3.type) {
		case SHADER_CONSTANT:
			gen3_fs_dcl(FS_T9);
			mask_reg = FS_T9;
			break;
		case SHADER_TEXTURE:
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			gen3_fs_dcl(FS_S0 + t);
			/* fall through */
		case SHADER_OPACITY:
			gen3_fs_dcl(FS_T0 + t);
			break;
		case SHADER_ZERO:
		case SHADER_BLACK:
			assert(0);
			/* fall through */
		case SHADER_NONE:
		case SHADER_WHITE:
			break;
		}

		t = 0;
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
			else
				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_CONSTANT:
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
			break;
		}
		if (src->alpha_fixup)
			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
		if (src->rb_reversed)
			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));

		switch (mask->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_TEXTURE:
			if (mask->is_affine)
				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
			else
				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
			mask_reg = FS_R1;
			break;

		case SHADER_OPACITY:
			/* The mask is a scalar opacity interpolated in
			 * texcoord.x; multiply it into the source.
			 */
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
				}
				break;
			default:
				if (dst_is_alpha) {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, W, W, W, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, X, Y, Z, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				}
			}
			goto mask_done;

		case SHADER_CONSTANT:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_NONE:
			break;
		}
		if (mask->alpha_fixup)
			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
		if (mask->rb_reversed)
			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));

		if (dst_is_alpha) {
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				gen3_fs_mov(out_reg,
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			default:
				gen3_fs_mul(out_reg,
					    gen3_fs_operand(src_reg, W, W, W, W),
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			}
		} else {
			/* If component alpha is active in the mask and the blend
			 * operation uses the source alpha, then we know we don't
			 * need the source value (otherwise we would have hit a
			 * fallback earlier), so we provide the source alpha (src.A *
			 * mask.X) as output color.
			 * Conversely, if CA is set and we don't need the source alpha,
			 * then we produce the source value (src.X * mask.X) and the
			 * source alpha is unused.  Otherwise, we provide the non-CA
			 * source value (src.X * mask.A).
			 */
			if (op->has_component_alpha) {
				switch (src->u.gen3.type) {
				case SHADER_BLACK:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mov(out_reg,
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mov(out_reg,
							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand_reg(mask_reg));
					break;
				default:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mul(out_reg,
							    gen3_fs_operand(src_reg, W, W, W, W),
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mul(out_reg,
							    gen3_fs_operand_reg(src_reg),
							    gen3_fs_operand_reg(mask_reg));
					break;
				}
			} else {
				switch (src->u.gen3.type) {
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				case SHADER_BLACK:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				default:
					gen3_fs_mul(out_reg,
						    gen3_fs_operand_reg(src_reg),
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				}
			}
		}
mask_done:
		/* Undo the temporary: swizzle rb back while writing OC. */
		if (op->rb_reversed)
			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
	}

done:
	/* Patch the reserved header with the emitted program length. */
	length = sna->kgem.nbatch - shader_offset;
	sna->kgem.batch[shader_offset] =
		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
}

/* Translate a kernel tiling enum into MS3 map-state bits.  The
 * fallthroughs are deliberate: Y-tiling needs both the walk bit and
 * the tiled-surface bit, X-tiling just the latter.
 */
static uint32_t gen3_ms_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	case I915_TILING_Y: v |= MS3_TILE_WALK;
		/* fall through */
	case I915_TILING_X: v |= MS3_TILED_SURFACE;
		/* fall through */
	case I915_TILING_NONE: break;
	}
	return v;
}

1894static void gen3_emit_invariant(struct sna *sna) 1895{ 1896 /* Disable independent alpha blend */ 1897 OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE | 1898 IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT | 1899 IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT | 1900 IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT); 1901 1902 OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | 1903 CSB_TCB(0, 0) | 1904 CSB_TCB(1, 1) | 1905 CSB_TCB(2, 2) | 1906 CSB_TCB(3, 3) | 1907 CSB_TCB(4, 4) | 1908 CSB_TCB(5, 5) | 1909 CSB_TCB(6, 6) | 1910 CSB_TCB(7, 7)); 1911 1912 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); 1913 OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */ 1914 OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | 1915 S4_LINE_WIDTH_ONE | 1916 S4_CULLMODE_NONE | 1917 S4_VFMT_XY); 1918 OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */ 1919 OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */ 1920 1921 OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); 1922 OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); 1923 1924 OUT_BATCH(_3DSTATE_LOAD_INDIRECT); 1925 OUT_BATCH(0x00000000); 1926 1927 OUT_BATCH(_3DSTATE_STIPPLE); 1928 OUT_BATCH(0x00000000); 1929 1930 sna->render_state.gen3.need_invariant = false; 1931} 1932 1933#define MAX_OBJECTS 3 /* worst case: dst + src + mask */ 1934 1935static void 1936gen3_get_batch(struct sna *sna, const struct sna_composite_op *op) 1937{ 1938 kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1939 1940 if (!kgem_check_batch(&sna->kgem, 200)) { 1941 DBG(("%s: flushing batch: size %d > %d\n", 1942 __FUNCTION__, 200, 1943 sna->kgem.surface-sna->kgem.nbatch)); 1944 kgem_submit(&sna->kgem); 1945 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1946 } 1947 1948 if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) { 1949 DBG(("%s: flushing batch: reloc %d >= %d\n", 1950 __FUNCTION__, 1951 sna->kgem.nreloc, 1952 
(int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS)); 1953 kgem_submit(&sna->kgem); 1954 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1955 } 1956 1957 if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) { 1958 DBG(("%s: flushing batch: exec %d >= %d\n", 1959 __FUNCTION__, 1960 sna->kgem.nexec, 1961 (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1)); 1962 kgem_submit(&sna->kgem); 1963 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1964 } 1965 1966 if (sna->render_state.gen3.need_invariant) 1967 gen3_emit_invariant(sna); 1968#undef MAX_OBJECTS 1969} 1970 1971static void gen3_emit_target(struct sna *sna, 1972 struct kgem_bo *bo, 1973 int width, 1974 int height, 1975 int format) 1976{ 1977 struct gen3_render_state *state = &sna->render_state.gen3; 1978 1979 assert(!too_large(width, height)); 1980 1981 /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */ 1982 assert(bo->unique_id != 0); 1983 if (bo->unique_id != state->current_dst) { 1984 uint32_t v; 1985 1986 DBG(("%s: setting new target id=%d, handle=%d\n", 1987 __FUNCTION__, bo->unique_id, bo->handle)); 1988 1989 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); 1990 OUT_BATCH(BUF_3D_ID_COLOR_BACK | 1991 gen3_buf_tiling(bo->tiling) | 1992 bo->pitch); 1993 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 1994 bo, 1995 I915_GEM_DOMAIN_RENDER << 16 | 1996 I915_GEM_DOMAIN_RENDER, 1997 0)); 1998 1999 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); 2000 OUT_BATCH(gen3_get_dst_format(format)); 2001 2002 v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1); 2003 if (v != state->last_drawrect_limit) { 2004 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); 2005 OUT_BATCH(0); /* XXX dither origin? 
*/ 2006 OUT_BATCH(0); 2007 OUT_BATCH(v); 2008 OUT_BATCH(0); 2009 state->last_drawrect_limit = v; 2010 } 2011 2012 state->current_dst = bo->unique_id; 2013 } 2014 assert(bo->exec); 2015 kgem_bo_mark_dirty(bo); 2016} 2017 2018static void gen3_emit_composite_state(struct sna *sna, 2019 const struct sna_composite_op *op) 2020{ 2021 struct gen3_render_state *state = &sna->render_state.gen3; 2022 uint32_t map[4]; 2023 uint32_t sampler[4]; 2024 struct kgem_bo *bo[2]; 2025 unsigned int tex_count, n; 2026 uint32_t ss2; 2027 2028 gen3_get_batch(sna, op); 2029 2030 if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 2031 if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) 2032 OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 2033 else 2034 OUT_BATCH(_3DSTATE_MODES_5_CMD | 2035 PIPELINE_FLUSH_RENDER_CACHE | 2036 PIPELINE_FLUSH_TEXTURE_CACHE); 2037 kgem_clear_dirty(&sna->kgem); 2038 } 2039 2040 gen3_emit_target(sna, 2041 op->dst.bo, 2042 op->dst.width, 2043 op->dst.height, 2044 op->dst.format); 2045 2046 ss2 = ~0; 2047 tex_count = 0; 2048 switch (op->src.u.gen3.type) { 2049 case SHADER_OPACITY: 2050 case SHADER_NONE: 2051 assert(0); 2052 case SHADER_ZERO: 2053 case SHADER_BLACK: 2054 case SHADER_WHITE: 2055 break; 2056 case SHADER_CONSTANT: 2057 if (op->src.u.gen3.mode != state->last_diffuse) { 2058 OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2059 OUT_BATCH(op->src.u.gen3.mode); 2060 state->last_diffuse = op->src.u.gen3.mode; 2061 } 2062 break; 2063 case SHADER_LINEAR: 2064 case SHADER_RADIAL: 2065 case SHADER_TEXTURE: 2066 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2067 ss2 |= S2_TEXCOORD_FMT(tex_count, 2068 op->src.is_affine ? 
TEXCOORDFMT_2D : TEXCOORDFMT_4D); 2069 assert(op->src.card_format); 2070 map[tex_count * 2 + 0] = 2071 op->src.card_format | 2072 gen3_ms_tiling(op->src.bo->tiling) | 2073 (op->src.height - 1) << MS3_HEIGHT_SHIFT | 2074 (op->src.width - 1) << MS3_WIDTH_SHIFT; 2075 map[tex_count * 2 + 1] = 2076 (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT; 2077 2078 sampler[tex_count * 2 + 0] = op->src.filter; 2079 sampler[tex_count * 2 + 1] = 2080 op->src.repeat | 2081 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT; 2082 bo[tex_count] = op->src.bo; 2083 tex_count++; 2084 break; 2085 } 2086 switch (op->mask.u.gen3.type) { 2087 case SHADER_NONE: 2088 case SHADER_ZERO: 2089 case SHADER_BLACK: 2090 case SHADER_WHITE: 2091 break; 2092 case SHADER_CONSTANT: 2093 if (op->mask.u.gen3.mode != state->last_specular) { 2094 OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); 2095 OUT_BATCH(op->mask.u.gen3.mode); 2096 state->last_specular = op->mask.u.gen3.mode; 2097 } 2098 break; 2099 case SHADER_LINEAR: 2100 case SHADER_RADIAL: 2101 case SHADER_TEXTURE: 2102 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2103 ss2 |= S2_TEXCOORD_FMT(tex_count, 2104 op->mask.is_affine ? 
TEXCOORDFMT_2D : TEXCOORDFMT_4D); 2105 assert(op->mask.card_format); 2106 map[tex_count * 2 + 0] = 2107 op->mask.card_format | 2108 gen3_ms_tiling(op->mask.bo->tiling) | 2109 (op->mask.height - 1) << MS3_HEIGHT_SHIFT | 2110 (op->mask.width - 1) << MS3_WIDTH_SHIFT; 2111 map[tex_count * 2 + 1] = 2112 (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT; 2113 2114 sampler[tex_count * 2 + 0] = op->mask.filter; 2115 sampler[tex_count * 2 + 1] = 2116 op->mask.repeat | 2117 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT; 2118 bo[tex_count] = op->mask.bo; 2119 tex_count++; 2120 break; 2121 case SHADER_OPACITY: 2122 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); 2123 ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D); 2124 break; 2125 } 2126 2127 { 2128 uint32_t blend_offset = sna->kgem.nbatch; 2129 2130 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); 2131 OUT_BATCH(ss2); 2132 OUT_BATCH(gen3_get_blend_cntl(op->op, 2133 op->has_component_alpha, 2134 op->dst.format)); 2135 2136 if (memcmp(sna->kgem.batch + state->last_blend + 1, 2137 sna->kgem.batch + blend_offset + 1, 2138 2 * 4) == 0) 2139 sna->kgem.nbatch = blend_offset; 2140 else 2141 state->last_blend = blend_offset; 2142 } 2143 2144 if (op->u.gen3.num_constants) { 2145 int count = op->u.gen3.num_constants; 2146 if (state->last_constants) { 2147 int last = sna->kgem.batch[state->last_constants+1]; 2148 if (last == (1 << (count >> 2)) - 1 && 2149 memcmp(&sna->kgem.batch[state->last_constants+2], 2150 op->u.gen3.constants, 2151 count * sizeof(uint32_t)) == 0) 2152 count = 0; 2153 } 2154 if (count) { 2155 state->last_constants = sna->kgem.nbatch; 2156 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count); 2157 OUT_BATCH((1 << (count >> 2)) - 1); 2158 2159 memcpy(sna->kgem.batch + sna->kgem.nbatch, 2160 op->u.gen3.constants, 2161 count * sizeof(uint32_t)); 2162 sna->kgem.nbatch += count; 2163 } 2164 } 2165 2166 if (tex_count != 0) { 2167 uint32_t rewind; 2168 2169 n = 0; 2170 if (tex_count == 
state->tex_count) { 2171 for (; n < tex_count; n++) { 2172 if (map[2*n+0] != state->tex_map[2*n+0] || 2173 map[2*n+1] != state->tex_map[2*n+1] || 2174 state->tex_handle[n] != bo[n]->handle || 2175 state->tex_delta[n] != bo[n]->delta) 2176 break; 2177 } 2178 } 2179 if (n < tex_count) { 2180 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); 2181 OUT_BATCH((1 << tex_count) - 1); 2182 for (n = 0; n < tex_count; n++) { 2183 OUT_BATCH(kgem_add_reloc(&sna->kgem, 2184 sna->kgem.nbatch, 2185 bo[n], 2186 I915_GEM_DOMAIN_SAMPLER<< 16, 2187 0)); 2188 OUT_BATCH(map[2*n + 0]); 2189 OUT_BATCH(map[2*n + 1]); 2190 2191 state->tex_map[2*n+0] = map[2*n+0]; 2192 state->tex_map[2*n+1] = map[2*n+1]; 2193 state->tex_handle[n] = bo[n]->handle; 2194 state->tex_delta[n] = bo[n]->delta; 2195 } 2196 state->tex_count = n; 2197 } 2198 2199 rewind = sna->kgem.nbatch; 2200 OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); 2201 OUT_BATCH((1 << tex_count) - 1); 2202 for (n = 0; n < tex_count; n++) { 2203 OUT_BATCH(sampler[2*n + 0]); 2204 OUT_BATCH(sampler[2*n + 1]); 2205 OUT_BATCH(0); 2206 } 2207 if (state->last_sampler && 2208 memcmp(&sna->kgem.batch[state->last_sampler+1], 2209 &sna->kgem.batch[rewind + 1], 2210 (3*tex_count + 1)*sizeof(uint32_t)) == 0) 2211 sna->kgem.nbatch = rewind; 2212 else 2213 state->last_sampler = rewind; 2214 } 2215 2216 gen3_composite_emit_shader(sna, op, op->op); 2217} 2218 2219static bool gen3_magic_ca_pass(struct sna *sna, 2220 const struct sna_composite_op *op) 2221{ 2222 if (!op->need_magic_ca_pass) 2223 return false; 2224 2225 DBG(("%s(%d)\n", __FUNCTION__, 2226 sna->render.vertex_index - sna->render.vertex_start)); 2227 2228 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); 2229 OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format)); 2230 gen3_composite_emit_shader(sna, op, PictOpAdd); 2231 2232 OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | 2233 (sna->render.vertex_index - sna->render.vertex_start)); 2234 
OUT_BATCH(sna->render.vertex_start); 2235 2236 sna->render_state.gen3.last_blend = 0; 2237 return true; 2238} 2239 2240static void gen3_vertex_flush(struct sna *sna) 2241{ 2242 assert(sna->render.vertex_offset); 2243 2244 DBG(("%s[%x] = %d\n", __FUNCTION__, 2245 4*sna->render.vertex_offset, 2246 sna->render.vertex_index - sna->render.vertex_start)); 2247 2248 sna->kgem.batch[sna->render.vertex_offset] = 2249 PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | 2250 (sna->render.vertex_index - sna->render.vertex_start); 2251 sna->kgem.batch[sna->render.vertex_offset + 1] = 2252 sna->render.vertex_start; 2253 2254 sna->render.vertex_offset = 0; 2255} 2256 2257static int gen3_vertex_finish(struct sna *sna) 2258{ 2259 struct kgem_bo *bo; 2260 unsigned hint, size; 2261 2262 DBG(("%s: used=%d/%d, vbo active? %d\n", 2263 __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, 2264 sna->render.vbo ? sna->render.vbo->handle : 0)); 2265 assert(sna->render.vertex_offset == 0); 2266 assert(sna->render.vertex_used); 2267 assert(sna->render.vertex_used <= sna->render.vertex_size); 2268 2269 sna_vertex_wait__locked(&sna->render); 2270 2271 hint = CREATE_GTT_MAP; 2272 bo = sna->render.vbo; 2273 if (bo) { 2274 DBG(("%s: reloc = %d\n", __FUNCTION__, 2275 sna->render.vertex_reloc[0])); 2276 2277 if (sna->render.vertex_reloc[0]) { 2278 sna->kgem.batch[sna->render.vertex_reloc[0]] = 2279 kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], 2280 bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0); 2281 2282 sna->render.vertex_reloc[0] = 0; 2283 } 2284 sna->render.vertex_used = 0; 2285 sna->render.vertex_index = 0; 2286 sna->render.vbo = NULL; 2287 2288 kgem_bo_destroy(&sna->kgem, bo); 2289 hint |= CREATE_CACHED | CREATE_NO_THROTTLE; 2290 } 2291 2292 size = 256*1024; 2293 sna->render.vertices = NULL; 2294 sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); 2295 while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { 2296 size /= 2; 2297 
/* Retire the current vertex buffer and allocate a fresh one.
 *
 * Called when the vbo (or the local vertex_data staging array) is full.
 * Any outstanding relocation to the old vbo is resolved first, then a new
 * linear bo is allocated (halving the size on failure down to the staging
 * array size).  Returns the number of floats available in the new buffer,
 * or 0 if we had to fall back to the small inline vertex_data array.
 */
static int gen3_vertex_finish(struct sna *sna)
{
	struct kgem_bo *bo;
	unsigned hint, size;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	sna_vertex_wait__locked(&sna->render);

	hint = CREATE_GTT_MAP;
	bo = sna->render.vbo;
	if (bo) {
		DBG(("%s: reloc = %d\n", __FUNCTION__,
		     sna->render.vertex_reloc[0]));

		/* Resolve the deferred relocation against the old vbo
		 * before we drop our reference to it.
		 */
		if (sna->render.vertex_reloc[0]) {
			sna->kgem.batch[sna->render.vertex_reloc[0]] =
				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
					       bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0);

			sna->render.vertex_reloc[0] = 0;
		}
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;

		kgem_bo_destroy(&sna->kgem, bo);
		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
	}

	size = 256*1024;
	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	/* On allocation failure keep halving until we drop below the size
	 * of the inline staging array, at which point a vbo is pointless.
	 */
	while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) {
		size /= 2;
		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	}
	if (sna->render.vbo == NULL)
		sna->render.vbo = kgem_create_linear(&sna->kgem,
						     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo &&
	    kgem_check_bo(&sna->kgem, sna->render.vbo, NULL))
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		/* Total failure: fall back to the inline array. */
		if (sna->render.vbo) {
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
			sna->render.vbo = NULL;
		}
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		return 0;
	}
	assert(sna->render.vbo->snoop == false);

	/* vertex_used is only non-zero here when the pending vertices live
	 * in vertex_data (no old vbo); copy them into the new mapping.
	 */
	if (sna->render.vertex_used) {
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}

	/* The indirect vertex index is a 16-bit quantity; clamp. */
	size = __kgem_bo_size(sna->render.vbo)/4;
	if (size >= UINT16_MAX)
		size = UINT16_MAX - 1;
	assert(size > sna->render.vertex_used);

	sna->render.vertex_size = size;
	return size - sna->render.vertex_used;
}

/* Emit the final relocation for the vertex data at batch-close time.
 *
 * Three cases: (a) an active vbo — keep it (converting a CPU map to GTT)
 * unless nearly full, in which case discard; (b) no vbo and the vertices
 * fit in the batch — copy them inline after the commands; (c) no vbo and
 * no room — spill into a one-off bo.  In every case the deferred
 * vertex_reloc[0] slot is patched with the real relocation.
 */
static void gen3_vertex_close(struct sna *sna)
{
	struct kgem_bo *bo, *free_bo = NULL;
	unsigned int delta = 0;

	assert(sna->render.vertex_offset == 0);
	if (sna->render.vertex_reloc[0] == 0)
		return;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));

	bo = sna->render.vbo;
	if (bo) {
		/* Fewer than 64 floats left: not worth keeping around. */
		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
			DBG(("%s: discarding full vbo\n", __FUNCTION__));
			sna->render.vbo = NULL;
			sna->render.vertices = sna->render.vertex_data;
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
			free_bo = bo;
		} else if (sna->render.vertices == MAP(bo->map__cpu)) {
			/* The GPU will read this bo; a CPU mapping would
			 * stall on domain transitions, so switch to GTT.
			 */
			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
			if (sna->render.vertices == NULL) {
				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
				sna->render.vbo = NULL;
				sna->render.vertices = sna->render.vertex_data;
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
				free_bo = bo;
			}
		}
	} else {
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
			/* Small enough: append the vertices to the batch
			 * itself and point the reloc at the batch bo.
			 */
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
			     sna->render.vertex_used, sna->kgem.nbatch));
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       sna->render.vertex_data,
			       sna->render.vertex_used * 4);
			delta = sna->kgem.nbatch * 4;
			bo = NULL;
			sna->kgem.nbatch += sna->render.vertex_used;
		} else {
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
			     sna->render.vertex_used));
			bo = kgem_create_linear(&sna->kgem,
						4*sna->render.vertex_used,
						CREATE_NO_THROTTLE);
			if (bo) {
				assert(bo->snoop == false);
				kgem_bo_write(&sna->kgem, bo,
					      sna->render.vertex_data,
					      4*sna->render.vertex_used);
			}
			free_bo = bo;
		}
	}

	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
	sna->kgem.batch[sna->render.vertex_reloc[0]] =
		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
			       bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, delta);
	sna->render.vertex_reloc[0] = 0;

	if (sna->render.vbo == NULL) {
		DBG(("%s: resetting vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		assert(sna->render.vertices == sna->render.vertex_data);
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
	}

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);
}

/* Open a new rectangle run: emit any needed S0/S1 state and reserve the
 * two-dword PRIM3D placeholder that gen3_vertex_flush() patches later.
 *
 * Returns false if there is no room left in the batch (caller must flush
 * and retry).  Reuses the previous placeholder when it is still the last
 * thing in the batch and no component-alpha second pass is pending.
 */
static bool gen3_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	int ndwords, i1_cmd = 0, i1_len = 0;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = 2;
	if (op->need_magic_ca_pass)
		ndwords += 100;	/* room for the CA second pass */
	if (sna->render.vertex_reloc[0] == 0)
		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;	/* vb address */
	if (state->floats_per_vertex != op->floats_per_vertex)
		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;	/* vertex fmt */

	if (!kgem_check_batch(&sna->kgem, ndwords+1))
		return false;

	if (i1_cmd) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
		if (sna->render.vertex_reloc[0] == 0)
			/* Defer the relocation; patched in vertex_close. */
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
		if (state->floats_per_vertex != op->floats_per_vertex) {
			state->floats_per_vertex = op->floats_per_vertex;
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
		}
	}

	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
	    !op->need_magic_ca_pass) {
		sna->render.vertex_offset = state->last_vertex_offset;
	} else {
		sna->render.vertex_offset = sna->kgem.nbatch;
		OUT_BATCH(MI_NOOP); /* to be filled later */
		OUT_BATCH(MI_NOOP);
		sna->render.vertex_start = sna->render.vertex_index;
		state->last_vertex_offset = sna->render.vertex_offset;
	}

	return true;
}
/* Out-of-space path for gen3_get_rectangles(): finish the current vertex
 * run and start a new vertex buffer.  Returns the number of floats now
 * available, or 0 if the batch itself must be submitted first.
 */
static int gen3_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention: another
	 * thread may have already refilled it while we waited.
	 */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
		return 0;

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		/* The CA pass switched blend/shader state; restore ours. */
		if (gen3_magic_ca_pass(sna, op)) {
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
			OUT_BATCH(gen3_get_blend_cntl(op->op,
						      op->has_component_alpha,
						      op->dst.format));
			gen3_composite_emit_shader(sna, op, op->op);
		}
	}

	return gen3_vertex_finish(sna);
}

/* Reserve vertex space for up to 'want' rectangles (3 vertices each).
 *
 * Returns how many rectangles actually fit (>= 1); loops internally,
 * flushing the vbo and, if necessary, submitting the whole batch and
 * re-emitting state until space is available.
 */
inline static int gen3_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem;

	DBG(("%s: want=%d, rem=%d\n",
	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);

start:
	rem = vertex_space(sna);
	if (unlikely(op->floats_per_rect > rem)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen3_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen3_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Trim the request to whatever fits in the remaining space. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;
	sna->render.vertex_index += 3*want;

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
	return want;

flush:
	/* Batch exhausted: close the run, submit, re-emit all state. */
	DBG(("%s: flushing batch\n", __FUNCTION__));
	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	gen3_emit_composite_state(sna, op);
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_reloc[0] == 0);
	goto start;
}

/* Composite a single rectangle described by r via the op's vertex emitter. */
fastcall static void
gen3_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen3_get_rectangles(sna, op, 1);

	op->prim_emit(sna, op, r);
}

/* Composite a single box, using identical src/mask/dst origins. */
fastcall static void
gen3_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__,
	     op->src.offset[0], op->src.offset[1],
	     op->mask.offset[0], op->mask.offset[1],
	     op->dst.x, op->dst.y));

	gen3_get_rectangles(sna, op, 1);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}
/* Composite a list of boxes one rectangle at a time via prim_emit.
 * Used when the op has no batched emit_boxes vector.
 */
static void
gen3_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->src.offset[0], op->src.offset[1],
	     op->mask.offset[0], op->mask.offset[1],
	     op->dst.x, op->dst.y));

	do {
		int nbox_this_time;

		/* May return fewer than nbox if the vbo fills up. */
		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			r.dst.x = box->x1; r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Composite a list of boxes using the op's batched emit_boxes vector. */
static void
gen3_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

/* Thread-safe variant of gen3_render_composite_boxes: space is claimed
 * under the vertex lock, then the lock is dropped while the (pure CPU)
 * vertex emission runs.  The acquire/release pair keeps the vbo alive
 * across the unlocked section; the ordering here is deliberate.
 */
static void
gen3_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Finish a composite operation: flush pending vertices (running the
 * component-alpha second pass if needed) and drop channel references.
 */
static void
gen3_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

/* Drop the vertex bo and fall back to the inline staging array. */
static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

/* Invalidate all cached gen3 hardware state (e.g. after a batch submit
 * or context loss) so the next operation re-emits everything.  The
 * sentinel values (0x7fffffff, 0xcc00ffee) are chosen never to match a
 * real shader id or colour.
 */
static void
gen3_render_reset(struct sna *sna)
{
	struct gen3_render_state *state = &sna->render_state.gen3;

	state->need_invariant = true;
	state->current_dst = 0;
	state->tex_count = 0;
	state->last_drawrect_limit = ~0U;
	state->last_target = 0;
	state->last_blend = 0;
	state->last_constants = 0;
	state->last_sampler = 0;
	state->last_shader = 0x7fffffff;
	state->last_diffuse = 0xcc00ffee;
	state->last_specular = 0xcc00ffee;

	state->floats_per_vertex = 0;
	state->last_floats_per_vertex = 0;
	state->last_vertex_offset = 0;

	/* An unmappable vbo would stall on the next write; drop it now. */
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding vbo as next access will stall: %lx\n",
		     __FUNCTION__, (long)sna->render.vbo->presumed_offset));
		discard_vbo(sna);
	}

	sna->render.vertex_reloc[0] = 0;
	sna->render.vertex_offset = 0;
}
if (sna->render.vertex_reloc[0] == 0 && 2734 sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { 2735 DBG(("%s: resetting idle vbo\n", __FUNCTION__)); 2736 sna->render.vertex_used = 0; 2737 sna->render.vertex_index = 0; 2738 } 2739} 2740 2741static void 2742gen3_render_expire(struct kgem *kgem) 2743{ 2744 struct sna *sna; 2745 2746 sna = container_of(kgem, struct sna, kgem); 2747 if (sna->render.vbo && !sna->render.vertex_used) { 2748 DBG(("%s: discarding vbo\n", __FUNCTION__)); 2749 discard_vbo(sna); 2750 } 2751} 2752 2753static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel, 2754 CARD32 format) 2755{ 2756 unsigned int i; 2757 2758 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { 2759 if (gen3_tex_formats[i].fmt == format) { 2760 channel->card_format = gen3_tex_formats[i].card_fmt; 2761 channel->rb_reversed = gen3_tex_formats[i].rb_reversed; 2762 return true; 2763 } 2764 } 2765 return false; 2766} 2767 2768static bool source_is_covered(PicturePtr picture, 2769 int x, int y, 2770 int width, int height) 2771{ 2772 int x1, y1, x2, y2; 2773 2774 if (picture->repeat && picture->repeatType != RepeatNone) 2775 return true; 2776 2777 if (picture->pDrawable == NULL) 2778 return false; 2779 2780 if (picture->transform) { 2781 pixman_box16_t sample; 2782 2783 sample.x1 = x; 2784 sample.y1 = y; 2785 sample.x2 = x + width; 2786 sample.y2 = y + height; 2787 2788 pixman_transform_bounds(picture->transform, &sample); 2789 2790 x1 = sample.x1; 2791 x2 = sample.x2; 2792 y1 = sample.y1; 2793 y2 = sample.y2; 2794 } else { 2795 x1 = x; 2796 y1 = y; 2797 x2 = x + width; 2798 y2 = y + height; 2799 } 2800 2801 return 2802 x1 >= 0 && y1 >= 0 && 2803 x2 <= picture->pDrawable->width && 2804 y2 <= picture->pDrawable->height; 2805} 2806 2807static bool gen3_composite_channel_set_xformat(PicturePtr picture, 2808 struct sna_composite_channel *channel, 2809 int x, int y, 2810 int width, int height) 2811{ 2812 unsigned int i; 2813 2814 if 
(PICT_FORMAT_A(picture->format) != 0) 2815 return false; 2816 2817 if (width == 0 || height == 0) 2818 return false; 2819 2820 if (!source_is_covered(picture, x, y, width, height)) 2821 return false; 2822 2823 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { 2824 if (gen3_tex_formats[i].xfmt == picture->format) { 2825 channel->card_format = gen3_tex_formats[i].card_fmt; 2826 channel->rb_reversed = gen3_tex_formats[i].rb_reversed; 2827 channel->alpha_fixup = true; 2828 return true; 2829 } 2830 } 2831 2832 return false; 2833} 2834 2835static int 2836gen3_init_solid(struct sna_composite_channel *channel, uint32_t color) 2837{ 2838 channel->u.gen3.mode = color; 2839 channel->u.gen3.type = SHADER_CONSTANT; 2840 if (color == 0) 2841 channel->u.gen3.type = SHADER_ZERO; 2842 else if (color == 0xff000000) 2843 channel->u.gen3.type = SHADER_BLACK; 2844 else if (color == 0xffffffff) 2845 channel->u.gen3.type = SHADER_WHITE; 2846 2847 channel->bo = NULL; 2848 channel->is_opaque = (color >> 24) == 0xff; 2849 channel->is_affine = 1; 2850 channel->alpha_fixup = 0; 2851 channel->rb_reversed = 0; 2852 2853 DBG(("%s: color=%08x, is_opaque=%d, type=%d\n", 2854 __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type)); 2855 2856 /* for consistency */ 2857 channel->repeat = RepeatNormal; 2858 channel->filter = PictFilterNearest; 2859 channel->pict_format = PICT_a8r8g8b8; 2860 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; 2861 2862 return 1; 2863} 2864 2865static void gen3_composite_channel_convert(struct sna_composite_channel *channel) 2866{ 2867 if (channel->u.gen3.type == SHADER_TEXTURE) 2868 channel->repeat = gen3_texture_repeat(channel->repeat); 2869 else 2870 channel->repeat = gen3_gradient_repeat(channel->repeat); 2871 2872 channel->filter = gen3_filter(channel->filter); 2873 if (channel->card_format == 0) 2874 gen3_composite_channel_set_format(channel, channel->pict_format); 2875 assert(channel->card_format); 2876} 2877 2878static bool 
gen3_gradient_setup(struct sna *sna, 2879 PicturePtr picture, 2880 struct sna_composite_channel *channel, 2881 int16_t ox, int16_t oy) 2882{ 2883 int16_t dx, dy; 2884 2885 if (picture->repeat == 0) { 2886 channel->repeat = RepeatNone; 2887 } else switch (picture->repeatType) { 2888 case RepeatNone: 2889 case RepeatNormal: 2890 case RepeatPad: 2891 case RepeatReflect: 2892 channel->repeat = picture->repeatType; 2893 break; 2894 default: 2895 return false; 2896 } 2897 2898 channel->bo = 2899 sna_render_get_gradient(sna, 2900 (PictGradient *)picture->pSourcePict); 2901 if (channel->bo == NULL) 2902 return false; 2903 2904 channel->pict_format = PICT_a8r8g8b8; 2905 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; 2906 channel->filter = PictFilterNearest; 2907 channel->is_affine = sna_transform_is_affine(picture->transform); 2908 if (sna_transform_is_imprecise_integer_translation(picture->transform, PictFilterNearest, false, &dx, &dy)) { 2909 DBG(("%s: integer translation (%d, %d), removing\n", 2910 __FUNCTION__, dx, dy)); 2911 ox += dx; 2912 oy += dy; 2913 channel->transform = NULL; 2914 } else 2915 channel->transform = picture->transform; 2916 channel->width = channel->bo->pitch / 4; 2917 channel->height = 1; 2918 channel->offset[0] = ox; 2919 channel->offset[1] = oy; 2920 channel->scale[0] = channel->scale[1] = 1; 2921 return true; 2922} 2923 2924static int 2925gen3_init_linear(struct sna *sna, 2926 PicturePtr picture, 2927 struct sna_composite_op *op, 2928 struct sna_composite_channel *channel, 2929 int ox, int oy) 2930{ 2931 PictLinearGradient *linear = 2932 (PictLinearGradient *)picture->pSourcePict; 2933 float x0, y0, sf; 2934 float dx, dy, offset; 2935 int n; 2936 2937 DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n", 2938 __FUNCTION__, 2939 xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y), 2940 xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y))); 2941 2942 if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) 2943 return 0; 2944 
	/* Configure a channel as a linear gradient.
	 *
	 * Loads four shader constants (dx, dy, -offset, 0) such that the
	 * gradient coordinate g(p) = dx*p.x + dy*p.y - offset maps p1 to 0
	 * and p2 to 1 along the gradient axis.  Returns 1 on success, 0 for
	 * a degenerate (zero-length) gradient, -1 on failure.
	 */
	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
	sf = dx*dx + dy*dy;	/* squared length; nonzero, p1 != p2 checked above */
	dx /= sf;
	dy /= sf;

	x0 = xFixedToDouble(linear->p1.x);
	y0 = xFixedToDouble(linear->p1.y);
	offset = dx*x0 + dy*y0;

	n = op->u.gen3.num_constants;
	channel->u.gen3.constants = FS_C0 + n / 4;
	op->u.gen3.constants[n++] = dx;
	op->u.gen3.constants[n++] = dy;
	op->u.gen3.constants[n++] = -offset;
	op->u.gen3.constants[n++] = 0;

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return -1;

	channel->u.gen3.type = SHADER_LINEAR;
	op->u.gen3.num_constants = n;

	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
	return 1;
}

/* Configure a channel as a radial gradient.
 *
 * Two shader variants: RADIAL_ONE for concentric circles (c1 == c2),
 * needing one constant block, and RADIAL_TWO for the general two-circle
 * case, needing two blocks with the quadratic-equation coefficients.
 * Returns 1 on success (or for the degenerate empty gradient, emitted as
 * SHADER_ZERO), -1 on failure.
 */
static int
gen3_init_radial(struct sna *sna,
		 PicturePtr picture,
		 struct sna_composite_op *op,
		 struct sna_composite_channel *channel,
		 int ox, int oy)
{
	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
	double dx, dy, dr, r1;
	int n;

	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);

	r1 = xFixedToDouble(radial->c1.radius);

	n = op->u.gen3.num_constants;
	channel->u.gen3.constants = FS_C0 + n / 4;
	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
		/* Concentric circles with equal radii cover nothing. */
		if (radial->c2.radius == radial->c1.radius) {
			channel->u.gen3.type = SHADER_ZERO;
			return 1;
		}

		/* RADIAL_ONE: t = (|p - c1| - r1) / dr, precomputed as
		 * scaled centre and radius terms (dr != 0 here).
		 */
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
		op->u.gen3.constants[n++] = 1. / dr;
		op->u.gen3.constants[n++] = -r1 / dr;

		channel->u.gen3.mode = RADIAL_ONE;
	} else {
		/* RADIAL_TWO: coefficients of the quadratic solved per
		 * pixel for the two-circle gradient parameter.
		 * NOTE(review): if dx*dx + dy*dy == dr*dr the divisor
		 * below is zero — presumably excluded upstream; confirm.
		 */
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
		op->u.gen3.constants[n++] = r1;
		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);

		op->u.gen3.constants[n++] = -2 * dx;
		op->u.gen3.constants[n++] = -2 * dy;
		op->u.gen3.constants[n++] = -2 * r1 * dr;
		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

		channel->u.gen3.mode = RADIAL_TWO;
	}

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return -1;

	channel->u.gen3.type = SHADER_RADIAL;
	op->u.gen3.num_constants = n;
	return 1;
}

/* Is the picture effectively a solid colour because its backing pixmap
 * is known-cleared and the sample area never leaves the pixmap?
 * On success stores the clear colour in *color.
 */
static bool
sna_picture_is_clear(PicturePtr picture,
		     int x, int y, int w, int h,
		     uint32_t *color)
{
	struct sna_pixmap *priv;

	if (!picture->pDrawable)
		return false;

	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
	if (priv == NULL || !priv->clear)
		return false;

	if (!source_is_covered(picture, x, y, w, h))
		return false;

	*color = priv->clear_color;
	return true;
}
/* Prepare a composite channel from a Render picture.
 *
 * Handles source-only pictures (solid/linear/radial) via the gen3
 * shaders, detects solid and cleared drawables, strips integer
 * translations, and falls back to sna_render_picture_* helpers for
 * anything the hardware cannot sample directly (alpha maps, unsupported
 * repeat/filter/format, oversized pixmaps).  Returns 1 on success, 0 for
 * a no-op source, -1 on failure.
 */
static int
gen3_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_op *op,
		       struct sna_composite_channel *channel,
		       int16_t x, int16_t y,
		       int16_t w, int16_t h,
		       int16_t dst_x, int16_t dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->card_format = 0;

	if (picture->pDrawable == NULL) {
		/* Pure source picture: try the native shader paths. */
		SourcePict *source = picture->pSourcePict;
		int ret = -1;

		switch (source->type) {
		case SourcePictTypeSolidFill:
			DBG(("%s: solid fill [%08x], format %08x\n",
			     __FUNCTION__,
			     (unsigned)source->solidFill.color,
			     (unsigned)picture->format));
			ret = gen3_init_solid(channel, source->solidFill.color);
			break;

		case SourcePictTypeLinear:
			ret = gen3_init_linear(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;

		case SourcePictTypeRadial:
			ret = gen3_init_radial(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;
		}

		if (ret == -1) {
			if (!precise)
				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
									      x, y, w, h, dst_x, dst_y);
			if (ret == -1)
				ret = sna_render_picture_fixup(sna, picture, channel,
							       x, y, w, h, dst_x, dst_y);
		}
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (sna_picture_is_solid(picture, &color)) {
		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, color);
	}

	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, solid_color(picture->format, color));
	}

	if (!gen3_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen3_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;
	channel->pict_format = picture->format;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* Integer translations are folded into (x, y); the
		 * transform and filtering are then unnecessary.
		 */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen3_init_solid(channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else {
		channel->transform = picture->transform;
		channel->is_affine = sna_transform_is_affine(picture->transform);
	}

	if (!gen3_composite_channel_set_format(channel, picture->format) &&
	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);
	assert(channel->card_format);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
		     __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height,
		     x, y, w, h));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Re-align the vertex cursor when the vertex layout (floats per vertex)
 * changes between operations, so indexed addressing stays consistent.
 * Flushes (and possibly submits) if too little aligned space remains.
 */
static void
gen3_align_vertex(struct sna *sna,
		  const struct sna_composite_op *op)
{
	int vertex_index;

	if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
		return;

	DBG(("aligning vertex: was %d, now %d floats per vertex\n",
	     sna->render_state.gen3.last_floats_per_vertex,
	     op->floats_per_vertex));

	assert(op->floats_per_rect == 3*op->floats_per_vertex);

	/* Round up to the next whole vertex in the new layout. */
	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
		if (gen3_vertex_finish(sna) < 2*op->floats_per_vertex)
			kgem_submit(&sna->kgem);

		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	}

	sna->render.vertex_index = vertex_index;
	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
}

/* Select and validate the destination bo for a composite operation.
 * Fills op->dst.* and may redirect rendering through a proxy when the
 * target is too large or has an unusable pitch.  Returns false if the
 * GPU cannot be used for this destination.
 */
static bool
gen3_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;	/* whole surface overwritten */
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	assert(op->dst.bo->unique_id);

	/* For single-stream mode there should be no minimum alignment
	 * required, except that the width must be at least 2 elements.
	 * Furthermore, it appears that the pitch must be a multiple of
	 * 2 elements.
	 */
	if (op->dst.bo->pitch & ((2*op->dst.pixmap->drawable.bitsPerPixel >> 3) - 1))
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if ((too_large(op->dst.width, op->dst.height) ||
	     !gen3_check_pitch_3d(op->dst.bo)) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
/* Rounded 8-bit multiply: (a * b) / 255 with correct rounding,
 * computed without a division (the classic +0x7f, add-high-byte trick).
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
	uint16_t prod = (uint16_t)a * b + 0x7f;
	return (uint8_t)((prod + (prod >> 8)) >> 8);
}

/* Multiply one 8-bit channel of s (selected by shift) by the alpha byte
 * of m, returning the scaled channel back in its original position.
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
	uint32_t chan = (s >> shift) & 0xff;
	uint32_t alpha = m >> 24;

	return (uint32_t)mul_8_8((uint8_t)chan, (uint8_t)alpha) << shift;
}
/* True for shader types that produce a constant colour for every pixel
 * (no per-pixel texture fetch needed).
 */
static inline bool is_constant_ps(uint32_t type)
{
	switch (type) {
	case SHADER_NONE: /* be warned! */
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
	case SHADER_CONSTANT:
		return true;
	default:
		return false;
	}
}

/* Does the picture carry a separate alpha map (unsupported in HW)? */
static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

/* Would using this picture require uploading untouched CPU data? */
static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

/* Is the pixmap currently in use by the GPU (so reading it back for a
 * CPU fallback would stall)?
 */
static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

/* Is this a source-only picture type the gen3 shaders cannot evaluate?
 * (Conical gradients etc.; tolerated when imprecise rendering allows an
 * approximation.)
 */
static bool
is_unhandled_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
	case SourcePictTypeRadial:
		return false;
	default:
		return precise;
	}
}

/* Would this source channel force a software fallback? */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (is_unhandled_gradient(p, precise))
		return true;

	if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
		return true;

	/* Busy GPU data: prefer keeping the operation on the GPU. */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
}

/* Decide whether the whole composite should be done in software.
 * Weighs where the data currently lives (GPU vs CPU) against which
 * operands the gen3 hardware can handle natively.
 */
static bool
gen3_composite_fallback(struct sna *sna,
			uint8_t op,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen3_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* Component alpha needs the two-pass trick, which only works for
	 * blend modes whose source factor can be split (see magic_ca_pass).
	 */
	if (mask &&
	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
	    gen3_blend_op[op].src_alpha &&
	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
	    op != PictOpOver) {
		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
		     __FUNCTION__, op));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
	return dst_use_cpu(dst_pixmap);
}
operation in place onthe CPU. 3448 */ 3449 if (src_fallback) { 3450 DBG(("%s: dst is on the CPU and src will fallback\n", 3451 __FUNCTION__)); 3452 return true; 3453 } 3454 3455 if (mask && mask_fallback) { 3456 DBG(("%s: dst is on the CPU and mask will fallback\n", 3457 __FUNCTION__)); 3458 return true; 3459 } 3460 3461 if (too_large(dst_pixmap->drawable.width, 3462 dst_pixmap->drawable.height) && 3463 dst_is_cpu(dst_pixmap)) { 3464 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 3465 return true; 3466 } 3467 3468 DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n", 3469 __FUNCTION__, dst_use_cpu(dst_pixmap))); 3470 return dst_use_cpu(dst_pixmap); 3471} 3472 3473static int 3474reuse_source(struct sna *sna, 3475 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 3476 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 3477{ 3478 if (src_x != msk_x || src_y != msk_y) 3479 return false; 3480 3481 if (mask == src) { 3482 *mc = *sc; 3483 if (mc->bo) 3484 kgem_bo_reference(mc->bo); 3485 return true; 3486 } 3487 3488 if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable)) 3489 return false; 3490 3491 if (sc->is_solid) 3492 return false; 3493 3494 DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 3495 3496 if (!sna_transform_equal(src->transform, mask->transform)) 3497 return false; 3498 3499 if (!sna_picture_alphamap_equal(src, mask)) 3500 return false; 3501 3502 if (!gen3_check_repeat(mask)) 3503 return false; 3504 3505 if (!gen3_check_filter(mask)) 3506 return false; 3507 3508 if (!gen3_check_format(mask)) 3509 return false; 3510 3511 DBG(("%s: reusing source channel for mask with a twist\n", 3512 __FUNCTION__)); 3513 3514 *mc = *sc; 3515 mc->repeat = gen3_texture_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); 3516 mc->filter = gen3_filter(mask->filter); 3517 mc->pict_format = mask->format; 3518 gen3_composite_channel_set_format(mc, mask->format); 3519 assert(mc->card_format); 3520 if (mc->bo) 3521 kgem_bo_reference(mc->bo); 3522 return true; 3523} 3524 3525static bool 3526gen3_render_composite(struct sna *sna, 3527 uint8_t op, 3528 PicturePtr src, 3529 PicturePtr mask, 3530 PicturePtr dst, 3531 int16_t src_x, int16_t src_y, 3532 int16_t mask_x, int16_t mask_y, 3533 int16_t dst_x, int16_t dst_y, 3534 int16_t width, int16_t height, 3535 unsigned flags, 3536 struct sna_composite_op *tmp) 3537{ 3538 DBG(("%s()\n", __FUNCTION__)); 3539 3540 if (op >= ARRAY_SIZE(gen3_blend_op)) { 3541 DBG(("%s: fallback due to unhandled blend op: %d\n", 3542 __FUNCTION__, op)); 3543 return false; 3544 } 3545 3546 /* Try to use the BLT engine unless it implies a 3547 * 3D -> 2D context switch. 3548 */ 3549 if (mask == NULL && 3550 sna_blt_composite(sna, 3551 op, src, dst, 3552 src_x, src_y, 3553 dst_x, dst_y, 3554 width, height, 3555 flags, tmp)) 3556 return true; 3557 3558 if (gen3_composite_fallback(sna, op, src, mask, dst)) 3559 goto fallback; 3560 3561 if (need_tiling(sna, width, height)) 3562 return sna_tiling_composite(op, src, mask, dst, 3563 src_x, src_y, 3564 mask_x, mask_y, 3565 dst_x, dst_y, 3566 width, height, 3567 tmp); 3568 3569 if (!gen3_composite_set_target(sna, tmp, dst, 3570 dst_x, dst_y, width, height, 3571 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 3572 DBG(("%s: unable to set render target\n", 3573 __FUNCTION__)); 3574 goto fallback; 3575 } 3576 3577 tmp->op = op; 3578 tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format); 3579 tmp->u.gen3.num_constants = 0; 3580 tmp->src.u.gen3.type = SHADER_TEXTURE; 3581 tmp->src.is_affine = true; 3582 DBG(("%s: preparing source\n", __FUNCTION__)); 3583 switch (gen3_composite_picture(sna, src, tmp, &tmp->src, 3584 src_x, src_y, 3585 width, height, 3586 dst_x, dst_y, 3587 dst->polyMode == 
				       PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to nothing — render transparent. */
		tmp->src.u.gen3.type = SHADER_ZERO;
		break;
	case 1:
		/* Source became a plain texture; the BLT may still be
		 * able to finish an unmasked copy more cheaply. */
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen3_composite_channel_convert(&tmp->src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));

	tmp->mask.u.gen3.type = SHADER_NONE;
	tmp->mask.is_affine = true;
	tmp->need_magic_ca_pass = false;
	tmp->has_component_alpha = false;
	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
		/* Reuse the source channel state when mask aliases src. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			tmp->mask.u.gen3.type = SHADER_TEXTURE;
			DBG(("%s: preparing mask\n", __FUNCTION__));
			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
						       mask_x, mask_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				tmp->mask.u.gen3.type = SHADER_ZERO;
				break;
			case 1:
				gen3_composite_channel_convert(&tmp->mask);
				break;
			}
		}
		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
			/* A zero mask zeroes the whole composite; drop the
			 * source bo and degrade to the trivial shaders. */
			if (tmp->src.bo) {
				kgem_bo_destroy(&sna->kgem,
						tmp->src.bo);
				tmp->src.bo = NULL;
			}
			tmp->src.u.gen3.type = SHADER_ZERO;
			tmp->mask.u.gen3.type = SHADER_NONE;
		}

		if (tmp->mask.u.gen3.type != SHADER_NONE) {
			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
				/* Check if it's component alpha that relies on a source alpha
				 * and on the source value. We can only get one of those
				 * into the single source value that we get to blend with.
				 */
				DBG(("%s: component-alpha mask: %d\n",
				     __FUNCTION__, tmp->mask.u.gen3.type));
				tmp->has_component_alpha = true;
				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
					tmp->mask.u.gen3.type = SHADER_NONE;
					tmp->has_component_alpha = false;
				} else if (gen3_blend_op[op].src_alpha &&
					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
					if (op != PictOpOver)
						goto cleanup_mask;

					/* CA OVER is rendered as OutReverse
					 * followed by a second (magic) pass. */
					tmp->need_magic_ca_pass = true;
					tmp->op = PictOpOutReverse;
				}
			} else {
				if (tmp->mask.is_opaque) {
					tmp->mask.u.gen3.type = SHADER_NONE;
				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
					   is_constant_ps(tmp->mask.u.gen3.type)) {
					uint32_t v;

					/* Fold constant source IN constant
					 * mask into one constant colour,
					 * channel by channel. */
					v = multa(tmp->src.u.gen3.mode,
						  tmp->mask.u.gen3.mode,
						  24);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   16);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   8);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   0);

					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
					     __FUNCTION__,
					     tmp->src.u.gen3.mode,
					     tmp->mask.u.gen3.mode,
					     v));

					tmp->src.u.gen3.type = SHADER_CONSTANT;
					tmp->src.u.gen3.mode = v;
					tmp->src.is_opaque = false;

					tmp->mask.u.gen3.type = SHADER_NONE;
				}
			}
		}
	}
	DBG(("%s: final src/mask type=%d/%d [constant? %d/%d], transform? %d/%d, affine=%d/%d\n", __FUNCTION__,
	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
	     is_constant_ps(tmp->src.u.gen3.type),
	     is_constant_ps(tmp->mask.u.gen3.type),
	     !!tmp->src.transform, !!tmp->mask.transform,
	     tmp->src.is_affine, tmp->mask.is_affine));

	/* Pick the fastest specialised vertex emitter available for this
	 * src/mask shader combination (SSE2 variant when present). */
	tmp->prim_emit = gen3_emit_composite_primitive;
	if (is_constant_ps(tmp->mask.u.gen3.type)) {
		switch (tmp->src.u.gen3.type) {
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_primitive_constant;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
			}

			break;
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
				}
			} else if (tmp->src.is_affine) {
				/* Fold the homogeneous w into the scale. */
				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
				}
			}
			break;
		case SHADER_TEXTURE:
			if (tmp->src.transform == NULL) {
				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
					}
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
				}
			}
			break;
		}
	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
		if (tmp->mask.transform == NULL) {
			if (is_constant_ps(tmp->src.u.gen3.type)) {
				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
					}
				}
			} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
				}
			}
		}
	}

	/* 2 floats for the dst coordinate, plus 2 (affine) or 4
	 * (projective) per non-constant channel. */
	tmp->floats_per_vertex = 2;
	if (!is_constant_ps(tmp->src.u.gen3.type))
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
	if (!is_constant_ps(tmp->mask.u.gen3.type))
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
	     tmp->floats_per_vertex,
	     tmp->prim_emit != gen3_emit_composite_primitive));
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->blt = gen3_render_composite_blt;
	tmp->box = gen3_render_composite_box;
	tmp->boxes = gen3_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen3_render_composite_boxes;
		tmp->thread_boxes = gen3_render_composite_boxes__thread;
	}
	tmp->done = gen3_render_composite_done;

	/* Ensure all three buffers fit in the current batch; flush once
	 * and retry before giving up. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
	}

	gen3_align_vertex(sna, tmp);
	gen3_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	/* Last resort: hand the (unmasked) operation to the BLT. */
	return (mask == NULL &&
		sna_blt_composite(sna,
				  op, src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

/* Emit a single span vertex: dst coordinate, src texcoord, opacity. */
static void
gen3_emit_composite_spans_vertex(struct sna *sna,
				 const struct sna_composite_spans_op *op,
				 int16_t x, int16_t y,
				 float opacity)
{
	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
	OUT_VERTEX(opacity);
}

fastcall static void
3909gen3_emit_composite_spans_primitive_zero(struct sna *sna, 3910 const struct sna_composite_spans_op *op, 3911 const BoxRec *box, 3912 float opacity) 3913{ 3914 float *v = sna->render.vertices + sna->render.vertex_used; 3915 sna->render.vertex_used += 6; 3916 assert(sna->render.vertex_used <= sna->render.vertex_size); 3917 3918 v[0] = op->base.dst.x + box->x2; 3919 v[1] = op->base.dst.y + box->y2; 3920 3921 v[2] = op->base.dst.x + box->x1; 3922 v[3] = v[1]; 3923 3924 v[4] = v[2]; 3925 v[5] = op->base.dst.x + box->y1; 3926} 3927 3928fastcall static void 3929gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op, 3930 const struct sna_opacity_box *b, 3931 int nbox, float *v) 3932{ 3933 do { 3934 v[0] = op->base.dst.x + b->box.x2; 3935 v[1] = op->base.dst.y + b->box.y2; 3936 3937 v[2] = op->base.dst.x + b->box.x1; 3938 v[3] = v[1]; 3939 3940 v[4] = v[2]; 3941 v[5] = op->base.dst.x + b->box.y1; 3942 3943 v += 6; 3944 b++; 3945 } while (--nbox); 3946} 3947 3948fastcall static void 3949gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, 3950 const struct sna_composite_spans_op *op, 3951 const BoxRec *box, 3952 float opacity) 3953{ 3954 float *v = sna->render.vertices + sna->render.vertex_used; 3955 sna->render.vertex_used += 6; 3956 assert(sna->render.vertex_used <= sna->render.vertex_size); 3957 3958 v[0] = box->x2; 3959 v[3] = v[1] = box->y2; 3960 v[4] = v[2] = box->x1; 3961 v[5] = box->y1; 3962} 3963 3964fastcall static void 3965gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op, 3966 const struct sna_opacity_box *b, 3967 int nbox, float *v) 3968{ 3969 do { 3970 v[0] = b->box.x2; 3971 v[3] = v[1] = b->box.y2; 3972 v[4] = v[2] = b->box.x1; 3973 v[5] = b->box.y1; 3974 3975 b++; 3976 v += 6; 3977 } while (--nbox); 3978} 3979 3980fastcall static void 3981gen3_emit_composite_spans_primitive_constant(struct sna *sna, 3982 const struct sna_composite_spans_op *op, 3983 const 
					     BoxRec *box,
					     float opacity)
{
	/* Constant shader: 3 floats/vertex (x, y, opacity). */
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[3] = op->base.dst.x + box->x1;
	v[4] = v[1] = op->base.dst.y + box->y2;
	v[7] = op->base.dst.y + box->y1;
	v[8] = v[5] = v[2] = opacity;
}

/* Batched constant-shader span emitter. */
fastcall static void
gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
						    const struct sna_opacity_box *b,
						    int nbox,
						    float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[6] = v[3] = op->base.dst.x + b->box.x1;
		v[4] = v[1] = op->base.dst.y + b->box.y2;
		v[7] = op->base.dst.y + b->box.y1;
		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

/* Constant shader, target with no drawable offset. */
fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
						       const struct sna_composite_spans_op *op,
						       const BoxRec *box,
						       float opacity)
{
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[6] = v[3] = box->x1;
	v[4] = v[1] = box->y2;
	v[7] = box->y1;
	v[8] = v[5] = v[2] = opacity;
}

fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
							      const struct sna_opacity_box *b,
							      int nbox, float *v)
{
	do {
		v[0] = b->box.x2;
		v[6] = v[3] = b->box.x1;
		v[4] = v[1] = b->box.y2;
		v[7] = b->box.y1;
		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

/* Untransformed texture source: 5 floats/vertex
 * (x, y, u, v, opacity), texcoords scaled to [0,1]. */
fastcall static void
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[8] = v[3];
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	v[12] = v[7];
	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
	v[14] = opacity;
}

fastcall static void
gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
							   const struct sna_opacity_box *b,
							   int nbox,
							   float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
		v[8] = v[3];
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		v[12] = v[7];
		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
		v[14] = b->alpha;

		v += 15;
		b++;
	} while (--nbox);
}

/* Affine-transformed texture source: texcoords run through the
 * picture transform before scaling. */
fastcall static void
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const BoxRec *box,
						  float opacity)
{
	PictTransform *transform = op->base.src.transform;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	v[14] = v[9] = v[4] = opacity;

	/* Vertices (x2,y2), (x1,y2), (x1,y1) in turn. */
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[7], &v[8]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[12], &v[13]);
}

fastcall static void
gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
							 const struct sna_opacity_box *b,
							 int nbox,
							 float *v)
{
	PictTransform *transform = op->base.src.transform;

	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[6] = v[1] = op->base.dst.y + b->box.y2;
		v[10] = v[5] = op->base.dst.x + b->box.x1;
		v[11] = op->base.dst.y + b->box.y1;
		v[14] = v[9] = v[4] = b->alpha;

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
					    (int)op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[2], &v[3]);

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
					    (int)op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[7], &v[8]);

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
					    (int)op->base.src.offset[1] + b->box.y1,
					    transform, op->base.src.scale,
					    &v[12], &v[13]);
		v += 15;
		b++;
	} while (--nbox);
}

fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
						      const struct sna_composite_spans_op *op,
						      const BoxRec *box,
						      float opacity)
{
	/* Untransformed gradient: coordinates are passed unscaled to
	 * the gradient shader. 5 floats/vertex. */
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	v[2] = op->base.src.offset[0] + box->x2;
	v[3] = op->base.src.offset[1] + box->y2;
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	v[7] = op->base.src.offset[0] + box->x1;
	v[8] = v[3];
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	v[12] = v[7];
	v[13] = op->base.src.offset[1] + box->y1;
	v[14] = opacity;
}

fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
							     const struct sna_opacity_box *b,
							     int nbox,
							     float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		v[2] = op->base.src.offset[0] + b->box.x2;
		v[3] = op->base.src.offset[1] + b->box.y2;
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		v[7] = op->base.src.offset[0] + b->box.x1;
		v[8] = v[3];
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		v[12] = v[7];
		v[13] = op->base.src.offset[1] + b->box.y1;
		v[14] = b->alpha;

		v += 15;
		b++;
	} while (--nbox);
}

/* SSE2-compiled clones of the span emitters for 32-bit builds where
 * SSE2 must be selected at runtime. */
#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
						   const struct sna_composite_spans_op *op,
						   const BoxRec *box,
						   float opacity)
{
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[3] = op->base.dst.x + box->x1;
	v[4] = v[1] = op->base.dst.y + box->y2;
	v[7] = op->base.dst.y + box->y1;
	v[8] = v[5] = v[2] = opacity;
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
							  const struct sna_opacity_box *b,
							  int nbox,
							  float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[6] = v[3] = op->base.dst.x + b->box.x1;
		v[4] = v[1] = op->base.dst.y + b->box.y2;
		v[7] = op->base.dst.y + b->box.y1;
		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

/* Reserve rectangle space and emit one constant-shader span box. */
sse2 fastcall static void
gen3_render_composite_spans_constant_box__sse2(struct sna *sna,
					       const struct sna_composite_spans_op *op,
					       const BoxRec *box, float opacity)
{
	float *v;
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen3_get_rectangles(sna, &op->base, 1);

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[6] = v[3] = box->x1;
	v[4] = v[1] = box->y2;
	v[7] = box->y1;
	v[8] = v[5] = v[2] = opacity;
}

/* Multi-threaded constant-shader box emitter: reserve vertices under
 * the render lock, then fill them with the lock dropped. */
sse2 fastcall static void
gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
							 const struct sna_composite_spans_op *op,
							 const struct sna_opacity_box *box,
							 int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * 9;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		/* Write the reserved range outside the lock so other
		 * threads can reserve concurrently. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		do {
			v[0] = box->box.x2;
			v[6] = v[3] = box->box.x1;
			v[4] = v[1] = box->box.y2;
			v[7] = box->box.y1;
			v[8] = v[5] = v[2] = box->alpha;
			v += 9;
			box++;
		} while (--nbox_this_time);

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
							      const struct sna_composite_spans_op *op,
							      const BoxRec *box,
							      float opacity)
{
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[6] = v[3] = box->x1;
	v[4] = v[1] = box->y2;
	v[7] = box->y1;
	v[8] = v[5] = v[2] = opacity;
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
								     const struct sna_opacity_box *b,
								     int nbox, float *v)
{
	do {
		v[0] = b->box.x2;
		v[6] = v[3] = b->box.x1;
		v[4] = v[1] = b->box.y2;
		v[7] = b->box.y1;
		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float
							  opacity)
{
	/* SSE2 clone of the untransformed texture-source span emitter:
	 * 5 floats/vertex (x, y, u, v, opacity). */
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[8] = v[3];
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	v[12] = v[7];
	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
	v[14] = opacity;
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
								 const struct sna_opacity_box *b,
								 int nbox,
								 float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
		v[8] = v[3];
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		v[12] = v[7];
		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
		v[14] = b->alpha;

		v += 15;
		b++;
	} while (--nbox);
}

/* SSE2 clone of the affine texture-source span emitter. */
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
							const struct sna_composite_spans_op *op,
							const BoxRec *box,
							float opacity)
{
	PictTransform *transform = op->base.src.transform;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[6] = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	v[14] = v[9] = v[4] = opacity;

	/* Texcoords for vertices (x2,y2), (x1,y2), (x1,y1). */
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[7], &v[8]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[12], &v[13]);
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
							       const struct sna_opacity_box *b,
							       int nbox,
							       float *v)
{
	PictTransform *transform = op->base.src.transform;

	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[6] = v[1] = op->base.dst.y + b->box.y2;
		v[10] = v[5] = op->base.dst.x + b->box.x1;
		v[11] = op->base.dst.y + b->box.y1;
		v[14] = v[9] = v[4] = b->alpha;

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
					    (int)op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[2], &v[3]);

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
					    (int)op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[7], &v[8]);

		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
					    (int)op->base.src.offset[1] + b->box.y1,
					    transform, op->base.src.scale,
					    &v[12], &v[13]);
		v += 15;
		b++;
	} while (--nbox);
}

/* SSE2 clone of the untransformed gradient span emitter. */
sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
							    const struct sna_composite_spans_op *op,
							    const BoxRec *box,
							    float opacity)
{
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	v[2] = op->base.src.offset[0] + box->x2;
	v[3] = op->base.src.offset[1] + box->y2;
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	v[7] = op->base.src.offset[0] + box->x1;
	v[8] = v[3];
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	v[12] = v[7];
	v[13] = op->base.src.offset[1] + box->y1;
	v[14] = opacity;
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
								   const struct sna_opacity_box *b,
								   int nbox,
								   float *v)
{
	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		v[2] = op->base.src.offset[0] + b->box.x2;
		v[3] = op->base.src.offset[1] + b->box.y2;
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		v[7] = op->base.src.offset[0] + b->box.x1;
		v[8] = v[3];
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		v[12] = v[7];
		v[13] = op->base.src.offset[1] + b->box.y1;
		v[14] = b->alpha;

		v += 15;
		b++;
	} while (--nbox);
}

/* SSE2 clone of the affine gradient span emitter. */
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float opacity)
{
	PictTransform *transform = op->base.src.transform;
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
				    op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[2], &v[3]);
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[7], &v[8]);
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[12], &v[13]);
	v[14] = opacity;
}

sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
								 const struct sna_opacity_box *b,
								 int nbox,
								 float *v)
{
	PictTransform *transform = op->base.src.transform;

	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
					    op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[2], &v[3]);
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[7], &v[8]);
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y1,
					    transform, op->base.src.scale,
					    &v[12], &v[13]);
		v[14] = b->alpha;
		v += 15;
		b++;
	} while (--nbox);
}
#endif
/* Plain (non-SSE2) emitter for one affine-gradient span; same vertex
 * layout as the __sse2 variants: 3 vertices x 5 floats.
 */
fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	PictTransform *transform = op->base.src.transform;
	float *v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 15;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
				    op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[2], &v[3]);
	v[4] = opacity;

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[7], &v[8]);
	v[9] = opacity;

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[12], &v[13]);
	v[14] = opacity;
}

/* Batched plain affine-gradient emitter, 15 floats per box. */
fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
							   const struct sna_opacity_box *b,
							   int nbox,
							   float *v)
{
	PictTransform *transform = op->base.src.transform;

	do {
		v[0] = op->base.dst.x + b->box.x2;
		v[1] = op->base.dst.y + b->box.y2;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
					    op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[2], &v[3]);
		v[4] = b->alpha;

		v[5] = op->base.dst.x + b->box.x1;
		v[6] = v[1];
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y2,
					    transform, op->base.src.scale,
					    &v[7], &v[8]);
		v[9] = b->alpha;

		v[10] = v[5];
		v[11] = op->base.dst.y + b->box.y1;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y1,
					    transform, op->base.src.scale,
					    &v[12], &v[13]);
		v[14] = b->alpha;
		v += 15;
		b++;
	} while (--nbox);
}

/* Generic fallback: emit the three corner vertices through the slow
 * per-vertex helper (used when no specialised emitter applies).
 */
fastcall static void
gen3_emit_composite_spans_primitive(struct sna *sna,
				    const struct sna_composite_spans_op *op,
				    const BoxRec *box,
				    float opacity)
{
	gen3_emit_composite_spans_vertex(sna, op,
					 box->x2, box->y2,
					 opacity);
	gen3_emit_composite_spans_vertex(sna, op,
					 box->x1, box->y2,
					 opacity);
	gen3_emit_composite_spans_vertex(sna, op,
					 box->x1, box->y1,
					 opacity);
}

/* Constant-source span with no destination offset: only (x, y, opacity)
 * per vertex, 9 floats per rectangle.
 */
fastcall static void
gen3_render_composite_spans_constant_box(struct sna *sna,
					 const struct sna_composite_spans_op *op,
					 const BoxRec *box, float opacity)
{
	float *v;
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen3_get_rectangles(sna, &op->base, 1);

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = box->x2;
	v[6] = v[3] = box->x1;
	v[4] = v[1] = box->y2;
	v[7] = box->y1;
	v[8] = v[5] = v[2] = opacity;
}

/* Threaded constant-source path: reserve vertex space while holding the
 * render lock, then drop the lock and fill the vertices so other threads
 * can make progress concurrently.
 */
fastcall static void
gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const struct sna_opacity_box *box,
						  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * 9;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		do {
			v[0] = box->box.x2;
			v[6] = v[3] = box->box.x1;
			v[4] = v[1] = box->box.y2;
			v[7] = box->box.y1;
			v[8] = v[5] = v[2] = box->alpha;
			v += 9;
			box++;
		} while (--nbox_this_time);

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Emit a single span box through the operation's prim_emit hook. */
fastcall static void
gen3_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen3_get_rectangles(sna, &op->base, 1);
	op->prim_emit(sna, op, box, opacity);
}

/* Emit a run of span boxes sharing one opacity value, batching as many
 * rectangles per pass as the vertex buffer allows.
 */
static void
gen3_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		nbox -= nbox_this_time;

		do {
			DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - 
box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

/* Threaded span emission: reserve vertex space under the render lock,
 * then fill it via the operation's emit_boxes hook outside the lock.
 */
fastcall static void
gen3_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

/* Finish a span operation: flush any pending vertices, drop the source
 * bo reference and undo any destination redirection.
 */
fastcall static void
gen3_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen3_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

/* Cheap feasibility check for the spans fast path: known blend op, no
 * forced software fallback, and tiled operations must stay on the GPU.
 */
static bool
gen3_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
	if (op >= ARRAY_SIZE(gen3_blend_op))
		return false;

	if (gen3_composite_fallback(sna, op, src, NULL, dst))
		return false;

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	return true;
}

/* Prepare a composite-spans operation: bind the render target, analyse
 * the source picture, and select the fastest prim_emit/emit_boxes pair
 * for the resulting shader type (preferring the SSE2 variants on 32-bit
 * builds when the CPU supports them). The per-span opacity is fed to the
 * shader as an OPACITY mask channel.
 */
static bool
gen3_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	bool no_offset;

	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	if (!gen3_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->base.op = op;
	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
	tmp->base.src.is_affine = true;
	DBG(("%s: preparing source\n", __FUNCTION__));
	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		tmp->base.src.u.gen3.type = SHADER_ZERO;
		break;
	case 1:
		gen3_composite_channel_convert(&tmp->base.src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));

	/* A zero source needs no opacity modulation; everything else is
	 * multiplied by the per-span alpha via the OPACITY mask shader.
	 */
	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
		tmp->base.mask.u.gen3.type = SHADER_OPACITY;

	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
	tmp->box   = gen3_render_composite_spans_box;
	tmp->boxes = gen3_render_composite_spans_boxes;
	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
	tmp->done = gen3_render_composite_spans_done;
	tmp->prim_emit = gen3_emit_composite_spans_primitive;
	switch (tmp->base.src.u.gen3.type) {
	case SHADER_NONE:
		assert(0);
		/* fallthrough */
	case SHADER_ZERO:
		if (no_offset) {
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
		} else {
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
		}
		break;
	case SHADER_BLACK:
	case SHADER_WHITE:
	case SHADER_CONSTANT:
		if (no_offset) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->box = gen3_render_composite_spans_constant_box__sse2;
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
			} else
#endif
			{
				tmp->box = gen3_render_composite_spans_constant_box;
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
			}
		} else {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
			}
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
			}
		} else if (tmp->base.src.is_affine) {
			/* Fold the homogeneous divisor into the scale. */
			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
			}
		}
		break;
	case SHADER_TEXTURE:
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
			}
		} else if (tmp->base.src.is_affine) {
			/* Fold the homogeneous divisor into the scale. */
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
			tmp->base.src.scale[1] /= 
tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
			}
		}
		break;
	}
	/* Without a batched emitter the threaded path cannot be used. */
	if (tmp->emit_boxes == NULL)
		tmp->thread_boxes = NULL;

	tmp->base.mask.bo = NULL;

	/* (x, y) + optional texcoords + the opacity float. */
	tmp->base.floats_per_vertex = 2;
	if (!is_constant_ps(tmp->base.src.u.gen3.type))
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
	tmp->base.floats_per_vertex +=
		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen3_align_vertex(sna, &tmp->base);
	gen3_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}

/* Emit the 3D state for an Xv video blit: render target, vertex format,
 * blend state, sampler/map state and the colour-conversion pixel shader.
 * Packed formats (YUY2/UYVY) use the hardware 422 YCrCb sampler; planar
 * formats bind three 8-bit maps (Y, U, V) and convert in the shader.
 * Redundant constant/sampler packets are rewound if they match the last
 * ones emitted into this batch.
 */
static void
gen3_emit_video_state(struct sna *sna,
		      struct sna_video *video,
		      struct sna_video_frame *frame,
		      PixmapPtr pixmap,
		      struct kgem_bo *dst_bo,
		      int width, int height,
		      bool bilinear)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t id, ms3, rewind;

	gen3_emit_target(sna, dst_bo, width, height,
			 sna_format_for_depth(pixmap->drawable.depth));

	/* XXX share with composite? Is it worth the effort? */
	if ((state->last_shader & (1<<31)) == 0) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
			  2);
		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
			  S6_COLOR_WRITE_ENABLE);

		state->last_blend = 0;
		state->floats_per_vertex = 4;
	}

	if (!is_planar_fourcc(frame->id)) {
		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
		OUT_BATCH(0x0000001);	/* constant 0 */
		/* constant 0: brightness/contrast */
		OUT_BATCH_F(video->brightness / 128.0);
		OUT_BATCH_F(video->contrast / 255.0);
		OUT_BATCH_F(0.0);
		OUT_BATCH_F(0.0);
		if (state->last_constants &&
		    memcmp(&sna->kgem.batch[state->last_constants],
			   &sna->kgem.batch[rewind],
			   6*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_constants = rewind;

		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
		OUT_BATCH(0x00000001);
		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
			  SS3_NORMALIZED_COORDS);
		OUT_BATCH(0x00000000);
		if (state->last_sampler &&
		    memcmp(&sna->kgem.batch[state->last_sampler],
			   &sna->kgem.batch[rewind],
			   5*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_sampler = rewind;

		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
		OUT_BATCH(0x00000001);	/* texture map #1 */
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 frame->bo,
					 I915_GEM_DOMAIN_SAMPLER << 16,
					 0));

		ms3 = MAPSURF_422;
		switch (frame->id) {
		case FOURCC_YUY2:
			ms3 |= MT_422_YCRCB_NORMAL;
			break;
		case FOURCC_UYVY:
			ms3 |= MT_422_YCRCB_SWAPY;
			break;
		}
		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
		OUT_BATCH(ms3);
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);

		/* Shader key: bit31 = video, 1<<1 = packed path, bit0 =
		 * whether a brightness adjustment is required.
		 */
		id = 1<<31 | 1<<1 | !!video->brightness;
		if (state->last_shader != id) {
			state->last_shader = id;
			/* Reserve a dword for the program header; patched
			 * below once the program length is known.
			 */
			id = sna->kgem.nbatch++;

			gen3_fs_dcl(FS_S0);
			gen3_fs_dcl(FS_T0);
			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
			if (video->brightness != 0) {
				gen3_fs_add(FS_OC,
					    gen3_fs_operand_reg(FS_OC),
					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
			}

			sna->kgem.batch[id] =
				_3DSTATE_PIXEL_SHADER_PROGRAM |
				(sna->kgem.nbatch - id - 2);
		}
	} else {
		/* For the planar formats, we set up three samplers --
		 * one for each plane, in a Y8 format.  Because I
		 * couldn't get the special PLANAR_TO_PACKED
		 * shader setup to work, I did the manual pixel shader:
		 *
		 * y' = y - .0625
		 * u' = u - .5
		 * v' = v - .5;
		 *
		 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
		 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
		 *
		 * register assignment:
		 * r0 = (y',u',v',0)
		 * r1 = (y,y,y,y)
		 * r2 = (u,u,u,u)
		 * r3 = (v,v,v,v)
		 * OC = (r,g,b,1)
		 */
		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
		OUT_BATCH(0x000001f);	/* constants 0-4 */
		/* constant 0: normalization offsets */
		OUT_BATCH_F(-0.0625);
		OUT_BATCH_F(-0.5);
		OUT_BATCH_F(-0.5);
		OUT_BATCH_F(0.0);
		/* constant 1: r coefficients */
		OUT_BATCH_F(1.1643);
		OUT_BATCH_F(0.0);
		OUT_BATCH_F(1.5958);
		OUT_BATCH_F(0.0);
		/* constant 2: g coefficients */
		OUT_BATCH_F(1.1643);
		OUT_BATCH_F(-0.39173);
		OUT_BATCH_F(-0.81290);
		OUT_BATCH_F(0.0);
		/* constant 3: b coefficients */
		OUT_BATCH_F(1.1643);
		OUT_BATCH_F(2.017);
		OUT_BATCH_F(0.0);
		OUT_BATCH_F(0.0);
		/* constant 4: brightness/contrast */
		OUT_BATCH_F(video->brightness / 128.0);
		OUT_BATCH_F(video->contrast / 255.0);
		OUT_BATCH_F(0.0);
		OUT_BATCH_F(0.0);
		if (state->last_constants &&
		    memcmp(&sna->kgem.batch[state->last_constants],
			   &sna->kgem.batch[rewind],
			   22*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_constants = rewind;

		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
		OUT_BATCH(0x00000007);
		/* sampler 0 */
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
			  SS3_NORMALIZED_COORDS);
		OUT_BATCH(0x00000000);
		/* sampler 1 */
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
			  SS3_NORMALIZED_COORDS);
		OUT_BATCH(0x00000000);
		/* sampler 2 */
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
			  SS3_NORMALIZED_COORDS);
		OUT_BATCH(0x00000000);
		if (state->last_sampler &&
		    memcmp(&sna->kgem.batch[state->last_sampler],
			   &sna->kgem.batch[rewind],
			   11*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_sampler = rewind;

		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
		OUT_BATCH(0x00000007);

		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 frame->bo,
					 I915_GEM_DOMAIN_SAMPLER << 16,
					 0));

		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
		OUT_BATCH(ms3);
		/* check to see if Y has special pitch than normal
		 * double u/v pitch, e.g i915 XvMC hw requires at
		 * least 1K alignment, so Y pitch might
		 * be same as U/V's.*/
		if (frame->pitch[1])
			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
		else
			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);

		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 frame->bo,
					 I915_GEM_DOMAIN_SAMPLER << 16,
					 frame->UBufOffset));

		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
		OUT_BATCH(ms3);
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);

		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 frame->bo,
					 I915_GEM_DOMAIN_SAMPLER << 16,
					 frame->VBufOffset));

		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
		OUT_BATCH(ms3);
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);

		/* Shader key: bit31 = video, 2<<1 = planar path, bit0 =
		 * whether a brightness adjustment is required.
		 */
		id = 1<<31 | 2<<1 | !!video->brightness;
		if (state->last_shader != id) {
			state->last_shader = id;
			/* Reserve a dword for the program header; patched
			 * below once the program length is known.
			 */
			id = sna->kgem.nbatch++;

			/* Declare samplers */
			gen3_fs_dcl(FS_S0);	/* Y */
			gen3_fs_dcl(FS_S1);	/* U */
			gen3_fs_dcl(FS_S2);	/* V */
			gen3_fs_dcl(FS_T0);	/* normalized coords */

			/* Load samplers to temporaries. */
			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
			gen3_fs_texld(FS_R3, FS_S2, FS_T0);

			/* Move the sampled YUV data in R[123] to the first
			 * 3 channels of R0.
			 */
			gen3_fs_mov_masked(FS_R0, MASK_X,
					   gen3_fs_operand_reg(FS_R1));
			gen3_fs_mov_masked(FS_R0, MASK_Y,
					   gen3_fs_operand_reg(FS_R2));
			gen3_fs_mov_masked(FS_R0, MASK_Z,
					   gen3_fs_operand_reg(FS_R3));

			/* Normalize the YUV data */
			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
				    gen3_fs_operand_reg(FS_C0));
			/* dot-product the YUV data in R0 by the vectors of
			 * coefficients for calculating R, G, and B, storing
			 * the results in the R, G, or B channels of the output
			 * color. The OC results are implicitly clamped
			 * at the end of the program.
			 */
			gen3_fs_dp3(FS_OC, MASK_X,
				    gen3_fs_operand_reg(FS_R0),
				    gen3_fs_operand_reg(FS_C1));
			gen3_fs_dp3(FS_OC, MASK_Y,
				    gen3_fs_operand_reg(FS_R0),
				    gen3_fs_operand_reg(FS_C2));
			gen3_fs_dp3(FS_OC, MASK_Z,
				    gen3_fs_operand_reg(FS_R0),
				    gen3_fs_operand_reg(FS_C3));
			/* Set alpha of the output to 1.0, by wiring W to 1
			 * and not actually using the source.
			 */
			gen3_fs_mov_masked(FS_OC, MASK_W,
					   gen3_fs_operand_one());

			if (video->brightness != 0) {
				gen3_fs_add(FS_OC,
					    gen3_fs_operand_reg(FS_OC),
					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
			}

			sna->kgem.batch[id] =
				_3DSTATE_PIXEL_SHADER_PROGRAM |
				(sna->kgem.nbatch - id - 2);
		}
	}
}

/* Prepare the batch for video emission, submitting first if there is not
 * enough room left for a worst-case state + primitive emission.
 */
static void
gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);

	if (!kgem_check_batch(&sna->kgem, 120) ||
	    !kgem_check_reloc(&sna->kgem, 4) ||
	    !kgem_check_exec(&sna->kgem, 2)) {
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen3.need_invariant)
		gen3_emit_invariant(sna);
}

/* Clamp want to the number of inline rectangles (3 vertices each) that
 * still fit in the remaining batch space.
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	int size = floats_per_vertex * 3;
	int rem = batch_space(sna) - 1;

	if (size * want > rem)
		want = rem / size;

	return want;
}

/* Blit a video frame to the destination pixmap, one textured RECTLIST
 * per clip box. If the destination is too large for the 3D pipeline (or
 * has an unusable pitch), render into a temporary bo and blit the result
 * back afterwards.
 */
static bool
gen3_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	const BoxRec *pbox = region_rects(dstRegion);
	int nbox = region_num_rects(dstRegion);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int 
src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int pix_xoff, pix_yoff;
	struct kgem_bo *dst_bo;
	bool bilinear;
	int copy = 0;

	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));

	assert(priv->gpu_bo);
	dst_bo = priv->gpu_bo;

	/* Only filter when the blit actually scales. */
	bilinear = src_width != dst_width || src_height != dst_height;

	/* Map destination pixels to normalized source texture coords. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));

	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
	    !gen3_check_pitch_3d(dst_bo)) {
		/* Render via an intermediate bo and blit back at the end. */
		int bpp = pixmap->drawable.bitsPerPixel;

		if (too_large(dst_width, dst_height))
			return false;

		dst_bo = kgem_create_2d(&sna->kgem,
					dst_width, dst_height, bpp,
					kgem_choose_tiling(&sna->kgem,
							   I915_TILING_X,
							   dst_width, dst_height, bpp),
					0);
		if (!dst_bo)
			return false;

		pix_xoff = -dstRegion->extents.x1;
		pix_yoff = -dstRegion->extents.y1;
		copy = 1;
	} else {
		pix_xoff = pix_yoff = 0;
		dst_width = pixmap->drawable.width;
		dst_height = pixmap->drawable.height;
	}

	gen3_video_get_batch(sna, dst_bo);
	gen3_emit_video_state(sna, video, frame, pixmap,
			      dst_bo, dst_width, dst_height, bilinear);
	do {
		/* 4 floats per vertex: (x, y, u, v). */
		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
		if (nbox_this_time == 0) {
			gen3_video_get_batch(sna, dst_bo);
			gen3_emit_video_state(sna, video, frame, pixmap,
					      dst_bo, dst_width, dst_height, bilinear);
			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
			assert(nbox_this_time);
		}
		nbox -= nbox_this_time;

		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
		do {
			int box_x1 = pbox->x1;
			int box_y1 = pbox->y1;
			int box_x2 = pbox->x2;
			int box_y2 = pbox->y2;

			pbox++;

			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
			     box_x1 * src_scale_x + src_offset_x,
			     box_y1 * src_scale_y + src_offset_y,
			     box_x2 * src_scale_x + src_offset_x,
			     box_y2 * src_scale_y + src_offset_y));

			/* bottom right */
			OUT_BATCH_F(box_x2 + pix_xoff);
			OUT_BATCH_F(box_y2 + pix_yoff);
			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* bottom left */
			OUT_BATCH_F(box_x1 + pix_xoff);
			OUT_BATCH_F(box_y2 + pix_yoff);
			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* top left */
			OUT_BATCH_F(box_x1 + pix_xoff);
			OUT_BATCH_F(box_y1 + pix_yoff);
			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
		} while (--nbox_this_time);
	} while (nbox);

	if (copy) {
		/* Blit the intermediate back to the real destination. */
		sna_blt_copy_boxes(sna, GXcopy,
				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
				   priv->gpu_bo, 0, 0,
				   pixmap->drawable.bitsPerPixel,
				   region_rects(dstRegion),
				   region_num_rects(dstRegion));

		kgem_bo_destroy(&sna->kgem, dst_bo);
	}

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}

/* Initialise a texture source channel for the 3D copy path: nearest
 * filtering, no repeat, scale normalised to the drawable size. Formats
 * not directly supported fall back to a matching card format with an
 * alpha fixup.
 */
static void
gen3_render_copy_setup_source(struct sna_composite_channel *channel,
			      const DrawableRec *draw,
			      struct kgem_bo *bo)
{
	int i;

	channel->u.gen3.type = SHADER_TEXTURE;
	channel->filter = gen3_filter(PictFilterNearest);
	channel->repeat = gen3_texture_repeat(RepeatNone);
	channel->width = draw->width;
	channel->height = draw->height;
	channel->scale[0] = 1.f/draw->width;
	channel->scale[1] = 1.f/draw->height;
	channel->offset[0] = 0;
	channel->offset[1] = 0;

	channel->pict_format = sna_format_for_depth(draw->depth);
	if (!gen3_composite_channel_set_format(channel, channel->pict_format)) {
		for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
			if (gen3_tex_formats[i].xfmt == channel->pict_format) {
				channel->card_format = gen3_tex_formats[i].card_fmt;
				channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
				channel->alpha_fixup = true;
				break;
			}
		}
	}
	assert(channel->card_format);

	channel->bo = bo;
	channel->is_affine = 1;
}

/* Copy boxes between bos, preferring the BLT engine and falling back to
 * the 3D pipeline when the blitter cannot handle the operation.
 * (Continues beyond this chunk.)
 */
static bool
gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

#if NO_COPY_BOXES
	if (!sna_blt_compare_depth(src, dst))
		return false;

	return sna_blt_copy_boxes(sna, alu,
				  src_bo, src_dx, src_dy,
				  dst_bo, dst_dx, dst_dy,
				  dst->bitsPerPixel,
				  box, n);
#endif

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	if (!(alu == GXcopy || alu == GXclear) ||
	    src_bo == dst_bo || /* XXX handle overlap using 3D ? 
*/
	    src_bo->pitch > MAX_3D_PITCH ||
	    too_large(src->width, src->height)) {
fallback_blt:
		if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
		    !kgem_bo_can_blt(&sna->kgem, dst_bo))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Flush the batch if both bo cannot fit, retrying once. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback_blt;
	}

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = sna_format_for_depth(dst->depth);
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Destination exceeds the 3D pipeline limits: redirect rendering
	 * through a proxy covering just the union of all the boxes. */
	if (too_large(tmp.dst.width, tmp.dst.height) ||
	    dst_bo->pitch > MAX_3D_PITCH) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	gen3_render_copy_setup_source(&tmp.src, src, src_bo);

	tmp.floats_per_vertex = 4;
	tmp.floats_per_rect = 12;
	tmp.mask.bo = NULL;
	tmp.mask.u.gen3.type = SHADER_NONE;

	/* Fold any redirection offset into the destination deltas. */
	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	gen3_align_vertex(sna, &tmp);
	gen3_emit_composite_state(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen3_get_rectangles(sna, &tmp, n);
		n -= n_this_time;

		do {
			DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			/* Three vertices per rectangle: bottom-right,
			 * bottom-left, top-left; (x, y, s, t) each. */
			OUT_VERTEX(box->x2 + dst_dx);
			OUT_VERTEX(box->y2 + dst_dy);
			OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx);
			OUT_VERTEX(box->y2 + dst_dy);
			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx);
			OUT_VERTEX(box->y1 + dst_dy);
			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);

			box++;
		} while (--n_this_time);
	} while (n);

	gen3_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;

fallback_tiled:
	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

/* Emit one copy rectangle for an active copy op (set up by
 * gen3_render_copy below): src (sx, sy) -> dst (dx, dy), size w x h. */
static void
gen3_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	gen3_get_rectangles(sna, &op->base, 1);

	OUT_VERTEX(dx+w);
	OUT_VERTEX(dy+h);
	OUT_VERTEX((sx+w)*op->base.src.scale[0]);
	OUT_VERTEX((sy+h)*op->base.src.scale[1]);

	OUT_VERTEX(dx);
	OUT_VERTEX(dy+h);
	OUT_VERTEX(sx*op->base.src.scale[0]);
	OUT_VERTEX((sy+h)*op->base.src.scale[1]);

	OUT_VERTEX(dx);
	OUT_VERTEX(dy);
	OUT_VERTEX(sx*op->base.src.scale[0]);
	OUT_VERTEX(sy*op->base.src.scale[1]);
}

/* Flush any vertices still queued when the copy op is finished. */
static void
gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	if (sna->render.vertex_offset)
		gen3_vertex_flush(sna);
}

static bool
gen3_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *tmp)
{
	/* Prepare a reusable copy operation (tmp->blt/tmp->done) from src to
	 * dst; returns false if the caller must use another path. */
#if NO_COPY
	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		return false;

	return sna_blt_copy(sna, alu,
			    src_bo, dst_bo,
			    dst->drawable.bitsPerPixel,
			    tmp);
#endif

	/* Prefer to use the BLT */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    tmp);
	}

	tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;

	gen3_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);

	tmp->base.floats_per_vertex = 4;
	tmp->base.floats_per_rect = 12;
	tmp->base.mask.bo = NULL;
	tmp->base.mask.u.gen3.type = SHADER_NONE;

	/* Flush the batch if both bo cannot fit, retrying once. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	tmp->blt = gen3_render_copy_blt;
	tmp->done = gen3_render_copy_done;

	gen3_align_vertex(sna, &tmp->base);
	gen3_emit_composite_state(sna, &tmp->base);
	return true;
}

/* Try to solid-fill @box via the BLT engine. Only PictOpClear and PictOpSrc
 * can be expressed as a blit; anything else must use the 3D pipeline. */
static bool
gen3_render_fill_boxes_try_blt(struct sna *sna,
			       CARD8 op, PictFormat format,
			       const xRenderColor *color,
			       const DrawableRec *dst, struct kgem_bo *dst_bo,
			       const BoxRec *box, int n)
{
	uint8_t alu;
	uint32_t pixel;

	/* The BLT engine cannot address Y-tiled surfaces. */
	if (dst_bo->tiling == I915_TILING_Y) {
		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
		assert(!too_large(dst->width, dst->height));
		return false;
	}

	if (op > PictOpSrc)
		return false;

	if (op == PictOpClear) {
		alu = GXclear;
		pixel = 0;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    format))
		return false;
	else
		alu = GXcopy;

	return sna_blt_fill_boxes(sna, alu,
				  dst_bo, dst->bitsPerPixel,
				  pixel, box, n);
}

/* Policy knob: whether fills should try the BLT engine first. */
static inline bool prefer_fill_blt(struct sna *sna)
{
#if PREFER_BLT_FILL
	return true;
#else
	return sna->kgem.mode != KGEM_RENDER;
#endif
}

/* Fill an array of boxes with a solid colour using blend op @op, trying the
 * BLT engine first and falling back to a solid shader on the 3D pipeline. */
static bool
gen3_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	if (op >= ARRAY_SIZE(gen3_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

#if NO_FILL_BOXES
	return gen3_render_fill_boxes_try_blt(sna, op, format, color,
					      dst, dst_bo,
					      box, n);
#endif

	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
	     __FUNCTION__, op, (int)format,
	     color->red, color->green, color->blue, color->alpha));

	if (too_large(dst->width, dst->height) ||
	    dst_bo->pitch > MAX_3D_PITCH ||
	    !gen3_check_dst_format(format)) {
		DBG(("%s: try blt, too large or incompatible destination\n",
		     __FUNCTION__));
		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
						   dst, dst_bo,
						   box, n))
			return true;

		if (!gen3_check_dst_format(format))
			return false;

		/* Too large for one pass: split into tiles. */
		return sna_tiling_fill_boxes(sna, op, format, color,
					     dst, dst_bo, box, n);
	}

	if (prefer_fill_blt(sna) &&
	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
					   dst, dst_bo,
					   box, n))
		return true;

	if (op == PictOpClear) {
		pixel = 0;
	} else {
		/* The solid shader takes a premultiplied a8r8g8b8 colour. */
		if (!sna_get_pixel_from_rgba(&pixel,
					     color->red,
					     color->green,
					     color->blue,
					     color->alpha,
					     PICT_a8r8g8b8)) {
			assert(0);
			return false;
		}
	}
	DBG(("%s: using shader for op=%d, format=%08x, pixel=%08x\n",
	     __FUNCTION__, op, (int)format, pixel));

	tmp.op = op;
	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.damage = NULL;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.rb_reversed = 0;
	tmp.has_component_alpha =
0;
	tmp.need_magic_ca_pass = false;

	gen3_init_solid(&tmp.src, pixel);
	tmp.mask.bo = NULL;
	tmp.mask.u.gen3.type = SHADER_NONE;
	tmp.u.gen3.num_constants = 0;

	/* Flush the batch if the destination cannot fit, retrying once. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	gen3_align_vertex(sna, &tmp);
	gen3_emit_composite_state(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen3_get_rectangles(sna, &tmp, n);
		n -= n_this_time;

		do {
			DBG((" (%d, %d), (%d, %d): %x\n",
			     box->x1, box->y1, box->x2, box->y2, pixel));
			/* Solid fill: positions only (br, bl, tl). */
			OUT_VERTEX(box->x2);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y1);
			box++;
		} while (--n_this_time);
	} while (n);

	gen3_vertex_flush(sna);
	return true;
}

/* Emit one fill rectangle (x, y) to (x+w, y+h) for an active fill op. */
static void
gen3_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	gen3_get_rectangles(sna, &op->base, 1);

	OUT_VERTEX(x+w);
	OUT_VERTEX(y+h);
	OUT_VERTEX(x);
	OUT_VERTEX(y+h);
	OUT_VERTEX(x);
	OUT_VERTEX(y);
}

/* Emit one fill rectangle given as a box. */
fastcall static void
gen3_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	gen3_get_rectangles(sna, &op->base, 1);

	OUT_VERTEX(box->x2);
	OUT_VERTEX(box->y2);
	OUT_VERTEX(box->x1);
	OUT_VERTEX(box->y2);
	OUT_VERTEX(box->x1);
	OUT_VERTEX(box->y1);
}

/* Emit a batch of fill rectangles, splitting across batches as needed. */
fastcall static void
gen3_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		nbox -= nbox_this_time;

		do {
			OUT_VERTEX(box->x2);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y2);
			OUT_VERTEX(box->x1);
			OUT_VERTEX(box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Flush any vertices still queued when the fill op is finished. */
static void
gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen3_vertex_flush(sna);
}

/* Prepare a reusable solid-fill operation (tmp->blt/box/boxes/done),
 * preferring the BLT engine and falling back to the solid shader. */
static bool
gen3_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *tmp)
{
#if NO_FILL
	return sna_blt_fill(sna, alu,
			    dst_bo, dst->drawable.bitsPerPixel,
			    color,
			    tmp);
#endif

	/* Prefer to use the BLT if already engaged */
	if (prefer_fill_blt(sna) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch > MAX_3D_PITCH)
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    tmp);

	if (alu == GXclear)
		color = 0;

	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;
	tmp->base.floats_per_vertex = 2;
	tmp->base.floats_per_rect = 6;
	tmp->base.need_magic_ca_pass = 0;
	tmp->base.has_component_alpha = 0;
	tmp->base.rb_reversed = 0;

	gen3_init_solid(&tmp->base.src,
			sna_rgba_for_color(color, dst->drawable.depth));
	tmp->base.mask.bo = NULL;
	tmp->base.mask.u.gen3.type = SHADER_NONE;
	tmp->base.u.gen3.num_constants = 0;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	tmp->blt = gen3_render_fill_op_blt;
	tmp->box = gen3_render_fill_op_box;
	tmp->boxes = gen3_render_fill_op_boxes;
	tmp->points = NULL;
	tmp->done = gen3_render_fill_op_done;

	gen3_align_vertex(sna, &tmp->base);
	gen3_emit_composite_state(sna, &tmp->base);
	return true;
}

/* Attempt a single-rectangle solid fill via the BLT engine. */
static bool
gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

/* Fill the single rectangle (x1, y1)-(x2, y2) with @color, preferring the
 * BLT engine and falling back to the solid shader on the 3D pipeline. */
static bool
gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (prefer_fill_blt(sna) &&
	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    bo->pitch > MAX_3D_PITCH)
		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;
	tmp.has_component_alpha = 0;
	tmp.rb_reversed = 0;

	gen3_init_solid(&tmp.src,
			sna_rgba_for_color(color, dst->drawable.depth));
	tmp.mask.bo = NULL;
	tmp.mask.u.gen3.type = SHADER_NONE;
	tmp.u.gen3.num_constants = 0;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);

		/* After the flush, a fresh BLT batch may now succeed. */
		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;

		if (!kgem_check_bo(&sna->kgem, bo, NULL))
			return false;
	}

	gen3_align_vertex(sna, &tmp);
	gen3_emit_composite_state(sna, &tmp);
	gen3_get_rectangles(sna, &tmp, 1);
	DBG((" (%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
	OUT_VERTEX(x2);
	OUT_VERTEX(y2);
	OUT_VERTEX(x1);
	OUT_VERTEX(y2);
	OUT_VERTEX(x1);
	OUT_VERTEX(y1);
	gen3_vertex_flush(sna);

	return true;
}

/* Close out the vertex buffer at the end of a batch. */
static void gen3_render_flush(struct sna *sna)
{
	gen3_vertex_close(sna);

	assert(sna->render.vertex_reloc[0] == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
gen3_render_fini(struct sna *sna)
{
}

/* Hook the gen3 implementations into the sna render vtable and report the
 * backend name. */
const char *gen3_render_init(struct sna *sna,
			     const char *backend)
{
	struct sna_render *render = &sna->render;

#if !NO_COMPOSITE
	render->composite = gen3_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen3_check_composite_spans;
	render->composite_spans = gen3_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif

	render->video = gen3_render_video;

	render->copy_boxes = gen3_render_copy_boxes;
	render->copy = gen3_render_copy;

	render->fill_boxes = gen3_render_fill_boxes;
	render->fill = gen3_render_fill;
	render->fill_one = gen3_render_fill_one;

	render->reset = gen3_render_reset;
	render->flush = gen3_render_flush;
	render->fini = gen3_render_fini;

	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;

	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;
	return "Alviso (gen3)";
}