/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_render.h" 34#include "sna_render_inline.h" 35#include "gen4_vertex.h" 36 37#ifndef sse2 38#define sse2 39#endif 40 41void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op) 42{ 43 int vertex_index; 44 45 assert(op->floats_per_vertex); 46 assert(op->floats_per_rect == 3*op->floats_per_vertex); 47 assert(sna->render.vertex_used <= sna->render.vertex_size); 48 49 vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 50 if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { 51 DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", 52 __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); 53 if (gen4_vertex_finish(sna) < 2*op->floats_per_rect) { 54 kgem_submit(&sna->kgem); 55 _kgem_set_mode(&sna->kgem, KGEM_RENDER); 56 } 57 assert(sna->render.vertex_used < sna->render.vertex_size); 58 59 vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 60 assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size); 61 } 62 63 sna->render.vertex_index = vertex_index; 64 sna->render.vertex_used = vertex_index * op->floats_per_vertex; 65} 66 67void gen4_vertex_flush(struct sna *sna) 68{ 69 DBG(("%s[%x] = %d\n", __FUNCTION__, 70 4*sna->render.vertex_offset, 71 sna->render.vertex_index - sna->render.vertex_start)); 72 73 assert(sna->render.vertex_offset); 74 assert(sna->render.vertex_offset <= sna->kgem.nbatch); 75 assert(sna->render.vertex_index > sna->render.vertex_start); 76 assert(sna->render.vertex_used <= sna->render.vertex_size); 77 78 sna->kgem.batch[sna->render.vertex_offset] = 79 sna->render.vertex_index - sna->render.vertex_start; 80 sna->render.vertex_offset = 0; 81} 82 83int gen4_vertex_finish(struct sna *sna) 84{ 85 struct 
kgem_bo *bo; 86 unsigned int i; 87 unsigned hint, size; 88 89 DBG(("%s: used=%d / %d\n", __FUNCTION__, 90 sna->render.vertex_used, sna->render.vertex_size)); 91 assert(sna->render.vertex_offset == 0); 92 assert(sna->render.vertex_used); 93 assert(sna->render.vertex_used <= sna->render.vertex_size); 94 95 sna_vertex_wait__locked(&sna->render); 96 97 /* Note: we only need dword alignment (currently) */ 98 99 hint = CREATE_GTT_MAP; 100 101 bo = sna->render.vbo; 102 if (bo) { 103 for (i = 0; i < sna->render.nvertex_reloc; i++) { 104 DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, 105 i, sna->render.vertex_reloc[i])); 106 107 sna->kgem.batch[sna->render.vertex_reloc[i]] = 108 kgem_add_reloc(&sna->kgem, 109 sna->render.vertex_reloc[i], bo, 110 I915_GEM_DOMAIN_VERTEX << 16, 111 0); 112 } 113 114 assert(!sna->render.active); 115 sna->render.nvertex_reloc = 0; 116 sna->render.vertex_used = 0; 117 sna->render.vertex_index = 0; 118 sna->render.vbo = NULL; 119 sna->render.vb_id = 0; 120 121 kgem_bo_destroy(&sna->kgem, bo); 122 hint |= CREATE_CACHED | CREATE_NO_THROTTLE; 123 } else { 124 assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); 125 assert(sna->render.vertices == sna->render.vertex_data); 126 if (kgem_is_idle(&sna->kgem)) 127 return 0; 128 } 129 130 size = 256*1024; 131 assert(!sna->render.active); 132 sna->render.vertices = NULL; 133 sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); 134 while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { 135 size /= 2; 136 sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); 137 } 138 if (sna->render.vbo == NULL) 139 sna->render.vbo = kgem_create_linear(&sna->kgem, 140 256*1024, CREATE_GTT_MAP); 141 if (sna->render.vbo && 142 kgem_check_bo(&sna->kgem, sna->render.vbo, NULL)) 143 sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); 144 if (sna->render.vertices == NULL) { 145 if (sna->render.vbo) { 146 kgem_bo_destroy(&sna->kgem, sna->render.vbo); 147 
sna->render.vbo = NULL; 148 } 149 sna->render.vertices = sna->render.vertex_data; 150 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 151 return 0; 152 } 153 154 if (sna->render.vertex_used) { 155 DBG(("%s: copying initial buffer x %d to handle=%d\n", 156 __FUNCTION__, 157 sna->render.vertex_used, 158 sna->render.vbo->handle)); 159 assert(sizeof(float)*sna->render.vertex_used <= 160 __kgem_bo_size(sna->render.vbo)); 161 memcpy(sna->render.vertices, 162 sna->render.vertex_data, 163 sizeof(float)*sna->render.vertex_used); 164 } 165 166 size = __kgem_bo_size(sna->render.vbo)/4; 167 if (size >= UINT16_MAX) 168 size = UINT16_MAX - 1; 169 170 DBG(("%s: create vbo handle=%d, size=%d floats [%d bytes]\n", 171 __FUNCTION__, sna->render.vbo->handle, size, __kgem_bo_size(sna->render.vbo))); 172 assert(size > sna->render.vertex_used); 173 174 sna->render.vertex_size = size; 175 return size - sna->render.vertex_used; 176} 177 178void gen4_vertex_close(struct sna *sna) 179{ 180 struct kgem_bo *bo, *free_bo = NULL; 181 unsigned int i, delta = 0; 182 183 assert(sna->render.vertex_offset == 0); 184 if (!sna->render.vb_id) 185 return; 186 187 DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", 188 __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? 
sna->render.vbo->handle : 0, 189 sna->render.vb_id, sna->render.nvertex_reloc)); 190 191 assert(!sna->render.active); 192 193 bo = sna->render.vbo; 194 if (bo) { 195 if (sna->render.vertex_size - sna->render.vertex_used < 64) { 196 DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); 197 sna->render.vbo = NULL; 198 sna->render.vertices = sna->render.vertex_data; 199 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 200 free_bo = bo; 201 } else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) { 202 DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); 203 sna->render.vertices = 204 kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); 205 if (sna->render.vertices == NULL) { 206 sna->render.vbo = NULL; 207 sna->render.vertices = sna->render.vertex_data; 208 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 209 free_bo = bo; 210 } 211 212 } 213 } else { 214 int size; 215 216 size = sna->kgem.nbatch; 217 size += sna->kgem.batch_size - sna->kgem.surface; 218 size += sna->render.vertex_used; 219 220 if (size <= 1024) { 221 DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, 222 sna->render.vertex_used, sna->kgem.nbatch)); 223 assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface); 224 memcpy(sna->kgem.batch + sna->kgem.nbatch, 225 sna->render.vertex_data, 226 sna->render.vertex_used * 4); 227 delta = sna->kgem.nbatch * 4; 228 bo = NULL; 229 sna->kgem.nbatch += sna->render.vertex_used; 230 } else { 231 size = 256 * 1024; 232 do { 233 bo = kgem_create_linear(&sna->kgem, size, 234 CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED); 235 } while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used); 236 237 sna->render.vertices = NULL; 238 if (bo) 239 sna->render.vertices = kgem_bo_map(&sna->kgem, bo); 240 if (sna->render.vertices != NULL) { 241 DBG(("%s: new vbo: %d / %d\n", __FUNCTION__, 242 sna->render.vertex_used, __kgem_bo_size(bo)/4)); 243 244 
assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo)); 245 memcpy(sna->render.vertices, 246 sna->render.vertex_data, 247 sizeof(float)*sna->render.vertex_used); 248 249 size = __kgem_bo_size(bo)/4; 250 if (size >= UINT16_MAX) 251 size = UINT16_MAX - 1; 252 253 sna->render.vbo = bo; 254 sna->render.vertex_size = size; 255 } else { 256 DBG(("%s: tmp vbo: %d\n", __FUNCTION__, 257 sna->render.vertex_used)); 258 259 if (bo) 260 kgem_bo_destroy(&sna->kgem, bo); 261 262 bo = kgem_create_linear(&sna->kgem, 263 4*sna->render.vertex_used, 264 CREATE_NO_THROTTLE); 265 if (bo && !kgem_bo_write(&sna->kgem, bo, 266 sna->render.vertex_data, 267 4*sna->render.vertex_used)) { 268 kgem_bo_destroy(&sna->kgem, bo); 269 bo = NULL; 270 } 271 272 assert(sna->render.vbo == NULL); 273 sna->render.vertices = sna->render.vertex_data; 274 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 275 free_bo = bo; 276 } 277 } 278 } 279 280 assert(sna->render.nvertex_reloc); 281 for (i = 0; i < sna->render.nvertex_reloc; i++) { 282 DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, 283 i, sna->render.vertex_reloc[i])); 284 285 sna->kgem.batch[sna->render.vertex_reloc[i]] = 286 kgem_add_reloc(&sna->kgem, 287 sna->render.vertex_reloc[i], bo, 288 I915_GEM_DOMAIN_VERTEX << 16, 289 delta); 290 } 291 sna->render.nvertex_reloc = 0; 292 sna->render.vb_id = 0; 293 294 if (sna->render.vbo == NULL) { 295 assert(!sna->render.active); 296 sna->render.vertex_used = 0; 297 sna->render.vertex_index = 0; 298 assert(sna->render.vertices == sna->render.vertex_data); 299 assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); 300 } 301 302 if (free_bo) 303 kgem_bo_destroy(&sna->kgem, free_bo); 304} 305 306/* specialised vertex emission routines */ 307 308#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */ 309#define OUT_VERTEX_F(v) vertex_emit(sna, v) 310 311force_inline static float 312compute_linear(const struct sna_composite_channel *channel, 313 
int16_t x, int16_t y) 314{ 315 return ((x+channel->offset[0]) * channel->u.linear.dx + 316 (y+channel->offset[1]) * channel->u.linear.dy + 317 channel->u.linear.offset); 318} 319 320sse2 inline static void 321emit_texcoord(struct sna *sna, 322 const struct sna_composite_channel *channel, 323 int16_t x, int16_t y) 324{ 325 if (channel->is_solid) { 326 OUT_VERTEX_F(0.5); 327 return; 328 } 329 330 x += channel->offset[0]; 331 y += channel->offset[1]; 332 333 if (channel->is_affine) { 334 float s, t; 335 336 sna_get_transformed_coordinates(x, y, 337 channel->transform, 338 &s, &t); 339 OUT_VERTEX_F(s * channel->scale[0]); 340 OUT_VERTEX_F(t * channel->scale[1]); 341 } else { 342 float s, t, w; 343 344 sna_get_transformed_coordinates_3d(x, y, 345 channel->transform, 346 &s, &t, &w); 347 OUT_VERTEX_F(s * channel->scale[0]); 348 OUT_VERTEX_F(t * channel->scale[1]); 349 OUT_VERTEX_F(w); 350 } 351} 352 353sse2 force_inline static void 354emit_vertex(struct sna *sna, 355 const struct sna_composite_op *op, 356 int16_t srcX, int16_t srcY, 357 int16_t mskX, int16_t mskY, 358 int16_t dstX, int16_t dstY) 359{ 360 OUT_VERTEX(dstX, dstY); 361 emit_texcoord(sna, &op->src, srcX, srcY); 362} 363 364sse2 fastcall static void 365emit_primitive(struct sna *sna, 366 const struct sna_composite_op *op, 367 const struct sna_composite_rectangles *r) 368{ 369 emit_vertex(sna, op, 370 r->src.x + r->width, r->src.y + r->height, 371 r->mask.x + r->width, r->mask.y + r->height, 372 r->dst.x + r->width, r->dst.y + r->height); 373 emit_vertex(sna, op, 374 r->src.x, r->src.y + r->height, 375 r->mask.x, r->mask.y + r->height, 376 r->dst.x, r->dst.y + r->height); 377 emit_vertex(sna, op, 378 r->src.x, r->src.y, 379 r->mask.x, r->mask.y, 380 r->dst.x, r->dst.y); 381} 382 383sse2 inline static float * 384vemit_texcoord(float *v, 385 const struct sna_composite_channel *channel, 386 int16_t x, int16_t y) 387{ 388 if (channel->is_solid) { 389 *v++ = 0.5; 390 } else { 391 x += channel->offset[0]; 392 y += 
channel->offset[1]; 393 394 if (channel->is_affine) { 395 float s, t; 396 397 sna_get_transformed_coordinates(x, y, 398 channel->transform, 399 &s, &t); 400 *v++ = s * channel->scale[0]; 401 *v++ = t * channel->scale[1]; 402 } else { 403 float s, t, w; 404 405 sna_get_transformed_coordinates_3d(x, y, 406 channel->transform, 407 &s, &t, &w); 408 *v++ = s * channel->scale[0]; 409 *v++ = t * channel->scale[1]; 410 *v++ = w; 411 } 412 } 413 414 return v; 415} 416 417sse2 force_inline static float * 418vemit_vertex(float *v, 419 const struct sna_composite_op *op, 420 int16_t x, int16_t y) 421{ 422 *v++ = pack_2s(x, y); 423 return vemit_texcoord(v, &op->src, x, y); 424} 425 426sse2 fastcall static void 427emit_boxes(const struct sna_composite_op *op, 428 const BoxRec *box, int nbox, 429 float *v) 430{ 431 do { 432 v = vemit_vertex(v, op, box->x2, box->y2); 433 v = vemit_vertex(v, op, box->x1, box->y2); 434 v = vemit_vertex(v, op, box->x1, box->y1); 435 436 box++; 437 } while (--nbox); 438} 439 440sse2 force_inline static void 441emit_vertex_mask(struct sna *sna, 442 const struct sna_composite_op *op, 443 int16_t srcX, int16_t srcY, 444 int16_t mskX, int16_t mskY, 445 int16_t dstX, int16_t dstY) 446{ 447 OUT_VERTEX(dstX, dstY); 448 emit_texcoord(sna, &op->src, srcX, srcY); 449 emit_texcoord(sna, &op->mask, mskX, mskY); 450} 451 452sse2 fastcall static void 453emit_primitive_mask(struct sna *sna, 454 const struct sna_composite_op *op, 455 const struct sna_composite_rectangles *r) 456{ 457 emit_vertex_mask(sna, op, 458 r->src.x + r->width, r->src.y + r->height, 459 r->mask.x + r->width, r->mask.y + r->height, 460 r->dst.x + r->width, r->dst.y + r->height); 461 emit_vertex_mask(sna, op, 462 r->src.x, r->src.y + r->height, 463 r->mask.x, r->mask.y + r->height, 464 r->dst.x, r->dst.y + r->height); 465 emit_vertex_mask(sna, op, 466 r->src.x, r->src.y, 467 r->mask.x, r->mask.y, 468 r->dst.x, r->dst.y); 469} 470 471sse2 force_inline static float * 472vemit_vertex_mask(float *v, 
473 const struct sna_composite_op *op, 474 int16_t x, int16_t y) 475{ 476 *v++ = pack_2s(x, y); 477 v = vemit_texcoord(v, &op->src, x, y); 478 v = vemit_texcoord(v, &op->mask, x, y); 479 return v; 480} 481 482sse2 fastcall static void 483emit_boxes_mask(const struct sna_composite_op *op, 484 const BoxRec *box, int nbox, 485 float *v) 486{ 487 do { 488 v = vemit_vertex_mask(v, op, box->x2, box->y2); 489 v = vemit_vertex_mask(v, op, box->x1, box->y2); 490 v = vemit_vertex_mask(v, op, box->x1, box->y1); 491 492 box++; 493 } while (--nbox); 494} 495 496 497sse2 fastcall static void 498emit_primitive_solid(struct sna *sna, 499 const struct sna_composite_op *op, 500 const struct sna_composite_rectangles *r) 501{ 502 float *v; 503 union { 504 struct sna_coordinate p; 505 float f; 506 } dst; 507 508 assert(op->floats_per_rect == 6); 509 assert((sna->render.vertex_used % 2) == 0); 510 v = sna->render.vertices + sna->render.vertex_used; 511 sna->render.vertex_used += 6; 512 assert(sna->render.vertex_used <= sna->render.vertex_size); 513 514 dst.p.x = r->dst.x + r->width; 515 dst.p.y = r->dst.y + r->height; 516 v[0] = dst.f; 517 dst.p.x = r->dst.x; 518 v[2] = dst.f; 519 dst.p.y = r->dst.y; 520 v[4] = dst.f; 521 522 v[5] = v[3] = v[1] = .5; 523} 524 525sse2 fastcall static void 526emit_boxes_solid(const struct sna_composite_op *op, 527 const BoxRec *box, int nbox, 528 float *v) 529{ 530 do { 531 union { 532 struct sna_coordinate p; 533 float f; 534 } dst; 535 536 dst.p.x = box->x2; 537 dst.p.y = box->y2; 538 v[0] = dst.f; 539 dst.p.x = box->x1; 540 v[2] = dst.f; 541 dst.p.y = box->y1; 542 v[4] = dst.f; 543 544 v[5] = v[3] = v[1] = .5; 545 box++; 546 v += 6; 547 } while (--nbox); 548} 549 550sse2 fastcall static void 551emit_primitive_linear(struct sna *sna, 552 const struct sna_composite_op *op, 553 const struct sna_composite_rectangles *r) 554{ 555 float *v; 556 union { 557 struct sna_coordinate p; 558 float f; 559 } dst; 560 561 assert(op->floats_per_rect == 6); 562 
assert((sna->render.vertex_used % 2) == 0); 563 v = sna->render.vertices + sna->render.vertex_used; 564 sna->render.vertex_used += 6; 565 assert(sna->render.vertex_used <= sna->render.vertex_size); 566 567 dst.p.x = r->dst.x + r->width; 568 dst.p.y = r->dst.y + r->height; 569 v[0] = dst.f; 570 dst.p.x = r->dst.x; 571 v[2] = dst.f; 572 dst.p.y = r->dst.y; 573 v[4] = dst.f; 574 575 v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 576 v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 577 v[5] = compute_linear(&op->src, r->src.x, r->src.y); 578} 579 580sse2 fastcall static void 581emit_boxes_linear(const struct sna_composite_op *op, 582 const BoxRec *box, int nbox, 583 float *v) 584{ 585 union { 586 struct sna_coordinate p; 587 float f; 588 } dst; 589 590 do { 591 dst.p.x = box->x2; 592 dst.p.y = box->y2; 593 v[0] = dst.f; 594 dst.p.x = box->x1; 595 v[2] = dst.f; 596 dst.p.y = box->y1; 597 v[4] = dst.f; 598 599 v[1] = compute_linear(&op->src, box->x2, box->y2); 600 v[3] = compute_linear(&op->src, box->x1, box->y2); 601 v[5] = compute_linear(&op->src, box->x1, box->y1); 602 603 v += 6; 604 box++; 605 } while (--nbox); 606} 607 608sse2 fastcall static void 609emit_primitive_identity_source(struct sna *sna, 610 const struct sna_composite_op *op, 611 const struct sna_composite_rectangles *r) 612{ 613 union { 614 struct sna_coordinate p; 615 float f; 616 } dst; 617 float *v; 618 619 assert(op->floats_per_rect == 9); 620 assert((sna->render.vertex_used % 3) == 0); 621 v = sna->render.vertices + sna->render.vertex_used; 622 sna->render.vertex_used += 9; 623 624 dst.p.x = r->dst.x + r->width; 625 dst.p.y = r->dst.y + r->height; 626 v[0] = dst.f; 627 dst.p.x = r->dst.x; 628 v[3] = dst.f; 629 dst.p.y = r->dst.y; 630 v[6] = dst.f; 631 632 v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 633 v[1] = v[4] + r->width * op->src.scale[0]; 634 635 v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 636 v[5] = v[2] = v[8] + 
r->height * op->src.scale[1]; 637} 638 639sse2 fastcall static void 640emit_boxes_identity_source(const struct sna_composite_op *op, 641 const BoxRec *box, int nbox, 642 float *v) 643{ 644 do { 645 union { 646 struct sna_coordinate p; 647 float f; 648 } dst; 649 650 dst.p.x = box->x2; 651 dst.p.y = box->y2; 652 v[0] = dst.f; 653 dst.p.x = box->x1; 654 v[3] = dst.f; 655 dst.p.y = box->y1; 656 v[6] = dst.f; 657 658 v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 659 v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 660 661 v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 662 v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 663 664 v += 9; 665 box++; 666 } while (--nbox); 667} 668 669sse2 fastcall static void 670emit_primitive_simple_source(struct sna *sna, 671 const struct sna_composite_op *op, 672 const struct sna_composite_rectangles *r) 673{ 674 float *v; 675 union { 676 struct sna_coordinate p; 677 float f; 678 } dst; 679 680 float xx = op->src.transform->matrix[0][0]; 681 float x0 = op->src.transform->matrix[0][2]; 682 float yy = op->src.transform->matrix[1][1]; 683 float y0 = op->src.transform->matrix[1][2]; 684 float sx = op->src.scale[0]; 685 float sy = op->src.scale[1]; 686 int16_t tx = op->src.offset[0]; 687 int16_t ty = op->src.offset[1]; 688 689 assert(op->floats_per_rect == 9); 690 assert((sna->render.vertex_used % 3) == 0); 691 v = sna->render.vertices + sna->render.vertex_used; 692 sna->render.vertex_used += 3*3; 693 694 dst.p.x = r->dst.x + r->width; 695 dst.p.y = r->dst.y + r->height; 696 v[0] = dst.f; 697 v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 698 v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 699 700 dst.p.x = r->dst.x; 701 v[3] = dst.f; 702 v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 703 704 dst.p.y = r->dst.y; 705 v[6] = dst.f; 706 v[8] = ((r->src.y + ty) * yy + y0) * sy; 707} 708 709sse2 fastcall static void 710emit_boxes_simple_source(const struct sna_composite_op *op, 
711 const BoxRec *box, int nbox, 712 float *v) 713{ 714 float xx = op->src.transform->matrix[0][0]; 715 float x0 = op->src.transform->matrix[0][2]; 716 float yy = op->src.transform->matrix[1][1]; 717 float y0 = op->src.transform->matrix[1][2]; 718 float sx = op->src.scale[0]; 719 float sy = op->src.scale[1]; 720 int16_t tx = op->src.offset[0]; 721 int16_t ty = op->src.offset[1]; 722 723 do { 724 union { 725 struct sna_coordinate p; 726 float f; 727 } dst; 728 729 dst.p.x = box->x2; 730 dst.p.y = box->y2; 731 v[0] = dst.f; 732 v[1] = ((box->x2 + tx) * xx + x0) * sx; 733 v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 734 735 dst.p.x = box->x1; 736 v[3] = dst.f; 737 v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 738 739 dst.p.y = box->y1; 740 v[6] = dst.f; 741 v[8] = ((box->y1 + ty) * yy + y0) * sy; 742 743 v += 9; 744 box++; 745 } while (--nbox); 746} 747 748sse2 fastcall static void 749emit_primitive_affine_source(struct sna *sna, 750 const struct sna_composite_op *op, 751 const struct sna_composite_rectangles *r) 752{ 753 union { 754 struct sna_coordinate p; 755 float f; 756 } dst; 757 float *v; 758 759 assert(op->floats_per_rect == 9); 760 assert((sna->render.vertex_used % 3) == 0); 761 v = sna->render.vertices + sna->render.vertex_used; 762 sna->render.vertex_used += 9; 763 764 dst.p.x = r->dst.x + r->width; 765 dst.p.y = r->dst.y + r->height; 766 v[0] = dst.f; 767 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, 768 op->src.offset[1] + r->src.y + r->height, 769 op->src.transform, op->src.scale, 770 &v[1], &v[2]); 771 772 dst.p.x = r->dst.x; 773 v[3] = dst.f; 774 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 775 op->src.offset[1] + r->src.y + r->height, 776 op->src.transform, op->src.scale, 777 &v[4], &v[5]); 778 779 dst.p.y = r->dst.y; 780 v[6] = dst.f; 781 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 782 op->src.offset[1] + r->src.y, 783 op->src.transform, op->src.scale, 784 &v[7], &v[8]); 785} 786 787sse2 fastcall 
static void 788emit_boxes_affine_source(const struct sna_composite_op *op, 789 const BoxRec *box, int nbox, 790 float *v) 791{ 792 do { 793 union { 794 struct sna_coordinate p; 795 float f; 796 } dst; 797 798 dst.p.x = box->x2; 799 dst.p.y = box->y2; 800 v[0] = dst.f; 801 _sna_get_transformed_scaled(op->src.offset[0] + box->x2, 802 op->src.offset[1] + box->y2, 803 op->src.transform, op->src.scale, 804 &v[1], &v[2]); 805 806 dst.p.x = box->x1; 807 v[3] = dst.f; 808 _sna_get_transformed_scaled(op->src.offset[0] + box->x1, 809 op->src.offset[1] + box->y2, 810 op->src.transform, op->src.scale, 811 &v[4], &v[5]); 812 813 dst.p.y = box->y1; 814 v[6] = dst.f; 815 _sna_get_transformed_scaled(op->src.offset[0] + box->x1, 816 op->src.offset[1] + box->y1, 817 op->src.transform, op->src.scale, 818 &v[7], &v[8]); 819 box++; 820 v += 9; 821 } while (--nbox); 822} 823 824sse2 fastcall static void 825emit_primitive_identity_mask(struct sna *sna, 826 const struct sna_composite_op *op, 827 const struct sna_composite_rectangles *r) 828{ 829 union { 830 struct sna_coordinate p; 831 float f; 832 } dst; 833 float msk_x, msk_y; 834 float w, h; 835 float *v; 836 837 msk_x = r->mask.x + op->mask.offset[0]; 838 msk_y = r->mask.y + op->mask.offset[1]; 839 w = r->width; 840 h = r->height; 841 842 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 843 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 844 845 assert(op->floats_per_rect == 12); 846 assert((sna->render.vertex_used % 4) == 0); 847 v = sna->render.vertices + sna->render.vertex_used; 848 sna->render.vertex_used += 12; 849 850 dst.p.x = r->dst.x + r->width; 851 dst.p.y = r->dst.y + r->height; 852 v[0] = dst.f; 853 v[2] = (msk_x + w) * op->mask.scale[0]; 854 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 855 856 dst.p.x = r->dst.x; 857 v[4] = dst.f; 858 v[10] = v[6] = msk_x * op->mask.scale[0]; 859 860 dst.p.y = r->dst.y; 861 v[8] = dst.f; 862 v[11] = msk_y * op->mask.scale[1]; 863 864 v[9] = v[5] = v[1] = .5; 865} 866 867sse2 
fastcall static void 868emit_boxes_identity_mask(const struct sna_composite_op *op, 869 const BoxRec *box, int nbox, 870 float *v) 871{ 872 float msk_x = op->mask.offset[0]; 873 float msk_y = op->mask.offset[1]; 874 875 do { 876 union { 877 struct sna_coordinate p; 878 float f; 879 } dst; 880 881 dst.p.x = box->x2; 882 dst.p.y = box->y2; 883 v[0] = dst.f; 884 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 885 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 886 887 dst.p.x = box->x1; 888 v[4] = dst.f; 889 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 890 891 dst.p.y = box->y1; 892 v[8] = dst.f; 893 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 894 895 v[9] = v[5] = v[1] = .5; 896 v += 12; 897 box++; 898 } while (--nbox); 899} 900 901sse2 fastcall static void 902emit_primitive_linear_identity_mask(struct sna *sna, 903 const struct sna_composite_op *op, 904 const struct sna_composite_rectangles *r) 905{ 906 union { 907 struct sna_coordinate p; 908 float f; 909 } dst; 910 float msk_x, msk_y; 911 float w, h; 912 float *v; 913 914 msk_x = r->mask.x + op->mask.offset[0]; 915 msk_y = r->mask.y + op->mask.offset[1]; 916 w = r->width; 917 h = r->height; 918 919 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 920 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 921 922 assert(op->floats_per_rect == 12); 923 assert((sna->render.vertex_used % 4) == 0); 924 v = sna->render.vertices + sna->render.vertex_used; 925 sna->render.vertex_used += 12; 926 927 dst.p.x = r->dst.x + r->width; 928 dst.p.y = r->dst.y + r->height; 929 v[0] = dst.f; 930 v[2] = (msk_x + w) * op->mask.scale[0]; 931 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 932 933 dst.p.x = r->dst.x; 934 v[4] = dst.f; 935 v[10] = v[6] = msk_x * op->mask.scale[0]; 936 937 dst.p.y = r->dst.y; 938 v[8] = dst.f; 939 v[11] = msk_y * op->mask.scale[1]; 940 941 v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 942 v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 943 v[9] 
= compute_linear(&op->src, r->src.x, r->src.y); 944} 945 946sse2 fastcall static void 947emit_boxes_linear_identity_mask(const struct sna_composite_op *op, 948 const BoxRec *box, int nbox, 949 float *v) 950{ 951 float msk_x = op->mask.offset[0]; 952 float msk_y = op->mask.offset[1]; 953 954 do { 955 union { 956 struct sna_coordinate p; 957 float f; 958 } dst; 959 960 dst.p.x = box->x2; 961 dst.p.y = box->y2; 962 v[0] = dst.f; 963 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 964 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 965 966 dst.p.x = box->x1; 967 v[4] = dst.f; 968 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 969 970 dst.p.y = box->y1; 971 v[8] = dst.f; 972 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 973 974 v[1] = compute_linear(&op->src, box->x2, box->y2); 975 v[5] = compute_linear(&op->src, box->x1, box->y2); 976 v[9] = compute_linear(&op->src, box->x1, box->y1); 977 978 v += 12; 979 box++; 980 } while (--nbox); 981} 982 983sse2 fastcall static void 984emit_primitive_identity_source_mask(struct sna *sna, 985 const struct sna_composite_op *op, 986 const struct sna_composite_rectangles *r) 987{ 988 union { 989 struct sna_coordinate p; 990 float f; 991 } dst; 992 float src_x, src_y; 993 float msk_x, msk_y; 994 float w, h; 995 float *v; 996 997 src_x = r->src.x + op->src.offset[0]; 998 src_y = r->src.y + op->src.offset[1]; 999 msk_x = r->mask.x + op->mask.offset[0]; 1000 msk_y = r->mask.y + op->mask.offset[1]; 1001 w = r->width; 1002 h = r->height; 1003 1004 assert(op->floats_per_rect == 15); 1005 assert((sna->render.vertex_used % 5) == 0); 1006 v = sna->render.vertices + sna->render.vertex_used; 1007 sna->render.vertex_used += 15; 1008 1009 dst.p.x = r->dst.x + r->width; 1010 dst.p.y = r->dst.y + r->height; 1011 v[0] = dst.f; 1012 v[1] = (src_x + w) * op->src.scale[0]; 1013 v[2] = (src_y + h) * op->src.scale[1]; 1014 v[3] = (msk_x + w) * op->mask.scale[0]; 1015 v[4] = (msk_y + h) * op->mask.scale[1]; 1016 1017 dst.p.x = r->dst.x; 1018 v[5] 
= dst.f; 1019 v[6] = src_x * op->src.scale[0]; 1020 v[7] = v[2]; 1021 v[8] = msk_x * op->mask.scale[0]; 1022 v[9] = v[4]; 1023 1024 dst.p.y = r->dst.y; 1025 v[10] = dst.f; 1026 v[11] = v[6]; 1027 v[12] = src_y * op->src.scale[1]; 1028 v[13] = v[8]; 1029 v[14] = msk_y * op->mask.scale[1]; 1030} 1031 1032sse2 fastcall static void 1033emit_primitive_simple_source_identity(struct sna *sna, 1034 const struct sna_composite_op *op, 1035 const struct sna_composite_rectangles *r) 1036{ 1037 float *v; 1038 union { 1039 struct sna_coordinate p; 1040 float f; 1041 } dst; 1042 1043 float xx = op->src.transform->matrix[0][0]; 1044 float x0 = op->src.transform->matrix[0][2]; 1045 float yy = op->src.transform->matrix[1][1]; 1046 float y0 = op->src.transform->matrix[1][2]; 1047 float sx = op->src.scale[0]; 1048 float sy = op->src.scale[1]; 1049 int16_t tx = op->src.offset[0]; 1050 int16_t ty = op->src.offset[1]; 1051 float msk_x = r->mask.x + op->mask.offset[0]; 1052 float msk_y = r->mask.y + op->mask.offset[1]; 1053 float w = r->width, h = r->height; 1054 1055 assert(op->floats_per_rect == 15); 1056 assert((sna->render.vertex_used % 5) == 0); 1057 v = sna->render.vertices + sna->render.vertex_used; 1058 sna->render.vertex_used += 3*5; 1059 1060 dst.p.x = r->dst.x + r->width; 1061 dst.p.y = r->dst.y + r->height; 1062 v[0] = dst.f; 1063 v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1064 v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1065 v[3] = (msk_x + w) * op->mask.scale[0]; 1066 v[4] = (msk_y + h) * op->mask.scale[1]; 1067 1068 dst.p.x = r->dst.x; 1069 v[5] = dst.f; 1070 v[6] = ((r->src.x + tx) * xx + x0) * sx; 1071 v[7] = v[2]; 1072 v[8] = msk_x * op->mask.scale[0]; 1073 v[9] = v[4]; 1074 1075 dst.p.y = r->dst.y; 1076 v[10] = dst.f; 1077 v[11] = v[6]; 1078 v[12] = ((r->src.y + ty) * yy + y0) * sy; 1079 v[13] = v[8]; 1080 v[14] = msk_y * op->mask.scale[1]; 1081} 1082 1083sse2 fastcall static void 1084emit_primitive_affine_source_identity(struct sna *sna, 1085 const 
struct sna_composite_op *op, 1086 const struct sna_composite_rectangles *r) 1087{ 1088 float *v; 1089 union { 1090 struct sna_coordinate p; 1091 float f; 1092 } dst; 1093 float msk_x = r->mask.x + op->mask.offset[0]; 1094 float msk_y = r->mask.y + op->mask.offset[1]; 1095 float w = r->width, h = r->height; 1096 1097 assert(op->floats_per_rect == 15); 1098 assert((sna->render.vertex_used % 5) == 0); 1099 v = sna->render.vertices + sna->render.vertex_used; 1100 sna->render.vertex_used += 3*5; 1101 1102 dst.p.x = r->dst.x + r->width; 1103 dst.p.y = r->dst.y + r->height; 1104 v[0] = dst.f; 1105 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, 1106 op->src.offset[1] + r->src.y + r->height, 1107 op->src.transform, op->src.scale, 1108 &v[1], &v[2]); 1109 v[3] = (msk_x + w) * op->mask.scale[0]; 1110 v[4] = (msk_y + h) * op->mask.scale[1]; 1111 1112 dst.p.x = r->dst.x; 1113 v[5] = dst.f; 1114 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 1115 op->src.offset[1] + r->src.y + r->height, 1116 op->src.transform, op->src.scale, 1117 &v[6], &v[7]); 1118 v[8] = msk_x * op->mask.scale[0]; 1119 v[9] = v[4]; 1120 1121 dst.p.y = r->dst.y; 1122 v[10] = dst.f; 1123 _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 1124 op->src.offset[1] + r->src.y, 1125 op->src.transform, op->src.scale, 1126 &v[11], &v[12]); 1127 v[13] = v[8]; 1128 v[14] = msk_y * op->mask.scale[1]; 1129} 1130 1131/* SSE4_2 */ 1132#if defined(sse4_2) 1133 1134sse4_2 fastcall static void 1135emit_primitive_linear__sse4_2(struct sna *sna, 1136 const struct sna_composite_op *op, 1137 const struct sna_composite_rectangles *r) 1138{ 1139 float *v; 1140 union { 1141 struct sna_coordinate p; 1142 float f; 1143 } dst; 1144 1145 assert(op->floats_per_rect == 6); 1146 assert((sna->render.vertex_used % 2) == 0); 1147 v = sna->render.vertices + sna->render.vertex_used; 1148 sna->render.vertex_used += 6; 1149 assert(sna->render.vertex_used <= sna->render.vertex_size); 1150 1151 dst.p.x = 
r->dst.x + r->width; 1152 dst.p.y = r->dst.y + r->height; 1153 v[0] = dst.f; 1154 dst.p.x = r->dst.x; 1155 v[2] = dst.f; 1156 dst.p.y = r->dst.y; 1157 v[4] = dst.f; 1158 1159 v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1160 v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1161 v[5] = compute_linear(&op->src, r->src.x, r->src.y); 1162} 1163 1164sse4_2 fastcall static void 1165emit_boxes_linear__sse4_2(const struct sna_composite_op *op, 1166 const BoxRec *box, int nbox, 1167 float *v) 1168{ 1169 union { 1170 struct sna_coordinate p; 1171 float f; 1172 } dst; 1173 1174 do { 1175 dst.p.x = box->x2; 1176 dst.p.y = box->y2; 1177 v[0] = dst.f; 1178 dst.p.x = box->x1; 1179 v[2] = dst.f; 1180 dst.p.y = box->y1; 1181 v[4] = dst.f; 1182 1183 v[1] = compute_linear(&op->src, box->x2, box->y2); 1184 v[3] = compute_linear(&op->src, box->x1, box->y2); 1185 v[5] = compute_linear(&op->src, box->x1, box->y1); 1186 1187 v += 6; 1188 box++; 1189 } while (--nbox); 1190} 1191 1192sse4_2 fastcall static void 1193emit_primitive_identity_source__sse4_2(struct sna *sna, 1194 const struct sna_composite_op *op, 1195 const struct sna_composite_rectangles *r) 1196{ 1197 union { 1198 struct sna_coordinate p; 1199 float f; 1200 } dst; 1201 float *v; 1202 1203 assert(op->floats_per_rect == 9); 1204 assert((sna->render.vertex_used % 3) == 0); 1205 v = sna->render.vertices + sna->render.vertex_used; 1206 sna->render.vertex_used += 9; 1207 1208 dst.p.x = r->dst.x + r->width; 1209 dst.p.y = r->dst.y + r->height; 1210 v[0] = dst.f; 1211 dst.p.x = r->dst.x; 1212 v[3] = dst.f; 1213 dst.p.y = r->dst.y; 1214 v[6] = dst.f; 1215 1216 v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1217 v[1] = v[4] + r->width * op->src.scale[0]; 1218 1219 v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1220 v[5] = v[2] = v[8] + r->height * op->src.scale[1]; 1221} 1222 1223sse4_2 fastcall static void 1224emit_boxes_identity_source__sse4_2(const struct 
sna_composite_op *op, 1225 const BoxRec *box, int nbox, 1226 float *v) 1227{ 1228 do { 1229 union { 1230 struct sna_coordinate p; 1231 float f; 1232 } dst; 1233 1234 dst.p.x = box->x2; 1235 dst.p.y = box->y2; 1236 v[0] = dst.f; 1237 dst.p.x = box->x1; 1238 v[3] = dst.f; 1239 dst.p.y = box->y1; 1240 v[6] = dst.f; 1241 1242 v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 1243 v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 1244 1245 v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 1246 v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 1247 1248 v += 9; 1249 box++; 1250 } while (--nbox); 1251} 1252 1253sse4_2 fastcall static void 1254emit_primitive_simple_source__sse4_2(struct sna *sna, 1255 const struct sna_composite_op *op, 1256 const struct sna_composite_rectangles *r) 1257{ 1258 float *v; 1259 union { 1260 struct sna_coordinate p; 1261 float f; 1262 } dst; 1263 1264 float xx = op->src.transform->matrix[0][0]; 1265 float x0 = op->src.transform->matrix[0][2]; 1266 float yy = op->src.transform->matrix[1][1]; 1267 float y0 = op->src.transform->matrix[1][2]; 1268 float sx = op->src.scale[0]; 1269 float sy = op->src.scale[1]; 1270 int16_t tx = op->src.offset[0]; 1271 int16_t ty = op->src.offset[1]; 1272 1273 assert(op->floats_per_rect == 9); 1274 assert((sna->render.vertex_used % 3) == 0); 1275 v = sna->render.vertices + sna->render.vertex_used; 1276 sna->render.vertex_used += 3*3; 1277 1278 dst.p.x = r->dst.x + r->width; 1279 dst.p.y = r->dst.y + r->height; 1280 v[0] = dst.f; 1281 v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1282 v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1283 1284 dst.p.x = r->dst.x; 1285 v[3] = dst.f; 1286 v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 1287 1288 dst.p.y = r->dst.y; 1289 v[6] = dst.f; 1290 v[8] = ((r->src.y + ty) * yy + y0) * sy; 1291} 1292 1293sse4_2 fastcall static void 1294emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, 1295 const BoxRec 
*box, int nbox, 1296 float *v) 1297{ 1298 float xx = op->src.transform->matrix[0][0]; 1299 float x0 = op->src.transform->matrix[0][2]; 1300 float yy = op->src.transform->matrix[1][1]; 1301 float y0 = op->src.transform->matrix[1][2]; 1302 float sx = op->src.scale[0]; 1303 float sy = op->src.scale[1]; 1304 int16_t tx = op->src.offset[0]; 1305 int16_t ty = op->src.offset[1]; 1306 1307 do { 1308 union { 1309 struct sna_coordinate p; 1310 float f; 1311 } dst; 1312 1313 dst.p.x = box->x2; 1314 dst.p.y = box->y2; 1315 v[0] = dst.f; 1316 v[1] = ((box->x2 + tx) * xx + x0) * sx; 1317 v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 1318 1319 dst.p.x = box->x1; 1320 v[3] = dst.f; 1321 v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 1322 1323 dst.p.y = box->y1; 1324 v[6] = dst.f; 1325 v[8] = ((box->y1 + ty) * yy + y0) * sy; 1326 1327 v += 9; 1328 box++; 1329 } while (--nbox); 1330} 1331 1332sse4_2 fastcall static void 1333emit_primitive_identity_mask__sse4_2(struct sna *sna, 1334 const struct sna_composite_op *op, 1335 const struct sna_composite_rectangles *r) 1336{ 1337 union { 1338 struct sna_coordinate p; 1339 float f; 1340 } dst; 1341 float msk_x, msk_y; 1342 float w, h; 1343 float *v; 1344 1345 msk_x = r->mask.x + op->mask.offset[0]; 1346 msk_y = r->mask.y + op->mask.offset[1]; 1347 w = r->width; 1348 h = r->height; 1349 1350 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1351 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1352 1353 assert(op->floats_per_rect == 12); 1354 assert((sna->render.vertex_used % 4) == 0); 1355 v = sna->render.vertices + sna->render.vertex_used; 1356 sna->render.vertex_used += 12; 1357 1358 dst.p.x = r->dst.x + r->width; 1359 dst.p.y = r->dst.y + r->height; 1360 v[0] = dst.f; 1361 v[2] = (msk_x + w) * op->mask.scale[0]; 1362 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1363 1364 dst.p.x = r->dst.x; 1365 v[4] = dst.f; 1366 v[10] = v[6] = msk_x * op->mask.scale[0]; 1367 1368 dst.p.y = r->dst.y; 1369 v[8] = dst.f; 1370 v[11] = msk_y * 
op->mask.scale[1]; 1371 1372 v[9] = v[5] = v[1] = .5; 1373} 1374 1375sse4_2 fastcall static void 1376emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op, 1377 const BoxRec *box, int nbox, 1378 float *v) 1379{ 1380 float msk_x = op->mask.offset[0]; 1381 float msk_y = op->mask.offset[1]; 1382 1383 do { 1384 union { 1385 struct sna_coordinate p; 1386 float f; 1387 } dst; 1388 1389 dst.p.x = box->x2; 1390 dst.p.y = box->y2; 1391 v[0] = dst.f; 1392 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1393 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1394 1395 dst.p.x = box->x1; 1396 v[4] = dst.f; 1397 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1398 1399 dst.p.y = box->y1; 1400 v[8] = dst.f; 1401 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1402 1403 v[9] = v[5] = v[1] = .5; 1404 v += 12; 1405 box++; 1406 } while (--nbox); 1407} 1408 1409sse4_2 fastcall static void 1410emit_primitive_linear_identity_mask__sse4_2(struct sna *sna, 1411 const struct sna_composite_op *op, 1412 const struct sna_composite_rectangles *r) 1413{ 1414 union { 1415 struct sna_coordinate p; 1416 float f; 1417 } dst; 1418 float msk_x, msk_y; 1419 float w, h; 1420 float *v; 1421 1422 msk_x = r->mask.x + op->mask.offset[0]; 1423 msk_y = r->mask.y + op->mask.offset[1]; 1424 w = r->width; 1425 h = r->height; 1426 1427 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1428 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1429 1430 assert(op->floats_per_rect == 12); 1431 assert((sna->render.vertex_used % 4) == 0); 1432 v = sna->render.vertices + sna->render.vertex_used; 1433 sna->render.vertex_used += 12; 1434 1435 dst.p.x = r->dst.x + r->width; 1436 dst.p.y = r->dst.y + r->height; 1437 v[0] = dst.f; 1438 v[2] = (msk_x + w) * op->mask.scale[0]; 1439 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1440 1441 dst.p.x = r->dst.x; 1442 v[4] = dst.f; 1443 v[10] = v[6] = msk_x * op->mask.scale[0]; 1444 1445 dst.p.y = r->dst.y; 1446 v[8] = dst.f; 1447 v[11] = msk_y * 
op->mask.scale[1]; 1448 1449 v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1450 v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1451 v[9] = compute_linear(&op->src, r->src.x, r->src.y); 1452} 1453 1454sse4_2 fastcall static void 1455emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op, 1456 const BoxRec *box, int nbox, 1457 float *v) 1458{ 1459 float msk_x = op->mask.offset[0]; 1460 float msk_y = op->mask.offset[1]; 1461 1462 do { 1463 union { 1464 struct sna_coordinate p; 1465 float f; 1466 } dst; 1467 1468 dst.p.x = box->x2; 1469 dst.p.y = box->y2; 1470 v[0] = dst.f; 1471 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1472 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1473 1474 dst.p.x = box->x1; 1475 v[4] = dst.f; 1476 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1477 1478 dst.p.y = box->y1; 1479 v[8] = dst.f; 1480 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1481 1482 v[1] = compute_linear(&op->src, box->x2, box->y2); 1483 v[5] = compute_linear(&op->src, box->x1, box->y2); 1484 v[9] = compute_linear(&op->src, box->x1, box->y1); 1485 1486 v += 12; 1487 box++; 1488 } while (--nbox); 1489} 1490 1491#endif 1492 1493/* AVX2 */ 1494#if defined(avx2) 1495 1496avx2 fastcall static void 1497emit_primitive_linear__avx2(struct sna *sna, 1498 const struct sna_composite_op *op, 1499 const struct sna_composite_rectangles *r) 1500{ 1501 float *v; 1502 union { 1503 struct sna_coordinate p; 1504 float f; 1505 } dst; 1506 1507 assert(op->floats_per_rect == 6); 1508 assert((sna->render.vertex_used % 2) == 0); 1509 v = sna->render.vertices + sna->render.vertex_used; 1510 sna->render.vertex_used += 6; 1511 assert(sna->render.vertex_used <= sna->render.vertex_size); 1512 1513 dst.p.x = r->dst.x + r->width; 1514 dst.p.y = r->dst.y + r->height; 1515 v[0] = dst.f; 1516 dst.p.x = r->dst.x; 1517 v[2] = dst.f; 1518 dst.p.y = r->dst.y; 1519 v[4] = dst.f; 1520 1521 v[1] = compute_linear(&op->src, 
r->src.x+r->width, r->src.y+r->height); 1522 v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1523 v[5] = compute_linear(&op->src, r->src.x, r->src.y); 1524} 1525 1526avx2 fastcall static void 1527emit_boxes_linear__avx2(const struct sna_composite_op *op, 1528 const BoxRec *box, int nbox, 1529 float *v) 1530{ 1531 union { 1532 struct sna_coordinate p; 1533 float f; 1534 } dst; 1535 1536 do { 1537 dst.p.x = box->x2; 1538 dst.p.y = box->y2; 1539 v[0] = dst.f; 1540 dst.p.x = box->x1; 1541 v[2] = dst.f; 1542 dst.p.y = box->y1; 1543 v[4] = dst.f; 1544 1545 v[1] = compute_linear(&op->src, box->x2, box->y2); 1546 v[3] = compute_linear(&op->src, box->x1, box->y2); 1547 v[5] = compute_linear(&op->src, box->x1, box->y1); 1548 1549 v += 6; 1550 box++; 1551 } while (--nbox); 1552} 1553 1554avx2 fastcall static void 1555emit_primitive_identity_source__avx2(struct sna *sna, 1556 const struct sna_composite_op *op, 1557 const struct sna_composite_rectangles *r) 1558{ 1559 union { 1560 struct sna_coordinate p; 1561 float f; 1562 } dst; 1563 float *v; 1564 1565 assert(op->floats_per_rect == 9); 1566 assert((sna->render.vertex_used % 3) == 0); 1567 v = sna->render.vertices + sna->render.vertex_used; 1568 sna->render.vertex_used += 9; 1569 1570 dst.p.x = r->dst.x + r->width; 1571 dst.p.y = r->dst.y + r->height; 1572 v[0] = dst.f; 1573 dst.p.x = r->dst.x; 1574 v[3] = dst.f; 1575 dst.p.y = r->dst.y; 1576 v[6] = dst.f; 1577 1578 v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1579 v[1] = v[4] + r->width * op->src.scale[0]; 1580 1581 v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1582 v[5] = v[2] = v[8] + r->height * op->src.scale[1]; 1583} 1584 1585avx2 fastcall static void 1586emit_boxes_identity_source__avx2(const struct sna_composite_op *op, 1587 const BoxRec *box, int nbox, 1588 float *v) 1589{ 1590 do { 1591 union { 1592 struct sna_coordinate p; 1593 float f; 1594 } dst; 1595 1596 dst.p.x = box->x2; 1597 dst.p.y = box->y2; 1598 v[0] = dst.f; 
1599 dst.p.x = box->x1; 1600 v[3] = dst.f; 1601 dst.p.y = box->y1; 1602 v[6] = dst.f; 1603 1604 v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 1605 v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 1606 1607 v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 1608 v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 1609 1610 v += 9; 1611 box++; 1612 } while (--nbox); 1613} 1614 1615avx2 fastcall static void 1616emit_primitive_simple_source__avx2(struct sna *sna, 1617 const struct sna_composite_op *op, 1618 const struct sna_composite_rectangles *r) 1619{ 1620 float *v; 1621 union { 1622 struct sna_coordinate p; 1623 float f; 1624 } dst; 1625 1626 float xx = op->src.transform->matrix[0][0]; 1627 float x0 = op->src.transform->matrix[0][2]; 1628 float yy = op->src.transform->matrix[1][1]; 1629 float y0 = op->src.transform->matrix[1][2]; 1630 float sx = op->src.scale[0]; 1631 float sy = op->src.scale[1]; 1632 int16_t tx = op->src.offset[0]; 1633 int16_t ty = op->src.offset[1]; 1634 1635 assert(op->floats_per_rect == 9); 1636 assert((sna->render.vertex_used % 3) == 0); 1637 v = sna->render.vertices + sna->render.vertex_used; 1638 sna->render.vertex_used += 3*3; 1639 1640 dst.p.x = r->dst.x + r->width; 1641 dst.p.y = r->dst.y + r->height; 1642 v[0] = dst.f; 1643 v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1644 v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1645 1646 dst.p.x = r->dst.x; 1647 v[3] = dst.f; 1648 v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 1649 1650 dst.p.y = r->dst.y; 1651 v[6] = dst.f; 1652 v[8] = ((r->src.y + ty) * yy + y0) * sy; 1653} 1654 1655avx2 fastcall static void 1656emit_boxes_simple_source__avx2(const struct sna_composite_op *op, 1657 const BoxRec *box, int nbox, 1658 float *v) 1659{ 1660 float xx = op->src.transform->matrix[0][0]; 1661 float x0 = op->src.transform->matrix[0][2]; 1662 float yy = op->src.transform->matrix[1][1]; 1663 float y0 = op->src.transform->matrix[1][2]; 1664 
float sx = op->src.scale[0]; 1665 float sy = op->src.scale[1]; 1666 int16_t tx = op->src.offset[0]; 1667 int16_t ty = op->src.offset[1]; 1668 1669 do { 1670 union { 1671 struct sna_coordinate p; 1672 float f; 1673 } dst; 1674 1675 dst.p.x = box->x2; 1676 dst.p.y = box->y2; 1677 v[0] = dst.f; 1678 v[1] = ((box->x2 + tx) * xx + x0) * sx; 1679 v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 1680 1681 dst.p.x = box->x1; 1682 v[3] = dst.f; 1683 v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 1684 1685 dst.p.y = box->y1; 1686 v[6] = dst.f; 1687 v[8] = ((box->y1 + ty) * yy + y0) * sy; 1688 1689 v += 9; 1690 box++; 1691 } while (--nbox); 1692} 1693 1694avx2 fastcall static void 1695emit_primitive_identity_mask__avx2(struct sna *sna, 1696 const struct sna_composite_op *op, 1697 const struct sna_composite_rectangles *r) 1698{ 1699 union { 1700 struct sna_coordinate p; 1701 float f; 1702 } dst; 1703 float msk_x, msk_y; 1704 float w, h; 1705 float *v; 1706 1707 msk_x = r->mask.x + op->mask.offset[0]; 1708 msk_y = r->mask.y + op->mask.offset[1]; 1709 w = r->width; 1710 h = r->height; 1711 1712 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1713 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1714 1715 assert(op->floats_per_rect == 12); 1716 assert((sna->render.vertex_used % 4) == 0); 1717 v = sna->render.vertices + sna->render.vertex_used; 1718 sna->render.vertex_used += 12; 1719 1720 dst.p.x = r->dst.x + r->width; 1721 dst.p.y = r->dst.y + r->height; 1722 v[0] = dst.f; 1723 v[2] = (msk_x + w) * op->mask.scale[0]; 1724 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1725 1726 dst.p.x = r->dst.x; 1727 v[4] = dst.f; 1728 v[10] = v[6] = msk_x * op->mask.scale[0]; 1729 1730 dst.p.y = r->dst.y; 1731 v[8] = dst.f; 1732 v[11] = msk_y * op->mask.scale[1]; 1733 1734 v[9] = v[5] = v[1] = .5; 1735} 1736 1737avx2 fastcall static void 1738emit_boxes_identity_mask__avx2(const struct sna_composite_op *op, 1739 const BoxRec *box, int nbox, 1740 float *v) 1741{ 1742 float msk_x = 
op->mask.offset[0]; 1743 float msk_y = op->mask.offset[1]; 1744 1745 do { 1746 union { 1747 struct sna_coordinate p; 1748 float f; 1749 } dst; 1750 1751 dst.p.x = box->x2; 1752 dst.p.y = box->y2; 1753 v[0] = dst.f; 1754 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1755 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1756 1757 dst.p.x = box->x1; 1758 v[4] = dst.f; 1759 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1760 1761 dst.p.y = box->y1; 1762 v[8] = dst.f; 1763 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1764 1765 v[9] = v[5] = v[1] = .5; 1766 v += 12; 1767 box++; 1768 } while (--nbox); 1769} 1770 1771avx2 fastcall static void 1772emit_primitive_linear_identity_mask__avx2(struct sna *sna, 1773 const struct sna_composite_op *op, 1774 const struct sna_composite_rectangles *r) 1775{ 1776 union { 1777 struct sna_coordinate p; 1778 float f; 1779 } dst; 1780 float msk_x, msk_y; 1781 float w, h; 1782 float *v; 1783 1784 msk_x = r->mask.x + op->mask.offset[0]; 1785 msk_y = r->mask.y + op->mask.offset[1]; 1786 w = r->width; 1787 h = r->height; 1788 1789 DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1790 __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1791 1792 assert(op->floats_per_rect == 12); 1793 assert((sna->render.vertex_used % 4) == 0); 1794 v = sna->render.vertices + sna->render.vertex_used; 1795 sna->render.vertex_used += 12; 1796 1797 dst.p.x = r->dst.x + r->width; 1798 dst.p.y = r->dst.y + r->height; 1799 v[0] = dst.f; 1800 v[2] = (msk_x + w) * op->mask.scale[0]; 1801 v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1802 1803 dst.p.x = r->dst.x; 1804 v[4] = dst.f; 1805 v[10] = v[6] = msk_x * op->mask.scale[0]; 1806 1807 dst.p.y = r->dst.y; 1808 v[8] = dst.f; 1809 v[11] = msk_y * op->mask.scale[1]; 1810 1811 v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1812 v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1813 v[9] = compute_linear(&op->src, r->src.x, r->src.y); 1814} 1815 1816avx2 fastcall 
static void 1817emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op, 1818 const BoxRec *box, int nbox, 1819 float *v) 1820{ 1821 float msk_x = op->mask.offset[0]; 1822 float msk_y = op->mask.offset[1]; 1823 1824 do { 1825 union { 1826 struct sna_coordinate p; 1827 float f; 1828 } dst; 1829 1830 dst.p.x = box->x2; 1831 dst.p.y = box->y2; 1832 v[0] = dst.f; 1833 v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1834 v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1835 1836 dst.p.x = box->x1; 1837 v[4] = dst.f; 1838 v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1839 1840 dst.p.y = box->y1; 1841 v[8] = dst.f; 1842 v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1843 1844 v[1] = compute_linear(&op->src, box->x2, box->y2); 1845 v[5] = compute_linear(&op->src, box->x1, box->y2); 1846 v[9] = compute_linear(&op->src, box->x1, box->y1); 1847 1848 v += 12; 1849 box++; 1850 } while (--nbox); 1851} 1852 1853#endif 1854 1855unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) 1856{ 1857 unsigned vb; 1858 1859 if (tmp->mask.bo) { 1860 if (tmp->mask.transform == NULL) { 1861 if (tmp->src.is_solid) { 1862 DBG(("%s: solid, identity mask\n", __FUNCTION__)); 1863#if defined(avx2) 1864 if (sna->cpu_features & AVX2) { 1865 tmp->prim_emit = emit_primitive_identity_mask__avx2; 1866 tmp->emit_boxes = emit_boxes_identity_mask__avx2; 1867 } else 1868#endif 1869#if defined(sse4_2) 1870 if (sna->cpu_features & SSE4_2) { 1871 tmp->prim_emit = emit_primitive_identity_mask__sse4_2; 1872 tmp->emit_boxes = emit_boxes_identity_mask__sse4_2; 1873 } else 1874#endif 1875 { 1876 tmp->prim_emit = emit_primitive_identity_mask; 1877 tmp->emit_boxes = emit_boxes_identity_mask; 1878 } 1879 tmp->floats_per_vertex = 4; 1880 vb = 1 | 2 << 2; 1881 } else if (tmp->src.is_linear) { 1882 DBG(("%s: linear, identity mask\n", __FUNCTION__)); 1883#if defined(avx2) 1884 if (sna->cpu_features & AVX2) { 1885 tmp->prim_emit = 
emit_primitive_linear_identity_mask__avx2; 1886 tmp->emit_boxes = emit_boxes_linear_identity_mask__avx2; 1887 } else 1888#endif 1889#if defined(sse4_2) 1890 if (sna->cpu_features & SSE4_2) { 1891 tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2; 1892 tmp->emit_boxes = emit_boxes_linear_identity_mask__sse4_2; 1893 } else 1894#endif 1895 { 1896 tmp->prim_emit = emit_primitive_linear_identity_mask; 1897 tmp->emit_boxes = emit_boxes_linear_identity_mask; 1898 } 1899 tmp->floats_per_vertex = 4; 1900 vb = 1 | 2 << 2; 1901 } else if (tmp->src.transform == NULL) { 1902 DBG(("%s: identity source, identity mask\n", __FUNCTION__)); 1903 tmp->prim_emit = emit_primitive_identity_source_mask; 1904 tmp->floats_per_vertex = 5; 1905 vb = 2 << 2 | 2; 1906 } else if (tmp->src.is_affine) { 1907 tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 1908 tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 1909 if (!sna_affine_transform_is_rotation(tmp->src.transform)) { 1910 DBG(("%s: simple src, identity mask\n", __FUNCTION__)); 1911 tmp->prim_emit = emit_primitive_simple_source_identity; 1912 } else { 1913 DBG(("%s: affine src, identity mask\n", __FUNCTION__)); 1914 tmp->prim_emit = emit_primitive_affine_source_identity; 1915 } 1916 tmp->floats_per_vertex = 5; 1917 vb = 2 << 2 | 2; 1918 } else { 1919 DBG(("%s: projective source, identity mask\n", __FUNCTION__)); 1920 tmp->prim_emit = emit_primitive_mask; 1921 tmp->floats_per_vertex = 6; 1922 vb = 2 << 2 | 3; 1923 } 1924 } else { 1925 tmp->prim_emit = emit_primitive_mask; 1926 tmp->emit_boxes = emit_boxes_mask; 1927 tmp->floats_per_vertex = 1; 1928 vb = 0; 1929 if (tmp->mask.is_solid) { 1930 tmp->floats_per_vertex += 1; 1931 vb |= 1 << 2; 1932 } else if (tmp->mask.is_affine) { 1933 tmp->floats_per_vertex += 2; 1934 vb |= 2 << 2; 1935 }else { 1936 tmp->floats_per_vertex += 3; 1937 vb |= 3 << 2; 1938 } 1939 if (tmp->src.is_solid) { 1940 tmp->floats_per_vertex += 1; 1941 vb |= 1; 1942 } else if (tmp->src.is_affine) { 1943 
tmp->floats_per_vertex += 2; 1944 vb |= 2 ; 1945 }else { 1946 tmp->floats_per_vertex += 3; 1947 vb |= 3; 1948 } 1949 DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", 1950 __FUNCTION__,tmp->floats_per_vertex, vb)); 1951 } 1952 } else { 1953 if (tmp->src.is_solid) { 1954 DBG(("%s: solid, no mask\n", __FUNCTION__)); 1955 tmp->prim_emit = emit_primitive_solid; 1956 tmp->emit_boxes = emit_boxes_solid; 1957 if (tmp->src.is_opaque && tmp->op == PictOpOver) 1958 tmp->op = PictOpSrc; 1959 tmp->floats_per_vertex = 2; 1960 vb = 1; 1961 } else if (tmp->src.is_linear) { 1962 DBG(("%s: linear, no mask\n", __FUNCTION__)); 1963#if defined(avx2) 1964 if (sna->cpu_features & AVX2) { 1965 tmp->prim_emit = emit_primitive_linear__avx2; 1966 tmp->emit_boxes = emit_boxes_linear__avx2; 1967 } else 1968#endif 1969#if defined(sse4_2) 1970 if (sna->cpu_features & SSE4_2) { 1971 tmp->prim_emit = emit_primitive_linear__sse4_2; 1972 tmp->emit_boxes = emit_boxes_linear__sse4_2; 1973 } else 1974#endif 1975 { 1976 tmp->prim_emit = emit_primitive_linear; 1977 tmp->emit_boxes = emit_boxes_linear; 1978 } 1979 tmp->floats_per_vertex = 2; 1980 vb = 1; 1981 } else if (tmp->src.transform == NULL) { 1982 DBG(("%s: identity src, no mask\n", __FUNCTION__)); 1983#if defined(avx2) 1984 if (sna->cpu_features & AVX2) { 1985 tmp->prim_emit = emit_primitive_identity_source__avx2; 1986 tmp->emit_boxes = emit_boxes_identity_source__avx2; 1987 } else 1988#endif 1989#if defined(sse4_2) 1990 if (sna->cpu_features & SSE4_2) { 1991 tmp->prim_emit = emit_primitive_identity_source__sse4_2; 1992 tmp->emit_boxes = emit_boxes_identity_source__sse4_2; 1993 } else 1994#endif 1995 { 1996 tmp->prim_emit = emit_primitive_identity_source; 1997 tmp->emit_boxes = emit_boxes_identity_source; 1998 } 1999 tmp->floats_per_vertex = 3; 2000 vb = 2; 2001 } else if (tmp->src.is_affine) { 2002 tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 2003 tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 2004 if 
(!sna_affine_transform_is_rotation(tmp->src.transform)) { 2005 DBG(("%s: simple src, no mask\n", __FUNCTION__)); 2006#if defined(avx2) 2007 if (sna->cpu_features & AVX2) { 2008 tmp->prim_emit = emit_primitive_simple_source__avx2; 2009 tmp->emit_boxes = emit_boxes_simple_source__avx2; 2010 } else 2011#endif 2012#if defined(sse4_2) 2013 if (sna->cpu_features & SSE4_2) { 2014 tmp->prim_emit = emit_primitive_simple_source__sse4_2; 2015 tmp->emit_boxes = emit_boxes_simple_source__sse4_2; 2016 } else 2017#endif 2018 { 2019 tmp->prim_emit = emit_primitive_simple_source; 2020 tmp->emit_boxes = emit_boxes_simple_source; 2021 } 2022 } else { 2023 DBG(("%s: affine src, no mask\n", __FUNCTION__)); 2024 tmp->prim_emit = emit_primitive_affine_source; 2025 tmp->emit_boxes = emit_boxes_affine_source; 2026 } 2027 tmp->floats_per_vertex = 3; 2028 vb = 2; 2029 } else { 2030 DBG(("%s: projective src, no mask\n", __FUNCTION__)); 2031 assert(!tmp->src.is_solid); 2032 tmp->prim_emit = emit_primitive; 2033 tmp->emit_boxes = emit_boxes; 2034 tmp->floats_per_vertex = 4; 2035 vb = 3; 2036 } 2037 } 2038 tmp->floats_per_rect = 3 * tmp->floats_per_vertex; 2039 2040 return vb; 2041} 2042 2043sse2 force_inline static void 2044emit_span_vertex(struct sna *sna, 2045 const struct sna_composite_spans_op *op, 2046 int16_t x, int16_t y) 2047{ 2048 OUT_VERTEX(x, y); 2049 emit_texcoord(sna, &op->base.src, x, y); 2050} 2051 2052sse2 fastcall static void 2053emit_span_primitive(struct sna *sna, 2054 const struct sna_composite_spans_op *op, 2055 const BoxRec *box, 2056 float opacity) 2057{ 2058 emit_span_vertex(sna, op, box->x2, box->y2); 2059 OUT_VERTEX_F(opacity); 2060 2061 emit_span_vertex(sna, op, box->x1, box->y2); 2062 OUT_VERTEX_F(opacity); 2063 2064 emit_span_vertex(sna, op, box->x1, box->y1); 2065 OUT_VERTEX_F(opacity); 2066} 2067 2068sse2 fastcall static void 2069emit_span_boxes(const struct sna_composite_spans_op *op, 2070 const struct sna_opacity_box *b, int nbox, 2071 float *v) 2072{ 2073 do { 
2074 v = vemit_vertex(v, &op->base, b->box.x2, b->box.y2); 2075 *v++ = b->alpha; 2076 2077 v = vemit_vertex(v, &op->base, b->box.x1, b->box.y2); 2078 *v++ = b->alpha; 2079 2080 v = vemit_vertex(v, &op->base, b->box.x1, b->box.y1); 2081 *v++ = b->alpha; 2082 2083 b++; 2084 } while (--nbox); 2085} 2086 2087sse2 fastcall static void 2088emit_span_solid(struct sna *sna, 2089 const struct sna_composite_spans_op *op, 2090 const BoxRec *box, 2091 float opacity) 2092{ 2093 float *v; 2094 union { 2095 struct sna_coordinate p; 2096 float f; 2097 } dst; 2098 2099 assert(op->base.floats_per_rect == 9); 2100 assert((sna->render.vertex_used % 3) == 0); 2101 v = sna->render.vertices + sna->render.vertex_used; 2102 sna->render.vertex_used += 3*3; 2103 2104 dst.p.x = box->x2; 2105 dst.p.y = box->y2; 2106 v[0] = dst.f; 2107 2108 dst.p.x = box->x1; 2109 v[3] = dst.f; 2110 2111 dst.p.y = box->y1; 2112 v[6] = dst.f; 2113 2114 v[7] = v[4] = v[1] = .5; 2115 v[8] = v[5] = v[2] = opacity; 2116} 2117 2118sse2 fastcall static void 2119emit_span_boxes_solid(const struct sna_composite_spans_op *op, 2120 const struct sna_opacity_box *b, 2121 int nbox, float *v) 2122{ 2123 do { 2124 union { 2125 struct sna_coordinate p; 2126 float f; 2127 } dst; 2128 2129 dst.p.x = b->box.x2; 2130 dst.p.y = b->box.y2; 2131 v[0] = dst.f; 2132 2133 dst.p.x = b->box.x1; 2134 v[3] = dst.f; 2135 2136 dst.p.y = b->box.y1; 2137 v[6] = dst.f; 2138 2139 v[7] = v[4] = v[1] = .5; 2140 v[8] = v[5] = v[2] = b->alpha; 2141 2142 v += 9; 2143 b++; 2144 } while (--nbox); 2145} 2146 2147sse2 fastcall static void 2148emit_span_identity(struct sna *sna, 2149 const struct sna_composite_spans_op *op, 2150 const BoxRec *box, 2151 float opacity) 2152{ 2153 float *v; 2154 union { 2155 struct sna_coordinate p; 2156 float f; 2157 } dst; 2158 2159 float sx = op->base.src.scale[0]; 2160 float sy = op->base.src.scale[1]; 2161 int16_t tx = op->base.src.offset[0]; 2162 int16_t ty = op->base.src.offset[1]; 2163 2164 
	/* NOTE(review): this chunk opens mid-function -- the lines below are
	 * the tail of the identity-transform span emitter whose header lies
	 * above this view.  Every span emitter in this file writes one
	 * rectangle as three vertices in the order (x2,y2), (x1,y2), (x1,y1)
	 * (presumably RECTLIST-style, with the fourth corner implied -- see
	 * floats_per_rect == 3 * floats_per_vertex in gen4_vertex_align).
	 * A vertex here is 4 floats: packed dst coordinate, source u,
	 * source v, opacity. */
	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;	/* dst coord packed into one float via the union */
	v[1] = (box->x2 + tx) * sx;
	v[6] = v[2] = (box->y2 + ty) * sy;	/* y2 shared with vertex 2 */

	dst.p.x = box->x1;
	v[4] = dst.f;	/* (x1, y2) */
	v[9] = v[5] = (box->x1 + tx) * sx;	/* x1 shared with vertex 3 */

	dst.p.y = box->y1;
	v[8] = dst.f;	/* (x1, y1) */
	v[10] = (box->y1 + ty) * sy;

	/* The opacity is uniform across the rectangle. */
	v[11] = v[7] = v[3] = opacity;
}

/* Batch form of the identity emitter: the caller has reserved nbox * 12
 * floats at v; each sna_opacity_box carries its own box and alpha. */
sse2 fastcall static void
emit_span_boxes_identity(const struct sna_composite_spans_op *op,
			 const struct sna_opacity_box *b, int nbox,
			 float *v)
{
	do {
		/* Packs the two int16 dst coordinates into a single
		 * float-sized vertex element. */
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		/* Loop-invariant source mapping; left inside the loop for
		 * the compiler to hoist. */
		float sx = op->base.src.scale[0];
		float sy = op->base.src.scale[1];
		int16_t tx = op->base.src.offset[0];
		int16_t ty = op->base.src.offset[1];

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = (b->box.x2 + tx) * sx;
		v[6] = v[2] = (b->box.y2 + ty) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = (b->box.x1 + tx) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = (b->box.y1 + ty) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* "Simple" transform: scale + translate only (no rotation, no projection).
 * Applies the diagonal matrix terms inline instead of a full transform. */
sse2 fastcall static void
emit_span_simple(struct sna *sna,
		 const struct sna_composite_spans_op *op,
		 const BoxRec *box,
		 float opacity)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	v[1] = ((box->x2 + tx) * xx + x0) * sx;
	v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

	dst.p.x = box->x1;
	v[4] = dst.f;
	v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;

	dst.p.y = box->y1;
	v[8] = dst.f;
	v[10] = ((box->y1 + ty) * yy + y0) * sy;

	v[11] = v[7] = v[3] = opacity;
}

/* Batch form of emit_span_simple; matrix terms hoisted out of the loop. */
sse2 fastcall static void
emit_span_boxes_simple(const struct sna_composite_spans_op *op,
		       const struct sna_opacity_box *b, int nbox,
		       float *v)
{
	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
		v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = ((b->box.y1 + ty) * yy + y0) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* General affine transform: each source coordinate pair goes through
 * _sna_get_transformed_scaled.  NOTE(review): unlike the identity/simple
 * emitters above, there is no vertex_used <= vertex_size assert after the
 * reservation here -- looks like an oversight, confirm upstream. */
sse2 fastcall static void
emit_span_affine(struct sna *sna,
		 const struct sna_composite_spans_op *op,
		 const BoxRec *box,
		 float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[1], &v[2]);

	dst.p.x = box->x1;
	v[4] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[5], &v[6]);

	dst.p.y = box->y1;
	v[8] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y1,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[9], &v[10]);

	v[11] = v[7] = v[3] = opacity;
}

/* Batch form of emit_span_affine. */
sse2 fastcall static void
emit_span_boxes_affine(const struct sna_composite_spans_op *op,
		       const struct sna_opacity_box *b, int nbox,
		       float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[1], &v[2]);

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[5], &v[6]);

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y1,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[9], &v[10]);

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* Linear gradient source: 3 floats per vertex (packed dst coordinate,
 * gradient position from compute_linear, opacity). */
sse2 fastcall static void
emit_span_linear(struct sna *sna,
		 const struct sna_composite_spans_op *op,
		 const BoxRec *box,
		 float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	dst.p.x = box->x1;
	v[3] = dst.f;
	dst.p.y = box->y1;
	v[6] = dst.f;

	v[1] = compute_linear(&op->base.src, box->x2, box->y2);
	v[4] = compute_linear(&op->base.src, box->x1, box->y2);
	v[7] = compute_linear(&op->base.src, box->x1, box->y1);

	v[8] = v[5] = v[2] = opacity;
}

/* Batch form of emit_span_linear; 9 floats per box. */
sse2 fastcall static void
emit_span_boxes_linear(const struct sna_composite_spans_op *op,
		       const struct sna_opacity_box *b, int nbox,
		       float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		dst.p.x = b->box.x1;
		v[3] = dst.f;
		dst.p.y = b->box.y1;
		v[6] = dst.f;

		v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
		v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
		v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);

		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

/* SSE4_2 */
#if defined(sse4_2)

/* The __sse4_2 emitters below are line-for-line copies of the plain
 * emitters above, compiled with the sse4_2 target attribute so the
 * compiler may use SSE4.2 instructions.  Keep them in sync with the
 * plain versions. */

sse4_2 fastcall static void
emit_span_identity__sse4_2(struct sna *sna,
			   const struct sna_composite_spans_op *op,
			   const BoxRec *box,
			   float opacity)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	v[1] = (box->x2 + tx) * sx;
	v[6] = v[2] = (box->y2 + ty) * sy;

	dst.p.x = box->x1;
	v[4] = dst.f;
	v[9] = v[5] = (box->x1 + tx) * sx;

	dst.p.y = box->y1;
	v[8] = dst.f;
	v[10] = (box->y1 + ty) * sy;

	v[11] = v[7] = v[3] = opacity;
}

/* SSE4.2 copy of emit_span_boxes_identity. */
sse4_2 fastcall static void
emit_span_boxes_identity__sse4_2(const struct sna_composite_spans_op *op,
				 const struct sna_opacity_box *b, int nbox,
				 float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		float sx = op->base.src.scale[0];
		float sy = op->base.src.scale[1];
		int16_t tx = op->base.src.offset[0];
		int16_t ty = op->base.src.offset[1];

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = (b->box.x2 + tx) * sx;
		v[6] = v[2] = (b->box.y2 + ty) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = (b->box.x1 + tx) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = (b->box.y1 + ty) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* SSE4.2 copy of emit_span_simple. */
sse4_2 fastcall static void
emit_span_simple__sse4_2(struct sna *sna,
			 const struct sna_composite_spans_op *op,
			 const BoxRec *box,
			 float opacity)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	v[1] = ((box->x2 + tx) * xx + x0) * sx;
	v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

	dst.p.x = box->x1;
	v[4] = dst.f;
	v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;

	dst.p.y = box->y1;
	v[8] = dst.f;
	v[10] = ((box->y1 + ty) * yy + y0) * sy;

	v[11] = v[7] = v[3] = opacity;
}

/* SSE4.2 copy of emit_span_boxes_simple. */
sse4_2 fastcall static void
emit_span_boxes_simple__sse4_2(const struct sna_composite_spans_op *op,
			       const struct sna_opacity_box *b, int nbox,
			       float *v)
{
	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
		v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = ((b->box.y1 + ty) * yy + y0) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* SSE4.2 copy of emit_span_affine. */
sse4_2 fastcall static void
emit_span_affine__sse4_2(struct sna *sna,
			 const struct sna_composite_spans_op *op,
			 const BoxRec *box,
			 float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[1], &v[2]);

	dst.p.x = box->x1;
	v[4] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[5], &v[6]);

	dst.p.y = box->y1;
	v[8] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y1,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[9], &v[10]);

	v[11] = v[7] = v[3] = opacity;
}

/* SSE4.2 copy of emit_span_boxes_affine. */
sse4_2 fastcall static void
emit_span_boxes_affine__sse4_2(const struct sna_composite_spans_op *op,
			       const struct sna_opacity_box *b, int nbox,
			       float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[1], &v[2]);

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[5], &v[6]);

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y1,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[9], &v[10]);

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* SSE4.2 copy of emit_span_linear. */
sse4_2 fastcall static void
emit_span_linear__sse4_2(struct sna *sna,
			 const struct sna_composite_spans_op *op,
			 const BoxRec *box,
			 float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	dst.p.x = box->x1;
	v[3] = dst.f;
	dst.p.y = box->y1;
	v[6] = dst.f;

	v[1] = compute_linear(&op->base.src, box->x2, box->y2);
	v[4] = compute_linear(&op->base.src, box->x1, box->y2);
	v[7] = compute_linear(&op->base.src, box->x1, box->y1);

	v[8] = v[5] = v[2] = opacity;
}

/* SSE4.2 copy of emit_span_boxes_linear. */
sse4_2 fastcall static void
emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op,
			       const struct sna_opacity_box *b, int nbox,
			       float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		dst.p.x = b->box.x1;
		v[3] = dst.f;
		dst.p.y = b->box.y1;
		v[6] = dst.f;

		v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
		v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
		v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);

		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}

#endif

/* AVX2 */
#if defined(avx2)

/* Likewise, the __avx2 emitters are copies of the plain emitters compiled
 * with the avx2 target attribute.  Keep them in sync. */

avx2 fastcall static void
emit_span_identity__avx2(struct sna *sna,
			 const struct sna_composite_spans_op *op,
			 const BoxRec *box,
			 float opacity)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	v[1] = (box->x2 + tx) * sx;
	v[6] = v[2] = (box->y2 + ty) * sy;

	dst.p.x = box->x1;
	v[4] = dst.f;
	v[9] = v[5] = (box->x1 + tx) * sx;

	dst.p.y = box->y1;
	v[8] = dst.f;
	v[10] = (box->y1 + ty) * sy;

	v[11] = v[7] = v[3] = opacity;
}

/* AVX2 copy of emit_span_boxes_identity. */
avx2 fastcall static void
emit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op,
			       const struct sna_opacity_box *b, int nbox,
			       float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		float sx = op->base.src.scale[0];
		float sy = op->base.src.scale[1];
		int16_t tx = op->base.src.offset[0];
		int16_t ty = op->base.src.offset[1];

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = (b->box.x2 + tx) * sx;
		v[6] = v[2] = (b->box.y2 + ty) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = (b->box.x1 + tx) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = (b->box.y1 + ty) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* AVX2 copy of emit_span_simple. */
avx2 fastcall static void
emit_span_simple__avx2(struct sna *sna,
		       const struct sna_composite_spans_op *op,
		       const BoxRec *box,
		       float opacity)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*4;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	v[1] = ((box->x2 + tx) * xx + x0) * sx;
	v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

	dst.p.x = box->x1;
	v[4] = dst.f;
	v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;

	dst.p.y = box->y1;
	v[8] = dst.f;
	v[10] = ((box->y1 + ty) * yy + y0) * sy;

	v[11] = v[7] = v[3] = opacity;
}

/* AVX2 copy of emit_span_boxes_simple. */
avx2 fastcall static void
emit_span_boxes_simple__avx2(const struct sna_composite_spans_op *op,
			     const struct sna_opacity_box *b, int nbox,
			     float *v)
{
	float xx = op->base.src.transform->matrix[0][0];
	float x0 = op->base.src.transform->matrix[0][2];
	float yy = op->base.src.transform->matrix[1][1];
	float y0 = op->base.src.transform->matrix[1][2];
	float sx = op->base.src.scale[0];
	float sy = op->base.src.scale[1];
	int16_t tx = op->base.src.offset[0];
	int16_t ty = op->base.src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
		v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		v[10] = ((b->box.y1 + ty) * yy + y0) * sy;

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* AVX2 copy of emit_span_affine. */
avx2 fastcall static void
emit_span_affine__avx2(struct sna *sna,
		       const struct sna_composite_spans_op *op,
		       const BoxRec *box,
		       float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[1], &v[2]);

	dst.p.x = box->x1;
	v[4] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y2,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[5], &v[6]);

	dst.p.y = box->y1;
	v[8] = dst.f;
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
				    op->base.src.offset[1] + box->y1,
				    op->base.src.transform,
				    op->base.src.scale,
				    &v[9], &v[10]);

	v[11] = v[7] = v[3] = opacity;
}

/* AVX2 copy of emit_span_boxes_affine. */
avx2 fastcall static void
emit_span_boxes_affine__avx2(const struct sna_composite_spans_op *op,
			     const struct sna_opacity_box *b, int nbox,
			     float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[1], &v[2]);

		dst.p.x = b->box.x1;
		v[4] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y2,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[5], &v[6]);

		dst.p.y = b->box.y1;
		v[8] = dst.f;
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
					    op->base.src.offset[1] + b->box.y1,
					    op->base.src.transform,
					    op->base.src.scale,
					    &v[9], &v[10]);

		v[11] = v[7] = v[3] = b->alpha;

		v += 12;
		b++;
	} while (--nbox);
}

/* AVX2 copy of emit_span_linear. */
avx2 fastcall static void
emit_span_linear__avx2(struct sna *sna,
		       const struct sna_composite_spans_op *op,
		       const BoxRec *box,
		       float opacity)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->base.floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = box->x2;
	dst.p.y = box->y2;
	v[0] = dst.f;
	dst.p.x = box->x1;
	v[3] = dst.f;
	dst.p.y = box->y1;
	v[6] = dst.f;

	v[1] = compute_linear(&op->base.src, box->x2, box->y2);
	v[4] = compute_linear(&op->base.src, box->x1, box->y2);
	v[7] = compute_linear(&op->base.src, box->x1, box->y1);

	v[8] = v[5] = v[2] = opacity;
}

/* AVX2 copy of emit_span_boxes_linear. */
avx2 fastcall static void
emit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op,
			     const struct sna_opacity_box *b, int nbox,
			     float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = b->box.x2;
		dst.p.y = b->box.y2;
		v[0] = dst.f;
		dst.p.x = b->box.x1;
		v[3] = dst.f;
		dst.p.y = b->box.y1;
		v[6] = dst.f;

		v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
		v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
		v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);

		v[8] = v[5] = v[2] = b->alpha;

		v += 9;
		b++;
	} while (--nbox);
}
#endif

/* Select the span-emitter pair (single box + batch) that matches the
 * source: solid colour, linear gradient, identity, scale/translate-only
 * ("simple"), general affine, or projective.  Where an AVX2 or SSE4.2
 * build of the emitter exists and the CPU supports it, prefer it.
 * Also sets floats_per_vertex/floats_per_rect on the op.
 *
 * Returns an opaque vertex-buffer layout id for the caller; observe that
 * the low bits always equal floats_per_vertex - 2, i.e. the number of
 * floats carried per vertex beyond the packed position and the opacity
 * channel flagged by bit 2. */
unsigned gen4_choose_spans_emitter(struct sna *sna,
				   struct sna_composite_spans_op *tmp)
{
	unsigned vb;

	if (tmp->base.src.is_solid) {
		DBG(("%s: solid source\n", __FUNCTION__));
		tmp->prim_emit = emit_span_solid;
		tmp->emit_boxes = emit_span_boxes_solid;
		tmp->base.floats_per_vertex = 3;
		vb = 1 << 2 | 1;
	} else if (tmp->base.src.is_linear) {
		DBG(("%s: linear source\n", __FUNCTION__));
#if defined(avx2)
		if (sna->cpu_features & AVX2) {
			tmp->prim_emit = emit_span_linear__avx2;
			tmp->emit_boxes = emit_span_boxes_linear__avx2;
		} else
#endif
#if defined(sse4_2)
		if (sna->cpu_features & SSE4_2) {
			tmp->prim_emit = emit_span_linear__sse4_2;
			tmp->emit_boxes = emit_span_boxes_linear__sse4_2;
		} else
#endif
		{
			tmp->prim_emit = emit_span_linear;
			tmp->emit_boxes = emit_span_boxes_linear;
		}
		tmp->base.floats_per_vertex = 3;
		vb = 1 << 2 | 1;
	} else if (tmp->base.src.transform == NULL) {
		DBG(("%s: identity transform\n", __FUNCTION__));
#if defined(avx2)
		if (sna->cpu_features & AVX2) {
			tmp->prim_emit = emit_span_identity__avx2;
			tmp->emit_boxes = emit_span_boxes_identity__avx2;
		} else
#endif
#if defined(sse4_2)
		if (sna->cpu_features & SSE4_2) {
			tmp->prim_emit = emit_span_identity__sse4_2;
			tmp->emit_boxes = emit_span_boxes_identity__sse4_2;
		} else
#endif
		{
			tmp->prim_emit = emit_span_identity;
			tmp->emit_boxes = emit_span_boxes_identity;
		}
		tmp->base.floats_per_vertex = 4;
		vb = 1 << 2 | 2;
	} else if (tmp->base.is_affine) {
		/* Fold the constant homogeneous term w (matrix[2][2]) into
		 * the scale once, so the affine emitters never divide
		 * per-vertex -- presumably safe because w is uniform for an
		 * affine transform. */
		tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
		tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
		if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
			DBG(("%s: simple (unrotated affine) transform\n", __FUNCTION__));
#if defined(avx2)
			if (sna->cpu_features & AVX2) {
				tmp->prim_emit = emit_span_simple__avx2;
				tmp->emit_boxes = emit_span_boxes_simple__avx2;
			} else
#endif
#if defined(sse4_2)
			if (sna->cpu_features & SSE4_2) {
				tmp->prim_emit = emit_span_simple__sse4_2;
				tmp->emit_boxes = emit_span_boxes_simple__sse4_2;
			} else
#endif
			{
				tmp->prim_emit = emit_span_simple;
				tmp->emit_boxes = emit_span_boxes_simple;
			}
		} else {
			DBG(("%s: affine transform\n", __FUNCTION__));
#if defined(avx2)
			if (sna->cpu_features & AVX2) {
				tmp->prim_emit = emit_span_affine__avx2;
				tmp->emit_boxes = emit_span_boxes_affine__avx2;
			} else
#endif
#if defined(sse4_2)
			if (sna->cpu_features & SSE4_2) {
				tmp->prim_emit = emit_span_affine__sse4_2;
				tmp->emit_boxes = emit_span_boxes_affine__sse4_2;
			} else
#endif
			{
				tmp->prim_emit = emit_span_affine;
				tmp->emit_boxes = emit_span_boxes_affine;
			}
		}
		tmp->base.floats_per_vertex = 4;
		vb = 1 << 2 | 2;
	} else {
		/* Projective fallback: generic emitters defined elsewhere
		 * in this file; 5 floats per vertex (includes w). */
		DBG(("%s: projective transform\n", __FUNCTION__));
		tmp->prim_emit = emit_span_primitive;
		tmp->emit_boxes = emit_span_boxes;
		tmp->base.floats_per_vertex = 5;
		vb = 1 << 2 | 3;
	}
	/* Three vertices per rectangle (see gen4_vertex_align's invariant). */
	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
	return vb;
}