1/* 2 * Based on code from intel_uxa.c and i830_xaa.c 3 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. 4 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> 5 * Copyright (c) 2009-2011 Intel Corporation 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 * SOFTWARE. 25 * 26 * Authors: 27 * Chris Wilson <chris@chris-wilson.co.uk> 28 * 29 */ 30 31#ifdef HAVE_CONFIG_H 32#include "config.h" 33#endif 34 35#include "sna.h" 36#include "sna_render.h" 37#include "sna_render_inline.h" 38#include "sna_reg.h" 39#include "rop.h" 40 41#define NO_BLT_COMPOSITE 0 42#define NO_BLT_COPY 0 43#define NO_BLT_COPY_BOXES 0 44#define NO_BLT_FILL 0 45#define NO_BLT_FILL_BOXES 0 46 47#ifndef PICT_TYPE_BGRA 48#define PICT_TYPE_BGRA 8 49#endif 50 51static const uint8_t copy_ROP[] = { 52 ROP_0, /* GXclear */ 53 ROP_DSa, /* GXand */ 54 ROP_SDna, /* GXandReverse */ 55 ROP_S, /* GXcopy */ 56 ROP_DSna, /* GXandInverted */ 57 ROP_D, /* GXnoop */ 58 ROP_DSx, /* GXxor */ 59 ROP_DSo, /* GXor */ 60 ROP_DSon, /* GXnor */ 61 ROP_DSxn, /* GXequiv */ 62 ROP_Dn, /* GXinvert */ 63 ROP_SDno, /* GXorReverse */ 64 ROP_Sn, /* GXcopyInverted */ 65 ROP_DSno, /* GXorInverted */ 66 ROP_DSan, /* GXnand */ 67 ROP_1 /* GXset */ 68}; 69 70static const uint8_t fill_ROP[] = { 71 ROP_0, 72 ROP_DPa, 73 ROP_PDna, 74 ROP_P, 75 ROP_DPna, 76 ROP_D, 77 ROP_DPx, 78 ROP_DPo, 79 ROP_DPon, 80 ROP_PDxn, 81 ROP_Dn, 82 ROP_PDno, 83 ROP_Pn, 84 ROP_DPno, 85 ROP_DPan, 86 ROP_1 87}; 88 89static void sig_done(struct sna *sna, const struct sna_composite_op *op) 90{ 91 sigtrap_put(); 92} 93 94static void nop_done(struct sna *sna, const struct sna_composite_op *op) 95{ 96 assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 97 if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) { 98 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 99 _kgem_submit(&sna->kgem); 100 } 101 (void)op; 102} 103 104static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op) 105{ 106 struct kgem *kgem = &sna->kgem; 107 108 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 109 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 110 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 111 _kgem_submit(kgem); 112 return; 113 } 114 115 if (kgem_check_batch(kgem, 3)) { 116 uint32_t *b = kgem->batch + kgem->nbatch; 117 assert(sna->kgem.mode == KGEM_BLT); 118 b[0] = XY_SETUP_CLIP; 119 b[1] = b[2] = 0; 120 kgem->nbatch += 3; 121 assert(kgem->nbatch < kgem->surface); 122 } 123 assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 124 (void)op; 125} 126 127static bool sna_blt_fill_init(struct sna *sna, 128 struct sna_blt_state *blt, 129 struct kgem_bo *bo, 130 int bpp, 131 uint8_t alu, 132 uint32_t pixel) 133{ 134 struct kgem *kgem = &sna->kgem; 135 136 assert(kgem_bo_can_blt (kgem, bo)); 137 blt->bo[0] = bo; 138 139 blt->br13 = bo->pitch; 140 blt->cmd = XY_SCANLINE_BLT; 141 if (kgem->gen >= 040 && bo->tiling) { 142 blt->cmd |= BLT_DST_TILED; 143 blt->br13 >>= 2; 144 } 145 assert(blt->br13 <= MAXSHORT); 146 147 if (alu == GXclear) 148 pixel = 0; 149 else if (alu == GXcopy) { 150 if (pixel == 0) 151 alu = GXclear; 152 else if (pixel == -1) 153 alu = GXset; 154 } 155 156 blt->br13 |= 1<<31 | (fill_ROP[alu] << 16); 157 switch (bpp) { 158 default: assert(0); 159 case 32: blt->br13 |= 1 << 25; /* RGB8888 */ 160 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 161 case 8: break; 162 } 163 164 blt->pixel = pixel; 165 blt->bpp = bpp; 166 blt->alu = alu; 167 168 kgem_set_mode(kgem, KGEM_BLT, bo); 169 if (!kgem_check_batch(kgem, 14) || 170 !kgem_check_bo_fenced(kgem, bo)) { 171 kgem_submit(kgem); 172 if (!kgem_check_bo_fenced(kgem, bo)) 173 return false; 174 _kgem_set_mode(kgem, KGEM_BLT); 175 } 176 177 if (sna->blt_state.fill_bo != bo->unique_id || 178 sna->blt_state.fill_pixel != pixel || 179 sna->blt_state.fill_alu != alu) 180 { 181 uint32_t *b; 182 183 if (!kgem_check_batch(kgem, 24) || 184 !kgem_check_reloc(kgem, 1)) { 185 _kgem_submit(kgem); 186 if (!kgem_check_bo_fenced(kgem, bo)) 187 return false; 188 _kgem_set_mode(kgem, KGEM_BLT); 189 } 190 kgem_bcs_set_tiling(kgem, NULL, bo); 191 192 assert(sna->kgem.mode == KGEM_BLT); 193 b = kgem->batch + kgem->nbatch; 194 if (sna->kgem.gen >= 0100) { 195 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 196 if (bpp == 32) 197 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 198 if (bo->tiling) 199 b[0] |= BLT_DST_TILED; 200 b[1] = blt->br13; 201 b[2] = 0; 202 b[3] = 0; 203 *(uint64_t *)(b+4) = 204 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 205 I915_GEM_DOMAIN_RENDER << 16 | 206 I915_GEM_DOMAIN_RENDER | 207 KGEM_RELOC_FENCED, 208 0); 209 b[6] = pixel; 210 b[7] = pixel; 211 b[8] = 0; 212 b[9] = 0; 213 kgem->nbatch += 10; 214 } else { 215 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 216 if (bpp == 32) 217 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 218 if (bo->tiling && kgem->gen >= 040) 219 b[0] |= BLT_DST_TILED; 220 b[1] = blt->br13; 221 b[2] = 0; 222 b[3] = 0; 223 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 224 I915_GEM_DOMAIN_RENDER << 16 | 225 I915_GEM_DOMAIN_RENDER | 226 KGEM_RELOC_FENCED, 227 0); 228 b[5] = pixel; 229 b[6] = pixel; 230 b[7] = 0; 231 b[8] = 0; 232 kgem->nbatch += 9; 233 } 234 assert(kgem->nbatch < kgem->surface); 235 236 sna->blt_state.fill_bo = bo->unique_id; 237 sna->blt_state.fill_pixel = pixel; 238 sna->blt_state.fill_alu = alu; 239 } 240 241 assert(sna->kgem.mode == KGEM_BLT); 242 return true; 243} 244 245noinline static void __sna_blt_fill_begin(struct sna *sna, 246 const struct sna_blt_state *blt) 247{ 248 struct kgem *kgem = &sna->kgem; 249 uint32_t *b; 250 251 kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]); 252 253 assert(kgem->mode == KGEM_BLT); 254 b = kgem->batch + kgem->nbatch; 255 if (sna->kgem.gen >= 0100) { 256 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 257 if (blt->bpp == 32) 258 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 259 if (blt->bo[0]->tiling) 260 b[0] |= BLT_DST_TILED; 261 b[1] = blt->br13; 262 b[2] = 0; 263 b[3] = 0; 264 *(uint64_t *)(b+4) = 265 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0], 266 I915_GEM_DOMAIN_RENDER << 16 | 267 I915_GEM_DOMAIN_RENDER | 268 KGEM_RELOC_FENCED, 269 0); 270 b[6] = blt->pixel; 271 b[7] = blt->pixel; 272 b[8] = 0; 273 b[9] = 0; 274 kgem->nbatch += 10; 275 } else { 276 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 277 if (blt->bpp == 32) 278 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 279 if (blt->bo[0]->tiling && kgem->gen >= 040) 280 b[0] |= BLT_DST_TILED; 281 b[1] = blt->br13; 282 b[2] = 0; 283 b[3] = 0; 284 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], 285 I915_GEM_DOMAIN_RENDER << 16 | 286 I915_GEM_DOMAIN_RENDER | 287 KGEM_RELOC_FENCED, 288 0); 289 b[5] = blt->pixel; 290 b[6] = blt->pixel; 291 b[7] = 0; 292 b[8] = 0; 293 kgem->nbatch += 9; 294 } 295} 296 297inline static void sna_blt_fill_begin(struct sna *sna, 298 const struct sna_blt_state *blt) 299{ 300 struct kgem *kgem = &sna->kgem; 301 302 if (kgem->nreloc) { 303 _kgem_submit(kgem); 304 _kgem_set_mode(kgem, KGEM_BLT); 305 kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]); 306 assert(kgem->nbatch == 0); 307 } 308 309 __sna_blt_fill_begin(sna, blt); 310} 311 312inline static void sna_blt_fill_one(struct sna *sna, 313 const struct sna_blt_state *blt, 314 int16_t x, int16_t y, 315 int16_t width, int16_t height) 316{ 317 struct kgem *kgem = &sna->kgem; 318 uint32_t *b; 319 320 DBG(("%s: (%d, %d) x (%d, %d): %08x\n", 321 __FUNCTION__, x, y, width, height, blt->pixel)); 322 323 assert(x >= 0); 324 assert(y >= 0); 325 assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 326 327 if (!kgem_check_batch(kgem, 3)) 328 sna_blt_fill_begin(sna, blt); 329 330 assert(sna->kgem.mode == KGEM_BLT); 331 b = kgem->batch + kgem->nbatch; 332 kgem->nbatch += 3; 333 assert(kgem->nbatch < kgem->surface); 334 335 b[0] = blt->cmd; 336 b[1] = y << 16 | x; 337 b[2] = b[1] + (height << 16 | width); 338} 339 340static bool sna_blt_copy_init(struct sna *sna, 341 struct sna_blt_state *blt, 342 struct kgem_bo *src, 343 struct kgem_bo *dst, 344 int bpp, 345 uint8_t alu) 346{ 347 struct kgem *kgem = &sna->kgem; 348 349 assert(kgem_bo_can_blt(kgem, src)); 350 assert(kgem_bo_can_blt(kgem, dst)); 351 352 blt->bo[0] = src; 353 blt->bo[1] = dst; 354 355 blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6); 356 if (bpp == 32) 357 blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 358 359 blt->pitch[0] = src->pitch; 360 if (kgem->gen >= 040 && src->tiling) { 361 blt->cmd |= BLT_SRC_TILED; 362 blt->pitch[0] >>= 2; 363 } 364 assert(blt->pitch[0] <= MAXSHORT); 365 366 blt->pitch[1] = dst->pitch; 367 if (kgem->gen >= 040 && dst->tiling) { 368 blt->cmd |= BLT_DST_TILED; 369 blt->pitch[1] >>= 2; 370 } 371 assert(blt->pitch[1] <= MAXSHORT); 372 373 blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset; 374 blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1]; 375 switch (bpp) { 376 default: assert(0); 377 case 32: blt->br13 |= 1 << 25; /* RGB8888 */ 378 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 379 case 8: break; 380 } 381 382 kgem_set_mode(kgem, KGEM_BLT, dst); 383 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 384 kgem_submit(kgem); 385 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) 386 return false; 387 _kgem_set_mode(kgem, KGEM_BLT); 388 } 389 kgem_bcs_set_tiling(&sna->kgem, src, dst); 390 391 sna->blt_state.fill_bo = 0; 392 return true; 393} 394 395static bool sna_blt_alpha_fixup_init(struct sna *sna, 396 struct sna_blt_state *blt, 397 struct kgem_bo *src, 398 struct kgem_bo *dst, 399 int bpp, uint32_t alpha) 400{ 401 struct kgem *kgem = &sna->kgem; 402 403 DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n", 404 __FUNCTION__, dst->handle, src->handle, bpp, alpha)); 405 assert(kgem_bo_can_blt(kgem, src)); 406 assert(kgem_bo_can_blt(kgem, dst)); 407 408 blt->bo[0] = src; 409 blt->bo[1] = dst; 410 411 blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10); 412 blt->pitch[0] = src->pitch; 413 if (kgem->gen >= 040 && src->tiling) { 414 blt->cmd |= BLT_SRC_TILED; 415 blt->pitch[0] >>= 2; 416 } 417 assert(blt->pitch[0] <= MAXSHORT); 418 419 blt->pitch[1] = dst->pitch; 420 if (kgem->gen >= 040 && dst->tiling) { 421 blt->cmd |= BLT_DST_TILED; 422 blt->pitch[1] >>= 2; 423 } 424 assert(blt->pitch[1] <= MAXSHORT); 425 426 blt->overwrites = 1; 427 blt->br13 = (0xfc << 16) | blt->pitch[1]; 428 switch (bpp) { 429 default: assert(0); 430 case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 431 blt->br13 |= 1 << 25; /* RGB8888 */ 432 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 433 case 8: break; 434 } 435 blt->pixel = alpha; 436 437 kgem_set_mode(kgem, KGEM_BLT, dst); 438 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 439 kgem_submit(kgem); 440 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) 441 return false; 442 _kgem_set_mode(kgem, KGEM_BLT); 443 } 444 kgem_bcs_set_tiling(&sna->kgem, src, dst); 445 446 sna->blt_state.fill_bo = 0; 447 return true; 448} 449 450static void sna_blt_alpha_fixup_one(struct sna *sna, 451 const struct sna_blt_state *blt, 452 int src_x, int src_y, 453 int width, int height, 454 int dst_x, int dst_y) 455{ 456 struct kgem *kgem = &sna->kgem; 457 uint32_t *b; 458 459 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", 460 __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); 461 462 assert(src_x >= 0); 463 assert(src_y >= 0); 464 assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 465 assert(dst_x >= 0); 466 assert(dst_y >= 0); 467 assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1])); 468 assert(width > 0); 469 assert(height > 0); 470 471 if (!kgem_check_batch(kgem, 14) || 472 !kgem_check_reloc(kgem, 2)) { 473 _kgem_submit(kgem); 474 _kgem_set_mode(kgem, KGEM_BLT); 475 kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); 476 } 477 478 assert(sna->kgem.mode == KGEM_BLT); 479 b = kgem->batch + kgem->nbatch; 480 b[0] = blt->cmd; 481 b[1] = blt->br13; 482 b[2] = (dst_y << 16) | dst_x; 483 b[3] = ((dst_y + height) << 16) | (dst_x + width); 484 if (sna->kgem.gen >= 0100) { 485 *(uint64_t *)(b+4) = 486 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], 487 I915_GEM_DOMAIN_RENDER << 16 | 488 I915_GEM_DOMAIN_RENDER | 489 KGEM_RELOC_FENCED, 490 0); 491 b[6] = blt->pitch[0]; 492 b[7] = (src_y << 16) | src_x; 493 *(uint64_t *)(b+8) = 494 kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], 495 I915_GEM_DOMAIN_RENDER << 16 | 496 KGEM_RELOC_FENCED, 497 0); 498 b[10] = blt->pixel; 499 b[11] = blt->pixel; 500 b[12] = 0; 501 b[13] = 0; 502 kgem->nbatch += 14; 503 } else { 504 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], 505 I915_GEM_DOMAIN_RENDER << 16 | 506 I915_GEM_DOMAIN_RENDER | 507 KGEM_RELOC_FENCED, 508 0); 509 b[5] = blt->pitch[0]; 510 b[6] = (src_y << 16) | src_x; 511 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], 512 I915_GEM_DOMAIN_RENDER << 16 | 513 KGEM_RELOC_FENCED, 514 0); 515 b[8] = blt->pixel; 516 b[9] = blt->pixel; 517 b[10] = 0; 518 b[11] = 0; 519 kgem->nbatch += 12; 520 } 521 assert(kgem->nbatch < kgem->surface); 522} 523 524static void sna_blt_copy_one(struct sna *sna, 525 const struct sna_blt_state *blt, 526 int src_x, int src_y, 527 int width, int height, 528 int dst_x, int dst_y) 529{ 530 struct kgem *kgem = &sna->kgem; 531 uint32_t *b; 532 533 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", 534 __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); 535 536 assert(src_x >= 0); 537 assert(src_y >= 0); 538 assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 539 assert(dst_x >= 0); 540 assert(dst_y >= 0); 541 assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1])); 542 assert(width > 0); 543 assert(height > 0); 544 545 /* Compare against a previous fill */ 546 if (blt->overwrites && 547 kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) { 548 if (sna->kgem.gen >= 0100) { 549 if (kgem->nbatch >= 7 && 550 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && 551 kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && 552 kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { 553 DBG(("%s: replacing last fill\n", __FUNCTION__)); 554 if (kgem_check_batch(kgem, 3)) { 555 assert(kgem->mode == KGEM_BLT); 556 b = kgem->batch + kgem->nbatch - 7; 557 b[0] = blt->cmd; 558 b[1] = blt->br13; 559 b[6] = (src_y << 16) | src_x; 560 b[7] = blt->pitch[0]; 561 *(uint64_t *)(b+8) = 562 kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0], 563 I915_GEM_DOMAIN_RENDER << 16 | 564 KGEM_RELOC_FENCED, 565 0); 566 kgem->nbatch += 3; 567 assert(kgem->nbatch < kgem->surface); 568 return; 569 } 570 kgem->nbatch -= 7; 571 kgem->nreloc--; 572 } 573 } else { 574 if (kgem->nbatch >= 6 && 575 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 576 kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && 577 kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { 578 DBG(("%s: replacing last fill\n", __FUNCTION__)); 579 if (kgem_check_batch(kgem, 8-6)) { 580 assert(kgem->mode == KGEM_BLT); 581 b = kgem->batch + kgem->nbatch - 6; 582 b[0] = blt->cmd; 583 b[1] = blt->br13; 584 b[5] = (src_y << 16) | src_x; 585 b[6] = blt->pitch[0]; 586 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0], 587 I915_GEM_DOMAIN_RENDER << 16 | 588 KGEM_RELOC_FENCED, 589 0); 590 kgem->nbatch += 8 - 6; 591 assert(kgem->nbatch < kgem->surface); 592 return; 593 } 594 kgem->nbatch -= 6; 595 kgem->nreloc--; 596 } 597 } 598 } 599 600 if (!kgem_check_batch(kgem, 10) || 601 !kgem_check_reloc(kgem, 2)) { 602 _kgem_submit(kgem); 603 _kgem_set_mode(kgem, KGEM_BLT); 604 kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); 605 } 606 607 assert(sna->kgem.mode == KGEM_BLT); 608 b = kgem->batch + kgem->nbatch; 609 b[0] = blt->cmd; 610 b[1] = blt->br13; 611 b[2] = (dst_y << 16) | dst_x; 612 b[3] = ((dst_y + height) << 16) | (dst_x + width); 613 if (kgem->gen >= 0100) { 614 *(uint64_t *)(b+4) = 615 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], 616 I915_GEM_DOMAIN_RENDER << 16 | 617 I915_GEM_DOMAIN_RENDER | 618 KGEM_RELOC_FENCED, 619 0); 620 b[6] = (src_y << 16) | src_x; 621 b[7] = blt->pitch[0]; 622 *(uint64_t *)(b+8) = 623 kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], 624 I915_GEM_DOMAIN_RENDER << 16 | 625 KGEM_RELOC_FENCED, 626 0); 627 kgem->nbatch += 10; 628 } else { 629 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], 630 I915_GEM_DOMAIN_RENDER << 16 | 631 I915_GEM_DOMAIN_RENDER | 632 KGEM_RELOC_FENCED, 633 0); 634 b[5] = (src_y << 16) | src_x; 635 b[6] = blt->pitch[0]; 636 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], 637 I915_GEM_DOMAIN_RENDER << 16 | 638 KGEM_RELOC_FENCED, 639 0); 640 kgem->nbatch += 8; 641 } 642 assert(kgem->nbatch < kgem->surface); 643} 644 645bool 646sna_get_rgba_from_pixel(uint32_t pixel, 647 uint16_t *red, 648 uint16_t *green, 649 uint16_t *blue, 650 uint16_t *alpha, 651 uint32_t format) 652{ 653 int rbits, bbits, gbits, abits; 654 int rshift, bshift, gshift, ashift; 655 656 rbits = PICT_FORMAT_R(format); 657 gbits = PICT_FORMAT_G(format); 658 bbits = PICT_FORMAT_B(format); 659 abits = PICT_FORMAT_A(format); 660 661 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { 662 rshift = gshift = bshift = ashift = 0; 663 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { 664 bshift = 0; 665 gshift = bbits; 666 rshift = gshift + gbits; 667 ashift = rshift + rbits; 668 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 669 rshift = 0; 670 gshift = rbits; 671 bshift = gshift + gbits; 672 ashift = bshift + bbits; 673 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 674 ashift = 0; 675 rshift = abits; 676 if (abits == 0) 677 rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); 678 gshift = rshift + rbits; 679 bshift = gshift + gbits; 680 } else { 681 return false; 682 } 683 684 if (rbits) { 685 *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits); 686 while (rbits < 16) { 687 *red |= *red >> rbits; 688 rbits <<= 1; 689 } 690 } else 691 *red = 0; 692 693 if (gbits) { 694 *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits); 695 while (gbits < 16) { 696 *green |= *green >> gbits; 697 gbits <<= 1; 698 } 699 } else 700 *green = 0; 701 702 if (bbits) { 703 *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits); 704 while (bbits < 16) { 705 *blue |= *blue >> bbits; 706 bbits <<= 1; 707 } 708 } else 709 *blue = 0; 710 711 if (abits) { 712 *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits); 713 while (abits < 16) { 714 *alpha |= *alpha >> abits; 715 abits <<= 1; 716 } 717 } else 718 *alpha = 0xffff; 719 720 return true; 721} 722 723bool 724_sna_get_pixel_from_rgba(uint32_t * pixel, 725 uint16_t red, 726 uint16_t green, 727 uint16_t blue, 728 uint16_t alpha, 729 uint32_t format) 730{ 731 int rbits, bbits, gbits, abits; 732 int rshift, bshift, gshift, ashift; 733 734 rbits = PICT_FORMAT_R(format); 735 gbits = PICT_FORMAT_G(format); 736 bbits = PICT_FORMAT_B(format); 737 abits = PICT_FORMAT_A(format); 738 if (abits == 0) 739 abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); 740 741 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { 742 *pixel = alpha >> (16 - abits); 743 return true; 744 } 745 746 if (!PICT_FORMAT_COLOR(format)) 747 return false; 748 749 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { 750 bshift = 0; 751 gshift = bbits; 752 rshift = gshift + gbits; 753 ashift = rshift + rbits; 754 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 755 rshift = 0; 756 gshift = rbits; 757 bshift = gshift + gbits; 758 ashift = bshift + bbits; 759 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 760 ashift = 0; 761 rshift = abits; 762 gshift = rshift + rbits; 763 bshift = gshift + gbits; 764 } else 765 return false; 766 767 *pixel = 0; 768 *pixel |= (blue >> (16 - bbits)) << bshift; 769 *pixel |= (green >> (16 - gbits)) << gshift; 770 *pixel |= (red >> (16 - rbits)) << rshift; 771 *pixel |= (alpha >> (16 - abits)) << ashift; 772 773 return true; 774} 775 776uint32_t 777sna_rgba_for_color(uint32_t color, int depth) 778{ 779 return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8); 780} 781 782uint32_t 783sna_rgba_to_color(uint32_t rgba, uint32_t format) 784{ 785 return color_convert(rgba, PICT_a8r8g8b8, format); 786} 787 788static uint32_t 789get_pixel(PicturePtr picture) 790{ 791 PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable); 792 793 DBG(("%s: %p\n", __FUNCTION__, pixmap)); 794 795 if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ)) 796 return 0; 797 798 switch (pixmap->drawable.bitsPerPixel) { 799 case 32: return *(uint32_t *)pixmap->devPrivate.ptr; 800 case 16: return *(uint16_t *)pixmap->devPrivate.ptr; 801 default: return *(uint8_t *)pixmap->devPrivate.ptr; 802 } 803} 804 805static uint32_t 806get_solid_color(PicturePtr picture, uint32_t format) 807{ 808 if (picture->pSourcePict) { 809 PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict; 810 return color_convert(fill->color, PICT_a8r8g8b8, format); 811 } else 812 return color_convert(get_pixel(picture), picture->format, format); 813} 814 815static bool 816is_solid(PicturePtr picture) 817{ 818 if (picture->pSourcePict) { 819 if (picture->pSourcePict->type == SourcePictTypeSolidFill) 820 return true; 821 } 822 823 if (picture->pDrawable) { 824 if (picture->pDrawable->width == 1 && 825 picture->pDrawable->height == 1 && 826 picture->repeat) 827 return true; 828 } 829 830 return false; 831} 832 833bool 834sna_picture_is_solid(PicturePtr picture, uint32_t *color) 835{ 836 if (!is_solid(picture)) 837 return false; 838 839 if (color) 840 *color = get_solid_color(picture, PICT_a8r8g8b8); 841 return true; 842} 843 844static bool 845pixel_is_transparent(uint32_t pixel, uint32_t format) 846{ 847 unsigned int abits; 848 849 abits = PICT_FORMAT_A(format); 850 if (!abits) 851 return false; 852 853 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A || 854 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 855 return (pixel & ((1 << abits) - 1)) == 0; 856 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB || 857 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 858 unsigned int ashift = PICT_FORMAT_BPP(format) - abits; 859 return (pixel >> ashift) == 0; 860 } else 861 return false; 862} 863 864static bool 865pixel_is_opaque(uint32_t pixel, uint32_t format) 866{ 867 unsigned int abits; 868 869 abits = PICT_FORMAT_A(format); 870 if (!abits) 871 return true; 872 873 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A || 874 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 875 return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1); 876 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB || 877 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 878 unsigned int ashift = PICT_FORMAT_BPP(format) - abits; 879 return (pixel >> ashift) == (unsigned)((1 << abits) - 1); 880 } else 881 return false; 882} 883 884static bool 885pixel_is_white(uint32_t pixel, uint32_t format) 886{ 887 switch (PICT_FORMAT_TYPE(format)) { 888 case PICT_TYPE_A: 889 case PICT_TYPE_ARGB: 890 case PICT_TYPE_ABGR: 891 case PICT_TYPE_BGRA: 892 return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1); 893 default: 894 return false; 895 } 896} 897 898static bool 899is_opaque_solid(PicturePtr picture) 900{ 901 if (picture->pSourcePict) { 902 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 903 return (fill->color >> 24) == 0xff; 904 } else 905 return pixel_is_opaque(get_pixel(picture), picture->format); 906} 907 908static bool 909is_white(PicturePtr picture) 910{ 911 if (picture->pSourcePict) { 912 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 913 return fill->color == 0xffffffff; 914 } else 915 return pixel_is_white(get_pixel(picture), picture->format); 916} 917 918static bool 919is_transparent(PicturePtr picture) 920{ 921 if (picture->pSourcePict) { 922 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 923 return fill->color == 0; 924 } else 925 return pixel_is_transparent(get_pixel(picture), picture->format); 926} 927 928bool 929sna_composite_mask_is_opaque(PicturePtr mask) 930{ 931 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) 932 return is_solid(mask) && is_white(mask); 933 else if (!PICT_FORMAT_A(mask->format)) 934 return true; 935 else if (mask->pSourcePict) { 936 PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict; 937 return (fill->color >> 24) == 0xff; 938 } else { 939 struct sna_pixmap *priv; 940 assert(mask->pDrawable); 941 942 if (mask->pDrawable->width == 1 && 943 mask->pDrawable->height == 1 && 944 mask->repeat) 945 return pixel_is_opaque(get_pixel(mask), mask->format); 946 947 if (mask->transform) 948 return false; 949 950 priv = sna_pixmap_from_drawable(mask->pDrawable); 951 if (priv == NULL || !priv->clear) 952 return false; 953 954 return pixel_is_opaque(priv->clear_color, mask->format); 955 } 956} 957 958fastcall 959static void blt_composite_fill(struct sna *sna, 960 const struct sna_composite_op *op, 961 const struct sna_composite_rectangles *r) 962{ 963 int x1, x2, y1, y2; 964 965 x1 = r->dst.x + op->dst.x; 966 y1 = r->dst.y + op->dst.y; 967 x2 = x1 + r->width; 968 y2 = y1 + r->height; 969 970 if (x1 < 0) 971 x1 = 0; 972 if (y1 < 0) 973 y1 = 0; 974 975 if (x2 > op->dst.width) 976 x2 = op->dst.width; 977 if (y2 > op->dst.height) 978 y2 = op->dst.height; 979 980 if (x2 <= x1 || y2 <= y1) 981 return; 982 983 sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1); 984} 985 986fastcall 987static void blt_composite_fill__cpu(struct sna *sna, 988 const struct sna_composite_op *op, 989 const struct sna_composite_rectangles *r) 990{ 991 int x1, x2, y1, y2; 992 993 x1 = r->dst.x + op->dst.x; 994 y1 = r->dst.y + op->dst.y; 995 x2 = x1 + r->width; 996 y2 = y1 + r->height; 997 998 if (x1 < 0) 999 x1 = 0; 1000 if (y1 < 0) 1001 y1 = 0; 1002 1003 if (x2 > op->dst.width) 1004 x2 = op->dst.width; 1005 if (y2 > op->dst.height) 1006 y2 = op->dst.height; 1007 1008 if (x2 <= x1 || y2 <= y1) 1009 return; 1010 1011 assert(op->dst.pixmap->devPrivate.ptr); 1012 assert(op->dst.pixmap->devKind); 1013 sigtrap_assert_active(); 1014 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1015 op->dst.pixmap->devKind / sizeof(uint32_t), 1016 op->dst.pixmap->drawable.bitsPerPixel, 1017 x1, y1, x2-x1, y2-y1, 1018 op->u.blt.pixel); 1019} 1020 1021fastcall static void 1022blt_composite_fill_box_no_offset__cpu(struct sna *sna, 1023 const struct sna_composite_op *op, 1024 const BoxRec *box) 1025{ 1026 assert(box->x1 >= 0); 1027 assert(box->y1 >= 0); 1028 assert(box->x2 <= op->dst.pixmap->drawable.width); 1029 assert(box->y2 <= op->dst.pixmap->drawable.height); 1030 1031 assert(op->dst.pixmap->devPrivate.ptr); 1032 assert(op->dst.pixmap->devKind); 1033 sigtrap_assert_active(); 1034 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1035 op->dst.pixmap->devKind / sizeof(uint32_t), 1036 op->dst.pixmap->drawable.bitsPerPixel, 1037 box->x1, box->y1, box->x2-box->x1, box->y2-box->y1, 1038 op->u.blt.pixel); 1039} 1040 1041static void 1042blt_composite_fill_boxes_no_offset__cpu(struct sna *sna, 1043 const struct sna_composite_op *op, 1044 const BoxRec *box, int n) 1045{ 1046 do { 1047 assert(box->x1 >= 0); 1048 assert(box->y1 >= 0); 1049 assert(box->x2 <= op->dst.pixmap->drawable.width); 1050 assert(box->y2 <= op->dst.pixmap->drawable.height); 1051 1052 assert(op->dst.pixmap->devPrivate.ptr); 1053 assert(op->dst.pixmap->devKind); 1054 sigtrap_assert_active(); 1055 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1056 op->dst.pixmap->devKind / sizeof(uint32_t), 1057 op->dst.pixmap->drawable.bitsPerPixel, 1058 box->x1, box->y1, box->x2-box->x1, box->y2-box->y1, 1059 op->u.blt.pixel); 1060 box++; 1061 } while (--n); 1062} 1063 1064fastcall static void 1065blt_composite_fill_box__cpu(struct sna *sna, 1066 const struct sna_composite_op *op, 1067 const BoxRec *box) 1068{ 1069 assert(box->x1 + op->dst.x >= 0); 1070 assert(box->y1 + op->dst.y >= 0); 1071 assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width); 1072 assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height); 1073 1074 assert(op->dst.pixmap->devPrivate.ptr); 1075 assert(op->dst.pixmap->devKind); 1076 sigtrap_assert_active(); 1077 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1078 op->dst.pixmap->devKind / sizeof(uint32_t), 1079 op->dst.pixmap->drawable.bitsPerPixel, 1080 box->x1 + op->dst.x, box->y1 + op->dst.y, 1081 box->x2 - box->x1, box->y2 - box->y1, 1082 op->u.blt.pixel); 1083} 1084 1085static void 1086blt_composite_fill_boxes__cpu(struct sna *sna, 1087 const struct sna_composite_op *op, 1088 const BoxRec *box, int n) 1089{ 1090 do { 1091 assert(box->x1 + op->dst.x >= 0); 1092 assert(box->y1 + op->dst.y >= 0); 1093 assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width); 1094 assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height); 1095 1096 assert(op->dst.pixmap->devPrivate.ptr); 1097 assert(op->dst.pixmap->devKind); 1098 sigtrap_assert_active(); 1099 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1100 op->dst.pixmap->devKind / sizeof(uint32_t), 1101 op->dst.pixmap->drawable.bitsPerPixel, 1102 box->x1 + op->dst.x, box->y1 + op->dst.y, 1103 box->x2 - box->x1, box->y2 - box->y1, 1104 op->u.blt.pixel); 1105 box++; 1106 } while (--n); 1107} 1108 1109inline static void _sna_blt_fill_box(struct sna *sna, 1110 const struct sna_blt_state *blt, 1111 const BoxRec *box) 1112{ 1113 struct kgem *kgem = &sna->kgem; 1114 uint32_t *b; 1115 1116 DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__, 1117 box->x1, box->y1, box->x2, box->y2, 1118 blt->pixel)); 1119 1120 assert(box->x1 >= 0); 1121 assert(box->y1 >= 0); 1122 assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 1123 1124 if (!kgem_check_batch(kgem, 3)) 1125 sna_blt_fill_begin(sna, blt); 1126 1127 assert(sna->kgem.mode == KGEM_BLT); 1128 b = kgem->batch + kgem->nbatch; 1129 kgem->nbatch += 3; 1130 assert(kgem->nbatch < kgem->surface); 1131 1132 b[0] = blt->cmd; 1133 *(uint64_t *)(b+1) = *(const uint64_t *)box; 1134} 1135 1136inline static void _sna_blt_fill_boxes(struct sna *sna, 1137 const struct sna_blt_state *blt, 1138 const BoxRec *box, 1139 int nbox) 1140{ 1141 struct kgem *kgem = &sna->kgem; 1142 uint32_t cmd = blt->cmd; 1143 1144 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); 1145 1146 if (!kgem_check_batch(kgem, 3)) 1147 sna_blt_fill_begin(sna, blt); 1148 1149 do { 1150 uint32_t *b = kgem->batch + kgem->nbatch; 1151 int nbox_this_time, rem; 1152 1153 assert(sna->kgem.mode == KGEM_BLT); 1154 nbox_this_time = nbox; 1155 rem = kgem_batch_space(kgem); 1156 if (3*nbox_this_time > rem) 1157 nbox_this_time = rem / 3; 1158 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1159 __FUNCTION__, nbox_this_time, nbox, rem)); 1160 assert(nbox_this_time > 0); 1161 nbox -= nbox_this_time; 1162 1163 kgem->nbatch += 3 * nbox_this_time; 1164 assert(kgem->nbatch < kgem->surface); 1165 while (nbox_this_time >= 8) { 1166 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1167 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1168 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1169 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1170 b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++; 1171 b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++; 1172 b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++; 1173 b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++; 1174 b += 24; 1175 nbox_this_time -= 8; 1176 } 1177 if (nbox_this_time & 4) { 1178 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1179 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1180 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1181 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1182 b += 12; 1183 } 1184 if (nbox_this_time & 2) { 1185 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1186 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1187 b += 6; 1188 } 1189 if (nbox_this_time & 1) { 1190 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1191 } 1192 1193 if (!nbox) 1194 return; 1195 1196 sna_blt_fill_begin(sna, blt); 1197 } while (1); 1198} 1199 1200static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box) 1201{ 1202 if (box->x2 - box->x1 >= op->dst.width && 1203 box->y2 - box->y1 >= op->dst.height) { 1204 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1205 if (op->dst.bo == priv->gpu_bo) { 1206 sna_damage_all(&priv->gpu_damage, op->dst.pixmap); 1207 sna_damage_destroy(&priv->cpu_damage); 1208 priv->clear = true; 1209 priv->clear_color = op->u.blt.pixel; 1210 DBG(("%s: pixmap=%ld marking clear [%08x]\n", 1211 __FUNCTION__, 1212 op->dst.pixmap->drawable.serialNumber, 1213 op->u.blt.pixel)); 1214 ((struct sna_composite_op *)op)->damage = NULL; 1215 } 1216 } 1217} 1218 1219fastcall static void blt_composite_fill_box_no_offset(struct sna *sna, 1220 const struct sna_composite_op *op, 1221 const BoxRec *box) 1222{ 1223 _sna_blt_fill_box(sna, &op->u.blt, box); 1224 _sna_blt_maybe_clear(op, box); 1225} 1226 1227static void blt_composite_fill_boxes_no_offset(struct sna *sna, 1228 const struct sna_composite_op *op, 1229 const BoxRec *box, int n) 1230{ 1231 _sna_blt_fill_boxes(sna, &op->u.blt, box, n); 1232} 1233 1234static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna, 1235 const struct sna_composite_op *op, 1236 const BoxRec *box, int nbox) 1237{ 1238 struct kgem *kgem = &sna->kgem; 1239 const struct sna_blt_state *blt = &op->u.blt; 1240 uint32_t cmd = blt->cmd; 1241 1242 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); 1243 1244 sna_vertex_lock(&sna->render); 1245 assert(kgem->mode == KGEM_BLT); 1246 if (!kgem_check_batch(kgem, 3)) { 1247 sna_vertex_wait__locked(&sna->render); 1248 sna_blt_fill_begin(sna, blt); 1249 } 1250 1251 do { 1252 uint32_t *b = kgem->batch + kgem->nbatch; 1253 int nbox_this_time, rem; 1254 1255 assert(sna->kgem.mode == KGEM_BLT); 1256 nbox_this_time = nbox; 1257 rem = kgem_batch_space(kgem); 1258 if (3*nbox_this_time > rem) 1259 nbox_this_time = rem / 3; 1260 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1261 __FUNCTION__, nbox_this_time, nbox, rem)); 1262 assert(nbox_this_time > 0); 1263 nbox -= nbox_this_time; 1264 1265 kgem->nbatch += 3 * nbox_this_time; 1266 assert(kgem->nbatch < kgem->surface); 1267 sna_vertex_acquire__locked(&sna->render); 1268 sna_vertex_unlock(&sna->render); 1269 1270 while (nbox_this_time >= 8) { 1271 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1272 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1273 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1274 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1275 b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++; 1276 b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++; 1277 b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++; 1278 b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++; 1279 b += 24; 1280 nbox_this_time -= 8; 1281 } 1282 if (nbox_this_time & 4) { 1283 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1284 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1285 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1286 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1287 b += 12; 1288 } 1289 if (nbox_this_time & 2) { 1290 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1291 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1292 b += 6; 1293 } 1294 if (nbox_this_time & 1) { 1295 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1296 } 1297 1298 sna_vertex_lock(&sna->render); 1299 sna_vertex_release__locked(&sna->render); 1300 if (!nbox) 1301 break; 1302 1303 sna_vertex_wait__locked(&sna->render); 1304 sna_blt_fill_begin(sna, blt); 1305 } while (1); 1306 sna_vertex_unlock(&sna->render); 1307} 1308 1309fastcall static void blt_composite_fill_box(struct sna *sna, 1310 const struct sna_composite_op *op, 1311 const BoxRec *box) 1312{ 1313 sna_blt_fill_one(sna, &op->u.blt, 1314 box->x1 + op->dst.x, 1315 box->y1 + op->dst.y, 1316 box->x2 - box->x1, 1317 box->y2 - box->y1); 1318 _sna_blt_maybe_clear(op, box); 1319} 1320 1321static void blt_composite_fill_boxes(struct sna *sna, 1322 const struct sna_composite_op *op, 1323 const BoxRec *box, int n) 1324{ 1325 do { 1326 sna_blt_fill_one(sna, &op->u.blt, 1327 box->x1 + op->dst.x, box->y1 + op->dst.y, 1328 box->x2 - box->x1, box->y2 - box->y1); 1329 box++; 1330 } while (--n); 1331} 1332 1333static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y) 1334{ 1335 union { 1336 uint64_t v; 1337 int16_t i[4]; 1338 } vi; 1339 vi.v = *(uint64_t *)b; 1340 vi.i[0] += x; 1341 vi.i[1] += y; 1342 vi.i[2] += x; 1343 vi.i[3] += y; 1344 return vi.v; 1345} 1346 1347static void blt_composite_fill_boxes__thread(struct sna *sna, 1348 const struct sna_composite_op *op, 1349 const BoxRec *box, int nbox) 1350{ 1351 struct kgem *kgem = &sna->kgem; 1352 const struct sna_blt_state *blt = &op->u.blt; 1353 uint32_t cmd = blt->cmd; 1354 int16_t dx = op->dst.x; 1355 int16_t dy = op->dst.y; 1356 1357 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); 1358 1359 sna_vertex_lock(&sna->render); 1360 assert(kgem->mode == KGEM_BLT); 1361 if (!kgem_check_batch(kgem, 3)) { 1362 sna_vertex_wait__locked(&sna->render); 1363 sna_blt_fill_begin(sna, blt); 1364 } 1365 1366 do { 1367 uint32_t *b = kgem->batch + kgem->nbatch; 1368 int nbox_this_time, rem; 1369 1370 assert(sna->kgem.mode == KGEM_BLT); 1371 nbox_this_time = nbox; 1372 rem = kgem_batch_space(kgem); 1373 if (3*nbox_this_time > rem) 1374 nbox_this_time = rem / 3; 1375 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1376 __FUNCTION__, nbox_this_time, nbox, rem)); 1377 assert(nbox_this_time > 0); 1378 nbox -= nbox_this_time; 1379 1380 kgem->nbatch += 3 * nbox_this_time; 1381 assert(kgem->nbatch < kgem->surface); 1382 sna_vertex_acquire__locked(&sna->render); 1383 sna_vertex_unlock(&sna->render); 1384 1385 while (nbox_this_time >= 8) { 1386 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); 1387 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); 1388 b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); 1389 b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); 1390 b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy); 1391 b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy); 1392 b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy); 1393 b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy); 1394 b += 24; 1395 nbox_this_time -= 8; 1396 } 1397 if (nbox_this_time & 4) { 1398 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); 1399 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); 1400 b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); 1401 b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); 1402 b += 12; 1403 } 1404 if (nbox_this_time & 2) { 1405 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); 1406 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); 1407 b += 6; 1408 } 1409 if (nbox_this_time & 1) { 1410 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); 1411 } 1412 1413 sna_vertex_lock(&sna->render); 1414 sna_vertex_release__locked(&sna->render); 1415 if (!nbox) 1416 break; 1417 1418 sna_vertex_wait__locked(&sna->render); 1419 sna_blt_fill_begin(sna, blt); 1420 } while (1); 1421 sna_vertex_unlock(&sna->render); 1422} 1423 1424fastcall 1425static void blt_composite_nop(struct sna *sna, 1426 const struct sna_composite_op *op, 1427 const struct sna_composite_rectangles *r) 1428{ 1429} 1430 1431fastcall static void blt_composite_nop_box(struct sna *sna, 1432 const struct sna_composite_op *op, 1433 const BoxRec *box) 1434{ 1435} 1436 1437static void blt_composite_nop_boxes(struct sna *sna, 1438 const struct sna_composite_op *op, 1439 const BoxRec *box, int n) 1440{ 1441} 1442 1443static bool 1444begin_blt(struct sna *sna, 1445 struct sna_composite_op *op) 1446{ 1447 assert(sna->kgem.mode == KGEM_BLT); 1448 if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) { 1449 kgem_submit(&sna->kgem); 1450 if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) 1451 return false; 1452 1453 _kgem_set_mode(&sna->kgem, KGEM_BLT); 1454 kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo); 1455 } 1456 1457 return true; 1458} 1459 1460static bool 1461prepare_blt_nop(struct sna *sna, 1462 struct sna_composite_op *op) 1463{ 1464 DBG(("%s\n", __FUNCTION__)); 1465 1466 op->blt = blt_composite_nop; 1467 op->box = blt_composite_nop_box; 1468 op->boxes = blt_composite_nop_boxes; 1469 op->done = nop_done; 1470 return true; 1471} 1472 1473static bool 1474prepare_blt_clear(struct sna *sna, 1475 struct sna_composite_op *op) 1476{ 1477 DBG(("%s\n", __FUNCTION__)); 1478 1479 if (op->dst.bo == NULL) { 1480 op->u.blt.pixel = 0; 1481 op->blt = blt_composite_fill__cpu; 1482 if (op->dst.x|op->dst.y) { 1483 op->box = blt_composite_fill_box__cpu; 1484 op->boxes = blt_composite_fill_boxes__cpu; 1485 op->thread_boxes = blt_composite_fill_boxes__cpu; 1486 } else { 1487 op->box = blt_composite_fill_box_no_offset__cpu; 1488 op->boxes = blt_composite_fill_boxes_no_offset__cpu; 1489 op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; 1490 } 1491 op->done = sig_done; 1492 return sigtrap_get() == 0; 1493 } 1494 1495 op->blt = blt_composite_fill; 1496 if (op->dst.x|op->dst.y) { 1497 op->box = blt_composite_fill_box; 1498 op->boxes = blt_composite_fill_boxes; 1499 op->thread_boxes = blt_composite_fill_boxes__thread; 1500 } else { 1501 op->box = blt_composite_fill_box_no_offset; 1502 op->boxes = blt_composite_fill_boxes_no_offset; 1503 op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; 1504 } 1505 op->done = nop_done; 1506 1507 if (!sna_blt_fill_init(sna, &op->u.blt, 1508 op->dst.bo, 1509 op->dst.pixmap->drawable.bitsPerPixel, 1510 GXclear, 0)) 1511 return false; 1512 1513 return begin_blt(sna, op); 1514} 1515 1516static bool 1517prepare_blt_fill(struct sna *sna, 1518 struct sna_composite_op *op, 1519 uint32_t pixel) 1520{ 1521 DBG(("%s\n", __FUNCTION__)); 1522 1523 if (op->dst.bo == NULL) { 1524 op->u.blt.pixel = pixel; 1525 op->blt = blt_composite_fill__cpu; 1526 if (op->dst.x|op->dst.y) { 1527 op->box = blt_composite_fill_box__cpu; 1528 op->boxes = blt_composite_fill_boxes__cpu; 1529 op->thread_boxes = blt_composite_fill_boxes__cpu; 1530 } else { 1531 op->box = blt_composite_fill_box_no_offset__cpu; 1532 op->boxes = blt_composite_fill_boxes_no_offset__cpu; 1533 op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; 1534 } 1535 op->done = sig_done; 1536 return sigtrap_get() == 0; 1537 } 1538 1539 op->blt = blt_composite_fill; 1540 if (op->dst.x|op->dst.y) { 1541 op->box = blt_composite_fill_box; 1542 op->boxes = blt_composite_fill_boxes; 1543 op->thread_boxes = blt_composite_fill_boxes__thread; 1544 } else { 1545 op->box = blt_composite_fill_box_no_offset; 1546 op->boxes = blt_composite_fill_boxes_no_offset; 1547 op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; 1548 } 1549 op->done = nop_done; 1550 1551 if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo, 1552 op->dst.pixmap->drawable.bitsPerPixel, 1553 GXcopy, pixel)) 1554 return false; 1555 1556 return begin_blt(sna, op); 1557} 1558 1559fastcall static void 1560blt_composite_copy(struct sna *sna, 1561 const struct sna_composite_op *op, 1562 const struct sna_composite_rectangles *r) 1563{ 1564 int x1, x2, y1, y2; 1565 int src_x, src_y; 1566 1567 DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", 1568 __FUNCTION__, 1569 r->src.x, r->src.y, 1570 r->dst.x, r->dst.y, 1571 r->width, r->height)); 1572 1573 /* XXX higher layer should have clipped? */ 1574 1575 x1 = r->dst.x + op->dst.x; 1576 y1 = r->dst.y + op->dst.y; 1577 x2 = x1 + r->width; 1578 y2 = y1 + r->height; 1579 1580 src_x = r->src.x - x1 + op->u.blt.sx; 1581 src_y = r->src.y - y1 + op->u.blt.sy; 1582 1583 /* clip against dst */ 1584 if (x1 < 0) 1585 x1 = 0; 1586 if (y1 < 0) 1587 y1 = 0; 1588 1589 if (x2 > op->dst.width) 1590 x2 = op->dst.width; 1591 1592 if (y2 > op->dst.height) 1593 y2 = op->dst.height; 1594 1595 DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2)); 1596 1597 if (x2 <= x1 || y2 <= y1) 1598 return; 1599 1600 sna_blt_copy_one(sna, &op->u.blt, 1601 x1 + src_x, y1 + src_y, 1602 x2 - x1, y2 - y1, 1603 x1, y1); 1604} 1605 1606fastcall static void blt_composite_copy_box(struct sna *sna, 1607 const struct sna_composite_op *op, 1608 const BoxRec *box) 1609{ 1610 DBG(("%s: box (%d, %d), (%d, %d)\n", 1611 __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 1612 sna_blt_copy_one(sna, &op->u.blt, 1613 box->x1 + op->u.blt.sx, 1614 box->y1 + op->u.blt.sy, 1615 box->x2 - box->x1, 1616 box->y2 - box->y1, 1617 box->x1 + op->dst.x, 1618 box->y1 + op->dst.y); 1619} 1620 1621static void blt_composite_copy_boxes(struct sna *sna, 1622 const struct sna_composite_op *op, 1623 const BoxRec *box, int nbox) 1624{ 1625 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1626 do { 1627 DBG(("%s: box (%d, %d), (%d, %d)\n", 1628 __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 1629 sna_blt_copy_one(sna, &op->u.blt, 1630 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 1631 box->x2 - box->x1, box->y2 - box->y1, 1632 box->x1 + op->dst.x, box->y1 + op->dst.y); 1633 box++; 1634 } while(--nbox); 1635} 1636 1637static inline uint32_t add2(uint32_t v, int16_t x, int16_t y) 1638{ 1639 x += v & 0xffff; 1640 y += v >> 16; 1641 return (uint16_t)y << 16 | x; 1642} 1643 1644static void blt_composite_copy_boxes__thread(struct sna *sna, 1645 const struct sna_composite_op *op, 1646 const BoxRec *box, int nbox) 1647{ 1648 struct kgem *kgem = &sna->kgem; 1649 int dst_dx = op->dst.x; 1650 int dst_dy = op->dst.y; 1651 int src_dx = op->src.offset[0]; 1652 int src_dy = op->src.offset[1]; 1653 uint32_t cmd = op->u.blt.cmd; 1654 uint32_t br13 = op->u.blt.br13; 1655 struct kgem_bo *src_bo = op->u.blt.bo[0]; 1656 struct kgem_bo *dst_bo = op->u.blt.bo[1]; 1657 int src_pitch = op->u.blt.pitch[0]; 1658 1659 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1660 1661 sna_vertex_lock(&sna->render); 1662 1663 if ((dst_dx | dst_dy) == 0) { 1664 uint64_t hdr = (uint64_t)br13 << 32 | cmd; 1665 do { 1666 int nbox_this_time, rem; 1667 1668 nbox_this_time = nbox; 1669 rem = kgem_batch_space(kgem); 1670 if (8*nbox_this_time > rem) 1671 nbox_this_time = rem / 8; 1672 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1673 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 1674 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1675 __FUNCTION__, nbox_this_time, nbox, rem)); 1676 assert(nbox_this_time > 0); 1677 nbox -= nbox_this_time; 1678 1679 assert(sna->kgem.mode == KGEM_BLT); 1680 do { 1681 uint32_t *b = kgem->batch + kgem->nbatch; 1682 1683 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 1684 __FUNCTION__, 1685 box->x1, box->y1, 1686 box->x2 - box->x1, box->y2 - box->y1)); 1687 1688 assert(box->x1 + src_dx >= 0); 1689 assert(box->y1 + src_dy >= 0); 1690 assert(box->x1 + src_dx <= INT16_MAX); 1691 assert(box->y1 + src_dy <= INT16_MAX); 1692 1693 assert(box->x1 >= 0); 1694 assert(box->y1 >= 0); 1695 1696 *(uint64_t *)&b[0] = hdr; 1697 *(uint64_t *)&b[2] = *(const uint64_t *)box; 1698 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 1699 I915_GEM_DOMAIN_RENDER << 16 | 1700 I915_GEM_DOMAIN_RENDER | 1701 KGEM_RELOC_FENCED, 1702 0); 1703 b[5] = add2(b[2], src_dx, src_dy); 1704 b[6] = src_pitch; 1705 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 1706 I915_GEM_DOMAIN_RENDER << 16 | 1707 KGEM_RELOC_FENCED, 1708 0); 1709 kgem->nbatch += 8; 1710 assert(kgem->nbatch < kgem->surface); 1711 box++; 1712 } while (--nbox_this_time); 1713 1714 if (!nbox) 1715 break; 1716 1717 _kgem_submit(kgem); 1718 _kgem_set_mode(kgem, KGEM_BLT); 1719 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 1720 } while (1); 1721 } else { 1722 do { 1723 int nbox_this_time, rem; 1724 1725 nbox_this_time = nbox; 1726 rem = kgem_batch_space(kgem); 1727 if (8*nbox_this_time > rem) 1728 nbox_this_time = rem / 8; 1729 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1730 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 1731 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1732 __FUNCTION__, nbox_this_time, nbox, rem)); 1733 assert(nbox_this_time > 0); 1734 nbox -= nbox_this_time; 1735 1736 assert(sna->kgem.mode == KGEM_BLT); 1737 do { 1738 uint32_t *b = kgem->batch + kgem->nbatch; 1739 1740 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 1741 __FUNCTION__, 1742 box->x1, box->y1, 1743 box->x2 - box->x1, box->y2 - box->y1)); 1744 1745 assert(box->x1 + src_dx >= 0); 1746 assert(box->y1 + src_dy >= 0); 1747 1748 assert(box->x1 + dst_dx >= 0); 1749 assert(box->y1 + dst_dy >= 0); 1750 1751 b[0] = cmd; 1752 b[1] = br13; 1753 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 1754 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 1755 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 1756 I915_GEM_DOMAIN_RENDER << 16 | 1757 I915_GEM_DOMAIN_RENDER | 1758 KGEM_RELOC_FENCED, 1759 0); 1760 b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 1761 b[6] = src_pitch; 1762 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 1763 I915_GEM_DOMAIN_RENDER << 16 | 1764 KGEM_RELOC_FENCED, 1765 0); 1766 kgem->nbatch += 8; 1767 assert(kgem->nbatch < kgem->surface); 1768 box++; 1769 } while (--nbox_this_time); 1770 1771 if (!nbox) 1772 break; 1773 1774 _kgem_submit(kgem); 1775 _kgem_set_mode(kgem, KGEM_BLT); 1776 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 1777 } while (1); 1778 } 1779 sna_vertex_unlock(&sna->render); 1780} 1781 1782static void blt_composite_copy_boxes__thread64(struct sna *sna, 1783 const struct sna_composite_op *op, 1784 const BoxRec *box, int nbox) 1785{ 1786 struct kgem *kgem = &sna->kgem; 1787 int dst_dx = op->dst.x; 1788 int dst_dy = op->dst.y; 1789 int src_dx = op->src.offset[0]; 1790 int src_dy = op->src.offset[1]; 1791 uint32_t cmd = op->u.blt.cmd; 1792 uint32_t br13 = op->u.blt.br13; 1793 struct kgem_bo *src_bo = op->u.blt.bo[0]; 1794 struct kgem_bo *dst_bo = op->u.blt.bo[1]; 1795 int src_pitch = op->u.blt.pitch[0]; 1796 1797 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1798 1799 sna_vertex_lock(&sna->render); 1800 1801 if ((dst_dx | dst_dy) == 0) { 1802 uint64_t hdr = (uint64_t)br13 << 32 | cmd; 1803 do { 1804 int nbox_this_time, rem; 1805 1806 nbox_this_time = nbox; 1807 rem = kgem_batch_space(kgem); 1808 if (10*nbox_this_time > rem) 1809 nbox_this_time = rem / 10; 1810 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1811 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 1812 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1813 __FUNCTION__, nbox_this_time, nbox, rem)); 1814 assert(nbox_this_time > 0); 1815 nbox -= nbox_this_time; 1816 1817 assert(kgem->mode == KGEM_BLT); 1818 do { 1819 uint32_t *b = kgem->batch + kgem->nbatch; 1820 1821 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 1822 __FUNCTION__, 1823 box->x1, box->y1, 1824 box->x2 - box->x1, box->y2 - box->y1)); 1825 1826 assert(box->x1 + src_dx >= 0); 1827 assert(box->y1 + src_dy >= 0); 1828 assert(box->x1 + src_dx <= INT16_MAX); 1829 assert(box->y1 + src_dy <= INT16_MAX); 1830 1831 assert(box->x1 >= 0); 1832 assert(box->y1 >= 0); 1833 1834 *(uint64_t *)&b[0] = hdr; 1835 *(uint64_t *)&b[2] = *(const uint64_t *)box; 1836 *(uint64_t *)(b+4) = 1837 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 1838 I915_GEM_DOMAIN_RENDER << 16 | 1839 I915_GEM_DOMAIN_RENDER | 1840 KGEM_RELOC_FENCED, 1841 0); 1842 b[6] = add2(b[2], src_dx, src_dy); 1843 b[7] = src_pitch; 1844 *(uint64_t *)(b+8) = 1845 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 1846 I915_GEM_DOMAIN_RENDER << 16 | 1847 KGEM_RELOC_FENCED, 1848 0); 1849 kgem->nbatch += 10; 1850 assert(kgem->nbatch < kgem->surface); 1851 box++; 1852 } while (--nbox_this_time); 1853 1854 if (!nbox) 1855 break; 1856 1857 _kgem_submit(kgem); 1858 _kgem_set_mode(kgem, KGEM_BLT); 1859 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 1860 } while (1); 1861 } else { 1862 do { 1863 int nbox_this_time, rem; 1864 1865 nbox_this_time = nbox; 1866 rem = kgem_batch_space(kgem); 1867 if (10*nbox_this_time > rem) 1868 nbox_this_time = rem / 10; 1869 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1870 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 1871 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 1872 __FUNCTION__, nbox_this_time, nbox, rem)); 1873 assert(nbox_this_time > 0); 1874 nbox -= nbox_this_time; 1875 1876 assert(kgem->mode == KGEM_BLT); 1877 do { 1878 uint32_t *b = kgem->batch + kgem->nbatch; 1879 1880 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 1881 __FUNCTION__, 1882 box->x1, box->y1, 1883 box->x2 - box->x1, box->y2 - box->y1)); 1884 1885 assert(box->x1 + src_dx >= 0); 1886 assert(box->y1 + src_dy >= 0); 1887 1888 assert(box->x1 + dst_dx >= 0); 1889 assert(box->y1 + dst_dy >= 0); 1890 1891 b[0] = cmd; 1892 b[1] = br13; 1893 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 1894 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 1895 *(uint64_t *)(b+4) = 1896 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 1897 I915_GEM_DOMAIN_RENDER << 16 | 1898 I915_GEM_DOMAIN_RENDER | 1899 KGEM_RELOC_FENCED, 1900 0); 1901 b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 1902 b[7] = src_pitch; 1903 *(uint64_t *)(b+8) = 1904 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 1905 I915_GEM_DOMAIN_RENDER << 16 | 1906 KGEM_RELOC_FENCED, 1907 0); 1908 kgem->nbatch += 10; 1909 assert(kgem->nbatch < kgem->surface); 1910 box++; 1911 } while (--nbox_this_time); 1912 1913 if (!nbox) 1914 break; 1915 1916 _kgem_submit(kgem); 1917 _kgem_set_mode(kgem, KGEM_BLT); 1918 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 1919 } while (1); 1920 } 1921 sna_vertex_unlock(&sna->render); 1922} 1923 1924fastcall static void 1925blt_composite_copy_with_alpha(struct sna *sna, 1926 const struct sna_composite_op *op, 1927 const struct sna_composite_rectangles *r) 1928{ 1929 int x1, x2, y1, y2; 1930 int src_x, src_y; 1931 1932 DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", 1933 __FUNCTION__, 1934 r->src.x, r->src.y, 1935 r->dst.x, r->dst.y, 1936 r->width, r->height)); 1937 1938 /* XXX higher layer should have clipped? */ 1939 1940 x1 = r->dst.x + op->dst.x; 1941 y1 = r->dst.y + op->dst.y; 1942 x2 = x1 + r->width; 1943 y2 = y1 + r->height; 1944 1945 src_x = r->src.x - x1 + op->u.blt.sx; 1946 src_y = r->src.y - y1 + op->u.blt.sy; 1947 1948 /* clip against dst */ 1949 if (x1 < 0) 1950 x1 = 0; 1951 if (y1 < 0) 1952 y1 = 0; 1953 1954 if (x2 > op->dst.width) 1955 x2 = op->dst.width; 1956 1957 if (y2 > op->dst.height) 1958 y2 = op->dst.height; 1959 1960 DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2)); 1961 1962 if (x2 <= x1 || y2 <= y1) 1963 return; 1964 1965 sna_blt_alpha_fixup_one(sna, &op->u.blt, 1966 x1 + src_x, y1 + src_y, 1967 x2 - x1, y2 - y1, 1968 x1, y1); 1969} 1970 1971fastcall static void 1972blt_composite_copy_box_with_alpha(struct sna *sna, 1973 const struct sna_composite_op *op, 1974 const BoxRec *box) 1975{ 1976 DBG(("%s: box (%d, %d), (%d, %d)\n", 1977 __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 1978 sna_blt_alpha_fixup_one(sna, &op->u.blt, 1979 box->x1 + op->u.blt.sx, 1980 box->y1 + op->u.blt.sy, 1981 box->x2 - box->x1, 1982 box->y2 - box->y1, 1983 box->x1 + op->dst.x, 1984 box->y1 + op->dst.y); 1985} 1986 1987static void 1988blt_composite_copy_boxes_with_alpha(struct sna *sna, 1989 const struct sna_composite_op *op, 1990 const BoxRec *box, int nbox) 1991{ 1992 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1993 do { 1994 DBG(("%s: box (%d, %d), (%d, %d)\n", 1995 __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 1996 sna_blt_alpha_fixup_one(sna, &op->u.blt, 1997 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 1998 box->x2 - box->x1, box->y2 - box->y1, 1999 box->x1 + op->dst.x, box->y1 + op->dst.y); 2000 box++; 2001 } while(--nbox); 2002} 2003 2004static bool 2005prepare_blt_copy(struct sna *sna, 2006 struct sna_composite_op *op, 2007 struct kgem_bo *bo, 2008 uint32_t alpha_fixup) 2009{ 2010 PixmapPtr src = op->u.blt.src_pixmap; 2011 2012 assert(op->dst.bo); 2013 assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo)); 2014 assert(kgem_bo_can_blt(&sna->kgem, bo)); 2015 2016 kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo); 2017 if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) { 2018 kgem_submit(&sna->kgem); 2019 if (!kgem_check_many_bo_fenced(&sna->kgem, 2020 op->dst.bo, bo, NULL)) { 2021 DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); 2022 return sna_tiling_blt_composite(sna, op, bo, 2023 src->drawable.bitsPerPixel, 2024 alpha_fixup); 2025 } 2026 _kgem_set_mode(&sna->kgem, KGEM_BLT); 2027 } 2028 kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo); 2029 2030 DBG(("%s\n", __FUNCTION__)); 2031 2032 if (sna->kgem.gen >= 060 && op->dst.bo == bo) 2033 op->done = gen6_blt_copy_done; 2034 else 2035 op->done = nop_done; 2036 2037 if (alpha_fixup) { 2038 op->blt = blt_composite_copy_with_alpha; 2039 op->box = blt_composite_copy_box_with_alpha; 2040 op->boxes = blt_composite_copy_boxes_with_alpha; 2041 2042 if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo, 2043 src->drawable.bitsPerPixel, 2044 alpha_fixup)) 2045 return false; 2046 } else { 2047 op->blt = blt_composite_copy; 2048 op->box = blt_composite_copy_box; 2049 op->boxes = blt_composite_copy_boxes; 2050 if (sna->kgem.gen >= 0100) 2051 op->thread_boxes = blt_composite_copy_boxes__thread64; 2052 else 2053 op->thread_boxes = blt_composite_copy_boxes__thread; 2054 2055 if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo, 2056 src->drawable.bitsPerPixel, 2057 GXcopy)) 2058 return false; 2059 } 2060 2061 return true; 2062} 2063 2064fastcall static void 2065blt_put_composite__cpu(struct sna *sna, 2066 const struct sna_composite_op *op, 2067 const struct sna_composite_rectangles *r) 2068{ 2069 PixmapPtr dst = op->dst.pixmap; 2070 PixmapPtr src = op->u.blt.src_pixmap; 2071 assert(src->devPrivate.ptr); 2072 assert(src->devKind); 2073 assert(dst->devPrivate.ptr); 2074 assert(dst->devKind); 2075 memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr, 2076 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2077 r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy, 2078 r->dst.x + op->dst.x, r->dst.y + op->dst.y, 2079 r->width, r->height); 2080} 2081 2082fastcall static void 2083blt_put_composite_box__cpu(struct sna *sna, 2084 const struct sna_composite_op *op, 2085 const BoxRec *box) 2086{ 2087 PixmapPtr dst = op->dst.pixmap; 2088 PixmapPtr src = op->u.blt.src_pixmap; 2089 assert(src->devPrivate.ptr); 2090 assert(src->devKind); 2091 assert(dst->devPrivate.ptr); 2092 assert(dst->devKind); 2093 memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr, 2094 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2095 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 2096 box->x1 + op->dst.x, box->y1 + op->dst.y, 2097 box->x2-box->x1, box->y2-box->y1); 2098} 2099 2100static void 2101blt_put_composite_boxes__cpu(struct sna *sna, 2102 const struct sna_composite_op *op, 2103 const BoxRec *box, int n) 2104{ 2105 PixmapPtr dst = op->dst.pixmap; 2106 PixmapPtr src = op->u.blt.src_pixmap; 2107 assert(src->devPrivate.ptr); 2108 assert(src->devKind); 2109 assert(dst->devPrivate.ptr); 2110 assert(dst->devKind); 2111 do { 2112 memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr, 2113 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2114 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 2115 box->x1 + op->dst.x, box->y1 + op->dst.y, 2116 box->x2-box->x1, box->y2-box->y1); 2117 box++; 2118 } while (--n); 2119} 2120 2121fastcall static void 2122blt_put_composite_with_alpha__cpu(struct sna *sna, 2123 const struct sna_composite_op *op, 2124 const struct sna_composite_rectangles *r) 2125{ 2126 PixmapPtr dst = op->dst.pixmap; 2127 PixmapPtr src = op->u.blt.src_pixmap; 2128 assert(src->devPrivate.ptr); 2129 assert(src->devKind); 2130 assert(dst->devPrivate.ptr); 2131 assert(dst->devKind); 2132 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr, 2133 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2134 r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy, 2135 r->dst.x + op->dst.x, r->dst.y + op->dst.y, 2136 r->width, r->height, 2137 0xffffffff, op->u.blt.pixel); 2138 2139} 2140 2141fastcall static void 2142blt_put_composite_box_with_alpha__cpu(struct sna *sna, 2143 const struct sna_composite_op *op, 2144 const BoxRec *box) 2145{ 2146 PixmapPtr dst = op->dst.pixmap; 2147 PixmapPtr src = op->u.blt.src_pixmap; 2148 assert(src->devPrivate.ptr); 2149 assert(src->devKind); 2150 assert(dst->devPrivate.ptr); 2151 assert(dst->devKind); 2152 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr, 2153 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2154 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 2155 box->x1 + op->dst.x, box->y1 + op->dst.y, 2156 box->x2-box->x1, box->y2-box->y1, 2157 0xffffffff, op->u.blt.pixel); 2158} 2159 2160static void 2161blt_put_composite_boxes_with_alpha__cpu(struct sna *sna, 2162 const struct sna_composite_op *op, 2163 const BoxRec *box, int n) 2164{ 2165 PixmapPtr dst = op->dst.pixmap; 2166 PixmapPtr src = op->u.blt.src_pixmap; 2167 assert(src->devPrivate.ptr); 2168 assert(src->devKind); 2169 assert(dst->devPrivate.ptr); 2170 assert(dst->devKind); 2171 do { 2172 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr, 2173 src->drawable.bitsPerPixel, src->devKind, dst->devKind, 2174 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, 2175 box->x1 + op->dst.x, box->y1 + op->dst.y, 2176 box->x2-box->x1, box->y2-box->y1, 2177 0xffffffff, op->u.blt.pixel); 2178 box++; 2179 } while (--n); 2180} 2181 2182fastcall static void 2183blt_put_composite(struct sna *sna, 2184 const struct sna_composite_op *op, 2185 const struct sna_composite_rectangles *r) 2186{ 2187 PixmapPtr dst = op->dst.pixmap; 2188 PixmapPtr src = op->u.blt.src_pixmap; 2189 struct sna_pixmap *dst_priv = sna_pixmap(dst); 2190 int pitch = src->devKind; 2191 char *data = src->devPrivate.ptr; 2192 int bpp = src->drawable.bitsPerPixel; 2193 2194 int16_t dst_x = r->dst.x + op->dst.x; 2195 int16_t dst_y = r->dst.y + op->dst.y; 2196 int16_t src_x = r->src.x + op->u.blt.sx; 2197 int16_t src_y = r->src.y + op->u.blt.sy; 2198 2199 if (!dst_priv->pinned && 2200 dst_x <= 0 && dst_y <= 0 && 2201 dst_x + r->width >= op->dst.width && 2202 dst_y + r->height >= op->dst.height) { 2203 data += (src_x - dst_x) * bpp / 8; 2204 data += (src_y - dst_y) * pitch; 2205 2206 assert(op->dst.bo == dst_priv->gpu_bo); 2207 sna_replace(sna, op->dst.pixmap, data, pitch); 2208 } else { 2209 BoxRec box; 2210 bool ok; 2211 2212 box.x1 = dst_x; 2213 box.y1 = dst_y; 2214 box.x2 = dst_x + r->width; 2215 box.y2 = dst_y + r->height; 2216 2217 ok = sna_write_boxes(sna, dst, 2218 dst_priv->gpu_bo, 0, 0, 2219 data, pitch, src_x, src_y, 2220 &box, 1); 2221 assert(ok); 2222 (void)ok; 2223 } 2224} 2225 2226fastcall static void blt_put_composite_box(struct sna *sna, 2227 const struct sna_composite_op *op, 2228 const BoxRec *box) 2229{ 2230 PixmapPtr src = op->u.blt.src_pixmap; 2231 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); 2232 2233 DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__, 2234 op->u.blt.sx, op->u.blt.sy, 2235 op->dst.x, op->dst.y)); 2236 2237 assert(src->devPrivate.ptr); 2238 assert(src->devKind); 2239 if (!dst_priv->pinned && 2240 box->x2 - box->x1 == op->dst.width && 2241 box->y2 - box->y1 == op->dst.height) { 2242 int pitch = src->devKind; 2243 int bpp = src->drawable.bitsPerPixel / 8; 2244 char *data = src->devPrivate.ptr; 2245 2246 data += (box->y1 + op->u.blt.sy) * pitch; 2247 data += (box->x1 + op->u.blt.sx) * bpp; 2248 2249 assert(op->dst.bo == dst_priv->gpu_bo); 2250 sna_replace(sna, op->dst.pixmap, data, pitch); 2251 } else { 2252 bool ok; 2253 2254 ok = sna_write_boxes(sna, op->dst.pixmap, 2255 op->dst.bo, op->dst.x, op->dst.y, 2256 src->devPrivate.ptr, 2257 src->devKind, 2258 op->u.blt.sx, op->u.blt.sy, 2259 box, 1); 2260 assert(ok); 2261 (void)ok; 2262 } 2263} 2264 2265static void blt_put_composite_boxes(struct sna *sna, 2266 const struct sna_composite_op *op, 2267 const BoxRec *box, int n) 2268{ 2269 PixmapPtr src = op->u.blt.src_pixmap; 2270 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); 2271 2272 DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__, 2273 op->u.blt.sx, op->u.blt.sy, 2274 op->dst.x, op->dst.y, 2275 box->x1, box->y1, box->x2, box->y2, n)); 2276 2277 assert(src->devPrivate.ptr); 2278 assert(src->devKind); 2279 if (n == 1 && !dst_priv->pinned && 2280 box->x2 - box->x1 == op->dst.width && 2281 box->y2 - box->y1 == op->dst.height) { 2282 int pitch = src->devKind; 2283 int bpp = src->drawable.bitsPerPixel / 8; 2284 char *data = src->devPrivate.ptr; 2285 2286 data += (box->y1 + op->u.blt.sy) * pitch; 2287 data += (box->x1 + op->u.blt.sx) * bpp; 2288 2289 assert(op->dst.bo == dst_priv->gpu_bo); 2290 sna_replace(sna, op->dst.pixmap, data, pitch); 2291 } else { 2292 bool ok; 2293 2294 ok = sna_write_boxes(sna, op->dst.pixmap, 2295 op->dst.bo, op->dst.x, op->dst.y, 2296 src->devPrivate.ptr, 2297 src->devKind, 2298 op->u.blt.sx, op->u.blt.sy, 2299 box, n); 2300 assert(ok); 2301 (void)ok; 2302 } 2303} 2304 2305fastcall static void 2306blt_put_composite_with_alpha(struct sna *sna, 2307 const struct sna_composite_op *op, 2308 const struct sna_composite_rectangles *r) 2309{ 2310 PixmapPtr dst = op->dst.pixmap; 2311 PixmapPtr src = op->u.blt.src_pixmap; 2312 struct sna_pixmap *dst_priv = sna_pixmap(dst); 2313 int pitch = src->devKind; 2314 char *data = src->devPrivate.ptr; 2315 2316 int16_t dst_x = r->dst.x + op->dst.x; 2317 int16_t dst_y = r->dst.y + op->dst.y; 2318 int16_t src_x = r->src.x + op->u.blt.sx; 2319 int16_t src_y = r->src.y + op->u.blt.sy; 2320 2321 assert(src->devPrivate.ptr); 2322 assert(src->devKind); 2323 2324 if (!dst_priv->pinned && 2325 dst_x <= 0 && dst_y <= 0 && 2326 dst_x + r->width >= op->dst.width && 2327 dst_y + r->height >= op->dst.height) { 2328 int bpp = dst->drawable.bitsPerPixel / 8; 2329 2330 data += (src_x - dst_x) * bpp; 2331 data += (src_y - dst_y) * pitch; 2332 2333 assert(op->dst.bo == dst_priv->gpu_bo); 2334 sna_replace__xor(sna, op->dst.pixmap, data, pitch, 2335 0xffffffff, op->u.blt.pixel); 2336 } else { 2337 BoxRec box; 2338 2339 box.x1 = dst_x; 2340 box.y1 = dst_y; 2341 box.x2 = dst_x + r->width; 2342 box.y2 = dst_y + r->height; 2343 2344 sna_write_boxes__xor(sna, dst, 2345 dst_priv->gpu_bo, 0, 0, 2346 data, pitch, src_x, src_y, 2347 &box, 1, 2348 0xffffffff, op->u.blt.pixel); 2349 } 2350} 2351 2352fastcall static void 2353blt_put_composite_box_with_alpha(struct sna *sna, 2354 const struct sna_composite_op *op, 2355 const BoxRec *box) 2356{ 2357 PixmapPtr src = op->u.blt.src_pixmap; 2358 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); 2359 2360 DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__, 2361 op->u.blt.sx, op->u.blt.sy, 2362 op->dst.x, op->dst.y)); 2363 2364 assert(src->devPrivate.ptr); 2365 assert(src->devKind); 2366 2367 if (!dst_priv->pinned && 2368 box->x2 - box->x1 == op->dst.width && 2369 box->y2 - box->y1 == op->dst.height) { 2370 int pitch = src->devKind; 2371 int bpp = src->drawable.bitsPerPixel / 8; 2372 char *data = src->devPrivate.ptr; 2373 2374 data += (box->y1 + op->u.blt.sy) * pitch; 2375 data += (box->x1 + op->u.blt.sx) * bpp; 2376 2377 assert(op->dst.bo == dst_priv->gpu_bo); 2378 sna_replace__xor(sna, op->dst.pixmap, data, pitch, 2379 0xffffffff, op->u.blt.pixel); 2380 } else { 2381 sna_write_boxes__xor(sna, op->dst.pixmap, 2382 op->dst.bo, op->dst.x, op->dst.y, 2383 src->devPrivate.ptr, 2384 src->devKind, 2385 op->u.blt.sx, op->u.blt.sy, 2386 box, 1, 2387 0xffffffff, op->u.blt.pixel); 2388 } 2389} 2390 2391static void 2392blt_put_composite_boxes_with_alpha(struct sna *sna, 2393 const struct sna_composite_op *op, 2394 const BoxRec *box, int n) 2395{ 2396 PixmapPtr src = op->u.blt.src_pixmap; 2397 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); 2398 2399 DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__, 2400 op->u.blt.sx, op->u.blt.sy, 2401 op->dst.x, op->dst.y, 2402 box->x1, box->y1, box->x2, box->y2, n)); 2403 2404 assert(src->devPrivate.ptr); 2405 assert(src->devKind); 2406 2407 if (n == 1 && !dst_priv->pinned && 2408 box->x2 - box->x1 == op->dst.width && 2409 box->y2 - box->y1 == op->dst.height) { 2410 int pitch = src->devKind; 2411 int bpp = src->drawable.bitsPerPixel / 8; 2412 char *data = src->devPrivate.ptr; 2413 2414 data += (box->y1 + op->u.blt.sy) * pitch; 2415 data += (box->x1 + op->u.blt.sx) * bpp; 2416 2417 assert(dst_priv->gpu_bo == op->dst.bo); 2418 sna_replace__xor(sna, op->dst.pixmap, data, pitch, 2419 0xffffffff, op->u.blt.pixel); 2420 } else { 2421 sna_write_boxes__xor(sna, op->dst.pixmap, 2422 op->dst.bo, op->dst.x, op->dst.y, 2423 src->devPrivate.ptr, 2424 src->devKind, 2425 op->u.blt.sx, op->u.blt.sy, 2426 box, n, 2427 0xffffffff, op->u.blt.pixel); 2428 } 2429} 2430 2431static bool 2432prepare_blt_put(struct sna *sna, 2433 struct sna_composite_op *op, 2434 uint32_t alpha_fixup) 2435{ 2436 DBG(("%s\n", __FUNCTION__)); 2437 2438 assert(!sna_pixmap(op->dst.pixmap)->clear); 2439 2440 if (op->dst.bo) { 2441 assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo); 2442 if (alpha_fixup) { 2443 op->u.blt.pixel = alpha_fixup; 2444 op->blt = blt_put_composite_with_alpha; 2445 op->box = blt_put_composite_box_with_alpha; 2446 op->boxes = blt_put_composite_boxes_with_alpha; 2447 } else { 2448 op->blt = blt_put_composite; 2449 op->box = blt_put_composite_box; 2450 op->boxes = blt_put_composite_boxes; 2451 } 2452 2453 op->done = nop_done; 2454 return true; 2455 } else { 2456 if (alpha_fixup) { 2457 op->u.blt.pixel = alpha_fixup; 2458 op->blt = blt_put_composite_with_alpha__cpu; 2459 op->box = blt_put_composite_box_with_alpha__cpu; 2460 op->boxes = blt_put_composite_boxes_with_alpha__cpu; 2461 } else { 2462 op->blt = blt_put_composite__cpu; 2463 op->box = blt_put_composite_box__cpu; 2464 op->boxes = blt_put_composite_boxes__cpu; 2465 } 2466 2467 op->done = sig_done; 2468 return sigtrap_get() == 0; 2469 } 2470} 2471 2472static bool 2473is_clear(PixmapPtr pixmap) 2474{ 2475 struct sna_pixmap *priv = sna_pixmap(pixmap); 2476 return priv && priv->clear; 2477} 2478 2479static inline uint32_t 2480over(uint32_t src, uint32_t dst) 2481{ 2482 uint32_t a = ~src >> 24; 2483 2484#define G_SHIFT 8 2485#define RB_MASK 0xff00ff 2486#define RB_ONE_HALF 0x800080 2487#define RB_MASK_PLUS_ONE 0x10000100 2488 2489#define UN8_rb_MUL_UN8(x, a, t) do { \ 2490 t = ((x) & RB_MASK) * (a); \ 2491 t += RB_ONE_HALF; \ 2492 x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ 2493 x &= RB_MASK; \ 2494} while (0) 2495 2496#define UN8_rb_ADD_UN8_rb(x, y, t) do { \ 2497 t = ((x) + (y)); \ 2498 t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ 2499 x = (t & RB_MASK); \ 2500} while (0) 2501 2502#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do { \ 2503 uint32_t r1__, r2__, r3__, t__; \ 2504 \ 2505 r1__ = (x); \ 2506 r2__ = (y) & RB_MASK; \ 2507 UN8_rb_MUL_UN8(r1__, (a), t__); \ 2508 UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \ 2509 \ 2510 r2__ = (x) >> G_SHIFT; \ 2511 r3__ = ((y) >> G_SHIFT) & RB_MASK; \ 2512 UN8_rb_MUL_UN8(r2__, (a), t__); \ 2513 UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \ 2514 \ 2515 (x) = r1__ | (r2__ << G_SHIFT); \ 2516} while (0) 2517 2518 UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src); 2519 2520 return dst; 2521} 2522 2523static inline uint32_t 2524add(uint32_t src, uint32_t dst) 2525{ 2526#define UN8x4_ADD_UN8x4(x, y) do { \ 2527 uint32_t r1__, r2__, r3__, t__; \ 2528 \ 2529 r1__ = (x) & RB_MASK; \ 2530 r2__ = (y) & RB_MASK; \ 2531 UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \ 2532 \ 2533 r2__ = ((x) >> G_SHIFT) & RB_MASK; \ 2534 r3__ = ((y) >> G_SHIFT) & RB_MASK; \ 2535 UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \ 2536 \ 2537 x = r1__ | (r2__ << G_SHIFT); \ 2538} while (0) 2539 2540 UN8x4_ADD_UN8x4(src, dst); 2541 return src; 2542} 2543 2544bool 2545sna_blt_composite(struct sna *sna, 2546 uint32_t op, 2547 PicturePtr src, 2548 PicturePtr dst, 2549 int16_t x, int16_t y, 2550 int16_t dst_x, int16_t dst_y, 2551 int16_t width, int16_t height, 2552 unsigned flags, 2553 struct sna_composite_op *tmp) 2554{ 2555 PictFormat src_format = src->format; 2556 PixmapPtr src_pixmap; 2557 struct kgem_bo *bo; 2558 int16_t tx, ty; 2559 BoxRec dst_box, src_box; 2560 uint32_t alpha_fixup; 2561 uint32_t color, hint; 2562 bool was_clear; 2563 bool ret; 2564 2565#if DEBUG_NO_BLT || NO_BLT_COMPOSITE 2566 return false; 2567#endif 2568 DBG(("%s (%d, %d), (%d, %d), %dx%d\n", 2569 __FUNCTION__, x, y, dst_x, dst_y, width, height)); 2570 2571 switch (dst->pDrawable->bitsPerPixel) { 2572 case 8: 2573 case 16: 2574 case 32: 2575 break; 2576 default: 2577 DBG(("%s: unhandled bpp: %d\n", __FUNCTION__, 2578 dst->pDrawable->bitsPerPixel)); 2579 return false; 2580 } 2581 2582 tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 2583 was_clear = is_clear(tmp->dst.pixmap); 2584 2585 if (width | height) { 2586 dst_box.x1 = dst_x; 2587 dst_box.x2 = bound(dst_x, width); 2588 dst_box.y1 = dst_y; 2589 dst_box.y2 = bound(dst_y, height); 2590 } else 2591 sna_render_picture_extents(dst, &dst_box); 2592 2593 tmp->dst.format = dst->format; 2594 tmp->dst.width = tmp->dst.pixmap->drawable.width; 2595 tmp->dst.height = tmp->dst.pixmap->drawable.height; 2596 get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap, 2597 &tmp->dst.x, &tmp->dst.y); 2598 2599 if (op == PictOpClear) { 2600clear: 2601 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) { 2602 sna_pixmap(tmp->dst.pixmap)->clear = true; 2603nop: 2604 return prepare_blt_nop(sna, tmp); 2605 } 2606 2607 hint = 0; 2608 if (can_render(sna)) { 2609 hint |= PREFER_GPU; 2610 if ((flags & COMPOSITE_PARTIAL) == 0) { 2611 hint |= IGNORE_DAMAGE; 2612 if (width == tmp->dst.pixmap->drawable.width && 2613 height == tmp->dst.pixmap->drawable.height) 2614 hint |= REPLACES; 2615 } 2616 } 2617 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2618 &dst_box, &tmp->damage); 2619 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); 2620 if (tmp->dst.bo) { 2621 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2622 DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", 2623 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch)); 2624 return false; 2625 } 2626 if (hint & REPLACES) 2627 kgem_bo_undo(&sna->kgem, tmp->dst.bo); 2628 if (flags & COMPOSITE_UPLOAD) 2629 return false; 2630 } else { 2631 RegionRec region; 2632 2633 region.extents = dst_box; 2634 region.data = NULL; 2635 2636 hint = MOVE_WRITE | MOVE_INPLACE_HINT; 2637 if (flags & COMPOSITE_PARTIAL) 2638 hint |= MOVE_READ; 2639 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint)) 2640 return false; 2641 } 2642 2643 return prepare_blt_clear(sna, tmp); 2644 } 2645 2646 if (is_solid(src)) { 2647 if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) { 2648 sna_pixmap(tmp->dst.pixmap)->clear = was_clear; 2649 return prepare_blt_nop(sna, tmp); 2650 } 2651 if (op == PictOpOver && is_opaque_solid(src)) 2652 op = PictOpSrc; 2653 if (op == PictOpAdd && 2654 PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) && 2655 is_white(src)) 2656 op = PictOpSrc; 2657 if (was_clear && (op == PictOpAdd || op == PictOpOver)) { 2658 if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0) 2659 op = PictOpSrc; 2660 if (op == PictOpOver) { 2661 unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); 2662 color = over(get_solid_color(src, PICT_a8r8g8b8), 2663 dst_color); 2664 op = PictOpSrc; 2665 DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n", 2666 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), 2667 solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), 2668 color)); 2669 if (color == dst_color) 2670 goto nop; 2671 else 2672 goto fill; 2673 } 2674 if (op == PictOpAdd) { 2675 unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); 2676 color = add(get_solid_color(src, PICT_a8r8g8b8), 2677 dst_color); 2678 op = PictOpSrc; 2679 DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n", 2680 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), 2681 solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), 2682 color)); 2683 if (color == dst_color) 2684 goto nop; 2685 else 2686 goto fill; 2687 } 2688 } 2689 if (op == PictOpOutReverse && is_opaque_solid(src)) 2690 goto clear; 2691 2692 if (op != PictOpSrc) { 2693 DBG(("%s: unsupported op [%d] for blitting\n", 2694 __FUNCTION__, op)); 2695 return false; 2696 } 2697 2698 color = get_solid_color(src, tmp->dst.format); 2699fill: 2700 if (color == 0) 2701 goto clear; 2702 2703 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) { 2704 sna_pixmap(tmp->dst.pixmap)->clear = true; 2705 return prepare_blt_nop(sna, tmp); 2706 } 2707 2708 hint = 0; 2709 if (can_render(sna)) { 2710 hint |= PREFER_GPU; 2711 if ((flags & COMPOSITE_PARTIAL) == 0) { 2712 hint |= IGNORE_DAMAGE; 2713 if (width == tmp->dst.pixmap->drawable.width && 2714 height == tmp->dst.pixmap->drawable.height) 2715 hint |= REPLACES; 2716 } 2717 } 2718 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2719 &dst_box, &tmp->damage); 2720 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); 2721 if (tmp->dst.bo) { 2722 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2723 DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", 2724 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch)); 2725 return false; 2726 } 2727 if (hint & REPLACES) 2728 kgem_bo_undo(&sna->kgem, tmp->dst.bo); 2729 if (flags & COMPOSITE_UPLOAD) 2730 return false; 2731 } else { 2732 RegionRec region; 2733 2734 region.extents = dst_box; 2735 region.data = NULL; 2736 2737 hint = MOVE_WRITE | MOVE_INPLACE_HINT; 2738 if (flags & COMPOSITE_PARTIAL) 2739 hint |= MOVE_READ; 2740 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint)) 2741 return false; 2742 } 2743 2744 return prepare_blt_fill(sna, tmp, color); 2745 } 2746 2747 if (!src->pDrawable) { 2748 DBG(("%s: unsupported procedural source\n", 2749 __FUNCTION__)); 2750 return false; 2751 } 2752 2753 if (src->filter == PictFilterConvolution) { 2754 DBG(("%s: convolutions filters not handled\n", 2755 __FUNCTION__)); 2756 return false; 2757 } 2758 2759 if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0) 2760 op = PictOpSrc; 2761 2762 if (op != PictOpSrc) { 2763 DBG(("%s: unsupported op [%d] for blitting\n", 2764 __FUNCTION__, op)); 2765 return false; 2766 } 2767 2768 if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter, 2769 dst->polyMode == PolyModePrecise, 2770 &tx, &ty)) { 2771 DBG(("%s: source transform is not an integer translation\n", 2772 __FUNCTION__)); 2773 return false; 2774 } 2775 DBG(("%s: converting transform to integer translation? (%d, %d)\n", 2776 __FUNCTION__, src->transform != NULL, tx, ty)); 2777 x += tx; 2778 y += ty; 2779 2780 if ((x >= src->pDrawable->width || 2781 y >= src->pDrawable->height || 2782 x + width <= 0 || 2783 y + height <= 0) && 2784 (!src->repeat || src->repeatType == RepeatNone)) { 2785 DBG(("%s: source is outside of valid area, converting to clear\n", 2786 __FUNCTION__)); 2787 goto clear; 2788 } 2789 2790 src_pixmap = get_drawable_pixmap(src->pDrawable); 2791 if (is_clear(src_pixmap)) { 2792 if (src->repeat || 2793 (x >= 0 && y >= 0 && 2794 x + width <= src_pixmap->drawable.width && 2795 y + height <= src_pixmap->drawable.height)) { 2796 color = color_convert(sna_pixmap(src_pixmap)->clear_color, 2797 src->format, tmp->dst.format); 2798 goto fill; 2799 } 2800 } 2801 2802 alpha_fixup = 0; 2803 if (!(dst->format == src_format || 2804 dst->format == alphaless(src_format) || 2805 (alphaless(dst->format) == alphaless(src_format) && 2806 sna_get_pixel_from_rgba(&alpha_fixup, 2807 0, 0, 0, 0xffff, 2808 dst->format)))) { 2809 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", 2810 __FUNCTION__, (unsigned)src_format, dst->format)); 2811 return false; 2812 } 2813 2814 /* XXX tiling? fixup extend none? */ 2815 if (x < 0 || y < 0 || 2816 x + width > src->pDrawable->width || 2817 y + height > src->pDrawable->height) { 2818 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n", 2819 __FUNCTION__, 2820 x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType)); 2821 if (src->repeat && src->repeatType == RepeatNormal) { 2822 x = x % src->pDrawable->width; 2823 y = y % src->pDrawable->height; 2824 if (x < 0) 2825 x += src->pDrawable->width; 2826 if (y < 0) 2827 y += src->pDrawable->height; 2828 if (x + width > src->pDrawable->width || 2829 y + height > src->pDrawable->height) 2830 return false; 2831 } else 2832 return false; 2833 } 2834 2835 get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty); 2836 x += tx + src->pDrawable->x; 2837 y += ty + src->pDrawable->y; 2838 if (x < 0 || y < 0 || 2839 x + width > src_pixmap->drawable.width || 2840 y + height > src_pixmap->drawable.height) { 2841 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n", 2842 __FUNCTION__, 2843 x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height)); 2844 return false; 2845 } 2846 2847 tmp->u.blt.src_pixmap = src_pixmap; 2848 tmp->u.blt.sx = x - dst_x; 2849 tmp->u.blt.sy = y - dst_y; 2850 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n", 2851 __FUNCTION__, 2852 tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); 2853 2854 src_box.x1 = x; 2855 src_box.y1 = y; 2856 src_box.x2 = x + width; 2857 src_box.y2 = y + height; 2858 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); 2859 if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) { 2860 DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n", 2861 __FUNCTION__, 2862 src_pixmap->drawable.width < sna->render.max_3d_size, 2863 src_pixmap->drawable.height < sna->render.max_3d_size, 2864 bo->tiling, bo->pitch)); 2865 2866 if (src_pixmap->drawable.width <= sna->render.max_3d_size && 2867 src_pixmap->drawable.height <= sna->render.max_3d_size && 2868 bo->pitch <= sna->render.max_3d_pitch && 2869 (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0) 2870 { 2871 return false; 2872 } 2873 2874 bo = NULL; 2875 } 2876 2877 hint = 0; 2878 if (bo || can_render(sna)) { 2879 hint |= PREFER_GPU; 2880 if ((flags & COMPOSITE_PARTIAL) == 0) { 2881 hint |= IGNORE_DAMAGE; 2882 if (width == tmp->dst.pixmap->drawable.width && 2883 height == tmp->dst.pixmap->drawable.height) 2884 hint |= REPLACES; 2885 } 2886 if (bo) 2887 hint |= FORCE_GPU; 2888 } 2889 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2890 &dst_box, &tmp->damage); 2891 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); 2892 2893 if (tmp->dst.bo && hint & REPLACES) { 2894 struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); 2895 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2896 } 2897 2898 if (tmp->dst.pixmap == src_pixmap) 2899 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); 2900 2901 ret = false; 2902 if (bo) { 2903 if (!tmp->dst.bo) { 2904 DBG(("%s: fallback -- unaccelerated read back\n", 2905 __FUNCTION__)); 2906fallback: 2907 if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo)) 2908 goto put; 2909 } else if (!kgem_bo_can_blt(&sna->kgem, bo)) { 2910 DBG(("%s: fallback -- cannot blit from source\n", 2911 __FUNCTION__)); 2912 goto fallback; 2913 } else if (bo->snoop && tmp->dst.bo->snoop) { 2914 DBG(("%s: fallback -- can not copy between snooped bo\n", 2915 __FUNCTION__)); 2916 goto put; 2917 } else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2918 DBG(("%s: fallback -- unaccelerated upload\n", 2919 __FUNCTION__)); 2920 goto fallback; 2921 } else if ((flags & COMPOSITE_UPLOAD) == 0) { 2922 ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); 2923 if (!ret) 2924 goto fallback; 2925 } 2926 } else { 2927 RegionRec region; 2928 2929put: 2930 if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) { 2931 DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__)); 2932 tmp->dst.bo = NULL; 2933 tmp->damage = NULL; 2934 } 2935 2936 if (tmp->dst.bo == NULL) { 2937 hint = MOVE_INPLACE_HINT | MOVE_WRITE; 2938 if (flags & COMPOSITE_PARTIAL) 2939 hint |= MOVE_READ; 2940 2941 region.extents = dst_box; 2942 region.data = NULL; 2943 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, 2944 ®ion, hint)) 2945 return false; 2946 2947 assert(tmp->damage == NULL); 2948 } 2949 2950 region.extents = src_box; 2951 region.data = NULL; 2952 if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable, 2953 ®ion, MOVE_READ)) 2954 return false; 2955 2956 ret = prepare_blt_put(sna, tmp, alpha_fixup); 2957 } 2958 2959 return ret; 2960} 2961 2962static void convert_done(struct sna *sna, const struct sna_composite_op *op) 2963{ 2964 struct kgem *kgem = &sna->kgem; 2965 2966 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 2967 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 2968 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 2969 _kgem_submit(kgem); 2970 } 2971 2972 kgem_bo_destroy(kgem, op->src.bo); 2973 sna_render_composite_redirect_done(sna, op); 2974} 2975 2976static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op) 2977{ 2978 struct kgem *kgem = &sna->kgem; 2979 2980 if (kgem_check_batch(kgem, 3)) { 2981 uint32_t *b = kgem->batch + kgem->nbatch; 2982 assert(sna->kgem.mode == KGEM_BLT); 2983 b[0] = XY_SETUP_CLIP; 2984 b[1] = b[2] = 0; 2985 kgem->nbatch += 3; 2986 assert(kgem->nbatch < kgem->surface); 2987 } 2988 2989 convert_done(sna, op); 2990} 2991 2992bool 2993sna_blt_composite__convert(struct sna *sna, 2994 int x, int y, 2995 int width, int height, 2996 struct sna_composite_op *tmp) 2997{ 2998 uint32_t alpha_fixup; 2999 int sx, sy; 3000 uint8_t op; 3001 3002#if DEBUG_NO_BLT || NO_BLT_COMPOSITE 3003 return false; 3004#endif 3005 3006 DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__, 3007 tmp->src.bo->handle, tmp->dst.bo->handle, 3008 tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0)); 3009 3010 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) || 3011 !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) { 3012 DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__)); 3013 return false; 3014 } 3015 3016 if (tmp->src.transform) { 3017 DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__)); 3018 return false; 3019 } 3020 3021 if (tmp->src.filter == PictFilterConvolution) { 3022 DBG(("%s: convolutions filters not handled\n", 3023 __FUNCTION__)); 3024 return false; 3025 } 3026 3027 op = tmp->op; 3028 if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0) 3029 op = PictOpSrc; 3030 if (op != PictOpSrc) { 3031 DBG(("%s: unsupported op [%d] for blitting\n", 3032 __FUNCTION__, op)); 3033 return false; 3034 } 3035 3036 alpha_fixup = 0; 3037 if (!(tmp->dst.format == tmp->src.pict_format || 3038 tmp->dst.format == alphaless(tmp->src.pict_format) || 3039 (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) && 3040 sna_get_pixel_from_rgba(&alpha_fixup, 3041 0, 0, 0, 0xffff, 3042 tmp->dst.format)))) { 3043 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", 3044 __FUNCTION__, 3045 (unsigned)tmp->src.pict_format, 3046 (unsigned)tmp->dst.format)); 3047 return false; 3048 } 3049 3050 sx = tmp->src.offset[0]; 3051 sy = tmp->src.offset[1]; 3052 3053 x += sx; 3054 y += sy; 3055 if (x < 0 || y < 0 || 3056 x + width > tmp->src.width || 3057 y + height > tmp->src.height) { 3058 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n", 3059 __FUNCTION__, 3060 x, y, x+width, y+width, tmp->src.width, tmp->src.height)); 3061 if (tmp->src.repeat == RepeatNormal) { 3062 int xx = x % tmp->src.width; 3063 int yy = y % tmp->src.height; 3064 if (xx < 0) 3065 xx += tmp->src.width; 3066 if (yy < 0) 3067 yy += tmp->src.height; 3068 if (xx + width > tmp->src.width || 3069 yy + height > tmp->src.height) 3070 return false; 3071 3072 sx += xx - x; 3073 sy += yy - y; 3074 } else 3075 return false; 3076 } 3077 3078 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n", 3079 __FUNCTION__, 3080 tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup)); 3081 3082 tmp->u.blt.src_pixmap = NULL; 3083 tmp->u.blt.sx = sx; 3084 tmp->u.blt.sy = sy; 3085 3086 kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo); 3087 if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) { 3088 kgem_submit(&sna->kgem); 3089 if (!kgem_check_many_bo_fenced(&sna->kgem, 3090 tmp->dst.bo, tmp->src.bo, NULL)) { 3091 DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); 3092 return sna_tiling_blt_composite(sna, tmp, tmp->src.bo, 3093 PICT_FORMAT_BPP(tmp->src.pict_format), 3094 alpha_fixup); 3095 } 3096 _kgem_set_mode(&sna->kgem, KGEM_BLT); 3097 } 3098 kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo); 3099 3100 if (alpha_fixup) { 3101 tmp->blt = blt_composite_copy_with_alpha; 3102 tmp->box = blt_composite_copy_box_with_alpha; 3103 tmp->boxes = blt_composite_copy_boxes_with_alpha; 3104 3105 if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt, 3106 tmp->src.bo, tmp->dst.bo, 3107 PICT_FORMAT_BPP(tmp->src.pict_format), 3108 alpha_fixup)) 3109 return false; 3110 } else { 3111 tmp->blt = blt_composite_copy; 3112 tmp->box = blt_composite_copy_box; 3113 tmp->boxes = blt_composite_copy_boxes; 3114 if (sna->kgem.gen >= 0100) 3115 tmp->thread_boxes = blt_composite_copy_boxes__thread64; 3116 else 3117 tmp->thread_boxes = blt_composite_copy_boxes__thread; 3118 3119 if (!sna_blt_copy_init(sna, &tmp->u.blt, 3120 tmp->src.bo, tmp->dst.bo, 3121 PICT_FORMAT_BPP(tmp->src.pict_format), 3122 GXcopy)) 3123 return false; 3124 } 3125 3126 tmp->done = convert_done; 3127 if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo) 3128 tmp->done = gen6_convert_done; 3129 3130 return true; 3131} 3132 3133static void sna_blt_fill_op_blt(struct sna *sna, 3134 const struct sna_fill_op *op, 3135 int16_t x, int16_t y, 3136 int16_t width, int16_t height) 3137{ 3138 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3139 const struct sna_blt_state *blt = &op->base.u.blt; 3140 3141 __sna_blt_fill_begin(sna, blt); 3142 3143 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3144 sna->blt_state.fill_pixel = blt->pixel; 3145 sna->blt_state.fill_alu = blt->alu; 3146 } 3147 3148 sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height); 3149} 3150 3151fastcall static void sna_blt_fill_op_box(struct sna *sna, 3152 const struct sna_fill_op *op, 3153 const BoxRec *box) 3154{ 3155 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3156 const struct sna_blt_state *blt = &op->base.u.blt; 3157 3158 __sna_blt_fill_begin(sna, blt); 3159 3160 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3161 sna->blt_state.fill_pixel = blt->pixel; 3162 sna->blt_state.fill_alu = blt->alu; 3163 } 3164 3165 _sna_blt_fill_box(sna, &op->base.u.blt, box); 3166} 3167 3168fastcall static void sna_blt_fill_op_boxes(struct sna *sna, 3169 const struct sna_fill_op *op, 3170 const BoxRec *box, 3171 int nbox) 3172{ 3173 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3174 const struct sna_blt_state *blt = &op->base.u.blt; 3175 3176 __sna_blt_fill_begin(sna, blt); 3177 3178 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3179 sna->blt_state.fill_pixel = blt->pixel; 3180 sna->blt_state.fill_alu = blt->alu; 3181 } 3182 3183 _sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox); 3184} 3185 3186static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy) 3187{ 3188 union { 3189 DDXPointRec pt; 3190 uint32_t i; 3191 } u; 3192 3193 u.pt.x = pt->x + dx; 3194 u.pt.y = pt->y + dy; 3195 3196 return cmd | (uint64_t)u.i<<32; 3197} 3198 3199fastcall static void sna_blt_fill_op_points(struct sna *sna, 3200 const struct sna_fill_op *op, 3201 int16_t dx, int16_t dy, 3202 const DDXPointRec *p, int n) 3203{ 3204 const struct sna_blt_state *blt = &op->base.u.blt; 3205 struct kgem *kgem = &sna->kgem; 3206 uint32_t cmd; 3207 3208 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n)); 3209 3210 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3211 __sna_blt_fill_begin(sna, blt); 3212 3213 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3214 sna->blt_state.fill_pixel = blt->pixel; 3215 sna->blt_state.fill_alu = blt->alu; 3216 } 3217 3218 if (!kgem_check_batch(kgem, 2)) 3219 sna_blt_fill_begin(sna, blt); 3220 3221 cmd = XY_PIXEL_BLT; 3222 if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling) 3223 cmd |= BLT_DST_TILED; 3224 3225 do { 3226 uint32_t *b = kgem->batch + kgem->nbatch; 3227 int n_this_time, rem; 3228 3229 assert(sna->kgem.mode == KGEM_BLT); 3230 n_this_time = n; 3231 rem = kgem_batch_space(kgem); 3232 if (2*n_this_time > rem) 3233 n_this_time = rem / 2; 3234 assert(n_this_time); 3235 n -= n_this_time; 3236 3237 kgem->nbatch += 2 * n_this_time; 3238 assert(kgem->nbatch < kgem->surface); 3239 3240 if ((dx|dy) == 0) { 3241 do { 3242 *(uint64_t *)b = pt_add(cmd, p++, 0, 0); 3243 b += 2; 3244 } while (--n_this_time); 3245 } else { 3246 do { 3247 *(uint64_t *)b = pt_add(cmd, p++, dx, dy); 3248 b += 2; 3249 } while (--n_this_time); 3250 } 3251 3252 if (!n) 3253 return; 3254 3255 sna_blt_fill_begin(sna, blt); 3256 } while (1); 3257} 3258 3259bool sna_blt_fill(struct sna *sna, uint8_t alu, 3260 struct kgem_bo *bo, int bpp, 3261 uint32_t pixel, 3262 struct sna_fill_op *fill) 3263{ 3264#if DEBUG_NO_BLT || NO_BLT_FILL 3265 return false; 3266#endif 3267 3268 DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp)); 3269 3270 if (!kgem_bo_can_blt(&sna->kgem, bo)) { 3271 DBG(("%s: rejected due to incompatible Y-tiling\n", 3272 __FUNCTION__)); 3273 return false; 3274 } 3275 3276 if (!sna_blt_fill_init(sna, &fill->base.u.blt, 3277 bo, bpp, alu, pixel)) 3278 return false; 3279 3280 assert(sna->kgem.mode == KGEM_BLT); 3281 fill->blt = sna_blt_fill_op_blt; 3282 fill->box = sna_blt_fill_op_box; 3283 fill->boxes = sna_blt_fill_op_boxes; 3284 fill->points = sna_blt_fill_op_points; 3285 fill->done = 3286 (void (*)(struct sna *, const struct sna_fill_op *))nop_done; 3287 return true; 3288} 3289 3290static void sna_blt_copy_op_blt(struct sna *sna, 3291 const struct sna_copy_op *op, 3292 int16_t src_x, int16_t src_y, 3293 int16_t width, int16_t height, 3294 int16_t dst_x, int16_t dst_y) 3295{ 3296 sna_blt_copy_one(sna, &op->base.u.blt, 3297 src_x, src_y, 3298 width, height, 3299 dst_x, dst_y); 3300} 3301 3302bool sna_blt_copy(struct sna *sna, uint8_t alu, 3303 struct kgem_bo *src, 3304 struct kgem_bo *dst, 3305 int bpp, 3306 struct sna_copy_op *op) 3307{ 3308#if DEBUG_NO_BLT || NO_BLT_COPY 3309 return false; 3310#endif 3311 3312 if (!kgem_bo_can_blt(&sna->kgem, src)) 3313 return false; 3314 3315 if (!kgem_bo_can_blt(&sna->kgem, dst)) 3316 return false; 3317 3318 if (!sna_blt_copy_init(sna, &op->base.u.blt, 3319 src, dst, 3320 bpp, alu)) 3321 return false; 3322 3323 op->blt = sna_blt_copy_op_blt; 3324 if (sna->kgem.gen >= 060 && src == dst) 3325 op->done = (void (*)(struct sna *, const struct sna_copy_op *)) 3326 gen6_blt_copy_done; 3327 else 3328 op->done = (void (*)(struct sna *, const struct sna_copy_op *)) 3329 nop_done; 3330 return true; 3331} 3332 3333static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, 3334 struct kgem_bo *bo, int bpp, 3335 uint32_t color, 3336 const BoxRec *box) 3337{ 3338 struct kgem *kgem = &sna->kgem; 3339 uint32_t br13, cmd, *b; 3340 bool overwrites; 3341 3342 assert(kgem_bo_can_blt (kgem, bo)); 3343 3344 DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__, 3345 box->x1, box->y1, box->x2, box->y2)); 3346 3347 assert(box->x1 >= 0); 3348 assert(box->y1 >= 0); 3349 3350 cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4); 3351 br13 = bo->pitch; 3352 if (kgem->gen >= 040 && bo->tiling) { 3353 cmd |= BLT_DST_TILED; 3354 br13 >>= 2; 3355 } 3356 assert(br13 <= MAXSHORT); 3357 3358 br13 |= fill_ROP[alu] << 16; 3359 switch (bpp) { 3360 default: assert(0); 3361 case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3362 br13 |= 1 << 25; /* RGB8888 */ 3363 case 16: br13 |= 1 << 24; /* RGB565 */ 3364 case 8: break; 3365 } 3366 3367 /* All too frequently one blt completely overwrites the previous */ 3368 overwrites = alu == GXcopy || alu == GXclear || alu == GXset; 3369 if (overwrites) { 3370 if (sna->kgem.gen >= 0100) { 3371 if (kgem->nbatch >= 7 && 3372 kgem->batch[kgem->nbatch-7] == cmd && 3373 *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box && 3374 kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { 3375 DBG(("%s: replacing last fill\n", __FUNCTION__)); 3376 kgem->batch[kgem->nbatch-6] = br13; 3377 kgem->batch[kgem->nbatch-1] = color; 3378 return true; 3379 } 3380 if (kgem->nbatch >= 10 && 3381 (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD && 3382 *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box && 3383 kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { 3384 DBG(("%s: replacing last copy\n", __FUNCTION__)); 3385 kgem->batch[kgem->nbatch-10] = cmd; 3386 kgem->batch[kgem->nbatch-8] = br13; 3387 kgem->batch[kgem->nbatch-4] = color; 3388 /* Keep the src bo as part of the execlist, just remove 3389 * its relocation entry. 3390 */ 3391 kgem->nreloc--; 3392 kgem->nbatch -= 3; 3393 return true; 3394 } 3395 } else { 3396 if (kgem->nbatch >= 6 && 3397 kgem->batch[kgem->nbatch-6] == cmd && 3398 *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box && 3399 kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { 3400 DBG(("%s: replacing last fill\n", __FUNCTION__)); 3401 kgem->batch[kgem->nbatch-5] = br13; 3402 kgem->batch[kgem->nbatch-1] = color; 3403 return true; 3404 } 3405 if (kgem->nbatch >= 8 && 3406 (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD && 3407 *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box && 3408 kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { 3409 DBG(("%s: replacing last copy\n", __FUNCTION__)); 3410 kgem->batch[kgem->nbatch-8] = cmd; 3411 kgem->batch[kgem->nbatch-7] = br13; 3412 kgem->batch[kgem->nbatch-3] = color; 3413 /* Keep the src bo as part of the execlist, just remove 3414 * its relocation entry. 3415 */ 3416 kgem->nreloc--; 3417 kgem->nbatch -= 2; 3418 return true; 3419 } 3420 } 3421 } 3422 3423 /* If we are currently emitting SCANLINES, keep doing so */ 3424 if (sna->blt_state.fill_bo == bo->unique_id && 3425 sna->blt_state.fill_pixel == color && 3426 (sna->blt_state.fill_alu == alu || 3427 sna->blt_state.fill_alu == ~alu)) { 3428 DBG(("%s: matching last fill, converting to scanlines\n", 3429 __FUNCTION__)); 3430 return false; 3431 } 3432 3433 kgem_set_mode(kgem, KGEM_BLT, bo); 3434 if (!kgem_check_batch(kgem, 7) || 3435 !kgem_check_reloc(kgem, 1) || 3436 !kgem_check_bo_fenced(kgem, bo)) { 3437 kgem_submit(kgem); 3438 if (!kgem_check_bo_fenced(&sna->kgem, bo)) 3439 return false; 3440 3441 _kgem_set_mode(kgem, KGEM_BLT); 3442 } 3443 kgem_bcs_set_tiling(&sna->kgem, NULL, bo); 3444 3445 assert(kgem_check_batch(kgem, 6)); 3446 assert(kgem_check_reloc(kgem, 1)); 3447 3448 assert(sna->kgem.mode == KGEM_BLT); 3449 b = kgem->batch + kgem->nbatch; 3450 b[0] = cmd; 3451 b[1] = br13; 3452 *(uint64_t *)(b+2) = *(const uint64_t *)box; 3453 if (kgem->gen >= 0100) { 3454 *(uint64_t *)(b+4) = 3455 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 3456 I915_GEM_DOMAIN_RENDER << 16 | 3457 I915_GEM_DOMAIN_RENDER | 3458 KGEM_RELOC_FENCED, 3459 0); 3460 b[6] = color; 3461 kgem->nbatch += 7; 3462 } else { 3463 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 3464 I915_GEM_DOMAIN_RENDER << 16 | 3465 I915_GEM_DOMAIN_RENDER | 3466 KGEM_RELOC_FENCED, 3467 0); 3468 b[5] = color; 3469 kgem->nbatch += 6; 3470 } 3471 assert(kgem->nbatch < kgem->surface); 3472 3473 sna->blt_state.fill_bo = bo->unique_id; 3474 sna->blt_state.fill_pixel = color; 3475 sna->blt_state.fill_alu = ~alu; 3476 return true; 3477} 3478 3479bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, 3480 struct kgem_bo *bo, int bpp, 3481 uint32_t pixel, 3482 const BoxRec *box, int nbox) 3483{ 3484 struct kgem *kgem = &sna->kgem; 3485 uint32_t br13, cmd; 3486 3487#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES 3488 return false; 3489#endif 3490 3491 DBG(("%s (%d, %08x, %d) x %d\n", 3492 __FUNCTION__, bpp, pixel, alu, nbox)); 3493 3494 if (!kgem_bo_can_blt(kgem, bo)) { 3495 DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__)); 3496 return false; 3497 } 3498 3499 if (alu == GXclear) 3500 pixel = 0; 3501 else if (alu == GXcopy) { 3502 if (pixel == 0) 3503 alu = GXclear; 3504 else if (pixel == -1) 3505 alu = GXset; 3506 } 3507 3508 if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box)) 3509 return true; 3510 3511 br13 = bo->pitch; 3512 cmd = XY_SCANLINE_BLT; 3513 if (kgem->gen >= 040 && bo->tiling) { 3514 cmd |= 1 << 11; 3515 br13 >>= 2; 3516 } 3517 assert(br13 <= MAXSHORT); 3518 3519 br13 |= 1<<31 | fill_ROP[alu] << 16; 3520 switch (bpp) { 3521 default: assert(0); 3522 case 32: br13 |= 1 << 25; /* RGB8888 */ 3523 case 16: br13 |= 1 << 24; /* RGB565 */ 3524 case 8: break; 3525 } 3526 3527 kgem_set_mode(kgem, KGEM_BLT, bo); 3528 if (!kgem_check_batch(kgem, 14) || 3529 !kgem_check_bo_fenced(kgem, bo)) { 3530 kgem_submit(kgem); 3531 if (!kgem_check_bo_fenced(&sna->kgem, bo)) 3532 return false; 3533 _kgem_set_mode(kgem, KGEM_BLT); 3534 } 3535 3536 if (sna->blt_state.fill_bo != bo->unique_id || 3537 sna->blt_state.fill_pixel != pixel || 3538 sna->blt_state.fill_alu != alu) 3539 { 3540 uint32_t *b; 3541 3542 if (!kgem_check_batch(kgem, 24) || 3543 !kgem_check_reloc(kgem, 1)) { 3544 _kgem_submit(kgem); 3545 if (!kgem_check_bo_fenced(&sna->kgem, bo)) 3546 return false; 3547 _kgem_set_mode(kgem, KGEM_BLT); 3548 } 3549 3550 kgem_bcs_set_tiling(&sna->kgem, NULL, bo); 3551 3552 assert(sna->kgem.mode == KGEM_BLT); 3553 b = kgem->batch + kgem->nbatch; 3554 if (kgem->gen >= 0100) { 3555 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 3556 if (bpp == 32) 3557 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3558 if (bo->tiling) 3559 b[0] |= BLT_DST_TILED; 3560 b[1] = br13; 3561 b[2] = 0; 3562 b[3] = 0; 3563 *(uint64_t *)(b+4) = 3564 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 3565 I915_GEM_DOMAIN_RENDER << 16 | 3566 I915_GEM_DOMAIN_RENDER | 3567 KGEM_RELOC_FENCED, 3568 0); 3569 b[6] = pixel; 3570 b[7] = pixel; 3571 b[8] = 0; 3572 b[9] = 0; 3573 kgem->nbatch += 10; 3574 } else { 3575 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 3576 if (bpp == 32) 3577 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3578 if (bo->tiling && kgem->gen >= 040) 3579 b[0] |= BLT_DST_TILED; 3580 b[1] = br13; 3581 b[2] = 0; 3582 b[3] = 0; 3583 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 3584 I915_GEM_DOMAIN_RENDER << 16 | 3585 I915_GEM_DOMAIN_RENDER | 3586 KGEM_RELOC_FENCED, 3587 0); 3588 b[5] = pixel; 3589 b[6] = pixel; 3590 b[7] = 0; 3591 b[8] = 0; 3592 kgem->nbatch += 9; 3593 } 3594 assert(kgem->nbatch < kgem->surface); 3595 3596 sna->blt_state.fill_bo = bo->unique_id; 3597 sna->blt_state.fill_pixel = pixel; 3598 sna->blt_state.fill_alu = alu; 3599 } 3600 3601 do { 3602 int nbox_this_time, rem; 3603 3604 nbox_this_time = nbox; 3605 rem = kgem_batch_space(kgem); 3606 if (3*nbox_this_time > rem) 3607 nbox_this_time = rem / 3; 3608 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 3609 __FUNCTION__, nbox_this_time, nbox, rem)); 3610 assert(nbox_this_time > 0); 3611 nbox -= nbox_this_time; 3612 3613 assert(sna->kgem.mode == KGEM_BLT); 3614 do { 3615 uint32_t *b; 3616 3617 DBG(("%s: (%d, %d), (%d, %d): %08x\n", 3618 __FUNCTION__, 3619 box->x1, box->y1, 3620 box->x2, box->y2, 3621 pixel)); 3622 3623 assert(box->x1 >= 0); 3624 assert(box->y1 >= 0); 3625 assert(box->y2 * bo->pitch <= kgem_bo_size(bo)); 3626 3627 b = kgem->batch + kgem->nbatch; 3628 kgem->nbatch += 3; 3629 assert(kgem->nbatch < kgem->surface); 3630 b[0] = cmd; 3631 *(uint64_t *)(b+1) = *(const uint64_t *)box; 3632 box++; 3633 } while (--nbox_this_time); 3634 3635 if (nbox) { 3636 uint32_t *b; 3637 3638 _kgem_submit(kgem); 3639 _kgem_set_mode(kgem, KGEM_BLT); 3640 kgem_bcs_set_tiling(&sna->kgem, NULL, bo); 3641 3642 assert(sna->kgem.mode == KGEM_BLT); 3643 b = kgem->batch + kgem->nbatch; 3644 if (kgem->gen >= 0100) { 3645 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 3646 if (bpp == 32) 3647 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3648 if (bo->tiling) 3649 b[0] |= BLT_DST_TILED; 3650 b[1] = br13; 3651 b[2] = 0; 3652 b[3] = 0; 3653 *(uint64_t *)(b+4) = 3654 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 3655 I915_GEM_DOMAIN_RENDER << 16 | 3656 I915_GEM_DOMAIN_RENDER | 3657 KGEM_RELOC_FENCED, 3658 0); 3659 b[6] = pixel; 3660 b[7] = pixel; 3661 b[8] = 0; 3662 b[9] = 0; 3663 kgem->nbatch += 10; 3664 } else { 3665 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 3666 if (bpp == 32) 3667 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3668 if (bo->tiling && kgem->gen >= 040) 3669 b[0] |= BLT_DST_TILED; 3670 b[1] = br13; 3671 b[2] = 0; 3672 b[3] = 0; 3673 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 3674 I915_GEM_DOMAIN_RENDER << 16 | 3675 I915_GEM_DOMAIN_RENDER | 3676 KGEM_RELOC_FENCED, 3677 0); 3678 b[5] = pixel; 3679 b[6] = pixel; 3680 b[7] = 0; 3681 b[8] = 0; 3682 kgem->nbatch += 9; 3683 } 3684 assert(kgem->nbatch < kgem->surface); 3685 assert(kgem_check_batch(kgem, 3)); 3686 } 3687 } while (nbox); 3688 3689 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 3690 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 3691 _kgem_submit(kgem); 3692 } 3693 3694 return true; 3695} 3696 3697bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, 3698 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3699 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3700 int bpp, const BoxRec *box, int nbox) 3701{ 3702 struct kgem *kgem = &sna->kgem; 3703 unsigned src_pitch, br13, cmd; 3704 3705#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES 3706 return false; 3707#endif 3708 3709 DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n", 3710 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox, 3711 src_bo->tiling, dst_bo->tiling, 3712 src_bo->pitch, dst_bo->pitch)); 3713 assert(nbox); 3714 3715 if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) { 3716 DBG(("%s: cannot blt to src? %d or dst? %d\n", 3717 __FUNCTION__, 3718 kgem_bo_can_blt(kgem, src_bo), 3719 kgem_bo_can_blt(kgem, dst_bo))); 3720 return false; 3721 } 3722 3723 cmd = XY_SRC_COPY_BLT_CMD; 3724 if (bpp == 32) 3725 cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3726 3727 src_pitch = src_bo->pitch; 3728 if (kgem->gen >= 040 && src_bo->tiling) { 3729 cmd |= BLT_SRC_TILED; 3730 src_pitch >>= 2; 3731 } 3732 assert(src_pitch <= MAXSHORT); 3733 3734 br13 = dst_bo->pitch; 3735 if (kgem->gen >= 040 && dst_bo->tiling) { 3736 cmd |= BLT_DST_TILED; 3737 br13 >>= 2; 3738 } 3739 assert(br13 <= MAXSHORT); 3740 3741 br13 |= copy_ROP[alu] << 16; 3742 switch (bpp) { 3743 default: assert(0); 3744 case 32: br13 |= 1 << 25; /* RGB8888 */ 3745 case 16: br13 |= 1 << 24; /* RGB565 */ 3746 case 8: break; 3747 } 3748 3749 /* Compare first box against a previous fill */ 3750 if ((alu == GXcopy || alu == GXclear || alu == GXset) && 3751 kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) { 3752 if (kgem->gen >= 0100) { 3753 if (kgem->nbatch >= 7 && 3754 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && 3755 kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 3756 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 3757 DBG(("%s: deleting last fill\n", __FUNCTION__)); 3758 kgem->nbatch -= 7; 3759 kgem->nreloc--; 3760 } 3761 } else { 3762 if (kgem->nbatch >= 6 && 3763 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 3764 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 3765 kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 3766 DBG(("%s: deleting last fill\n", __FUNCTION__)); 3767 kgem->nbatch -= 6; 3768 kgem->nreloc--; 3769 } 3770 } 3771 } 3772 3773 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 3774 if (!kgem_check_batch(kgem, 10) || 3775 !kgem_check_reloc(kgem, 2) || 3776 !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 3777 kgem_submit(kgem); 3778 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 3779 DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__)); 3780 return sna_tiling_blt_copy_boxes(sna, alu, 3781 src_bo, src_dx, src_dy, 3782 dst_bo, dst_dx, dst_dy, 3783 bpp, box, nbox); 3784 } 3785 _kgem_set_mode(kgem, KGEM_BLT); 3786 } 3787 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 3788 3789 if ((dst_dx | dst_dy) == 0) { 3790 if (kgem->gen >= 0100) { 3791 uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8; 3792 do { 3793 int nbox_this_time, rem; 3794 3795 nbox_this_time = nbox; 3796 rem = kgem_batch_space(kgem); 3797 if (10*nbox_this_time > rem) 3798 nbox_this_time = rem / 10; 3799 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3800 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3801 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 3802 __FUNCTION__, nbox_this_time, nbox, rem)); 3803 assert(nbox_this_time > 0); 3804 nbox -= nbox_this_time; 3805 3806 assert(sna->kgem.mode == KGEM_BLT); 3807 do { 3808 uint32_t *b = kgem->batch + kgem->nbatch; 3809 3810 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3811 __FUNCTION__, 3812 box->x1, box->y1, 3813 box->x2 - box->x1, box->y2 - box->y1)); 3814 3815 assert(box->x1 + src_dx >= 0); 3816 assert(box->y1 + src_dy >= 0); 3817 assert(box->x1 + src_dx <= INT16_MAX); 3818 assert(box->y1 + src_dy <= INT16_MAX); 3819 3820 assert(box->x1 >= 0); 3821 assert(box->y1 >= 0); 3822 3823 *(uint64_t *)&b[0] = hdr; 3824 *(uint64_t *)&b[2] = *(const uint64_t *)box; 3825 *(uint64_t *)(b+4) = 3826 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 3827 I915_GEM_DOMAIN_RENDER << 16 | 3828 I915_GEM_DOMAIN_RENDER | 3829 KGEM_RELOC_FENCED, 3830 0); 3831 b[6] = add2(b[2], src_dx, src_dy); 3832 b[7] = src_pitch; 3833 *(uint64_t *)(b+8) = 3834 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 3835 I915_GEM_DOMAIN_RENDER << 16 | 3836 KGEM_RELOC_FENCED, 3837 0); 3838 kgem->nbatch += 10; 3839 assert(kgem->nbatch < kgem->surface); 3840 box++; 3841 } while (--nbox_this_time); 3842 3843 if (!nbox) 3844 break; 3845 3846 _kgem_submit(kgem); 3847 _kgem_set_mode(kgem, KGEM_BLT); 3848 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 3849 } while (1); 3850 } else { 3851 uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; 3852 do { 3853 int nbox_this_time, rem; 3854 3855 nbox_this_time = nbox; 3856 rem = kgem_batch_space(kgem); 3857 if (8*nbox_this_time > rem) 3858 nbox_this_time = rem / 8; 3859 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3860 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3861 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 3862 __FUNCTION__, nbox_this_time, nbox, rem)); 3863 assert(nbox_this_time > 0); 3864 nbox -= nbox_this_time; 3865 3866 assert(sna->kgem.mode == KGEM_BLT); 3867 do { 3868 uint32_t *b = kgem->batch + kgem->nbatch; 3869 3870 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3871 __FUNCTION__, 3872 box->x1, box->y1, 3873 box->x2 - box->x1, box->y2 - box->y1)); 3874 3875 assert(box->x1 + src_dx >= 0); 3876 assert(box->y1 + src_dy >= 0); 3877 assert(box->x1 + src_dx <= INT16_MAX); 3878 assert(box->y1 + src_dy <= INT16_MAX); 3879 3880 assert(box->x1 >= 0); 3881 assert(box->y1 >= 0); 3882 3883 *(uint64_t *)&b[0] = hdr; 3884 *(uint64_t *)&b[2] = *(const uint64_t *)box; 3885 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 3886 I915_GEM_DOMAIN_RENDER << 16 | 3887 I915_GEM_DOMAIN_RENDER | 3888 KGEM_RELOC_FENCED, 3889 0); 3890 b[5] = add2(b[2], src_dx, src_dy); 3891 b[6] = src_pitch; 3892 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 3893 I915_GEM_DOMAIN_RENDER << 16 | 3894 KGEM_RELOC_FENCED, 3895 0); 3896 kgem->nbatch += 8; 3897 assert(kgem->nbatch < kgem->surface); 3898 box++; 3899 } while (--nbox_this_time); 3900 3901 if (!nbox) 3902 break; 3903 3904 _kgem_submit(kgem); 3905 _kgem_set_mode(kgem, KGEM_BLT); 3906 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 3907 } while (1); 3908 } 3909 } else { 3910 if (kgem->gen >= 0100) { 3911 cmd |= 8; 3912 do { 3913 int nbox_this_time, rem; 3914 3915 nbox_this_time = nbox; 3916 rem = kgem_batch_space(kgem); 3917 if (10*nbox_this_time > rem) 3918 nbox_this_time = rem / 10; 3919 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3920 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3921 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 3922 __FUNCTION__, nbox_this_time, nbox, rem)); 3923 assert(nbox_this_time > 0); 3924 nbox -= nbox_this_time; 3925 3926 assert(sna->kgem.mode == KGEM_BLT); 3927 do { 3928 uint32_t *b = kgem->batch + kgem->nbatch; 3929 3930 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3931 __FUNCTION__, 3932 box->x1, box->y1, 3933 box->x2 - box->x1, box->y2 - box->y1)); 3934 3935 assert(box->x1 + src_dx >= 0); 3936 assert(box->y1 + src_dy >= 0); 3937 3938 assert(box->x1 + dst_dx >= 0); 3939 assert(box->y1 + dst_dy >= 0); 3940 3941 b[0] = cmd; 3942 b[1] = br13; 3943 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 3944 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 3945 *(uint64_t *)(b+4) = 3946 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 3947 I915_GEM_DOMAIN_RENDER << 16 | 3948 I915_GEM_DOMAIN_RENDER | 3949 KGEM_RELOC_FENCED, 3950 0); 3951 b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 3952 b[7] = src_pitch; 3953 *(uint64_t *)(b+8) = 3954 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 3955 I915_GEM_DOMAIN_RENDER << 16 | 3956 KGEM_RELOC_FENCED, 3957 0); 3958 kgem->nbatch += 10; 3959 assert(kgem->nbatch < kgem->surface); 3960 box++; 3961 } while (--nbox_this_time); 3962 3963 if (!nbox) 3964 break; 3965 3966 _kgem_submit(kgem); 3967 _kgem_set_mode(kgem, KGEM_BLT); 3968 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 3969 } while (1); 3970 } else { 3971 cmd |= 6; 3972 do { 3973 int nbox_this_time, rem; 3974 3975 nbox_this_time = nbox; 3976 rem = kgem_batch_space(kgem); 3977 if (8*nbox_this_time > rem) 3978 nbox_this_time = rem / 8; 3979 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3980 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3981 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n", 3982 __FUNCTION__, nbox_this_time, nbox, rem)); 3983 assert(nbox_this_time > 0); 3984 nbox -= nbox_this_time; 3985 3986 assert(sna->kgem.mode == KGEM_BLT); 3987 do { 3988 uint32_t *b = kgem->batch + kgem->nbatch; 3989 3990 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3991 __FUNCTION__, 3992 box->x1, box->y1, 3993 box->x2 - box->x1, box->y2 - box->y1)); 3994 3995 assert(box->x1 + src_dx >= 0); 3996 assert(box->y1 + src_dy >= 0); 3997 3998 assert(box->x1 + dst_dx >= 0); 3999 assert(box->y1 + dst_dy >= 0); 4000 4001 b[0] = cmd; 4002 b[1] = br13; 4003 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 4004 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 4005 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 4006 I915_GEM_DOMAIN_RENDER << 16 | 4007 I915_GEM_DOMAIN_RENDER | 4008 KGEM_RELOC_FENCED, 4009 0); 4010 b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 4011 b[6] = src_pitch; 4012 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 4013 I915_GEM_DOMAIN_RENDER << 16 | 4014 KGEM_RELOC_FENCED, 4015 0); 4016 kgem->nbatch += 8; 4017 assert(kgem->nbatch < kgem->surface); 4018 box++; 4019 } while (--nbox_this_time); 4020 4021 if (!nbox) 4022 break; 4023 4024 _kgem_submit(kgem); 4025 _kgem_set_mode(kgem, KGEM_BLT); 4026 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 4027 } while (1); 4028 } 4029 } 4030 4031 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 4032 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 4033 _kgem_submit(kgem); 4034 } else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) { 4035 uint32_t *b = kgem->batch + kgem->nbatch; 4036 assert(sna->kgem.mode == KGEM_BLT); 4037 b[0] = XY_SETUP_CLIP; 4038 b[1] = b[2] = 0; 4039 kgem->nbatch += 3; 4040 assert(kgem->nbatch < kgem->surface); 4041 } 4042 4043 sna->blt_state.fill_bo = 0; 4044 return true; 4045} 4046 4047bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu, 4048 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 4049 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 4050 int bpp, int alpha_fixup, 4051 const BoxRec *box, int nbox) 4052{ 4053 struct kgem *kgem = &sna->kgem; 4054 unsigned src_pitch, br13, cmd; 4055 4056#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES 4057 return false; 4058#endif 4059 4060 DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n", 4061 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox, 4062 src_bo->tiling, dst_bo->tiling, 4063 src_bo->pitch, dst_bo->pitch)); 4064 4065 if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) { 4066 DBG(("%s: cannot blt to src? %d or dst? %d\n", 4067 __FUNCTION__, 4068 kgem_bo_can_blt(kgem, src_bo), 4069 kgem_bo_can_blt(kgem, dst_bo))); 4070 return false; 4071 } 4072 4073 cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10); 4074 src_pitch = src_bo->pitch; 4075 if (kgem->gen >= 040 && src_bo->tiling) { 4076 cmd |= BLT_SRC_TILED; 4077 src_pitch >>= 2; 4078 } 4079 assert(src_pitch <= MAXSHORT); 4080 4081 br13 = dst_bo->pitch; 4082 if (kgem->gen >= 040 && dst_bo->tiling) { 4083 cmd |= BLT_DST_TILED; 4084 br13 >>= 2; 4085 } 4086 assert(br13 <= MAXSHORT); 4087 4088 br13 |= copy_ROP[alu] << 16; 4089 switch (bpp) { 4090 default: assert(0); 4091 case 32: br13 |= 1 << 25; /* RGB8888 */ 4092 case 16: br13 |= 1 << 24; /* RGB565 */ 4093 case 8: break; 4094 } 4095 4096 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 4097 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 4098 DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__)); 4099 return false; 4100 } 4101 4102 /* Compare first box against a previous fill */ 4103 if ((alu == GXcopy || alu == GXclear || alu == GXset) && 4104 kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) { 4105 if (kgem->gen >= 0100) { 4106 if (kgem->nbatch >= 7 && 4107 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && 4108 kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 4109 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 4110 DBG(("%s: deleting last fill\n", __FUNCTION__)); 4111 kgem->nbatch -= 7; 4112 kgem->nreloc--; 4113 } 4114 } else { 4115 if (kgem->nbatch >= 6 && 4116 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 4117 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 4118 kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 4119 DBG(("%s: deleting last fill\n", __FUNCTION__)); 4120 kgem->nbatch -= 6; 4121 kgem->nreloc--; 4122 } 4123 } 4124 } 4125 4126 while (nbox--) { 4127 uint32_t *b; 4128 4129 if (!kgem_check_batch(kgem, 14) || 4130 !kgem_check_reloc(kgem, 2)) { 4131 _kgem_submit(kgem); 4132 _kgem_set_mode(kgem, KGEM_BLT); 4133 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); 4134 } 4135 4136 assert(sna->kgem.mode == KGEM_BLT); 4137 b = kgem->batch + kgem->nbatch; 4138 b[0] = cmd; 4139 b[1] = br13; 4140 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 4141 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 4142 if (sna->kgem.gen >= 0100) { 4143 *(uint64_t *)(b+4) = 4144 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 4145 I915_GEM_DOMAIN_RENDER << 16 | 4146 I915_GEM_DOMAIN_RENDER | 4147 KGEM_RELOC_FENCED, 4148 0); 4149 b[6] = src_pitch; 4150 b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx); 4151 *(uint64_t *)(b+8) = 4152 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 4153 I915_GEM_DOMAIN_RENDER << 16 | 4154 KGEM_RELOC_FENCED, 4155 0); 4156 b[10] = alpha_fixup; 4157 b[11] = alpha_fixup; 4158 b[12] = 0; 4159 b[13] = 0; 4160 kgem->nbatch += 14; 4161 } else { 4162 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 4163 I915_GEM_DOMAIN_RENDER << 16 | 4164 I915_GEM_DOMAIN_RENDER | 4165 KGEM_RELOC_FENCED, 4166 0); 4167 b[5] = src_pitch; 4168 b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx); 4169 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 4170 I915_GEM_DOMAIN_RENDER << 16 | 4171 KGEM_RELOC_FENCED, 4172 0); 4173 b[8] = alpha_fixup; 4174 b[9] = alpha_fixup; 4175 b[10] = 0; 4176 b[11] = 0; 4177 kgem->nbatch += 12; 4178 } 4179 assert(kgem->nbatch < kgem->surface); 4180 box++; 4181 } 4182 4183 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 4184 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 4185 _kgem_submit(kgem); 4186 } 4187 4188 sna->blt_state.fill_bo = 0; 4189 return true; 4190} 4191 4192static void box_extents(const BoxRec *box, int n, BoxRec *extents) 4193{ 4194 *extents = *box; 4195 while (--n) { 4196 box++; 4197 if (box->x1 < extents->x1) 4198 extents->x1 = box->x1; 4199 if (box->y1 < extents->y1) 4200 extents->y1 = box->y1; 4201 4202 if (box->x2 > extents->x2) 4203 extents->x2 = box->x2; 4204 if (box->y2 > extents->y2) 4205 extents->y2 = box->y2; 4206 } 4207} 4208 4209bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, 4210 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 4211 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 4212 const BoxRec *box, int nbox) 4213{ 4214 struct kgem_bo *free_bo = NULL; 4215 bool ret; 4216 4217 DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox)); 4218 4219 if (!sna_blt_compare_depth(src, dst)) { 4220 DBG(("%s: mismatching depths %d -> %d\n", 4221 __FUNCTION__, src->depth, dst->depth)); 4222 return false; 4223 } 4224 4225 if (src_bo == dst_bo) { 4226 DBG(("%s: dst == src\n", __FUNCTION__)); 4227 4228 if (src_bo->tiling == I915_TILING_Y && 4229 !sna->kgem.can_blt_y && 4230 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { 4231 struct kgem_bo *bo; 4232 4233 DBG(("%s: src is Y-tiled\n", __FUNCTION__)); 4234 4235 if (src->type != DRAWABLE_PIXMAP) 4236 return false; 4237 4238 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo); 4239 bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X); 4240 if (bo == NULL) { 4241 BoxRec extents; 4242 4243 DBG(("%s: y-tiling conversion failed\n", 4244 __FUNCTION__)); 4245 4246 box_extents(box, nbox, &extents); 4247 free_bo = kgem_create_2d(&sna->kgem, 4248 extents.x2 - extents.x1, 4249 extents.y2 - extents.y1, 4250 src->bitsPerPixel, 4251 I915_TILING_X, 0); 4252 if (free_bo == NULL) { 4253 DBG(("%s: fallback -- temp allocation failed\n", 4254 __FUNCTION__)); 4255 return false; 4256 } 4257 4258 if (!sna_blt_copy_boxes(sna, GXcopy, 4259 src_bo, src_dx, src_dy, 4260 free_bo, -extents.x1, -extents.y1, 4261 src->bitsPerPixel, 4262 box, nbox)) { 4263 DBG(("%s: fallback -- temp copy failed\n", 4264 __FUNCTION__)); 4265 kgem_bo_destroy(&sna->kgem, free_bo); 4266 return false; 4267 } 4268 4269 src_dx = -extents.x1; 4270 src_dy = -extents.y1; 4271 src_bo = free_bo; 4272 } else 4273 dst_bo = src_bo = bo; 4274 } 4275 } else { 4276 if (src_bo->tiling == I915_TILING_Y && 4277 !sna->kgem.can_blt_y && 4278 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { 4279 DBG(("%s: src is y-tiled\n", __FUNCTION__)); 4280 if (src->type != DRAWABLE_PIXMAP) 4281 return false; 4282 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo); 4283 src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X); 4284 if (src_bo == NULL) { 4285 DBG(("%s: fallback -- src y-tiling conversion failed\n", 4286 __FUNCTION__)); 4287 return false; 4288 } 4289 } 4290 4291 if (dst_bo->tiling == I915_TILING_Y && 4292 !sna->kgem.can_blt_y && 4293 kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { 4294 DBG(("%s: dst is y-tiled\n", __FUNCTION__)); 4295 if (dst->type != DRAWABLE_PIXMAP) 4296 return false; 4297 assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo); 4298 dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X); 4299 if (dst_bo == NULL) { 4300 DBG(("%s: fallback -- dst y-tiling conversion failed\n", 4301 __FUNCTION__)); 4302 return false; 4303 } 4304 } 4305 } 4306 4307 ret = sna_blt_copy_boxes(sna, alu, 4308 src_bo, src_dx, src_dy, 4309 dst_bo, dst_dx, dst_dy, 4310 dst->bitsPerPixel, 4311 box, nbox); 4312 4313 if (free_bo) 4314 kgem_bo_destroy(&sna->kgem, free_bo); 4315 4316 return ret; 4317} 4318