/* sna_blt.c revision 42542f5f */
1/* 2 * Based on code from intel_uxa.c and i830_xaa.c 3 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. 4 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> 5 * Copyright (c) 2009-2011 Intel Corporation 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 * SOFTWARE. 
25 * 26 * Authors: 27 * Chris Wilson <chris@chris-wilson.co.uk> 28 * 29 */ 30 31#ifdef HAVE_CONFIG_H 32#include "config.h" 33#endif 34 35#include "sna.h" 36#include "sna_render.h" 37#include "sna_render_inline.h" 38#include "sna_reg.h" 39#include "rop.h" 40 41#define NO_BLT_COMPOSITE 0 42#define NO_BLT_COPY 0 43#define NO_BLT_COPY_BOXES 0 44#define NO_BLT_FILL 0 45#define NO_BLT_FILL_BOXES 0 46 47#ifndef PICT_TYPE_BGRA 48#define PICT_TYPE_BGRA 8 49#endif 50 51static const uint8_t copy_ROP[] = { 52 ROP_0, /* GXclear */ 53 ROP_DSa, /* GXand */ 54 ROP_SDna, /* GXandReverse */ 55 ROP_S, /* GXcopy */ 56 ROP_DSna, /* GXandInverted */ 57 ROP_D, /* GXnoop */ 58 ROP_DSx, /* GXxor */ 59 ROP_DSo, /* GXor */ 60 ROP_DSon, /* GXnor */ 61 ROP_DSxn, /* GXequiv */ 62 ROP_Dn, /* GXinvert */ 63 ROP_SDno, /* GXorReverse */ 64 ROP_Sn, /* GXcopyInverted */ 65 ROP_DSno, /* GXorInverted */ 66 ROP_DSan, /* GXnand */ 67 ROP_1 /* GXset */ 68}; 69 70static const uint8_t fill_ROP[] = { 71 ROP_0, 72 ROP_DPa, 73 ROP_PDna, 74 ROP_P, 75 ROP_DPna, 76 ROP_D, 77 ROP_DPx, 78 ROP_DPo, 79 ROP_DPon, 80 ROP_PDxn, 81 ROP_Dn, 82 ROP_PDno, 83 ROP_Pn, 84 ROP_DPno, 85 ROP_DPan, 86 ROP_1 87}; 88 89static void nop_done(struct sna *sna, const struct sna_composite_op *op) 90{ 91 assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 92 if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) { 93 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 94 _kgem_submit(&sna->kgem); 95 } 96 (void)op; 97} 98 99static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op) 100{ 101 struct kgem *kgem = &sna->kgem; 102 103 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 104 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 105 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 106 _kgem_submit(kgem); 107 return; 108 } 109 110 if (kgem_check_batch(kgem, 3)) { 111 uint32_t *b = kgem->batch + kgem->nbatch; 112 assert(sna->kgem.mode == KGEM_BLT); 113 b[0] = XY_SETUP_CLIP; 114 
b[1] = b[2] = 0; 115 kgem->nbatch += 3; 116 assert(kgem->nbatch < kgem->surface); 117 } 118 assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 119 (void)op; 120} 121 122static bool sna_blt_fill_init(struct sna *sna, 123 struct sna_blt_state *blt, 124 struct kgem_bo *bo, 125 int bpp, 126 uint8_t alu, 127 uint32_t pixel) 128{ 129 struct kgem *kgem = &sna->kgem; 130 131 assert(kgem_bo_can_blt (kgem, bo)); 132 assert(bo->tiling != I915_TILING_Y); 133 blt->bo[0] = bo; 134 135 blt->br13 = bo->pitch; 136 blt->cmd = XY_SCANLINE_BLT; 137 if (kgem->gen >= 040 && bo->tiling) { 138 blt->cmd |= BLT_DST_TILED; 139 blt->br13 >>= 2; 140 } 141 assert(blt->br13 <= MAXSHORT); 142 143 if (alu == GXclear) 144 pixel = 0; 145 else if (alu == GXcopy) { 146 if (pixel == 0) 147 alu = GXclear; 148 else if (pixel == -1) 149 alu = GXset; 150 } 151 152 blt->br13 |= 1<<31 | (fill_ROP[alu] << 16); 153 switch (bpp) { 154 default: assert(0); 155 case 32: blt->br13 |= 1 << 25; /* RGB8888 */ 156 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 157 case 8: break; 158 } 159 160 blt->pixel = pixel; 161 blt->bpp = bpp; 162 blt->alu = alu; 163 164 kgem_set_mode(kgem, KGEM_BLT, bo); 165 if (!kgem_check_batch(kgem, 14) || 166 !kgem_check_bo_fenced(kgem, bo)) { 167 kgem_submit(kgem); 168 if (!kgem_check_bo_fenced(kgem, bo)) 169 return false; 170 _kgem_set_mode(kgem, KGEM_BLT); 171 } 172 173 if (sna->blt_state.fill_bo != bo->unique_id || 174 sna->blt_state.fill_pixel != pixel || 175 sna->blt_state.fill_alu != alu) 176 { 177 uint32_t *b; 178 179 if (!kgem_check_reloc(kgem, 1)) { 180 _kgem_submit(kgem); 181 if (!kgem_check_bo_fenced(kgem, bo)) 182 return false; 183 _kgem_set_mode(kgem, KGEM_BLT); 184 } 185 186 assert(sna->kgem.mode == KGEM_BLT); 187 b = kgem->batch + kgem->nbatch; 188 if (sna->kgem.gen >= 0100) { 189 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 190 if (bpp == 32) 191 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 192 if (bo->tiling) 193 b[0] |= BLT_DST_TILED; 194 b[1] = blt->br13; 195 b[2] = 0; 196 b[3] 
= 0; 197 *(uint64_t *)(b+4) = 198 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 199 I915_GEM_DOMAIN_RENDER << 16 | 200 I915_GEM_DOMAIN_RENDER | 201 KGEM_RELOC_FENCED, 202 0); 203 b[6] = pixel; 204 b[7] = pixel; 205 b[8] = 0; 206 b[9] = 0; 207 kgem->nbatch += 10; 208 } else { 209 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 210 if (bpp == 32) 211 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 212 if (bo->tiling && kgem->gen >= 040) 213 b[0] |= BLT_DST_TILED; 214 b[1] = blt->br13; 215 b[2] = 0; 216 b[3] = 0; 217 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 218 I915_GEM_DOMAIN_RENDER << 16 | 219 I915_GEM_DOMAIN_RENDER | 220 KGEM_RELOC_FENCED, 221 0); 222 b[5] = pixel; 223 b[6] = pixel; 224 b[7] = 0; 225 b[8] = 0; 226 kgem->nbatch += 9; 227 } 228 assert(kgem->nbatch < kgem->surface); 229 230 sna->blt_state.fill_bo = bo->unique_id; 231 sna->blt_state.fill_pixel = pixel; 232 sna->blt_state.fill_alu = alu; 233 } 234 235 return true; 236} 237 238noinline static void sna_blt_fill_begin(struct sna *sna, 239 const struct sna_blt_state *blt) 240{ 241 struct kgem *kgem = &sna->kgem; 242 uint32_t *b; 243 244 if (kgem->nreloc) { 245 _kgem_submit(kgem); 246 _kgem_set_mode(kgem, KGEM_BLT); 247 assert(kgem->nbatch == 0); 248 } 249 250 assert(kgem->mode == KGEM_BLT); 251 b = kgem->batch + kgem->nbatch; 252 if (sna->kgem.gen >= 0100) { 253 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 254 if (blt->bpp == 32) 255 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 256 if (blt->bo[0]->tiling) 257 b[0] |= BLT_DST_TILED; 258 b[1] = blt->br13; 259 b[2] = 0; 260 b[3] = 0; 261 *(uint32_t *)(b+4) = 262 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0], 263 I915_GEM_DOMAIN_RENDER << 16 | 264 I915_GEM_DOMAIN_RENDER | 265 KGEM_RELOC_FENCED, 266 0); 267 b[6] = blt->pixel; 268 b[7] = blt->pixel; 269 b[8] = 0; 270 b[9] = 0; 271 kgem->nbatch += 10; 272 } else { 273 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 274 if (blt->bpp == 32) 275 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 276 if (blt->bo[0]->tiling && kgem->gen >= 
040) 277 b[0] |= BLT_DST_TILED; 278 b[1] = blt->br13; 279 b[2] = 0; 280 b[3] = 0; 281 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], 282 I915_GEM_DOMAIN_RENDER << 16 | 283 I915_GEM_DOMAIN_RENDER | 284 KGEM_RELOC_FENCED, 285 0); 286 b[5] = blt->pixel; 287 b[6] = blt->pixel; 288 b[7] = 0; 289 b[8] = 0; 290 kgem->nbatch += 9; 291 } 292} 293 294inline static void sna_blt_fill_one(struct sna *sna, 295 const struct sna_blt_state *blt, 296 int16_t x, int16_t y, 297 int16_t width, int16_t height) 298{ 299 struct kgem *kgem = &sna->kgem; 300 uint32_t *b; 301 302 DBG(("%s: (%d, %d) x (%d, %d): %08x\n", 303 __FUNCTION__, x, y, width, height, blt->pixel)); 304 305 assert(x >= 0); 306 assert(y >= 0); 307 assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 308 309 if (!kgem_check_batch(kgem, 3)) 310 sna_blt_fill_begin(sna, blt); 311 312 assert(sna->kgem.mode == KGEM_BLT); 313 b = kgem->batch + kgem->nbatch; 314 kgem->nbatch += 3; 315 assert(kgem->nbatch < kgem->surface); 316 317 b[0] = blt->cmd; 318 b[1] = y << 16 | x; 319 b[2] = b[1] + (height << 16 | width); 320} 321 322static bool sna_blt_copy_init(struct sna *sna, 323 struct sna_blt_state *blt, 324 struct kgem_bo *src, 325 struct kgem_bo *dst, 326 int bpp, 327 uint8_t alu) 328{ 329 struct kgem *kgem = &sna->kgem; 330 331 assert(kgem_bo_can_blt (kgem, src)); 332 assert(kgem_bo_can_blt (kgem, dst)); 333 334 blt->bo[0] = src; 335 blt->bo[1] = dst; 336 337 blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 
8 : 6); 338 if (bpp == 32) 339 blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 340 341 blt->pitch[0] = src->pitch; 342 if (kgem->gen >= 040 && src->tiling) { 343 blt->cmd |= BLT_SRC_TILED; 344 blt->pitch[0] >>= 2; 345 } 346 assert(blt->pitch[0] <= MAXSHORT); 347 348 blt->pitch[1] = dst->pitch; 349 if (kgem->gen >= 040 && dst->tiling) { 350 blt->cmd |= BLT_DST_TILED; 351 blt->pitch[1] >>= 2; 352 } 353 assert(blt->pitch[1] <= MAXSHORT); 354 355 blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset; 356 blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1]; 357 switch (bpp) { 358 default: assert(0); 359 case 32: blt->br13 |= 1 << 25; /* RGB8888 */ 360 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 361 case 8: break; 362 } 363 364 kgem_set_mode(kgem, KGEM_BLT, dst); 365 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 366 kgem_submit(kgem); 367 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) 368 return false; 369 _kgem_set_mode(kgem, KGEM_BLT); 370 } 371 372 sna->blt_state.fill_bo = 0; 373 return true; 374} 375 376static bool sna_blt_alpha_fixup_init(struct sna *sna, 377 struct sna_blt_state *blt, 378 struct kgem_bo *src, 379 struct kgem_bo *dst, 380 int bpp, uint32_t alpha) 381{ 382 struct kgem *kgem = &sna->kgem; 383 384 DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n", 385 __FUNCTION__, dst->handle, src->handle, bpp, alpha)); 386 assert(kgem_bo_can_blt(kgem, src)); 387 assert(kgem_bo_can_blt(kgem, dst)); 388 389 blt->bo[0] = src; 390 blt->bo[1] = dst; 391 392 blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 
12 : 10); 393 blt->pitch[0] = src->pitch; 394 if (kgem->gen >= 040 && src->tiling) { 395 blt->cmd |= BLT_SRC_TILED; 396 blt->pitch[0] >>= 2; 397 } 398 assert(blt->pitch[0] <= MAXSHORT); 399 400 blt->pitch[1] = dst->pitch; 401 if (kgem->gen >= 040 && dst->tiling) { 402 blt->cmd |= BLT_DST_TILED; 403 blt->pitch[1] >>= 2; 404 } 405 assert(blt->pitch[1] <= MAXSHORT); 406 407 blt->overwrites = 1; 408 blt->br13 = (0xfc << 16) | blt->pitch[1]; 409 switch (bpp) { 410 default: assert(0); 411 case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 412 blt->br13 |= 1 << 25; /* RGB8888 */ 413 case 16: blt->br13 |= 1 << 24; /* RGB565 */ 414 case 8: break; 415 } 416 blt->pixel = alpha; 417 418 kgem_set_mode(kgem, KGEM_BLT, dst); 419 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 420 kgem_submit(kgem); 421 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) 422 return false; 423 _kgem_set_mode(kgem, KGEM_BLT); 424 } 425 426 sna->blt_state.fill_bo = 0; 427 return true; 428} 429 430static void sna_blt_alpha_fixup_one(struct sna *sna, 431 const struct sna_blt_state *blt, 432 int src_x, int src_y, 433 int width, int height, 434 int dst_x, int dst_y) 435{ 436 struct kgem *kgem = &sna->kgem; 437 uint32_t *b; 438 439 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", 440 __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); 441 442 assert(src_x >= 0); 443 assert(src_y >= 0); 444 assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 445 assert(dst_x >= 0); 446 assert(dst_y >= 0); 447 assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1])); 448 assert(width > 0); 449 assert(height > 0); 450 451 if (!kgem_check_batch(kgem, 14) || 452 !kgem_check_reloc(kgem, 2)) { 453 _kgem_submit(kgem); 454 _kgem_set_mode(kgem, KGEM_BLT); 455 } 456 457 assert(sna->kgem.mode == KGEM_BLT); 458 b = kgem->batch + kgem->nbatch; 459 b[0] = blt->cmd; 460 b[1] = blt->br13; 461 b[2] = (dst_y << 16) | dst_x; 462 b[3] = ((dst_y + height) << 16) | (dst_x + width); 463 if 
(sna->kgem.gen >= 0100) { 464 *(uint64_t *)(b+4) = 465 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], 466 I915_GEM_DOMAIN_RENDER << 16 | 467 I915_GEM_DOMAIN_RENDER | 468 KGEM_RELOC_FENCED, 469 0); 470 b[6] = blt->pitch[0]; 471 b[7] = (src_y << 16) | src_x; 472 *(uint64_t *)(b+8) = 473 kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], 474 I915_GEM_DOMAIN_RENDER << 16 | 475 KGEM_RELOC_FENCED, 476 0); 477 b[10] = blt->pixel; 478 b[11] = blt->pixel; 479 b[12] = 0; 480 b[13] = 0; 481 kgem->nbatch += 14; 482 } else { 483 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], 484 I915_GEM_DOMAIN_RENDER << 16 | 485 I915_GEM_DOMAIN_RENDER | 486 KGEM_RELOC_FENCED, 487 0); 488 b[5] = blt->pitch[0]; 489 b[6] = (src_y << 16) | src_x; 490 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], 491 I915_GEM_DOMAIN_RENDER << 16 | 492 KGEM_RELOC_FENCED, 493 0); 494 b[8] = blt->pixel; 495 b[9] = blt->pixel; 496 b[10] = 0; 497 b[11] = 0; 498 kgem->nbatch += 12; 499 } 500 assert(kgem->nbatch < kgem->surface); 501} 502 503static void sna_blt_copy_one(struct sna *sna, 504 const struct sna_blt_state *blt, 505 int src_x, int src_y, 506 int width, int height, 507 int dst_x, int dst_y) 508{ 509 struct kgem *kgem = &sna->kgem; 510 uint32_t *b; 511 512 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", 513 __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); 514 515 assert(src_x >= 0); 516 assert(src_y >= 0); 517 assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 518 assert(dst_x >= 0); 519 assert(dst_y >= 0); 520 assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1])); 521 assert(width > 0); 522 assert(height > 0); 523 524 /* Compare against a previous fill */ 525 if (blt->overwrites && 526 kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) { 527 if (sna->kgem.gen >= 0100) { 528 if (kgem->nbatch >= 7 && 529 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 
5) && 530 kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && 531 kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { 532 DBG(("%s: replacing last fill\n", __FUNCTION__)); 533 if (kgem_check_batch(kgem, 3)) { 534 assert(kgem->mode == KGEM_BLT); 535 b = kgem->batch + kgem->nbatch - 7; 536 b[0] = blt->cmd; 537 b[1] = blt->br13; 538 b[6] = (src_y << 16) | src_x; 539 b[7] = blt->pitch[0]; 540 *(uint64_t *)(b+8) = 541 kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0], 542 I915_GEM_DOMAIN_RENDER << 16 | 543 KGEM_RELOC_FENCED, 544 0); 545 kgem->nbatch += 3; 546 assert(kgem->nbatch < kgem->surface); 547 return; 548 } 549 kgem->nbatch -= 7; 550 kgem->nreloc--; 551 } 552 } else { 553 if (kgem->nbatch >= 6 && 554 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 555 kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && 556 kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { 557 DBG(("%s: replacing last fill\n", __FUNCTION__)); 558 if (kgem_check_batch(kgem, 8-6)) { 559 assert(kgem->mode == KGEM_BLT); 560 b = kgem->batch + kgem->nbatch - 6; 561 b[0] = blt->cmd; 562 b[1] = blt->br13; 563 b[5] = (src_y << 16) | src_x; 564 b[6] = blt->pitch[0]; 565 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0], 566 I915_GEM_DOMAIN_RENDER << 16 | 567 KGEM_RELOC_FENCED, 568 0); 569 kgem->nbatch += 8 - 6; 570 assert(kgem->nbatch < kgem->surface); 571 return; 572 } 573 kgem->nbatch -= 6; 574 kgem->nreloc--; 575 } 576 } 577 } 578 579 if (!kgem_check_batch(kgem, 10) || 580 !kgem_check_reloc(kgem, 2)) { 581 _kgem_submit(kgem); 582 _kgem_set_mode(kgem, KGEM_BLT); 583 } 584 585 assert(sna->kgem.mode == KGEM_BLT); 586 b = kgem->batch + kgem->nbatch; 587 b[0] = blt->cmd; 588 b[1] = blt->br13; 589 b[2] = (dst_y << 16) | dst_x; 590 b[3] = ((dst_y + height) << 16) | (dst_x + width); 591 if 
(kgem->gen >= 0100) { 592 *(uint64_t *)(b+4) = 593 kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], 594 I915_GEM_DOMAIN_RENDER << 16 | 595 I915_GEM_DOMAIN_RENDER | 596 KGEM_RELOC_FENCED, 597 0); 598 b[6] = (src_y << 16) | src_x; 599 b[7] = blt->pitch[0]; 600 *(uint64_t *)(b+8) = 601 kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], 602 I915_GEM_DOMAIN_RENDER << 16 | 603 KGEM_RELOC_FENCED, 604 0); 605 kgem->nbatch += 10; 606 } else { 607 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], 608 I915_GEM_DOMAIN_RENDER << 16 | 609 I915_GEM_DOMAIN_RENDER | 610 KGEM_RELOC_FENCED, 611 0); 612 b[5] = (src_y << 16) | src_x; 613 b[6] = blt->pitch[0]; 614 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], 615 I915_GEM_DOMAIN_RENDER << 16 | 616 KGEM_RELOC_FENCED, 617 0); 618 kgem->nbatch += 8; 619 } 620 assert(kgem->nbatch < kgem->surface); 621} 622 623bool 624sna_get_rgba_from_pixel(uint32_t pixel, 625 uint16_t *red, 626 uint16_t *green, 627 uint16_t *blue, 628 uint16_t *alpha, 629 uint32_t format) 630{ 631 int rbits, bbits, gbits, abits; 632 int rshift, bshift, gshift, ashift; 633 634 rbits = PICT_FORMAT_R(format); 635 gbits = PICT_FORMAT_G(format); 636 bbits = PICT_FORMAT_B(format); 637 abits = PICT_FORMAT_A(format); 638 639 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { 640 rshift = gshift = bshift = ashift = 0; 641 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { 642 bshift = 0; 643 gshift = bbits; 644 rshift = gshift + gbits; 645 ashift = rshift + rbits; 646 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 647 rshift = 0; 648 gshift = rbits; 649 bshift = gshift + gbits; 650 ashift = bshift + bbits; 651 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 652 ashift = 0; 653 rshift = abits; 654 if (abits == 0) 655 rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); 656 gshift = rshift + rbits; 657 bshift = gshift + gbits; 658 } else { 659 return false; 660 } 661 662 if (rbits) { 663 *red = ((pixel >> rshift) & ((1 << rbits) - 1)) 
<< (16 - rbits); 664 while (rbits < 16) { 665 *red |= *red >> rbits; 666 rbits <<= 1; 667 } 668 } else 669 *red = 0; 670 671 if (gbits) { 672 *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits); 673 while (gbits < 16) { 674 *green |= *green >> gbits; 675 gbits <<= 1; 676 } 677 } else 678 *green = 0; 679 680 if (bbits) { 681 *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits); 682 while (bbits < 16) { 683 *blue |= *blue >> bbits; 684 bbits <<= 1; 685 } 686 } else 687 *blue = 0; 688 689 if (abits) { 690 *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits); 691 while (abits < 16) { 692 *alpha |= *alpha >> abits; 693 abits <<= 1; 694 } 695 } else 696 *alpha = 0xffff; 697 698 return true; 699} 700 701bool 702_sna_get_pixel_from_rgba(uint32_t * pixel, 703 uint16_t red, 704 uint16_t green, 705 uint16_t blue, 706 uint16_t alpha, 707 uint32_t format) 708{ 709 int rbits, bbits, gbits, abits; 710 int rshift, bshift, gshift, ashift; 711 712 rbits = PICT_FORMAT_R(format); 713 gbits = PICT_FORMAT_G(format); 714 bbits = PICT_FORMAT_B(format); 715 abits = PICT_FORMAT_A(format); 716 if (abits == 0) 717 abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); 718 719 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { 720 *pixel = alpha >> (16 - abits); 721 return true; 722 } 723 724 if (!PICT_FORMAT_COLOR(format)) 725 return false; 726 727 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { 728 bshift = 0; 729 gshift = bbits; 730 rshift = gshift + gbits; 731 ashift = rshift + rbits; 732 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 733 rshift = 0; 734 gshift = rbits; 735 bshift = gshift + gbits; 736 ashift = bshift + bbits; 737 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 738 ashift = 0; 739 rshift = abits; 740 gshift = rshift + rbits; 741 bshift = gshift + gbits; 742 } else 743 return false; 744 745 *pixel = 0; 746 *pixel |= (blue >> (16 - bbits)) << bshift; 747 *pixel |= (green >> (16 - gbits)) << gshift; 748 *pixel |= (red >> 
(16 - rbits)) << rshift; 749 *pixel |= (alpha >> (16 - abits)) << ashift; 750 751 return true; 752} 753 754uint32_t 755sna_rgba_for_color(uint32_t color, int depth) 756{ 757 return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8); 758} 759 760uint32_t 761sna_rgba_to_color(uint32_t rgba, uint32_t format) 762{ 763 return color_convert(rgba, PICT_a8r8g8b8, format); 764} 765 766static uint32_t 767get_pixel(PicturePtr picture) 768{ 769 PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable); 770 771 DBG(("%s: %p\n", __FUNCTION__, pixmap)); 772 773 if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ)) 774 return 0; 775 776 switch (pixmap->drawable.bitsPerPixel) { 777 case 32: return *(uint32_t *)pixmap->devPrivate.ptr; 778 case 16: return *(uint16_t *)pixmap->devPrivate.ptr; 779 default: return *(uint8_t *)pixmap->devPrivate.ptr; 780 } 781} 782 783static uint32_t 784get_solid_color(PicturePtr picture, uint32_t format) 785{ 786 if (picture->pSourcePict) { 787 PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict; 788 return color_convert(fill->color, PICT_a8r8g8b8, format); 789 } else 790 return color_convert(get_pixel(picture), picture->format, format); 791} 792 793static bool 794is_solid(PicturePtr picture) 795{ 796 if (picture->pSourcePict) { 797 if (picture->pSourcePict->type == SourcePictTypeSolidFill) 798 return true; 799 } 800 801 if (picture->pDrawable) { 802 if (picture->pDrawable->width == 1 && 803 picture->pDrawable->height == 1 && 804 picture->repeat) 805 return true; 806 } 807 808 return false; 809} 810 811bool 812sna_picture_is_solid(PicturePtr picture, uint32_t *color) 813{ 814 if (!is_solid(picture)) 815 return false; 816 817 if (color) 818 *color = get_solid_color(picture, PICT_a8r8g8b8); 819 return true; 820} 821 822static bool 823pixel_is_transparent(uint32_t pixel, uint32_t format) 824{ 825 unsigned int abits; 826 827 abits = PICT_FORMAT_A(format); 828 if (!abits) 829 return false; 830 831 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A 
|| 832 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 833 return (pixel & ((1 << abits) - 1)) == 0; 834 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB || 835 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 836 unsigned int ashift = PICT_FORMAT_BPP(format) - abits; 837 return (pixel >> ashift) == 0; 838 } else 839 return false; 840} 841 842static bool 843pixel_is_opaque(uint32_t pixel, uint32_t format) 844{ 845 unsigned int abits; 846 847 abits = PICT_FORMAT_A(format); 848 if (!abits) 849 return true; 850 851 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A || 852 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { 853 return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1); 854 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB || 855 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { 856 unsigned int ashift = PICT_FORMAT_BPP(format) - abits; 857 return (pixel >> ashift) == (unsigned)((1 << abits) - 1); 858 } else 859 return false; 860} 861 862static bool 863pixel_is_white(uint32_t pixel, uint32_t format) 864{ 865 switch (PICT_FORMAT_TYPE(format)) { 866 case PICT_TYPE_A: 867 case PICT_TYPE_ARGB: 868 case PICT_TYPE_ABGR: 869 case PICT_TYPE_BGRA: 870 return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1); 871 default: 872 return false; 873 } 874} 875 876static bool 877is_opaque_solid(PicturePtr picture) 878{ 879 if (picture->pSourcePict) { 880 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 881 return (fill->color >> 24) == 0xff; 882 } else 883 return pixel_is_opaque(get_pixel(picture), picture->format); 884} 885 886static bool 887is_white(PicturePtr picture) 888{ 889 if (picture->pSourcePict) { 890 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 891 return fill->color == 0xffffffff; 892 } else 893 return pixel_is_white(get_pixel(picture), picture->format); 894} 895 896static bool 897is_transparent(PicturePtr picture) 898{ 899 if (picture->pSourcePict) { 900 PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; 901 return 
fill->color == 0; 902 } else 903 return pixel_is_transparent(get_pixel(picture), picture->format); 904} 905 906bool 907sna_composite_mask_is_opaque(PicturePtr mask) 908{ 909 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) 910 return is_solid(mask) && is_white(mask); 911 else if (!PICT_FORMAT_A(mask->format)) 912 return true; 913 else 914 return is_solid(mask) && is_opaque_solid(mask); 915} 916 917fastcall 918static void blt_composite_fill(struct sna *sna, 919 const struct sna_composite_op *op, 920 const struct sna_composite_rectangles *r) 921{ 922 int x1, x2, y1, y2; 923 924 x1 = r->dst.x + op->dst.x; 925 y1 = r->dst.y + op->dst.y; 926 x2 = x1 + r->width; 927 y2 = y1 + r->height; 928 929 if (x1 < 0) 930 x1 = 0; 931 if (y1 < 0) 932 y1 = 0; 933 934 if (x2 > op->dst.width) 935 x2 = op->dst.width; 936 if (y2 > op->dst.height) 937 y2 = op->dst.height; 938 939 if (x2 <= x1 || y2 <= y1) 940 return; 941 942 sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1); 943} 944 945fastcall 946static void blt_composite_fill__cpu(struct sna *sna, 947 const struct sna_composite_op *op, 948 const struct sna_composite_rectangles *r) 949{ 950 int x1, x2, y1, y2; 951 952 x1 = r->dst.x + op->dst.x; 953 y1 = r->dst.y + op->dst.y; 954 x2 = x1 + r->width; 955 y2 = y1 + r->height; 956 957 if (x1 < 0) 958 x1 = 0; 959 if (y1 < 0) 960 y1 = 0; 961 962 if (x2 > op->dst.width) 963 x2 = op->dst.width; 964 if (y2 > op->dst.height) 965 y2 = op->dst.height; 966 967 if (x2 <= x1 || y2 <= y1) 968 return; 969 970 assert(op->dst.pixmap->devPrivate.ptr); 971 assert(op->dst.pixmap->devKind); 972 pixman_fill(op->dst.pixmap->devPrivate.ptr, 973 op->dst.pixmap->devKind / sizeof(uint32_t), 974 op->dst.pixmap->drawable.bitsPerPixel, 975 x1, y1, x2-x1, y2-y1, 976 op->u.blt.pixel); 977} 978 979fastcall static void 980blt_composite_fill_box_no_offset__cpu(struct sna *sna, 981 const struct sna_composite_op *op, 982 const BoxRec *box) 983{ 984 assert(box->x1 >= 0); 985 assert(box->y1 >= 0); 986 
assert(box->x2 <= op->dst.pixmap->drawable.width); 987 assert(box->y2 <= op->dst.pixmap->drawable.height); 988 989 assert(op->dst.pixmap->devPrivate.ptr); 990 assert(op->dst.pixmap->devKind); 991 pixman_fill(op->dst.pixmap->devPrivate.ptr, 992 op->dst.pixmap->devKind / sizeof(uint32_t), 993 op->dst.pixmap->drawable.bitsPerPixel, 994 box->x1, box->y1, box->x2-box->x1, box->y2-box->y1, 995 op->u.blt.pixel); 996} 997 998static void 999blt_composite_fill_boxes_no_offset__cpu(struct sna *sna, 1000 const struct sna_composite_op *op, 1001 const BoxRec *box, int n) 1002{ 1003 do { 1004 assert(box->x1 >= 0); 1005 assert(box->y1 >= 0); 1006 assert(box->x2 <= op->dst.pixmap->drawable.width); 1007 assert(box->y2 <= op->dst.pixmap->drawable.height); 1008 1009 assert(op->dst.pixmap->devPrivate.ptr); 1010 assert(op->dst.pixmap->devKind); 1011 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1012 op->dst.pixmap->devKind / sizeof(uint32_t), 1013 op->dst.pixmap->drawable.bitsPerPixel, 1014 box->x1, box->y1, box->x2-box->x1, box->y2-box->y1, 1015 op->u.blt.pixel); 1016 box++; 1017 } while (--n); 1018} 1019 1020fastcall static void 1021blt_composite_fill_box__cpu(struct sna *sna, 1022 const struct sna_composite_op *op, 1023 const BoxRec *box) 1024{ 1025 assert(box->x1 + op->dst.x >= 0); 1026 assert(box->y1 + op->dst.y >= 0); 1027 assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width); 1028 assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height); 1029 1030 assert(op->dst.pixmap->devPrivate.ptr); 1031 assert(op->dst.pixmap->devKind); 1032 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1033 op->dst.pixmap->devKind / sizeof(uint32_t), 1034 op->dst.pixmap->drawable.bitsPerPixel, 1035 box->x1 + op->dst.x, box->y1 + op->dst.y, 1036 box->x2 - box->x1, box->y2 - box->y1, 1037 op->u.blt.pixel); 1038} 1039 1040static void 1041blt_composite_fill_boxes__cpu(struct sna *sna, 1042 const struct sna_composite_op *op, 1043 const BoxRec *box, int n) 1044{ 1045 do { 1046 assert(box->x1 + op->dst.x 
>= 0); 1047 assert(box->y1 + op->dst.y >= 0); 1048 assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width); 1049 assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height); 1050 1051 assert(op->dst.pixmap->devPrivate.ptr); 1052 assert(op->dst.pixmap->devKind); 1053 pixman_fill(op->dst.pixmap->devPrivate.ptr, 1054 op->dst.pixmap->devKind / sizeof(uint32_t), 1055 op->dst.pixmap->drawable.bitsPerPixel, 1056 box->x1 + op->dst.x, box->y1 + op->dst.y, 1057 box->x2 - box->x1, box->y2 - box->y1, 1058 op->u.blt.pixel); 1059 box++; 1060 } while (--n); 1061} 1062 1063inline static void _sna_blt_fill_box(struct sna *sna, 1064 const struct sna_blt_state *blt, 1065 const BoxRec *box) 1066{ 1067 struct kgem *kgem = &sna->kgem; 1068 uint32_t *b; 1069 1070 DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__, 1071 box->x1, box->y1, box->x2, box->y2, 1072 blt->pixel)); 1073 1074 assert(box->x1 >= 0); 1075 assert(box->y1 >= 0); 1076 assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0])); 1077 1078 if (!kgem_check_batch(kgem, 3)) 1079 sna_blt_fill_begin(sna, blt); 1080 1081 assert(sna->kgem.mode == KGEM_BLT); 1082 b = kgem->batch + kgem->nbatch; 1083 kgem->nbatch += 3; 1084 assert(kgem->nbatch < kgem->surface); 1085 1086 b[0] = blt->cmd; 1087 *(uint64_t *)(b+1) = *(const uint64_t *)box; 1088} 1089 1090inline static void _sna_blt_fill_boxes(struct sna *sna, 1091 const struct sna_blt_state *blt, 1092 const BoxRec *box, 1093 int nbox) 1094{ 1095 struct kgem *kgem = &sna->kgem; 1096 uint32_t cmd = blt->cmd; 1097 1098 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); 1099 1100 if (!kgem_check_batch(kgem, 3)) 1101 sna_blt_fill_begin(sna, blt); 1102 1103 do { 1104 uint32_t *b = kgem->batch + kgem->nbatch; 1105 int nbox_this_time; 1106 1107 assert(sna->kgem.mode == KGEM_BLT); 1108 nbox_this_time = nbox; 1109 if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 1110 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; 1111 
assert(nbox_this_time); 1112 nbox -= nbox_this_time; 1113 1114 kgem->nbatch += 3 * nbox_this_time; 1115 assert(kgem->nbatch < kgem->surface); 1116 while (nbox_this_time >= 8) { 1117 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1118 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1119 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1120 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1121 b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++; 1122 b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++; 1123 b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++; 1124 b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++; 1125 b += 24; 1126 nbox_this_time -= 8; 1127 } 1128 if (nbox_this_time & 4) { 1129 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1130 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1131 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; 1132 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; 1133 b += 12; 1134 } 1135 if (nbox_this_time & 2) { 1136 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1137 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; 1138 b += 6; 1139 } 1140 if (nbox_this_time & 1) { 1141 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; 1142 } 1143 1144 if (!nbox) 1145 return; 1146 1147 sna_blt_fill_begin(sna, blt); 1148 } while (1); 1149} 1150 1151static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box) 1152{ 1153 if (box->x2 - box->x1 >= op->dst.width && 1154 box->y2 - box->y1 >= op->dst.height) { 1155 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1156 if (op->dst.bo == priv->gpu_bo) { 1157 priv->clear = true; 1158 priv->clear_color = op->u.blt.pixel; 1159 DBG(("%s: pixmap=%ld marking clear [%08x]\n", 1160 __FUNCTION__, 1161 op->dst.pixmap->drawable.serialNumber, 1162 op->u.blt.pixel)); 1163 } 1164 } 1165} 1166 1167fastcall static void 
blt_composite_fill_box_no_offset(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box)
{
	/* Fill one box; the dst offset is known to be (0,0) here. */
	_sna_blt_fill_box(sna, &op->u.blt, box);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes_no_offset(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int n)
{
	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}

/* Threaded fill (no dst offset): batch space is reserved under the vertex
 * lock, then the dwords are written with the lock dropped. */
static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
						       const struct sna_composite_op *op,
						       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve the space first, then release the lock while the
		 * rectangles are written into the reserved region. */
		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const
			uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		/* Retake the lock to release the reservation and, if more
		 * boxes remain, start a fresh batch. */
		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

/* Fill one box after translating by the composite dst offset. */
fastcall static void blt_composite_fill_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	sna_blt_fill_one(sna, &op->u.blt,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y,
			 box->x2 - box->x1,
			 box->y2 - box->y1);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int n)
{
	do {
		sna_blt_fill_one(sna, &op->u.blt,
				 box->x1 + op->dst.x, box->y1 + op->dst.y,
				 box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
}

/* Translate all four int16 coordinates of a box by (x,y), operating on
 * the whole box as one 64-bit value via a union (avoids aliasing and
 * keeps it a single load/store pair). */
static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
{
	union {
		uint64_t v;
		int16_t i[4];
	} vi;
	vi.v = *(uint64_t *)b;
	vi.i[0] += x;
	vi.i[1] += y;
	vi.i[2] += x;
	vi.i[3] += y;
	return vi.v;
}

static void blt_composite_fill_boxes__thread(struct sna *sna,
					     const struct sna_composite_op
					     *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;
	int16_t dx = op->dst.x;
	int16_t dy = op->dst.y;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve space under the lock, write without it. */
		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* As the no-offset variant, but each box is translated by
		 * (dx,dy) as it is copied into the batch. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

/* No-op composite callbacks, installed when the operation has no visible
 * effect (see prepare_blt_nop). */
fastcall
static void blt_composite_nop(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
}

fastcall static void blt_composite_nop_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
}

static void blt_composite_nop_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
}

/* Ensure the destination bo can be fenced into the aperture, flushing the
 * current batch once if necessary; false means BLT cannot be used. */
static bool
begin_blt(struct sna *sna,
	  struct sna_composite_op *op)
{
	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}

	return true;
}

static bool
prepare_blt_nop(struct sna *sna,
		struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	op->blt = blt_composite_nop;
	op->box = blt_composite_nop_box;
	op->boxes = blt_composite_nop_boxes;
	op->done = nop_done;
	return true;
}

/* Configure op to clear the destination (GXclear, pixel 0), picking the
 * CPU path when there is no destination bo, else the GPU fill path, with
 * offset/no-offset box handlers chosen by the dst translation. */
static bool
prepare_blt_clear(struct sna *sna,
		  struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = nop_done;
		op->u.blt.pixel = 0;
		return true;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXclear, 0))
		return false;

	return begin_blt(sna, op);
}

/* Configure op to fill the destination with a solid pixel (GXcopy).
 * Identical handler selection to prepare_blt_clear, but with the caller's
 * pixel value. */
static bool
prepare_blt_fill(struct sna *sna,
		 struct sna_composite_op *op,
		 uint32_t pixel)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = pixel;
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = nop_done;
		return true;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXcopy, pixel))
		return false;

	return begin_blt(sna, op);
}

/* Copy one rectangle from src to dst, clipping against the destination
 * bounds before emitting the blit; degenerate results are dropped. */
fastcall static void
blt_composite_copy(struct sna *sna,
		   const struct sna_composite_op *op,
		   const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	/* Record src relative to the unclipped dst origin so that clipping
	 * x1/y1 below moves the source point along with it. */
	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_copy_one(sna, &op->u.blt,
			 x1 + src_x, y1 + src_y,
			 x2 - x1, y2 - y1,
			 x1, y1);
}

/* Copy a single pre-clipped box (no bounds checking here). */
fastcall static void blt_composite_copy_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_copy_one(sna, &op->u.blt,
			 box->x1 + op->u.blt.sx,
			 box->y1 + op->u.blt.sy,
			 box->x2 - box->x1,
			 box->y2 - box->y1,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y);
}

static void blt_composite_copy_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		DBG(("%s: box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
		sna_blt_copy_one(sna, &op->u.blt,
				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
				 box->x2 - box->x1, box->y2 - box->y1,
				 box->x1 + op->dst.x, box->y1 + op->dst.y);
		box++;
	} while(--nbox);
}

/* Add (x,y) to a packed (y << 16 | x) coordinate pair.
 * NOTE(review): `x` is an int16_t promoted when OR-ed back in; if the sum
 * in the low half went negative it would sign-extend over the high half.
 * Callers assert non-negative coordinates before use — confirm that holds
 * on all paths before relying on this for clipped coordinates. */
static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	x += v & 0xffff;
	y += v >> 16;
	return (uint16_t)y << 16 | x;
}

/* Threaded box copy for the 32-bit relocation path: emits 8-dword copy
 * packets directly into the batch, splitting on both batch-space and
 * relocation-table limits (2 relocs per box). */
static void blt_composite_copy_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		/* No dst translation: the box can be stored verbatim as the
		 * two destination-coordinate dwords. */
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG((" %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				/* Source origin = dst origin (b[2]) plus the
				 * src offset, added in packed form. */
				b[5] = add2(b[2], src_dx, src_dy);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Out of room: flush and continue in a new batch. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	} else {
		/* Translate each box by the dst offset while emitting. */
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG((" %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}

/* As blt_composite_copy_boxes__thread, but using 64-bit relocations
 * (kgem_add_reloc64) so each copy packet is 10 dwords; selected for
 * gen >= 0100 in prepare_blt_copy(). */
static void blt_composite_copy_boxes__thread64(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG((" %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem,
							 kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = add2(b[2], src_dx, src_dy);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Out of room: flush and continue in a new batch. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	} else {
		/* Translate each box by the dst offset while emitting. */
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG((" %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}

/* Copy one rectangle through the alpha-fixup path (sna_blt_alpha_fixup_one),
 * clipping against the destination bounds first; mirrors blt_composite_copy. */
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	/* src is kept relative to the unclipped dst origin. */
	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				x1 + src_x, y1 + src_y,
				x2 - x1, y2 - y1,
				x1, y1);
}

fastcall static void
blt_composite_copy_box_with_alpha(struct sna *sna,
				  const struct sna_composite_op *op,
				  const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				box->x1 + op->u.blt.sx,
				box->y1 + op->u.blt.sy,
				box->x2 - box->x1,
				box->y2 - box->y1,
				box->x1 + op->dst.x,
				box->y1 + op->dst.y);
}

static void
blt_composite_copy_boxes_with_alpha(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		DBG(("%s: box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
		sna_blt_alpha_fixup_one(sna, &op->u.blt,
					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
					box->x2 - box->x1, box->y2 - box->y1,
					box->x1 + op->dst.x, box->y1 + op->dst.y);
		box++;
	} while(--nbox);
}

/* Select and initialise the BLT copy paths (plain or alpha-fixup),
 * falling back to the tiled composite path if both bos cannot be fenced
 * into the aperture even after a flush. */
static bool
prepare_blt_copy(struct sna *sna,
		 struct sna_composite_op *op,
		 struct kgem_bo *bo,
		 uint32_t alpha_fixup)
{
	PixmapPtr src = op->u.blt.src_pixmap;

	assert(op->dst.bo);
	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
	assert(kgem_bo_can_blt(&sna->kgem, bo));

	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_many_bo_fenced(&sna->kgem,
					       op->dst.bo, bo, NULL)) {
			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
			return sna_tiling_blt_composite(sna, op, bo,
							src->drawable.bitsPerPixel,
							alpha_fixup);
		}
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}

	DBG(("%s\n", __FUNCTION__));

	/* gen6+ copying within the same bo uses the dedicated done handler
	 * (gen6_blt_copy_done, which emits XY_SETUP_CLIP). */
	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
		op->done = gen6_blt_copy_done;
	else
		op->done = nop_done;

	if (alpha_fixup) {
		op->blt = blt_composite_copy_with_alpha;
		op->box = blt_composite_copy_box_with_alpha;
		op->boxes = blt_composite_copy_boxes_with_alpha;

		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
					      src->drawable.bitsPerPixel,
					      alpha_fixup))
			return false;
	} else {
		op->blt = blt_composite_copy;
		op->box = blt_composite_copy_box;
		op->boxes = blt_composite_copy_boxes;
		/* 64-bit relocation variant for gen >= 0100 (octal). */
		if (sna->kgem.gen >= 0100)
			op->thread_boxes = blt_composite_copy_boxes__thread64;
		else
			op->thread_boxes = blt_composite_copy_boxes__thread;

		if (!sna_blt_copy_init(sna,
				       &op->u.blt, bo, op->dst.bo,
				       src->drawable.bitsPerPixel,
				       GXcopy))
			return false;
	}

	return true;
}

/* CPU-only "put": both src and dst pixmaps are mapped; straight memcpy
 * blit of the requested rectangle. */
fastcall static void
blt_put_composite__cpu(struct sna *sna,
		       const struct sna_composite_op *op,
		       const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height);
}

fastcall static void
blt_put_composite_box__cpu(struct sna *sna,
			   const struct sna_composite_op *op,
			   const BoxRec *box)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
		   box->x1 + op->dst.x, box->y1 + op->dst.y,
		   box->x2-box->x1, box->y2-box->y1);
}

static void
blt_put_composite_boxes__cpu(struct sna *sna,
			     const struct sna_composite_op *op,
			     const BoxRec *box, int n)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	do {
		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
			   box->x1 + op->dst.x, box->y1 + op->dst.y,
			   box->x2-box->x1, box->y2-box->y1);
		box++;
	} while (--n);
}

/* CPU "put" with alpha fixup: memcpy_xor overwrites the copied pixels
 * with op->u.blt.pixel OR-ed in (mask 0xffffffff). */
fastcall static void
blt_put_composite_with_alpha__cpu(struct sna *sna,
				  const struct sna_composite_op *op,
				  const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height,
		   0xffffffff, op->u.blt.pixel);

}

fastcall static void
blt_put_composite_box_with_alpha__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
		   box->x1 + op->dst.x, box->y1 + op->dst.y,
		   box->x2-box->x1, box->y2-box->y1,
		   0xffffffff, op->u.blt.pixel);
}

static void
blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	do {
		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
			   box->x1 + op->dst.x, box->y1 + op->dst.y,
			   box->x2-box->x1, box->y2-box->y1,
			   0xffffffff, op->u.blt.pixel);
		box++;
	} while (--n);
}

/* Upload src pixels into the GPU destination: if the rectangle covers the
 * whole destination and the pixmap is not pinned, replace the bo outright
 * via sna_replace(); otherwise write just the covered box. */
fastcall static void
blt_put_composite(struct sna *sna,
		  const struct sna_composite_op *op,
		  const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char *data = src->devPrivate.ptr;
	int bpp = src->drawable.bitsPerPixel;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		/* Point data at the source pixel matching dst (0,0). */
		data += (src_x - dst_x) * bpp / 8;
		data += (src_y - dst_y) * pitch;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		BoxRec box;
		bool ok;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		ok = sna_write_boxes(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1);
		assert(ok);
		(void)ok;
	}
}

/* Box-based variant of blt_put_composite: full-coverage boxes replace the
 * bo, others are written through sna_write_boxes(). */
fastcall static void blt_put_composite_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y));

	assert(src->devPrivate.ptr);
	assert(src->devKind);
	if (!dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		bool ok;

		ok = sna_write_boxes(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, 1);
		assert(ok);
		(void)ok;
	}
}

static void blt_put_composite_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y,
	     box->x1, box->y1, box->x2, box->y2, n));

	assert(src->devPrivate.ptr);
	assert(src->devKind);
	/* A single box covering the whole (unpinned) destination can
	 * replace the bo outright instead of writing boxes. */
	if (n == 1 && !dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		bool ok;

		ok = sna_write_boxes(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, n);
		assert(ok);
		(void)ok;
	}
}

/* Upload with alpha fixup into the GPU destination (xor write paths). */
fastcall static void
blt_put_composite_with_alpha(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char
	     *data = src->devPrivate.ptr;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	assert(src->devPrivate.ptr);
	assert(src->devKind);

	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		int bpp = dst->drawable.bitsPerPixel / 8;

		/* Point data at the source pixel matching dst (0,0). */
		data += (src_x - dst_x) * bpp;
		data += (src_y - dst_y) * pitch;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
				 0xffffffff, op->u.blt.pixel);
	} else {
		BoxRec box;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		sna_write_boxes__xor(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1,
				     0xffffffff, op->u.blt.pixel);
	}
}

fastcall static void
blt_put_composite_box_with_alpha(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y));

	assert(src->devPrivate.ptr);
	assert(src->devKind);

	/* Full-coverage, unpinned destination: replace the bo in one go. */
	if (!dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
				 0xffffffff, op->u.blt.pixel);
	} else {
		sna_write_boxes__xor(sna, op->dst.pixmap,
2305 op->dst.bo, op->dst.x, op->dst.y, 2306 src->devPrivate.ptr, 2307 src->devKind, 2308 op->u.blt.sx, op->u.blt.sy, 2309 box, 1, 2310 0xffffffff, op->u.blt.pixel); 2311 } 2312} 2313 2314static void 2315blt_put_composite_boxes_with_alpha(struct sna *sna, 2316 const struct sna_composite_op *op, 2317 const BoxRec *box, int n) 2318{ 2319 PixmapPtr src = op->u.blt.src_pixmap; 2320 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); 2321 2322 DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__, 2323 op->u.blt.sx, op->u.blt.sy, 2324 op->dst.x, op->dst.y, 2325 box->x1, box->y1, box->x2, box->y2, n)); 2326 2327 assert(src->devPrivate.ptr); 2328 assert(src->devKind); 2329 2330 if (n == 1 && !dst_priv->pinned && 2331 box->x2 - box->x1 == op->dst.width && 2332 box->y2 - box->y1 == op->dst.height) { 2333 int pitch = src->devKind; 2334 int bpp = src->drawable.bitsPerPixel / 8; 2335 char *data = src->devPrivate.ptr; 2336 2337 data += (box->y1 + op->u.blt.sy) * pitch; 2338 data += (box->x1 + op->u.blt.sx) * bpp; 2339 2340 assert(dst_priv->gpu_bo == op->dst.bo); 2341 sna_replace__xor(sna, op->dst.pixmap, data, pitch, 2342 0xffffffff, op->u.blt.pixel); 2343 } else { 2344 sna_write_boxes__xor(sna, op->dst.pixmap, 2345 op->dst.bo, op->dst.x, op->dst.y, 2346 src->devPrivate.ptr, 2347 src->devKind, 2348 op->u.blt.sx, op->u.blt.sy, 2349 box, n, 2350 0xffffffff, op->u.blt.pixel); 2351 } 2352} 2353 2354static bool 2355prepare_blt_put(struct sna *sna, 2356 struct sna_composite_op *op, 2357 uint32_t alpha_fixup) 2358{ 2359 DBG(("%s\n", __FUNCTION__)); 2360 2361 assert(!sna_pixmap(op->dst.pixmap)->clear); 2362 2363 if (op->dst.bo) { 2364 assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo); 2365 if (alpha_fixup) { 2366 op->u.blt.pixel = alpha_fixup; 2367 op->blt = blt_put_composite_with_alpha; 2368 op->box = blt_put_composite_box_with_alpha; 2369 op->boxes = blt_put_composite_boxes_with_alpha; 2370 } else { 2371 op->blt = blt_put_composite; 2372 op->box 
= blt_put_composite_box; 2373 op->boxes = blt_put_composite_boxes; 2374 } 2375 } else { 2376 if (alpha_fixup) { 2377 op->u.blt.pixel = alpha_fixup; 2378 op->blt = blt_put_composite_with_alpha__cpu; 2379 op->box = blt_put_composite_box_with_alpha__cpu; 2380 op->boxes = blt_put_composite_boxes_with_alpha__cpu; 2381 } else { 2382 op->blt = blt_put_composite__cpu; 2383 op->box = blt_put_composite_box__cpu; 2384 op->boxes = blt_put_composite_boxes__cpu; 2385 } 2386 } 2387 op->done = nop_done; 2388 2389 return true; 2390} 2391 2392static bool 2393is_clear(PixmapPtr pixmap) 2394{ 2395 struct sna_pixmap *priv = sna_pixmap(pixmap); 2396 return priv && priv->clear; 2397} 2398 2399static inline uint32_t 2400over(uint32_t src, uint32_t dst) 2401{ 2402 uint32_t a = ~src >> 24; 2403 2404#define G_SHIFT 8 2405#define RB_MASK 0xff00ff 2406#define RB_ONE_HALF 0x800080 2407#define RB_MASK_PLUS_ONE 0x10000100 2408 2409#define UN8_rb_MUL_UN8(x, a, t) do { \ 2410 t = ((x) & RB_MASK) * (a); \ 2411 t += RB_ONE_HALF; \ 2412 x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ 2413 x &= RB_MASK; \ 2414} while (0) 2415 2416#define UN8_rb_ADD_UN8_rb(x, y, t) do { \ 2417 t = ((x) + (y)); \ 2418 t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ 2419 x = (t & RB_MASK); \ 2420} while (0) 2421 2422#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do { \ 2423 uint32_t r1__, r2__, r3__, t__; \ 2424 \ 2425 r1__ = (x); \ 2426 r2__ = (y) & RB_MASK; \ 2427 UN8_rb_MUL_UN8(r1__, (a), t__); \ 2428 UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \ 2429 \ 2430 r2__ = (x) >> G_SHIFT; \ 2431 r3__ = ((y) >> G_SHIFT) & RB_MASK; \ 2432 UN8_rb_MUL_UN8(r2__, (a), t__); \ 2433 UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \ 2434 \ 2435 (x) = r1__ | (r2__ << G_SHIFT); \ 2436} while (0) 2437 2438 UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src); 2439 2440 return dst; 2441} 2442 2443static inline uint32_t 2444add(uint32_t src, uint32_t dst) 2445{ 2446#define UN8x4_ADD_UN8x4(x, y) do { \ 2447 uint32_t r1__, r2__, r3__, t__; \ 2448 \ 2449 r1__ = (x) & RB_MASK; 
\ 2450 r2__ = (y) & RB_MASK; \ 2451 UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \ 2452 \ 2453 r2__ = ((x) >> G_SHIFT) & RB_MASK; \ 2454 r3__ = ((y) >> G_SHIFT) & RB_MASK; \ 2455 UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \ 2456 \ 2457 x = r1__ | (r2__ << G_SHIFT); \ 2458} while (0) 2459 2460 UN8x4_ADD_UN8x4(src, dst); 2461 return src; 2462} 2463 2464bool 2465sna_blt_composite(struct sna *sna, 2466 uint32_t op, 2467 PicturePtr src, 2468 PicturePtr dst, 2469 int16_t x, int16_t y, 2470 int16_t dst_x, int16_t dst_y, 2471 int16_t width, int16_t height, 2472 unsigned flags, 2473 struct sna_composite_op *tmp) 2474{ 2475 PictFormat src_format = src->format; 2476 PixmapPtr src_pixmap; 2477 struct kgem_bo *bo; 2478 int16_t tx, ty; 2479 BoxRec dst_box, src_box; 2480 uint32_t alpha_fixup; 2481 uint32_t color, hint; 2482 bool was_clear; 2483 bool ret; 2484 2485#if DEBUG_NO_BLT || NO_BLT_COMPOSITE 2486 return false; 2487#endif 2488 DBG(("%s (%d, %d), (%d, %d), %dx%d\n", 2489 __FUNCTION__, x, y, dst_x, dst_y, width, height)); 2490 2491 switch (dst->pDrawable->bitsPerPixel) { 2492 case 8: 2493 case 16: 2494 case 32: 2495 break; 2496 default: 2497 DBG(("%s: unhandled bpp: %d\n", __FUNCTION__, 2498 dst->pDrawable->bitsPerPixel)); 2499 return false; 2500 } 2501 2502 tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 2503 was_clear = is_clear(tmp->dst.pixmap); 2504 2505 if (width | height) { 2506 dst_box.x1 = dst_x; 2507 dst_box.x2 = bound(dst_x, width); 2508 dst_box.y1 = dst_y; 2509 dst_box.y2 = bound(dst_y, height); 2510 } else 2511 sna_render_picture_extents(dst, &dst_box); 2512 2513 tmp->dst.format = dst->format; 2514 tmp->dst.width = tmp->dst.pixmap->drawable.width; 2515 tmp->dst.height = tmp->dst.pixmap->drawable.height; 2516 get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap, 2517 &tmp->dst.x, &tmp->dst.y); 2518 2519 if (op == PictOpClear) { 2520clear: 2521 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) { 2522 sna_pixmap(tmp->dst.pixmap)->clear = true; 2523 return 
prepare_blt_nop(sna, tmp); 2524 } 2525 2526 hint = 0; 2527 if (can_render(sna)) { 2528 hint |= PREFER_GPU; 2529 if ((flags & COMPOSITE_PARTIAL) == 0) { 2530 hint |= IGNORE_DAMAGE; 2531 if (width == tmp->dst.pixmap->drawable.width && 2532 height == tmp->dst.pixmap->drawable.height) 2533 hint |= REPLACES; 2534 } 2535 } 2536 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2537 &dst_box, &tmp->damage); 2538 if (tmp->dst.bo) { 2539 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2540 DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", 2541 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch)); 2542 return false; 2543 } 2544 if (hint & REPLACES) 2545 kgem_bo_undo(&sna->kgem, tmp->dst.bo); 2546 } else { 2547 RegionRec region; 2548 2549 region.extents = dst_box; 2550 region.data = NULL; 2551 2552 hint = MOVE_WRITE | MOVE_INPLACE_HINT; 2553 if (flags & COMPOSITE_PARTIAL) 2554 hint |= MOVE_READ; 2555 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint)) 2556 return false; 2557 } 2558 2559 return prepare_blt_clear(sna, tmp); 2560 } 2561 2562 if (is_solid(src)) { 2563 if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) { 2564 sna_pixmap(tmp->dst.pixmap)->clear = was_clear; 2565 return prepare_blt_nop(sna, tmp); 2566 } 2567 if (op == PictOpOver && is_opaque_solid(src)) 2568 op = PictOpSrc; 2569 if (op == PictOpAdd && is_white(src)) 2570 op = PictOpSrc; 2571 if (was_clear && (op == PictOpAdd || op == PictOpOver)) { 2572 if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0) 2573 op = PictOpSrc; 2574 if (op == PictOpOver) { 2575 color = over(get_solid_color(src, PICT_a8r8g8b8), 2576 color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, 2577 dst->format, PICT_a8r8g8b8)); 2578 op = PictOpSrc; 2579 DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n", 2580 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), 2581 color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, 2582 dst->format, PICT_a8r8g8b8), 2583 color)); 2584 } 2585 if (op == 
PictOpAdd) { 2586 color = add(get_solid_color(src, PICT_a8r8g8b8), 2587 color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, 2588 dst->format, PICT_a8r8g8b8)); 2589 op = PictOpSrc; 2590 DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n", 2591 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), 2592 color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, 2593 dst->format, PICT_a8r8g8b8), 2594 color)); 2595 } 2596 } 2597 if (op == PictOpOutReverse && is_opaque_solid(src)) 2598 goto clear; 2599 2600 if (op != PictOpSrc) { 2601 DBG(("%s: unsupported op [%d] for blitting\n", 2602 __FUNCTION__, op)); 2603 return false; 2604 } 2605 2606 color = get_solid_color(src, tmp->dst.format); 2607fill: 2608 if (color == 0) 2609 goto clear; 2610 2611 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) { 2612 sna_pixmap(tmp->dst.pixmap)->clear = true; 2613 return prepare_blt_nop(sna, tmp); 2614 } 2615 2616 hint = 0; 2617 if (can_render(sna)) { 2618 hint |= PREFER_GPU; 2619 if ((flags & COMPOSITE_PARTIAL) == 0) { 2620 hint |= IGNORE_DAMAGE; 2621 if (width == tmp->dst.pixmap->drawable.width && 2622 height == tmp->dst.pixmap->drawable.height) 2623 hint |= REPLACES; 2624 } 2625 } 2626 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2627 &dst_box, &tmp->damage); 2628 if (tmp->dst.bo) { 2629 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2630 DBG(("%s: can not blit to dst, tiling? %d, pitch? 
%d\n", 2631 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch)); 2632 return false; 2633 } 2634 if (hint & REPLACES) 2635 kgem_bo_undo(&sna->kgem, tmp->dst.bo); 2636 } else { 2637 RegionRec region; 2638 2639 region.extents = dst_box; 2640 region.data = NULL; 2641 2642 hint = MOVE_WRITE | MOVE_INPLACE_HINT; 2643 if (flags & COMPOSITE_PARTIAL) 2644 hint |= MOVE_READ; 2645 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint)) 2646 return false; 2647 } 2648 2649 return prepare_blt_fill(sna, tmp, color); 2650 } 2651 2652 if (!src->pDrawable) { 2653 DBG(("%s: unsupported procedural source\n", 2654 __FUNCTION__)); 2655 return false; 2656 } 2657 2658 if (src->filter == PictFilterConvolution) { 2659 DBG(("%s: convolutions filters not handled\n", 2660 __FUNCTION__)); 2661 return false; 2662 } 2663 2664 if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0) 2665 op = PictOpSrc; 2666 2667 if (op != PictOpSrc) { 2668 DBG(("%s: unsupported op [%d] for blitting\n", 2669 __FUNCTION__, op)); 2670 return false; 2671 } 2672 2673 if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter, 2674 dst->polyMode == PolyModePrecise, 2675 &tx, &ty)) { 2676 DBG(("%s: source transform is not an integer translation\n", 2677 __FUNCTION__)); 2678 return false; 2679 } 2680 DBG(("%s: converting transform to integer translation? 
(%d, %d)\n", 2681 __FUNCTION__, src->transform != NULL, tx, ty)); 2682 x += tx; 2683 y += ty; 2684 2685 if ((x >= src->pDrawable->width || 2686 y >= src->pDrawable->height || 2687 x + width <= 0 || 2688 y + height <= 0) && 2689 (!src->repeat || src->repeatType == RepeatNone)) { 2690 DBG(("%s: source is outside of valid area, converting to clear\n", 2691 __FUNCTION__)); 2692 goto clear; 2693 } 2694 2695 src_pixmap = get_drawable_pixmap(src->pDrawable); 2696 if (is_clear(src_pixmap)) { 2697 if (src->repeat || 2698 (x >= 0 && y >= 0 && 2699 x + width < src_pixmap->drawable.width && 2700 y + height < src_pixmap->drawable.height)) { 2701 color = color_convert(sna_pixmap(src_pixmap)->clear_color, 2702 src->format, tmp->dst.format); 2703 goto fill; 2704 } 2705 } 2706 2707 alpha_fixup = 0; 2708 if (!(dst->format == src_format || 2709 dst->format == alphaless(src_format) || 2710 (alphaless(dst->format) == alphaless(src_format) && 2711 sna_get_pixel_from_rgba(&alpha_fixup, 2712 0, 0, 0, 0xffff, 2713 dst->format)))) { 2714 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", 2715 __FUNCTION__, (unsigned)src_format, dst->format)); 2716 return false; 2717 } 2718 2719 /* XXX tiling? fixup extend none? 
*/ 2720 if (x < 0 || y < 0 || 2721 x + width > src->pDrawable->width || 2722 y + height > src->pDrawable->height) { 2723 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n", 2724 __FUNCTION__, 2725 x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType)); 2726 if (src->repeat && src->repeatType == RepeatNormal) { 2727 x = x % src->pDrawable->width; 2728 y = y % src->pDrawable->height; 2729 if (x < 0) 2730 x += src->pDrawable->width; 2731 if (y < 0) 2732 y += src->pDrawable->height; 2733 if (x + width > src->pDrawable->width || 2734 y + height > src->pDrawable->height) 2735 return false; 2736 } else 2737 return false; 2738 } 2739 2740 get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty); 2741 x += tx + src->pDrawable->x; 2742 y += ty + src->pDrawable->y; 2743 if (x < 0 || y < 0 || 2744 x + width > src_pixmap->drawable.width || 2745 y + height > src_pixmap->drawable.height) { 2746 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n", 2747 __FUNCTION__, 2748 x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height)); 2749 return false; 2750 } 2751 2752 tmp->u.blt.src_pixmap = src_pixmap; 2753 tmp->u.blt.sx = x - dst_x; 2754 tmp->u.blt.sy = y - dst_y; 2755 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n", 2756 __FUNCTION__, 2757 tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); 2758 2759 src_box.x1 = x; 2760 src_box.y1 = y; 2761 src_box.x2 = x + width; 2762 src_box.y2 = y + height; 2763 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); 2764 if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) { 2765 DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? 
%d\n", 2766 __FUNCTION__, 2767 src_pixmap->drawable.width < sna->render.max_3d_size, 2768 src_pixmap->drawable.height < sna->render.max_3d_size, 2769 bo->tiling, bo->pitch)); 2770 2771 if (src_pixmap->drawable.width <= sna->render.max_3d_size && 2772 src_pixmap->drawable.height <= sna->render.max_3d_size && 2773 bo->pitch <= sna->render.max_3d_pitch && 2774 (flags & COMPOSITE_FALLBACK) == 0) 2775 { 2776 return false; 2777 } 2778 2779 bo = NULL; 2780 } 2781 2782 hint = 0; 2783 if (bo || can_render(sna)) { 2784 hint |= PREFER_GPU; 2785 if ((flags & COMPOSITE_PARTIAL) == 0) { 2786 hint |= IGNORE_DAMAGE; 2787 if (width == tmp->dst.pixmap->drawable.width && 2788 height == tmp->dst.pixmap->drawable.height) 2789 hint |= REPLACES; 2790 } 2791 if (bo) 2792 hint |= FORCE_GPU; 2793 } 2794 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, 2795 &dst_box, &tmp->damage); 2796 2797 if (tmp->dst.bo && hint & REPLACES) { 2798 struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); 2799 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2800 } 2801 2802 if (tmp->dst.pixmap == src_pixmap) 2803 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); 2804 2805 ret = false; 2806 if (bo) { 2807 if (!tmp->dst.bo) { 2808 DBG(("%s: fallback -- unaccelerated read back\n", 2809 __FUNCTION__)); 2810fallback: 2811 if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo)) 2812 goto put; 2813 } else if (!kgem_bo_can_blt(&sna->kgem, bo)) { 2814 DBG(("%s: fallback -- cannot blit from source\n", 2815 __FUNCTION__)); 2816 goto fallback; 2817 } else if (bo->snoop && tmp->dst.bo->snoop) { 2818 DBG(("%s: fallback -- can not copy between snooped bo\n", 2819 __FUNCTION__)); 2820 goto put; 2821 } else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { 2822 DBG(("%s: fallback -- unaccelerated upload\n", 2823 __FUNCTION__)); 2824 goto fallback; 2825 } else { 2826 ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); 2827 if (!ret) 2828 goto fallback; 2829 } 2830 } else { 2831 RegionRec region; 
2832 2833put: 2834 if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) { 2835 DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__)); 2836 tmp->dst.bo = NULL; 2837 tmp->damage = NULL; 2838 } 2839 2840 if (tmp->dst.bo == NULL) { 2841 hint = MOVE_INPLACE_HINT | MOVE_WRITE; 2842 if (flags & COMPOSITE_PARTIAL) 2843 hint |= MOVE_READ; 2844 2845 region.extents = dst_box; 2846 region.data = NULL; 2847 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, 2848 ®ion, hint)) 2849 return false; 2850 2851 assert(tmp->damage == NULL); 2852 } 2853 2854 region.extents = src_box; 2855 region.data = NULL; 2856 if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable, 2857 ®ion, MOVE_READ)) 2858 return false; 2859 2860 ret = prepare_blt_put(sna, tmp, alpha_fixup); 2861 } 2862 2863 return ret; 2864} 2865 2866static void convert_done(struct sna *sna, const struct sna_composite_op *op) 2867{ 2868 struct kgem *kgem = &sna->kgem; 2869 2870 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 2871 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 2872 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 2873 _kgem_submit(kgem); 2874 } 2875 2876 kgem_bo_destroy(kgem, op->src.bo); 2877 sna_render_composite_redirect_done(sna, op); 2878} 2879 2880static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op) 2881{ 2882 struct kgem *kgem = &sna->kgem; 2883 2884 if (kgem_check_batch(kgem, 3)) { 2885 uint32_t *b = kgem->batch + kgem->nbatch; 2886 assert(sna->kgem.mode == KGEM_BLT); 2887 b[0] = XY_SETUP_CLIP; 2888 b[1] = b[2] = 0; 2889 kgem->nbatch += 3; 2890 assert(kgem->nbatch < kgem->surface); 2891 } 2892 2893 convert_done(sna, op); 2894} 2895 2896bool 2897sna_blt_composite__convert(struct sna *sna, 2898 int x, int y, 2899 int width, int height, 2900 struct sna_composite_op *tmp) 2901{ 2902 uint32_t alpha_fixup; 2903 int sx, sy; 2904 uint8_t op; 2905 2906#if DEBUG_NO_BLT || NO_BLT_COMPOSITE 2907 return false; 2908#endif 2909 2910 DBG(("%s src=%d, dst=%d 
(redirect? %d)\n", __FUNCTION__, 2911 tmp->src.bo->handle, tmp->dst.bo->handle, 2912 tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0)); 2913 2914 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) || 2915 !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) { 2916 DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__)); 2917 return false; 2918 } 2919 2920 if (tmp->src.transform) { 2921 DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__)); 2922 return false; 2923 } 2924 2925 if (tmp->src.filter == PictFilterConvolution) { 2926 DBG(("%s: convolutions filters not handled\n", 2927 __FUNCTION__)); 2928 return false; 2929 } 2930 2931 op = tmp->op; 2932 if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0) 2933 op = PictOpSrc; 2934 if (op != PictOpSrc) { 2935 DBG(("%s: unsupported op [%d] for blitting\n", 2936 __FUNCTION__, op)); 2937 return false; 2938 } 2939 2940 alpha_fixup = 0; 2941 if (!(tmp->dst.format == tmp->src.pict_format || 2942 tmp->dst.format == alphaless(tmp->src.pict_format) || 2943 (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) && 2944 sna_get_pixel_from_rgba(&alpha_fixup, 2945 0, 0, 0, 0xffff, 2946 tmp->dst.format)))) { 2947 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", 2948 __FUNCTION__, 2949 (unsigned)tmp->src.pict_format, 2950 (unsigned)tmp->dst.format)); 2951 return false; 2952 } 2953 2954 sx = tmp->src.offset[0]; 2955 sy = tmp->src.offset[1]; 2956 2957 x += sx; 2958 y += sy; 2959 if (x < 0 || y < 0 || 2960 x + width > tmp->src.width || 2961 y + height > tmp->src.height) { 2962 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n", 2963 __FUNCTION__, 2964 x, y, x+width, y+width, tmp->src.width, tmp->src.height)); 2965 if (tmp->src.repeat == RepeatNormal) { 2966 int xx = x % tmp->src.width; 2967 int yy = y % tmp->src.height; 2968 if (xx < 0) 2969 xx += tmp->src.width; 2970 if (yy < 0) 2971 yy += tmp->src.height; 2972 if (xx + width > tmp->src.width || 2973 yy + height > 
tmp->src.height) 2974 return false; 2975 2976 sx += xx - x; 2977 sy += yy - y; 2978 } else 2979 return false; 2980 } 2981 2982 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n", 2983 __FUNCTION__, 2984 tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup)); 2985 2986 tmp->u.blt.src_pixmap = NULL; 2987 tmp->u.blt.sx = sx; 2988 tmp->u.blt.sy = sy; 2989 2990 kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo); 2991 if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) { 2992 kgem_submit(&sna->kgem); 2993 if (!kgem_check_many_bo_fenced(&sna->kgem, 2994 tmp->dst.bo, tmp->src.bo, NULL)) { 2995 DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); 2996 return sna_tiling_blt_composite(sna, tmp, tmp->src.bo, 2997 PICT_FORMAT_BPP(tmp->src.pict_format), 2998 alpha_fixup); 2999 } 3000 _kgem_set_mode(&sna->kgem, KGEM_BLT); 3001 } 3002 3003 if (alpha_fixup) { 3004 tmp->blt = blt_composite_copy_with_alpha; 3005 tmp->box = blt_composite_copy_box_with_alpha; 3006 tmp->boxes = blt_composite_copy_boxes_with_alpha; 3007 3008 if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt, 3009 tmp->src.bo, tmp->dst.bo, 3010 PICT_FORMAT_BPP(tmp->src.pict_format), 3011 alpha_fixup)) 3012 return false; 3013 } else { 3014 tmp->blt = blt_composite_copy; 3015 tmp->box = blt_composite_copy_box; 3016 tmp->boxes = blt_composite_copy_boxes; 3017 tmp->thread_boxes = blt_composite_copy_boxes__thread; 3018 3019 if (!sna_blt_copy_init(sna, &tmp->u.blt, 3020 tmp->src.bo, tmp->dst.bo, 3021 PICT_FORMAT_BPP(tmp->src.pict_format), 3022 GXcopy)) 3023 return false; 3024 } 3025 3026 tmp->done = convert_done; 3027 if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo) 3028 tmp->done = gen6_convert_done; 3029 3030 return true; 3031} 3032 3033static void sna_blt_fill_op_blt(struct sna *sna, 3034 const struct sna_fill_op *op, 3035 int16_t x, int16_t y, 3036 int16_t width, int16_t height) 3037{ 3038 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3039 const 
struct sna_blt_state *blt = &op->base.u.blt; 3040 3041 sna_blt_fill_begin(sna, blt); 3042 3043 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3044 sna->blt_state.fill_pixel = blt->pixel; 3045 sna->blt_state.fill_alu = blt->alu; 3046 } 3047 3048 sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height); 3049} 3050 3051fastcall static void sna_blt_fill_op_box(struct sna *sna, 3052 const struct sna_fill_op *op, 3053 const BoxRec *box) 3054{ 3055 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3056 const struct sna_blt_state *blt = &op->base.u.blt; 3057 3058 sna_blt_fill_begin(sna, blt); 3059 3060 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3061 sna->blt_state.fill_pixel = blt->pixel; 3062 sna->blt_state.fill_alu = blt->alu; 3063 } 3064 3065 _sna_blt_fill_box(sna, &op->base.u.blt, box); 3066} 3067 3068fastcall static void sna_blt_fill_op_boxes(struct sna *sna, 3069 const struct sna_fill_op *op, 3070 const BoxRec *box, 3071 int nbox) 3072{ 3073 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3074 const struct sna_blt_state *blt = &op->base.u.blt; 3075 3076 sna_blt_fill_begin(sna, blt); 3077 3078 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3079 sna->blt_state.fill_pixel = blt->pixel; 3080 sna->blt_state.fill_alu = blt->alu; 3081 } 3082 3083 _sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox); 3084} 3085 3086static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy) 3087{ 3088 union { 3089 DDXPointRec pt; 3090 uint32_t i; 3091 } u; 3092 3093 u.pt.x = pt->x + dx; 3094 u.pt.y = pt->y + dy; 3095 3096 return cmd | (uint64_t)u.i<<32; 3097} 3098 3099fastcall static void sna_blt_fill_op_points(struct sna *sna, 3100 const struct sna_fill_op *op, 3101 int16_t dx, int16_t dy, 3102 const DDXPointRec *p, int n) 3103{ 3104 const struct sna_blt_state *blt = &op->base.u.blt; 3105 struct kgem *kgem = &sna->kgem; 3106 uint32_t cmd; 3107 3108 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n)); 3109 3110 if 
(sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { 3111 sna_blt_fill_begin(sna, blt); 3112 3113 sna->blt_state.fill_bo = blt->bo[0]->unique_id; 3114 sna->blt_state.fill_pixel = blt->pixel; 3115 sna->blt_state.fill_alu = blt->alu; 3116 } 3117 3118 if (!kgem_check_batch(kgem, 2)) 3119 sna_blt_fill_begin(sna, blt); 3120 3121 cmd = XY_PIXEL_BLT; 3122 if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling) 3123 cmd |= BLT_DST_TILED; 3124 3125 do { 3126 uint32_t *b = kgem->batch + kgem->nbatch; 3127 int n_this_time; 3128 3129 assert(sna->kgem.mode == KGEM_BLT); 3130 n_this_time = n; 3131 if (2*n_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 3132 n_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 2; 3133 assert(n_this_time); 3134 n -= n_this_time; 3135 3136 kgem->nbatch += 2 * n_this_time; 3137 assert(kgem->nbatch < kgem->surface); 3138 3139 if ((dx|dy) == 0) { 3140 while (n_this_time >= 8) { 3141 *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); 3142 *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); 3143 *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); 3144 *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); 3145 *((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0); 3146 *((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0); 3147 *((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0); 3148 *((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0); 3149 b += 16; 3150 n_this_time -= 8; 3151 p += 8; 3152 } 3153 if (n_this_time & 4) { 3154 *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); 3155 *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); 3156 *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); 3157 *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); 3158 b += 8; 3159 p += 4; 3160 } 3161 if (n_this_time & 2) { 3162 *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); 3163 *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); 3164 b += 4; 3165 p += 2; 3166 } 3167 if (n_this_time & 1) 3168 *((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0); 3169 } else { 3170 while (n_this_time >= 8) { 3171 *((uint64_t *)b + 
0) = pt_add(cmd, p+0, dx, dy); 3172 *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); 3173 *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); 3174 *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); 3175 *((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy); 3176 *((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy); 3177 *((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy); 3178 *((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy); 3179 b += 16; 3180 n_this_time -= 8; 3181 p += 8; 3182 } 3183 if (n_this_time & 4) { 3184 *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); 3185 *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); 3186 *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); 3187 *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); 3188 b += 8; 3189 p += 8; 3190 } 3191 if (n_this_time & 2) { 3192 *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); 3193 *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); 3194 b += 4; 3195 p += 2; 3196 } 3197 if (n_this_time & 1) 3198 *((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy); 3199 } 3200 3201 if (!n) 3202 return; 3203 3204 sna_blt_fill_begin(sna, blt); 3205 } while (1); 3206} 3207 3208bool sna_blt_fill(struct sna *sna, uint8_t alu, 3209 struct kgem_bo *bo, int bpp, 3210 uint32_t pixel, 3211 struct sna_fill_op *fill) 3212{ 3213#if DEBUG_NO_BLT || NO_BLT_FILL 3214 return false; 3215#endif 3216 3217 DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp)); 3218 3219 if (!kgem_bo_can_blt(&sna->kgem, bo)) { 3220 DBG(("%s: rejected due to incompatible Y-tiling\n", 3221 __FUNCTION__)); 3222 return false; 3223 } 3224 3225 if (!sna_blt_fill_init(sna, &fill->base.u.blt, 3226 bo, bpp, alu, pixel)) 3227 return false; 3228 3229 fill->blt = sna_blt_fill_op_blt; 3230 fill->box = sna_blt_fill_op_box; 3231 fill->boxes = sna_blt_fill_op_boxes; 3232 fill->points = sna_blt_fill_op_points; 3233 fill->done = 3234 (void (*)(struct sna *, const struct sna_fill_op *))nop_done; 3235 return true; 3236} 3237 3238static void sna_blt_copy_op_blt(struct sna *sna, 3239 const struct 
				sna_copy_op *op,
				int16_t src_x, int16_t src_y,
				int16_t width, int16_t height,
				int16_t dst_x, int16_t dst_y)
{
	sna_blt_copy_one(sna, &op->base.u.blt,
			 src_x, src_y,
			 width, height,
			 dst_x, dst_y);
}

/* Prepare a reusable BLT copy between two blt-capable bos; picks the
 * gen6+ done handler for overlapping src==dst copies. */
bool sna_blt_copy(struct sna *sna, uint8_t alu,
		  struct kgem_bo *src,
		  struct kgem_bo *dst,
		  int bpp,
		  struct sna_copy_op *op)
{
#if DEBUG_NO_BLT || NO_BLT_COPY
	return false;
#endif

	if (!kgem_bo_can_blt(&sna->kgem, src))
		return false;

	if (!kgem_bo_can_blt(&sna->kgem, dst))
		return false;

	if (!sna_blt_copy_init(sna, &op->base.u.blt,
			       src, dst,
			       bpp, alu))
		return false;

	op->blt = sna_blt_copy_op_blt;
	if (sna->kgem.gen >= 060 && src == dst)
		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
			gen6_blt_copy_done;
	else
		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
			nop_done;
	return true;
}

/* Emit a single XY_COLOR_BLT solid fill. If an overwriting alu is used
 * and the previous batch command was a fill or copy of exactly the same
 * box to the same bo, rewrite that command in place instead of appending
 * a new one. Returns false to ask the caller to use scanline fills. */
static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
			     struct kgem_bo *bo, int bpp,
			     uint32_t color,
			     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd, *b;
	bool overwrites;

	assert(kgem_bo_can_blt (kgem, bo));

	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);

	/* gen8+ uses 64-bit relocations, hence the longer command */
	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
	br13 = bo->pitch;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;	/* tiled pitch is specified in dwords */
	}
	assert(br13 <= MAXSHORT);

	br13 |= fill_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fall through */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	/* All too frequently one blt completely overwrites the previous */
	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	if (overwrites) {
		if (sna->kgem.gen >= 0100) {
			/* 7-dword XY_COLOR_BLT: patch br13 and colour in place */
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-6] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			/* 10-dword XY_SRC_COPY_BLT: shrink it into this fill */
			if (kgem->nbatch >= 10 &&
			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-10] = cmd;
				kgem->batch[kgem->nbatch-8] = br13;
				kgem->batch[kgem->nbatch-4] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 3;
				return true;
			}
		} else {
			/* pre-gen8: 6-dword fill / 8-dword copy variants */
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-5] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			if (kgem->nbatch >= 8 &&
			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-8] = cmd;
				kgem->batch[kgem->nbatch-7] = br13;
				kgem->batch[kgem->nbatch-3] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 2;
				return true;
			}
		}
	}

	/* If we are currently emitting SCANLINES, keep doing so */
	if (sna->blt_state.fill_bo == bo->unique_id &&
	    sna->blt_state.fill_pixel == color &&
	    (sna->blt_state.fill_alu == alu ||
	     sna->blt_state.fill_alu == ~alu)) {
		DBG(("%s: matching last fill, converting to scanlines\n",
		     __FUNCTION__));
		return false;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 7) ||
	    !kgem_check_reloc(kgem, 1) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;

		_kgem_set_mode(kgem, KGEM_BLT);
	}

	assert(kgem_check_batch(kgem, 6));
	assert(kgem_check_reloc(kgem, 1));

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = cmd;
	b[1] = br13;
	*(uint64_t *)(b+2) = *(const uint64_t *)box;	/* x1,y1,x2,y2 as two dwords */
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = color;
		kgem->nbatch += 7;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = color;
		kgem->nbatch += 6;
	}
	assert(kgem->nbatch < kgem->surface);

	/* ~alu marks "last op was a one-shot fill, scanline setup not emitted" */
	sna->blt_state.fill_bo = bo->unique_id;
	sna->blt_state.fill_pixel = color;
	sna->blt_state.fill_alu = ~alu;
	return true;
}

/* Fill nbox boxes: a single box tries the one-shot XY_COLOR_BLT path,
 * otherwise emit an XY_SETUP_MONO_PATTERN_SL_BLT followed by batched
 * XY_SCANLINE_BLT rectangles. */
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *bo, int bpp,
			uint32_t pixel,
			const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
	return false;
#endif

	DBG(("%s (%d, %08x, %d) x %d\n",
	     __FUNCTION__, bpp, pixel, alu, nbox));

	if (!kgem_bo_can_blt(kgem, bo)) {
		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
		return false;
	}

	/* Canonicalise trivial alu/pixel combinations */
	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
		return true;

	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= 1 << 11;		/* dst tiled */
		br13 >>= 2;		/* tiled pitch in dwords */
	}
	assert(br13 <= MAXSHORT);

	br13 |= 1<<31 | fill_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fall through */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Emit the scanline fill setup only when the cached state differs */
	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(&sna->kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (kgem->gen >= 0100) {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	/* 3 dwords per XY_SCANLINE_BLT box; chunk to the remaining batch space */
	do {
		int nbox_this_time;

		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;
3553 3554 assert(sna->kgem.mode == KGEM_BLT); 3555 do { 3556 uint32_t *b; 3557 3558 DBG(("%s: (%d, %d), (%d, %d): %08x\n", 3559 __FUNCTION__, 3560 box->x1, box->y1, 3561 box->x2, box->y2, 3562 pixel)); 3563 3564 assert(box->x1 >= 0); 3565 assert(box->y1 >= 0); 3566 assert(box->y2 * bo->pitch <= kgem_bo_size(bo)); 3567 3568 b = kgem->batch + kgem->nbatch; 3569 kgem->nbatch += 3; 3570 assert(kgem->nbatch < kgem->surface); 3571 b[0] = cmd; 3572 *(uint64_t *)(b+1) = *(const uint64_t *)box; 3573 box++; 3574 } while (--nbox_this_time); 3575 3576 if (nbox) { 3577 uint32_t *b; 3578 3579 _kgem_submit(kgem); 3580 _kgem_set_mode(kgem, KGEM_BLT); 3581 3582 assert(sna->kgem.mode == KGEM_BLT); 3583 b = kgem->batch + kgem->nbatch; 3584 if (kgem->gen >= 0100) { 3585 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; 3586 if (bpp == 32) 3587 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3588 if (bo->tiling) 3589 b[0] |= BLT_DST_TILED; 3590 b[1] = br13; 3591 b[2] = 0; 3592 b[3] = 0; 3593 *(uint64_t *)(b+4) = 3594 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, 3595 I915_GEM_DOMAIN_RENDER << 16 | 3596 I915_GEM_DOMAIN_RENDER | 3597 KGEM_RELOC_FENCED, 3598 0); 3599 b[6] = pixel; 3600 b[7] = pixel; 3601 b[8] = 0; 3602 b[9] = 0; 3603 kgem->nbatch += 10; 3604 } else { 3605 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; 3606 if (bpp == 32) 3607 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3608 if (bo->tiling && kgem->gen >= 040) 3609 b[0] |= BLT_DST_TILED; 3610 b[1] = br13; 3611 b[2] = 0; 3612 b[3] = 0; 3613 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, 3614 I915_GEM_DOMAIN_RENDER << 16 | 3615 I915_GEM_DOMAIN_RENDER | 3616 KGEM_RELOC_FENCED, 3617 0); 3618 b[5] = pixel; 3619 b[6] = pixel; 3620 b[7] = 0; 3621 b[8] = 0; 3622 kgem->nbatch += 9; 3623 } 3624 assert(kgem->nbatch < kgem->surface); 3625 } 3626 } while (nbox); 3627 3628 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 3629 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 3630 _kgem_submit(kgem); 3631 } 3632 3633 return true; 3634} 3635 
3636bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, 3637 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3638 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3639 int bpp, const BoxRec *box, int nbox) 3640{ 3641 struct kgem *kgem = &sna->kgem; 3642 unsigned src_pitch, br13, cmd; 3643 3644#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES 3645 return false; 3646#endif 3647 3648 DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n", 3649 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox, 3650 src_bo->tiling, dst_bo->tiling, 3651 src_bo->pitch, dst_bo->pitch)); 3652 assert(nbox); 3653 3654 if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) { 3655 DBG(("%s: cannot blt to src? %d or dst? %d\n", 3656 __FUNCTION__, 3657 kgem_bo_can_blt(kgem, src_bo), 3658 kgem_bo_can_blt(kgem, dst_bo))); 3659 return false; 3660 } 3661 3662 cmd = XY_SRC_COPY_BLT_CMD; 3663 if (bpp == 32) 3664 cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 3665 3666 src_pitch = src_bo->pitch; 3667 if (kgem->gen >= 040 && src_bo->tiling) { 3668 cmd |= BLT_SRC_TILED; 3669 src_pitch >>= 2; 3670 } 3671 assert(src_pitch <= MAXSHORT); 3672 3673 br13 = dst_bo->pitch; 3674 if (kgem->gen >= 040 && dst_bo->tiling) { 3675 cmd |= BLT_DST_TILED; 3676 br13 >>= 2; 3677 } 3678 assert(br13 <= MAXSHORT); 3679 3680 br13 |= copy_ROP[alu] << 16; 3681 switch (bpp) { 3682 default: assert(0); 3683 case 32: br13 |= 1 << 25; /* RGB8888 */ 3684 case 16: br13 |= 1 << 24; /* RGB565 */ 3685 case 8: break; 3686 } 3687 3688 /* Compare first box against a previous fill */ 3689 if ((alu == GXcopy || alu == GXclear || alu == GXset) && 3690 kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) { 3691 if (kgem->gen >= 0100) { 3692 if (kgem->nbatch >= 7 && 3693 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && 3694 kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) 
&& 3695 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 3696 DBG(("%s: deleting last fill\n", __FUNCTION__)); 3697 kgem->nbatch -= 7; 3698 kgem->nreloc--; 3699 } 3700 } else { 3701 if (kgem->nbatch >= 6 && 3702 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 3703 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 3704 kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 3705 DBG(("%s: deleting last fill\n", __FUNCTION__)); 3706 kgem->nbatch -= 6; 3707 kgem->nreloc--; 3708 } 3709 } 3710 } 3711 3712 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 3713 if (!kgem_check_batch(kgem, 10) || 3714 !kgem_check_reloc(kgem, 2) || 3715 !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 3716 kgem_submit(kgem); 3717 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 3718 DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__)); 3719 return sna_tiling_blt_copy_boxes(sna, alu, 3720 src_bo, src_dx, src_dy, 3721 dst_bo, dst_dx, dst_dy, 3722 bpp, box, nbox); 3723 } 3724 _kgem_set_mode(kgem, KGEM_BLT); 3725 } 3726 3727 if ((dst_dx | dst_dy) == 0) { 3728 if (kgem->gen >= 0100) { 3729 uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8; 3730 do { 3731 int nbox_this_time; 3732 3733 nbox_this_time = nbox; 3734 if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 3735 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; 3736 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3737 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3738 assert(nbox_this_time); 3739 nbox -= nbox_this_time; 3740 3741 assert(sna->kgem.mode == KGEM_BLT); 3742 do { 3743 uint32_t *b = kgem->batch + kgem->nbatch; 3744 3745 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3746 __FUNCTION__, 3747 box->x1, box->y1, 3748 
box->x2 - box->x1, box->y2 - box->y1)); 3749 3750 assert(box->x1 + src_dx >= 0); 3751 assert(box->y1 + src_dy >= 0); 3752 assert(box->x1 + src_dx <= INT16_MAX); 3753 assert(box->y1 + src_dy <= INT16_MAX); 3754 3755 assert(box->x1 >= 0); 3756 assert(box->y1 >= 0); 3757 3758 *(uint64_t *)&b[0] = hdr; 3759 *(uint64_t *)&b[2] = *(const uint64_t *)box; 3760 *(uint64_t *)(b+4) = 3761 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 3762 I915_GEM_DOMAIN_RENDER << 16 | 3763 I915_GEM_DOMAIN_RENDER | 3764 KGEM_RELOC_FENCED, 3765 0); 3766 b[6] = add2(b[2], src_dx, src_dy); 3767 b[7] = src_pitch; 3768 *(uint64_t *)(b+8) = 3769 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 3770 I915_GEM_DOMAIN_RENDER << 16 | 3771 KGEM_RELOC_FENCED, 3772 0); 3773 kgem->nbatch += 10; 3774 assert(kgem->nbatch < kgem->surface); 3775 box++; 3776 } while (--nbox_this_time); 3777 3778 if (!nbox) 3779 break; 3780 3781 _kgem_submit(kgem); 3782 _kgem_set_mode(kgem, KGEM_BLT); 3783 } while (1); 3784 } else { 3785 uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; 3786 do { 3787 int nbox_this_time; 3788 3789 nbox_this_time = nbox; 3790 if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 3791 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; 3792 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3793 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3794 assert(nbox_this_time); 3795 nbox -= nbox_this_time; 3796 3797 assert(sna->kgem.mode == KGEM_BLT); 3798 do { 3799 uint32_t *b = kgem->batch + kgem->nbatch; 3800 3801 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3802 __FUNCTION__, 3803 box->x1, box->y1, 3804 box->x2 - box->x1, box->y2 - box->y1)); 3805 3806 assert(box->x1 + src_dx >= 0); 3807 assert(box->y1 + src_dy >= 0); 3808 assert(box->x1 + src_dx <= INT16_MAX); 3809 assert(box->y1 + src_dy <= INT16_MAX); 3810 3811 assert(box->x1 >= 0); 3812 assert(box->y1 >= 0); 3813 3814 *(uint64_t *)&b[0] = hdr; 3815 *(uint64_t *)&b[2] = *(const uint64_t 
*)box; 3816 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 3817 I915_GEM_DOMAIN_RENDER << 16 | 3818 I915_GEM_DOMAIN_RENDER | 3819 KGEM_RELOC_FENCED, 3820 0); 3821 b[5] = add2(b[2], src_dx, src_dy); 3822 b[6] = src_pitch; 3823 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 3824 I915_GEM_DOMAIN_RENDER << 16 | 3825 KGEM_RELOC_FENCED, 3826 0); 3827 kgem->nbatch += 8; 3828 assert(kgem->nbatch < kgem->surface); 3829 box++; 3830 } while (--nbox_this_time); 3831 3832 if (!nbox) 3833 break; 3834 3835 _kgem_submit(kgem); 3836 _kgem_set_mode(kgem, KGEM_BLT); 3837 } while (1); 3838 } 3839 } else { 3840 if (kgem->gen >= 0100) { 3841 cmd |= 8; 3842 do { 3843 int nbox_this_time; 3844 3845 nbox_this_time = nbox; 3846 if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 3847 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; 3848 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3849 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3850 assert(nbox_this_time); 3851 nbox -= nbox_this_time; 3852 3853 assert(sna->kgem.mode == KGEM_BLT); 3854 do { 3855 uint32_t *b = kgem->batch + kgem->nbatch; 3856 3857 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3858 __FUNCTION__, 3859 box->x1, box->y1, 3860 box->x2 - box->x1, box->y2 - box->y1)); 3861 3862 assert(box->x1 + src_dx >= 0); 3863 assert(box->y1 + src_dy >= 0); 3864 3865 assert(box->x1 + dst_dx >= 0); 3866 assert(box->y1 + dst_dy >= 0); 3867 3868 b[0] = cmd; 3869 b[1] = br13; 3870 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 3871 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 3872 *(uint64_t *)(b+4) = 3873 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 3874 I915_GEM_DOMAIN_RENDER << 16 | 3875 I915_GEM_DOMAIN_RENDER | 3876 KGEM_RELOC_FENCED, 3877 0); 3878 b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 3879 b[7] = src_pitch; 3880 *(uint64_t *)(b+8) = 3881 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 3882 I915_GEM_DOMAIN_RENDER << 16 
| 3883 KGEM_RELOC_FENCED, 3884 0); 3885 kgem->nbatch += 10; 3886 assert(kgem->nbatch < kgem->surface); 3887 box++; 3888 } while (--nbox_this_time); 3889 3890 if (!nbox) 3891 break; 3892 3893 _kgem_submit(kgem); 3894 _kgem_set_mode(kgem, KGEM_BLT); 3895 } while (1); 3896 } else { 3897 cmd |= 6; 3898 do { 3899 int nbox_this_time; 3900 3901 nbox_this_time = nbox; 3902 if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 3903 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; 3904 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 3905 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; 3906 assert(nbox_this_time); 3907 nbox -= nbox_this_time; 3908 3909 assert(sna->kgem.mode == KGEM_BLT); 3910 do { 3911 uint32_t *b = kgem->batch + kgem->nbatch; 3912 3913 DBG((" %s: box=(%d, %d)x(%d, %d)\n", 3914 __FUNCTION__, 3915 box->x1, box->y1, 3916 box->x2 - box->x1, box->y2 - box->y1)); 3917 3918 assert(box->x1 + src_dx >= 0); 3919 assert(box->y1 + src_dy >= 0); 3920 3921 assert(box->x1 + dst_dx >= 0); 3922 assert(box->y1 + dst_dy >= 0); 3923 3924 b[0] = cmd; 3925 b[1] = br13; 3926 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); 3927 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); 3928 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 3929 I915_GEM_DOMAIN_RENDER << 16 | 3930 I915_GEM_DOMAIN_RENDER | 3931 KGEM_RELOC_FENCED, 3932 0); 3933 b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); 3934 b[6] = src_pitch; 3935 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 3936 I915_GEM_DOMAIN_RENDER << 16 | 3937 KGEM_RELOC_FENCED, 3938 0); 3939 kgem->nbatch += 8; 3940 assert(kgem->nbatch < kgem->surface); 3941 box++; 3942 } while (--nbox_this_time); 3943 3944 if (!nbox) 3945 break; 3946 3947 _kgem_submit(kgem); 3948 _kgem_set_mode(kgem, KGEM_BLT); 3949 } while (1); 3950 } 3951 } 3952 3953 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 3954 DBG(("%s: flushing BLT operation on empty ring\n", 
__FUNCTION__)); 3955 _kgem_submit(kgem); 3956 } else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) { 3957 uint32_t *b = kgem->batch + kgem->nbatch; 3958 assert(sna->kgem.mode == KGEM_BLT); 3959 b[0] = XY_SETUP_CLIP; 3960 b[1] = b[2] = 0; 3961 kgem->nbatch += 3; 3962 assert(kgem->nbatch < kgem->surface); 3963 } 3964 3965 sna->blt_state.fill_bo = 0; 3966 return true; 3967} 3968 3969bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu, 3970 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3971 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3972 int bpp, int alpha_fixup, 3973 const BoxRec *box, int nbox) 3974{ 3975 struct kgem *kgem = &sna->kgem; 3976 unsigned src_pitch, br13, cmd; 3977 3978#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES 3979 return false; 3980#endif 3981 3982 DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n", 3983 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox, 3984 src_bo->tiling, dst_bo->tiling, 3985 src_bo->pitch, dst_bo->pitch)); 3986 3987 if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) { 3988 DBG(("%s: cannot blt to src? %d or dst? %d\n", 3989 __FUNCTION__, 3990 kgem_bo_can_blt(kgem, src_bo), 3991 kgem_bo_can_blt(kgem, dst_bo))); 3992 return false; 3993 } 3994 3995 cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 
12 : 10); 3996 src_pitch = src_bo->pitch; 3997 if (kgem->gen >= 040 && src_bo->tiling) { 3998 cmd |= BLT_SRC_TILED; 3999 src_pitch >>= 2; 4000 } 4001 assert(src_pitch <= MAXSHORT); 4002 4003 br13 = dst_bo->pitch; 4004 if (kgem->gen >= 040 && dst_bo->tiling) { 4005 cmd |= BLT_DST_TILED; 4006 br13 >>= 2; 4007 } 4008 assert(br13 <= MAXSHORT); 4009 4010 br13 |= copy_ROP[alu] << 16; 4011 switch (bpp) { 4012 default: assert(0); 4013 case 32: br13 |= 1 << 25; /* RGB8888 */ 4014 case 16: br13 |= 1 << 24; /* RGB565 */ 4015 case 8: break; 4016 } 4017 4018 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 4019 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 4020 DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__)); 4021 return false; 4022 } 4023 4024 /* Compare first box against a previous fill */ 4025 if ((alu == GXcopy || alu == GXclear || alu == GXset) && 4026 kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) { 4027 if (kgem->gen >= 0100) { 4028 if (kgem->nbatch >= 7 && 4029 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && 4030 kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 4031 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 4032 DBG(("%s: deleting last fill\n", __FUNCTION__)); 4033 kgem->nbatch -= 7; 4034 kgem->nreloc--; 4035 } 4036 } else { 4037 if (kgem->nbatch >= 6 && 4038 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && 4039 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && 4040 kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { 4041 DBG(("%s: deleting last fill\n", __FUNCTION__)); 4042 kgem->nbatch -= 6; 4043 kgem->nreloc--; 4044 } 4045 } 4046 } 4047 4048 while (nbox--) { 4049 uint32_t *b; 4050 4051 if (!kgem_check_batch(kgem, 14) || 
4052 !kgem_check_reloc(kgem, 2)) { 4053 _kgem_submit(kgem); 4054 _kgem_set_mode(kgem, KGEM_BLT); 4055 } 4056 4057 assert(sna->kgem.mode == KGEM_BLT); 4058 b = kgem->batch + kgem->nbatch; 4059 b[0] = cmd; 4060 b[1] = br13; 4061 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 4062 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 4063 if (sna->kgem.gen >= 0100) { 4064 *(uint64_t *)(b+4) = 4065 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 4066 I915_GEM_DOMAIN_RENDER << 16 | 4067 I915_GEM_DOMAIN_RENDER | 4068 KGEM_RELOC_FENCED, 4069 0); 4070 b[6] = src_pitch; 4071 b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx); 4072 *(uint64_t *)(b+8) = 4073 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 4074 I915_GEM_DOMAIN_RENDER << 16 | 4075 KGEM_RELOC_FENCED, 4076 0); 4077 b[10] = alpha_fixup; 4078 b[11] = alpha_fixup; 4079 b[12] = 0; 4080 b[13] = 0; 4081 kgem->nbatch += 14; 4082 } else { 4083 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 4084 I915_GEM_DOMAIN_RENDER << 16 | 4085 I915_GEM_DOMAIN_RENDER | 4086 KGEM_RELOC_FENCED, 4087 0); 4088 b[5] = src_pitch; 4089 b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx); 4090 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 4091 I915_GEM_DOMAIN_RENDER << 16 | 4092 KGEM_RELOC_FENCED, 4093 0); 4094 b[8] = alpha_fixup; 4095 b[9] = alpha_fixup; 4096 b[10] = 0; 4097 b[11] = 0; 4098 kgem->nbatch += 12; 4099 } 4100 assert(kgem->nbatch < kgem->surface); 4101 box++; 4102 } 4103 4104 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { 4105 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__)); 4106 _kgem_submit(kgem); 4107 } 4108 4109 sna->blt_state.fill_bo = 0; 4110 return true; 4111} 4112 4113static void box_extents(const BoxRec *box, int n, BoxRec *extents) 4114{ 4115 *extents = *box; 4116 while (--n) { 4117 box++; 4118 if (box->x1 < extents->x1) 4119 extents->x1 = box->x1; 4120 if (box->y1 < extents->y1) 4121 extents->y1 = box->y1; 4122 4123 if (box->x2 > extents->x2) 4124 extents->x2 = box->x2; 
4125 if (box->y2 > extents->y2) 4126 extents->y2 = box->y2; 4127 } 4128} 4129 4130bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, 4131 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 4132 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 4133 const BoxRec *box, int nbox) 4134{ 4135 struct kgem_bo *free_bo = NULL; 4136 bool ret; 4137 4138 DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox)); 4139 4140 if (!sna_blt_compare_depth(src, dst)) { 4141 DBG(("%s: mismatching depths %d -> %d\n", 4142 __FUNCTION__, src->depth, dst->depth)); 4143 return false; 4144 } 4145 4146 if (src_bo == dst_bo) { 4147 DBG(("%s: dst == src\n", __FUNCTION__)); 4148 4149 if (src_bo->tiling == I915_TILING_Y && 4150 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { 4151 struct kgem_bo *bo; 4152 4153 DBG(("%s: src is Y-tiled\n", __FUNCTION__)); 4154 4155 if (src->type != DRAWABLE_PIXMAP) 4156 return false; 4157 4158 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo); 4159 bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X); 4160 if (bo == NULL) { 4161 BoxRec extents; 4162 4163 DBG(("%s: y-tiling conversion failed\n", 4164 __FUNCTION__)); 4165 4166 box_extents(box, nbox, &extents); 4167 free_bo = kgem_create_2d(&sna->kgem, 4168 extents.x2 - extents.x1, 4169 extents.y2 - extents.y1, 4170 src->bitsPerPixel, 4171 I915_TILING_X, 0); 4172 if (free_bo == NULL) { 4173 DBG(("%s: fallback -- temp allocation failed\n", 4174 __FUNCTION__)); 4175 return false; 4176 } 4177 4178 if (!sna_blt_copy_boxes(sna, GXcopy, 4179 src_bo, src_dx, src_dy, 4180 free_bo, -extents.x1, -extents.y1, 4181 src->bitsPerPixel, 4182 box, nbox)) { 4183 DBG(("%s: fallback -- temp copy failed\n", 4184 __FUNCTION__)); 4185 kgem_bo_destroy(&sna->kgem, free_bo); 4186 return false; 4187 } 4188 4189 src_dx = -extents.x1; 4190 src_dy = -extents.y1; 4191 src_bo = free_bo; 4192 } else 4193 dst_bo = src_bo = bo; 4194 } 4195 } else { 4196 if (src_bo->tiling == 
I915_TILING_Y && 4197 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { 4198 DBG(("%s: src is y-tiled\n", __FUNCTION__)); 4199 if (src->type != DRAWABLE_PIXMAP) 4200 return false; 4201 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo); 4202 src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X); 4203 if (src_bo == NULL) { 4204 DBG(("%s: fallback -- src y-tiling conversion failed\n", 4205 __FUNCTION__)); 4206 return false; 4207 } 4208 } 4209 4210 if (dst_bo->tiling == I915_TILING_Y && 4211 kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { 4212 DBG(("%s: dst is y-tiled\n", __FUNCTION__)); 4213 if (dst->type != DRAWABLE_PIXMAP) 4214 return false; 4215 assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo); 4216 dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X); 4217 if (dst_bo == NULL) { 4218 DBG(("%s: fallback -- dst y-tiling conversion failed\n", 4219 __FUNCTION__)); 4220 return false; 4221 } 4222 } 4223 } 4224 4225 ret = sna_blt_copy_boxes(sna, alu, 4226 src_bo, src_dx, src_dy, 4227 dst_bo, dst_dx, dst_dy, 4228 dst->bitsPerPixel, 4229 box, nbox); 4230 4231 if (free_bo) 4232 kgem_bo_destroy(&sna->kgem, free_bo); 4233 4234 return ret; 4235} 4236