sna_io.c revision 03b705cf
1/* 2 * Copyright (c) 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_render.h" 34#include "sna_reg.h" 35 36#include <sys/mman.h> 37 38#define PITCH(x, y) ALIGN((x)*(y), 4) 39 40#define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */ 41 42/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */ 43 44static inline bool upload_too_large(struct sna *sna, int width, int height) 45{ 46 return width * height * 4 > sna->kgem.max_upload_tile_size; 47} 48 49static inline bool must_tile(struct sna *sna, int width, int height) 50{ 51 return (width > sna->render.max_3d_size || 52 height > sna->render.max_3d_size || 53 upload_too_large(sna, width, height)); 54} 55 56static bool download_inplace__cpu(struct kgem *kgem, 57 PixmapPtr p, struct kgem_bo *bo, 58 const BoxRec *box, int nbox) 59{ 60 BoxRec extents; 61 62 switch (bo->tiling) { 63 case I915_TILING_X: 64 if (!kgem->memcpy_from_tiled_x) 65 return false; 66 case I915_TILING_NONE: 67 break; 68 default: 69 return false; 70 } 71 72 if (!kgem_bo_can_map__cpu(kgem, bo, false)) 73 return false; 74 75 if (kgem->has_llc) 76 return true; 77 78 extents = *box; 79 while (--nbox) { 80 ++box; 81 if (box->x1 < extents.x1) 82 extents.x1 = box->x1; 83 if (box->x2 > extents.x2) 84 extents.x2 = box->x2; 85 extents.y2 = box->y2; 86 } 87 88 if (extents.x2 - extents.x1 == p->drawable.width && 89 extents.y2 - extents.y1 == p->drawable.height) 90 return true; 91 92 return __kgem_bo_size(bo) <= PAGE_SIZE; 93} 94 95static bool 96read_boxes_inplace__cpu(struct kgem *kgem, 97 PixmapPtr pixmap, struct kgem_bo *bo, 98 const BoxRec *box, int n) 99{ 100 int bpp = pixmap->drawable.bitsPerPixel; 101 void *src, *dst = pixmap->devPrivate.ptr; 102 int src_pitch = bo->pitch; 103 int dst_pitch = pixmap->devKind; 104 105 if (!download_inplace__cpu(kgem, dst, bo, box, n)) 106 return false; 107 108 assert(kgem_bo_can_map__cpu(kgem, bo, 
false)); 109 assert(bo->tiling != I915_TILING_Y); 110 111 src = __kgem_bo_map__cpu(kgem, bo); 112 if (src == NULL) 113 return false; 114 115 kgem_bo_sync__cpu_full(kgem, bo, 0); 116 if (bo->tiling == I915_TILING_X) { 117 assert(kgem->memcpy_from_tiled_x); 118 do { 119 memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch, 120 box->x1, box->y1, 121 box->x1, box->y1, 122 box->x2 - box->x1, box->y2 - box->y1); 123 box++; 124 } while (--n); 125 } else { 126 do { 127 memcpy_blt(src, dst, bpp, src_pitch, dst_pitch, 128 box->x1, box->y1, 129 box->x1, box->y1, 130 box->x2 - box->x1, box->y2 - box->y1); 131 box++; 132 } while (--n); 133 } 134 __kgem_bo_unmap__cpu(kgem, bo, src); 135 136 return true; 137} 138 139static void read_boxes_inplace(struct kgem *kgem, 140 PixmapPtr pixmap, struct kgem_bo *bo, 141 const BoxRec *box, int n) 142{ 143 int bpp = pixmap->drawable.bitsPerPixel; 144 void *src, *dst = pixmap->devPrivate.ptr; 145 int src_pitch = bo->pitch; 146 int dst_pitch = pixmap->devKind; 147 148 if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n)) 149 return; 150 151 DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); 152 153 if (!kgem_bo_can_map(kgem, bo)) 154 return; 155 156 kgem_bo_submit(kgem, bo); 157 158 src = kgem_bo_map(kgem, bo); 159 if (src == NULL) 160 return; 161 162 assert(src != dst); 163 do { 164 DBG(("%s: copying box (%d, %d), (%d, %d)\n", 165 __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 166 167 assert(box->x2 > box->x1); 168 assert(box->y2 > box->y1); 169 170 assert(box->x1 >= 0); 171 assert(box->y1 >= 0); 172 assert(box->x2 <= pixmap->drawable.width); 173 assert(box->y2 <= pixmap->drawable.height); 174 175 assert(box->x1 >= 0); 176 assert(box->y1 >= 0); 177 assert(box->x2 <= pixmap->drawable.width); 178 assert(box->y2 <= pixmap->drawable.height); 179 180 memcpy_blt(src, dst, bpp, 181 src_pitch, dst_pitch, 182 box->x1, box->y1, 183 box->x1, box->y1, 184 box->x2 - box->x1, box->y2 - box->y1); 185 box++; 186 } while (--n); 187} 
188 189static bool download_inplace(struct kgem *kgem, 190 PixmapPtr p, struct kgem_bo *bo, 191 const BoxRec *box, int nbox) 192{ 193 bool cpu; 194 195 if (unlikely(kgem->wedged)) 196 return true; 197 198 cpu = download_inplace__cpu(kgem, p, bo, box, nbox); 199 if (!cpu && !kgem_bo_can_map(kgem, bo)) 200 return false; 201 202 if (FORCE_INPLACE) 203 return FORCE_INPLACE > 0; 204 205 if (kgem->can_blt_cpu && kgem->max_cpu_size) 206 return false; 207 208 return !__kgem_bo_is_busy(kgem, bo) || cpu; 209} 210 211void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo, 212 const BoxRec *box, int nbox) 213{ 214 struct kgem *kgem = &sna->kgem; 215 struct kgem_bo *dst_bo; 216 BoxRec extents; 217 const BoxRec *tmp_box; 218 int tmp_nbox; 219 char *src; 220 void *ptr; 221 int src_pitch, cpp, offset; 222 int n, cmd, br13; 223 bool can_blt; 224 225 DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n", 226 __FUNCTION__, nbox, src_bo->handle, 227 dst->drawable.width, dst->drawable.height)); 228 229#ifndef NDEBUG 230 for (n = 0; n < nbox; n++) { 231 if (box[n].x1 < 0 || box[n].y1 < 0 || 232 box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch || 233 box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo)) 234 { 235 FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n, 236 box[n].x1, box[n].y1, 237 box[n].x2, box[n].y2, 238 src_bo->pitch, kgem_bo_size(src_bo)); 239 } 240 } 241#endif 242 243 /* XXX The gpu is faster to perform detiling in bulk, but takes 244 * longer to setup and retrieve the results, with an additional 245 * copy. The long term solution is to use snoopable bo and avoid 246 * this path. 
247 */ 248 249 if (download_inplace(kgem, dst, src_bo, box ,nbox)) { 250fallback: 251 read_boxes_inplace(kgem, dst, src_bo, box, nbox); 252 return; 253 } 254 255 can_blt = kgem_bo_can_blt(kgem, src_bo) && 256 (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 257 extents = box[0]; 258 for (n = 1; n < nbox; n++) { 259 if (box[n].x1 < extents.x1) 260 extents.x1 = box[n].x1; 261 if (box[n].x2 > extents.x2) 262 extents.x2 = box[n].x2; 263 264 if (can_blt) 265 can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 266 267 if (box[n].y1 < extents.y1) 268 extents.y1 = box[n].y1; 269 if (box[n].y2 > extents.y2) 270 extents.y2 = box[n].y2; 271 } 272 if (kgem_bo_is_mappable(kgem, src_bo)) { 273 /* Is it worth detiling? */ 274 if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) 275 goto fallback; 276 } 277 278 /* Try to avoid switching rings... */ 279 if (!can_blt || kgem->ring == KGEM_RENDER || 280 upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { 281 PixmapRec tmp; 282 283 tmp.drawable.width = extents.x2 - extents.x1; 284 tmp.drawable.height = extents.y2 - extents.y1; 285 tmp.drawable.depth = dst->drawable.depth; 286 tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel; 287 tmp.devPrivate.ptr = NULL; 288 289 assert(tmp.drawable.width); 290 assert(tmp.drawable.height); 291 292 if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) { 293 BoxRec tile, stack[64], *clipped, *c; 294 int step; 295 296 if (n > ARRAY_SIZE(stack)) { 297 clipped = malloc(sizeof(BoxRec) * n); 298 if (clipped == NULL) 299 goto fallback; 300 } else 301 clipped = stack; 302 303 step = MIN(sna->render.max_3d_size, 304 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); 305 while (step * step * 4 > sna->kgem.max_upload_tile_size) 306 step /= 2; 307 308 DBG(("%s: tiling download, using %dx%d tiles\n", 309 __FUNCTION__, step, step)); 310 assert(step); 311 312 for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = 
tile.y2) { 313 int y2 = tile.y1 + step; 314 if (y2 > extents.y2) 315 y2 = extents.y2; 316 tile.y2 = y2; 317 318 for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { 319 int x2 = tile.x1 + step; 320 if (x2 > extents.x2) 321 x2 = extents.x2; 322 tile.x2 = x2; 323 324 tmp.drawable.width = tile.x2 - tile.x1; 325 tmp.drawable.height = tile.y2 - tile.y1; 326 327 c = clipped; 328 for (n = 0; n < nbox; n++) { 329 *c = box[n]; 330 if (!box_intersect(c, &tile)) 331 continue; 332 333 DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n", 334 __FUNCTION__, 335 c->x1, c->y1, 336 c->x2, c->y2, 337 c->x1 - tile.x1, 338 c->y1 - tile.y1)); 339 c++; 340 } 341 if (c == clipped) 342 continue; 343 344 dst_bo = kgem_create_buffer_2d(kgem, 345 tmp.drawable.width, 346 tmp.drawable.height, 347 tmp.drawable.bitsPerPixel, 348 KGEM_BUFFER_LAST, 349 &ptr); 350 if (!dst_bo) { 351 if (clipped != stack) 352 free(clipped); 353 goto fallback; 354 } 355 356 if (!sna->render.copy_boxes(sna, GXcopy, 357 dst, src_bo, 0, 0, 358 &tmp, dst_bo, -tile.x1, -tile.y1, 359 clipped, c-clipped, COPY_LAST)) { 360 kgem_bo_destroy(&sna->kgem, dst_bo); 361 if (clipped != stack) 362 free(clipped); 363 goto fallback; 364 } 365 366 kgem_bo_submit(&sna->kgem, dst_bo); 367 kgem_buffer_read_sync(kgem, dst_bo); 368 369 while (c-- != clipped) { 370 memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, 371 dst_bo->pitch, dst->devKind, 372 c->x1 - tile.x1, 373 c->y1 - tile.y1, 374 c->x1, c->y1, 375 c->x2 - c->x1, 376 c->y2 - c->y1); 377 } 378 379 kgem_bo_destroy(&sna->kgem, dst_bo); 380 } 381 } 382 383 if (clipped != stack) 384 free(clipped); 385 } else { 386 dst_bo = kgem_create_buffer_2d(kgem, 387 tmp.drawable.width, 388 tmp.drawable.height, 389 tmp.drawable.bitsPerPixel, 390 KGEM_BUFFER_LAST, 391 &ptr); 392 if (!dst_bo) 393 goto fallback; 394 395 if (!sna->render.copy_boxes(sna, GXcopy, 396 dst, src_bo, 0, 0, 397 &tmp, dst_bo, -extents.x1, -extents.y1, 398 box, nbox, COPY_LAST)) { 399 
kgem_bo_destroy(&sna->kgem, dst_bo); 400 goto fallback; 401 } 402 403 kgem_bo_submit(&sna->kgem, dst_bo); 404 kgem_buffer_read_sync(kgem, dst_bo); 405 406 for (n = 0; n < nbox; n++) { 407 memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, 408 dst_bo->pitch, dst->devKind, 409 box[n].x1 - extents.x1, 410 box[n].y1 - extents.y1, 411 box[n].x1, box[n].y1, 412 box[n].x2 - box[n].x1, 413 box[n].y2 - box[n].y1); 414 } 415 416 kgem_bo_destroy(&sna->kgem, dst_bo); 417 } 418 return; 419 } 420 421 /* count the total number of bytes to be read and allocate a bo */ 422 cpp = dst->drawable.bitsPerPixel / 8; 423 offset = 0; 424 for (n = 0; n < nbox; n++) { 425 int height = box[n].y2 - box[n].y1; 426 int width = box[n].x2 - box[n].x1; 427 offset += PITCH(width, cpp) * height; 428 } 429 430 DBG((" read buffer size=%d\n", offset)); 431 432 dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr); 433 if (!dst_bo) { 434 read_boxes_inplace(kgem, dst, src_bo, box, nbox); 435 return; 436 } 437 438 cmd = XY_SRC_COPY_BLT_CMD; 439 src_pitch = src_bo->pitch; 440 if (kgem->gen >= 040 && src_bo->tiling) { 441 cmd |= BLT_SRC_TILED; 442 src_pitch >>= 2; 443 } 444 445 br13 = 0xcc << 16; 446 switch (cpp) { 447 default: 448 case 4: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 449 br13 |= 1 << 25; /* RGB8888 */ 450 case 2: br13 |= 1 << 24; /* RGB565 */ 451 case 1: break; 452 } 453 454 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 455 if (!kgem_check_batch(kgem, 8) || 456 !kgem_check_reloc_and_exec(kgem, 2) || 457 !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 458 kgem_submit(kgem); 459 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) 460 goto fallback; 461 _kgem_set_mode(kgem, KGEM_BLT); 462 } 463 464 tmp_nbox = nbox; 465 tmp_box = box; 466 offset = 0; 467 do { 468 int nbox_this_time; 469 470 nbox_this_time = tmp_nbox; 471 if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 472 nbox_this_time = (kgem->surface - kgem->nbatch - 
KGEM_BATCH_RESERVED) / 8; 473 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 474 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 475 assert(nbox_this_time); 476 tmp_nbox -= nbox_this_time; 477 478 for (n = 0; n < nbox_this_time; n++) { 479 int height = tmp_box[n].y2 - tmp_box[n].y1; 480 int width = tmp_box[n].x2 - tmp_box[n].x1; 481 int pitch = PITCH(width, cpp); 482 uint32_t *b = kgem->batch + kgem->nbatch; 483 484 DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", 485 offset, tmp_box[n].x1, tmp_box[n].y1, 486 width, height, pitch)); 487 488 assert(tmp_box[n].x1 >= 0); 489 assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); 490 assert(tmp_box[n].y1 >= 0); 491 assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); 492 493 b[0] = cmd; 494 b[1] = br13 | pitch; 495 b[2] = 0; 496 b[3] = height << 16 | width; 497 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 498 I915_GEM_DOMAIN_RENDER << 16 | 499 I915_GEM_DOMAIN_RENDER | 500 KGEM_RELOC_FENCED, 501 offset); 502 b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1; 503 b[6] = src_pitch; 504 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 505 I915_GEM_DOMAIN_RENDER << 16 | 506 KGEM_RELOC_FENCED, 507 0); 508 kgem->nbatch += 8; 509 510 offset += pitch * height; 511 } 512 513 _kgem_submit(kgem); 514 if (!tmp_nbox) 515 break; 516 517 _kgem_set_mode(kgem, KGEM_BLT); 518 tmp_box += nbox_this_time; 519 } while (1); 520 assert(offset == __kgem_buffer_size(dst_bo)); 521 522 kgem_buffer_read_sync(kgem, dst_bo); 523 524 src = ptr; 525 do { 526 int height = box->y2 - box->y1; 527 int width = box->x2 - box->x1; 528 int pitch = PITCH(width, cpp); 529 530 DBG((" copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n", 531 (long)((char *)src - (char *)ptr), 532 *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4), 533 box->x1, box->y1, 534 width, height, 535 pitch, dst->devKind, cpp*8)); 
536 537 assert(box->x1 >= 0); 538 assert(box->x2 <= dst->drawable.width); 539 assert(box->y1 >= 0); 540 assert(box->y2 <= dst->drawable.height); 541 542 memcpy_blt(src, dst->devPrivate.ptr, cpp*8, 543 pitch, dst->devKind, 544 0, 0, 545 box->x1, box->y1, 546 width, height); 547 box++; 548 549 src += pitch * height; 550 } while (--nbox); 551 assert(src - (char *)ptr == __kgem_buffer_size(dst_bo)); 552 kgem_bo_destroy(kgem, dst_bo); 553 sna->blt_state.fill_bo = 0; 554} 555 556static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) 557{ 558 if (!kgem->memcpy_to_tiled_x) 559 return false; 560 561 if (bo->tiling != I915_TILING_X) 562 return false; 563 564 return kgem_bo_can_map__cpu(kgem, bo, true); 565} 566 567static bool 568write_boxes_inplace__tiled(struct kgem *kgem, 569 const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, 570 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 571 const BoxRec *box, int n) 572{ 573 uint8_t *dst; 574 575 assert(bo->tiling == I915_TILING_X); 576 577 dst = __kgem_bo_map__cpu(kgem, bo); 578 if (dst == NULL) 579 return false; 580 581 kgem_bo_sync__cpu(kgem, bo); 582 do { 583 memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch, 584 box->x1 + src_dx, box->y1 + src_dy, 585 box->x1 + dst_dx, box->y1 + dst_dy, 586 box->x2 - box->x1, box->y2 - box->y1); 587 box++; 588 } while (--n); 589 __kgem_bo_unmap__cpu(kgem, bo, dst); 590 591 return true; 592} 593 594static bool write_boxes_inplace(struct kgem *kgem, 595 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, 596 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 597 const BoxRec *box, int n) 598{ 599 void *dst; 600 601 DBG(("%s x %d, handle=%d, tiling=%d\n", 602 __FUNCTION__, n, bo->handle, bo->tiling)); 603 604 if (upload_inplace__tiled(kgem, bo) && 605 write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy, 606 bo, dst_dx, dst_dy, box, n)) 607 return true; 608 609 if (!kgem_bo_can_map(kgem, bo)) 610 return false; 611 
612 kgem_bo_submit(kgem, bo); 613 614 dst = kgem_bo_map(kgem, bo); 615 if (dst == NULL) 616 return false; 617 618 assert(dst != src); 619 620 do { 621 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, 622 box->x1 + src_dx, box->y1 + src_dy, 623 box->x1 + dst_dx, box->y1 + dst_dy, 624 box->x2 - box->x1, box->y2 - box->y1, 625 bpp, stride, bo->pitch)); 626 627 assert(box->x2 > box->x1); 628 assert(box->y2 > box->y1); 629 630 assert(box->x1 + dst_dx >= 0); 631 assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch); 632 assert(box->y1 + dst_dy >= 0); 633 assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo)); 634 635 assert(box->x1 + src_dx >= 0); 636 assert((box->x2 + src_dx)*bpp <= 8*stride); 637 assert(box->y1 + src_dy >= 0); 638 639 memcpy_blt(src, dst, bpp, 640 stride, bo->pitch, 641 box->x1 + src_dx, box->y1 + src_dy, 642 box->x1 + dst_dx, box->y1 + dst_dy, 643 box->x2 - box->x1, box->y2 - box->y1); 644 box++; 645 } while (--n); 646 return true; 647} 648 649static bool __upload_inplace(struct kgem *kgem, 650 struct kgem_bo *bo, 651 const BoxRec *box, 652 int n, int bpp) 653{ 654 unsigned int bytes; 655 656 if (FORCE_INPLACE) 657 return FORCE_INPLACE > 0; 658 659 /* If we are writing through the GTT, check first if we might be 660 * able to almagamate a series of small writes into a single 661 * operation. 
662 */ 663 bytes = 0; 664 while (n--) { 665 bytes += (box->x2 - box->x1) * (box->y2 - box->y1); 666 box++; 667 } 668 if (__kgem_bo_is_busy(kgem, bo)) 669 return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages; 670 else 671 return bytes * bpp >> 12; 672} 673 674static bool upload_inplace(struct kgem *kgem, 675 struct kgem_bo *bo, 676 const BoxRec *box, 677 int n, int bpp) 678{ 679 if (unlikely(kgem->wedged)) 680 return true; 681 682 if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo)) 683 return false; 684 685 return __upload_inplace(kgem, bo, box, n,bpp); 686} 687 688bool sna_write_boxes(struct sna *sna, PixmapPtr dst, 689 struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy, 690 const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy, 691 const BoxRec *box, int nbox) 692{ 693 struct kgem *kgem = &sna->kgem; 694 struct kgem_bo *src_bo; 695 BoxRec extents; 696 void *ptr; 697 int offset; 698 int n, cmd, br13; 699 bool can_blt; 700 701 DBG(("%s x %d, src stride=%d, src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy)); 702 703 if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)&& 704 write_boxes_inplace(kgem, 705 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 706 dst_bo, dst_dx, dst_dy, 707 box, nbox)) 708 return true; 709 710 can_blt = kgem_bo_can_blt(kgem, dst_bo) && 711 (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 712 extents = box[0]; 713 for (n = 1; n < nbox; n++) { 714 if (box[n].x1 < extents.x1) 715 extents.x1 = box[n].x1; 716 if (box[n].x2 > extents.x2) 717 extents.x2 = box[n].x2; 718 719 if (can_blt) 720 can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 721 722 if (box[n].y1 < extents.y1) 723 extents.y1 = box[n].y1; 724 if (box[n].y2 > extents.y2) 725 extents.y2 = box[n].y2; 726 } 727 728 /* Try to avoid switching rings... 
*/ 729 if (!can_blt || kgem->ring == KGEM_RENDER || 730 upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { 731 PixmapRec tmp; 732 733 tmp.drawable.width = extents.x2 - extents.x1; 734 tmp.drawable.height = extents.y2 - extents.y1; 735 tmp.drawable.depth = dst->drawable.depth; 736 tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel; 737 tmp.devPrivate.ptr = NULL; 738 739 assert(tmp.drawable.width); 740 assert(tmp.drawable.height); 741 742 DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n", 743 __FUNCTION__, 744 extents.x1, extents.y1, 745 tmp.drawable.width, tmp.drawable.height, 746 sna->render.max_3d_size, sna->render.max_3d_size)); 747 if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) { 748 BoxRec tile, stack[64], *clipped, *c; 749 int cpp, step; 750 751tile: 752 cpp = dst->drawable.bitsPerPixel / 8; 753 step = MIN(sna->render.max_3d_size, 754 (MAXSHORT&~63) / cpp); 755 while (step * step * cpp > sna->kgem.max_upload_tile_size) 756 step /= 2; 757 758 if (step * cpp > 4096) 759 step = 4096 / cpp; 760 assert(step); 761 762 DBG(("%s: tiling upload, using %dx%d tiles\n", 763 __FUNCTION__, step, step)); 764 765 if (n > ARRAY_SIZE(stack)) { 766 clipped = malloc(sizeof(BoxRec) * n); 767 if (clipped == NULL) 768 goto fallback; 769 } else 770 clipped = stack; 771 772 for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { 773 int y2 = tile.y1 + step; 774 if (y2 > extents.y2) 775 y2 = extents.y2; 776 tile.y2 = y2; 777 778 for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { 779 int x2 = tile.x1 + step; 780 if (x2 > extents.x2) 781 x2 = extents.x2; 782 tile.x2 = x2; 783 784 tmp.drawable.width = tile.x2 - tile.x1; 785 tmp.drawable.height = tile.y2 - tile.y1; 786 787 src_bo = kgem_create_buffer_2d(kgem, 788 tmp.drawable.width, 789 tmp.drawable.height, 790 tmp.drawable.bitsPerPixel, 791 KGEM_BUFFER_WRITE_INPLACE, 792 &ptr); 793 if (!src_bo) { 794 if (clipped != stack) 795 free(clipped); 796 goto fallback; 797 } 
798 799 c = clipped; 800 for (n = 0; n < nbox; n++) { 801 *c = box[n]; 802 if (!box_intersect(c, &tile)) 803 continue; 804 805 DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 806 __FUNCTION__, 807 c->x1, c->y1, 808 c->x2, c->y2, 809 src_dx, src_dy, 810 c->x1 - tile.x1, 811 c->y1 - tile.y1)); 812 memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel, 813 stride, src_bo->pitch, 814 c->x1 + src_dx, 815 c->y1 + src_dy, 816 c->x1 - tile.x1, 817 c->y1 - tile.y1, 818 c->x2 - c->x1, 819 c->y2 - c->y1); 820 c++; 821 } 822 823 if (c != clipped) 824 n = sna->render.copy_boxes(sna, GXcopy, 825 &tmp, src_bo, -tile.x1, -tile.y1, 826 dst, dst_bo, dst_dx, dst_dy, 827 clipped, c - clipped, 0); 828 else 829 n = 1; 830 831 kgem_bo_destroy(&sna->kgem, src_bo); 832 833 if (!n) { 834 if (clipped != stack) 835 free(clipped); 836 goto fallback; 837 } 838 } 839 } 840 841 if (clipped != stack) 842 free(clipped); 843 } else { 844 src_bo = kgem_create_buffer_2d(kgem, 845 tmp.drawable.width, 846 tmp.drawable.height, 847 tmp.drawable.bitsPerPixel, 848 KGEM_BUFFER_WRITE_INPLACE, 849 &ptr); 850 if (!src_bo) 851 goto fallback; 852 853 for (n = 0; n < nbox; n++) { 854 DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 855 __FUNCTION__, 856 box[n].x1, box[n].y1, 857 box[n].x2, box[n].y2, 858 src_dx, src_dy, 859 box[n].x1 - extents.x1, 860 box[n].y1 - extents.y1)); 861 memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel, 862 stride, src_bo->pitch, 863 box[n].x1 + src_dx, 864 box[n].y1 + src_dy, 865 box[n].x1 - extents.x1, 866 box[n].y1 - extents.y1, 867 box[n].x2 - box[n].x1, 868 box[n].y2 - box[n].y1); 869 } 870 871 n = sna->render.copy_boxes(sna, GXcopy, 872 &tmp, src_bo, -extents.x1, -extents.y1, 873 dst, dst_bo, dst_dx, dst_dy, 874 box, nbox, 0); 875 876 kgem_bo_destroy(&sna->kgem, src_bo); 877 878 if (!n) 879 goto tile; 880 } 881 882 return true; 883 } 884 885 cmd = XY_SRC_COPY_BLT_CMD; 886 br13 = dst_bo->pitch; 887 if (kgem->gen >= 040 && dst_bo->tiling) { 888 cmd |= 
BLT_DST_TILED; 889 br13 >>= 2; 890 } 891 br13 |= 0xcc << 16; 892 switch (dst->drawable.bitsPerPixel) { 893 default: 894 case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 895 br13 |= 1 << 25; /* RGB8888 */ 896 case 16: br13 |= 1 << 24; /* RGB565 */ 897 case 8: break; 898 } 899 900 kgem_set_mode(kgem, KGEM_BLT, dst_bo); 901 if (!kgem_check_batch(kgem, 8) || 902 !kgem_check_reloc_and_exec(kgem, 2) || 903 !kgem_check_bo_fenced(kgem, dst_bo)) { 904 kgem_submit(kgem); 905 if (!kgem_check_bo_fenced(kgem, dst_bo)) 906 goto fallback; 907 _kgem_set_mode(kgem, KGEM_BLT); 908 } 909 910 do { 911 int nbox_this_time; 912 913 nbox_this_time = nbox; 914 if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) 915 nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; 916 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 917 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 918 assert(nbox_this_time); 919 nbox -= nbox_this_time; 920 921 /* Count the total number of bytes to be read and allocate a 922 * single buffer large enough. Or if it is very small, combine 923 * with other allocations. */ 924 offset = 0; 925 for (n = 0; n < nbox_this_time; n++) { 926 int height = box[n].y2 - box[n].y1; 927 int width = box[n].x2 - box[n].x1; 928 offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; 929 } 930 931 src_bo = kgem_create_buffer(kgem, offset, 932 KGEM_BUFFER_WRITE_INPLACE | (nbox ? 
KGEM_BUFFER_LAST : 0), 933 &ptr); 934 if (!src_bo) 935 break; 936 937 offset = 0; 938 do { 939 int height = box->y2 - box->y1; 940 int width = box->x2 - box->x1; 941 int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); 942 uint32_t *b; 943 944 DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", 945 __FUNCTION__, 946 box->x1 + src_dx, box->y1 + src_dy, 947 box->x1 + dst_dx, box->y1 + dst_dy, 948 width, height, 949 offset, pitch)); 950 951 assert(box->x1 + src_dx >= 0); 952 assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); 953 assert(box->y1 + src_dy >= 0); 954 955 assert(box->x1 + dst_dx >= 0); 956 assert(box->y1 + dst_dy >= 0); 957 958 memcpy_blt(src, (char *)ptr + offset, 959 dst->drawable.bitsPerPixel, 960 stride, pitch, 961 box->x1 + src_dx, box->y1 + src_dy, 962 0, 0, 963 width, height); 964 965 b = kgem->batch + kgem->nbatch; 966 b[0] = cmd; 967 b[1] = br13; 968 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 969 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 970 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 971 I915_GEM_DOMAIN_RENDER << 16 | 972 I915_GEM_DOMAIN_RENDER | 973 KGEM_RELOC_FENCED, 974 0); 975 b[5] = 0; 976 b[6] = pitch; 977 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 978 I915_GEM_DOMAIN_RENDER << 16 | 979 KGEM_RELOC_FENCED, 980 offset); 981 kgem->nbatch += 8; 982 983 box++; 984 offset += pitch * height; 985 } while (--nbox_this_time); 986 assert(offset == __kgem_buffer_size(src_bo)); 987 988 if (nbox) { 989 _kgem_submit(kgem); 990 _kgem_set_mode(kgem, KGEM_BLT); 991 } 992 993 kgem_bo_destroy(kgem, src_bo); 994 } while (nbox); 995 996 sna->blt_state.fill_bo = 0; 997 return true; 998 999fallback: 1000 return write_boxes_inplace(kgem, 1001 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 1002 dst_bo, dst_dx, dst_dy, 1003 box, nbox); 1004} 1005 1006static void 1007write_boxes_inplace__xor(struct kgem *kgem, 1008 const void *src, int stride, int bpp, 
int16_t src_dx, int16_t src_dy, 1009 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 1010 const BoxRec *box, int n, 1011 uint32_t and, uint32_t or) 1012{ 1013 void *dst; 1014 1015 DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); 1016 1017 kgem_bo_submit(kgem, bo); 1018 1019 dst = kgem_bo_map(kgem, bo); 1020 if (dst == NULL) 1021 return; 1022 1023 do { 1024 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, 1025 box->x1 + src_dx, box->y1 + src_dy, 1026 box->x1 + dst_dx, box->y1 + dst_dy, 1027 box->x2 - box->x1, box->y2 - box->y1, 1028 bpp, stride, bo->pitch)); 1029 1030 assert(box->x2 > box->x1); 1031 assert(box->y2 > box->y1); 1032 1033 assert(box->x1 + dst_dx >= 0); 1034 assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch); 1035 assert(box->y1 + dst_dy >= 0); 1036 assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo)); 1037 1038 assert(box->x1 + src_dx >= 0); 1039 assert((box->x2 + src_dx)*bpp <= 8*stride); 1040 assert(box->y1 + src_dy >= 0); 1041 1042 memcpy_xor(src, dst, bpp, 1043 stride, bo->pitch, 1044 box->x1 + src_dx, box->y1 + src_dy, 1045 box->x1 + dst_dx, box->y1 + dst_dy, 1046 box->x2 - box->x1, box->y2 - box->y1, 1047 and, or); 1048 box++; 1049 } while (--n); 1050} 1051 1052static bool upload_inplace__xor(struct kgem *kgem, 1053 struct kgem_bo *bo, 1054 const BoxRec *box, 1055 int n, int bpp) 1056{ 1057 if (unlikely(kgem->wedged)) 1058 return true; 1059 1060 if (!kgem_bo_can_map(kgem, bo)) 1061 return false; 1062 1063 return __upload_inplace(kgem, bo, box, n, bpp); 1064} 1065 1066void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, 1067 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 1068 const void *src, int stride, int16_t src_dx, int16_t src_dy, 1069 const BoxRec *box, int nbox, 1070 uint32_t and, uint32_t or) 1071{ 1072 struct kgem *kgem = &sna->kgem; 1073 struct kgem_bo *src_bo; 1074 BoxRec extents; 1075 bool can_blt; 1076 void *ptr; 1077 int offset; 1078 int n, cmd, br13; 1079 
	/* NOTE(review): this excerpt begins mid-function -- the enclosing
	 * function's signature is above this chunk.  From the visible body:
	 * it uploads 'nbox' boxes of CPU pixels ('src'/'stride', transformed
	 * by the 'and'/'or' masks via memcpy_xor) into 'dst_bo', choosing
	 * between an inplace write, a staged upload copied by the render
	 * engine (tiled if too large), or direct BLT-ring blits. */
	DBG(("%s x %d\n", __FUNCTION__, nbox));

	/* Fastest path: XOR the pixels straight into the destination mapping. */
	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) {
fallback:
		write_boxes_inplace__xor(kgem,
					 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
					 dst_bo, dst_dx, dst_dy,
					 box, nbox,
					 and, or);
		return;
	}

	/* The blitter is usable only if the bo supports BLT and every box row
	 * fits the XY_SRC_COPY width limit (< 8*(MAXSHORT-4) bits).  While
	 * scanning the boxes, also accumulate their bounding 'extents'. */
	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}
	/* Here n == nbox; that value sizes the 'clipped' allocation below. */

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		/* Staged path: XOR the pixels into a linear upload buffer and
		 * have the render engine copy them into place.  'tmp' is a
		 * throwaway PixmapRec carrying only the geometry that
		 * render.copy_boxes() needs. */
		PixmapRec tmp;

		tmp.drawable.width  = extents.x2 - extents.x1;
		tmp.drawable.height = extents.y2 - extents.y1;
		tmp.drawable.depth  = dst->drawable.depth;
		tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
		tmp.devPrivate.ptr = NULL;

		assert(tmp.drawable.width);
		assert(tmp.drawable.height);

		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
		     __FUNCTION__,
		     extents.x1, extents.y1,
		     tmp.drawable.width, tmp.drawable.height,
		     sna->render.max_3d_size, sna->render.max_3d_size));
		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
			/* Too large for a single staging surface: split the
			 * extents into step x step tiles and upload each tile
			 * with the boxes clipped against it. */
			BoxRec tile, stack[64], *clipped, *c;
			int step;

tile:
			/* NOTE(review): operator precedence -- this computes
			 * max_3d_size - (4096 / bpp), NOT
			 * (max_3d_size - 4096) / bpp; confirm which was
			 * intended against upstream sna_io.c. */
			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			/* Shrink until a step x step x 32bpp tile fits the
			 * upload buffer limit. */
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling upload, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			/* Use the on-stack array when nbox is small enough. */
			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.drawable.width  = tile.x2 - tile.x1;
					tmp.drawable.height = tile.y2 - tile.y1;

					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.drawable.width,
								       tmp.drawable.height,
								       tmp.drawable.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
					if (!src_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					/* Clip each box to this tile, XORing
					 * the covered pixels into the staging
					 * buffer; 'c' collects the boxes that
					 * actually intersect the tile. */
					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     src_dx, src_dy,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
							   stride, src_bo->pitch,
							   c->x1 + src_dx,
							   c->y1 + src_dy,
							   c->x1 - tile.x1,
							   c->y1 - tile.y1,
							   c->x2 - c->x1,
							   c->y2 - c->y1,
							   and, or);
						c++;
					}

					/* 'n' is reused here as a success flag
					 * for the copy (1 == nothing to do). */
					if (c != clipped)
						n = sna->render.copy_boxes(sna, GXcopy,
									   &tmp, src_bo, -tile.x1, -tile.y1,
									   dst, dst_bo, dst_dx, dst_dy,
									   clipped, c - clipped, 0);
					else
						n = 1;

					kgem_bo_destroy(&sna->kgem, src_bo);

					if (!n) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			/* Single staging surface covering the whole extents. */
			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
			if (!src_bo)
				goto fallback;

			for (n = 0; n < nbox; n++) {
				DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
				     __FUNCTION__,
				     box[n].x1, box[n].y1,
				     box[n].x2, box[n].y2,
				     src_dx, src_dy,
				     box[n].x1 - extents.x1,
				     box[n].y1 - extents.y1));
				memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
					   stride, src_bo->pitch,
					   box[n].x1 + src_dx,
					   box[n].y1 + src_dy,
					   box[n].x1 - extents.x1,
					   box[n].y1 - extents.y1,
					   box[n].x2 - box[n].x1,
					   box[n].y2 - box[n].y1,
					   and, or);
			}

			n = sna->render.copy_boxes(sna, GXcopy,
						   &tmp, src_bo, -extents.x1, -extents.y1,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, nbox, 0);

			kgem_bo_destroy(&sna->kgem, src_bo);

			/* Copy failed -- retry with the tiled strategy. */
			if (!n)
				goto tile;
		}

		return;
	}

	/* BLT path: build XY_SRC_COPY_BLT commands, one per box, copying from
	 * per-batch staging buffers into dst_bo. */
	cmd = XY_SRC_COPY_BLT_CMD;
	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		/* gen4+: tiled pitches are programmed in dwords. */
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	br13 |= 0xcc << 16; /* raster op 0xcc: copy source */
	switch (dst->drawable.bitsPerPixel) {
	default:
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_bo_fenced(kgem, dst_bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, dst_bo))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	do {
		int nbox_this_time;

		/* Limit this pass to what fits in the remaining batch space
		 * (8 dwords per blit) and relocation entries (2 per blit). */
		nbox_this_time = nbox;
		if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
		if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
			nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Count the total number of bytes to be read and allocate a
		 * single buffer large enough. Or if it is very small, combine
		 * with other allocations. */
		offset = 0;
		for (n = 0; n < nbox_this_time; n++) {
			int height = box[n].y2 - box[n].y1;
			int width = box[n].x2 - box[n].x1;
			offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
		}

		src_bo = kgem_create_buffer(kgem, offset,
					    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
					    &ptr);
		if (!src_bo)
			break;

		offset = 0;
		do {
			int height = box->y2 - box->y1;
			int width = box->x2 - box->x1;
			int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
			uint32_t *b;

			DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
			     __FUNCTION__,
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     width, height,
			     offset, pitch));

			assert(box->x1 + src_dx >= 0);
			assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
			assert(box->y1 + src_dy >= 0);

			assert(box->x1 + dst_dx >= 0);
			assert(box->y1 + dst_dy >= 0);

			/* XOR this box into its slot in the staging buffer. */
			memcpy_xor(src, (char *)ptr + offset,
				   dst->drawable.bitsPerPixel,
				   stride, pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   0, 0,
				   width, height,
				   and, or);

			/* Emit the 8-dword XY_SRC_COPY blit: dst is read+write
			 * in the render domain, src is read-only. */
			b = kgem->batch + kgem->nbatch;
			b[0] = cmd;
			b[1] = br13;
			b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
			b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = 0;
			b[6] = pitch;
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      offset);
			kgem->nbatch += 8;

			box++;
			offset += pitch * height;
		} while (--nbox_this_time);
		assert(offset == __kgem_buffer_size(src_bo));

		/* More boxes remain: flush this batch and start another. */
		if (nbox) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bo_destroy(kgem, src_bo);
	} while (nbox);

	sna->blt_state.fill_bo = 0;
}

/* Replace the contents of a busy 'bo' without stalling the CPU: stage the
 * pixmap's pixels in a temporary write-inplace buffer and queue a GPU copy
 * into 'bo'.  Returns false (so the caller picks another strategy) when the
 * pixmap is too large to be worth staging, when the copy cannot be set up,
 * or when the staged copy itself fails. */
static bool
indirect_replace(struct sna *sna,
		 PixmapPtr pixmap,
		 struct kgem_bo *bo,
		 const void *src, int stride)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec box;
	void *ptr;
	bool ret;

	DBG(("%s: size=%d vs %d\n",
	     __FUNCTION__,
	     (int)pixmap->devKind * pixmap->drawable.height >> 12,
	     kgem->half_cpu_cache_pages));

	/* Only worthwhile for pixmaps small relative to the CPU cache
	 * (size compared in pages, hence the >> 12). */
	if ((int)pixmap->devKind * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
		return false;

	/* Without BLT support, the copy must go through the 3D pipeline,
	 * which cannot handle sizes that would require tiling. */
	if (!kgem_bo_can_blt(kgem, bo) &&
	    must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
		return false;

	src_bo = kgem_create_buffer_2d(kgem,
				       pixmap->drawable.width,
				       pixmap->drawable.height,
				       pixmap->drawable.bitsPerPixel,
				       KGEM_BUFFER_WRITE_INPLACE,
				       &ptr);
	if (!src_bo)
		return false;

	/* Copy the whole pixmap into the staging buffer... */
	memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
		   stride, src_bo->pitch,
		   0, 0,
		   0, 0,
		   pixmap->drawable.width,
		   pixmap->drawable.height);

	/* ...then queue a full-pixmap GPU copy into the target bo. */
	box.x1 = box.y1 = 0;
	box.x2 = pixmap->drawable.width;
	box.y2 = pixmap->drawable.height;

	ret = sna->render.copy_boxes(sna, GXcopy,
				     pixmap, src_bo, 0, 0,
				     pixmap, bo, 0, 0,
				     &box, 1, 0);

	kgem_bo_destroy(kgem, src_bo);

	return ret;
}

/* Replace the entire contents of *_bo with the pixels at 'src'/'stride'.
 * If the current bo is busy, first try an indirect (staged) replacement;
 * failing that, allocate a fresh bo of the same geometry so the upload need
 * not wait for the GPU.  On success *_bo may point to a new bo (the old one
 * is destroyed) and true is returned; false only if every path fails. */
bool sna_replace(struct sna *sna,
		 PixmapPtr pixmap,
		 struct kgem_bo **_bo,
		 const void *src, int stride)
{
	struct kgem_bo *bo = *_bo;
	struct kgem *kgem = &sna->kgem;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling,
	     __kgem_bo_is_busy(kgem, bo)));

	assert(!sna_pixmap(pixmap)->pinned);

	kgem_bo_undo(kgem, bo);

	if (__kgem_bo_is_busy(kgem, bo)) {
		struct kgem_bo *new_bo;

		if (indirect_replace(sna, pixmap, bo, src, stride))
			return true;

		/* Swap to an idle bo; the old one is released in 'done'. */
		new_bo = kgem_create_2d(kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Linear bo with matching pitch: a single pwrite suffices.  The size
	 * deliberately omits the padding after the final row. */
	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
	    kgem_bo_write(kgem, bo, src,
			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
		goto done;

	/* Next preference: CPU-side detiling copy directly into the bo. */
	if (upload_inplace__tiled(kgem, bo)) {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (write_boxes_inplace__tiled(kgem, src,
					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
					       bo, 0, 0, &box, 1))
			goto done;
	}

	if (kgem_bo_is_mappable(kgem, bo) &&
	    (dst = kgem_bo_map(kgem, bo)) != NULL) {
		/* GTT-mapped copy. */
		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height);
	} else {
		/* Last resort: batched upload through sna_write_boxes. */
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1))
			goto err;
	}

done:
	if (bo != *_bo)
		kgem_bo_destroy(kgem, *_bo);
	*_bo = bo;
	return true;

err:
	if (bo != *_bo)
		kgem_bo_destroy(kgem, bo);
	return false;
}

/* As sna_replace(), but every pixel is transformed by the 'and'/'or' masks
 * (memcpy_xor) on its way into the bo.  Returns the bo now holding the
 * pixels, which may be a replacement for the one passed in.
 * NOTE(review): unlike sna_replace() this tests kgem_bo_is_busy(bo) rather
 * than __kgem_bo_is_busy(kgem, bo) and never calls kgem_bo_undo -- confirm
 * the asymmetry is intentional.
 * NOTE(review): if the bo is mappable but kgem_bo_map() fails, no copy is
 * performed and no fallback runs (sna_replace() structures this as one
 * combined condition) -- looks like silently lost pixels; verify. */
struct kgem_bo *sna_replace__xor(struct sna *sna,
				 PixmapPtr pixmap,
				 struct kgem_bo *bo,
				 const void *src, int stride,
				 uint32_t and, uint32_t or)
{
	struct kgem *kgem = &sna->kgem;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling));

	assert(!sna_pixmap(pixmap)->pinned);

	if (kgem_bo_is_busy(bo)) {
		struct kgem_bo *new_bo;

		/* Swap to an idle bo so the CPU copy need not stall; here the
		 * old bo is destroyed immediately (callers receive the
		 * returned bo). */
		new_bo = kgem_create_2d(kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo) {
			kgem_bo_destroy(kgem, bo);
			bo = new_bo;
		}
	}

	if (kgem_bo_is_mappable(kgem, bo)) {
		dst = kgem_bo_map(kgem, bo);
		if (dst) {
			memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
				   stride, bo->pitch,
				   0, 0,
				   0, 0,
				   pixmap->drawable.width,
				   pixmap->drawable.height,
				   and, or);
		}
	} else {
		/* Unmappable bo: batched XOR upload. */
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		sna_write_boxes__xor(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1,
				     and, or);
	}

	return bo;
}