1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a 3 * copy of this software and associated documentation files (the "Software"), 4 * to deal in the Software without restriction, including without limitation 5 * on the rights to use, copy, modify, merge, publish, distribute, sub 6 * license, and/or sell copies of the Software, and to permit persons to whom 7 * the Software is furnished to do so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice (including the next 10 * paragraph) shall be included in all copies or substantial portions of the 11 * Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * Authors: 22 * Adam Rak <adam.rak@streamnovation.com> 23 */ 24 25#include "pipe/p_defines.h" 26#include "pipe/p_state.h" 27#include "pipe/p_context.h" 28#include "util/u_blitter.h" 29#include "util/list.h" 30#include "util/u_transfer.h" 31#include "util/u_surface.h" 32#include "util/u_pack_color.h" 33#include "util/u_math.h" 34#include "util/u_memory.h" 35#include "util/u_inlines.h" 36#include "util/u_framebuffer.h" 37#include "r600_shader.h" 38#include "r600_pipe.h" 39#include "r600_formats.h" 40#include "compute_memory_pool.h" 41#include "evergreen_compute.h" 42#include "evergreen_compute_internal.h" 43#include <inttypes.h> 44 45#define ITEM_ALIGNMENT 1024 46 47/* A few forward declarations of static functions */ 48static void compute_memory_shadow(struct compute_memory_pool* pool, 49 struct pipe_context *pipe, int device_to_host); 50 51static void compute_memory_defrag(struct compute_memory_pool *pool, 52 struct pipe_resource *src, struct pipe_resource *dst, 53 struct pipe_context *pipe); 54 55static int compute_memory_promote_item(struct compute_memory_pool *pool, 56 struct compute_memory_item *item, struct pipe_context *pipe, 57 int64_t allocated); 58 59static void compute_memory_move_item(struct compute_memory_pool *pool, 60 struct pipe_resource *src, struct pipe_resource *dst, 61 struct compute_memory_item *item, uint64_t new_start_in_dw, 62 struct pipe_context *pipe); 63 64static void compute_memory_transfer(struct compute_memory_pool* pool, 65 struct pipe_context * pipe, int device_to_host, 66 struct compute_memory_item* chunk, void* data, 67 int offset_in_chunk, int size); 68 69/** 70 * Creates a new pool. 71 */ 72struct compute_memory_pool* compute_memory_pool_new( 73 struct r600_screen * rscreen) 74{ 75 struct compute_memory_pool* pool = (struct compute_memory_pool*) 76 CALLOC(sizeof(struct compute_memory_pool), 1); 77 if (!pool) 78 return NULL; 79 80 COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n"); 81 82 pool->screen = rscreen; 83 pool->item_list = (struct list_head *) 84 CALLOC(sizeof(struct list_head), 1); 85 pool->unallocated_list = (struct list_head *) 86 CALLOC(sizeof(struct list_head), 1); 87 list_inithead(pool->item_list); 88 list_inithead(pool->unallocated_list); 89 return pool; 90} 91 92/** 93 * Initializes the pool with a size of \a initial_size_in_dw. 94 * \param pool The pool to be initialized. 95 * \param initial_size_in_dw The initial size. 96 * \see compute_memory_grow_defrag_pool 97 */ 98static void compute_memory_pool_init(struct compute_memory_pool * pool, 99 unsigned initial_size_in_dw) 100{ 101 102 COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n", 103 initial_size_in_dw); 104 105 pool->size_in_dw = initial_size_in_dw; 106 pool->bo = r600_compute_buffer_alloc_vram(pool->screen, 107 pool->size_in_dw * 4); 108} 109 110/** 111 * Frees all stuff in the pool and the pool struct itself too. 112 */ 113void compute_memory_pool_delete(struct compute_memory_pool* pool) 114{ 115 COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n"); 116 free(pool->shadow); 117 r600_resource_reference(&pool->bo, NULL); 118 /* In theory, all of the items were freed in compute_memory_free. 119 * Just delete the list heads 120 */ 121 free(pool->item_list); 122 free(pool->unallocated_list); 123 /* And then the pool itself */ 124 free(pool); 125} 126 127/** 128 * Reallocates and defragments the pool, conserves data. 129 * \returns -1 if it fails, 0 otherwise 130 * \see compute_memory_finalize_pending 131 */ 132static int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, 133 struct pipe_context *pipe, int new_size_in_dw) 134{ 135 new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); 136 137 COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() " 138 "new_size_in_dw = %d (%d bytes)\n", 139 new_size_in_dw, new_size_in_dw * 4); 140 141 assert(new_size_in_dw >= pool->size_in_dw); 142 143 if (!pool->bo) { 144 compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16)); 145 } else { 146 struct r600_resource *temp = NULL; 147 148 temp = r600_compute_buffer_alloc_vram(pool->screen, new_size_in_dw * 4); 149 150 if (temp != NULL) { 151 struct pipe_resource *src = (struct pipe_resource *)pool->bo; 152 struct pipe_resource *dst = (struct pipe_resource *)temp; 153 154 COMPUTE_DBG(pool->screen, " Growing and defragmenting the pool " 155 "using a temporary resource\n"); 156 157 compute_memory_defrag(pool, src, dst, pipe); 158 159 /* Release the old buffer */ 160 r600_resource_reference(&pool->bo, NULL); 161 pool->bo = temp; 162 pool->size_in_dw = new_size_in_dw; 163 } 164 else { 165 COMPUTE_DBG(pool->screen, " The creation of the temporary resource failed\n" 166 " Falling back to using 'shadow'\n"); 167 168 compute_memory_shadow(pool, pipe, 1); 169 pool->shadow = realloc(pool->shadow, new_size_in_dw * 4); 170 if (pool->shadow == NULL) 171 return -1; 172 173 pool->size_in_dw = new_size_in_dw; 174 /* Release the old buffer */ 175 r600_resource_reference(&pool->bo, NULL); 176 pool->bo = r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4); 177 compute_memory_shadow(pool, pipe, 0); 178 179 if (pool->status & POOL_FRAGMENTED) { 180 struct pipe_resource *src = (struct pipe_resource *)pool->bo; 181 compute_memory_defrag(pool, src, src, pipe); 182 } 183 } 184 } 185 186 return 0; 187} 188 189/** 190 * Copy pool from device to host, or host to device. 191 * \param device_to_host 1 for device->host, 0 for host->device 192 * \see compute_memory_grow_defrag_pool 193 */ 194static void compute_memory_shadow(struct compute_memory_pool* pool, 195 struct pipe_context * pipe, int device_to_host) 196{ 197 struct compute_memory_item chunk; 198 199 COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n", 200 device_to_host); 201 202 chunk.id = 0; 203 chunk.start_in_dw = 0; 204 chunk.size_in_dw = pool->size_in_dw; 205 compute_memory_transfer(pool, pipe, device_to_host, &chunk, 206 pool->shadow, 0, pool->size_in_dw*4); 207} 208 209/** 210 * Moves all the items marked for promotion from the \a unallocated_list 211 * to the \a item_list. 212 * \return -1 if it fails, 0 otherwise 213 * \see evergreen_set_global_binding 214 */ 215int compute_memory_finalize_pending(struct compute_memory_pool* pool, 216 struct pipe_context * pipe) 217{ 218 struct compute_memory_item *item, *next; 219 220 int64_t allocated = 0; 221 int64_t unallocated = 0; 222 int64_t last_pos; 223 224 int err = 0; 225 226 COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n"); 227 228 LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { 229 COMPUTE_DBG(pool->screen, " + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" " 230 "(%"PRIi64" bytes)\n", item->start_in_dw, item->id, 231 item->size_in_dw, item->size_in_dw * 4); 232 } 233 234 /* Calculate the total allocated size */ 235 LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { 236 allocated += align(item->size_in_dw, ITEM_ALIGNMENT); 237 } 238 239 /* Calculate the total unallocated size of the items that 240 * will be promoted to the pool */ 241 LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) { 242 if (item->status & ITEM_FOR_PROMOTING) 243 unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); 244 } 245 246 if (unallocated == 0) { 247 return 0; 248 } 249 250 if (pool->size_in_dw < allocated + unallocated) { 251 err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated); 252 if (err == -1) 253 return -1; 254 } 255 else if (pool->status & POOL_FRAGMENTED) { 256 struct pipe_resource *src = (struct pipe_resource *)pool->bo; 257 compute_memory_defrag(pool, src, src, pipe); 258 } 259 260 /* After defragmenting the pool, allocated is equal to the first available 261 * position for new items in the pool */ 262 last_pos = allocated; 263 264 /* Loop through all the unallocated items, check if they are marked 265 * for promoting, allocate space for them and add them to the item_list. */ 266 LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) { 267 if (item->status & ITEM_FOR_PROMOTING) { 268 err = compute_memory_promote_item(pool, item, pipe, last_pos); 269 item->status &= ~ITEM_FOR_PROMOTING; 270 271 last_pos += align(item->size_in_dw, ITEM_ALIGNMENT); 272 273 if (err == -1) 274 return -1; 275 } 276 } 277 278 return 0; 279} 280 281/** 282 * Defragments the pool, so that there's no gap between items. 283 * \param pool The pool to be defragmented 284 * \param src The origin resource 285 * \param dst The destination resource 286 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending 287 */ 288static void compute_memory_defrag(struct compute_memory_pool *pool, 289 struct pipe_resource *src, struct pipe_resource *dst, 290 struct pipe_context *pipe) 291{ 292 struct compute_memory_item *item; 293 int64_t last_pos; 294 295 COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n"); 296 297 last_pos = 0; 298 LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { 299 if (src != dst || item->start_in_dw != last_pos) { 300 assert(last_pos <= item->start_in_dw); 301 302 compute_memory_move_item(pool, src, dst, 303 item, last_pos, pipe); 304 } 305 306 last_pos += align(item->size_in_dw, ITEM_ALIGNMENT); 307 } 308 309 pool->status &= ~POOL_FRAGMENTED; 310} 311 312/** 313 * Moves an item from the \a unallocated_list to the \a item_list. 314 * \param item The item that will be promoted. 315 * \return -1 if it fails, 0 otherwise 316 * \see compute_memory_finalize_pending 317 */ 318static int compute_memory_promote_item(struct compute_memory_pool *pool, 319 struct compute_memory_item *item, struct pipe_context *pipe, 320 int64_t start_in_dw) 321{ 322 struct pipe_screen *screen = (struct pipe_screen *)pool->screen; 323 struct r600_context *rctx = (struct r600_context *)pipe; 324 struct pipe_resource *src = (struct pipe_resource *)item->real_buffer; 325 struct pipe_resource *dst = (struct pipe_resource *)pool->bo; 326 struct pipe_box box; 327 328 COMPUTE_DBG(pool->screen, "* compute_memory_promote_item()\n" 329 " + Promoting Item: %"PRIi64" , starting at: %"PRIi64" (%"PRIi64" bytes) " 330 "size: %"PRIi64" (%"PRIi64" bytes)\n\t\t\tnew start: %"PRIi64" (%"PRIi64" bytes)\n", 331 item->id, item->start_in_dw, item->start_in_dw * 4, 332 item->size_in_dw, item->size_in_dw * 4, 333 start_in_dw, start_in_dw * 4); 334 335 /* Remove the item from the unallocated list */ 336 list_del(&item->link); 337 338 /* Add it back to the item_list */ 339 list_addtail(&item->link, pool->item_list); 340 item->start_in_dw = start_in_dw; 341 342 if (src) { 343 u_box_1d(0, item->size_in_dw * 4, &box); 344 345 rctx->b.b.resource_copy_region(pipe, 346 dst, 0, item->start_in_dw * 4, 0 ,0, 347 src, 0, &box); 348 349 /* We check if the item is mapped for reading. 350 * In this case, we need to keep the temporary buffer 'alive' 351 * because it is possible to keep a map active for reading 352 * while a kernel (that reads from it) executes */ 353 if (!(item->status & ITEM_MAPPED_FOR_READING)) { 354 pool->screen->b.b.resource_destroy(screen, src); 355 item->real_buffer = NULL; 356 } 357 } 358 359 return 0; 360} 361 362/** 363 * Moves an item from the \a item_list to the \a unallocated_list. 364 * \param item The item that will be demoted 365 * \see r600_compute_global_transfer_map 366 */ 367void compute_memory_demote_item(struct compute_memory_pool *pool, 368 struct compute_memory_item *item, struct pipe_context *pipe) 369{ 370 struct r600_context *rctx = (struct r600_context *)pipe; 371 struct pipe_resource *src = (struct pipe_resource *)pool->bo; 372 struct pipe_resource *dst; 373 struct pipe_box box; 374 375 COMPUTE_DBG(pool->screen, "* compute_memory_demote_item()\n" 376 " + Demoting Item: %"PRIi64", starting at: %"PRIi64" (%"PRIi64" bytes) " 377 "size: %"PRIi64" (%"PRIi64" bytes)\n", item->id, item->start_in_dw, 378 item->start_in_dw * 4, item->size_in_dw, item->size_in_dw * 4); 379 380 /* First, we remove the item from the item_list */ 381 list_del(&item->link); 382 383 /* Now we add it to the unallocated list */ 384 list_addtail(&item->link, pool->unallocated_list); 385 386 /* We check if the intermediate buffer exists, and if it 387 * doesn't, we create it again */ 388 if (item->real_buffer == NULL) { 389 item->real_buffer = r600_compute_buffer_alloc_vram( 390 pool->screen, item->size_in_dw * 4); 391 } 392 393 dst = (struct pipe_resource *)item->real_buffer; 394 395 /* We transfer the memory from the item in the pool to the 396 * temporary buffer */ 397 u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box); 398 399 rctx->b.b.resource_copy_region(pipe, 400 dst, 0, 0, 0, 0, 401 src, 0, &box); 402 403 /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */ 404 item->start_in_dw = -1; 405 406 if (item->link.next != pool->item_list) { 407 pool->status |= POOL_FRAGMENTED; 408 } 409} 410 411/** 412 * Moves the item \a item forward from the resource \a src to the 413 * resource \a dst at \a new_start_in_dw 414 * 415 * This function assumes two things: 416 * 1) The item is \b only moved forward, unless src is different from dst 417 * 2) The item \b won't change it's position inside the \a item_list 418 * 419 * \param item The item that will be moved 420 * \param new_start_in_dw The new position of the item in \a item_list 421 * \see compute_memory_defrag 422 */ 423static void compute_memory_move_item(struct compute_memory_pool *pool, 424 struct pipe_resource *src, struct pipe_resource *dst, 425 struct compute_memory_item *item, uint64_t new_start_in_dw, 426 struct pipe_context *pipe) 427{ 428 struct pipe_screen *screen = (struct pipe_screen *)pool->screen; 429 struct r600_context *rctx = (struct r600_context *)pipe; 430 struct pipe_box box; 431 432 COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n" 433 " + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n", 434 item->id, item->start_in_dw, item->start_in_dw * 4, 435 new_start_in_dw, new_start_in_dw * 4); 436 437 if (pool->item_list != item->link.prev) { 438 ASSERTED struct compute_memory_item *prev; 439 prev = container_of(item->link.prev, struct compute_memory_item, link); 440 assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw); 441 } 442 443 u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box); 444 445 /* If the ranges don't overlap, or we are copying from one resource 446 * to another, we can just copy the item directly */ 447 if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) { 448 449 rctx->b.b.resource_copy_region(pipe, 450 dst, 0, new_start_in_dw * 4, 0, 0, 451 src, 0, &box); 452 } else { 453 /* The ranges overlap, we will try first to use an intermediate 454 * resource to move the item */ 455 struct pipe_resource *tmp = (struct pipe_resource *) 456 r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4); 457 458 if (tmp != NULL) { 459 rctx->b.b.resource_copy_region(pipe, 460 tmp, 0, 0, 0, 0, 461 src, 0, &box); 462 463 box.x = 0; 464 465 rctx->b.b.resource_copy_region(pipe, 466 dst, 0, new_start_in_dw * 4, 0, 0, 467 tmp, 0, &box); 468 469 pool->screen->b.b.resource_destroy(screen, tmp); 470 471 } else { 472 /* The allocation of the temporary resource failed, 473 * falling back to use mappings */ 474 uint32_t *map; 475 int64_t offset; 476 struct pipe_transfer *trans; 477 478 offset = item->start_in_dw - new_start_in_dw; 479 480 u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box); 481 482 map = pipe->buffer_map(pipe, src, 0, PIPE_MAP_READ_WRITE, 483 &box, &trans); 484 485 assert(map); 486 assert(trans); 487 488 memmove(map, map + offset, item->size_in_dw * 4); 489 490 pipe->buffer_unmap(pipe, trans); 491 } 492 } 493 494 item->start_in_dw = new_start_in_dw; 495} 496 497/** 498 * Frees the memory associated to the item with id \a id from the pool. 499 * \param id The id of the item to be freed. 500 */ 501void compute_memory_free(struct compute_memory_pool* pool, int64_t id) 502{ 503 struct compute_memory_item *item, *next; 504 struct pipe_screen *screen = (struct pipe_screen *)pool->screen; 505 struct pipe_resource *res; 506 507 COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %"PRIi64" \n", id); 508 509 LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) { 510 511 if (item->id == id) { 512 513 if (item->link.next != pool->item_list) { 514 pool->status |= POOL_FRAGMENTED; 515 } 516 517 list_del(&item->link); 518 519 if (item->real_buffer) { 520 res = (struct pipe_resource *)item->real_buffer; 521 pool->screen->b.b.resource_destroy( 522 screen, res); 523 } 524 525 free(item); 526 527 return; 528 } 529 } 530 531 LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) { 532 533 if (item->id == id) { 534 list_del(&item->link); 535 536 if (item->real_buffer) { 537 res = (struct pipe_resource *)item->real_buffer; 538 pool->screen->b.b.resource_destroy( 539 screen, res); 540 } 541 542 free(item); 543 544 return; 545 } 546 } 547 548 fprintf(stderr, "Internal error, invalid id %"PRIi64" " 549 "for compute_memory_free\n", id); 550 551 assert(0 && "error"); 552} 553 554/** 555 * Creates pending allocations for new items, these items are 556 * placed in the unallocated_list. 557 * \param size_in_dw The size, in double words, of the new item. 558 * \return The new item 559 * \see r600_compute_global_buffer_create 560 */ 561struct compute_memory_item* compute_memory_alloc( 562 struct compute_memory_pool* pool, 563 int64_t size_in_dw) 564{ 565 struct compute_memory_item *new_item = NULL; 566 567 COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n", 568 size_in_dw, 4 * size_in_dw); 569 570 new_item = (struct compute_memory_item *) 571 CALLOC(sizeof(struct compute_memory_item), 1); 572 if (!new_item) 573 return NULL; 574 575 new_item->size_in_dw = size_in_dw; 576 new_item->start_in_dw = -1; /* mark pending */ 577 new_item->id = pool->next_id++; 578 new_item->pool = pool; 579 new_item->real_buffer = NULL; 580 581 list_addtail(&new_item->link, pool->unallocated_list); 582 583 COMPUTE_DBG(pool->screen, " + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n", 584 new_item, new_item->id, new_item->size_in_dw, 585 new_item->size_in_dw * 4); 586 return new_item; 587} 588 589/** 590 * Transfer data host<->device, offset and size is in bytes. 591 * \param device_to_host 1 for device->host, 0 for host->device. 592 * \see compute_memory_shadow 593 */ 594static void compute_memory_transfer( 595 struct compute_memory_pool* pool, 596 struct pipe_context * pipe, 597 int device_to_host, 598 struct compute_memory_item* chunk, 599 void* data, 600 int offset_in_chunk, 601 int size) 602{ 603 int64_t aligned_size = pool->size_in_dw; 604 struct pipe_resource* gart = (struct pipe_resource*)pool->bo; 605 int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk; 606 607 struct pipe_transfer *xfer; 608 uint32_t *map; 609 610 assert(gart); 611 612 COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, " 613 "offset_in_chunk = %d, size = %d\n", device_to_host, 614 offset_in_chunk, size); 615 616 if (device_to_host) { 617 map = pipe->buffer_map(pipe, gart, 0, PIPE_MAP_READ, 618 &(struct pipe_box) { .width = aligned_size * 4, 619 .height = 1, .depth = 1 }, &xfer); 620 assert(xfer); 621 assert(map); 622 memcpy(data, map + internal_offset, size); 623 pipe->buffer_unmap(pipe, xfer); 624 } else { 625 map = pipe->buffer_map(pipe, gart, 0, PIPE_MAP_WRITE, 626 &(struct pipe_box) { .width = aligned_size * 4, 627 .height = 1, .depth = 1 }, &xfer); 628 assert(xfer); 629 assert(map); 630 memcpy(map + internal_offset, data, size); 631 pipe->buffer_unmap(pipe, xfer); 632 } 633} 634