radv_pipeline_cache.c revision 01e04c3f
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"

struct cache_entry_variant_info {
	struct radv_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
};

struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_sizes[MESA_SHADER_STAGES];
	struct radv_shader_variant *variants[MESA_SHADER_STAGES];
	char code[0];
};
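
/* A cache_entry is the fixed-size header above followed by a packed,
 * variable-length payload in code[]. For every stage with a non-zero
 * code_sizes[i], the payload holds a cache_entry_variant_info followed by
 * the compiled shader binary, in stage order:
 *
 *   [sha1 | code_sizes | variants] [info_0 | code_0] [info_1 | code_1] ...
 *
 * entry_size() below walks code_sizes[] to recover the total size. The
 * variants[] pointers are only meaningful in memory; they are left or set
 * to NULL whenever an entry is written to the disk cache or serialized
 * into VkPipelineCache data.
 */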

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. Disable caching when we want to keep shader debug info, since
	 * we don't get the debug info on cached shaders.
	 */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
	    device->keep_shader_info)
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			for(int j = 0; j < MESA_SHADER_STAGES; ++j) {
				if (cache->hash_table[i]->variants[j])
					radv_shader_variant_destroy(cache->device,
								    cache->hash_table[i]->variants[j]);
			}
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	size_t ret = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (entry->code_sizes[i])
			ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
	return ret;
}

void
radv_hash_shaders(unsigned char *hash,
		  const VkPipelineShaderStageCreateInfo **stages,
		  const struct radv_pipeline_layout *layout,
		  const struct radv_pipeline_key *key,
		  uint32_t flags)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (stages[i]) {
			RADV_FROM_HANDLE(radv_shader_module, module, stages[i]->module);
			const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

			_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
			_mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
			if (spec_info) {
				_mesa_sha1_update(&ctx, spec_info->pMapEntries,
						  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
				_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
			}
		}
	}
	_mesa_sha1_update(&ctx, &flags, 4);
	_mesa_sha1_final(&ctx, hash);
}


static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}
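
/* The in-memory table is an open-addressed hash table: the low dword of the
 * entry's SHA-1 picks a start slot and collisions probe linearly, wrapping
 * with table_size - 1 as the mask (the table size is a power of two, or zero
 * when caching is disabled). Lookups stop at the first empty slot, and
 * radv_pipeline_cache_add_entry() keeps the table less than half full by
 * doubling it before inserting, which guarantees the probe loop terminates.
 */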
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}


static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

static bool
radv_is_cache_disabled(struct radv_device *device)
{
	/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
	 * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
	 */
	return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
	       device->keep_shader_info;
}
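
/* Look up a pipeline's shaders by the hash produced by radv_hash_shaders().
 * The in-memory table is tried first; on a miss the entry is fetched from
 * the on-disk cache (keyed by disk_cache_compute_key() over the same SHA-1),
 * copied into the in-memory table, and used from there. Shader variants are
 * materialized lazily from the serialized payload and then shared through
 * their reference counts.
 */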
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
						struct radv_pipeline_cache *cache,
						const unsigned char *sha1,
						struct radv_shader_variant **variants)
{
	struct cache_entry *entry;

	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	if (!entry) {
		/* Don't cache when we want debug info, since this isn't
		 * present in the cache.
		 */
		if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		}

		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache,
				       sha1, 20, disk_sha1);
		entry = (struct cache_entry *)
			disk_cache_get(device->physical_device->disk_cache,
				       disk_sha1, NULL);
		if (!entry) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		} else {
			size_t size = entry_size(entry);
			struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
								 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
			if (!new_entry) {
				free(entry);
				pthread_mutex_unlock(&cache->mutex);
				return false;
			}

			memcpy(new_entry, entry, entry_size(entry));
			free(entry);
			entry = new_entry;

			radv_pipeline_cache_add_entry(cache, new_entry);
		}
	}

	char *p = entry->code;
	for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!entry->variants[i] && entry->code_sizes[i]) {
			struct radv_shader_variant *variant;
			struct cache_entry_variant_info info;

			variant = calloc(1, sizeof(struct radv_shader_variant));
			if (!variant) {
				pthread_mutex_unlock(&cache->mutex);
				return false;
			}

			memcpy(&info, p, sizeof(struct cache_entry_variant_info));
			p += sizeof(struct cache_entry_variant_info);

			variant->config = info.config;
			variant->info = info.variant_info;
			variant->rsrc1 = info.rsrc1;
			variant->rsrc2 = info.rsrc2;
			variant->code_size = entry->code_sizes[i];
			variant->ref_count = 1;

			void *ptr = radv_alloc_shader_memory(device, variant);
			memcpy(ptr, p, entry->code_sizes[i]);
			p += entry->code_sizes[i];

			entry->variants[i] = variant;
		} else if (entry->code_sizes[i]) {
			p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
		}

	}

	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (entry->variants[i])
			p_atomic_inc(&entry->variants[i]->ref_count);

	memcpy(variants, entry->variants, sizeof(entry->variants));
	pthread_mutex_unlock(&cache->mutex);
	return true;
}
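
/* Insert freshly compiled variants under the given SHA-1. If an entry with
 * the same key already exists (e.g. another thread compiled the same
 * pipeline first), the cached variants take precedence for any stage they
 * already cover: the caller's duplicates are destroyed and the variants
 * array is updated to point at the cached ones, while stages the entry
 * lacks adopt the caller's variants instead.
 */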
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
				   struct radv_pipeline_cache *cache,
				   const unsigned char *sha1,
				   struct radv_shader_variant **variants,
				   const void *const *codes,
				   const unsigned *code_sizes)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
			if (entry->variants[i]) {
				radv_shader_variant_destroy(cache->device, variants[i]);
				variants[i] = entry->variants[i];
			} else {
				entry->variants[i] = variants[i];
			}
			if (variants[i])
				p_atomic_inc(&variants[i]->ref_count);
		}
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	/* Don't cache when we want debug info, since this isn't
	 * present in the cache.
	 */
	if (radv_is_cache_disabled(device)) {
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	size_t size = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (variants[i])
			size += sizeof(struct cache_entry_variant_info) + code_sizes[i];


	entry = vk_alloc(&cache->alloc, size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);

	char* p = entry->code;
	struct cache_entry_variant_info info;
	memset(&info, 0, sizeof(info));

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->code_sizes[i] = code_sizes[i];

		info.config = variants[i]->config;
		info.variant_info = variants[i]->info;
		info.rsrc1 = variants[i]->rsrc1;
		info.rsrc2 = variants[i]->rsrc2;
		memcpy(p, &info, sizeof(struct cache_entry_variant_info));
		p += sizeof(struct cache_entry_variant_info);

		memcpy(p, codes[i], code_sizes[i]);
		p += code_sizes[i];
	}

	/* Always add cache items to disk. This will allow collection of
	 * compiled shaders by third parties such as steam, even if the app
	 * implements its own pipeline cache.
	 */
	if (device->physical_device->disk_cache) {
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
				       disk_sha1);
		disk_cache_put(device->physical_device->disk_cache,
			       disk_sha1, entry, entry_size(entry), NULL);
	}

	/* We delay setting the variant so we have reproducible disk cache
	 * items.
	 */
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->variants[i] = variants[i];
		p_atomic_inc(&variants[i]->ref_count);
	}

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return;
}

struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return false;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return false;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return false;
	if (header.vendor_id != ATI_VENDOR_ID)
		return false;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return false;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return false;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		size_t size = entry_size(entry);
		if(end - p < size)
			break;

		dest_entry = vk_alloc(&cache->alloc, size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, size);
			for (int i = 0; i < MESA_SHADER_STAGES; ++i)
				dest_entry->variants[i] = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += size;
	}

	return true;
}
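
/* radv_GetPipelineCacheData() and radv_pipeline_cache_load() round-trip a
 * simple blob: a cache_header (the Vulkan-mandated prefix of size, version,
 * vendor and device id, and cache UUID) followed by the cache entries packed
 * back to back, each entry_size() bytes long with its variants[] pointers
 * zeroed. radv_pipeline_cache_load() validates the header against this
 * device before accepting any entries.
 */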
VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;

	pthread_mutex_lock(&cache->mutex);

	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		for(int j = 0; j < MESA_SHADER_STAGES; ++j)
			((struct cache_entry*)p)->variants[j] = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	pthread_mutex_unlock(&cache->mutex);
	return result;
}

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}