1361fc4cbSmaya/* 2361fc4cbSmaya * Copyright © 2015 Intel Corporation 3361fc4cbSmaya * 4361fc4cbSmaya * Permission is hereby granted, free of charge, to any person obtaining a 5361fc4cbSmaya * copy of this software and associated documentation files (the "Software"), 6361fc4cbSmaya * to deal in the Software without restriction, including without limitation 7361fc4cbSmaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8361fc4cbSmaya * and/or sell copies of the Software, and to permit persons to whom the 9361fc4cbSmaya * Software is furnished to do so, subject to the following conditions: 10361fc4cbSmaya * 11361fc4cbSmaya * The above copyright notice and this permission notice (including the next 12361fc4cbSmaya * paragraph) shall be included in all copies or substantial portions of the 13361fc4cbSmaya * Software. 14361fc4cbSmaya * 15361fc4cbSmaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16361fc4cbSmaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17361fc4cbSmaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18361fc4cbSmaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19361fc4cbSmaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20361fc4cbSmaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21361fc4cbSmaya * DEALINGS IN THE SOFTWARE. 22361fc4cbSmaya */ 23361fc4cbSmaya 24361fc4cbSmaya#include "tu_private.h" 25361fc4cbSmaya 26361fc4cbSmaya#include "util/debug.h" 27361fc4cbSmaya#include "util/disk_cache.h" 28361fc4cbSmaya#include "util/mesa-sha1.h" 29361fc4cbSmaya#include "util/u_atomic.h" 307ec681f3Smrg#include "vulkan/util/vk_util.h" 31361fc4cbSmaya 32361fc4cbSmayastruct cache_entry_variant_info 33361fc4cbSmaya{ 34361fc4cbSmaya}; 35361fc4cbSmaya 36361fc4cbSmayastruct cache_entry 37361fc4cbSmaya{ 38361fc4cbSmaya union { 39361fc4cbSmaya unsigned char sha1[20]; 40361fc4cbSmaya uint32_t sha1_dw[5]; 41361fc4cbSmaya }; 42361fc4cbSmaya uint32_t code_sizes[MESA_SHADER_STAGES]; 43361fc4cbSmaya struct tu_shader_variant *variants[MESA_SHADER_STAGES]; 44361fc4cbSmaya char code[0]; 45361fc4cbSmaya}; 46361fc4cbSmaya 477ec681f3Smrgstatic void 48361fc4cbSmayatu_pipeline_cache_init(struct tu_pipeline_cache *cache, 49361fc4cbSmaya struct tu_device *device) 50361fc4cbSmaya{ 51361fc4cbSmaya cache->device = device; 52361fc4cbSmaya pthread_mutex_init(&cache->mutex, NULL); 53361fc4cbSmaya 54361fc4cbSmaya cache->modified = false; 55361fc4cbSmaya cache->kernel_count = 0; 56361fc4cbSmaya cache->total_size = 0; 57361fc4cbSmaya cache->table_size = 1024; 58361fc4cbSmaya const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); 59361fc4cbSmaya cache->hash_table = malloc(byte_size); 60361fc4cbSmaya 61361fc4cbSmaya /* We don't consider allocation failure fatal, we just start with a 0-sized 62361fc4cbSmaya * cache. Disable caching when we want to keep shader debug info, since 63361fc4cbSmaya * we don't get the debug info on cached shaders. */ 64361fc4cbSmaya if (cache->hash_table == NULL) 65361fc4cbSmaya cache->table_size = 0; 66361fc4cbSmaya else 67361fc4cbSmaya memset(cache->hash_table, 0, byte_size); 68361fc4cbSmaya} 69361fc4cbSmaya 707ec681f3Smrgstatic void 71361fc4cbSmayatu_pipeline_cache_finish(struct tu_pipeline_cache *cache) 72361fc4cbSmaya{ 73361fc4cbSmaya for (unsigned i = 0; i < cache->table_size; ++i) 74361fc4cbSmaya if (cache->hash_table[i]) { 75361fc4cbSmaya vk_free(&cache->alloc, cache->hash_table[i]); 76361fc4cbSmaya } 77361fc4cbSmaya pthread_mutex_destroy(&cache->mutex); 78361fc4cbSmaya free(cache->hash_table); 79361fc4cbSmaya} 80361fc4cbSmaya 81361fc4cbSmayastatic uint32_t 82361fc4cbSmayaentry_size(struct cache_entry *entry) 83361fc4cbSmaya{ 84361fc4cbSmaya size_t ret = sizeof(*entry); 85361fc4cbSmaya for (int i = 0; i < MESA_SHADER_STAGES; ++i) 86361fc4cbSmaya if (entry->code_sizes[i]) 87361fc4cbSmaya ret += 88361fc4cbSmaya sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; 89361fc4cbSmaya return ret; 90361fc4cbSmaya} 91361fc4cbSmaya 92361fc4cbSmayastatic struct cache_entry * 93361fc4cbSmayatu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache, 94361fc4cbSmaya const unsigned char *sha1) 95361fc4cbSmaya{ 96361fc4cbSmaya const uint32_t mask = cache->table_size - 1; 97361fc4cbSmaya const uint32_t start = (*(uint32_t *) sha1); 98361fc4cbSmaya 99361fc4cbSmaya if (cache->table_size == 0) 100361fc4cbSmaya return NULL; 101361fc4cbSmaya 102361fc4cbSmaya for (uint32_t i = 0; i < cache->table_size; i++) { 103361fc4cbSmaya const uint32_t index = (start + i) & mask; 104361fc4cbSmaya struct cache_entry *entry = cache->hash_table[index]; 105361fc4cbSmaya 106361fc4cbSmaya if (!entry) 107361fc4cbSmaya return NULL; 108361fc4cbSmaya 109361fc4cbSmaya if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { 110361fc4cbSmaya return entry; 111361fc4cbSmaya } 112361fc4cbSmaya } 113361fc4cbSmaya 114361fc4cbSmaya unreachable("hash table should never be full"); 115361fc4cbSmaya} 116361fc4cbSmaya 117361fc4cbSmayastatic struct cache_entry * 118361fc4cbSmayatu_pipeline_cache_search(struct tu_pipeline_cache *cache, 119361fc4cbSmaya const unsigned char *sha1) 120361fc4cbSmaya{ 121361fc4cbSmaya struct cache_entry *entry; 122361fc4cbSmaya 123361fc4cbSmaya pthread_mutex_lock(&cache->mutex); 124361fc4cbSmaya 125361fc4cbSmaya entry = tu_pipeline_cache_search_unlocked(cache, sha1); 126361fc4cbSmaya 127361fc4cbSmaya pthread_mutex_unlock(&cache->mutex); 128361fc4cbSmaya 129361fc4cbSmaya return entry; 130361fc4cbSmaya} 131361fc4cbSmaya 132361fc4cbSmayastatic void 133361fc4cbSmayatu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache, 134361fc4cbSmaya struct cache_entry *entry) 135361fc4cbSmaya{ 136361fc4cbSmaya const uint32_t mask = cache->table_size - 1; 137361fc4cbSmaya const uint32_t start = entry->sha1_dw[0]; 138361fc4cbSmaya 139361fc4cbSmaya /* We'll always be able to insert when we get here. */ 140361fc4cbSmaya assert(cache->kernel_count < cache->table_size / 2); 141361fc4cbSmaya 142361fc4cbSmaya for (uint32_t i = 0; i < cache->table_size; i++) { 143361fc4cbSmaya const uint32_t index = (start + i) & mask; 144361fc4cbSmaya if (!cache->hash_table[index]) { 145361fc4cbSmaya cache->hash_table[index] = entry; 146361fc4cbSmaya break; 147361fc4cbSmaya } 148361fc4cbSmaya } 149361fc4cbSmaya 150361fc4cbSmaya cache->total_size += entry_size(entry); 151361fc4cbSmaya cache->kernel_count++; 152361fc4cbSmaya} 153361fc4cbSmaya 154361fc4cbSmayastatic VkResult 155361fc4cbSmayatu_pipeline_cache_grow(struct tu_pipeline_cache *cache) 156361fc4cbSmaya{ 157361fc4cbSmaya const uint32_t table_size = cache->table_size * 2; 158361fc4cbSmaya const uint32_t old_table_size = cache->table_size; 159361fc4cbSmaya const size_t byte_size = table_size * sizeof(cache->hash_table[0]); 160361fc4cbSmaya struct cache_entry **table; 161361fc4cbSmaya struct cache_entry **old_table = cache->hash_table; 162361fc4cbSmaya 163361fc4cbSmaya table = malloc(byte_size); 164361fc4cbSmaya if (table == NULL) 1657ec681f3Smrg return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY); 166361fc4cbSmaya 167361fc4cbSmaya cache->hash_table = table; 168361fc4cbSmaya cache->table_size = table_size; 169361fc4cbSmaya cache->kernel_count = 0; 170361fc4cbSmaya cache->total_size = 0; 171361fc4cbSmaya 172361fc4cbSmaya memset(cache->hash_table, 0, byte_size); 173361fc4cbSmaya for (uint32_t i = 0; i < old_table_size; i++) { 174361fc4cbSmaya struct cache_entry *entry = old_table[i]; 175361fc4cbSmaya if (!entry) 176361fc4cbSmaya continue; 177361fc4cbSmaya 178361fc4cbSmaya tu_pipeline_cache_set_entry(cache, entry); 179361fc4cbSmaya } 180361fc4cbSmaya 181361fc4cbSmaya free(old_table); 182361fc4cbSmaya 183361fc4cbSmaya return VK_SUCCESS; 184361fc4cbSmaya} 185361fc4cbSmaya 186361fc4cbSmayastatic void 187361fc4cbSmayatu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache, 188361fc4cbSmaya struct cache_entry *entry) 189361fc4cbSmaya{ 190361fc4cbSmaya if (cache->kernel_count == cache->table_size / 2) 191361fc4cbSmaya tu_pipeline_cache_grow(cache); 192361fc4cbSmaya 193361fc4cbSmaya /* Failing to grow that hash table isn't fatal, but may mean we don't 194361fc4cbSmaya * have enough space to add this new kernel. Only add it if there's room. 195361fc4cbSmaya */ 196361fc4cbSmaya if (cache->kernel_count < cache->table_size / 2) 197361fc4cbSmaya tu_pipeline_cache_set_entry(cache, entry); 198361fc4cbSmaya} 199361fc4cbSmaya 2007ec681f3Smrgstatic void 201361fc4cbSmayatu_pipeline_cache_load(struct tu_pipeline_cache *cache, 202361fc4cbSmaya const void *data, 203361fc4cbSmaya size_t size) 204361fc4cbSmaya{ 205361fc4cbSmaya struct tu_device *device = cache->device; 2067ec681f3Smrg struct vk_pipeline_cache_header header; 207361fc4cbSmaya 208361fc4cbSmaya if (size < sizeof(header)) 209361fc4cbSmaya return; 210361fc4cbSmaya memcpy(&header, data, sizeof(header)); 211361fc4cbSmaya if (header.header_size < sizeof(header)) 212361fc4cbSmaya return; 213361fc4cbSmaya if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) 214361fc4cbSmaya return; 2157ec681f3Smrg if (header.vendor_id != 0x5143) 216361fc4cbSmaya return; 2177ec681f3Smrg if (header.device_id != device->physical_device->dev_id.chip_id) 218361fc4cbSmaya return; 219361fc4cbSmaya if (memcmp(header.uuid, device->physical_device->cache_uuid, 220361fc4cbSmaya VK_UUID_SIZE) != 0) 221361fc4cbSmaya return; 222361fc4cbSmaya 223361fc4cbSmaya char *end = (void *) data + size; 224361fc4cbSmaya char *p = (void *) data + header.header_size; 225361fc4cbSmaya 226361fc4cbSmaya while (end - p >= sizeof(struct cache_entry)) { 227361fc4cbSmaya struct cache_entry *entry = (struct cache_entry *) p; 228361fc4cbSmaya struct cache_entry *dest_entry; 229361fc4cbSmaya size_t size = entry_size(entry); 230361fc4cbSmaya if (end - p < size) 231361fc4cbSmaya break; 232361fc4cbSmaya 233361fc4cbSmaya dest_entry = 234361fc4cbSmaya vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); 235361fc4cbSmaya if (dest_entry) { 236361fc4cbSmaya memcpy(dest_entry, entry, size); 237361fc4cbSmaya for (int i = 0; i < MESA_SHADER_STAGES; ++i) 238361fc4cbSmaya dest_entry->variants[i] = NULL; 239361fc4cbSmaya tu_pipeline_cache_add_entry(cache, dest_entry); 240361fc4cbSmaya } 241361fc4cbSmaya p += size; 242361fc4cbSmaya } 243361fc4cbSmaya} 244361fc4cbSmaya 2457ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 246361fc4cbSmayatu_CreatePipelineCache(VkDevice _device, 247361fc4cbSmaya const VkPipelineCacheCreateInfo *pCreateInfo, 248361fc4cbSmaya const VkAllocationCallbacks *pAllocator, 249361fc4cbSmaya VkPipelineCache *pPipelineCache) 250361fc4cbSmaya{ 251361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 252361fc4cbSmaya struct tu_pipeline_cache *cache; 253361fc4cbSmaya 254361fc4cbSmaya assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); 255361fc4cbSmaya assert(pCreateInfo->flags == 0); 256361fc4cbSmaya 2577ec681f3Smrg cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache), 2587ec681f3Smrg VK_OBJECT_TYPE_PIPELINE_CACHE); 259361fc4cbSmaya if (cache == NULL) 2607ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 261361fc4cbSmaya 262361fc4cbSmaya if (pAllocator) 263361fc4cbSmaya cache->alloc = *pAllocator; 264361fc4cbSmaya else 2657ec681f3Smrg cache->alloc = device->vk.alloc; 266361fc4cbSmaya 267361fc4cbSmaya tu_pipeline_cache_init(cache, device); 268361fc4cbSmaya 269361fc4cbSmaya if (pCreateInfo->initialDataSize > 0) { 270361fc4cbSmaya tu_pipeline_cache_load(cache, pCreateInfo->pInitialData, 271361fc4cbSmaya pCreateInfo->initialDataSize); 272361fc4cbSmaya } 273361fc4cbSmaya 274361fc4cbSmaya *pPipelineCache = tu_pipeline_cache_to_handle(cache); 275361fc4cbSmaya 276361fc4cbSmaya return VK_SUCCESS; 277361fc4cbSmaya} 278361fc4cbSmaya 2797ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 280361fc4cbSmayatu_DestroyPipelineCache(VkDevice _device, 281361fc4cbSmaya VkPipelineCache _cache, 282361fc4cbSmaya const VkAllocationCallbacks *pAllocator) 283361fc4cbSmaya{ 284361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 285361fc4cbSmaya TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); 286361fc4cbSmaya 287361fc4cbSmaya if (!cache) 288361fc4cbSmaya return; 289361fc4cbSmaya tu_pipeline_cache_finish(cache); 290361fc4cbSmaya 2917ec681f3Smrg vk_object_free(&device->vk, pAllocator, cache); 292361fc4cbSmaya} 293361fc4cbSmaya 2947ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 295361fc4cbSmayatu_GetPipelineCacheData(VkDevice _device, 296361fc4cbSmaya VkPipelineCache _cache, 297361fc4cbSmaya size_t *pDataSize, 298361fc4cbSmaya void *pData) 299361fc4cbSmaya{ 300361fc4cbSmaya TU_FROM_HANDLE(tu_device, device, _device); 301361fc4cbSmaya TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); 3027ec681f3Smrg struct vk_pipeline_cache_header *header; 303361fc4cbSmaya VkResult result = VK_SUCCESS; 304361fc4cbSmaya 305361fc4cbSmaya pthread_mutex_lock(&cache->mutex); 306361fc4cbSmaya 307361fc4cbSmaya const size_t size = sizeof(*header) + cache->total_size; 308361fc4cbSmaya if (pData == NULL) { 309361fc4cbSmaya pthread_mutex_unlock(&cache->mutex); 310361fc4cbSmaya *pDataSize = size; 311361fc4cbSmaya return VK_SUCCESS; 312361fc4cbSmaya } 313361fc4cbSmaya if (*pDataSize < sizeof(*header)) { 314361fc4cbSmaya pthread_mutex_unlock(&cache->mutex); 315361fc4cbSmaya *pDataSize = 0; 316361fc4cbSmaya return VK_INCOMPLETE; 317361fc4cbSmaya } 318361fc4cbSmaya void *p = pData, *end = pData + *pDataSize; 319361fc4cbSmaya header = p; 320361fc4cbSmaya header->header_size = sizeof(*header); 321361fc4cbSmaya header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; 3227ec681f3Smrg header->vendor_id = 0x5143; 3237ec681f3Smrg header->device_id = device->physical_device->dev_id.chip_id; 324361fc4cbSmaya memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE); 325361fc4cbSmaya p += header->header_size; 326361fc4cbSmaya 327361fc4cbSmaya struct cache_entry *entry; 328361fc4cbSmaya for (uint32_t i = 0; i < cache->table_size; i++) { 329361fc4cbSmaya if (!cache->hash_table[i]) 330361fc4cbSmaya continue; 331361fc4cbSmaya entry = cache->hash_table[i]; 332361fc4cbSmaya const uint32_t size = entry_size(entry); 333361fc4cbSmaya if (end < p + size) { 334361fc4cbSmaya result = VK_INCOMPLETE; 335361fc4cbSmaya break; 336361fc4cbSmaya } 337361fc4cbSmaya 338361fc4cbSmaya memcpy(p, entry, size); 339361fc4cbSmaya for (int j = 0; j < MESA_SHADER_STAGES; ++j) 340361fc4cbSmaya ((struct cache_entry *) p)->variants[j] = NULL; 341361fc4cbSmaya p += size; 342361fc4cbSmaya } 343361fc4cbSmaya *pDataSize = p - pData; 344361fc4cbSmaya 345361fc4cbSmaya pthread_mutex_unlock(&cache->mutex); 346361fc4cbSmaya return result; 347361fc4cbSmaya} 348361fc4cbSmaya 349361fc4cbSmayastatic void 350361fc4cbSmayatu_pipeline_cache_merge(struct tu_pipeline_cache *dst, 351361fc4cbSmaya struct tu_pipeline_cache *src) 352361fc4cbSmaya{ 353361fc4cbSmaya for (uint32_t i = 0; i < src->table_size; i++) { 354361fc4cbSmaya struct cache_entry *entry = src->hash_table[i]; 355361fc4cbSmaya if (!entry || tu_pipeline_cache_search(dst, entry->sha1)) 356361fc4cbSmaya continue; 357361fc4cbSmaya 358361fc4cbSmaya tu_pipeline_cache_add_entry(dst, entry); 359361fc4cbSmaya 360361fc4cbSmaya src->hash_table[i] = NULL; 361361fc4cbSmaya } 362361fc4cbSmaya} 363361fc4cbSmaya 3647ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL 365361fc4cbSmayatu_MergePipelineCaches(VkDevice _device, 366361fc4cbSmaya VkPipelineCache destCache, 367361fc4cbSmaya uint32_t srcCacheCount, 368361fc4cbSmaya const VkPipelineCache *pSrcCaches) 369361fc4cbSmaya{ 370361fc4cbSmaya TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache); 371361fc4cbSmaya 372361fc4cbSmaya for (uint32_t i = 0; i < srcCacheCount; i++) { 373361fc4cbSmaya TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]); 374361fc4cbSmaya 375361fc4cbSmaya tu_pipeline_cache_merge(dst, src); 376361fc4cbSmaya } 377361fc4cbSmaya 378361fc4cbSmaya return VK_SUCCESS; 379361fc4cbSmaya} 380