/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

struct cache_entry_variant_info
{
};
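/* A cache entry is keyed by the SHA-1 of the pipeline state (see
 * tu_hash_shaders). The per-stage serialized code is appended after the
 * struct via the flexible code[] array; variants[] holds runtime pointers
 * that are never serialized and are cleared on load/export.
 */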
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders.
    */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

void
tu_hash_shaders(unsigned char *hash,
                const VkPipelineShaderStageCreateInfo **stages,
                const struct tu_pipeline_layout *layout,
                const struct tu_pipeline_key *key,
                uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info =
            stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info) {
            _mesa_sha1_update(
               &ctx, spec_info->pMapEntries,
               spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}
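/* Insert an entry using open addressing with linear probing, starting from
 * the first dword of the SHA-1. The table is resized before it becomes more
 * than half full (see tu_pipeline_cache_add_entry), so the probe below is
 * guaranteed to find a free slot.
 */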
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here.
    */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}
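/* Serialized layout used by tu_pipeline_cache_load and
 * tu_GetPipelineCacheData: the Vulkan-mandated pipeline cache header below,
 * followed by packed cache_entry records (with their appended code and the
 * runtime variants[] pointers cleared).
 */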
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}