101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2017 Connor Abbott 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir_serialize.h" 2501e04c3fSmrg#include "nir_control_flow.h" 2601e04c3fSmrg#include "util/u_dynarray.h" 277ec681f3Smrg#include "util/u_math.h" 287ec681f3Smrg 297ec681f3Smrg#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) 307ec681f3Smrg#define MAX_OBJECT_IDS (1 << 20) 3101e04c3fSmrg 3201e04c3fSmrgtypedef struct { 3301e04c3fSmrg size_t blob_offset; 3401e04c3fSmrg nir_ssa_def *src; 3501e04c3fSmrg nir_block *block; 3601e04c3fSmrg} write_phi_fixup; 3701e04c3fSmrg 3801e04c3fSmrgtypedef struct { 3901e04c3fSmrg const nir_shader *nir; 4001e04c3fSmrg 4101e04c3fSmrg struct blob *blob; 4201e04c3fSmrg 4301e04c3fSmrg /* maps pointer to index */ 4401e04c3fSmrg struct hash_table *remap_table; 4501e04c3fSmrg 4601e04c3fSmrg /* the next index to assign to a NIR in-memory object */ 477ec681f3Smrg uint32_t next_idx; 4801e04c3fSmrg 4901e04c3fSmrg /* Array of write_phi_fixup structs representing phi sources that need to 5001e04c3fSmrg * be resolved in the second pass. 5101e04c3fSmrg */ 5201e04c3fSmrg struct util_dynarray phi_fixups; 537ec681f3Smrg 547ec681f3Smrg /* The last serialized type. */ 557ec681f3Smrg const struct glsl_type *last_type; 567ec681f3Smrg const struct glsl_type *last_interface_type; 577ec681f3Smrg struct nir_variable_data last_var_data; 587ec681f3Smrg 597ec681f3Smrg /* For skipping equal ALU headers (typical after scalarization). */ 607ec681f3Smrg nir_instr_type last_instr_type; 617ec681f3Smrg uintptr_t last_alu_header_offset; 627ec681f3Smrg 637ec681f3Smrg /* Don't write optional data such as variable names. */ 647ec681f3Smrg bool strip; 6501e04c3fSmrg} write_ctx; 6601e04c3fSmrg 6701e04c3fSmrgtypedef struct { 6801e04c3fSmrg nir_shader *nir; 6901e04c3fSmrg 7001e04c3fSmrg struct blob_reader *blob; 7101e04c3fSmrg 7201e04c3fSmrg /* the next index to assign to a NIR in-memory object */ 737ec681f3Smrg uint32_t next_idx; 7401e04c3fSmrg 7501e04c3fSmrg /* The length of the index -> object table */ 767ec681f3Smrg uint32_t idx_table_len; 7701e04c3fSmrg 7801e04c3fSmrg /* map from index to deserialized pointer */ 7901e04c3fSmrg void **idx_table; 8001e04c3fSmrg 8101e04c3fSmrg /* List of phi sources. */ 8201e04c3fSmrg struct list_head phi_srcs; 8301e04c3fSmrg 847ec681f3Smrg /* The last deserialized type. */ 857ec681f3Smrg const struct glsl_type *last_type; 867ec681f3Smrg const struct glsl_type *last_interface_type; 877ec681f3Smrg struct nir_variable_data last_var_data; 8801e04c3fSmrg} read_ctx; 8901e04c3fSmrg 9001e04c3fSmrgstatic void 9101e04c3fSmrgwrite_add_object(write_ctx *ctx, const void *obj) 9201e04c3fSmrg{ 937ec681f3Smrg uint32_t index = ctx->next_idx++; 947ec681f3Smrg assert(index != MAX_OBJECT_IDS); 957ec681f3Smrg _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); 9601e04c3fSmrg} 9701e04c3fSmrg 987ec681f3Smrgstatic uint32_t 9901e04c3fSmrgwrite_lookup_object(write_ctx *ctx, const void *obj) 10001e04c3fSmrg{ 10101e04c3fSmrg struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 10201e04c3fSmrg assert(entry); 1037ec681f3Smrg return (uint32_t)(uintptr_t) entry->data; 10401e04c3fSmrg} 10501e04c3fSmrg 10601e04c3fSmrgstatic void 10701e04c3fSmrgread_add_object(read_ctx *ctx, void *obj) 10801e04c3fSmrg{ 10901e04c3fSmrg assert(ctx->next_idx < ctx->idx_table_len); 11001e04c3fSmrg ctx->idx_table[ctx->next_idx++] = obj; 11101e04c3fSmrg} 11201e04c3fSmrg 11301e04c3fSmrgstatic void * 1147ec681f3Smrgread_lookup_object(read_ctx *ctx, uint32_t idx) 11501e04c3fSmrg{ 11601e04c3fSmrg assert(idx < ctx->idx_table_len); 11701e04c3fSmrg return ctx->idx_table[idx]; 11801e04c3fSmrg} 11901e04c3fSmrg 12001e04c3fSmrgstatic void * 12101e04c3fSmrgread_object(read_ctx *ctx) 12201e04c3fSmrg{ 1237ec681f3Smrg return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); 1247ec681f3Smrg} 1257ec681f3Smrg 1267ec681f3Smrgstatic uint32_t 1277ec681f3Smrgencode_bit_size_3bits(uint8_t bit_size) 1287ec681f3Smrg{ 1297ec681f3Smrg /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */ 1307ec681f3Smrg assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); 1317ec681f3Smrg if (bit_size) 1327ec681f3Smrg return util_logbase2(bit_size) + 1; 1337ec681f3Smrg return 0; 1347ec681f3Smrg} 1357ec681f3Smrg 1367ec681f3Smrgstatic uint8_t 1377ec681f3Smrgdecode_bit_size_3bits(uint8_t bit_size) 1387ec681f3Smrg{ 1397ec681f3Smrg if (bit_size) 1407ec681f3Smrg return 1 << (bit_size - 1); 1417ec681f3Smrg return 0; 1427ec681f3Smrg} 1437ec681f3Smrg 1447ec681f3Smrg#define NUM_COMPONENTS_IS_SEPARATE_7 7 1457ec681f3Smrg 1467ec681f3Smrgstatic uint8_t 1477ec681f3Smrgencode_num_components_in_3bits(uint8_t num_components) 1487ec681f3Smrg{ 1497ec681f3Smrg if (num_components <= 4) 1507ec681f3Smrg return num_components; 1517ec681f3Smrg if (num_components == 8) 1527ec681f3Smrg return 5; 1537ec681f3Smrg if (num_components == 16) 1547ec681f3Smrg return 6; 1557ec681f3Smrg 1567ec681f3Smrg /* special value indicating that num_components is in the next uint32 */ 1577ec681f3Smrg return NUM_COMPONENTS_IS_SEPARATE_7; 1587ec681f3Smrg} 1597ec681f3Smrg 1607ec681f3Smrgstatic uint8_t 1617ec681f3Smrgdecode_num_components_in_3bits(uint8_t value) 1627ec681f3Smrg{ 1637ec681f3Smrg if (value <= 4) 1647ec681f3Smrg return value; 1657ec681f3Smrg if (value == 5) 1667ec681f3Smrg return 8; 1677ec681f3Smrg if (value == 6) 1687ec681f3Smrg return 16; 1697ec681f3Smrg 1707ec681f3Smrg unreachable("invalid num_components encoding"); 1717ec681f3Smrg return 0; 17201e04c3fSmrg} 17301e04c3fSmrg 17401e04c3fSmrgstatic void 17501e04c3fSmrgwrite_constant(write_ctx *ctx, const nir_constant *c) 17601e04c3fSmrg{ 17701e04c3fSmrg blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 17801e04c3fSmrg blob_write_uint32(ctx->blob, c->num_elements); 17901e04c3fSmrg for (unsigned i = 0; i < c->num_elements; i++) 18001e04c3fSmrg write_constant(ctx, c->elements[i]); 18101e04c3fSmrg} 18201e04c3fSmrg 18301e04c3fSmrgstatic nir_constant * 18401e04c3fSmrgread_constant(read_ctx *ctx, nir_variable *nvar) 18501e04c3fSmrg{ 18601e04c3fSmrg nir_constant *c = ralloc(nvar, nir_constant); 18701e04c3fSmrg 18801e04c3fSmrg blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 18901e04c3fSmrg c->num_elements = blob_read_uint32(ctx->blob); 19001e04c3fSmrg c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 19101e04c3fSmrg for (unsigned i = 0; i < c->num_elements; i++) 19201e04c3fSmrg c->elements[i] = read_constant(ctx, nvar); 19301e04c3fSmrg 19401e04c3fSmrg return c; 19501e04c3fSmrg} 19601e04c3fSmrg 1977ec681f3Smrgenum var_data_encoding { 1987ec681f3Smrg var_encode_full, 1997ec681f3Smrg var_encode_shader_temp, 2007ec681f3Smrg var_encode_function_temp, 2017ec681f3Smrg var_encode_location_diff, 2027ec681f3Smrg}; 2037ec681f3Smrg 2047ec681f3Smrgunion packed_var { 2057ec681f3Smrg uint32_t u32; 2067ec681f3Smrg struct { 2077ec681f3Smrg unsigned has_name:1; 2087ec681f3Smrg unsigned has_constant_initializer:1; 2097ec681f3Smrg unsigned has_pointer_initializer:1; 2107ec681f3Smrg unsigned has_interface_type:1; 2117ec681f3Smrg unsigned num_state_slots:7; 2127ec681f3Smrg unsigned data_encoding:2; 2137ec681f3Smrg unsigned type_same_as_last:1; 2147ec681f3Smrg unsigned interface_type_same_as_last:1; 2157ec681f3Smrg unsigned _pad:1; 2167ec681f3Smrg unsigned num_members:16; 2177ec681f3Smrg } u; 2187ec681f3Smrg}; 2197ec681f3Smrg 2207ec681f3Smrgunion packed_var_data_diff { 2217ec681f3Smrg uint32_t u32; 2227ec681f3Smrg struct { 2237ec681f3Smrg int location:13; 2247ec681f3Smrg int location_frac:3; 2257ec681f3Smrg int driver_location:16; 2267ec681f3Smrg } u; 2277ec681f3Smrg}; 2287ec681f3Smrg 22901e04c3fSmrgstatic void 23001e04c3fSmrgwrite_variable(write_ctx *ctx, const nir_variable *var) 23101e04c3fSmrg{ 23201e04c3fSmrg write_add_object(ctx, var); 2337ec681f3Smrg 2347ec681f3Smrg assert(var->num_state_slots < (1 << 7)); 2357ec681f3Smrg 2367ec681f3Smrg STATIC_ASSERT(sizeof(union packed_var) == 4); 2377ec681f3Smrg union packed_var flags; 2387ec681f3Smrg flags.u32 = 0; 2397ec681f3Smrg 2407ec681f3Smrg flags.u.has_name = !ctx->strip && var->name; 2417ec681f3Smrg flags.u.has_constant_initializer = !!(var->constant_initializer); 2427ec681f3Smrg flags.u.has_pointer_initializer = !!(var->pointer_initializer); 2437ec681f3Smrg flags.u.has_interface_type = !!(var->interface_type); 2447ec681f3Smrg flags.u.type_same_as_last = var->type == ctx->last_type; 2457ec681f3Smrg flags.u.interface_type_same_as_last = 2467ec681f3Smrg var->interface_type && var->interface_type == ctx->last_interface_type; 2477ec681f3Smrg flags.u.num_state_slots = var->num_state_slots; 2487ec681f3Smrg flags.u.num_members = var->num_members; 2497ec681f3Smrg 2507ec681f3Smrg struct nir_variable_data data = var->data; 2517ec681f3Smrg 2527ec681f3Smrg /* When stripping, we expect that the location is no longer needed, 2537ec681f3Smrg * which is typically after shaders are linked. 2547ec681f3Smrg */ 2557ec681f3Smrg if (ctx->strip && 2567ec681f3Smrg data.mode != nir_var_system_value && 2577ec681f3Smrg data.mode != nir_var_shader_in && 2587ec681f3Smrg data.mode != nir_var_shader_out) 2597ec681f3Smrg data.location = 0; 2607ec681f3Smrg 2617ec681f3Smrg /* Temporary variables don't serialize var->data. */ 2627ec681f3Smrg if (data.mode == nir_var_shader_temp) 2637ec681f3Smrg flags.u.data_encoding = var_encode_shader_temp; 2647ec681f3Smrg else if (data.mode == nir_var_function_temp) 2657ec681f3Smrg flags.u.data_encoding = var_encode_function_temp; 2667ec681f3Smrg else { 2677ec681f3Smrg struct nir_variable_data tmp = data; 2687ec681f3Smrg 2697ec681f3Smrg tmp.location = ctx->last_var_data.location; 2707ec681f3Smrg tmp.location_frac = ctx->last_var_data.location_frac; 2717ec681f3Smrg tmp.driver_location = ctx->last_var_data.driver_location; 2727ec681f3Smrg 2737ec681f3Smrg /* See if we can encode only the difference in locations from the last 2747ec681f3Smrg * variable. 2757ec681f3Smrg */ 2767ec681f3Smrg if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && 2777ec681f3Smrg abs((int)data.location - 2787ec681f3Smrg (int)ctx->last_var_data.location) < (1 << 12) && 2797ec681f3Smrg abs((int)data.driver_location - 2807ec681f3Smrg (int)ctx->last_var_data.driver_location) < (1 << 15)) 2817ec681f3Smrg flags.u.data_encoding = var_encode_location_diff; 2827ec681f3Smrg else 2837ec681f3Smrg flags.u.data_encoding = var_encode_full; 2847ec681f3Smrg } 2857ec681f3Smrg 2867ec681f3Smrg blob_write_uint32(ctx->blob, flags.u32); 2877ec681f3Smrg 2887ec681f3Smrg if (!flags.u.type_same_as_last) { 2897ec681f3Smrg encode_type_to_blob(ctx->blob, var->type); 2907ec681f3Smrg ctx->last_type = var->type; 2917ec681f3Smrg } 2927ec681f3Smrg 2937ec681f3Smrg if (var->interface_type && !flags.u.interface_type_same_as_last) { 2947ec681f3Smrg encode_type_to_blob(ctx->blob, var->interface_type); 2957ec681f3Smrg ctx->last_interface_type = var->interface_type; 2967ec681f3Smrg } 2977ec681f3Smrg 2987ec681f3Smrg if (flags.u.has_name) 29901e04c3fSmrg blob_write_string(ctx->blob, var->name); 3007ec681f3Smrg 3017ec681f3Smrg if (flags.u.data_encoding == var_encode_full || 3027ec681f3Smrg flags.u.data_encoding == var_encode_location_diff) { 3037ec681f3Smrg if (flags.u.data_encoding == var_encode_full) { 3047ec681f3Smrg blob_write_bytes(ctx->blob, &data, sizeof(data)); 3057ec681f3Smrg } else { 3067ec681f3Smrg /* Serialize only the difference in locations from the last variable. 3077ec681f3Smrg */ 3087ec681f3Smrg union packed_var_data_diff diff; 3097ec681f3Smrg 3107ec681f3Smrg diff.u.location = data.location - ctx->last_var_data.location; 3117ec681f3Smrg diff.u.location_frac = data.location_frac - 3127ec681f3Smrg ctx->last_var_data.location_frac; 3137ec681f3Smrg diff.u.driver_location = data.driver_location - 3147ec681f3Smrg ctx->last_var_data.driver_location; 3157ec681f3Smrg 3167ec681f3Smrg blob_write_uint32(ctx->blob, diff.u32); 3177ec681f3Smrg } 3187ec681f3Smrg 3197ec681f3Smrg ctx->last_var_data = data; 3207ec681f3Smrg } 3217ec681f3Smrg 3227e102996Smaya for (unsigned i = 0; i < var->num_state_slots; i++) { 3237ec681f3Smrg blob_write_bytes(ctx->blob, &var->state_slots[i], 3247ec681f3Smrg sizeof(var->state_slots[i])); 3257e102996Smaya } 32601e04c3fSmrg if (var->constant_initializer) 32701e04c3fSmrg write_constant(ctx, var->constant_initializer); 3287ec681f3Smrg if (var->pointer_initializer) 3297ec681f3Smrg write_lookup_object(ctx, var->pointer_initializer); 33001e04c3fSmrg if (var->num_members > 0) { 33101e04c3fSmrg blob_write_bytes(ctx->blob, (uint8_t *) var->members, 33201e04c3fSmrg var->num_members * sizeof(*var->members)); 33301e04c3fSmrg } 33401e04c3fSmrg} 33501e04c3fSmrg 33601e04c3fSmrgstatic nir_variable * 33701e04c3fSmrgread_variable(read_ctx *ctx) 33801e04c3fSmrg{ 33901e04c3fSmrg nir_variable *var = rzalloc(ctx->nir, nir_variable); 34001e04c3fSmrg read_add_object(ctx, var); 34101e04c3fSmrg 3427ec681f3Smrg union packed_var flags; 3437ec681f3Smrg flags.u32 = blob_read_uint32(ctx->blob); 3447ec681f3Smrg 3457ec681f3Smrg if (flags.u.type_same_as_last) { 3467ec681f3Smrg var->type = ctx->last_type; 3477ec681f3Smrg } else { 3487ec681f3Smrg var->type = decode_type_from_blob(ctx->blob); 3497ec681f3Smrg ctx->last_type = var->type; 3507ec681f3Smrg } 3517ec681f3Smrg 3527ec681f3Smrg if (flags.u.has_interface_type) { 3537ec681f3Smrg if (flags.u.interface_type_same_as_last) { 3547ec681f3Smrg var->interface_type = ctx->last_interface_type; 3557ec681f3Smrg } else { 3567ec681f3Smrg var->interface_type = decode_type_from_blob(ctx->blob); 3577ec681f3Smrg ctx->last_interface_type = var->interface_type; 3587ec681f3Smrg } 3597ec681f3Smrg } 3607ec681f3Smrg 3617ec681f3Smrg if (flags.u.has_name) { 36201e04c3fSmrg const char *name = blob_read_string(ctx->blob); 36301e04c3fSmrg var->name = ralloc_strdup(var, name); 36401e04c3fSmrg } else { 36501e04c3fSmrg var->name = NULL; 36601e04c3fSmrg } 3677ec681f3Smrg 3687ec681f3Smrg if (flags.u.data_encoding == var_encode_shader_temp) 3697ec681f3Smrg var->data.mode = nir_var_shader_temp; 3707ec681f3Smrg else if (flags.u.data_encoding == var_encode_function_temp) 3717ec681f3Smrg var->data.mode = nir_var_function_temp; 3727ec681f3Smrg else if (flags.u.data_encoding == var_encode_full) { 3737ec681f3Smrg blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data)); 3747ec681f3Smrg ctx->last_var_data = var->data; 3757ec681f3Smrg } else { /* var_encode_location_diff */ 3767ec681f3Smrg union packed_var_data_diff diff; 3777ec681f3Smrg diff.u32 = blob_read_uint32(ctx->blob); 3787ec681f3Smrg 3797ec681f3Smrg var->data = ctx->last_var_data; 3807ec681f3Smrg var->data.location += diff.u.location; 3817ec681f3Smrg var->data.location_frac += diff.u.location_frac; 3827ec681f3Smrg var->data.driver_location += diff.u.driver_location; 3837ec681f3Smrg 3847ec681f3Smrg ctx->last_var_data = var->data; 3857ec681f3Smrg } 3867ec681f3Smrg 3877ec681f3Smrg var->num_state_slots = flags.u.num_state_slots; 3887e102996Smaya if (var->num_state_slots != 0) { 3897e102996Smaya var->state_slots = ralloc_array(var, nir_state_slot, 3907e102996Smaya var->num_state_slots); 3917e102996Smaya for (unsigned i = 0; i < var->num_state_slots; i++) { 3927ec681f3Smrg blob_copy_bytes(ctx->blob, &var->state_slots[i], 3937ec681f3Smrg sizeof(var->state_slots[i])); 3947e102996Smaya } 3957e102996Smaya } 3967ec681f3Smrg if (flags.u.has_constant_initializer) 39701e04c3fSmrg var->constant_initializer = read_constant(ctx, var); 39801e04c3fSmrg else 39901e04c3fSmrg var->constant_initializer = NULL; 4007ec681f3Smrg 4017ec681f3Smrg if (flags.u.has_pointer_initializer) 4027ec681f3Smrg var->pointer_initializer = read_object(ctx); 40301e04c3fSmrg else 4047ec681f3Smrg var->pointer_initializer = NULL; 4057ec681f3Smrg 4067ec681f3Smrg var->num_members = flags.u.num_members; 40701e04c3fSmrg if (var->num_members > 0) { 40801e04c3fSmrg var->members = ralloc_array(var, struct nir_variable_data, 40901e04c3fSmrg var->num_members); 41001e04c3fSmrg blob_copy_bytes(ctx->blob, (uint8_t *) var->members, 41101e04c3fSmrg var->num_members * sizeof(*var->members)); 41201e04c3fSmrg } 41301e04c3fSmrg 41401e04c3fSmrg return var; 41501e04c3fSmrg} 41601e04c3fSmrg 41701e04c3fSmrgstatic void 41801e04c3fSmrgwrite_var_list(write_ctx *ctx, const struct exec_list *src) 41901e04c3fSmrg{ 42001e04c3fSmrg blob_write_uint32(ctx->blob, exec_list_length(src)); 42101e04c3fSmrg foreach_list_typed(nir_variable, var, node, src) { 42201e04c3fSmrg write_variable(ctx, var); 42301e04c3fSmrg } 42401e04c3fSmrg} 42501e04c3fSmrg 42601e04c3fSmrgstatic void 42701e04c3fSmrgread_var_list(read_ctx *ctx, struct exec_list *dst) 42801e04c3fSmrg{ 42901e04c3fSmrg exec_list_make_empty(dst); 43001e04c3fSmrg unsigned num_vars = blob_read_uint32(ctx->blob); 43101e04c3fSmrg for (unsigned i = 0; i < num_vars; i++) { 43201e04c3fSmrg nir_variable *var = read_variable(ctx); 43301e04c3fSmrg exec_list_push_tail(dst, &var->node); 43401e04c3fSmrg } 43501e04c3fSmrg} 43601e04c3fSmrg 43701e04c3fSmrgstatic void 43801e04c3fSmrgwrite_register(write_ctx *ctx, const nir_register *reg) 43901e04c3fSmrg{ 44001e04c3fSmrg write_add_object(ctx, reg); 44101e04c3fSmrg blob_write_uint32(ctx->blob, reg->num_components); 44201e04c3fSmrg blob_write_uint32(ctx->blob, reg->bit_size); 44301e04c3fSmrg blob_write_uint32(ctx->blob, reg->num_array_elems); 44401e04c3fSmrg blob_write_uint32(ctx->blob, reg->index); 44501e04c3fSmrg} 44601e04c3fSmrg 44701e04c3fSmrgstatic nir_register * 44801e04c3fSmrgread_register(read_ctx *ctx) 44901e04c3fSmrg{ 45001e04c3fSmrg nir_register *reg = ralloc(ctx->nir, nir_register); 45101e04c3fSmrg read_add_object(ctx, reg); 45201e04c3fSmrg reg->num_components = blob_read_uint32(ctx->blob); 45301e04c3fSmrg reg->bit_size = blob_read_uint32(ctx->blob); 45401e04c3fSmrg reg->num_array_elems = blob_read_uint32(ctx->blob); 45501e04c3fSmrg reg->index = blob_read_uint32(ctx->blob); 45601e04c3fSmrg 45701e04c3fSmrg list_inithead(®->uses); 45801e04c3fSmrg list_inithead(®->defs); 45901e04c3fSmrg list_inithead(®->if_uses); 46001e04c3fSmrg 46101e04c3fSmrg return reg; 46201e04c3fSmrg} 46301e04c3fSmrg 46401e04c3fSmrgstatic void 46501e04c3fSmrgwrite_reg_list(write_ctx *ctx, const struct exec_list *src) 46601e04c3fSmrg{ 46701e04c3fSmrg blob_write_uint32(ctx->blob, exec_list_length(src)); 46801e04c3fSmrg foreach_list_typed(nir_register, reg, node, src) 46901e04c3fSmrg write_register(ctx, reg); 47001e04c3fSmrg} 47101e04c3fSmrg 47201e04c3fSmrgstatic void 47301e04c3fSmrgread_reg_list(read_ctx *ctx, struct exec_list *dst) 47401e04c3fSmrg{ 47501e04c3fSmrg exec_list_make_empty(dst); 47601e04c3fSmrg unsigned num_regs = blob_read_uint32(ctx->blob); 47701e04c3fSmrg for (unsigned i = 0; i < num_regs; i++) { 47801e04c3fSmrg nir_register *reg = read_register(ctx); 47901e04c3fSmrg exec_list_push_tail(dst, ®->node); 48001e04c3fSmrg } 48101e04c3fSmrg} 48201e04c3fSmrg 4837ec681f3Smrgunion packed_src { 4847ec681f3Smrg uint32_t u32; 4857ec681f3Smrg struct { 4867ec681f3Smrg unsigned is_ssa:1; /* <-- Header */ 4877ec681f3Smrg unsigned is_indirect:1; 4887ec681f3Smrg unsigned object_idx:20; 4897ec681f3Smrg unsigned _footer:10; /* <-- Footer */ 4907ec681f3Smrg } any; 4917ec681f3Smrg struct { 4927ec681f3Smrg unsigned _header:22; /* <-- Header */ 4937ec681f3Smrg unsigned negate:1; /* <-- Footer */ 4947ec681f3Smrg unsigned abs:1; 4957ec681f3Smrg unsigned swizzle_x:2; 4967ec681f3Smrg unsigned swizzle_y:2; 4977ec681f3Smrg unsigned swizzle_z:2; 4987ec681f3Smrg unsigned swizzle_w:2; 4997ec681f3Smrg } alu; 5007ec681f3Smrg struct { 5017ec681f3Smrg unsigned _header:22; /* <-- Header */ 5027ec681f3Smrg unsigned src_type:5; /* <-- Footer */ 5037ec681f3Smrg unsigned _pad:5; 5047ec681f3Smrg } tex; 5057ec681f3Smrg}; 5067ec681f3Smrg 50701e04c3fSmrgstatic void 5087ec681f3Smrgwrite_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) 50901e04c3fSmrg{ 51001e04c3fSmrg /* Since sources are very frequent, we try to save some space when storing 51101e04c3fSmrg * them. In particular, we store whether the source is a register and 51201e04c3fSmrg * whether the register has an indirect index in the low two bits. We can 51301e04c3fSmrg * assume that the high two bits of the index are zero, since otherwise our 51401e04c3fSmrg * address space would've been exhausted allocating the remap table! 51501e04c3fSmrg */ 5167ec681f3Smrg header.any.is_ssa = src->is_ssa; 51701e04c3fSmrg if (src->is_ssa) { 5187ec681f3Smrg header.any.object_idx = write_lookup_object(ctx, src->ssa); 5197ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 52001e04c3fSmrg } else { 5217ec681f3Smrg header.any.object_idx = write_lookup_object(ctx, src->reg.reg); 5227ec681f3Smrg header.any.is_indirect = !!src->reg.indirect; 5237ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 52401e04c3fSmrg blob_write_uint32(ctx->blob, src->reg.base_offset); 52501e04c3fSmrg if (src->reg.indirect) { 5267ec681f3Smrg union packed_src header = {0}; 5277ec681f3Smrg write_src_full(ctx, src->reg.indirect, header); 52801e04c3fSmrg } 52901e04c3fSmrg } 53001e04c3fSmrg} 53101e04c3fSmrg 53201e04c3fSmrgstatic void 5337ec681f3Smrgwrite_src(write_ctx *ctx, const nir_src *src) 5347ec681f3Smrg{ 5357ec681f3Smrg union packed_src header = {0}; 5367ec681f3Smrg write_src_full(ctx, src, header); 5377ec681f3Smrg} 5387ec681f3Smrg 5397ec681f3Smrgstatic union packed_src 54001e04c3fSmrgread_src(read_ctx *ctx, nir_src *src, void *mem_ctx) 54101e04c3fSmrg{ 5427ec681f3Smrg STATIC_ASSERT(sizeof(union packed_src) == 4); 5437ec681f3Smrg union packed_src header; 5447ec681f3Smrg header.u32 = blob_read_uint32(ctx->blob); 5457ec681f3Smrg 5467ec681f3Smrg src->is_ssa = header.any.is_ssa; 54701e04c3fSmrg if (src->is_ssa) { 5487ec681f3Smrg src->ssa = read_lookup_object(ctx, header.any.object_idx); 54901e04c3fSmrg } else { 5507ec681f3Smrg src->reg.reg = read_lookup_object(ctx, header.any.object_idx); 55101e04c3fSmrg src->reg.base_offset = blob_read_uint32(ctx->blob); 5527ec681f3Smrg if (header.any.is_indirect) { 5537ec681f3Smrg src->reg.indirect = malloc(sizeof(nir_src)); 55401e04c3fSmrg read_src(ctx, src->reg.indirect, mem_ctx); 55501e04c3fSmrg } else { 55601e04c3fSmrg src->reg.indirect = NULL; 55701e04c3fSmrg } 55801e04c3fSmrg } 5597ec681f3Smrg return header; 56001e04c3fSmrg} 56101e04c3fSmrg 5627ec681f3Smrgunion packed_dest { 5637ec681f3Smrg uint8_t u8; 5647ec681f3Smrg struct { 5657ec681f3Smrg uint8_t is_ssa:1; 5667ec681f3Smrg uint8_t num_components:3; 5677ec681f3Smrg uint8_t bit_size:3; 5687ec681f3Smrg uint8_t _pad:1; 5697ec681f3Smrg } ssa; 5707ec681f3Smrg struct { 5717ec681f3Smrg uint8_t is_ssa:1; 5727ec681f3Smrg uint8_t is_indirect:1; 5737ec681f3Smrg uint8_t _pad:6; 5747ec681f3Smrg } reg; 5757ec681f3Smrg}; 5767ec681f3Smrg 5777ec681f3Smrgenum intrinsic_const_indices_encoding { 5787ec681f3Smrg /* Use the 9 bits of packed_const_indices to store 1-9 indices. 5797ec681f3Smrg * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or 5807ec681f3Smrg * 4 2-bit indices, or 5-9 1-bit indices. 5817ec681f3Smrg * 5827ec681f3Smrg * The common case for load_ubo is 0, 0, 0, which is trivially represented. 5837ec681f3Smrg * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 5847ec681f3Smrg */ 5857ec681f3Smrg const_indices_9bit_all_combined, 5867ec681f3Smrg 5877ec681f3Smrg const_indices_8bit, /* 8 bits per element */ 5887ec681f3Smrg const_indices_16bit, /* 16 bits per element */ 5897ec681f3Smrg const_indices_32bit, /* 32 bits per element */ 5907ec681f3Smrg}; 5917ec681f3Smrg 5927ec681f3Smrgenum load_const_packing { 5937ec681f3Smrg /* Constants are not packed and are stored in following dwords. */ 5947ec681f3Smrg load_const_full, 5957ec681f3Smrg 5967ec681f3Smrg /* packed_value contains high 19 bits, low bits are 0, 5977ec681f3Smrg * good for floating-point decimals 5987ec681f3Smrg */ 5997ec681f3Smrg load_const_scalar_hi_19bits, 6007ec681f3Smrg 6017ec681f3Smrg /* packed_value contains low 19 bits, high bits are sign-extended */ 6027ec681f3Smrg load_const_scalar_lo_19bits_sext, 6037ec681f3Smrg}; 6047ec681f3Smrg 6057ec681f3Smrgunion packed_instr { 6067ec681f3Smrg uint32_t u32; 6077ec681f3Smrg struct { 6087ec681f3Smrg unsigned instr_type:4; /* always present */ 6097ec681f3Smrg unsigned _pad:20; 6107ec681f3Smrg unsigned dest:8; /* always last */ 6117ec681f3Smrg } any; 6127ec681f3Smrg struct { 6137ec681f3Smrg unsigned instr_type:4; 6147ec681f3Smrg unsigned exact:1; 6157ec681f3Smrg unsigned no_signed_wrap:1; 6167ec681f3Smrg unsigned no_unsigned_wrap:1; 6177ec681f3Smrg unsigned saturate:1; 6187ec681f3Smrg /* Reg: writemask; SSA: swizzles for 2 srcs */ 6197ec681f3Smrg unsigned writemask_or_two_swizzles:4; 6207ec681f3Smrg unsigned op:9; 6217ec681f3Smrg unsigned packed_src_ssa_16bit:1; 6227ec681f3Smrg /* Scalarized ALUs always have the same header. */ 6237ec681f3Smrg unsigned num_followup_alu_sharing_header:2; 6247ec681f3Smrg unsigned dest:8; 6257ec681f3Smrg } alu; 6267ec681f3Smrg struct { 6277ec681f3Smrg unsigned instr_type:4; 6287ec681f3Smrg unsigned deref_type:3; 6297ec681f3Smrg unsigned cast_type_same_as_last:1; 6307ec681f3Smrg unsigned modes:14; /* deref_var redefines this */ 6317ec681f3Smrg unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */ 6327ec681f3Smrg unsigned _pad:1; /* deref_var redefines this */ 6337ec681f3Smrg unsigned dest:8; 6347ec681f3Smrg } deref; 6357ec681f3Smrg struct { 6367ec681f3Smrg unsigned instr_type:4; 6377ec681f3Smrg unsigned deref_type:3; 6387ec681f3Smrg unsigned _pad:1; 6397ec681f3Smrg unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */ 6407ec681f3Smrg unsigned dest:8; 6417ec681f3Smrg } deref_var; 6427ec681f3Smrg struct { 6437ec681f3Smrg unsigned instr_type:4; 6447ec681f3Smrg unsigned intrinsic:9; 6457ec681f3Smrg unsigned const_indices_encoding:2; 6467ec681f3Smrg unsigned packed_const_indices:9; 6477ec681f3Smrg unsigned dest:8; 6487ec681f3Smrg } intrinsic; 6497ec681f3Smrg struct { 6507ec681f3Smrg unsigned instr_type:4; 6517ec681f3Smrg unsigned last_component:4; 6527ec681f3Smrg unsigned bit_size:3; 6537ec681f3Smrg unsigned packing:2; /* enum load_const_packing */ 6547ec681f3Smrg unsigned packed_value:19; /* meaning determined by packing */ 6557ec681f3Smrg } load_const; 6567ec681f3Smrg struct { 6577ec681f3Smrg unsigned instr_type:4; 6587ec681f3Smrg unsigned last_component:4; 6597ec681f3Smrg unsigned bit_size:3; 6607ec681f3Smrg unsigned _pad:21; 6617ec681f3Smrg } undef; 6627ec681f3Smrg struct { 6637ec681f3Smrg unsigned instr_type:4; 6647ec681f3Smrg unsigned num_srcs:4; 6657ec681f3Smrg unsigned op:4; 6667ec681f3Smrg unsigned dest:8; 6677ec681f3Smrg unsigned _pad:12; 6687ec681f3Smrg } tex; 6697ec681f3Smrg struct { 6707ec681f3Smrg unsigned instr_type:4; 6717ec681f3Smrg unsigned num_srcs:20; 6727ec681f3Smrg unsigned dest:8; 6737ec681f3Smrg } phi; 6747ec681f3Smrg struct { 6757ec681f3Smrg unsigned instr_type:4; 6767ec681f3Smrg unsigned type:2; 6777ec681f3Smrg unsigned _pad:26; 6787ec681f3Smrg } jump; 6797ec681f3Smrg}; 6807ec681f3Smrg 6817ec681f3Smrg/* Write "lo24" as low 24 bits in the first uint32. */ 68201e04c3fSmrgstatic void 6837ec681f3Smrgwrite_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header, 6847ec681f3Smrg nir_instr_type instr_type) 68501e04c3fSmrg{ 6867ec681f3Smrg STATIC_ASSERT(sizeof(union packed_dest) == 1); 6877ec681f3Smrg union packed_dest dest; 6887ec681f3Smrg dest.u8 = 0; 6897ec681f3Smrg 6907ec681f3Smrg dest.ssa.is_ssa = dst->is_ssa; 69101e04c3fSmrg if (dst->is_ssa) { 6927ec681f3Smrg dest.ssa.num_components = 6937ec681f3Smrg encode_num_components_in_3bits(dst->ssa.num_components); 6947ec681f3Smrg dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size); 6957ec681f3Smrg } else { 6967ec681f3Smrg dest.reg.is_indirect = !!(dst->reg.indirect); 6977ec681f3Smrg } 6987ec681f3Smrg header.any.dest = dest.u8; 6997ec681f3Smrg 7007ec681f3Smrg /* Check if the current ALU instruction has the same header as the previous 7017ec681f3Smrg * instruction that is also ALU. If it is, we don't have to write 7027ec681f3Smrg * the current header. This is a typical occurence after scalarization. 7037ec681f3Smrg */ 7047ec681f3Smrg if (instr_type == nir_instr_type_alu) { 7057ec681f3Smrg bool equal_header = false; 7067ec681f3Smrg 7077ec681f3Smrg if (ctx->last_instr_type == nir_instr_type_alu) { 7087ec681f3Smrg assert(ctx->last_alu_header_offset); 7097ec681f3Smrg union packed_instr last_header; 7107ec681f3Smrg memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset, 7117ec681f3Smrg sizeof(last_header)); 7127ec681f3Smrg 7137ec681f3Smrg /* Clear the field that counts ALUs with equal headers. */ 7147ec681f3Smrg union packed_instr clean_header; 7157ec681f3Smrg clean_header.u32 = last_header.u32; 7167ec681f3Smrg clean_header.alu.num_followup_alu_sharing_header = 0; 7177ec681f3Smrg 7187ec681f3Smrg /* There can be at most 4 consecutive ALU instructions 7197ec681f3Smrg * sharing the same header. 7207ec681f3Smrg */ 7217ec681f3Smrg if (last_header.alu.num_followup_alu_sharing_header < 3 && 7227ec681f3Smrg header.u32 == clean_header.u32) { 7237ec681f3Smrg last_header.alu.num_followup_alu_sharing_header++; 7247ec681f3Smrg memcpy(ctx->blob->data + ctx->last_alu_header_offset, 7257ec681f3Smrg &last_header, sizeof(last_header)); 7267ec681f3Smrg 7277ec681f3Smrg equal_header = true; 7287ec681f3Smrg } 7297ec681f3Smrg } 7307ec681f3Smrg 7317ec681f3Smrg if (!equal_header) { 7327ec681f3Smrg ctx->last_alu_header_offset = ctx->blob->size; 7337ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 7347ec681f3Smrg } 73501e04c3fSmrg } else { 7367ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 73701e04c3fSmrg } 7387ec681f3Smrg 7397ec681f3Smrg if (dest.ssa.is_ssa && 7407ec681f3Smrg dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 7417ec681f3Smrg blob_write_uint32(ctx->blob, dst->ssa.num_components); 7427ec681f3Smrg 74301e04c3fSmrg if (dst->is_ssa) { 74401e04c3fSmrg write_add_object(ctx, &dst->ssa); 74501e04c3fSmrg } else { 7467ec681f3Smrg blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg)); 74701e04c3fSmrg blob_write_uint32(ctx->blob, dst->reg.base_offset); 74801e04c3fSmrg if (dst->reg.indirect) 74901e04c3fSmrg write_src(ctx, dst->reg.indirect); 75001e04c3fSmrg } 75101e04c3fSmrg} 75201e04c3fSmrg 75301e04c3fSmrgstatic void 7547ec681f3Smrgread_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr, 7557ec681f3Smrg union packed_instr header) 7567ec681f3Smrg{ 7577ec681f3Smrg union packed_dest dest; 7587ec681f3Smrg dest.u8 = header.any.dest; 7597ec681f3Smrg 7607ec681f3Smrg if (dest.ssa.is_ssa) { 7617ec681f3Smrg unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size); 7627ec681f3Smrg unsigned num_components; 7637ec681f3Smrg if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 7647ec681f3Smrg num_components = blob_read_uint32(ctx->blob); 7657ec681f3Smrg else 7667ec681f3Smrg num_components = decode_num_components_in_3bits(dest.ssa.num_components); 7677ec681f3Smrg nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL); 76801e04c3fSmrg read_add_object(ctx, &dst->ssa); 76901e04c3fSmrg } else { 77001e04c3fSmrg dst->reg.reg = read_object(ctx); 77101e04c3fSmrg dst->reg.base_offset = blob_read_uint32(ctx->blob); 7727ec681f3Smrg if (dest.reg.is_indirect) { 7737ec681f3Smrg dst->reg.indirect = malloc(sizeof(nir_src)); 77401e04c3fSmrg read_src(ctx, dst->reg.indirect, instr); 77501e04c3fSmrg } 77601e04c3fSmrg } 77701e04c3fSmrg} 77801e04c3fSmrg 7797ec681f3Smrgstatic bool 7807ec681f3Smrgare_object_ids_16bit(write_ctx *ctx) 7817ec681f3Smrg{ 7827ec681f3Smrg /* Check the highest object ID, because they are monotonic. */ 7837ec681f3Smrg return ctx->next_idx < (1 << 16); 7847ec681f3Smrg} 7857ec681f3Smrg 7867ec681f3Smrgstatic bool 7877ec681f3Smrgis_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) 7887ec681f3Smrg{ 7897ec681f3Smrg unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 7907ec681f3Smrg 7917ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 7927ec681f3Smrg if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate) 7937ec681f3Smrg return false; 7947ec681f3Smrg 7957ec681f3Smrg unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 7967ec681f3Smrg 7977ec681f3Smrg for (unsigned chan = 0; chan < src_components; chan++) { 7987ec681f3Smrg /* The swizzles for src0.x and src1.x are stored 7997ec681f3Smrg * in writemask_or_two_swizzles for SSA ALUs. 8007ec681f3Smrg */ 8017ec681f3Smrg if (alu->dest.dest.is_ssa && i < 2 && chan == 0 && 8027ec681f3Smrg alu->src[i].swizzle[chan] < 4) 8037ec681f3Smrg continue; 8047ec681f3Smrg 8057ec681f3Smrg if (alu->src[i].swizzle[chan] != chan) 8067ec681f3Smrg return false; 8077ec681f3Smrg } 8087ec681f3Smrg } 8097ec681f3Smrg 8107ec681f3Smrg return are_object_ids_16bit(ctx); 8117ec681f3Smrg} 8127ec681f3Smrg 81301e04c3fSmrgstatic void 81401e04c3fSmrgwrite_alu(write_ctx *ctx, const nir_alu_instr *alu) 81501e04c3fSmrg{ 8167ec681f3Smrg unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 8177ec681f3Smrg unsigned dst_components = nir_dest_num_components(alu->dest.dest); 8187ec681f3Smrg 8197ec681f3Smrg /* 9 bits for nir_op */ 8207ec681f3Smrg STATIC_ASSERT(nir_num_opcodes <= 512); 8217ec681f3Smrg union packed_instr header; 8227ec681f3Smrg header.u32 = 0; 8237ec681f3Smrg 8247ec681f3Smrg header.alu.instr_type = alu->instr.type; 8257ec681f3Smrg header.alu.exact = alu->exact; 8267ec681f3Smrg header.alu.no_signed_wrap = alu->no_signed_wrap; 8277ec681f3Smrg header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; 8287ec681f3Smrg header.alu.saturate = alu->dest.saturate; 8297ec681f3Smrg header.alu.op = alu->op; 8307ec681f3Smrg header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); 8317ec681f3Smrg 8327ec681f3Smrg if (header.alu.packed_src_ssa_16bit && 8337ec681f3Smrg alu->dest.dest.is_ssa) { 8347ec681f3Smrg /* For packed srcs of SSA ALUs, this field stores the swizzles. */ 8357ec681f3Smrg header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0]; 8367ec681f3Smrg if (num_srcs > 1) 8377ec681f3Smrg header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2; 8387ec681f3Smrg } else if (!alu->dest.dest.is_ssa && dst_components <= 4) { 8397ec681f3Smrg /* For vec4 registers, this field is a writemask. */ 8407ec681f3Smrg header.alu.writemask_or_two_swizzles = alu->dest.write_mask; 8417ec681f3Smrg } 84201e04c3fSmrg 8437ec681f3Smrg write_dest(ctx, &alu->dest.dest, header, alu->instr.type); 84401e04c3fSmrg 8457ec681f3Smrg if (!alu->dest.dest.is_ssa && dst_components > 4) 8467ec681f3Smrg blob_write_uint32(ctx->blob, alu->dest.write_mask); 8477ec681f3Smrg 8487ec681f3Smrg if (header.alu.packed_src_ssa_16bit) { 8497ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 8507ec681f3Smrg assert(alu->src[i].src.is_ssa); 8517ec681f3Smrg unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); 8527ec681f3Smrg assert(idx < (1 << 16)); 8537ec681f3Smrg blob_write_uint16(ctx->blob, idx); 8547ec681f3Smrg } 8557ec681f3Smrg } else { 8567ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 8577ec681f3Smrg unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 8587ec681f3Smrg unsigned src_components = nir_src_num_components(alu->src[i].src); 8597ec681f3Smrg union packed_src src; 8607ec681f3Smrg bool packed = src_components <= 4 && src_channels <= 4; 8617ec681f3Smrg src.u32 = 0; 8627ec681f3Smrg 8637ec681f3Smrg src.alu.negate = alu->src[i].negate; 8647ec681f3Smrg src.alu.abs = alu->src[i].abs; 8657ec681f3Smrg 8667ec681f3Smrg if (packed) { 8677ec681f3Smrg src.alu.swizzle_x = alu->src[i].swizzle[0]; 8687ec681f3Smrg src.alu.swizzle_y = alu->src[i].swizzle[1]; 8697ec681f3Smrg src.alu.swizzle_z = alu->src[i].swizzle[2]; 8707ec681f3Smrg src.alu.swizzle_w = alu->src[i].swizzle[3]; 8717ec681f3Smrg } 8727ec681f3Smrg 8737ec681f3Smrg write_src_full(ctx, &alu->src[i].src, src); 8747ec681f3Smrg 8757ec681f3Smrg /* Store swizzles for vec8 and vec16. */ 8767ec681f3Smrg if (!packed) { 8777ec681f3Smrg for (unsigned o = 0; o < src_channels; o += 8) { 8787ec681f3Smrg unsigned value = 0; 8797ec681f3Smrg 8807ec681f3Smrg for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 8817ec681f3Smrg value |= (uint32_t)alu->src[i].swizzle[o + j] << 8827ec681f3Smrg (4 * j); /* 4 bits per swizzle */ 8837ec681f3Smrg } 8847ec681f3Smrg 8857ec681f3Smrg blob_write_uint32(ctx->blob, value); 8867ec681f3Smrg } 8877ec681f3Smrg } 8887ec681f3Smrg } 88901e04c3fSmrg } 89001e04c3fSmrg} 89101e04c3fSmrg 89201e04c3fSmrgstatic nir_alu_instr * 8937ec681f3Smrgread_alu(read_ctx *ctx, union packed_instr header) 89401e04c3fSmrg{ 8957ec681f3Smrg unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; 8967ec681f3Smrg nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); 89701e04c3fSmrg 8987ec681f3Smrg alu->exact = header.alu.exact; 8997ec681f3Smrg alu->no_signed_wrap = header.alu.no_signed_wrap; 9007ec681f3Smrg alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; 9017ec681f3Smrg alu->dest.saturate = header.alu.saturate; 9027ec681f3Smrg 9037ec681f3Smrg read_dest(ctx, &alu->dest.dest, &alu->instr, header); 9047ec681f3Smrg 9057ec681f3Smrg unsigned dst_components = nir_dest_num_components(alu->dest.dest); 9067ec681f3Smrg 9077ec681f3Smrg if (alu->dest.dest.is_ssa) { 9087ec681f3Smrg alu->dest.write_mask = u_bit_consecutive(0, dst_components); 9097ec681f3Smrg } else if (dst_components <= 4) { 9107ec681f3Smrg alu->dest.write_mask = header.alu.writemask_or_two_swizzles; 9117ec681f3Smrg } else { 9127ec681f3Smrg alu->dest.write_mask = blob_read_uint32(ctx->blob); 9137ec681f3Smrg } 9147ec681f3Smrg 9157ec681f3Smrg if (header.alu.packed_src_ssa_16bit) { 9167ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 9177ec681f3Smrg nir_alu_src *src = &alu->src[i]; 9187ec681f3Smrg src->src.is_ssa = true; 9197ec681f3Smrg src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 9207ec681f3Smrg 9217ec681f3Smrg memset(&src->swizzle, 0, sizeof(src->swizzle)); 9227ec681f3Smrg 9237ec681f3Smrg unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 9247ec681f3Smrg 9257ec681f3Smrg for (unsigned chan = 0; chan < src_components; chan++) 9267ec681f3Smrg src->swizzle[chan] = chan; 9277ec681f3Smrg } 9287ec681f3Smrg } else { 9297ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 9307ec681f3Smrg union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr); 9317ec681f3Smrg unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 9327ec681f3Smrg unsigned src_components = nir_src_num_components(alu->src[i].src); 9337ec681f3Smrg bool packed = src_components <= 4 && src_channels <= 4; 9347ec681f3Smrg 9357ec681f3Smrg alu->src[i].negate = src.alu.negate; 9367ec681f3Smrg alu->src[i].abs = src.alu.abs; 9377ec681f3Smrg 9387ec681f3Smrg memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); 9397ec681f3Smrg 9407ec681f3Smrg if (packed) { 9417ec681f3Smrg alu->src[i].swizzle[0] = src.alu.swizzle_x; 9427ec681f3Smrg alu->src[i].swizzle[1] = src.alu.swizzle_y; 9437ec681f3Smrg alu->src[i].swizzle[2] = src.alu.swizzle_z; 9447ec681f3Smrg alu->src[i].swizzle[3] = src.alu.swizzle_w; 9457ec681f3Smrg } else { 9467ec681f3Smrg /* Load swizzles for vec8 and vec16. */ 9477ec681f3Smrg for (unsigned o = 0; o < src_channels; o += 8) { 9487ec681f3Smrg unsigned value = blob_read_uint32(ctx->blob); 9497ec681f3Smrg 9507ec681f3Smrg for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 9517ec681f3Smrg alu->src[i].swizzle[o + j] = 9527ec681f3Smrg (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ 9537ec681f3Smrg } 9547ec681f3Smrg } 9557ec681f3Smrg } 9567ec681f3Smrg } 9577ec681f3Smrg } 9587ec681f3Smrg 9597ec681f3Smrg if (header.alu.packed_src_ssa_16bit && 9607ec681f3Smrg alu->dest.dest.is_ssa) { 9617ec681f3Smrg alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3; 9627ec681f3Smrg if (num_srcs > 1) 9637ec681f3Smrg alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2; 96401e04c3fSmrg } 96501e04c3fSmrg 96601e04c3fSmrg return alu; 96701e04c3fSmrg} 96801e04c3fSmrg 96901e04c3fSmrgstatic void 97001e04c3fSmrgwrite_deref(write_ctx *ctx, const nir_deref_instr *deref) 97101e04c3fSmrg{ 9727ec681f3Smrg assert(deref->deref_type < 8); 9737ec681f3Smrg assert(deref->modes < (1 << 14)); 9747ec681f3Smrg 9757ec681f3Smrg union packed_instr header; 9767ec681f3Smrg header.u32 = 0; 97701e04c3fSmrg 9787ec681f3Smrg header.deref.instr_type = deref->instr.type; 9797ec681f3Smrg header.deref.deref_type = deref->deref_type; 98001e04c3fSmrg 9817ec681f3Smrg if (deref->deref_type == nir_deref_type_cast) { 9827ec681f3Smrg header.deref.modes = deref->modes; 9837ec681f3Smrg header.deref.cast_type_same_as_last = deref->type == ctx->last_type; 9847ec681f3Smrg } 98501e04c3fSmrg 9867ec681f3Smrg unsigned var_idx = 0; 98701e04c3fSmrg if (deref->deref_type == nir_deref_type_var) { 9887ec681f3Smrg var_idx = write_lookup_object(ctx, deref->var); 9897ec681f3Smrg if (var_idx && var_idx < (1 << 16)) 9907ec681f3Smrg header.deref_var.object_idx = var_idx; 9917ec681f3Smrg } 9927ec681f3Smrg 9937ec681f3Smrg if (deref->deref_type == nir_deref_type_array || 9947ec681f3Smrg deref->deref_type == nir_deref_type_ptr_as_array) { 9957ec681f3Smrg header.deref.packed_src_ssa_16bit = 9967ec681f3Smrg deref->parent.is_ssa && deref->arr.index.is_ssa && 9977ec681f3Smrg are_object_ids_16bit(ctx); 99801e04c3fSmrg } 99901e04c3fSmrg 10007ec681f3Smrg write_dest(ctx, &deref->dest, header, deref->instr.type); 100101e04c3fSmrg 100201e04c3fSmrg switch (deref->deref_type) { 10037ec681f3Smrg case nir_deref_type_var: 10047ec681f3Smrg if (!header.deref_var.object_idx) 10057ec681f3Smrg blob_write_uint32(ctx->blob, var_idx); 10067ec681f3Smrg break; 10077ec681f3Smrg 100801e04c3fSmrg case nir_deref_type_struct: 10097ec681f3Smrg write_src(ctx, &deref->parent); 101001e04c3fSmrg blob_write_uint32(ctx->blob, deref->strct.index); 101101e04c3fSmrg break; 101201e04c3fSmrg 101301e04c3fSmrg case nir_deref_type_array: 10147e102996Smaya case nir_deref_type_ptr_as_array: 10157ec681f3Smrg if (header.deref.packed_src_ssa_16bit) { 10167ec681f3Smrg blob_write_uint16(ctx->blob, 10177ec681f3Smrg write_lookup_object(ctx, deref->parent.ssa)); 10187ec681f3Smrg blob_write_uint16(ctx->blob, 10197ec681f3Smrg write_lookup_object(ctx, deref->arr.index.ssa)); 10207ec681f3Smrg } else { 10217ec681f3Smrg write_src(ctx, &deref->parent); 10227ec681f3Smrg write_src(ctx, &deref->arr.index); 10237ec681f3Smrg } 102401e04c3fSmrg break; 102501e04c3fSmrg 102601e04c3fSmrg case nir_deref_type_cast: 10277ec681f3Smrg write_src(ctx, &deref->parent); 10287e102996Smaya blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 10297ec681f3Smrg blob_write_uint32(ctx->blob, deref->cast.align_mul); 10307ec681f3Smrg blob_write_uint32(ctx->blob, deref->cast.align_offset); 10317ec681f3Smrg if (!header.deref.cast_type_same_as_last) { 10327ec681f3Smrg encode_type_to_blob(ctx->blob, deref->type); 10337ec681f3Smrg ctx->last_type = deref->type; 10347ec681f3Smrg } 10357e102996Smaya break; 10367e102996Smaya 10377e102996Smaya case nir_deref_type_array_wildcard: 10387ec681f3Smrg write_src(ctx, &deref->parent); 103901e04c3fSmrg break; 104001e04c3fSmrg 104101e04c3fSmrg default: 104201e04c3fSmrg unreachable("Invalid deref type"); 104301e04c3fSmrg } 104401e04c3fSmrg} 104501e04c3fSmrg 104601e04c3fSmrgstatic nir_deref_instr * 10477ec681f3Smrgread_deref(read_ctx *ctx, union packed_instr header) 104801e04c3fSmrg{ 10497ec681f3Smrg nir_deref_type deref_type = header.deref.deref_type; 105001e04c3fSmrg nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 105101e04c3fSmrg 10527ec681f3Smrg read_dest(ctx, &deref->dest, &deref->instr, header); 105301e04c3fSmrg 10547ec681f3Smrg nir_deref_instr *parent; 105501e04c3fSmrg 10567ec681f3Smrg switch (deref->deref_type) { 10577ec681f3Smrg case nir_deref_type_var: 10587ec681f3Smrg if (header.deref_var.object_idx) 10597ec681f3Smrg deref->var = read_lookup_object(ctx, header.deref_var.object_idx); 10607ec681f3Smrg else 10617ec681f3Smrg deref->var = read_object(ctx); 106201e04c3fSmrg 10637ec681f3Smrg deref->type = deref->var->type; 10647ec681f3Smrg break; 106501e04c3fSmrg 106601e04c3fSmrg case nir_deref_type_struct: 10677ec681f3Smrg read_src(ctx, &deref->parent, &deref->instr); 10687ec681f3Smrg parent = nir_src_as_deref(deref->parent); 106901e04c3fSmrg deref->strct.index = blob_read_uint32(ctx->blob); 10707ec681f3Smrg deref->type = glsl_get_struct_field(parent->type, deref->strct.index); 107101e04c3fSmrg break; 107201e04c3fSmrg 107301e04c3fSmrg case nir_deref_type_array: 10747e102996Smaya case nir_deref_type_ptr_as_array: 10757ec681f3Smrg if (header.deref.packed_src_ssa_16bit) { 10767ec681f3Smrg deref->parent.is_ssa = true; 10777ec681f3Smrg deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 10787ec681f3Smrg deref->arr.index.is_ssa = true; 10797ec681f3Smrg deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 10807ec681f3Smrg } else { 10817ec681f3Smrg read_src(ctx, &deref->parent, &deref->instr); 10827ec681f3Smrg read_src(ctx, &deref->arr.index, &deref->instr); 10837ec681f3Smrg } 10847ec681f3Smrg 10857ec681f3Smrg parent = nir_src_as_deref(deref->parent); 10867ec681f3Smrg if (deref->deref_type == nir_deref_type_array) 10877ec681f3Smrg deref->type = glsl_get_array_element(parent->type); 10887ec681f3Smrg else 10897ec681f3Smrg deref->type = parent->type; 109001e04c3fSmrg break; 109101e04c3fSmrg 109201e04c3fSmrg case nir_deref_type_cast: 10937ec681f3Smrg read_src(ctx, &deref->parent, &deref->instr); 10947e102996Smaya deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 10957ec681f3Smrg deref->cast.align_mul = blob_read_uint32(ctx->blob); 10967ec681f3Smrg deref->cast.align_offset = blob_read_uint32(ctx->blob); 10977ec681f3Smrg if (header.deref.cast_type_same_as_last) { 10987ec681f3Smrg deref->type = ctx->last_type; 10997ec681f3Smrg } else { 11007ec681f3Smrg deref->type = decode_type_from_blob(ctx->blob); 11017ec681f3Smrg ctx->last_type = deref->type; 11027ec681f3Smrg } 11037e102996Smaya break; 11047e102996Smaya 11057e102996Smaya case nir_deref_type_array_wildcard: 11067ec681f3Smrg read_src(ctx, &deref->parent, &deref->instr); 11077ec681f3Smrg parent = nir_src_as_deref(deref->parent); 11087ec681f3Smrg deref->type = glsl_get_array_element(parent->type); 110901e04c3fSmrg break; 111001e04c3fSmrg 111101e04c3fSmrg default: 111201e04c3fSmrg unreachable("Invalid deref type"); 111301e04c3fSmrg } 111401e04c3fSmrg 11157ec681f3Smrg if (deref_type == nir_deref_type_var) { 11167ec681f3Smrg deref->modes = deref->var->data.mode; 11177ec681f3Smrg } else if (deref->deref_type == nir_deref_type_cast) { 11187ec681f3Smrg deref->modes = header.deref.modes; 11197ec681f3Smrg } else { 11207ec681f3Smrg assert(deref->parent.is_ssa); 11217ec681f3Smrg deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; 11227ec681f3Smrg } 11237ec681f3Smrg 112401e04c3fSmrg return deref; 112501e04c3fSmrg} 112601e04c3fSmrg 112701e04c3fSmrgstatic void 112801e04c3fSmrgwrite_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 112901e04c3fSmrg{ 11307ec681f3Smrg /* 9 bits for nir_intrinsic_op */ 11317ec681f3Smrg STATIC_ASSERT(nir_num_intrinsics <= 512); 113201e04c3fSmrg unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 113301e04c3fSmrg unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 11347ec681f3Smrg assert(intrin->intrinsic < 512); 11357ec681f3Smrg 11367ec681f3Smrg union packed_instr header; 11377ec681f3Smrg header.u32 = 0; 113801e04c3fSmrg 11397ec681f3Smrg header.intrinsic.instr_type = intrin->instr.type; 11407ec681f3Smrg header.intrinsic.intrinsic = intrin->intrinsic; 11417ec681f3Smrg 11427ec681f3Smrg /* Analyze constant indices to decide how to encode them. */ 11437ec681f3Smrg if (num_indices) { 11447ec681f3Smrg unsigned max_bits = 0; 11457ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) { 11467ec681f3Smrg unsigned max = util_last_bit(intrin->const_index[i]); 11477ec681f3Smrg max_bits = MAX2(max_bits, max); 11487ec681f3Smrg } 11497ec681f3Smrg 11507ec681f3Smrg if (max_bits * num_indices <= 9) { 11517ec681f3Smrg header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined; 11527ec681f3Smrg 11537ec681f3Smrg /* Pack all const indices into 6 bits. */ 11547ec681f3Smrg unsigned bit_size = 9 / num_indices; 11557ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) { 11567ec681f3Smrg header.intrinsic.packed_const_indices |= 11577ec681f3Smrg intrin->const_index[i] << (i * bit_size); 11587ec681f3Smrg } 11597ec681f3Smrg } else if (max_bits <= 8) 11607ec681f3Smrg header.intrinsic.const_indices_encoding = const_indices_8bit; 11617ec681f3Smrg else if (max_bits <= 16) 11627ec681f3Smrg header.intrinsic.const_indices_encoding = const_indices_16bit; 11637ec681f3Smrg else 11647ec681f3Smrg header.intrinsic.const_indices_encoding = const_indices_32bit; 11657ec681f3Smrg } 116601e04c3fSmrg 116701e04c3fSmrg if (nir_intrinsic_infos[intrin->intrinsic].has_dest) 11687ec681f3Smrg write_dest(ctx, &intrin->dest, header, intrin->instr.type); 11697ec681f3Smrg else 11707ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 117101e04c3fSmrg 117201e04c3fSmrg for (unsigned i = 0; i < num_srcs; i++) 117301e04c3fSmrg write_src(ctx, &intrin->src[i]); 117401e04c3fSmrg 11757ec681f3Smrg if (num_indices) { 11767ec681f3Smrg switch (header.intrinsic.const_indices_encoding) { 11777ec681f3Smrg case const_indices_8bit: 11787ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 11797ec681f3Smrg blob_write_uint8(ctx->blob, intrin->const_index[i]); 11807ec681f3Smrg break; 11817ec681f3Smrg case const_indices_16bit: 11827ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 11837ec681f3Smrg blob_write_uint16(ctx->blob, intrin->const_index[i]); 11847ec681f3Smrg break; 11857ec681f3Smrg case const_indices_32bit: 11867ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 11877ec681f3Smrg blob_write_uint32(ctx->blob, intrin->const_index[i]); 11887ec681f3Smrg break; 11897ec681f3Smrg } 11907ec681f3Smrg } 119101e04c3fSmrg} 119201e04c3fSmrg 119301e04c3fSmrgstatic nir_intrinsic_instr * 11947ec681f3Smrgread_intrinsic(read_ctx *ctx, union packed_instr header) 119501e04c3fSmrg{ 11967ec681f3Smrg nir_intrinsic_op op = header.intrinsic.intrinsic; 119701e04c3fSmrg nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); 119801e04c3fSmrg 119901e04c3fSmrg unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 120001e04c3fSmrg unsigned num_indices = nir_intrinsic_infos[op].num_indices; 120101e04c3fSmrg 120201e04c3fSmrg if (nir_intrinsic_infos[op].has_dest) 12037ec681f3Smrg read_dest(ctx, &intrin->dest, &intrin->instr, header); 120401e04c3fSmrg 120501e04c3fSmrg for (unsigned i = 0; i < num_srcs; i++) 120601e04c3fSmrg read_src(ctx, &intrin->src[i], &intrin->instr); 120701e04c3fSmrg 12087ec681f3Smrg /* Vectorized instrinsics have num_components same as dst or src that has 12097ec681f3Smrg * 0 components in the info. Find it. 12107ec681f3Smrg */ 12117ec681f3Smrg if (nir_intrinsic_infos[op].has_dest && 12127ec681f3Smrg nir_intrinsic_infos[op].dest_components == 0) { 12137ec681f3Smrg intrin->num_components = nir_dest_num_components(intrin->dest); 12147ec681f3Smrg } else { 12157ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 12167ec681f3Smrg if (nir_intrinsic_infos[op].src_components[i] == 0) { 12177ec681f3Smrg intrin->num_components = nir_src_num_components(intrin->src[i]); 12187ec681f3Smrg break; 12197ec681f3Smrg } 12207ec681f3Smrg } 12217ec681f3Smrg } 12227ec681f3Smrg 12237ec681f3Smrg if (num_indices) { 12247ec681f3Smrg switch (header.intrinsic.const_indices_encoding) { 12257ec681f3Smrg case const_indices_9bit_all_combined: { 12267ec681f3Smrg unsigned bit_size = 9 / num_indices; 12277ec681f3Smrg unsigned bit_mask = u_bit_consecutive(0, bit_size); 12287ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) { 12297ec681f3Smrg intrin->const_index[i] = 12307ec681f3Smrg (header.intrinsic.packed_const_indices >> (i * bit_size)) & 12317ec681f3Smrg bit_mask; 12327ec681f3Smrg } 12337ec681f3Smrg break; 12347ec681f3Smrg } 12357ec681f3Smrg case const_indices_8bit: 12367ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 12377ec681f3Smrg intrin->const_index[i] = blob_read_uint8(ctx->blob); 12387ec681f3Smrg break; 12397ec681f3Smrg case const_indices_16bit: 12407ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 12417ec681f3Smrg intrin->const_index[i] = blob_read_uint16(ctx->blob); 12427ec681f3Smrg break; 12437ec681f3Smrg case const_indices_32bit: 12447ec681f3Smrg for (unsigned i = 0; i < num_indices; i++) 12457ec681f3Smrg intrin->const_index[i] = blob_read_uint32(ctx->blob); 12467ec681f3Smrg break; 12477ec681f3Smrg } 12487ec681f3Smrg } 124901e04c3fSmrg 125001e04c3fSmrg return intrin; 125101e04c3fSmrg} 125201e04c3fSmrg 125301e04c3fSmrgstatic void 125401e04c3fSmrgwrite_load_const(write_ctx *ctx, const nir_load_const_instr *lc) 125501e04c3fSmrg{ 12567ec681f3Smrg assert(lc->def.num_components >= 1 && lc->def.num_components <= 16); 12577ec681f3Smrg union packed_instr header; 12587ec681f3Smrg header.u32 = 0; 12597ec681f3Smrg 12607ec681f3Smrg header.load_const.instr_type = lc->instr.type; 12617ec681f3Smrg header.load_const.last_component = lc->def.num_components - 1; 12627ec681f3Smrg header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size); 12637ec681f3Smrg header.load_const.packing = load_const_full; 12647ec681f3Smrg 12657ec681f3Smrg /* Try to pack 1-component constants into the 19 free bits in the header. */ 12667ec681f3Smrg if (lc->def.num_components == 1) { 12677ec681f3Smrg switch (lc->def.bit_size) { 12687ec681f3Smrg case 64: 12697ec681f3Smrg if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) { 12707ec681f3Smrg /* packed_value contains high 19 bits, low bits are 0 */ 12717ec681f3Smrg header.load_const.packing = load_const_scalar_hi_19bits; 12727ec681f3Smrg header.load_const.packed_value = lc->value[0].u64 >> 45; 12737ec681f3Smrg } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) { 12747ec681f3Smrg /* packed_value contains low 19 bits, high bits are sign-extended */ 12757ec681f3Smrg header.load_const.packing = load_const_scalar_lo_19bits_sext; 12767ec681f3Smrg header.load_const.packed_value = lc->value[0].u64; 12777ec681f3Smrg } 12787ec681f3Smrg break; 12797ec681f3Smrg 12807ec681f3Smrg case 32: 12817ec681f3Smrg if ((lc->value[0].u32 & 0x1fff) == 0) { 12827ec681f3Smrg header.load_const.packing = load_const_scalar_hi_19bits; 12837ec681f3Smrg header.load_const.packed_value = lc->value[0].u32 >> 13; 12847ec681f3Smrg } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) { 12857ec681f3Smrg header.load_const.packing = load_const_scalar_lo_19bits_sext; 12867ec681f3Smrg header.load_const.packed_value = lc->value[0].u32; 12877ec681f3Smrg } 12887ec681f3Smrg break; 12897ec681f3Smrg 12907ec681f3Smrg case 16: 12917ec681f3Smrg header.load_const.packing = load_const_scalar_lo_19bits_sext; 12927ec681f3Smrg header.load_const.packed_value = lc->value[0].u16; 12937ec681f3Smrg break; 12947ec681f3Smrg case 8: 12957ec681f3Smrg header.load_const.packing = load_const_scalar_lo_19bits_sext; 12967ec681f3Smrg header.load_const.packed_value = lc->value[0].u8; 12977ec681f3Smrg break; 12987ec681f3Smrg case 1: 12997ec681f3Smrg header.load_const.packing = load_const_scalar_lo_19bits_sext; 13007ec681f3Smrg header.load_const.packed_value = lc->value[0].b; 13017ec681f3Smrg break; 13027ec681f3Smrg default: 13037ec681f3Smrg unreachable("invalid bit_size"); 13047ec681f3Smrg } 13057ec681f3Smrg } 13067ec681f3Smrg 13077ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 13087ec681f3Smrg 13097ec681f3Smrg if (header.load_const.packing == load_const_full) { 13107ec681f3Smrg switch (lc->def.bit_size) { 13117ec681f3Smrg case 64: 13127ec681f3Smrg blob_write_bytes(ctx->blob, lc->value, 13137ec681f3Smrg sizeof(*lc->value) * lc->def.num_components); 13147ec681f3Smrg break; 13157ec681f3Smrg 13167ec681f3Smrg case 32: 13177ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13187ec681f3Smrg blob_write_uint32(ctx->blob, lc->value[i].u32); 13197ec681f3Smrg break; 13207ec681f3Smrg 13217ec681f3Smrg case 16: 13227ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13237ec681f3Smrg blob_write_uint16(ctx->blob, lc->value[i].u16); 13247ec681f3Smrg break; 13257ec681f3Smrg 13267ec681f3Smrg default: 13277ec681f3Smrg assert(lc->def.bit_size <= 8); 13287ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13297ec681f3Smrg blob_write_uint8(ctx->blob, lc->value[i].u8); 13307ec681f3Smrg break; 13317ec681f3Smrg } 13327ec681f3Smrg } 13337ec681f3Smrg 133401e04c3fSmrg write_add_object(ctx, &lc->def); 133501e04c3fSmrg} 133601e04c3fSmrg 133701e04c3fSmrgstatic nir_load_const_instr * 13387ec681f3Smrgread_load_const(read_ctx *ctx, union packed_instr header) 133901e04c3fSmrg{ 134001e04c3fSmrg nir_load_const_instr *lc = 13417ec681f3Smrg nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1, 13427ec681f3Smrg decode_bit_size_3bits(header.load_const.bit_size)); 13437ec681f3Smrg 13447ec681f3Smrg switch (header.load_const.packing) { 13457ec681f3Smrg case load_const_scalar_hi_19bits: 13467ec681f3Smrg switch (lc->def.bit_size) { 13477ec681f3Smrg case 64: 13487ec681f3Smrg lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45; 13497ec681f3Smrg break; 13507ec681f3Smrg case 32: 13517ec681f3Smrg lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13; 13527ec681f3Smrg break; 13537ec681f3Smrg default: 13547ec681f3Smrg unreachable("invalid bit_size"); 13557ec681f3Smrg } 13567ec681f3Smrg break; 13577ec681f3Smrg 13587ec681f3Smrg case load_const_scalar_lo_19bits_sext: 13597ec681f3Smrg switch (lc->def.bit_size) { 13607ec681f3Smrg case 64: 13617ec681f3Smrg lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45; 13627ec681f3Smrg break; 13637ec681f3Smrg case 32: 13647ec681f3Smrg lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13; 13657ec681f3Smrg break; 13667ec681f3Smrg case 16: 13677ec681f3Smrg lc->value[0].u16 = header.load_const.packed_value; 13687ec681f3Smrg break; 13697ec681f3Smrg case 8: 13707ec681f3Smrg lc->value[0].u8 = header.load_const.packed_value; 13717ec681f3Smrg break; 13727ec681f3Smrg case 1: 13737ec681f3Smrg lc->value[0].b = header.load_const.packed_value; 13747ec681f3Smrg break; 13757ec681f3Smrg default: 13767ec681f3Smrg unreachable("invalid bit_size"); 13777ec681f3Smrg } 13787ec681f3Smrg break; 13797ec681f3Smrg 13807ec681f3Smrg case load_const_full: 13817ec681f3Smrg switch (lc->def.bit_size) { 13827ec681f3Smrg case 64: 13837ec681f3Smrg blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 13847ec681f3Smrg break; 13857ec681f3Smrg 13867ec681f3Smrg case 32: 13877ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13887ec681f3Smrg lc->value[i].u32 = blob_read_uint32(ctx->blob); 13897ec681f3Smrg break; 13907ec681f3Smrg 13917ec681f3Smrg case 16: 13927ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13937ec681f3Smrg lc->value[i].u16 = blob_read_uint16(ctx->blob); 13947ec681f3Smrg break; 13957ec681f3Smrg 13967ec681f3Smrg default: 13977ec681f3Smrg assert(lc->def.bit_size <= 8); 13987ec681f3Smrg for (unsigned i = 0; i < lc->def.num_components; i++) 13997ec681f3Smrg lc->value[i].u8 = blob_read_uint8(ctx->blob); 14007ec681f3Smrg break; 14017ec681f3Smrg } 14027ec681f3Smrg break; 14037ec681f3Smrg } 140401e04c3fSmrg 140501e04c3fSmrg read_add_object(ctx, &lc->def); 140601e04c3fSmrg return lc; 140701e04c3fSmrg} 140801e04c3fSmrg 140901e04c3fSmrgstatic void 141001e04c3fSmrgwrite_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef) 141101e04c3fSmrg{ 14127ec681f3Smrg assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); 14137ec681f3Smrg 14147ec681f3Smrg union packed_instr header; 14157ec681f3Smrg header.u32 = 0; 14167ec681f3Smrg 14177ec681f3Smrg header.undef.instr_type = undef->instr.type; 14187ec681f3Smrg header.undef.last_component = undef->def.num_components - 1; 14197ec681f3Smrg header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size); 14207ec681f3Smrg 14217ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 142201e04c3fSmrg write_add_object(ctx, &undef->def); 142301e04c3fSmrg} 142401e04c3fSmrg 142501e04c3fSmrgstatic nir_ssa_undef_instr * 14267ec681f3Smrgread_ssa_undef(read_ctx *ctx, union packed_instr header) 142701e04c3fSmrg{ 142801e04c3fSmrg nir_ssa_undef_instr *undef = 14297ec681f3Smrg nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1, 14307ec681f3Smrg decode_bit_size_3bits(header.undef.bit_size)); 143101e04c3fSmrg 143201e04c3fSmrg read_add_object(ctx, &undef->def); 143301e04c3fSmrg return undef; 143401e04c3fSmrg} 143501e04c3fSmrg 143601e04c3fSmrgunion packed_tex_data { 143701e04c3fSmrg uint32_t u32; 143801e04c3fSmrg struct { 14397ec681f3Smrg unsigned sampler_dim:4; 14407ec681f3Smrg unsigned dest_type:8; 144101e04c3fSmrg unsigned coord_components:3; 144201e04c3fSmrg unsigned is_array:1; 144301e04c3fSmrg unsigned is_shadow:1; 144401e04c3fSmrg unsigned is_new_style_shadow:1; 14457ec681f3Smrg unsigned is_sparse:1; 144601e04c3fSmrg unsigned component:2; 14477ec681f3Smrg unsigned texture_non_uniform:1; 14487ec681f3Smrg unsigned sampler_non_uniform:1; 14497ec681f3Smrg unsigned array_is_lowered_cube:1; 14507ec681f3Smrg unsigned unused:6; /* Mark unused for valgrind. */ 145101e04c3fSmrg } u; 145201e04c3fSmrg}; 145301e04c3fSmrg 145401e04c3fSmrgstatic void 145501e04c3fSmrgwrite_tex(write_ctx *ctx, const nir_tex_instr *tex) 145601e04c3fSmrg{ 14577ec681f3Smrg assert(tex->num_srcs < 16); 14587ec681f3Smrg assert(tex->op < 16); 14597ec681f3Smrg 14607ec681f3Smrg union packed_instr header; 14617ec681f3Smrg header.u32 = 0; 14627ec681f3Smrg 14637ec681f3Smrg header.tex.instr_type = tex->instr.type; 14647ec681f3Smrg header.tex.num_srcs = tex->num_srcs; 14657ec681f3Smrg header.tex.op = tex->op; 14667ec681f3Smrg 14677ec681f3Smrg write_dest(ctx, &tex->dest, header, tex->instr.type); 14687ec681f3Smrg 146901e04c3fSmrg blob_write_uint32(ctx->blob, tex->texture_index); 147001e04c3fSmrg blob_write_uint32(ctx->blob, tex->sampler_index); 14717ec681f3Smrg if (tex->op == nir_texop_tg4) 14727ec681f3Smrg blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 147301e04c3fSmrg 147401e04c3fSmrg STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); 147501e04c3fSmrg union packed_tex_data packed = { 147601e04c3fSmrg .u.sampler_dim = tex->sampler_dim, 147701e04c3fSmrg .u.dest_type = tex->dest_type, 147801e04c3fSmrg .u.coord_components = tex->coord_components, 147901e04c3fSmrg .u.is_array = tex->is_array, 148001e04c3fSmrg .u.is_shadow = tex->is_shadow, 148101e04c3fSmrg .u.is_new_style_shadow = tex->is_new_style_shadow, 14827ec681f3Smrg .u.is_sparse = tex->is_sparse, 148301e04c3fSmrg .u.component = tex->component, 14847ec681f3Smrg .u.texture_non_uniform = tex->texture_non_uniform, 14857ec681f3Smrg .u.sampler_non_uniform = tex->sampler_non_uniform, 14867ec681f3Smrg .u.array_is_lowered_cube = tex->array_is_lowered_cube, 148701e04c3fSmrg }; 148801e04c3fSmrg blob_write_uint32(ctx->blob, packed.u32); 148901e04c3fSmrg 149001e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 14917ec681f3Smrg union packed_src src; 14927ec681f3Smrg src.u32 = 0; 14937ec681f3Smrg src.tex.src_type = tex->src[i].src_type; 14947ec681f3Smrg write_src_full(ctx, &tex->src[i].src, src); 149501e04c3fSmrg } 149601e04c3fSmrg} 149701e04c3fSmrg 149801e04c3fSmrgstatic nir_tex_instr * 14997ec681f3Smrgread_tex(read_ctx *ctx, union packed_instr header) 150001e04c3fSmrg{ 15017ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); 15027ec681f3Smrg 15037ec681f3Smrg read_dest(ctx, &tex->dest, &tex->instr, header); 150401e04c3fSmrg 15057ec681f3Smrg tex->op = header.tex.op; 150601e04c3fSmrg tex->texture_index = blob_read_uint32(ctx->blob); 150701e04c3fSmrg tex->sampler_index = blob_read_uint32(ctx->blob); 15087ec681f3Smrg if (tex->op == nir_texop_tg4) 15097ec681f3Smrg blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 151001e04c3fSmrg 151101e04c3fSmrg union packed_tex_data packed; 151201e04c3fSmrg packed.u32 = blob_read_uint32(ctx->blob); 151301e04c3fSmrg tex->sampler_dim = packed.u.sampler_dim; 151401e04c3fSmrg tex->dest_type = packed.u.dest_type; 151501e04c3fSmrg tex->coord_components = packed.u.coord_components; 151601e04c3fSmrg tex->is_array = packed.u.is_array; 151701e04c3fSmrg tex->is_shadow = packed.u.is_shadow; 151801e04c3fSmrg tex->is_new_style_shadow = packed.u.is_new_style_shadow; 15197ec681f3Smrg tex->is_sparse = packed.u.is_sparse; 152001e04c3fSmrg tex->component = packed.u.component; 15217ec681f3Smrg tex->texture_non_uniform = packed.u.texture_non_uniform; 15227ec681f3Smrg tex->sampler_non_uniform = packed.u.sampler_non_uniform; 15237ec681f3Smrg tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; 152401e04c3fSmrg 152501e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 15267ec681f3Smrg union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr); 15277ec681f3Smrg tex->src[i].src_type = src.tex.src_type; 152801e04c3fSmrg } 152901e04c3fSmrg 153001e04c3fSmrg return tex; 153101e04c3fSmrg} 153201e04c3fSmrg 153301e04c3fSmrgstatic void 153401e04c3fSmrgwrite_phi(write_ctx *ctx, const nir_phi_instr *phi) 153501e04c3fSmrg{ 15367ec681f3Smrg union packed_instr header; 15377ec681f3Smrg header.u32 = 0; 15387ec681f3Smrg 15397ec681f3Smrg header.phi.instr_type = phi->instr.type; 15407ec681f3Smrg header.phi.num_srcs = exec_list_length(&phi->srcs); 15417ec681f3Smrg 154201e04c3fSmrg /* Phi nodes are special, since they may reference SSA definitions and 15437ec681f3Smrg * basic blocks that don't exist yet. We leave two empty uint32_t's here, 154401e04c3fSmrg * and then store enough information so that a later fixup pass can fill 154501e04c3fSmrg * them in correctly. 154601e04c3fSmrg */ 15477ec681f3Smrg write_dest(ctx, &phi->dest, header, phi->instr.type); 154801e04c3fSmrg 154901e04c3fSmrg nir_foreach_phi_src(src, phi) { 155001e04c3fSmrg assert(src->src.is_ssa); 15517ec681f3Smrg size_t blob_offset = blob_reserve_uint32(ctx->blob); 15527ec681f3Smrg ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob); 15537ec681f3Smrg assert(blob_offset + sizeof(uint32_t) == blob_offset2); 155401e04c3fSmrg write_phi_fixup fixup = { 155501e04c3fSmrg .blob_offset = blob_offset, 155601e04c3fSmrg .src = src->src.ssa, 155701e04c3fSmrg .block = src->pred, 155801e04c3fSmrg }; 155901e04c3fSmrg util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); 156001e04c3fSmrg } 156101e04c3fSmrg} 156201e04c3fSmrg 156301e04c3fSmrgstatic void 156401e04c3fSmrgwrite_fixup_phis(write_ctx *ctx) 156501e04c3fSmrg{ 156601e04c3fSmrg util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { 15677ec681f3Smrg uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset); 156801e04c3fSmrg blob_ptr[0] = write_lookup_object(ctx, fixup->src); 156901e04c3fSmrg blob_ptr[1] = write_lookup_object(ctx, fixup->block); 157001e04c3fSmrg } 157101e04c3fSmrg 157201e04c3fSmrg util_dynarray_clear(&ctx->phi_fixups); 157301e04c3fSmrg} 157401e04c3fSmrg 157501e04c3fSmrgstatic nir_phi_instr * 15767ec681f3Smrgread_phi(read_ctx *ctx, nir_block *blk, union packed_instr header) 157701e04c3fSmrg{ 157801e04c3fSmrg nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); 157901e04c3fSmrg 15807ec681f3Smrg read_dest(ctx, &phi->dest, &phi->instr, header); 158101e04c3fSmrg 158201e04c3fSmrg /* For similar reasons as before, we just store the index directly into the 158301e04c3fSmrg * pointer, and let a later pass resolve the phi sources. 158401e04c3fSmrg * 158501e04c3fSmrg * In order to ensure that the copied sources (which are just the indices 158601e04c3fSmrg * from the blob for now) don't get inserted into the old shader's use-def 158701e04c3fSmrg * lists, we have to add the phi instruction *before* we set up its 158801e04c3fSmrg * sources. 158901e04c3fSmrg */ 159001e04c3fSmrg nir_instr_insert_after_block(blk, &phi->instr); 159101e04c3fSmrg 15927ec681f3Smrg for (unsigned i = 0; i < header.phi.num_srcs; i++) { 15937ec681f3Smrg nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob); 15947ec681f3Smrg nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob); 15957ec681f3Smrg nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def)); 159601e04c3fSmrg 159701e04c3fSmrg /* Since we're not letting nir_insert_instr handle use/def stuff for us, 159801e04c3fSmrg * we have to set the parent_instr manually. It doesn't really matter 159901e04c3fSmrg * when we do it, so we might as well do it here. 160001e04c3fSmrg */ 160101e04c3fSmrg src->src.parent_instr = &phi->instr; 160201e04c3fSmrg 160301e04c3fSmrg /* Stash it in the list of phi sources. We'll walk this list and fix up 160401e04c3fSmrg * sources at the very end of read_function_impl. 160501e04c3fSmrg */ 160601e04c3fSmrg list_add(&src->src.use_link, &ctx->phi_srcs); 160701e04c3fSmrg } 160801e04c3fSmrg 160901e04c3fSmrg return phi; 161001e04c3fSmrg} 161101e04c3fSmrg 161201e04c3fSmrgstatic void 161301e04c3fSmrgread_fixup_phis(read_ctx *ctx) 161401e04c3fSmrg{ 161501e04c3fSmrg list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { 161601e04c3fSmrg src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); 161701e04c3fSmrg src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); 161801e04c3fSmrg 161901e04c3fSmrg /* Remove from this list */ 162001e04c3fSmrg list_del(&src->src.use_link); 162101e04c3fSmrg 162201e04c3fSmrg list_addtail(&src->src.use_link, &src->src.ssa->uses); 162301e04c3fSmrg } 16247ec681f3Smrg assert(list_is_empty(&ctx->phi_srcs)); 162501e04c3fSmrg} 162601e04c3fSmrg 162701e04c3fSmrgstatic void 162801e04c3fSmrgwrite_jump(write_ctx *ctx, const nir_jump_instr *jmp) 162901e04c3fSmrg{ 16307ec681f3Smrg /* These aren't handled because they require special block linking */ 16317ec681f3Smrg assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if); 16327ec681f3Smrg 16337ec681f3Smrg assert(jmp->type < 4); 16347ec681f3Smrg 16357ec681f3Smrg union packed_instr header; 16367ec681f3Smrg header.u32 = 0; 16377ec681f3Smrg 16387ec681f3Smrg header.jump.instr_type = jmp->instr.type; 16397ec681f3Smrg header.jump.type = jmp->type; 16407ec681f3Smrg 16417ec681f3Smrg blob_write_uint32(ctx->blob, header.u32); 164201e04c3fSmrg} 164301e04c3fSmrg 164401e04c3fSmrgstatic nir_jump_instr * 16457ec681f3Smrgread_jump(read_ctx *ctx, union packed_instr header) 164601e04c3fSmrg{ 16477ec681f3Smrg /* These aren't handled because they require special block linking */ 16487ec681f3Smrg assert(header.jump.type != nir_jump_goto && 16497ec681f3Smrg header.jump.type != nir_jump_goto_if); 16507ec681f3Smrg 16517ec681f3Smrg nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); 165201e04c3fSmrg return jmp; 165301e04c3fSmrg} 165401e04c3fSmrg 165501e04c3fSmrgstatic void 165601e04c3fSmrgwrite_call(write_ctx *ctx, const nir_call_instr *call) 165701e04c3fSmrg{ 16587ec681f3Smrg blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); 165901e04c3fSmrg 166001e04c3fSmrg for (unsigned i = 0; i < call->num_params; i++) 166101e04c3fSmrg write_src(ctx, &call->params[i]); 166201e04c3fSmrg} 166301e04c3fSmrg 166401e04c3fSmrgstatic nir_call_instr * 166501e04c3fSmrgread_call(read_ctx *ctx) 166601e04c3fSmrg{ 166701e04c3fSmrg nir_function *callee = read_object(ctx); 166801e04c3fSmrg nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); 166901e04c3fSmrg 167001e04c3fSmrg for (unsigned i = 0; i < call->num_params; i++) 167101e04c3fSmrg read_src(ctx, &call->params[i], call); 167201e04c3fSmrg 167301e04c3fSmrg return call; 167401e04c3fSmrg} 167501e04c3fSmrg 167601e04c3fSmrgstatic void 167701e04c3fSmrgwrite_instr(write_ctx *ctx, const nir_instr *instr) 167801e04c3fSmrg{ 16797ec681f3Smrg /* We have only 4 bits for the instruction type. */ 16807ec681f3Smrg assert(instr->type < 16); 16817ec681f3Smrg 168201e04c3fSmrg switch (instr->type) { 168301e04c3fSmrg case nir_instr_type_alu: 168401e04c3fSmrg write_alu(ctx, nir_instr_as_alu(instr)); 168501e04c3fSmrg break; 168601e04c3fSmrg case nir_instr_type_deref: 168701e04c3fSmrg write_deref(ctx, nir_instr_as_deref(instr)); 168801e04c3fSmrg break; 168901e04c3fSmrg case nir_instr_type_intrinsic: 169001e04c3fSmrg write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 169101e04c3fSmrg break; 169201e04c3fSmrg case nir_instr_type_load_const: 169301e04c3fSmrg write_load_const(ctx, nir_instr_as_load_const(instr)); 169401e04c3fSmrg break; 169501e04c3fSmrg case nir_instr_type_ssa_undef: 169601e04c3fSmrg write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 169701e04c3fSmrg break; 169801e04c3fSmrg case nir_instr_type_tex: 169901e04c3fSmrg write_tex(ctx, nir_instr_as_tex(instr)); 170001e04c3fSmrg break; 170101e04c3fSmrg case nir_instr_type_phi: 170201e04c3fSmrg write_phi(ctx, nir_instr_as_phi(instr)); 170301e04c3fSmrg break; 170401e04c3fSmrg case nir_instr_type_jump: 170501e04c3fSmrg write_jump(ctx, nir_instr_as_jump(instr)); 170601e04c3fSmrg break; 170701e04c3fSmrg case nir_instr_type_call: 17087ec681f3Smrg blob_write_uint32(ctx->blob, instr->type); 170901e04c3fSmrg write_call(ctx, nir_instr_as_call(instr)); 171001e04c3fSmrg break; 171101e04c3fSmrg case nir_instr_type_parallel_copy: 171201e04c3fSmrg unreachable("Cannot write parallel copies"); 171301e04c3fSmrg default: 171401e04c3fSmrg unreachable("bad instr type"); 171501e04c3fSmrg } 171601e04c3fSmrg} 171701e04c3fSmrg 17187ec681f3Smrg/* Return the number of instructions read. */ 17197ec681f3Smrgstatic unsigned 172001e04c3fSmrgread_instr(read_ctx *ctx, nir_block *block) 172101e04c3fSmrg{ 17227ec681f3Smrg STATIC_ASSERT(sizeof(union packed_instr) == 4); 17237ec681f3Smrg union packed_instr header; 17247ec681f3Smrg header.u32 = blob_read_uint32(ctx->blob); 172501e04c3fSmrg nir_instr *instr; 17267ec681f3Smrg 17277ec681f3Smrg switch (header.any.instr_type) { 172801e04c3fSmrg case nir_instr_type_alu: 17297ec681f3Smrg for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) 17307ec681f3Smrg nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); 17317ec681f3Smrg return header.alu.num_followup_alu_sharing_header + 1; 173201e04c3fSmrg case nir_instr_type_deref: 17337ec681f3Smrg instr = &read_deref(ctx, header)->instr; 173401e04c3fSmrg break; 173501e04c3fSmrg case nir_instr_type_intrinsic: 17367ec681f3Smrg instr = &read_intrinsic(ctx, header)->instr; 173701e04c3fSmrg break; 173801e04c3fSmrg case nir_instr_type_load_const: 17397ec681f3Smrg instr = &read_load_const(ctx, header)->instr; 174001e04c3fSmrg break; 174101e04c3fSmrg case nir_instr_type_ssa_undef: 17427ec681f3Smrg instr = &read_ssa_undef(ctx, header)->instr; 174301e04c3fSmrg break; 174401e04c3fSmrg case nir_instr_type_tex: 17457ec681f3Smrg instr = &read_tex(ctx, header)->instr; 174601e04c3fSmrg break; 174701e04c3fSmrg case nir_instr_type_phi: 174801e04c3fSmrg /* Phi instructions are a bit of a special case when reading because we 174901e04c3fSmrg * don't want inserting the instruction to automatically handle use/defs 175001e04c3fSmrg * for us. Instead, we need to wait until all the blocks/instructions 175101e04c3fSmrg * are read so that we can set their sources up. 175201e04c3fSmrg */ 17537ec681f3Smrg read_phi(ctx, block, header); 17547ec681f3Smrg return 1; 175501e04c3fSmrg case nir_instr_type_jump: 17567ec681f3Smrg instr = &read_jump(ctx, header)->instr; 175701e04c3fSmrg break; 175801e04c3fSmrg case nir_instr_type_call: 175901e04c3fSmrg instr = &read_call(ctx)->instr; 176001e04c3fSmrg break; 176101e04c3fSmrg case nir_instr_type_parallel_copy: 176201e04c3fSmrg unreachable("Cannot read parallel copies"); 176301e04c3fSmrg default: 176401e04c3fSmrg unreachable("bad instr type"); 176501e04c3fSmrg } 176601e04c3fSmrg 176701e04c3fSmrg nir_instr_insert_after_block(block, instr); 17687ec681f3Smrg return 1; 176901e04c3fSmrg} 177001e04c3fSmrg 177101e04c3fSmrgstatic void 177201e04c3fSmrgwrite_block(write_ctx *ctx, const nir_block *block) 177301e04c3fSmrg{ 177401e04c3fSmrg write_add_object(ctx, block); 177501e04c3fSmrg blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); 17767ec681f3Smrg 17777ec681f3Smrg ctx->last_instr_type = ~0; 17787ec681f3Smrg ctx->last_alu_header_offset = 0; 17797ec681f3Smrg 17807ec681f3Smrg nir_foreach_instr(instr, block) { 178101e04c3fSmrg write_instr(ctx, instr); 17827ec681f3Smrg ctx->last_instr_type = instr->type; 17837ec681f3Smrg } 178401e04c3fSmrg} 178501e04c3fSmrg 178601e04c3fSmrgstatic void 178701e04c3fSmrgread_block(read_ctx *ctx, struct exec_list *cf_list) 178801e04c3fSmrg{ 178901e04c3fSmrg /* Don't actually create a new block. Just use the one from the tail of 179001e04c3fSmrg * the list. NIR guarantees that the tail of the list is a block and that 179101e04c3fSmrg * no two blocks are side-by-side in the IR; It should be empty. 179201e04c3fSmrg */ 179301e04c3fSmrg nir_block *block = 179401e04c3fSmrg exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 179501e04c3fSmrg 179601e04c3fSmrg read_add_object(ctx, block); 179701e04c3fSmrg unsigned num_instrs = blob_read_uint32(ctx->blob); 17987ec681f3Smrg for (unsigned i = 0; i < num_instrs;) { 17997ec681f3Smrg i += read_instr(ctx, block); 180001e04c3fSmrg } 180101e04c3fSmrg} 180201e04c3fSmrg 180301e04c3fSmrgstatic void 180401e04c3fSmrgwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list); 180501e04c3fSmrg 180601e04c3fSmrgstatic void 180701e04c3fSmrgread_cf_list(read_ctx *ctx, struct exec_list *cf_list); 180801e04c3fSmrg 180901e04c3fSmrgstatic void 181001e04c3fSmrgwrite_if(write_ctx *ctx, nir_if *nif) 181101e04c3fSmrg{ 181201e04c3fSmrg write_src(ctx, &nif->condition); 18137ec681f3Smrg blob_write_uint8(ctx->blob, nif->control); 181401e04c3fSmrg 181501e04c3fSmrg write_cf_list(ctx, &nif->then_list); 181601e04c3fSmrg write_cf_list(ctx, &nif->else_list); 181701e04c3fSmrg} 181801e04c3fSmrg 181901e04c3fSmrgstatic void 182001e04c3fSmrgread_if(read_ctx *ctx, struct exec_list *cf_list) 182101e04c3fSmrg{ 182201e04c3fSmrg nir_if *nif = nir_if_create(ctx->nir); 182301e04c3fSmrg 182401e04c3fSmrg read_src(ctx, &nif->condition, nif); 18257ec681f3Smrg nif->control = blob_read_uint8(ctx->blob); 182601e04c3fSmrg 182701e04c3fSmrg nir_cf_node_insert_end(cf_list, &nif->cf_node); 182801e04c3fSmrg 182901e04c3fSmrg read_cf_list(ctx, &nif->then_list); 183001e04c3fSmrg read_cf_list(ctx, &nif->else_list); 183101e04c3fSmrg} 183201e04c3fSmrg 183301e04c3fSmrgstatic void 183401e04c3fSmrgwrite_loop(write_ctx *ctx, nir_loop *loop) 183501e04c3fSmrg{ 18367ec681f3Smrg blob_write_uint8(ctx->blob, loop->control); 183701e04c3fSmrg write_cf_list(ctx, &loop->body); 183801e04c3fSmrg} 183901e04c3fSmrg 184001e04c3fSmrgstatic void 184101e04c3fSmrgread_loop(read_ctx *ctx, struct exec_list *cf_list) 184201e04c3fSmrg{ 184301e04c3fSmrg nir_loop *loop = nir_loop_create(ctx->nir); 184401e04c3fSmrg 184501e04c3fSmrg nir_cf_node_insert_end(cf_list, &loop->cf_node); 184601e04c3fSmrg 18477ec681f3Smrg loop->control = blob_read_uint8(ctx->blob); 184801e04c3fSmrg read_cf_list(ctx, &loop->body); 184901e04c3fSmrg} 185001e04c3fSmrg 185101e04c3fSmrgstatic void 185201e04c3fSmrgwrite_cf_node(write_ctx *ctx, nir_cf_node *cf) 185301e04c3fSmrg{ 185401e04c3fSmrg blob_write_uint32(ctx->blob, cf->type); 185501e04c3fSmrg 185601e04c3fSmrg switch (cf->type) { 185701e04c3fSmrg case nir_cf_node_block: 185801e04c3fSmrg write_block(ctx, nir_cf_node_as_block(cf)); 185901e04c3fSmrg break; 186001e04c3fSmrg case nir_cf_node_if: 186101e04c3fSmrg write_if(ctx, nir_cf_node_as_if(cf)); 186201e04c3fSmrg break; 186301e04c3fSmrg case nir_cf_node_loop: 186401e04c3fSmrg write_loop(ctx, nir_cf_node_as_loop(cf)); 186501e04c3fSmrg break; 186601e04c3fSmrg default: 186701e04c3fSmrg unreachable("bad cf type"); 186801e04c3fSmrg } 186901e04c3fSmrg} 187001e04c3fSmrg 187101e04c3fSmrgstatic void 187201e04c3fSmrgread_cf_node(read_ctx *ctx, struct exec_list *list) 187301e04c3fSmrg{ 187401e04c3fSmrg nir_cf_node_type type = blob_read_uint32(ctx->blob); 187501e04c3fSmrg 187601e04c3fSmrg switch (type) { 187701e04c3fSmrg case nir_cf_node_block: 187801e04c3fSmrg read_block(ctx, list); 187901e04c3fSmrg break; 188001e04c3fSmrg case nir_cf_node_if: 188101e04c3fSmrg read_if(ctx, list); 188201e04c3fSmrg break; 188301e04c3fSmrg case nir_cf_node_loop: 188401e04c3fSmrg read_loop(ctx, list); 188501e04c3fSmrg break; 188601e04c3fSmrg default: 188701e04c3fSmrg unreachable("bad cf type"); 188801e04c3fSmrg } 188901e04c3fSmrg} 189001e04c3fSmrg 189101e04c3fSmrgstatic void 189201e04c3fSmrgwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list) 189301e04c3fSmrg{ 189401e04c3fSmrg blob_write_uint32(ctx->blob, exec_list_length(cf_list)); 189501e04c3fSmrg foreach_list_typed(nir_cf_node, cf, node, cf_list) { 189601e04c3fSmrg write_cf_node(ctx, cf); 189701e04c3fSmrg } 189801e04c3fSmrg} 189901e04c3fSmrg 190001e04c3fSmrgstatic void 190101e04c3fSmrgread_cf_list(read_ctx *ctx, struct exec_list *cf_list) 190201e04c3fSmrg{ 190301e04c3fSmrg uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); 190401e04c3fSmrg for (unsigned i = 0; i < num_cf_nodes; i++) 190501e04c3fSmrg read_cf_node(ctx, cf_list); 190601e04c3fSmrg} 190701e04c3fSmrg 190801e04c3fSmrgstatic void 190901e04c3fSmrgwrite_function_impl(write_ctx *ctx, const nir_function_impl *fi) 191001e04c3fSmrg{ 19117ec681f3Smrg blob_write_uint8(ctx->blob, fi->structured); 19127ec681f3Smrg 191301e04c3fSmrg write_var_list(ctx, &fi->locals); 191401e04c3fSmrg write_reg_list(ctx, &fi->registers); 191501e04c3fSmrg blob_write_uint32(ctx->blob, fi->reg_alloc); 191601e04c3fSmrg 191701e04c3fSmrg write_cf_list(ctx, &fi->body); 191801e04c3fSmrg write_fixup_phis(ctx); 191901e04c3fSmrg} 192001e04c3fSmrg 192101e04c3fSmrgstatic nir_function_impl * 192201e04c3fSmrgread_function_impl(read_ctx *ctx, nir_function *fxn) 192301e04c3fSmrg{ 192401e04c3fSmrg nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); 192501e04c3fSmrg fi->function = fxn; 192601e04c3fSmrg 19277ec681f3Smrg fi->structured = blob_read_uint8(ctx->blob); 19287ec681f3Smrg 192901e04c3fSmrg read_var_list(ctx, &fi->locals); 193001e04c3fSmrg read_reg_list(ctx, &fi->registers); 193101e04c3fSmrg fi->reg_alloc = blob_read_uint32(ctx->blob); 193201e04c3fSmrg 193301e04c3fSmrg read_cf_list(ctx, &fi->body); 193401e04c3fSmrg read_fixup_phis(ctx); 193501e04c3fSmrg 193601e04c3fSmrg fi->valid_metadata = 0; 193701e04c3fSmrg 193801e04c3fSmrg return fi; 193901e04c3fSmrg} 194001e04c3fSmrg 194101e04c3fSmrgstatic void 194201e04c3fSmrgwrite_function(write_ctx *ctx, const nir_function *fxn) 194301e04c3fSmrg{ 19447ec681f3Smrg uint32_t flags = fxn->is_entrypoint; 19457ec681f3Smrg if (fxn->name) 19467ec681f3Smrg flags |= 0x2; 19477ec681f3Smrg if (fxn->impl) 19487ec681f3Smrg flags |= 0x4; 19497ec681f3Smrg blob_write_uint32(ctx->blob, flags); 195001e04c3fSmrg if (fxn->name) 195101e04c3fSmrg blob_write_string(ctx->blob, fxn->name); 195201e04c3fSmrg 195301e04c3fSmrg write_add_object(ctx, fxn); 195401e04c3fSmrg 195501e04c3fSmrg blob_write_uint32(ctx->blob, fxn->num_params); 195601e04c3fSmrg for (unsigned i = 0; i < fxn->num_params; i++) { 195701e04c3fSmrg uint32_t val = 195801e04c3fSmrg ((uint32_t)fxn->params[i].num_components) | 195901e04c3fSmrg ((uint32_t)fxn->params[i].bit_size) << 8; 196001e04c3fSmrg blob_write_uint32(ctx->blob, val); 196101e04c3fSmrg } 196201e04c3fSmrg 196301e04c3fSmrg /* At first glance, it looks like we should write the function_impl here. 196401e04c3fSmrg * However, call instructions need to be able to reference at least the 196501e04c3fSmrg * function and those will get processed as we write the function_impls. 196601e04c3fSmrg * We stop here and write function_impls as a second pass. 196701e04c3fSmrg */ 196801e04c3fSmrg} 196901e04c3fSmrg 197001e04c3fSmrgstatic void 197101e04c3fSmrgread_function(read_ctx *ctx) 197201e04c3fSmrg{ 19737ec681f3Smrg uint32_t flags = blob_read_uint32(ctx->blob); 19747ec681f3Smrg bool has_name = flags & 0x2; 197501e04c3fSmrg char *name = has_name ? blob_read_string(ctx->blob) : NULL; 197601e04c3fSmrg 197701e04c3fSmrg nir_function *fxn = nir_function_create(ctx->nir, name); 197801e04c3fSmrg 197901e04c3fSmrg read_add_object(ctx, fxn); 198001e04c3fSmrg 198101e04c3fSmrg fxn->num_params = blob_read_uint32(ctx->blob); 198201e04c3fSmrg fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params); 198301e04c3fSmrg for (unsigned i = 0; i < fxn->num_params; i++) { 198401e04c3fSmrg uint32_t val = blob_read_uint32(ctx->blob); 198501e04c3fSmrg fxn->params[i].num_components = val & 0xff; 198601e04c3fSmrg fxn->params[i].bit_size = (val >> 8) & 0xff; 198701e04c3fSmrg } 19887e102996Smaya 19897ec681f3Smrg fxn->is_entrypoint = flags & 0x1; 19907ec681f3Smrg if (flags & 0x4) 19917ec681f3Smrg fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL; 199201e04c3fSmrg} 199301e04c3fSmrg 19947ec681f3Smrg/** 19957ec681f3Smrg * Serialize NIR into a binary blob. 19967ec681f3Smrg * 19977ec681f3Smrg * \param strip Don't serialize information only useful for debugging, 19987ec681f3Smrg * such as variable names, making cache hits from similar 19997ec681f3Smrg * shaders more likely. 20007ec681f3Smrg */ 200101e04c3fSmrgvoid 20027ec681f3Smrgnir_serialize(struct blob *blob, const nir_shader *nir, bool strip) 200301e04c3fSmrg{ 20047ec681f3Smrg write_ctx ctx = {0}; 20057e102996Smaya ctx.remap_table = _mesa_pointer_hash_table_create(NULL); 200601e04c3fSmrg ctx.blob = blob; 200701e04c3fSmrg ctx.nir = nir; 20087ec681f3Smrg ctx.strip = strip; 200901e04c3fSmrg util_dynarray_init(&ctx.phi_fixups, NULL); 201001e04c3fSmrg 20117ec681f3Smrg size_t idx_size_offset = blob_reserve_uint32(blob); 201201e04c3fSmrg 201301e04c3fSmrg struct shader_info info = nir->info; 201401e04c3fSmrg uint32_t strings = 0; 20157ec681f3Smrg if (!strip && info.name) 201601e04c3fSmrg strings |= 0x1; 20177ec681f3Smrg if (!strip && info.label) 201801e04c3fSmrg strings |= 0x2; 201901e04c3fSmrg blob_write_uint32(blob, strings); 20207ec681f3Smrg if (!strip && info.name) 202101e04c3fSmrg blob_write_string(blob, info.name); 20227ec681f3Smrg if (!strip && info.label) 202301e04c3fSmrg blob_write_string(blob, info.label); 202401e04c3fSmrg info.name = info.label = NULL; 202501e04c3fSmrg blob_write_bytes(blob, (uint8_t *) &info, sizeof(info)); 202601e04c3fSmrg 20277ec681f3Smrg write_var_list(&ctx, &nir->variables); 202801e04c3fSmrg 202901e04c3fSmrg blob_write_uint32(blob, nir->num_inputs); 203001e04c3fSmrg blob_write_uint32(blob, nir->num_uniforms); 203101e04c3fSmrg blob_write_uint32(blob, nir->num_outputs); 20327e102996Smaya blob_write_uint32(blob, nir->scratch_size); 203301e04c3fSmrg 203401e04c3fSmrg blob_write_uint32(blob, exec_list_length(&nir->functions)); 203501e04c3fSmrg nir_foreach_function(fxn, nir) { 203601e04c3fSmrg write_function(&ctx, fxn); 203701e04c3fSmrg } 203801e04c3fSmrg 203901e04c3fSmrg nir_foreach_function(fxn, nir) { 20407ec681f3Smrg if (fxn->impl) 20417ec681f3Smrg write_function_impl(&ctx, fxn->impl); 204201e04c3fSmrg } 204301e04c3fSmrg 204401e04c3fSmrg blob_write_uint32(blob, nir->constant_data_size); 204501e04c3fSmrg if (nir->constant_data_size > 0) 204601e04c3fSmrg blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); 204701e04c3fSmrg 20487ec681f3Smrg *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx; 204901e04c3fSmrg 205001e04c3fSmrg _mesa_hash_table_destroy(ctx.remap_table, NULL); 205101e04c3fSmrg util_dynarray_fini(&ctx.phi_fixups); 205201e04c3fSmrg} 205301e04c3fSmrg 205401e04c3fSmrgnir_shader * 205501e04c3fSmrgnir_deserialize(void *mem_ctx, 205601e04c3fSmrg const struct nir_shader_compiler_options *options, 205701e04c3fSmrg struct blob_reader *blob) 205801e04c3fSmrg{ 20597ec681f3Smrg read_ctx ctx = {0}; 206001e04c3fSmrg ctx.blob = blob; 206101e04c3fSmrg list_inithead(&ctx.phi_srcs); 20627ec681f3Smrg ctx.idx_table_len = blob_read_uint32(blob); 206301e04c3fSmrg ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); 206401e04c3fSmrg 206501e04c3fSmrg uint32_t strings = blob_read_uint32(blob); 206601e04c3fSmrg char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; 206701e04c3fSmrg char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; 206801e04c3fSmrg 206901e04c3fSmrg struct shader_info info; 207001e04c3fSmrg blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info)); 207101e04c3fSmrg 207201e04c3fSmrg ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); 207301e04c3fSmrg 207401e04c3fSmrg info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; 207501e04c3fSmrg info.label = label ? ralloc_strdup(ctx.nir, label) : NULL; 207601e04c3fSmrg 207701e04c3fSmrg ctx.nir->info = info; 207801e04c3fSmrg 20797ec681f3Smrg read_var_list(&ctx, &ctx.nir->variables); 208001e04c3fSmrg 208101e04c3fSmrg ctx.nir->num_inputs = blob_read_uint32(blob); 208201e04c3fSmrg ctx.nir->num_uniforms = blob_read_uint32(blob); 208301e04c3fSmrg ctx.nir->num_outputs = blob_read_uint32(blob); 20847e102996Smaya ctx.nir->scratch_size = blob_read_uint32(blob); 208501e04c3fSmrg 208601e04c3fSmrg unsigned num_functions = blob_read_uint32(blob); 208701e04c3fSmrg for (unsigned i = 0; i < num_functions; i++) 208801e04c3fSmrg read_function(&ctx); 208901e04c3fSmrg 20907ec681f3Smrg nir_foreach_function(fxn, ctx.nir) { 20917ec681f3Smrg if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL) 20927ec681f3Smrg fxn->impl = read_function_impl(&ctx, fxn); 20937ec681f3Smrg } 209401e04c3fSmrg 209501e04c3fSmrg ctx.nir->constant_data_size = blob_read_uint32(blob); 209601e04c3fSmrg if (ctx.nir->constant_data_size > 0) { 209701e04c3fSmrg ctx.nir->constant_data = 209801e04c3fSmrg ralloc_size(ctx.nir, ctx.nir->constant_data_size); 209901e04c3fSmrg blob_copy_bytes(blob, ctx.nir->constant_data, 210001e04c3fSmrg ctx.nir->constant_data_size); 210101e04c3fSmrg } 210201e04c3fSmrg 210301e04c3fSmrg free(ctx.idx_table); 210401e04c3fSmrg 21057ec681f3Smrg nir_validate_shader(ctx.nir, "after deserialize"); 21067ec681f3Smrg 210701e04c3fSmrg return ctx.nir; 210801e04c3fSmrg} 210901e04c3fSmrg 21107ec681f3Smrgvoid 21117ec681f3Smrgnir_shader_serialize_deserialize(nir_shader *shader) 211201e04c3fSmrg{ 21137ec681f3Smrg const struct nir_shader_compiler_options *options = shader->options; 211401e04c3fSmrg 211501e04c3fSmrg struct blob writer; 211601e04c3fSmrg blob_init(&writer); 21177ec681f3Smrg nir_serialize(&writer, shader, false); 21187ec681f3Smrg 21197ec681f3Smrg /* Delete all of dest's ralloc children but leave dest alone */ 21207ec681f3Smrg void *dead_ctx = ralloc_context(NULL); 21217ec681f3Smrg ralloc_adopt(dead_ctx, shader); 21227ec681f3Smrg ralloc_free(dead_ctx); 21237ec681f3Smrg 21247ec681f3Smrg dead_ctx = ralloc_context(NULL); 212501e04c3fSmrg 212601e04c3fSmrg struct blob_reader reader; 212701e04c3fSmrg blob_reader_init(&reader, writer.data, writer.size); 21287ec681f3Smrg nir_shader *copy = nir_deserialize(dead_ctx, options, &reader); 212901e04c3fSmrg 213001e04c3fSmrg blob_finish(&writer); 213101e04c3fSmrg 21327ec681f3Smrg nir_shader_replace(shader, copy); 21337ec681f3Smrg ralloc_free(dead_ctx); 213401e04c3fSmrg} 2135