1af69d88dSmrg/********************************************************** 2af69d88dSmrg * Copyright 2008-2012 VMware, Inc. All rights reserved. 3af69d88dSmrg * 4af69d88dSmrg * Permission is hereby granted, free of charge, to any person 5af69d88dSmrg * obtaining a copy of this software and associated documentation 6af69d88dSmrg * files (the "Software"), to deal in the Software without 7af69d88dSmrg * restriction, including without limitation the rights to use, copy, 8af69d88dSmrg * modify, merge, publish, distribute, sublicense, and/or sell copies 9af69d88dSmrg * of the Software, and to permit persons to whom the Software is 10af69d88dSmrg * furnished to do so, subject to the following conditions: 11af69d88dSmrg * 12af69d88dSmrg * The above copyright notice and this permission notice shall be 13af69d88dSmrg * included in all copies or substantial portions of the Software. 14af69d88dSmrg * 15af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16af69d88dSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18af69d88dSmrg * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19af69d88dSmrg * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20af69d88dSmrg * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21af69d88dSmrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22af69d88dSmrg * SOFTWARE. 23af69d88dSmrg * 24af69d88dSmrg **********************************************************/ 25af69d88dSmrg 26af69d88dSmrg#include "util/u_bitmask.h" 27af69d88dSmrg#include "util/u_memory.h" 287ec681f3Smrg#include "util/format/u_format.h" 29af69d88dSmrg#include "svga_context.h" 30af69d88dSmrg#include "svga_cmd.h" 3101e04c3fSmrg#include "svga_format.h" 32af69d88dSmrg#include "svga_shader.h" 3301e04c3fSmrg#include "svga_resource_texture.h" 347ec681f3Smrg#include "VGPU10ShaderTokens.h" 35af69d88dSmrg 36af69d88dSmrg 3701e04c3fSmrg/** 3801e04c3fSmrg * This bit isn't really used anywhere. It only serves to help 3901e04c3fSmrg * generate a unique "signature" for the vertex shader output bitmask. 4001e04c3fSmrg * Shader input/output signatures are used to resolve shader linking 4101e04c3fSmrg * issues. 4201e04c3fSmrg */ 4301e04c3fSmrg#define FOG_GENERIC_BIT (((uint64_t) 1) << 63) 4401e04c3fSmrg 4501e04c3fSmrg 4601e04c3fSmrg/** 4701e04c3fSmrg * Use the shader info to generate a bitmask indicating which generic 4801e04c3fSmrg * inputs are used by the shader. A set bit indicates that GENERIC[i] 4901e04c3fSmrg * is used. 5001e04c3fSmrg */ 5101e04c3fSmrguint64_t 5201e04c3fSmrgsvga_get_generic_inputs_mask(const struct tgsi_shader_info *info) 5301e04c3fSmrg{ 5401e04c3fSmrg unsigned i; 5501e04c3fSmrg uint64_t mask = 0x0; 5601e04c3fSmrg 5701e04c3fSmrg for (i = 0; i < info->num_inputs; i++) { 5801e04c3fSmrg if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { 5901e04c3fSmrg unsigned j = info->input_semantic_index[i]; 6001e04c3fSmrg assert(j < sizeof(mask) * 8); 6101e04c3fSmrg mask |= ((uint64_t) 1) << j; 6201e04c3fSmrg } 6301e04c3fSmrg } 6401e04c3fSmrg 6501e04c3fSmrg return mask; 6601e04c3fSmrg} 6701e04c3fSmrg 6801e04c3fSmrg 6901e04c3fSmrg/** 7001e04c3fSmrg * Scan shader info to return a bitmask of written outputs. 7101e04c3fSmrg */ 7201e04c3fSmrguint64_t 7301e04c3fSmrgsvga_get_generic_outputs_mask(const struct tgsi_shader_info *info) 7401e04c3fSmrg{ 7501e04c3fSmrg unsigned i; 7601e04c3fSmrg uint64_t mask = 0x0; 7701e04c3fSmrg 7801e04c3fSmrg for (i = 0; i < info->num_outputs; i++) { 7901e04c3fSmrg switch (info->output_semantic_name[i]) { 8001e04c3fSmrg case TGSI_SEMANTIC_GENERIC: 8101e04c3fSmrg { 8201e04c3fSmrg unsigned j = info->output_semantic_index[i]; 8301e04c3fSmrg assert(j < sizeof(mask) * 8); 8401e04c3fSmrg mask |= ((uint64_t) 1) << j; 8501e04c3fSmrg } 8601e04c3fSmrg break; 8701e04c3fSmrg case TGSI_SEMANTIC_FOG: 8801e04c3fSmrg mask |= FOG_GENERIC_BIT; 8901e04c3fSmrg break; 9001e04c3fSmrg } 9101e04c3fSmrg } 9201e04c3fSmrg 9301e04c3fSmrg return mask; 9401e04c3fSmrg} 9501e04c3fSmrg 9601e04c3fSmrg 9701e04c3fSmrg 9801e04c3fSmrg/** 9901e04c3fSmrg * Given a mask of used generic variables (as returned by the above functions) 10001e04c3fSmrg * fill in a table which maps those indexes to small integers. 10101e04c3fSmrg * This table is used by the remap_generic_index() function in 10201e04c3fSmrg * svga_tgsi_decl_sm30.c 10301e04c3fSmrg * Example: if generics_mask = binary(1010) it means that GENERIC[1] and 10401e04c3fSmrg * GENERIC[3] are used. The remap_table will contain: 10501e04c3fSmrg * table[1] = 0; 10601e04c3fSmrg * table[3] = 1; 10701e04c3fSmrg * The remaining table entries will be filled in with the next unused 10801e04c3fSmrg * generic index (in this example, 2). 10901e04c3fSmrg */ 11001e04c3fSmrgvoid 11101e04c3fSmrgsvga_remap_generics(uint64_t generics_mask, 11201e04c3fSmrg int8_t remap_table[MAX_GENERIC_VARYING]) 11301e04c3fSmrg{ 11401e04c3fSmrg /* Note texcoord[0] is reserved so start at 1 */ 11501e04c3fSmrg unsigned count = 1, i; 11601e04c3fSmrg 11701e04c3fSmrg for (i = 0; i < MAX_GENERIC_VARYING; i++) { 11801e04c3fSmrg remap_table[i] = -1; 11901e04c3fSmrg } 12001e04c3fSmrg 12101e04c3fSmrg /* for each bit set in generic_mask */ 12201e04c3fSmrg while (generics_mask) { 12301e04c3fSmrg unsigned index = ffsll(generics_mask) - 1; 12401e04c3fSmrg remap_table[index] = count++; 12501e04c3fSmrg generics_mask &= ~((uint64_t) 1 << index); 12601e04c3fSmrg } 12701e04c3fSmrg} 12801e04c3fSmrg 12901e04c3fSmrg 13001e04c3fSmrg/** 13101e04c3fSmrg * Use the generic remap table to map a TGSI generic varying variable 13201e04c3fSmrg * index to a small integer. If the remapping table doesn't have a 13301e04c3fSmrg * valid value for the given index (the table entry is -1) it means 13401e04c3fSmrg * the fragment shader doesn't use that VS output. Just allocate 13501e04c3fSmrg * the next free value in that case. Alternately, we could cull 13601e04c3fSmrg * VS instructions that write to register, or replace the register 13701e04c3fSmrg * with a dummy temp register. 13801e04c3fSmrg * XXX TODO: we should do one of the later as it would save precious 13901e04c3fSmrg * texcoord registers. 14001e04c3fSmrg */ 14101e04c3fSmrgint 14201e04c3fSmrgsvga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], 14301e04c3fSmrg int generic_index) 14401e04c3fSmrg{ 14501e04c3fSmrg assert(generic_index < MAX_GENERIC_VARYING); 14601e04c3fSmrg 14701e04c3fSmrg if (generic_index >= MAX_GENERIC_VARYING) { 14801e04c3fSmrg /* just don't return a random/garbage value */ 14901e04c3fSmrg generic_index = MAX_GENERIC_VARYING - 1; 15001e04c3fSmrg } 15101e04c3fSmrg 15201e04c3fSmrg if (remap_table[generic_index] == -1) { 15301e04c3fSmrg /* This is a VS output that has no matching PS input. Find a 15401e04c3fSmrg * free index. 15501e04c3fSmrg */ 15601e04c3fSmrg int i, max = 0; 15701e04c3fSmrg for (i = 0; i < MAX_GENERIC_VARYING; i++) { 15801e04c3fSmrg max = MAX2(max, remap_table[i]); 15901e04c3fSmrg } 16001e04c3fSmrg remap_table[generic_index] = max + 1; 16101e04c3fSmrg } 16201e04c3fSmrg 16301e04c3fSmrg return remap_table[generic_index]; 16401e04c3fSmrg} 16501e04c3fSmrg 16601e04c3fSmrgstatic const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = { 16701e04c3fSmrg PIPE_SWIZZLE_X, 16801e04c3fSmrg PIPE_SWIZZLE_Y, 16901e04c3fSmrg PIPE_SWIZZLE_Z, 17001e04c3fSmrg PIPE_SWIZZLE_W, 17101e04c3fSmrg PIPE_SWIZZLE_0, 17201e04c3fSmrg PIPE_SWIZZLE_1, 17301e04c3fSmrg PIPE_SWIZZLE_NONE 17401e04c3fSmrg}; 17501e04c3fSmrg 17601e04c3fSmrgstatic const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = { 17701e04c3fSmrg PIPE_SWIZZLE_X, 17801e04c3fSmrg PIPE_SWIZZLE_Y, 17901e04c3fSmrg PIPE_SWIZZLE_Z, 18001e04c3fSmrg PIPE_SWIZZLE_1, 18101e04c3fSmrg PIPE_SWIZZLE_0, 18201e04c3fSmrg PIPE_SWIZZLE_1, 18301e04c3fSmrg PIPE_SWIZZLE_NONE 18401e04c3fSmrg}; 18501e04c3fSmrg 18601e04c3fSmrgstatic const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = { 18701e04c3fSmrg PIPE_SWIZZLE_0, 18801e04c3fSmrg PIPE_SWIZZLE_0, 18901e04c3fSmrg PIPE_SWIZZLE_0, 19001e04c3fSmrg PIPE_SWIZZLE_X, 19101e04c3fSmrg PIPE_SWIZZLE_0, 19201e04c3fSmrg PIPE_SWIZZLE_1, 19301e04c3fSmrg PIPE_SWIZZLE_NONE 19401e04c3fSmrg}; 19501e04c3fSmrg 19601e04c3fSmrgstatic const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = { 19701e04c3fSmrg PIPE_SWIZZLE_X, 19801e04c3fSmrg PIPE_SWIZZLE_X, 19901e04c3fSmrg PIPE_SWIZZLE_X, 20001e04c3fSmrg PIPE_SWIZZLE_X, 20101e04c3fSmrg PIPE_SWIZZLE_0, 20201e04c3fSmrg PIPE_SWIZZLE_1, 20301e04c3fSmrg PIPE_SWIZZLE_NONE 20401e04c3fSmrg}; 20501e04c3fSmrg 20601e04c3fSmrgstatic const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = { 20701e04c3fSmrg PIPE_SWIZZLE_X, 20801e04c3fSmrg PIPE_SWIZZLE_X, 20901e04c3fSmrg PIPE_SWIZZLE_X, 21001e04c3fSmrg PIPE_SWIZZLE_1, 21101e04c3fSmrg PIPE_SWIZZLE_0, 21201e04c3fSmrg PIPE_SWIZZLE_1, 21301e04c3fSmrg PIPE_SWIZZLE_NONE 21401e04c3fSmrg}; 21501e04c3fSmrg 21601e04c3fSmrgstatic const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = { 21701e04c3fSmrg PIPE_SWIZZLE_X, 21801e04c3fSmrg PIPE_SWIZZLE_X, 21901e04c3fSmrg PIPE_SWIZZLE_X, 22001e04c3fSmrg PIPE_SWIZZLE_Y, 22101e04c3fSmrg PIPE_SWIZZLE_0, 22201e04c3fSmrg PIPE_SWIZZLE_1, 22301e04c3fSmrg PIPE_SWIZZLE_NONE 22401e04c3fSmrg}; 22501e04c3fSmrg 22601e04c3fSmrg 2277ec681f3Smrgstatic VGPU10_RESOURCE_RETURN_TYPE 2287ec681f3Smrgvgpu10_return_type(enum pipe_format format) 2297ec681f3Smrg{ 2307ec681f3Smrg if (util_format_is_unorm(format)) 2317ec681f3Smrg return VGPU10_RETURN_TYPE_UNORM; 2327ec681f3Smrg else if (util_format_is_snorm(format)) 2337ec681f3Smrg return VGPU10_RETURN_TYPE_SNORM; 2347ec681f3Smrg else if (util_format_is_pure_uint(format)) 2357ec681f3Smrg return VGPU10_RETURN_TYPE_UINT; 2367ec681f3Smrg else if (util_format_is_pure_sint(format)) 2377ec681f3Smrg return VGPU10_RETURN_TYPE_SINT; 2387ec681f3Smrg else if (util_format_is_float(format)) 2397ec681f3Smrg return VGPU10_RETURN_TYPE_FLOAT; 2407ec681f3Smrg else 2417ec681f3Smrg return VGPU10_RETURN_TYPE_MAX; 2427ec681f3Smrg} 2437ec681f3Smrg 2447ec681f3Smrg 24501e04c3fSmrg/** 24601e04c3fSmrg * Initialize the shader-neutral fields of svga_compile_key from context 24701e04c3fSmrg * state. This is basically the texture-related state. 24801e04c3fSmrg */ 24901e04c3fSmrgvoid 25001e04c3fSmrgsvga_init_shader_key_common(const struct svga_context *svga, 2517ec681f3Smrg enum pipe_shader_type shader_type, 2527ec681f3Smrg const struct svga_shader *shader, 25301e04c3fSmrg struct svga_compile_key *key) 25401e04c3fSmrg{ 25501e04c3fSmrg unsigned i, idx = 0; 25601e04c3fSmrg 2577ec681f3Smrg assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views)); 25801e04c3fSmrg 25901e04c3fSmrg /* In case the number of samplers and sampler_views doesn't match, 26001e04c3fSmrg * loop over the lower of the two counts. 26101e04c3fSmrg */ 2627ec681f3Smrg key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type], 2637ec681f3Smrg svga->curr.num_samplers[shader_type]); 26401e04c3fSmrg 26501e04c3fSmrg for (i = 0; i < key->num_textures; i++) { 2667ec681f3Smrg struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i]; 2677ec681f3Smrg const struct svga_sampler_state 2687ec681f3Smrg *sampler = svga->curr.sampler[shader_type][i]; 2697ec681f3Smrg 27001e04c3fSmrg if (view) { 27101e04c3fSmrg assert(view->texture); 27201e04c3fSmrg assert(view->texture->target < (1 << 4)); /* texture_target:4 */ 27301e04c3fSmrg 2747ec681f3Smrg enum pipe_texture_target target = view->target; 2757ec681f3Smrg 2767ec681f3Smrg key->tex[i].target = target; 2777ec681f3Smrg key->tex[i].sampler_return_type = vgpu10_return_type(view->format); 2787ec681f3Smrg key->tex[i].sampler_view = 1; 2797ec681f3Smrg 2807ec681f3Smrg 28101e04c3fSmrg /* 1D/2D array textures with one slice and cube map array textures 28201e04c3fSmrg * with one cube are treated as non-arrays by the SVGA3D device. 28301e04c3fSmrg * Set the is_array flag only if we know that we have more than 1 28401e04c3fSmrg * element. This will be used to select shader instruction/resource 28501e04c3fSmrg * types during shader translation. 28601e04c3fSmrg */ 28701e04c3fSmrg switch (view->texture->target) { 28801e04c3fSmrg case PIPE_TEXTURE_1D_ARRAY: 28901e04c3fSmrg case PIPE_TEXTURE_2D_ARRAY: 29001e04c3fSmrg key->tex[i].is_array = view->texture->array_size > 1; 29101e04c3fSmrg break; 29201e04c3fSmrg case PIPE_TEXTURE_CUBE_ARRAY: 29301e04c3fSmrg key->tex[i].is_array = view->texture->array_size > 6; 29401e04c3fSmrg break; 29501e04c3fSmrg default: 29601e04c3fSmrg ; /* nothing / silence compiler warning */ 29701e04c3fSmrg } 29801e04c3fSmrg 29901e04c3fSmrg assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */ 30001e04c3fSmrg key->tex[i].num_samples = view->texture->nr_samples; 30101e04c3fSmrg 30201e04c3fSmrg const enum pipe_swizzle *swizzle_tab; 30301e04c3fSmrg if (view->texture->target == PIPE_BUFFER) { 30401e04c3fSmrg SVGA3dSurfaceFormat svga_format; 30501e04c3fSmrg unsigned tf_flags; 30601e04c3fSmrg 30701e04c3fSmrg /* Apply any special swizzle mask for the view format if needed */ 30801e04c3fSmrg 30901e04c3fSmrg svga_translate_texture_buffer_view_format(view->format, 31001e04c3fSmrg &svga_format, &tf_flags); 31101e04c3fSmrg if (tf_flags & TF_000X) 31201e04c3fSmrg swizzle_tab = set_000X; 31301e04c3fSmrg else if (tf_flags & TF_XXXX) 31401e04c3fSmrg swizzle_tab = set_XXXX; 31501e04c3fSmrg else if (tf_flags & TF_XXX1) 31601e04c3fSmrg swizzle_tab = set_XXX1; 31701e04c3fSmrg else if (tf_flags & TF_XXXY) 31801e04c3fSmrg swizzle_tab = set_XXXY; 31901e04c3fSmrg else 32001e04c3fSmrg swizzle_tab = copy_alpha; 32101e04c3fSmrg } 32201e04c3fSmrg else { 32301e04c3fSmrg /* If we have a non-alpha view into an svga3d surface with an 32401e04c3fSmrg * alpha channel, then explicitly set the alpha channel to 1 32501e04c3fSmrg * when sampling. Note that we need to check the 32601e04c3fSmrg * actual device format to cover also imported surface cases. 32701e04c3fSmrg */ 32801e04c3fSmrg swizzle_tab = 32901e04c3fSmrg (!util_format_has_alpha(view->format) && 33001e04c3fSmrg svga_texture_device_format_has_alpha(view->texture)) ? 33101e04c3fSmrg set_alpha : copy_alpha; 33201e04c3fSmrg 33301e04c3fSmrg if (view->texture->format == PIPE_FORMAT_DXT1_RGB || 33401e04c3fSmrg view->texture->format == PIPE_FORMAT_DXT1_SRGB) 33501e04c3fSmrg swizzle_tab = set_alpha; 3367ec681f3Smrg 3377ec681f3Smrg /* Save the compare function as we need to handle 3387ec681f3Smrg * depth compare in the shader. 3397ec681f3Smrg */ 3407ec681f3Smrg key->tex[i].compare_mode = sampler->compare_mode; 3417ec681f3Smrg key->tex[i].compare_func = sampler->compare_func; 34201e04c3fSmrg } 34301e04c3fSmrg 34401e04c3fSmrg key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r]; 34501e04c3fSmrg key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g]; 34601e04c3fSmrg key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b]; 34701e04c3fSmrg key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a]; 34801e04c3fSmrg } 3497ec681f3Smrg else { 3507ec681f3Smrg key->tex[i].sampler_view = 0; 3517ec681f3Smrg } 35201e04c3fSmrg 35301e04c3fSmrg if (sampler) { 35401e04c3fSmrg if (!sampler->normalized_coords) { 3557ec681f3Smrg if (view) { 3567ec681f3Smrg assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ 3577ec681f3Smrg key->tex[i].width_height_idx = idx++; 3587ec681f3Smrg } 35901e04c3fSmrg key->tex[i].unnormalized = TRUE; 36001e04c3fSmrg ++key->num_unnormalized_coords; 36101e04c3fSmrg 36201e04c3fSmrg if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST || 36301e04c3fSmrg sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) { 36401e04c3fSmrg key->tex[i].texel_bias = TRUE; 36501e04c3fSmrg } 36601e04c3fSmrg } 36701e04c3fSmrg } 36801e04c3fSmrg } 3697ec681f3Smrg 3707ec681f3Smrg key->clamp_vertex_color = svga->curr.rast ? 3717ec681f3Smrg svga->curr.rast->templ.clamp_vertex_color : 0; 37201e04c3fSmrg} 37301e04c3fSmrg 37401e04c3fSmrg 37501e04c3fSmrg/** Search for a compiled shader variant with the same compile key */ 37601e04c3fSmrgstruct svga_shader_variant * 37701e04c3fSmrgsvga_search_shader_key(const struct svga_shader *shader, 37801e04c3fSmrg const struct svga_compile_key *key) 37901e04c3fSmrg{ 38001e04c3fSmrg struct svga_shader_variant *variant = shader->variants; 38101e04c3fSmrg 38201e04c3fSmrg assert(key); 38301e04c3fSmrg 38401e04c3fSmrg for ( ; variant; variant = variant->next) { 38501e04c3fSmrg if (svga_compile_keys_equal(key, &variant->key)) 38601e04c3fSmrg return variant; 38701e04c3fSmrg } 38801e04c3fSmrg return NULL; 38901e04c3fSmrg} 39001e04c3fSmrg 39101e04c3fSmrg/** Search for a shader with the same token key */ 39201e04c3fSmrgstruct svga_shader * 39301e04c3fSmrgsvga_search_shader_token_key(struct svga_shader *pshader, 39401e04c3fSmrg const struct svga_token_key *key) 39501e04c3fSmrg{ 39601e04c3fSmrg struct svga_shader *shader = pshader; 39701e04c3fSmrg 39801e04c3fSmrg assert(key); 39901e04c3fSmrg 40001e04c3fSmrg for ( ; shader; shader = shader->next) { 40101e04c3fSmrg if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0) 40201e04c3fSmrg return shader; 40301e04c3fSmrg } 40401e04c3fSmrg return NULL; 40501e04c3fSmrg} 40601e04c3fSmrg 40701e04c3fSmrg/** 40801e04c3fSmrg * Helper function to define a gb shader for non-vgpu10 device 40901e04c3fSmrg */ 41001e04c3fSmrgstatic enum pipe_error 41101e04c3fSmrgdefine_gb_shader_vgpu9(struct svga_context *svga, 41201e04c3fSmrg struct svga_shader_variant *variant, 41301e04c3fSmrg unsigned codeLen) 41401e04c3fSmrg{ 41501e04c3fSmrg struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 41601e04c3fSmrg enum pipe_error ret; 41701e04c3fSmrg 41801e04c3fSmrg /** 41901e04c3fSmrg * Create gb memory for the shader and upload the shader code. 42001e04c3fSmrg * Kernel module will allocate an id for the shader and issue 42101e04c3fSmrg * the DefineGBShader command. 42201e04c3fSmrg */ 4239f464c52Smaya variant->gb_shader = sws->shader_create(sws, variant->type, 42401e04c3fSmrg variant->tokens, codeLen); 42501e04c3fSmrg 4267ec681f3Smrg svga->hud.shader_mem_used += codeLen; 4277ec681f3Smrg 42801e04c3fSmrg if (!variant->gb_shader) 42901e04c3fSmrg return PIPE_ERROR_OUT_OF_MEMORY; 43001e04c3fSmrg 43101e04c3fSmrg ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); 43201e04c3fSmrg 43301e04c3fSmrg return ret; 43401e04c3fSmrg} 43501e04c3fSmrg 43601e04c3fSmrg/** 43701e04c3fSmrg * Helper function to define a gb shader for vgpu10 device 43801e04c3fSmrg */ 43901e04c3fSmrgstatic enum pipe_error 44001e04c3fSmrgdefine_gb_shader_vgpu10(struct svga_context *svga, 44101e04c3fSmrg struct svga_shader_variant *variant, 44201e04c3fSmrg unsigned codeLen) 44301e04c3fSmrg{ 44401e04c3fSmrg struct svga_winsys_context *swc = svga->swc; 44501e04c3fSmrg enum pipe_error ret; 4467ec681f3Smrg unsigned len = codeLen + variant->signatureLen; 44701e04c3fSmrg 44801e04c3fSmrg /** 44901e04c3fSmrg * Shaders in VGPU10 enabled device reside in the device COTable. 45001e04c3fSmrg * SVGA driver will allocate an integer ID for the shader and 45101e04c3fSmrg * issue DXDefineShader and DXBindShader commands. 45201e04c3fSmrg */ 45301e04c3fSmrg variant->id = util_bitmask_add(svga->shader_id_bm); 45401e04c3fSmrg if (variant->id == UTIL_BITMASK_INVALID_INDEX) { 45501e04c3fSmrg return PIPE_ERROR_OUT_OF_MEMORY; 45601e04c3fSmrg } 45701e04c3fSmrg 45801e04c3fSmrg /* Create gb memory for the shader and upload the shader code */ 45901e04c3fSmrg variant->gb_shader = swc->shader_create(swc, 4609f464c52Smaya variant->id, variant->type, 4617ec681f3Smrg variant->tokens, codeLen, 4627ec681f3Smrg variant->signature, 4637ec681f3Smrg variant->signatureLen); 4647ec681f3Smrg 4657ec681f3Smrg svga->hud.shader_mem_used += len; 46601e04c3fSmrg 46701e04c3fSmrg if (!variant->gb_shader) { 46801e04c3fSmrg /* Free the shader ID */ 46901e04c3fSmrg assert(variant->id != UTIL_BITMASK_INVALID_INDEX); 47001e04c3fSmrg goto fail_no_allocation; 47101e04c3fSmrg } 47201e04c3fSmrg 47301e04c3fSmrg /** 47401e04c3fSmrg * Since we don't want to do any flush within state emission to avoid 47501e04c3fSmrg * partial state in a command buffer, it's important to make sure that 47601e04c3fSmrg * there is enough room to send both the DXDefineShader & DXBindShader 47701e04c3fSmrg * commands in the same command buffer. So let's send both 47801e04c3fSmrg * commands in one command reservation. If it fails, we'll undo 47901e04c3fSmrg * the shader creation and return an error. 48001e04c3fSmrg */ 48101e04c3fSmrg ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader, 4827ec681f3Smrg variant->id, variant->type, 4837ec681f3Smrg len); 48401e04c3fSmrg 48501e04c3fSmrg if (ret != PIPE_OK) 48601e04c3fSmrg goto fail; 48701e04c3fSmrg 48801e04c3fSmrg return PIPE_OK; 48901e04c3fSmrg 49001e04c3fSmrgfail: 49101e04c3fSmrg swc->shader_destroy(swc, variant->gb_shader); 49201e04c3fSmrg variant->gb_shader = NULL; 49301e04c3fSmrg 49401e04c3fSmrgfail_no_allocation: 49501e04c3fSmrg util_bitmask_clear(svga->shader_id_bm, variant->id); 49601e04c3fSmrg variant->id = UTIL_BITMASK_INVALID_INDEX; 49701e04c3fSmrg 49801e04c3fSmrg return PIPE_ERROR_OUT_OF_MEMORY; 49901e04c3fSmrg} 500af69d88dSmrg 501af69d88dSmrg/** 502af69d88dSmrg * Issue the SVGA3D commands to define a new shader. 50301e04c3fSmrg * \param variant contains the shader tokens, etc. The result->id field will 50401e04c3fSmrg * be set here. 505af69d88dSmrg */ 506af69d88dSmrgenum pipe_error 507af69d88dSmrgsvga_define_shader(struct svga_context *svga, 508af69d88dSmrg struct svga_shader_variant *variant) 509af69d88dSmrg{ 510af69d88dSmrg unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]); 51101e04c3fSmrg enum pipe_error ret; 512af69d88dSmrg 51301e04c3fSmrg SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER); 514af69d88dSmrg 51501e04c3fSmrg variant->id = UTIL_BITMASK_INVALID_INDEX; 516af69d88dSmrg 51701e04c3fSmrg if (svga_have_gb_objects(svga)) { 51801e04c3fSmrg if (svga_have_vgpu10(svga)) 5199f464c52Smaya ret = define_gb_shader_vgpu10(svga, variant, codeLen); 52001e04c3fSmrg else 5219f464c52Smaya ret = define_gb_shader_vgpu9(svga, variant, codeLen); 522af69d88dSmrg } 523af69d88dSmrg else { 524af69d88dSmrg /* Allocate an integer ID for the shader */ 525af69d88dSmrg variant->id = util_bitmask_add(svga->shader_id_bm); 526af69d88dSmrg if (variant->id == UTIL_BITMASK_INVALID_INDEX) { 52701e04c3fSmrg ret = PIPE_ERROR_OUT_OF_MEMORY; 52801e04c3fSmrg goto done; 529af69d88dSmrg } 530af69d88dSmrg 531af69d88dSmrg /* Issue SVGA3D device command to define the shader */ 532af69d88dSmrg ret = SVGA3D_DefineShader(svga->swc, 533af69d88dSmrg variant->id, 5349f464c52Smaya variant->type, 535af69d88dSmrg variant->tokens, 536af69d88dSmrg codeLen); 537af69d88dSmrg if (ret != PIPE_OK) { 538af69d88dSmrg /* free the ID */ 539af69d88dSmrg assert(variant->id != UTIL_BITMASK_INVALID_INDEX); 540af69d88dSmrg util_bitmask_clear(svga->shader_id_bm, variant->id); 541af69d88dSmrg variant->id = UTIL_BITMASK_INVALID_INDEX; 542af69d88dSmrg } 543af69d88dSmrg } 544af69d88dSmrg 54501e04c3fSmrgdone: 54601e04c3fSmrg SVGA_STATS_TIME_POP(svga_sws(svga)); 54701e04c3fSmrg return ret; 548af69d88dSmrg} 549af69d88dSmrg 550af69d88dSmrg 55101e04c3fSmrg/** 55201e04c3fSmrg * Issue the SVGA3D commands to set/bind a shader. 55301e04c3fSmrg * \param result the shader to bind. 55401e04c3fSmrg */ 555af69d88dSmrgenum pipe_error 55601e04c3fSmrgsvga_set_shader(struct svga_context *svga, 55701e04c3fSmrg SVGA3dShaderType type, 55801e04c3fSmrg struct svga_shader_variant *variant) 559af69d88dSmrg{ 56001e04c3fSmrg enum pipe_error ret; 56101e04c3fSmrg unsigned id = variant ? variant->id : SVGA3D_INVALID_ID; 56201e04c3fSmrg 56301e04c3fSmrg assert(type == SVGA3D_SHADERTYPE_VS || 56401e04c3fSmrg type == SVGA3D_SHADERTYPE_GS || 5657ec681f3Smrg type == SVGA3D_SHADERTYPE_PS || 5667ec681f3Smrg type == SVGA3D_SHADERTYPE_HS || 5677ec681f3Smrg type == SVGA3D_SHADERTYPE_DS || 5687ec681f3Smrg type == SVGA3D_SHADERTYPE_CS); 569af69d88dSmrg 570af69d88dSmrg if (svga_have_gb_objects(svga)) { 57101e04c3fSmrg struct svga_winsys_gb_shader *gbshader = 57201e04c3fSmrg variant ? variant->gb_shader : NULL; 573af69d88dSmrg 57401e04c3fSmrg if (svga_have_vgpu10(svga)) 57501e04c3fSmrg ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id); 57601e04c3fSmrg else 57701e04c3fSmrg ret = SVGA3D_SetGBShader(svga->swc, type, gbshader); 57801e04c3fSmrg } 57901e04c3fSmrg else { 58001e04c3fSmrg ret = SVGA3D_SetShader(svga->swc, type, id); 581af69d88dSmrg } 582af69d88dSmrg 58301e04c3fSmrg return ret; 58401e04c3fSmrg} 58501e04c3fSmrg 586af69d88dSmrg 58701e04c3fSmrgstruct svga_shader_variant * 5889f464c52Smayasvga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type) 58901e04c3fSmrg{ 5907ec681f3Smrg struct svga_shader_variant *variant; 5917ec681f3Smrg 5927ec681f3Smrg switch (type) { 5937ec681f3Smrg case PIPE_SHADER_FRAGMENT: 5947ec681f3Smrg variant = CALLOC(1, sizeof(struct svga_fs_variant)); 5957ec681f3Smrg break; 5967ec681f3Smrg case PIPE_SHADER_GEOMETRY: 5977ec681f3Smrg variant = CALLOC(1, sizeof(struct svga_gs_variant)); 5987ec681f3Smrg break; 5997ec681f3Smrg case PIPE_SHADER_VERTEX: 6007ec681f3Smrg variant = CALLOC(1, sizeof(struct svga_vs_variant)); 6017ec681f3Smrg break; 6027ec681f3Smrg case PIPE_SHADER_TESS_EVAL: 6037ec681f3Smrg variant = CALLOC(1, sizeof(struct svga_tes_variant)); 6047ec681f3Smrg break; 6057ec681f3Smrg case PIPE_SHADER_TESS_CTRL: 6067ec681f3Smrg variant = CALLOC(1, sizeof(struct svga_tcs_variant)); 6077ec681f3Smrg break; 6087ec681f3Smrg default: 6097ec681f3Smrg return NULL; 6107ec681f3Smrg } 6119f464c52Smaya 6129f464c52Smaya if (variant) { 6139f464c52Smaya variant->type = svga_shader_type(type); 6149f464c52Smaya svga->hud.num_shaders++; 6159f464c52Smaya } 6169f464c52Smaya return variant; 61701e04c3fSmrg} 61801e04c3fSmrg 61901e04c3fSmrg 62001e04c3fSmrgvoid 62101e04c3fSmrgsvga_destroy_shader_variant(struct svga_context *svga, 62201e04c3fSmrg struct svga_shader_variant *variant) 62301e04c3fSmrg{ 62401e04c3fSmrg if (svga_have_gb_objects(svga) && variant->gb_shader) { 62501e04c3fSmrg if (svga_have_vgpu10(svga)) { 62601e04c3fSmrg struct svga_winsys_context *swc = svga->swc; 62701e04c3fSmrg swc->shader_destroy(swc, variant->gb_shader); 6287ec681f3Smrg SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id)); 62901e04c3fSmrg util_bitmask_clear(svga->shader_id_bm, variant->id); 63001e04c3fSmrg } 63101e04c3fSmrg else { 63201e04c3fSmrg struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 63301e04c3fSmrg sws->shader_destroy(sws, variant->gb_shader); 63401e04c3fSmrg } 63501e04c3fSmrg variant->gb_shader = NULL; 63601e04c3fSmrg } 63701e04c3fSmrg else { 63801e04c3fSmrg if (variant->id != UTIL_BITMASK_INVALID_INDEX) { 6397ec681f3Smrg SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id, 6407ec681f3Smrg variant->type)); 64101e04c3fSmrg util_bitmask_clear(svga->shader_id_bm, variant->id); 642af69d88dSmrg } 643af69d88dSmrg } 644af69d88dSmrg 6457ec681f3Smrg FREE(variant->signature); 646af69d88dSmrg FREE((unsigned *)variant->tokens); 647af69d88dSmrg FREE(variant); 648af69d88dSmrg 64901e04c3fSmrg svga->hud.num_shaders--; 65001e04c3fSmrg} 65101e04c3fSmrg 65201e04c3fSmrg/* 65301e04c3fSmrg * Rebind shaders. 65401e04c3fSmrg * Called at the beginning of every new command buffer to ensure that 65501e04c3fSmrg * shaders are properly paged-in. Instead of sending the SetShader 65601e04c3fSmrg * command, this function sends a private allocation command to 65701e04c3fSmrg * page in a shader. This avoids emitting redundant state to the device 65801e04c3fSmrg * just to page in a resource. 65901e04c3fSmrg */ 66001e04c3fSmrgenum pipe_error 66101e04c3fSmrgsvga_rebind_shaders(struct svga_context *svga) 66201e04c3fSmrg{ 66301e04c3fSmrg struct svga_winsys_context *swc = svga->swc; 66401e04c3fSmrg struct svga_hw_draw_state *hw = &svga->state.hw_draw; 66501e04c3fSmrg enum pipe_error ret; 66601e04c3fSmrg 66701e04c3fSmrg assert(svga_have_vgpu10(svga)); 66801e04c3fSmrg 66901e04c3fSmrg /** 67001e04c3fSmrg * If the underlying winsys layer does not need resource rebinding, 67101e04c3fSmrg * just clear the rebind flags and return. 67201e04c3fSmrg */ 67301e04c3fSmrg if (swc->resource_rebind == NULL) { 67401e04c3fSmrg svga->rebind.flags.vs = 0; 67501e04c3fSmrg svga->rebind.flags.gs = 0; 67601e04c3fSmrg svga->rebind.flags.fs = 0; 6777ec681f3Smrg svga->rebind.flags.tcs = 0; 6787ec681f3Smrg svga->rebind.flags.tes = 0; 67901e04c3fSmrg 68001e04c3fSmrg return PIPE_OK; 68101e04c3fSmrg } 68201e04c3fSmrg 68301e04c3fSmrg if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) { 68401e04c3fSmrg ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ); 68501e04c3fSmrg if (ret != PIPE_OK) 68601e04c3fSmrg return ret; 68701e04c3fSmrg } 68801e04c3fSmrg svga->rebind.flags.vs = 0; 68901e04c3fSmrg 69001e04c3fSmrg if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) { 69101e04c3fSmrg ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ); 69201e04c3fSmrg if (ret != PIPE_OK) 69301e04c3fSmrg return ret; 69401e04c3fSmrg } 69501e04c3fSmrg svga->rebind.flags.gs = 0; 69601e04c3fSmrg 69701e04c3fSmrg if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) { 69801e04c3fSmrg ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ); 69901e04c3fSmrg if (ret != PIPE_OK) 70001e04c3fSmrg return ret; 70101e04c3fSmrg } 70201e04c3fSmrg svga->rebind.flags.fs = 0; 70301e04c3fSmrg 7047ec681f3Smrg if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) { 7057ec681f3Smrg ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ); 7067ec681f3Smrg if (ret != PIPE_OK) 7077ec681f3Smrg return ret; 7087ec681f3Smrg } 7097ec681f3Smrg svga->rebind.flags.tcs = 0; 7107ec681f3Smrg 7117ec681f3Smrg if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) { 7127ec681f3Smrg ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ); 7137ec681f3Smrg if (ret != PIPE_OK) 7147ec681f3Smrg return ret; 7157ec681f3Smrg } 7167ec681f3Smrg svga->rebind.flags.tes = 0; 7177ec681f3Smrg 71801e04c3fSmrg return PIPE_OK; 719af69d88dSmrg} 720