/**********************************************************
 * Copyright 2008-2012 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/
25af69d88dSmrg
26af69d88dSmrg#include "util/u_bitmask.h"
27af69d88dSmrg#include "util/u_memory.h"
287ec681f3Smrg#include "util/format/u_format.h"
29af69d88dSmrg#include "svga_context.h"
30af69d88dSmrg#include "svga_cmd.h"
3101e04c3fSmrg#include "svga_format.h"
32af69d88dSmrg#include "svga_shader.h"
3301e04c3fSmrg#include "svga_resource_texture.h"
347ec681f3Smrg#include "VGPU10ShaderTokens.h"
35af69d88dSmrg
36af69d88dSmrg
/**
 * Marker bit (bit 63) that is OR'ed into the generic outputs bitmask when
 * a shader writes a fog output.  It is not consumed anywhere as a real
 * generic index; its only purpose is to make the output bitmask, which
 * serves as a "signature", differ for shaders that write fog.  Shader
 * input/output signatures are used to resolve shader linking issues.
 */
#define FOG_GENERIC_BIT (UINT64_C(1) << 63)
4401e04c3fSmrg
4501e04c3fSmrg
4601e04c3fSmrg/**
4701e04c3fSmrg * Use the shader info to generate a bitmask indicating which generic
4801e04c3fSmrg * inputs are used by the shader.  A set bit indicates that GENERIC[i]
4901e04c3fSmrg * is used.
5001e04c3fSmrg */
5101e04c3fSmrguint64_t
5201e04c3fSmrgsvga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
5301e04c3fSmrg{
5401e04c3fSmrg   unsigned i;
5501e04c3fSmrg   uint64_t mask = 0x0;
5601e04c3fSmrg
5701e04c3fSmrg   for (i = 0; i < info->num_inputs; i++) {
5801e04c3fSmrg      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
5901e04c3fSmrg         unsigned j = info->input_semantic_index[i];
6001e04c3fSmrg         assert(j < sizeof(mask) * 8);
6101e04c3fSmrg         mask |= ((uint64_t) 1) << j;
6201e04c3fSmrg      }
6301e04c3fSmrg   }
6401e04c3fSmrg
6501e04c3fSmrg   return mask;
6601e04c3fSmrg}
6701e04c3fSmrg
6801e04c3fSmrg
6901e04c3fSmrg/**
7001e04c3fSmrg * Scan shader info to return a bitmask of written outputs.
7101e04c3fSmrg */
7201e04c3fSmrguint64_t
7301e04c3fSmrgsvga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
7401e04c3fSmrg{
7501e04c3fSmrg   unsigned i;
7601e04c3fSmrg   uint64_t mask = 0x0;
7701e04c3fSmrg
7801e04c3fSmrg   for (i = 0; i < info->num_outputs; i++) {
7901e04c3fSmrg      switch (info->output_semantic_name[i]) {
8001e04c3fSmrg      case TGSI_SEMANTIC_GENERIC:
8101e04c3fSmrg         {
8201e04c3fSmrg            unsigned j = info->output_semantic_index[i];
8301e04c3fSmrg            assert(j < sizeof(mask) * 8);
8401e04c3fSmrg            mask |= ((uint64_t) 1) << j;
8501e04c3fSmrg         }
8601e04c3fSmrg         break;
8701e04c3fSmrg      case TGSI_SEMANTIC_FOG:
8801e04c3fSmrg         mask |= FOG_GENERIC_BIT;
8901e04c3fSmrg         break;
9001e04c3fSmrg      }
9101e04c3fSmrg   }
9201e04c3fSmrg
9301e04c3fSmrg   return mask;
9401e04c3fSmrg}
9501e04c3fSmrg
9601e04c3fSmrg
9701e04c3fSmrg
9801e04c3fSmrg/**
9901e04c3fSmrg * Given a mask of used generic variables (as returned by the above functions)
10001e04c3fSmrg * fill in a table which maps those indexes to small integers.
10101e04c3fSmrg * This table is used by the remap_generic_index() function in
10201e04c3fSmrg * svga_tgsi_decl_sm30.c
10301e04c3fSmrg * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
10401e04c3fSmrg * GENERIC[3] are used.  The remap_table will contain:
10501e04c3fSmrg *   table[1] = 0;
10601e04c3fSmrg *   table[3] = 1;
10701e04c3fSmrg * The remaining table entries will be filled in with the next unused
10801e04c3fSmrg * generic index (in this example, 2).
10901e04c3fSmrg */
11001e04c3fSmrgvoid
11101e04c3fSmrgsvga_remap_generics(uint64_t generics_mask,
11201e04c3fSmrg                    int8_t remap_table[MAX_GENERIC_VARYING])
11301e04c3fSmrg{
11401e04c3fSmrg   /* Note texcoord[0] is reserved so start at 1 */
11501e04c3fSmrg   unsigned count = 1, i;
11601e04c3fSmrg
11701e04c3fSmrg   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
11801e04c3fSmrg      remap_table[i] = -1;
11901e04c3fSmrg   }
12001e04c3fSmrg
12101e04c3fSmrg   /* for each bit set in generic_mask */
12201e04c3fSmrg   while (generics_mask) {
12301e04c3fSmrg      unsigned index = ffsll(generics_mask) - 1;
12401e04c3fSmrg      remap_table[index] = count++;
12501e04c3fSmrg      generics_mask &= ~((uint64_t) 1 << index);
12601e04c3fSmrg   }
12701e04c3fSmrg}
12801e04c3fSmrg
12901e04c3fSmrg
13001e04c3fSmrg/**
13101e04c3fSmrg * Use the generic remap table to map a TGSI generic varying variable
13201e04c3fSmrg * index to a small integer.  If the remapping table doesn't have a
13301e04c3fSmrg * valid value for the given index (the table entry is -1) it means
13401e04c3fSmrg * the fragment shader doesn't use that VS output.  Just allocate
13501e04c3fSmrg * the next free value in that case.  Alternately, we could cull
13601e04c3fSmrg * VS instructions that write to register, or replace the register
13701e04c3fSmrg * with a dummy temp register.
13801e04c3fSmrg * XXX TODO: we should do one of the later as it would save precious
13901e04c3fSmrg * texcoord registers.
14001e04c3fSmrg */
14101e04c3fSmrgint
14201e04c3fSmrgsvga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
14301e04c3fSmrg                         int generic_index)
14401e04c3fSmrg{
14501e04c3fSmrg   assert(generic_index < MAX_GENERIC_VARYING);
14601e04c3fSmrg
14701e04c3fSmrg   if (generic_index >= MAX_GENERIC_VARYING) {
14801e04c3fSmrg      /* just don't return a random/garbage value */
14901e04c3fSmrg      generic_index = MAX_GENERIC_VARYING - 1;
15001e04c3fSmrg   }
15101e04c3fSmrg
15201e04c3fSmrg   if (remap_table[generic_index] == -1) {
15301e04c3fSmrg      /* This is a VS output that has no matching PS input.  Find a
15401e04c3fSmrg       * free index.
15501e04c3fSmrg       */
15601e04c3fSmrg      int i, max = 0;
15701e04c3fSmrg      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
15801e04c3fSmrg         max = MAX2(max, remap_table[i]);
15901e04c3fSmrg      }
16001e04c3fSmrg      remap_table[generic_index] = max + 1;
16101e04c3fSmrg   }
16201e04c3fSmrg
16301e04c3fSmrg   return remap_table[generic_index];
16401e04c3fSmrg}
16501e04c3fSmrg
16601e04c3fSmrgstatic const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
16701e04c3fSmrg   PIPE_SWIZZLE_X,
16801e04c3fSmrg   PIPE_SWIZZLE_Y,
16901e04c3fSmrg   PIPE_SWIZZLE_Z,
17001e04c3fSmrg   PIPE_SWIZZLE_W,
17101e04c3fSmrg   PIPE_SWIZZLE_0,
17201e04c3fSmrg   PIPE_SWIZZLE_1,
17301e04c3fSmrg   PIPE_SWIZZLE_NONE
17401e04c3fSmrg};
17501e04c3fSmrg
17601e04c3fSmrgstatic const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
17701e04c3fSmrg   PIPE_SWIZZLE_X,
17801e04c3fSmrg   PIPE_SWIZZLE_Y,
17901e04c3fSmrg   PIPE_SWIZZLE_Z,
18001e04c3fSmrg   PIPE_SWIZZLE_1,
18101e04c3fSmrg   PIPE_SWIZZLE_0,
18201e04c3fSmrg   PIPE_SWIZZLE_1,
18301e04c3fSmrg   PIPE_SWIZZLE_NONE
18401e04c3fSmrg};
18501e04c3fSmrg
18601e04c3fSmrgstatic const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
18701e04c3fSmrg   PIPE_SWIZZLE_0,
18801e04c3fSmrg   PIPE_SWIZZLE_0,
18901e04c3fSmrg   PIPE_SWIZZLE_0,
19001e04c3fSmrg   PIPE_SWIZZLE_X,
19101e04c3fSmrg   PIPE_SWIZZLE_0,
19201e04c3fSmrg   PIPE_SWIZZLE_1,
19301e04c3fSmrg   PIPE_SWIZZLE_NONE
19401e04c3fSmrg};
19501e04c3fSmrg
19601e04c3fSmrgstatic const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
19701e04c3fSmrg   PIPE_SWIZZLE_X,
19801e04c3fSmrg   PIPE_SWIZZLE_X,
19901e04c3fSmrg   PIPE_SWIZZLE_X,
20001e04c3fSmrg   PIPE_SWIZZLE_X,
20101e04c3fSmrg   PIPE_SWIZZLE_0,
20201e04c3fSmrg   PIPE_SWIZZLE_1,
20301e04c3fSmrg   PIPE_SWIZZLE_NONE
20401e04c3fSmrg};
20501e04c3fSmrg
20601e04c3fSmrgstatic const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
20701e04c3fSmrg   PIPE_SWIZZLE_X,
20801e04c3fSmrg   PIPE_SWIZZLE_X,
20901e04c3fSmrg   PIPE_SWIZZLE_X,
21001e04c3fSmrg   PIPE_SWIZZLE_1,
21101e04c3fSmrg   PIPE_SWIZZLE_0,
21201e04c3fSmrg   PIPE_SWIZZLE_1,
21301e04c3fSmrg   PIPE_SWIZZLE_NONE
21401e04c3fSmrg};
21501e04c3fSmrg
21601e04c3fSmrgstatic const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
21701e04c3fSmrg   PIPE_SWIZZLE_X,
21801e04c3fSmrg   PIPE_SWIZZLE_X,
21901e04c3fSmrg   PIPE_SWIZZLE_X,
22001e04c3fSmrg   PIPE_SWIZZLE_Y,
22101e04c3fSmrg   PIPE_SWIZZLE_0,
22201e04c3fSmrg   PIPE_SWIZZLE_1,
22301e04c3fSmrg   PIPE_SWIZZLE_NONE
22401e04c3fSmrg};
22501e04c3fSmrg
22601e04c3fSmrg
2277ec681f3Smrgstatic VGPU10_RESOURCE_RETURN_TYPE
2287ec681f3Smrgvgpu10_return_type(enum pipe_format format)
2297ec681f3Smrg{
2307ec681f3Smrg   if (util_format_is_unorm(format))
2317ec681f3Smrg      return VGPU10_RETURN_TYPE_UNORM;
2327ec681f3Smrg   else if (util_format_is_snorm(format))
2337ec681f3Smrg      return VGPU10_RETURN_TYPE_SNORM;
2347ec681f3Smrg   else if (util_format_is_pure_uint(format))
2357ec681f3Smrg      return VGPU10_RETURN_TYPE_UINT;
2367ec681f3Smrg   else if (util_format_is_pure_sint(format))
2377ec681f3Smrg      return VGPU10_RETURN_TYPE_SINT;
2387ec681f3Smrg   else if (util_format_is_float(format))
2397ec681f3Smrg      return VGPU10_RETURN_TYPE_FLOAT;
2407ec681f3Smrg   else
2417ec681f3Smrg      return VGPU10_RETURN_TYPE_MAX;
2427ec681f3Smrg}
2437ec681f3Smrg
2447ec681f3Smrg
24501e04c3fSmrg/**
24601e04c3fSmrg * Initialize the shader-neutral fields of svga_compile_key from context
24701e04c3fSmrg * state.  This is basically the texture-related state.
24801e04c3fSmrg */
24901e04c3fSmrgvoid
25001e04c3fSmrgsvga_init_shader_key_common(const struct svga_context *svga,
2517ec681f3Smrg                            enum pipe_shader_type shader_type,
2527ec681f3Smrg                            const struct svga_shader *shader,
25301e04c3fSmrg                            struct svga_compile_key *key)
25401e04c3fSmrg{
25501e04c3fSmrg   unsigned i, idx = 0;
25601e04c3fSmrg
2577ec681f3Smrg   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
25801e04c3fSmrg
25901e04c3fSmrg   /* In case the number of samplers and sampler_views doesn't match,
26001e04c3fSmrg    * loop over the lower of the two counts.
26101e04c3fSmrg    */
2627ec681f3Smrg   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
2637ec681f3Smrg                            svga->curr.num_samplers[shader_type]);
26401e04c3fSmrg
26501e04c3fSmrg   for (i = 0; i < key->num_textures; i++) {
2667ec681f3Smrg      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
2677ec681f3Smrg      const struct svga_sampler_state
2687ec681f3Smrg         *sampler = svga->curr.sampler[shader_type][i];
2697ec681f3Smrg
27001e04c3fSmrg      if (view) {
27101e04c3fSmrg         assert(view->texture);
27201e04c3fSmrg         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
27301e04c3fSmrg
2747ec681f3Smrg         enum pipe_texture_target target = view->target;
2757ec681f3Smrg
2767ec681f3Smrg	 key->tex[i].target = target;
2777ec681f3Smrg	 key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
2787ec681f3Smrg	 key->tex[i].sampler_view = 1;
2797ec681f3Smrg
2807ec681f3Smrg
28101e04c3fSmrg         /* 1D/2D array textures with one slice and cube map array textures
28201e04c3fSmrg          * with one cube are treated as non-arrays by the SVGA3D device.
28301e04c3fSmrg          * Set the is_array flag only if we know that we have more than 1
28401e04c3fSmrg          * element.  This will be used to select shader instruction/resource
28501e04c3fSmrg          * types during shader translation.
28601e04c3fSmrg          */
28701e04c3fSmrg         switch (view->texture->target) {
28801e04c3fSmrg         case PIPE_TEXTURE_1D_ARRAY:
28901e04c3fSmrg         case PIPE_TEXTURE_2D_ARRAY:
29001e04c3fSmrg            key->tex[i].is_array = view->texture->array_size > 1;
29101e04c3fSmrg            break;
29201e04c3fSmrg         case PIPE_TEXTURE_CUBE_ARRAY:
29301e04c3fSmrg            key->tex[i].is_array = view->texture->array_size > 6;
29401e04c3fSmrg            break;
29501e04c3fSmrg         default:
29601e04c3fSmrg            ; /* nothing / silence compiler warning */
29701e04c3fSmrg         }
29801e04c3fSmrg
29901e04c3fSmrg         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
30001e04c3fSmrg         key->tex[i].num_samples = view->texture->nr_samples;
30101e04c3fSmrg
30201e04c3fSmrg         const enum pipe_swizzle *swizzle_tab;
30301e04c3fSmrg         if (view->texture->target == PIPE_BUFFER) {
30401e04c3fSmrg            SVGA3dSurfaceFormat svga_format;
30501e04c3fSmrg            unsigned tf_flags;
30601e04c3fSmrg
30701e04c3fSmrg            /* Apply any special swizzle mask for the view format if needed */
30801e04c3fSmrg
30901e04c3fSmrg            svga_translate_texture_buffer_view_format(view->format,
31001e04c3fSmrg                                                      &svga_format, &tf_flags);
31101e04c3fSmrg            if (tf_flags & TF_000X)
31201e04c3fSmrg               swizzle_tab = set_000X;
31301e04c3fSmrg            else if (tf_flags & TF_XXXX)
31401e04c3fSmrg               swizzle_tab = set_XXXX;
31501e04c3fSmrg            else if (tf_flags & TF_XXX1)
31601e04c3fSmrg               swizzle_tab = set_XXX1;
31701e04c3fSmrg            else if (tf_flags & TF_XXXY)
31801e04c3fSmrg               swizzle_tab = set_XXXY;
31901e04c3fSmrg            else
32001e04c3fSmrg               swizzle_tab = copy_alpha;
32101e04c3fSmrg         }
32201e04c3fSmrg         else {
32301e04c3fSmrg            /* If we have a non-alpha view into an svga3d surface with an
32401e04c3fSmrg             * alpha channel, then explicitly set the alpha channel to 1
32501e04c3fSmrg             * when sampling. Note that we need to check the
32601e04c3fSmrg             * actual device format to cover also imported surface cases.
32701e04c3fSmrg             */
32801e04c3fSmrg            swizzle_tab =
32901e04c3fSmrg               (!util_format_has_alpha(view->format) &&
33001e04c3fSmrg                svga_texture_device_format_has_alpha(view->texture)) ?
33101e04c3fSmrg                set_alpha : copy_alpha;
33201e04c3fSmrg
33301e04c3fSmrg            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
33401e04c3fSmrg                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
33501e04c3fSmrg               swizzle_tab = set_alpha;
3367ec681f3Smrg
3377ec681f3Smrg            /* Save the compare function as we need to handle
3387ec681f3Smrg             * depth compare in the shader.
3397ec681f3Smrg             */
3407ec681f3Smrg            key->tex[i].compare_mode = sampler->compare_mode;
3417ec681f3Smrg            key->tex[i].compare_func = sampler->compare_func;
34201e04c3fSmrg         }
34301e04c3fSmrg
34401e04c3fSmrg         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
34501e04c3fSmrg         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
34601e04c3fSmrg         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
34701e04c3fSmrg         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
34801e04c3fSmrg      }
3497ec681f3Smrg      else {
3507ec681f3Smrg	 key->tex[i].sampler_view = 0;
3517ec681f3Smrg      }
35201e04c3fSmrg
35301e04c3fSmrg      if (sampler) {
35401e04c3fSmrg         if (!sampler->normalized_coords) {
3557ec681f3Smrg            if (view) {
3567ec681f3Smrg               assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
3577ec681f3Smrg               key->tex[i].width_height_idx = idx++;
3587ec681f3Smrg	    }
35901e04c3fSmrg            key->tex[i].unnormalized = TRUE;
36001e04c3fSmrg            ++key->num_unnormalized_coords;
36101e04c3fSmrg
36201e04c3fSmrg            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
36301e04c3fSmrg                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
36401e04c3fSmrg                key->tex[i].texel_bias = TRUE;
36501e04c3fSmrg            }
36601e04c3fSmrg         }
36701e04c3fSmrg      }
36801e04c3fSmrg   }
3697ec681f3Smrg
3707ec681f3Smrg   key->clamp_vertex_color = svga->curr.rast ?
3717ec681f3Smrg                             svga->curr.rast->templ.clamp_vertex_color : 0;
37201e04c3fSmrg}
37301e04c3fSmrg
37401e04c3fSmrg
37501e04c3fSmrg/** Search for a compiled shader variant with the same compile key */
37601e04c3fSmrgstruct svga_shader_variant *
37701e04c3fSmrgsvga_search_shader_key(const struct svga_shader *shader,
37801e04c3fSmrg                       const struct svga_compile_key *key)
37901e04c3fSmrg{
38001e04c3fSmrg   struct svga_shader_variant *variant = shader->variants;
38101e04c3fSmrg
38201e04c3fSmrg   assert(key);
38301e04c3fSmrg
38401e04c3fSmrg   for ( ; variant; variant = variant->next) {
38501e04c3fSmrg      if (svga_compile_keys_equal(key, &variant->key))
38601e04c3fSmrg         return variant;
38701e04c3fSmrg   }
38801e04c3fSmrg   return NULL;
38901e04c3fSmrg}
39001e04c3fSmrg
39101e04c3fSmrg/** Search for a shader with the same token key */
39201e04c3fSmrgstruct svga_shader *
39301e04c3fSmrgsvga_search_shader_token_key(struct svga_shader *pshader,
39401e04c3fSmrg                             const struct svga_token_key *key)
39501e04c3fSmrg{
39601e04c3fSmrg   struct svga_shader *shader = pshader;
39701e04c3fSmrg
39801e04c3fSmrg   assert(key);
39901e04c3fSmrg
40001e04c3fSmrg   for ( ; shader; shader = shader->next) {
40101e04c3fSmrg      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
40201e04c3fSmrg         return shader;
40301e04c3fSmrg   }
40401e04c3fSmrg   return NULL;
40501e04c3fSmrg}
40601e04c3fSmrg
40701e04c3fSmrg/**
40801e04c3fSmrg * Helper function to define a gb shader for non-vgpu10 device
40901e04c3fSmrg */
41001e04c3fSmrgstatic enum pipe_error
41101e04c3fSmrgdefine_gb_shader_vgpu9(struct svga_context *svga,
41201e04c3fSmrg                       struct svga_shader_variant *variant,
41301e04c3fSmrg                       unsigned codeLen)
41401e04c3fSmrg{
41501e04c3fSmrg   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
41601e04c3fSmrg   enum pipe_error ret;
41701e04c3fSmrg
41801e04c3fSmrg   /**
41901e04c3fSmrg    * Create gb memory for the shader and upload the shader code.
42001e04c3fSmrg    * Kernel module will allocate an id for the shader and issue
42101e04c3fSmrg    * the DefineGBShader command.
42201e04c3fSmrg    */
4239f464c52Smaya   variant->gb_shader = sws->shader_create(sws, variant->type,
42401e04c3fSmrg                                           variant->tokens, codeLen);
42501e04c3fSmrg
4267ec681f3Smrg   svga->hud.shader_mem_used += codeLen;
4277ec681f3Smrg
42801e04c3fSmrg   if (!variant->gb_shader)
42901e04c3fSmrg      return PIPE_ERROR_OUT_OF_MEMORY;
43001e04c3fSmrg
43101e04c3fSmrg   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
43201e04c3fSmrg
43301e04c3fSmrg   return ret;
43401e04c3fSmrg}
43501e04c3fSmrg
43601e04c3fSmrg/**
43701e04c3fSmrg * Helper function to define a gb shader for vgpu10 device
43801e04c3fSmrg */
43901e04c3fSmrgstatic enum pipe_error
44001e04c3fSmrgdefine_gb_shader_vgpu10(struct svga_context *svga,
44101e04c3fSmrg                        struct svga_shader_variant *variant,
44201e04c3fSmrg                        unsigned codeLen)
44301e04c3fSmrg{
44401e04c3fSmrg   struct svga_winsys_context *swc = svga->swc;
44501e04c3fSmrg   enum pipe_error ret;
4467ec681f3Smrg   unsigned len = codeLen + variant->signatureLen;
44701e04c3fSmrg
44801e04c3fSmrg   /**
44901e04c3fSmrg    * Shaders in VGPU10 enabled device reside in the device COTable.
45001e04c3fSmrg    * SVGA driver will allocate an integer ID for the shader and
45101e04c3fSmrg    * issue DXDefineShader and DXBindShader commands.
45201e04c3fSmrg    */
45301e04c3fSmrg   variant->id = util_bitmask_add(svga->shader_id_bm);
45401e04c3fSmrg   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
45501e04c3fSmrg      return PIPE_ERROR_OUT_OF_MEMORY;
45601e04c3fSmrg   }
45701e04c3fSmrg
45801e04c3fSmrg   /* Create gb memory for the shader and upload the shader code */
45901e04c3fSmrg   variant->gb_shader = swc->shader_create(swc,
4609f464c52Smaya                                           variant->id, variant->type,
4617ec681f3Smrg                                           variant->tokens, codeLen,
4627ec681f3Smrg                                           variant->signature,
4637ec681f3Smrg                                           variant->signatureLen);
4647ec681f3Smrg
4657ec681f3Smrg   svga->hud.shader_mem_used += len;
46601e04c3fSmrg
46701e04c3fSmrg   if (!variant->gb_shader) {
46801e04c3fSmrg      /* Free the shader ID */
46901e04c3fSmrg      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
47001e04c3fSmrg      goto fail_no_allocation;
47101e04c3fSmrg   }
47201e04c3fSmrg
47301e04c3fSmrg   /**
47401e04c3fSmrg    * Since we don't want to do any flush within state emission to avoid
47501e04c3fSmrg    * partial state in a command buffer, it's important to make sure that
47601e04c3fSmrg    * there is enough room to send both the DXDefineShader & DXBindShader
47701e04c3fSmrg    * commands in the same command buffer. So let's send both
47801e04c3fSmrg    * commands in one command reservation. If it fails, we'll undo
47901e04c3fSmrg    * the shader creation and return an error.
48001e04c3fSmrg    */
48101e04c3fSmrg   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
4827ec681f3Smrg                                           variant->id, variant->type,
4837ec681f3Smrg                                           len);
48401e04c3fSmrg
48501e04c3fSmrg   if (ret != PIPE_OK)
48601e04c3fSmrg      goto fail;
48701e04c3fSmrg
48801e04c3fSmrg   return PIPE_OK;
48901e04c3fSmrg
49001e04c3fSmrgfail:
49101e04c3fSmrg   swc->shader_destroy(swc, variant->gb_shader);
49201e04c3fSmrg   variant->gb_shader = NULL;
49301e04c3fSmrg
49401e04c3fSmrgfail_no_allocation:
49501e04c3fSmrg   util_bitmask_clear(svga->shader_id_bm, variant->id);
49601e04c3fSmrg   variant->id = UTIL_BITMASK_INVALID_INDEX;
49701e04c3fSmrg
49801e04c3fSmrg   return PIPE_ERROR_OUT_OF_MEMORY;
49901e04c3fSmrg}
500af69d88dSmrg
501af69d88dSmrg/**
502af69d88dSmrg * Issue the SVGA3D commands to define a new shader.
50301e04c3fSmrg * \param variant  contains the shader tokens, etc.  The result->id field will
50401e04c3fSmrg *                 be set here.
505af69d88dSmrg */
506af69d88dSmrgenum pipe_error
507af69d88dSmrgsvga_define_shader(struct svga_context *svga,
508af69d88dSmrg                   struct svga_shader_variant *variant)
509af69d88dSmrg{
510af69d88dSmrg   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
51101e04c3fSmrg   enum pipe_error ret;
512af69d88dSmrg
51301e04c3fSmrg   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
514af69d88dSmrg
51501e04c3fSmrg   variant->id = UTIL_BITMASK_INVALID_INDEX;
516af69d88dSmrg
51701e04c3fSmrg   if (svga_have_gb_objects(svga)) {
51801e04c3fSmrg      if (svga_have_vgpu10(svga))
5199f464c52Smaya         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
52001e04c3fSmrg      else
5219f464c52Smaya         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
522af69d88dSmrg   }
523af69d88dSmrg   else {
524af69d88dSmrg      /* Allocate an integer ID for the shader */
525af69d88dSmrg      variant->id = util_bitmask_add(svga->shader_id_bm);
526af69d88dSmrg      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
52701e04c3fSmrg         ret = PIPE_ERROR_OUT_OF_MEMORY;
52801e04c3fSmrg         goto done;
529af69d88dSmrg      }
530af69d88dSmrg
531af69d88dSmrg      /* Issue SVGA3D device command to define the shader */
532af69d88dSmrg      ret = SVGA3D_DefineShader(svga->swc,
533af69d88dSmrg                                variant->id,
5349f464c52Smaya                                variant->type,
535af69d88dSmrg                                variant->tokens,
536af69d88dSmrg                                codeLen);
537af69d88dSmrg      if (ret != PIPE_OK) {
538af69d88dSmrg         /* free the ID */
539af69d88dSmrg         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
540af69d88dSmrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
541af69d88dSmrg         variant->id = UTIL_BITMASK_INVALID_INDEX;
542af69d88dSmrg      }
543af69d88dSmrg   }
544af69d88dSmrg
54501e04c3fSmrgdone:
54601e04c3fSmrg   SVGA_STATS_TIME_POP(svga_sws(svga));
54701e04c3fSmrg   return ret;
548af69d88dSmrg}
549af69d88dSmrg
550af69d88dSmrg
55101e04c3fSmrg/**
55201e04c3fSmrg * Issue the SVGA3D commands to set/bind a shader.
55301e04c3fSmrg * \param result  the shader to bind.
55401e04c3fSmrg */
555af69d88dSmrgenum pipe_error
55601e04c3fSmrgsvga_set_shader(struct svga_context *svga,
55701e04c3fSmrg                SVGA3dShaderType type,
55801e04c3fSmrg                struct svga_shader_variant *variant)
559af69d88dSmrg{
56001e04c3fSmrg   enum pipe_error ret;
56101e04c3fSmrg   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
56201e04c3fSmrg
56301e04c3fSmrg   assert(type == SVGA3D_SHADERTYPE_VS ||
56401e04c3fSmrg          type == SVGA3D_SHADERTYPE_GS ||
5657ec681f3Smrg          type == SVGA3D_SHADERTYPE_PS ||
5667ec681f3Smrg          type == SVGA3D_SHADERTYPE_HS ||
5677ec681f3Smrg          type == SVGA3D_SHADERTYPE_DS ||
5687ec681f3Smrg          type == SVGA3D_SHADERTYPE_CS);
569af69d88dSmrg
570af69d88dSmrg   if (svga_have_gb_objects(svga)) {
57101e04c3fSmrg      struct svga_winsys_gb_shader *gbshader =
57201e04c3fSmrg         variant ? variant->gb_shader : NULL;
573af69d88dSmrg
57401e04c3fSmrg      if (svga_have_vgpu10(svga))
57501e04c3fSmrg         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
57601e04c3fSmrg      else
57701e04c3fSmrg         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
57801e04c3fSmrg   }
57901e04c3fSmrg   else {
58001e04c3fSmrg      ret = SVGA3D_SetShader(svga->swc, type, id);
581af69d88dSmrg   }
582af69d88dSmrg
58301e04c3fSmrg   return ret;
58401e04c3fSmrg}
58501e04c3fSmrg
586af69d88dSmrg
58701e04c3fSmrgstruct svga_shader_variant *
5889f464c52Smayasvga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
58901e04c3fSmrg{
5907ec681f3Smrg   struct svga_shader_variant *variant;
5917ec681f3Smrg
5927ec681f3Smrg   switch (type) {
5937ec681f3Smrg   case PIPE_SHADER_FRAGMENT:
5947ec681f3Smrg      variant = CALLOC(1, sizeof(struct svga_fs_variant));
5957ec681f3Smrg      break;
5967ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
5977ec681f3Smrg      variant = CALLOC(1, sizeof(struct svga_gs_variant));
5987ec681f3Smrg      break;
5997ec681f3Smrg   case PIPE_SHADER_VERTEX:
6007ec681f3Smrg      variant = CALLOC(1, sizeof(struct svga_vs_variant));
6017ec681f3Smrg      break;
6027ec681f3Smrg   case PIPE_SHADER_TESS_EVAL:
6037ec681f3Smrg      variant = CALLOC(1, sizeof(struct svga_tes_variant));
6047ec681f3Smrg      break;
6057ec681f3Smrg   case PIPE_SHADER_TESS_CTRL:
6067ec681f3Smrg      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
6077ec681f3Smrg      break;
6087ec681f3Smrg   default:
6097ec681f3Smrg      return NULL;
6107ec681f3Smrg   }
6119f464c52Smaya
6129f464c52Smaya   if (variant) {
6139f464c52Smaya      variant->type = svga_shader_type(type);
6149f464c52Smaya      svga->hud.num_shaders++;
6159f464c52Smaya   }
6169f464c52Smaya   return variant;
61701e04c3fSmrg}
61801e04c3fSmrg
61901e04c3fSmrg
62001e04c3fSmrgvoid
62101e04c3fSmrgsvga_destroy_shader_variant(struct svga_context *svga,
62201e04c3fSmrg                            struct svga_shader_variant *variant)
62301e04c3fSmrg{
62401e04c3fSmrg   if (svga_have_gb_objects(svga) && variant->gb_shader) {
62501e04c3fSmrg      if (svga_have_vgpu10(svga)) {
62601e04c3fSmrg         struct svga_winsys_context *swc = svga->swc;
62701e04c3fSmrg         swc->shader_destroy(swc, variant->gb_shader);
6287ec681f3Smrg         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
62901e04c3fSmrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
63001e04c3fSmrg      }
63101e04c3fSmrg      else {
63201e04c3fSmrg         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
63301e04c3fSmrg         sws->shader_destroy(sws, variant->gb_shader);
63401e04c3fSmrg      }
63501e04c3fSmrg      variant->gb_shader = NULL;
63601e04c3fSmrg   }
63701e04c3fSmrg   else {
63801e04c3fSmrg      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
6397ec681f3Smrg         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
6407ec681f3Smrg                                               variant->type));
64101e04c3fSmrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
642af69d88dSmrg      }
643af69d88dSmrg   }
644af69d88dSmrg
6457ec681f3Smrg   FREE(variant->signature);
646af69d88dSmrg   FREE((unsigned *)variant->tokens);
647af69d88dSmrg   FREE(variant);
648af69d88dSmrg
64901e04c3fSmrg   svga->hud.num_shaders--;
65001e04c3fSmrg}
65101e04c3fSmrg
65201e04c3fSmrg/*
65301e04c3fSmrg * Rebind shaders.
65401e04c3fSmrg * Called at the beginning of every new command buffer to ensure that
65501e04c3fSmrg * shaders are properly paged-in. Instead of sending the SetShader
65601e04c3fSmrg * command, this function sends a private allocation command to
65701e04c3fSmrg * page in a shader. This avoids emitting redundant state to the device
65801e04c3fSmrg * just to page in a resource.
65901e04c3fSmrg */
66001e04c3fSmrgenum pipe_error
66101e04c3fSmrgsvga_rebind_shaders(struct svga_context *svga)
66201e04c3fSmrg{
66301e04c3fSmrg   struct svga_winsys_context *swc = svga->swc;
66401e04c3fSmrg   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
66501e04c3fSmrg   enum pipe_error ret;
66601e04c3fSmrg
66701e04c3fSmrg   assert(svga_have_vgpu10(svga));
66801e04c3fSmrg
66901e04c3fSmrg   /**
67001e04c3fSmrg    * If the underlying winsys layer does not need resource rebinding,
67101e04c3fSmrg    * just clear the rebind flags and return.
67201e04c3fSmrg    */
67301e04c3fSmrg   if (swc->resource_rebind == NULL) {
67401e04c3fSmrg      svga->rebind.flags.vs = 0;
67501e04c3fSmrg      svga->rebind.flags.gs = 0;
67601e04c3fSmrg      svga->rebind.flags.fs = 0;
6777ec681f3Smrg      svga->rebind.flags.tcs = 0;
6787ec681f3Smrg      svga->rebind.flags.tes = 0;
67901e04c3fSmrg
68001e04c3fSmrg      return PIPE_OK;
68101e04c3fSmrg   }
68201e04c3fSmrg
68301e04c3fSmrg   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
68401e04c3fSmrg      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
68501e04c3fSmrg      if (ret != PIPE_OK)
68601e04c3fSmrg         return ret;
68701e04c3fSmrg   }
68801e04c3fSmrg   svga->rebind.flags.vs = 0;
68901e04c3fSmrg
69001e04c3fSmrg   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
69101e04c3fSmrg      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
69201e04c3fSmrg      if (ret != PIPE_OK)
69301e04c3fSmrg         return ret;
69401e04c3fSmrg   }
69501e04c3fSmrg   svga->rebind.flags.gs = 0;
69601e04c3fSmrg
69701e04c3fSmrg   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
69801e04c3fSmrg      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
69901e04c3fSmrg      if (ret != PIPE_OK)
70001e04c3fSmrg         return ret;
70101e04c3fSmrg   }
70201e04c3fSmrg   svga->rebind.flags.fs = 0;
70301e04c3fSmrg
7047ec681f3Smrg   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
7057ec681f3Smrg      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
7067ec681f3Smrg      if (ret != PIPE_OK)
7077ec681f3Smrg         return ret;
7087ec681f3Smrg   }
7097ec681f3Smrg   svga->rebind.flags.tcs = 0;
7107ec681f3Smrg
7117ec681f3Smrg   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
7127ec681f3Smrg      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
7137ec681f3Smrg      if (ret != PIPE_OK)
7147ec681f3Smrg         return ret;
7157ec681f3Smrg   }
7167ec681f3Smrg   svga->rebind.flags.tes = 0;
7177ec681f3Smrg
71801e04c3fSmrg   return PIPE_OK;
719af69d88dSmrg}
720