1848b8605Smrg/**********************************************************
2848b8605Smrg * Copyright 2008-2012 VMware, Inc.  All rights reserved.
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person
5848b8605Smrg * obtaining a copy of this software and associated documentation
6848b8605Smrg * files (the "Software"), to deal in the Software without
7848b8605Smrg * restriction, including without limitation the rights to use, copy,
8848b8605Smrg * modify, merge, publish, distribute, sublicense, and/or sell copies
9848b8605Smrg * of the Software, and to permit persons to whom the Software is
10848b8605Smrg * furnished to do so, subject to the following conditions:
11848b8605Smrg *
12848b8605Smrg * The above copyright notice and this permission notice shall be
13848b8605Smrg * included in all copies or substantial portions of the Software.
14848b8605Smrg *
15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16848b8605Smrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18848b8605Smrg * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19848b8605Smrg * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20848b8605Smrg * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21848b8605Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22848b8605Smrg * SOFTWARE.
23848b8605Smrg *
24848b8605Smrg **********************************************************/
25848b8605Smrg
26848b8605Smrg#include "util/u_bitmask.h"
27848b8605Smrg#include "util/u_memory.h"
28b8e80941Smrg#include "util/u_format.h"
29848b8605Smrg#include "svga_context.h"
30848b8605Smrg#include "svga_cmd.h"
31b8e80941Smrg#include "svga_format.h"
32848b8605Smrg#include "svga_shader.h"
33b8e80941Smrg#include "svga_resource_texture.h"
34848b8605Smrg
35848b8605Smrg
/**
 * Marker bit OR'ed into the output mask when a shader writes
 * TGSI_SEMANTIC_FOG (see svga_get_generic_outputs_mask()).  It is not
 * otherwise consumed; its only purpose is to make the vertex shader
 * output "signature" unique so that shader linking issues can be
 * resolved by comparing input/output signatures.
 */
#define FOG_GENERIC_BIT ((uint64_t) 1 << 63)
43b8e80941Smrg
44b8e80941Smrg
45b8e80941Smrg/**
46b8e80941Smrg * Use the shader info to generate a bitmask indicating which generic
47b8e80941Smrg * inputs are used by the shader.  A set bit indicates that GENERIC[i]
48b8e80941Smrg * is used.
49b8e80941Smrg */
50b8e80941Smrguint64_t
51b8e80941Smrgsvga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
52b8e80941Smrg{
53b8e80941Smrg   unsigned i;
54b8e80941Smrg   uint64_t mask = 0x0;
55b8e80941Smrg
56b8e80941Smrg   for (i = 0; i < info->num_inputs; i++) {
57b8e80941Smrg      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
58b8e80941Smrg         unsigned j = info->input_semantic_index[i];
59b8e80941Smrg         assert(j < sizeof(mask) * 8);
60b8e80941Smrg         mask |= ((uint64_t) 1) << j;
61b8e80941Smrg      }
62b8e80941Smrg   }
63b8e80941Smrg
64b8e80941Smrg   return mask;
65b8e80941Smrg}
66b8e80941Smrg
67b8e80941Smrg
68b8e80941Smrg/**
69b8e80941Smrg * Scan shader info to return a bitmask of written outputs.
70b8e80941Smrg */
71b8e80941Smrguint64_t
72b8e80941Smrgsvga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
73b8e80941Smrg{
74b8e80941Smrg   unsigned i;
75b8e80941Smrg   uint64_t mask = 0x0;
76b8e80941Smrg
77b8e80941Smrg   for (i = 0; i < info->num_outputs; i++) {
78b8e80941Smrg      switch (info->output_semantic_name[i]) {
79b8e80941Smrg      case TGSI_SEMANTIC_GENERIC:
80b8e80941Smrg         {
81b8e80941Smrg            unsigned j = info->output_semantic_index[i];
82b8e80941Smrg            assert(j < sizeof(mask) * 8);
83b8e80941Smrg            mask |= ((uint64_t) 1) << j;
84b8e80941Smrg         }
85b8e80941Smrg         break;
86b8e80941Smrg      case TGSI_SEMANTIC_FOG:
87b8e80941Smrg         mask |= FOG_GENERIC_BIT;
88b8e80941Smrg         break;
89b8e80941Smrg      }
90b8e80941Smrg   }
91b8e80941Smrg
92b8e80941Smrg   return mask;
93b8e80941Smrg}
94b8e80941Smrg
95b8e80941Smrg
96b8e80941Smrg
97b8e80941Smrg/**
98b8e80941Smrg * Given a mask of used generic variables (as returned by the above functions)
99b8e80941Smrg * fill in a table which maps those indexes to small integers.
100b8e80941Smrg * This table is used by the remap_generic_index() function in
101b8e80941Smrg * svga_tgsi_decl_sm30.c
102b8e80941Smrg * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
103b8e80941Smrg * GENERIC[3] are used.  The remap_table will contain:
104b8e80941Smrg *   table[1] = 0;
105b8e80941Smrg *   table[3] = 1;
106b8e80941Smrg * The remaining table entries will be filled in with the next unused
107b8e80941Smrg * generic index (in this example, 2).
108b8e80941Smrg */
109b8e80941Smrgvoid
110b8e80941Smrgsvga_remap_generics(uint64_t generics_mask,
111b8e80941Smrg                    int8_t remap_table[MAX_GENERIC_VARYING])
112b8e80941Smrg{
113b8e80941Smrg   /* Note texcoord[0] is reserved so start at 1 */
114b8e80941Smrg   unsigned count = 1, i;
115b8e80941Smrg
116b8e80941Smrg   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
117b8e80941Smrg      remap_table[i] = -1;
118b8e80941Smrg   }
119b8e80941Smrg
120b8e80941Smrg   /* for each bit set in generic_mask */
121b8e80941Smrg   while (generics_mask) {
122b8e80941Smrg      unsigned index = ffsll(generics_mask) - 1;
123b8e80941Smrg      remap_table[index] = count++;
124b8e80941Smrg      generics_mask &= ~((uint64_t) 1 << index);
125b8e80941Smrg   }
126b8e80941Smrg}
127b8e80941Smrg
128b8e80941Smrg
129b8e80941Smrg/**
130b8e80941Smrg * Use the generic remap table to map a TGSI generic varying variable
131b8e80941Smrg * index to a small integer.  If the remapping table doesn't have a
132b8e80941Smrg * valid value for the given index (the table entry is -1) it means
133b8e80941Smrg * the fragment shader doesn't use that VS output.  Just allocate
134b8e80941Smrg * the next free value in that case.  Alternately, we could cull
135b8e80941Smrg * VS instructions that write to register, or replace the register
136b8e80941Smrg * with a dummy temp register.
137b8e80941Smrg * XXX TODO: we should do one of the later as it would save precious
138b8e80941Smrg * texcoord registers.
139b8e80941Smrg */
140b8e80941Smrgint
141b8e80941Smrgsvga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
142b8e80941Smrg                         int generic_index)
143b8e80941Smrg{
144b8e80941Smrg   assert(generic_index < MAX_GENERIC_VARYING);
145b8e80941Smrg
146b8e80941Smrg   if (generic_index >= MAX_GENERIC_VARYING) {
147b8e80941Smrg      /* just don't return a random/garbage value */
148b8e80941Smrg      generic_index = MAX_GENERIC_VARYING - 1;
149b8e80941Smrg   }
150b8e80941Smrg
151b8e80941Smrg   if (remap_table[generic_index] == -1) {
152b8e80941Smrg      /* This is a VS output that has no matching PS input.  Find a
153b8e80941Smrg       * free index.
154b8e80941Smrg       */
155b8e80941Smrg      int i, max = 0;
156b8e80941Smrg      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
157b8e80941Smrg         max = MAX2(max, remap_table[i]);
158b8e80941Smrg      }
159b8e80941Smrg      remap_table[generic_index] = max + 1;
160b8e80941Smrg   }
161b8e80941Smrg
162b8e80941Smrg   return remap_table[generic_index];
163b8e80941Smrg}
164b8e80941Smrg
165b8e80941Smrgstatic const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
166b8e80941Smrg   PIPE_SWIZZLE_X,
167b8e80941Smrg   PIPE_SWIZZLE_Y,
168b8e80941Smrg   PIPE_SWIZZLE_Z,
169b8e80941Smrg   PIPE_SWIZZLE_W,
170b8e80941Smrg   PIPE_SWIZZLE_0,
171b8e80941Smrg   PIPE_SWIZZLE_1,
172b8e80941Smrg   PIPE_SWIZZLE_NONE
173b8e80941Smrg};
174b8e80941Smrg
175b8e80941Smrgstatic const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
176b8e80941Smrg   PIPE_SWIZZLE_X,
177b8e80941Smrg   PIPE_SWIZZLE_Y,
178b8e80941Smrg   PIPE_SWIZZLE_Z,
179b8e80941Smrg   PIPE_SWIZZLE_1,
180b8e80941Smrg   PIPE_SWIZZLE_0,
181b8e80941Smrg   PIPE_SWIZZLE_1,
182b8e80941Smrg   PIPE_SWIZZLE_NONE
183b8e80941Smrg};
184b8e80941Smrg
185b8e80941Smrgstatic const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
186b8e80941Smrg   PIPE_SWIZZLE_0,
187b8e80941Smrg   PIPE_SWIZZLE_0,
188b8e80941Smrg   PIPE_SWIZZLE_0,
189b8e80941Smrg   PIPE_SWIZZLE_X,
190b8e80941Smrg   PIPE_SWIZZLE_0,
191b8e80941Smrg   PIPE_SWIZZLE_1,
192b8e80941Smrg   PIPE_SWIZZLE_NONE
193b8e80941Smrg};
194b8e80941Smrg
195b8e80941Smrgstatic const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
196b8e80941Smrg   PIPE_SWIZZLE_X,
197b8e80941Smrg   PIPE_SWIZZLE_X,
198b8e80941Smrg   PIPE_SWIZZLE_X,
199b8e80941Smrg   PIPE_SWIZZLE_X,
200b8e80941Smrg   PIPE_SWIZZLE_0,
201b8e80941Smrg   PIPE_SWIZZLE_1,
202b8e80941Smrg   PIPE_SWIZZLE_NONE
203b8e80941Smrg};
204b8e80941Smrg
205b8e80941Smrgstatic const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
206b8e80941Smrg   PIPE_SWIZZLE_X,
207b8e80941Smrg   PIPE_SWIZZLE_X,
208b8e80941Smrg   PIPE_SWIZZLE_X,
209b8e80941Smrg   PIPE_SWIZZLE_1,
210b8e80941Smrg   PIPE_SWIZZLE_0,
211b8e80941Smrg   PIPE_SWIZZLE_1,
212b8e80941Smrg   PIPE_SWIZZLE_NONE
213b8e80941Smrg};
214b8e80941Smrg
215b8e80941Smrgstatic const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
216b8e80941Smrg   PIPE_SWIZZLE_X,
217b8e80941Smrg   PIPE_SWIZZLE_X,
218b8e80941Smrg   PIPE_SWIZZLE_X,
219b8e80941Smrg   PIPE_SWIZZLE_Y,
220b8e80941Smrg   PIPE_SWIZZLE_0,
221b8e80941Smrg   PIPE_SWIZZLE_1,
222b8e80941Smrg   PIPE_SWIZZLE_NONE
223b8e80941Smrg};
224b8e80941Smrg
225b8e80941Smrg
226b8e80941Smrg/**
227b8e80941Smrg * Initialize the shader-neutral fields of svga_compile_key from context
228b8e80941Smrg * state.  This is basically the texture-related state.
229b8e80941Smrg */
230b8e80941Smrgvoid
231b8e80941Smrgsvga_init_shader_key_common(const struct svga_context *svga,
232b8e80941Smrg                            enum pipe_shader_type shader,
233b8e80941Smrg                            struct svga_compile_key *key)
234b8e80941Smrg{
235b8e80941Smrg   unsigned i, idx = 0;
236b8e80941Smrg
237b8e80941Smrg   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
238b8e80941Smrg
239b8e80941Smrg   /* In case the number of samplers and sampler_views doesn't match,
240b8e80941Smrg    * loop over the lower of the two counts.
241b8e80941Smrg    */
242b8e80941Smrg   key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
243b8e80941Smrg                            svga->curr.num_samplers[shader]);
244b8e80941Smrg
245b8e80941Smrg   for (i = 0; i < key->num_textures; i++) {
246b8e80941Smrg      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
247b8e80941Smrg      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
248b8e80941Smrg      if (view) {
249b8e80941Smrg         assert(view->texture);
250b8e80941Smrg         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
251b8e80941Smrg
252b8e80941Smrg         /* 1D/2D array textures with one slice and cube map array textures
253b8e80941Smrg          * with one cube are treated as non-arrays by the SVGA3D device.
254b8e80941Smrg          * Set the is_array flag only if we know that we have more than 1
255b8e80941Smrg          * element.  This will be used to select shader instruction/resource
256b8e80941Smrg          * types during shader translation.
257b8e80941Smrg          */
258b8e80941Smrg         switch (view->texture->target) {
259b8e80941Smrg         case PIPE_TEXTURE_1D_ARRAY:
260b8e80941Smrg         case PIPE_TEXTURE_2D_ARRAY:
261b8e80941Smrg            key->tex[i].is_array = view->texture->array_size > 1;
262b8e80941Smrg            break;
263b8e80941Smrg         case PIPE_TEXTURE_CUBE_ARRAY:
264b8e80941Smrg            key->tex[i].is_array = view->texture->array_size > 6;
265b8e80941Smrg            break;
266b8e80941Smrg         default:
267b8e80941Smrg            ; /* nothing / silence compiler warning */
268b8e80941Smrg         }
269b8e80941Smrg
270b8e80941Smrg         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
271b8e80941Smrg         key->tex[i].num_samples = view->texture->nr_samples;
272b8e80941Smrg
273b8e80941Smrg         const enum pipe_swizzle *swizzle_tab;
274b8e80941Smrg         if (view->texture->target == PIPE_BUFFER) {
275b8e80941Smrg            SVGA3dSurfaceFormat svga_format;
276b8e80941Smrg            unsigned tf_flags;
277b8e80941Smrg
278b8e80941Smrg            /* Apply any special swizzle mask for the view format if needed */
279b8e80941Smrg
280b8e80941Smrg            svga_translate_texture_buffer_view_format(view->format,
281b8e80941Smrg                                                      &svga_format, &tf_flags);
282b8e80941Smrg            if (tf_flags & TF_000X)
283b8e80941Smrg               swizzle_tab = set_000X;
284b8e80941Smrg            else if (tf_flags & TF_XXXX)
285b8e80941Smrg               swizzle_tab = set_XXXX;
286b8e80941Smrg            else if (tf_flags & TF_XXX1)
287b8e80941Smrg               swizzle_tab = set_XXX1;
288b8e80941Smrg            else if (tf_flags & TF_XXXY)
289b8e80941Smrg               swizzle_tab = set_XXXY;
290b8e80941Smrg            else
291b8e80941Smrg               swizzle_tab = copy_alpha;
292b8e80941Smrg         }
293b8e80941Smrg         else {
294b8e80941Smrg            /* If we have a non-alpha view into an svga3d surface with an
295b8e80941Smrg             * alpha channel, then explicitly set the alpha channel to 1
296b8e80941Smrg             * when sampling. Note that we need to check the
297b8e80941Smrg             * actual device format to cover also imported surface cases.
298b8e80941Smrg             */
299b8e80941Smrg            swizzle_tab =
300b8e80941Smrg               (!util_format_has_alpha(view->format) &&
301b8e80941Smrg                svga_texture_device_format_has_alpha(view->texture)) ?
302b8e80941Smrg                set_alpha : copy_alpha;
303b8e80941Smrg
304b8e80941Smrg            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
305b8e80941Smrg                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
306b8e80941Smrg               swizzle_tab = set_alpha;
307b8e80941Smrg         }
308b8e80941Smrg
309b8e80941Smrg         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
310b8e80941Smrg         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
311b8e80941Smrg         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
312b8e80941Smrg         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
313b8e80941Smrg      }
314b8e80941Smrg
315b8e80941Smrg      if (sampler) {
316b8e80941Smrg         if (!sampler->normalized_coords) {
317b8e80941Smrg            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
318b8e80941Smrg            key->tex[i].width_height_idx = idx++;
319b8e80941Smrg            key->tex[i].unnormalized = TRUE;
320b8e80941Smrg            ++key->num_unnormalized_coords;
321b8e80941Smrg
322b8e80941Smrg            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
323b8e80941Smrg                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
324b8e80941Smrg                key->tex[i].texel_bias = TRUE;
325b8e80941Smrg            }
326b8e80941Smrg         }
327b8e80941Smrg      }
328b8e80941Smrg   }
329b8e80941Smrg}
330b8e80941Smrg
331b8e80941Smrg
332b8e80941Smrg/** Search for a compiled shader variant with the same compile key */
333b8e80941Smrgstruct svga_shader_variant *
334b8e80941Smrgsvga_search_shader_key(const struct svga_shader *shader,
335b8e80941Smrg                       const struct svga_compile_key *key)
336b8e80941Smrg{
337b8e80941Smrg   struct svga_shader_variant *variant = shader->variants;
338b8e80941Smrg
339b8e80941Smrg   assert(key);
340b8e80941Smrg
341b8e80941Smrg   for ( ; variant; variant = variant->next) {
342b8e80941Smrg      if (svga_compile_keys_equal(key, &variant->key))
343b8e80941Smrg         return variant;
344b8e80941Smrg   }
345b8e80941Smrg   return NULL;
346b8e80941Smrg}
347b8e80941Smrg
348b8e80941Smrg/** Search for a shader with the same token key */
349b8e80941Smrgstruct svga_shader *
350b8e80941Smrgsvga_search_shader_token_key(struct svga_shader *pshader,
351b8e80941Smrg                             const struct svga_token_key *key)
352b8e80941Smrg{
353b8e80941Smrg   struct svga_shader *shader = pshader;
354b8e80941Smrg
355b8e80941Smrg   assert(key);
356b8e80941Smrg
357b8e80941Smrg   for ( ; shader; shader = shader->next) {
358b8e80941Smrg      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
359b8e80941Smrg         return shader;
360b8e80941Smrg   }
361b8e80941Smrg   return NULL;
362b8e80941Smrg}
363b8e80941Smrg
364b8e80941Smrg/**
365b8e80941Smrg * Helper function to define a gb shader for non-vgpu10 device
366b8e80941Smrg */
367b8e80941Smrgstatic enum pipe_error
368b8e80941Smrgdefine_gb_shader_vgpu9(struct svga_context *svga,
369b8e80941Smrg                       struct svga_shader_variant *variant,
370b8e80941Smrg                       unsigned codeLen)
371b8e80941Smrg{
372b8e80941Smrg   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
373b8e80941Smrg   enum pipe_error ret;
374b8e80941Smrg
375b8e80941Smrg   /**
376b8e80941Smrg    * Create gb memory for the shader and upload the shader code.
377b8e80941Smrg    * Kernel module will allocate an id for the shader and issue
378b8e80941Smrg    * the DefineGBShader command.
379b8e80941Smrg    */
380b8e80941Smrg   variant->gb_shader = sws->shader_create(sws, variant->type,
381b8e80941Smrg                                           variant->tokens, codeLen);
382b8e80941Smrg
383b8e80941Smrg   if (!variant->gb_shader)
384b8e80941Smrg      return PIPE_ERROR_OUT_OF_MEMORY;
385b8e80941Smrg
386b8e80941Smrg   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
387b8e80941Smrg
388b8e80941Smrg   return ret;
389b8e80941Smrg}
390b8e80941Smrg
391b8e80941Smrg/**
392b8e80941Smrg * Helper function to define a gb shader for vgpu10 device
393b8e80941Smrg */
394b8e80941Smrgstatic enum pipe_error
395b8e80941Smrgdefine_gb_shader_vgpu10(struct svga_context *svga,
396b8e80941Smrg                        struct svga_shader_variant *variant,
397b8e80941Smrg                        unsigned codeLen)
398b8e80941Smrg{
399b8e80941Smrg   struct svga_winsys_context *swc = svga->swc;
400b8e80941Smrg   enum pipe_error ret;
401b8e80941Smrg
402b8e80941Smrg   /**
403b8e80941Smrg    * Shaders in VGPU10 enabled device reside in the device COTable.
404b8e80941Smrg    * SVGA driver will allocate an integer ID for the shader and
405b8e80941Smrg    * issue DXDefineShader and DXBindShader commands.
406b8e80941Smrg    */
407b8e80941Smrg   variant->id = util_bitmask_add(svga->shader_id_bm);
408b8e80941Smrg   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
409b8e80941Smrg      return PIPE_ERROR_OUT_OF_MEMORY;
410b8e80941Smrg   }
411b8e80941Smrg
412b8e80941Smrg   /* Create gb memory for the shader and upload the shader code */
413b8e80941Smrg   variant->gb_shader = swc->shader_create(swc,
414b8e80941Smrg                                           variant->id, variant->type,
415b8e80941Smrg                                           variant->tokens, codeLen);
416b8e80941Smrg
417b8e80941Smrg   if (!variant->gb_shader) {
418b8e80941Smrg      /* Free the shader ID */
419b8e80941Smrg      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
420b8e80941Smrg      goto fail_no_allocation;
421b8e80941Smrg   }
422b8e80941Smrg
423b8e80941Smrg   /**
424b8e80941Smrg    * Since we don't want to do any flush within state emission to avoid
425b8e80941Smrg    * partial state in a command buffer, it's important to make sure that
426b8e80941Smrg    * there is enough room to send both the DXDefineShader & DXBindShader
427b8e80941Smrg    * commands in the same command buffer. So let's send both
428b8e80941Smrg    * commands in one command reservation. If it fails, we'll undo
429b8e80941Smrg    * the shader creation and return an error.
430b8e80941Smrg    */
431b8e80941Smrg   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
432b8e80941Smrg                                           variant->id, variant->type, codeLen);
433b8e80941Smrg
434b8e80941Smrg   if (ret != PIPE_OK)
435b8e80941Smrg      goto fail;
436b8e80941Smrg
437b8e80941Smrg   return PIPE_OK;
438b8e80941Smrg
439b8e80941Smrgfail:
440b8e80941Smrg   swc->shader_destroy(swc, variant->gb_shader);
441b8e80941Smrg   variant->gb_shader = NULL;
442b8e80941Smrg
443b8e80941Smrgfail_no_allocation:
444b8e80941Smrg   util_bitmask_clear(svga->shader_id_bm, variant->id);
445b8e80941Smrg   variant->id = UTIL_BITMASK_INVALID_INDEX;
446b8e80941Smrg
447b8e80941Smrg   return PIPE_ERROR_OUT_OF_MEMORY;
448b8e80941Smrg}
449848b8605Smrg
450848b8605Smrg/**
451848b8605Smrg * Issue the SVGA3D commands to define a new shader.
452b8e80941Smrg * \param variant  contains the shader tokens, etc.  The result->id field will
453b8e80941Smrg *                 be set here.
454848b8605Smrg */
455848b8605Smrgenum pipe_error
456848b8605Smrgsvga_define_shader(struct svga_context *svga,
457848b8605Smrg                   struct svga_shader_variant *variant)
458848b8605Smrg{
459848b8605Smrg   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
460b8e80941Smrg   enum pipe_error ret;
461848b8605Smrg
462b8e80941Smrg   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
463848b8605Smrg
464b8e80941Smrg   variant->id = UTIL_BITMASK_INVALID_INDEX;
465848b8605Smrg
466b8e80941Smrg   if (svga_have_gb_objects(svga)) {
467b8e80941Smrg      if (svga_have_vgpu10(svga))
468b8e80941Smrg         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
469b8e80941Smrg      else
470b8e80941Smrg         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
471848b8605Smrg   }
472848b8605Smrg   else {
473848b8605Smrg      /* Allocate an integer ID for the shader */
474848b8605Smrg      variant->id = util_bitmask_add(svga->shader_id_bm);
475848b8605Smrg      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
476b8e80941Smrg         ret = PIPE_ERROR_OUT_OF_MEMORY;
477b8e80941Smrg         goto done;
478848b8605Smrg      }
479848b8605Smrg
480848b8605Smrg      /* Issue SVGA3D device command to define the shader */
481848b8605Smrg      ret = SVGA3D_DefineShader(svga->swc,
482848b8605Smrg                                variant->id,
483b8e80941Smrg                                variant->type,
484848b8605Smrg                                variant->tokens,
485848b8605Smrg                                codeLen);
486848b8605Smrg      if (ret != PIPE_OK) {
487848b8605Smrg         /* free the ID */
488848b8605Smrg         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
489848b8605Smrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
490848b8605Smrg         variant->id = UTIL_BITMASK_INVALID_INDEX;
491848b8605Smrg      }
492848b8605Smrg   }
493848b8605Smrg
494b8e80941Smrgdone:
495b8e80941Smrg   SVGA_STATS_TIME_POP(svga_sws(svga));
496b8e80941Smrg   return ret;
497848b8605Smrg}
498848b8605Smrg
499848b8605Smrg
500b8e80941Smrg/**
501b8e80941Smrg * Issue the SVGA3D commands to set/bind a shader.
502b8e80941Smrg * \param result  the shader to bind.
503b8e80941Smrg */
504848b8605Smrgenum pipe_error
505b8e80941Smrgsvga_set_shader(struct svga_context *svga,
506b8e80941Smrg                SVGA3dShaderType type,
507b8e80941Smrg                struct svga_shader_variant *variant)
508848b8605Smrg{
509b8e80941Smrg   enum pipe_error ret;
510b8e80941Smrg   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
511b8e80941Smrg
512b8e80941Smrg   assert(type == SVGA3D_SHADERTYPE_VS ||
513b8e80941Smrg          type == SVGA3D_SHADERTYPE_GS ||
514b8e80941Smrg          type == SVGA3D_SHADERTYPE_PS);
515848b8605Smrg
516848b8605Smrg   if (svga_have_gb_objects(svga)) {
517b8e80941Smrg      struct svga_winsys_gb_shader *gbshader =
518b8e80941Smrg         variant ? variant->gb_shader : NULL;
519848b8605Smrg
520b8e80941Smrg      if (svga_have_vgpu10(svga))
521b8e80941Smrg         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
522b8e80941Smrg      else
523b8e80941Smrg         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
524b8e80941Smrg   }
525b8e80941Smrg   else {
526b8e80941Smrg      ret = SVGA3D_SetShader(svga->swc, type, id);
527848b8605Smrg   }
528848b8605Smrg
529b8e80941Smrg   return ret;
530b8e80941Smrg}
531848b8605Smrg
532848b8605Smrg
533b8e80941Smrgstruct svga_shader_variant *
534b8e80941Smrgsvga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
535b8e80941Smrg{
536b8e80941Smrg   struct svga_shader_variant *variant = CALLOC_STRUCT(svga_shader_variant);
537b8e80941Smrg
538b8e80941Smrg   if (variant) {
539b8e80941Smrg      variant->type = svga_shader_type(type);
540b8e80941Smrg      svga->hud.num_shaders++;
541b8e80941Smrg   }
542b8e80941Smrg   return variant;
543b8e80941Smrg}
544b8e80941Smrg
545b8e80941Smrg
546b8e80941Smrgvoid
547b8e80941Smrgsvga_destroy_shader_variant(struct svga_context *svga,
548b8e80941Smrg                            struct svga_shader_variant *variant)
549b8e80941Smrg{
550b8e80941Smrg   enum pipe_error ret = PIPE_OK;
551848b8605Smrg
552b8e80941Smrg   if (svga_have_gb_objects(svga) && variant->gb_shader) {
553b8e80941Smrg      if (svga_have_vgpu10(svga)) {
554b8e80941Smrg         struct svga_winsys_context *swc = svga->swc;
555b8e80941Smrg         swc->shader_destroy(swc, variant->gb_shader);
556b8e80941Smrg         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
557b8e80941Smrg         if (ret != PIPE_OK) {
558b8e80941Smrg            /* flush and try again */
559b8e80941Smrg            svga_context_flush(svga, NULL);
560b8e80941Smrg            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
561b8e80941Smrg            assert(ret == PIPE_OK);
562b8e80941Smrg         }
563b8e80941Smrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
564b8e80941Smrg      }
565b8e80941Smrg      else {
566b8e80941Smrg         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
567b8e80941Smrg         sws->shader_destroy(sws, variant->gb_shader);
568b8e80941Smrg      }
569b8e80941Smrg      variant->gb_shader = NULL;
570b8e80941Smrg   }
571b8e80941Smrg   else {
572b8e80941Smrg      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
573b8e80941Smrg         ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type);
574b8e80941Smrg         if (ret != PIPE_OK) {
575b8e80941Smrg            /* flush and try again */
576b8e80941Smrg            svga_context_flush(svga, NULL);
577b8e80941Smrg            ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type);
578b8e80941Smrg            assert(ret == PIPE_OK);
579b8e80941Smrg         }
580b8e80941Smrg         util_bitmask_clear(svga->shader_id_bm, variant->id);
581b8e80941Smrg      }
582848b8605Smrg   }
583848b8605Smrg
584848b8605Smrg   FREE((unsigned *)variant->tokens);
585848b8605Smrg   FREE(variant);
586848b8605Smrg
587b8e80941Smrg   svga->hud.num_shaders--;
588b8e80941Smrg}
589b8e80941Smrg
590b8e80941Smrg/*
591b8e80941Smrg * Rebind shaders.
592b8e80941Smrg * Called at the beginning of every new command buffer to ensure that
593b8e80941Smrg * shaders are properly paged-in. Instead of sending the SetShader
594b8e80941Smrg * command, this function sends a private allocation command to
595b8e80941Smrg * page in a shader. This avoids emitting redundant state to the device
596b8e80941Smrg * just to page in a resource.
597b8e80941Smrg */
598b8e80941Smrgenum pipe_error
599b8e80941Smrgsvga_rebind_shaders(struct svga_context *svga)
600b8e80941Smrg{
601b8e80941Smrg   struct svga_winsys_context *swc = svga->swc;
602b8e80941Smrg   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
603b8e80941Smrg   enum pipe_error ret;
604b8e80941Smrg
605b8e80941Smrg   assert(svga_have_vgpu10(svga));
606b8e80941Smrg
607b8e80941Smrg   /**
608b8e80941Smrg    * If the underlying winsys layer does not need resource rebinding,
609b8e80941Smrg    * just clear the rebind flags and return.
610b8e80941Smrg    */
611b8e80941Smrg   if (swc->resource_rebind == NULL) {
612b8e80941Smrg      svga->rebind.flags.vs = 0;
613b8e80941Smrg      svga->rebind.flags.gs = 0;
614b8e80941Smrg      svga->rebind.flags.fs = 0;
615b8e80941Smrg
616b8e80941Smrg      return PIPE_OK;
617b8e80941Smrg   }
618b8e80941Smrg
619b8e80941Smrg   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
620b8e80941Smrg      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
621b8e80941Smrg      if (ret != PIPE_OK)
622b8e80941Smrg         return ret;
623b8e80941Smrg   }
624b8e80941Smrg   svga->rebind.flags.vs = 0;
625b8e80941Smrg
626b8e80941Smrg   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
627b8e80941Smrg      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
628b8e80941Smrg      if (ret != PIPE_OK)
629b8e80941Smrg         return ret;
630b8e80941Smrg   }
631b8e80941Smrg   svga->rebind.flags.gs = 0;
632b8e80941Smrg
633b8e80941Smrg   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
634b8e80941Smrg      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
635b8e80941Smrg      if (ret != PIPE_OK)
636b8e80941Smrg         return ret;
637b8e80941Smrg   }
638b8e80941Smrg   svga->rebind.flags.fs = 0;
639b8e80941Smrg
640b8e80941Smrg   return PIPE_OK;
641848b8605Smrg}
642