svga_shader.c revision 01e04c3f
1/**********************************************************
2 * Copyright 2008-2012 VMware, Inc.  All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26#include "util/u_bitmask.h"
27#include "util/u_memory.h"
28#include "util/u_format.h"
29#include "svga_context.h"
30#include "svga_cmd.h"
31#include "svga_format.h"
32#include "svga_shader.h"
33#include "svga_resource_texture.h"
34
35
/**
 * Marker bit that is OR'ed into a shader's output bitmask when the shader
 * has a FOG output.  It does not stand for a real GENERIC[] slot; it only
 * makes the output "signature" of such a shader distinct.  Input/output
 * signatures are what the driver compares to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT ((uint64_t) 1 << 63)
43
44
45/**
46 * Use the shader info to generate a bitmask indicating which generic
47 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
48 * is used.
49 */
50uint64_t
51svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
52{
53   unsigned i;
54   uint64_t mask = 0x0;
55
56   for (i = 0; i < info->num_inputs; i++) {
57      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
58         unsigned j = info->input_semantic_index[i];
59         assert(j < sizeof(mask) * 8);
60         mask |= ((uint64_t) 1) << j;
61      }
62   }
63
64   return mask;
65}
66
67
68/**
69 * Scan shader info to return a bitmask of written outputs.
70 */
71uint64_t
72svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
73{
74   unsigned i;
75   uint64_t mask = 0x0;
76
77   for (i = 0; i < info->num_outputs; i++) {
78      switch (info->output_semantic_name[i]) {
79      case TGSI_SEMANTIC_GENERIC:
80         {
81            unsigned j = info->output_semantic_index[i];
82            assert(j < sizeof(mask) * 8);
83            mask |= ((uint64_t) 1) << j;
84         }
85         break;
86      case TGSI_SEMANTIC_FOG:
87         mask |= FOG_GENERIC_BIT;
88         break;
89      }
90   }
91
92   return mask;
93}
94
95
96
97/**
98 * Given a mask of used generic variables (as returned by the above functions)
99 * fill in a table which maps those indexes to small integers.
100 * This table is used by the remap_generic_index() function in
101 * svga_tgsi_decl_sm30.c
102 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
103 * GENERIC[3] are used.  The remap_table will contain:
104 *   table[1] = 0;
105 *   table[3] = 1;
106 * The remaining table entries will be filled in with the next unused
107 * generic index (in this example, 2).
108 */
109void
110svga_remap_generics(uint64_t generics_mask,
111                    int8_t remap_table[MAX_GENERIC_VARYING])
112{
113   /* Note texcoord[0] is reserved so start at 1 */
114   unsigned count = 1, i;
115
116   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
117      remap_table[i] = -1;
118   }
119
120   /* for each bit set in generic_mask */
121   while (generics_mask) {
122      unsigned index = ffsll(generics_mask) - 1;
123      remap_table[index] = count++;
124      generics_mask &= ~((uint64_t) 1 << index);
125   }
126}
127
128
129/**
130 * Use the generic remap table to map a TGSI generic varying variable
131 * index to a small integer.  If the remapping table doesn't have a
132 * valid value for the given index (the table entry is -1) it means
133 * the fragment shader doesn't use that VS output.  Just allocate
134 * the next free value in that case.  Alternately, we could cull
135 * VS instructions that write to register, or replace the register
136 * with a dummy temp register.
137 * XXX TODO: we should do one of the later as it would save precious
138 * texcoord registers.
139 */
140int
141svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
142                         int generic_index)
143{
144   assert(generic_index < MAX_GENERIC_VARYING);
145
146   if (generic_index >= MAX_GENERIC_VARYING) {
147      /* just don't return a random/garbage value */
148      generic_index = MAX_GENERIC_VARYING - 1;
149   }
150
151   if (remap_table[generic_index] == -1) {
152      /* This is a VS output that has no matching PS input.  Find a
153       * free index.
154       */
155      int i, max = 0;
156      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
157         max = MAX2(max, remap_table[i]);
158      }
159      remap_table[generic_index] = max + 1;
160   }
161
162   return remap_table[generic_index];
163}
164
165static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
166   PIPE_SWIZZLE_X,
167   PIPE_SWIZZLE_Y,
168   PIPE_SWIZZLE_Z,
169   PIPE_SWIZZLE_W,
170   PIPE_SWIZZLE_0,
171   PIPE_SWIZZLE_1,
172   PIPE_SWIZZLE_NONE
173};
174
175static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
176   PIPE_SWIZZLE_X,
177   PIPE_SWIZZLE_Y,
178   PIPE_SWIZZLE_Z,
179   PIPE_SWIZZLE_1,
180   PIPE_SWIZZLE_0,
181   PIPE_SWIZZLE_1,
182   PIPE_SWIZZLE_NONE
183};
184
185static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
186   PIPE_SWIZZLE_0,
187   PIPE_SWIZZLE_0,
188   PIPE_SWIZZLE_0,
189   PIPE_SWIZZLE_X,
190   PIPE_SWIZZLE_0,
191   PIPE_SWIZZLE_1,
192   PIPE_SWIZZLE_NONE
193};
194
195static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
196   PIPE_SWIZZLE_X,
197   PIPE_SWIZZLE_X,
198   PIPE_SWIZZLE_X,
199   PIPE_SWIZZLE_X,
200   PIPE_SWIZZLE_0,
201   PIPE_SWIZZLE_1,
202   PIPE_SWIZZLE_NONE
203};
204
205static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
206   PIPE_SWIZZLE_X,
207   PIPE_SWIZZLE_X,
208   PIPE_SWIZZLE_X,
209   PIPE_SWIZZLE_1,
210   PIPE_SWIZZLE_0,
211   PIPE_SWIZZLE_1,
212   PIPE_SWIZZLE_NONE
213};
214
215static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
216   PIPE_SWIZZLE_X,
217   PIPE_SWIZZLE_X,
218   PIPE_SWIZZLE_X,
219   PIPE_SWIZZLE_Y,
220   PIPE_SWIZZLE_0,
221   PIPE_SWIZZLE_1,
222   PIPE_SWIZZLE_NONE
223};
224
225
226/**
227 * Initialize the shader-neutral fields of svga_compile_key from context
228 * state.  This is basically the texture-related state.
229 */
230void
231svga_init_shader_key_common(const struct svga_context *svga,
232                            enum pipe_shader_type shader,
233                            struct svga_compile_key *key)
234{
235   unsigned i, idx = 0;
236
237   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
238
239   /* In case the number of samplers and sampler_views doesn't match,
240    * loop over the lower of the two counts.
241    */
242   key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
243                            svga->curr.num_samplers[shader]);
244
245   for (i = 0; i < key->num_textures; i++) {
246      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
247      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
248      if (view) {
249         assert(view->texture);
250         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
251
252         /* 1D/2D array textures with one slice and cube map array textures
253          * with one cube are treated as non-arrays by the SVGA3D device.
254          * Set the is_array flag only if we know that we have more than 1
255          * element.  This will be used to select shader instruction/resource
256          * types during shader translation.
257          */
258         switch (view->texture->target) {
259         case PIPE_TEXTURE_1D_ARRAY:
260         case PIPE_TEXTURE_2D_ARRAY:
261            key->tex[i].is_array = view->texture->array_size > 1;
262            break;
263         case PIPE_TEXTURE_CUBE_ARRAY:
264            key->tex[i].is_array = view->texture->array_size > 6;
265            break;
266         default:
267            ; /* nothing / silence compiler warning */
268         }
269
270         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
271         key->tex[i].num_samples = view->texture->nr_samples;
272
273         const enum pipe_swizzle *swizzle_tab;
274         if (view->texture->target == PIPE_BUFFER) {
275            SVGA3dSurfaceFormat svga_format;
276            unsigned tf_flags;
277
278            /* Apply any special swizzle mask for the view format if needed */
279
280            svga_translate_texture_buffer_view_format(view->format,
281                                                      &svga_format, &tf_flags);
282            if (tf_flags & TF_000X)
283               swizzle_tab = set_000X;
284            else if (tf_flags & TF_XXXX)
285               swizzle_tab = set_XXXX;
286            else if (tf_flags & TF_XXX1)
287               swizzle_tab = set_XXX1;
288            else if (tf_flags & TF_XXXY)
289               swizzle_tab = set_XXXY;
290            else
291               swizzle_tab = copy_alpha;
292         }
293         else {
294            /* If we have a non-alpha view into an svga3d surface with an
295             * alpha channel, then explicitly set the alpha channel to 1
296             * when sampling. Note that we need to check the
297             * actual device format to cover also imported surface cases.
298             */
299            swizzle_tab =
300               (!util_format_has_alpha(view->format) &&
301                svga_texture_device_format_has_alpha(view->texture)) ?
302                set_alpha : copy_alpha;
303
304            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
305                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
306               swizzle_tab = set_alpha;
307         }
308
309         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
310         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
311         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
312         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
313      }
314
315      if (sampler) {
316         if (!sampler->normalized_coords) {
317            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
318            key->tex[i].width_height_idx = idx++;
319            key->tex[i].unnormalized = TRUE;
320            ++key->num_unnormalized_coords;
321
322            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
323                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
324                key->tex[i].texel_bias = TRUE;
325            }
326         }
327      }
328   }
329}
330
331
332/** Search for a compiled shader variant with the same compile key */
333struct svga_shader_variant *
334svga_search_shader_key(const struct svga_shader *shader,
335                       const struct svga_compile_key *key)
336{
337   struct svga_shader_variant *variant = shader->variants;
338
339   assert(key);
340
341   for ( ; variant; variant = variant->next) {
342      if (svga_compile_keys_equal(key, &variant->key))
343         return variant;
344   }
345   return NULL;
346}
347
348/** Search for a shader with the same token key */
349struct svga_shader *
350svga_search_shader_token_key(struct svga_shader *pshader,
351                             const struct svga_token_key *key)
352{
353   struct svga_shader *shader = pshader;
354
355   assert(key);
356
357   for ( ; shader; shader = shader->next) {
358      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
359         return shader;
360   }
361   return NULL;
362}
363
364/**
365 * Helper function to define a gb shader for non-vgpu10 device
366 */
367static enum pipe_error
368define_gb_shader_vgpu9(struct svga_context *svga,
369                       SVGA3dShaderType type,
370                       struct svga_shader_variant *variant,
371                       unsigned codeLen)
372{
373   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
374   enum pipe_error ret;
375
376   /**
377    * Create gb memory for the shader and upload the shader code.
378    * Kernel module will allocate an id for the shader and issue
379    * the DefineGBShader command.
380    */
381   variant->gb_shader = sws->shader_create(sws, type,
382                                           variant->tokens, codeLen);
383
384   if (!variant->gb_shader)
385      return PIPE_ERROR_OUT_OF_MEMORY;
386
387   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
388
389   return ret;
390}
391
392/**
393 * Helper function to define a gb shader for vgpu10 device
394 */
395static enum pipe_error
396define_gb_shader_vgpu10(struct svga_context *svga,
397                        SVGA3dShaderType type,
398                        struct svga_shader_variant *variant,
399                        unsigned codeLen)
400{
401   struct svga_winsys_context *swc = svga->swc;
402   enum pipe_error ret;
403
404   /**
405    * Shaders in VGPU10 enabled device reside in the device COTable.
406    * SVGA driver will allocate an integer ID for the shader and
407    * issue DXDefineShader and DXBindShader commands.
408    */
409   variant->id = util_bitmask_add(svga->shader_id_bm);
410   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
411      return PIPE_ERROR_OUT_OF_MEMORY;
412   }
413
414   /* Create gb memory for the shader and upload the shader code */
415   variant->gb_shader = swc->shader_create(swc,
416                                           variant->id, type,
417                                           variant->tokens, codeLen);
418
419   if (!variant->gb_shader) {
420      /* Free the shader ID */
421      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
422      goto fail_no_allocation;
423   }
424
425   /**
426    * Since we don't want to do any flush within state emission to avoid
427    * partial state in a command buffer, it's important to make sure that
428    * there is enough room to send both the DXDefineShader & DXBindShader
429    * commands in the same command buffer. So let's send both
430    * commands in one command reservation. If it fails, we'll undo
431    * the shader creation and return an error.
432    */
433   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
434                                           variant->id, type, codeLen);
435
436   if (ret != PIPE_OK)
437      goto fail;
438
439   return PIPE_OK;
440
441fail:
442   swc->shader_destroy(swc, variant->gb_shader);
443   variant->gb_shader = NULL;
444
445fail_no_allocation:
446   util_bitmask_clear(svga->shader_id_bm, variant->id);
447   variant->id = UTIL_BITMASK_INVALID_INDEX;
448
449   return PIPE_ERROR_OUT_OF_MEMORY;
450}
451
452/**
453 * Issue the SVGA3D commands to define a new shader.
454 * \param variant  contains the shader tokens, etc.  The result->id field will
455 *                 be set here.
456 */
457enum pipe_error
458svga_define_shader(struct svga_context *svga,
459                   SVGA3dShaderType type,
460                   struct svga_shader_variant *variant)
461{
462   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
463   enum pipe_error ret;
464
465   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
466
467   variant->id = UTIL_BITMASK_INVALID_INDEX;
468
469   if (svga_have_gb_objects(svga)) {
470      if (svga_have_vgpu10(svga))
471         ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
472      else
473         ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
474   }
475   else {
476      /* Allocate an integer ID for the shader */
477      variant->id = util_bitmask_add(svga->shader_id_bm);
478      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
479         ret = PIPE_ERROR_OUT_OF_MEMORY;
480         goto done;
481      }
482
483      /* Issue SVGA3D device command to define the shader */
484      ret = SVGA3D_DefineShader(svga->swc,
485                                variant->id,
486                                type,
487                                variant->tokens,
488                                codeLen);
489      if (ret != PIPE_OK) {
490         /* free the ID */
491         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
492         util_bitmask_clear(svga->shader_id_bm, variant->id);
493         variant->id = UTIL_BITMASK_INVALID_INDEX;
494      }
495   }
496
497done:
498   SVGA_STATS_TIME_POP(svga_sws(svga));
499   return ret;
500}
501
502
503/**
504 * Issue the SVGA3D commands to set/bind a shader.
505 * \param result  the shader to bind.
506 */
507enum pipe_error
508svga_set_shader(struct svga_context *svga,
509                SVGA3dShaderType type,
510                struct svga_shader_variant *variant)
511{
512   enum pipe_error ret;
513   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
514
515   assert(type == SVGA3D_SHADERTYPE_VS ||
516          type == SVGA3D_SHADERTYPE_GS ||
517          type == SVGA3D_SHADERTYPE_PS);
518
519   if (svga_have_gb_objects(svga)) {
520      struct svga_winsys_gb_shader *gbshader =
521         variant ? variant->gb_shader : NULL;
522
523      if (svga_have_vgpu10(svga))
524         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
525      else
526         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
527   }
528   else {
529      ret = SVGA3D_SetShader(svga->swc, type, id);
530   }
531
532   return ret;
533}
534
535
536struct svga_shader_variant *
537svga_new_shader_variant(struct svga_context *svga)
538{
539   svga->hud.num_shaders++;
540   return CALLOC_STRUCT(svga_shader_variant);
541}
542
543
544void
545svga_destroy_shader_variant(struct svga_context *svga,
546                            SVGA3dShaderType type,
547                            struct svga_shader_variant *variant)
548{
549   enum pipe_error ret = PIPE_OK;
550
551   if (svga_have_gb_objects(svga) && variant->gb_shader) {
552      if (svga_have_vgpu10(svga)) {
553         struct svga_winsys_context *swc = svga->swc;
554         swc->shader_destroy(swc, variant->gb_shader);
555         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
556         if (ret != PIPE_OK) {
557            /* flush and try again */
558            svga_context_flush(svga, NULL);
559            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
560            assert(ret == PIPE_OK);
561         }
562         util_bitmask_clear(svga->shader_id_bm, variant->id);
563      }
564      else {
565         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
566         sws->shader_destroy(sws, variant->gb_shader);
567      }
568      variant->gb_shader = NULL;
569   }
570   else {
571      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
572         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
573         if (ret != PIPE_OK) {
574            /* flush and try again */
575            svga_context_flush(svga, NULL);
576            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
577            assert(ret == PIPE_OK);
578         }
579         util_bitmask_clear(svga->shader_id_bm, variant->id);
580      }
581   }
582
583   FREE((unsigned *)variant->tokens);
584   FREE(variant);
585
586   svga->hud.num_shaders--;
587}
588
589/*
590 * Rebind shaders.
591 * Called at the beginning of every new command buffer to ensure that
592 * shaders are properly paged-in. Instead of sending the SetShader
593 * command, this function sends a private allocation command to
594 * page in a shader. This avoids emitting redundant state to the device
595 * just to page in a resource.
596 */
597enum pipe_error
598svga_rebind_shaders(struct svga_context *svga)
599{
600   struct svga_winsys_context *swc = svga->swc;
601   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
602   enum pipe_error ret;
603
604   assert(svga_have_vgpu10(svga));
605
606   /**
607    * If the underlying winsys layer does not need resource rebinding,
608    * just clear the rebind flags and return.
609    */
610   if (swc->resource_rebind == NULL) {
611      svga->rebind.flags.vs = 0;
612      svga->rebind.flags.gs = 0;
613      svga->rebind.flags.fs = 0;
614
615      return PIPE_OK;
616   }
617
618   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
619      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
620      if (ret != PIPE_OK)
621         return ret;
622   }
623   svga->rebind.flags.vs = 0;
624
625   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
626      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
627      if (ret != PIPE_OK)
628         return ret;
629   }
630   svga->rebind.flags.gs = 0;
631
632   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
633      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
634      if (ret != PIPE_OK)
635         return ret;
636   }
637   svga->rebind.flags.fs = 0;
638
639   return PIPE_OK;
640}
641