/* svga_shader.c revision 01e04c3f */
1/********************************************************** 2 * Copyright 2008-2012 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26#include "util/u_bitmask.h" 27#include "util/u_memory.h" 28#include "util/u_format.h" 29#include "svga_context.h" 30#include "svga_cmd.h" 31#include "svga_format.h" 32#include "svga_shader.h" 33#include "svga_resource_texture.h" 34 35 36/** 37 * This bit isn't really used anywhere. It only serves to help 38 * generate a unique "signature" for the vertex shader output bitmask. 39 * Shader input/output signatures are used to resolve shader linking 40 * issues. 41 */ 42#define FOG_GENERIC_BIT (((uint64_t) 1) << 63) 43 44 45/** 46 * Use the shader info to generate a bitmask indicating which generic 47 * inputs are used by the shader. A set bit indicates that GENERIC[i] 48 * is used. 
49 */ 50uint64_t 51svga_get_generic_inputs_mask(const struct tgsi_shader_info *info) 52{ 53 unsigned i; 54 uint64_t mask = 0x0; 55 56 for (i = 0; i < info->num_inputs; i++) { 57 if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { 58 unsigned j = info->input_semantic_index[i]; 59 assert(j < sizeof(mask) * 8); 60 mask |= ((uint64_t) 1) << j; 61 } 62 } 63 64 return mask; 65} 66 67 68/** 69 * Scan shader info to return a bitmask of written outputs. 70 */ 71uint64_t 72svga_get_generic_outputs_mask(const struct tgsi_shader_info *info) 73{ 74 unsigned i; 75 uint64_t mask = 0x0; 76 77 for (i = 0; i < info->num_outputs; i++) { 78 switch (info->output_semantic_name[i]) { 79 case TGSI_SEMANTIC_GENERIC: 80 { 81 unsigned j = info->output_semantic_index[i]; 82 assert(j < sizeof(mask) * 8); 83 mask |= ((uint64_t) 1) << j; 84 } 85 break; 86 case TGSI_SEMANTIC_FOG: 87 mask |= FOG_GENERIC_BIT; 88 break; 89 } 90 } 91 92 return mask; 93} 94 95 96 97/** 98 * Given a mask of used generic variables (as returned by the above functions) 99 * fill in a table which maps those indexes to small integers. 100 * This table is used by the remap_generic_index() function in 101 * svga_tgsi_decl_sm30.c 102 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and 103 * GENERIC[3] are used. The remap_table will contain: 104 * table[1] = 0; 105 * table[3] = 1; 106 * The remaining table entries will be filled in with the next unused 107 * generic index (in this example, 2). 
108 */ 109void 110svga_remap_generics(uint64_t generics_mask, 111 int8_t remap_table[MAX_GENERIC_VARYING]) 112{ 113 /* Note texcoord[0] is reserved so start at 1 */ 114 unsigned count = 1, i; 115 116 for (i = 0; i < MAX_GENERIC_VARYING; i++) { 117 remap_table[i] = -1; 118 } 119 120 /* for each bit set in generic_mask */ 121 while (generics_mask) { 122 unsigned index = ffsll(generics_mask) - 1; 123 remap_table[index] = count++; 124 generics_mask &= ~((uint64_t) 1 << index); 125 } 126} 127 128 129/** 130 * Use the generic remap table to map a TGSI generic varying variable 131 * index to a small integer. If the remapping table doesn't have a 132 * valid value for the given index (the table entry is -1) it means 133 * the fragment shader doesn't use that VS output. Just allocate 134 * the next free value in that case. Alternately, we could cull 135 * VS instructions that write to register, or replace the register 136 * with a dummy temp register. 137 * XXX TODO: we should do one of the later as it would save precious 138 * texcoord registers. 139 */ 140int 141svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], 142 int generic_index) 143{ 144 assert(generic_index < MAX_GENERIC_VARYING); 145 146 if (generic_index >= MAX_GENERIC_VARYING) { 147 /* just don't return a random/garbage value */ 148 generic_index = MAX_GENERIC_VARYING - 1; 149 } 150 151 if (remap_table[generic_index] == -1) { 152 /* This is a VS output that has no matching PS input. Find a 153 * free index. 
154 */ 155 int i, max = 0; 156 for (i = 0; i < MAX_GENERIC_VARYING; i++) { 157 max = MAX2(max, remap_table[i]); 158 } 159 remap_table[generic_index] = max + 1; 160 } 161 162 return remap_table[generic_index]; 163} 164 165static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = { 166 PIPE_SWIZZLE_X, 167 PIPE_SWIZZLE_Y, 168 PIPE_SWIZZLE_Z, 169 PIPE_SWIZZLE_W, 170 PIPE_SWIZZLE_0, 171 PIPE_SWIZZLE_1, 172 PIPE_SWIZZLE_NONE 173}; 174 175static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = { 176 PIPE_SWIZZLE_X, 177 PIPE_SWIZZLE_Y, 178 PIPE_SWIZZLE_Z, 179 PIPE_SWIZZLE_1, 180 PIPE_SWIZZLE_0, 181 PIPE_SWIZZLE_1, 182 PIPE_SWIZZLE_NONE 183}; 184 185static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = { 186 PIPE_SWIZZLE_0, 187 PIPE_SWIZZLE_0, 188 PIPE_SWIZZLE_0, 189 PIPE_SWIZZLE_X, 190 PIPE_SWIZZLE_0, 191 PIPE_SWIZZLE_1, 192 PIPE_SWIZZLE_NONE 193}; 194 195static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = { 196 PIPE_SWIZZLE_X, 197 PIPE_SWIZZLE_X, 198 PIPE_SWIZZLE_X, 199 PIPE_SWIZZLE_X, 200 PIPE_SWIZZLE_0, 201 PIPE_SWIZZLE_1, 202 PIPE_SWIZZLE_NONE 203}; 204 205static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = { 206 PIPE_SWIZZLE_X, 207 PIPE_SWIZZLE_X, 208 PIPE_SWIZZLE_X, 209 PIPE_SWIZZLE_1, 210 PIPE_SWIZZLE_0, 211 PIPE_SWIZZLE_1, 212 PIPE_SWIZZLE_NONE 213}; 214 215static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = { 216 PIPE_SWIZZLE_X, 217 PIPE_SWIZZLE_X, 218 PIPE_SWIZZLE_X, 219 PIPE_SWIZZLE_Y, 220 PIPE_SWIZZLE_0, 221 PIPE_SWIZZLE_1, 222 PIPE_SWIZZLE_NONE 223}; 224 225 226/** 227 * Initialize the shader-neutral fields of svga_compile_key from context 228 * state. This is basically the texture-related state. 
229 */ 230void 231svga_init_shader_key_common(const struct svga_context *svga, 232 enum pipe_shader_type shader, 233 struct svga_compile_key *key) 234{ 235 unsigned i, idx = 0; 236 237 assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views)); 238 239 /* In case the number of samplers and sampler_views doesn't match, 240 * loop over the lower of the two counts. 241 */ 242 key->num_textures = MAX2(svga->curr.num_sampler_views[shader], 243 svga->curr.num_samplers[shader]); 244 245 for (i = 0; i < key->num_textures; i++) { 246 struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; 247 const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; 248 if (view) { 249 assert(view->texture); 250 assert(view->texture->target < (1 << 4)); /* texture_target:4 */ 251 252 /* 1D/2D array textures with one slice and cube map array textures 253 * with one cube are treated as non-arrays by the SVGA3D device. 254 * Set the is_array flag only if we know that we have more than 1 255 * element. This will be used to select shader instruction/resource 256 * types during shader translation. 
257 */ 258 switch (view->texture->target) { 259 case PIPE_TEXTURE_1D_ARRAY: 260 case PIPE_TEXTURE_2D_ARRAY: 261 key->tex[i].is_array = view->texture->array_size > 1; 262 break; 263 case PIPE_TEXTURE_CUBE_ARRAY: 264 key->tex[i].is_array = view->texture->array_size > 6; 265 break; 266 default: 267 ; /* nothing / silence compiler warning */ 268 } 269 270 assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */ 271 key->tex[i].num_samples = view->texture->nr_samples; 272 273 const enum pipe_swizzle *swizzle_tab; 274 if (view->texture->target == PIPE_BUFFER) { 275 SVGA3dSurfaceFormat svga_format; 276 unsigned tf_flags; 277 278 /* Apply any special swizzle mask for the view format if needed */ 279 280 svga_translate_texture_buffer_view_format(view->format, 281 &svga_format, &tf_flags); 282 if (tf_flags & TF_000X) 283 swizzle_tab = set_000X; 284 else if (tf_flags & TF_XXXX) 285 swizzle_tab = set_XXXX; 286 else if (tf_flags & TF_XXX1) 287 swizzle_tab = set_XXX1; 288 else if (tf_flags & TF_XXXY) 289 swizzle_tab = set_XXXY; 290 else 291 swizzle_tab = copy_alpha; 292 } 293 else { 294 /* If we have a non-alpha view into an svga3d surface with an 295 * alpha channel, then explicitly set the alpha channel to 1 296 * when sampling. Note that we need to check the 297 * actual device format to cover also imported surface cases. 298 */ 299 swizzle_tab = 300 (!util_format_has_alpha(view->format) && 301 svga_texture_device_format_has_alpha(view->texture)) ? 
302 set_alpha : copy_alpha; 303 304 if (view->texture->format == PIPE_FORMAT_DXT1_RGB || 305 view->texture->format == PIPE_FORMAT_DXT1_SRGB) 306 swizzle_tab = set_alpha; 307 } 308 309 key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r]; 310 key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g]; 311 key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b]; 312 key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a]; 313 } 314 315 if (sampler) { 316 if (!sampler->normalized_coords) { 317 assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ 318 key->tex[i].width_height_idx = idx++; 319 key->tex[i].unnormalized = TRUE; 320 ++key->num_unnormalized_coords; 321 322 if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST || 323 sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) { 324 key->tex[i].texel_bias = TRUE; 325 } 326 } 327 } 328 } 329} 330 331 332/** Search for a compiled shader variant with the same compile key */ 333struct svga_shader_variant * 334svga_search_shader_key(const struct svga_shader *shader, 335 const struct svga_compile_key *key) 336{ 337 struct svga_shader_variant *variant = shader->variants; 338 339 assert(key); 340 341 for ( ; variant; variant = variant->next) { 342 if (svga_compile_keys_equal(key, &variant->key)) 343 return variant; 344 } 345 return NULL; 346} 347 348/** Search for a shader with the same token key */ 349struct svga_shader * 350svga_search_shader_token_key(struct svga_shader *pshader, 351 const struct svga_token_key *key) 352{ 353 struct svga_shader *shader = pshader; 354 355 assert(key); 356 357 for ( ; shader; shader = shader->next) { 358 if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0) 359 return shader; 360 } 361 return NULL; 362} 363 364/** 365 * Helper function to define a gb shader for non-vgpu10 device 366 */ 367static enum pipe_error 368define_gb_shader_vgpu9(struct svga_context *svga, 369 SVGA3dShaderType type, 370 struct svga_shader_variant *variant, 371 unsigned codeLen) 372{ 373 struct 
svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 374 enum pipe_error ret; 375 376 /** 377 * Create gb memory for the shader and upload the shader code. 378 * Kernel module will allocate an id for the shader and issue 379 * the DefineGBShader command. 380 */ 381 variant->gb_shader = sws->shader_create(sws, type, 382 variant->tokens, codeLen); 383 384 if (!variant->gb_shader) 385 return PIPE_ERROR_OUT_OF_MEMORY; 386 387 ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader); 388 389 return ret; 390} 391 392/** 393 * Helper function to define a gb shader for vgpu10 device 394 */ 395static enum pipe_error 396define_gb_shader_vgpu10(struct svga_context *svga, 397 SVGA3dShaderType type, 398 struct svga_shader_variant *variant, 399 unsigned codeLen) 400{ 401 struct svga_winsys_context *swc = svga->swc; 402 enum pipe_error ret; 403 404 /** 405 * Shaders in VGPU10 enabled device reside in the device COTable. 406 * SVGA driver will allocate an integer ID for the shader and 407 * issue DXDefineShader and DXBindShader commands. 408 */ 409 variant->id = util_bitmask_add(svga->shader_id_bm); 410 if (variant->id == UTIL_BITMASK_INVALID_INDEX) { 411 return PIPE_ERROR_OUT_OF_MEMORY; 412 } 413 414 /* Create gb memory for the shader and upload the shader code */ 415 variant->gb_shader = swc->shader_create(swc, 416 variant->id, type, 417 variant->tokens, codeLen); 418 419 if (!variant->gb_shader) { 420 /* Free the shader ID */ 421 assert(variant->id != UTIL_BITMASK_INVALID_INDEX); 422 goto fail_no_allocation; 423 } 424 425 /** 426 * Since we don't want to do any flush within state emission to avoid 427 * partial state in a command buffer, it's important to make sure that 428 * there is enough room to send both the DXDefineShader & DXBindShader 429 * commands in the same command buffer. So let's send both 430 * commands in one command reservation. If it fails, we'll undo 431 * the shader creation and return an error. 
432 */ 433 ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader, 434 variant->id, type, codeLen); 435 436 if (ret != PIPE_OK) 437 goto fail; 438 439 return PIPE_OK; 440 441fail: 442 swc->shader_destroy(swc, variant->gb_shader); 443 variant->gb_shader = NULL; 444 445fail_no_allocation: 446 util_bitmask_clear(svga->shader_id_bm, variant->id); 447 variant->id = UTIL_BITMASK_INVALID_INDEX; 448 449 return PIPE_ERROR_OUT_OF_MEMORY; 450} 451 452/** 453 * Issue the SVGA3D commands to define a new shader. 454 * \param variant contains the shader tokens, etc. The result->id field will 455 * be set here. 456 */ 457enum pipe_error 458svga_define_shader(struct svga_context *svga, 459 SVGA3dShaderType type, 460 struct svga_shader_variant *variant) 461{ 462 unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]); 463 enum pipe_error ret; 464 465 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER); 466 467 variant->id = UTIL_BITMASK_INVALID_INDEX; 468 469 if (svga_have_gb_objects(svga)) { 470 if (svga_have_vgpu10(svga)) 471 ret = define_gb_shader_vgpu10(svga, type, variant, codeLen); 472 else 473 ret = define_gb_shader_vgpu9(svga, type, variant, codeLen); 474 } 475 else { 476 /* Allocate an integer ID for the shader */ 477 variant->id = util_bitmask_add(svga->shader_id_bm); 478 if (variant->id == UTIL_BITMASK_INVALID_INDEX) { 479 ret = PIPE_ERROR_OUT_OF_MEMORY; 480 goto done; 481 } 482 483 /* Issue SVGA3D device command to define the shader */ 484 ret = SVGA3D_DefineShader(svga->swc, 485 variant->id, 486 type, 487 variant->tokens, 488 codeLen); 489 if (ret != PIPE_OK) { 490 /* free the ID */ 491 assert(variant->id != UTIL_BITMASK_INVALID_INDEX); 492 util_bitmask_clear(svga->shader_id_bm, variant->id); 493 variant->id = UTIL_BITMASK_INVALID_INDEX; 494 } 495 } 496 497done: 498 SVGA_STATS_TIME_POP(svga_sws(svga)); 499 return ret; 500} 501 502 503/** 504 * Issue the SVGA3D commands to set/bind a shader. 505 * \param result the shader to bind. 
506 */ 507enum pipe_error 508svga_set_shader(struct svga_context *svga, 509 SVGA3dShaderType type, 510 struct svga_shader_variant *variant) 511{ 512 enum pipe_error ret; 513 unsigned id = variant ? variant->id : SVGA3D_INVALID_ID; 514 515 assert(type == SVGA3D_SHADERTYPE_VS || 516 type == SVGA3D_SHADERTYPE_GS || 517 type == SVGA3D_SHADERTYPE_PS); 518 519 if (svga_have_gb_objects(svga)) { 520 struct svga_winsys_gb_shader *gbshader = 521 variant ? variant->gb_shader : NULL; 522 523 if (svga_have_vgpu10(svga)) 524 ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id); 525 else 526 ret = SVGA3D_SetGBShader(svga->swc, type, gbshader); 527 } 528 else { 529 ret = SVGA3D_SetShader(svga->swc, type, id); 530 } 531 532 return ret; 533} 534 535 536struct svga_shader_variant * 537svga_new_shader_variant(struct svga_context *svga) 538{ 539 svga->hud.num_shaders++; 540 return CALLOC_STRUCT(svga_shader_variant); 541} 542 543 544void 545svga_destroy_shader_variant(struct svga_context *svga, 546 SVGA3dShaderType type, 547 struct svga_shader_variant *variant) 548{ 549 enum pipe_error ret = PIPE_OK; 550 551 if (svga_have_gb_objects(svga) && variant->gb_shader) { 552 if (svga_have_vgpu10(svga)) { 553 struct svga_winsys_context *swc = svga->swc; 554 swc->shader_destroy(swc, variant->gb_shader); 555 ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); 556 if (ret != PIPE_OK) { 557 /* flush and try again */ 558 svga_context_flush(svga, NULL); 559 ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); 560 assert(ret == PIPE_OK); 561 } 562 util_bitmask_clear(svga->shader_id_bm, variant->id); 563 } 564 else { 565 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 566 sws->shader_destroy(sws, variant->gb_shader); 567 } 568 variant->gb_shader = NULL; 569 } 570 else { 571 if (variant->id != UTIL_BITMASK_INVALID_INDEX) { 572 ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); 573 if (ret != PIPE_OK) { 574 /* flush and try again */ 575 
svga_context_flush(svga, NULL); 576 ret = SVGA3D_DestroyShader(svga->swc, variant->id, type); 577 assert(ret == PIPE_OK); 578 } 579 util_bitmask_clear(svga->shader_id_bm, variant->id); 580 } 581 } 582 583 FREE((unsigned *)variant->tokens); 584 FREE(variant); 585 586 svga->hud.num_shaders--; 587} 588 589/* 590 * Rebind shaders. 591 * Called at the beginning of every new command buffer to ensure that 592 * shaders are properly paged-in. Instead of sending the SetShader 593 * command, this function sends a private allocation command to 594 * page in a shader. This avoids emitting redundant state to the device 595 * just to page in a resource. 596 */ 597enum pipe_error 598svga_rebind_shaders(struct svga_context *svga) 599{ 600 struct svga_winsys_context *swc = svga->swc; 601 struct svga_hw_draw_state *hw = &svga->state.hw_draw; 602 enum pipe_error ret; 603 604 assert(svga_have_vgpu10(svga)); 605 606 /** 607 * If the underlying winsys layer does not need resource rebinding, 608 * just clear the rebind flags and return. 609 */ 610 if (swc->resource_rebind == NULL) { 611 svga->rebind.flags.vs = 0; 612 svga->rebind.flags.gs = 0; 613 svga->rebind.flags.fs = 0; 614 615 return PIPE_OK; 616 } 617 618 if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) { 619 ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ); 620 if (ret != PIPE_OK) 621 return ret; 622 } 623 svga->rebind.flags.vs = 0; 624 625 if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) { 626 ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ); 627 if (ret != PIPE_OK) 628 return ret; 629 } 630 svga->rebind.flags.gs = 0; 631 632 if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) { 633 ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ); 634 if (ret != PIPE_OK) 635 return ret; 636 } 637 svga->rebind.flags.fs = 0; 638 639 return PIPE_OK; 640} 641