1/********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26#include "util/u_inlines.h" 27#include "pipe/p_defines.h" 28#include "util/u_format.h" 29#include "util/u_math.h" 30#include "util/u_memory.h" 31#include "util/u_bitmask.h" 32#include "tgsi/tgsi_ureg.h" 33 34#include "svga_context.h" 35#include "svga_state.h" 36#include "svga_cmd.h" 37#include "svga_shader.h" 38#include "svga_resource_texture.h" 39#include "svga_tgsi.h" 40#include "svga_format.h" 41 42#include "svga_hw_reg.h" 43 44 45 46/** 47 * If we fail to compile a fragment shader (because it uses too many 48 * registers, for example) we'll use a dummy/fallback shader that 49 * simply emits a constant color (red for debug, black for release). 50 * We hit this with the Unigine/Heaven demo when Shaders = High. 51 * With black, the demo still looks good. 52 */ 53static const struct tgsi_token * 54get_dummy_fragment_shader(void) 55{ 56#ifdef DEBUG 57 static const float color[4] = { 1.0, 0.0, 0.0, 0.0 }; /* red */ 58#else 59 static const float color[4] = { 0.0, 0.0, 0.0, 0.0 }; /* black */ 60#endif 61 struct ureg_program *ureg; 62 const struct tgsi_token *tokens; 63 struct ureg_src src; 64 struct ureg_dst dst; 65 66 ureg = ureg_create(PIPE_SHADER_FRAGMENT); 67 if (!ureg) 68 return NULL; 69 70 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 71 src = ureg_DECL_immediate(ureg, color, 4); 72 ureg_MOV(ureg, dst, src); 73 ureg_END(ureg); 74 75 tokens = ureg_get_tokens(ureg, NULL); 76 77 ureg_destroy(ureg); 78 79 return tokens; 80} 81 82 83static struct svga_shader_variant * 84translate_fragment_program(struct svga_context *svga, 85 const struct svga_fragment_shader *fs, 86 const struct svga_compile_key *key) 87{ 88 if (svga_have_vgpu10(svga)) { 89 return svga_tgsi_vgpu10_translate(svga, &fs->base, key, 90 PIPE_SHADER_FRAGMENT); 91 } 92 else { 93 return svga_tgsi_vgpu9_translate(svga, &fs->base, key, 94 PIPE_SHADER_FRAGMENT); 95 } 96} 97 98 99/** 100 * Replace the given shader's instruction with a simple constant-color 101 * shader. We use this when normal shader translation fails. 102 */ 103static struct svga_shader_variant * 104get_compiled_dummy_shader(struct svga_context *svga, 105 struct svga_fragment_shader *fs, 106 const struct svga_compile_key *key) 107{ 108 const struct tgsi_token *dummy = get_dummy_fragment_shader(); 109 struct svga_shader_variant *variant; 110 111 if (!dummy) { 112 return NULL; 113 } 114 115 FREE((void *) fs->base.tokens); 116 fs->base.tokens = dummy; 117 118 tgsi_scan_shader(fs->base.tokens, &fs->base.info); 119 fs->generic_inputs = svga_get_generic_inputs_mask(&fs->base.info); 120 svga_remap_generics(fs->generic_inputs, fs->generic_remap_table); 121 122 variant = translate_fragment_program(svga, fs, key); 123 return variant; 124} 125 126 127/** 128 * Translate TGSI shader into an svga shader variant. 129 */ 130static enum pipe_error 131compile_fs(struct svga_context *svga, 132 struct svga_fragment_shader *fs, 133 const struct svga_compile_key *key, 134 struct svga_shader_variant **out_variant) 135{ 136 struct svga_shader_variant *variant; 137 enum pipe_error ret = PIPE_ERROR; 138 139 variant = translate_fragment_program(svga, fs, key); 140 if (variant == NULL) { 141 debug_printf("Failed to compile fragment shader," 142 " using dummy shader instead.\n"); 143 variant = get_compiled_dummy_shader(svga, fs, key); 144 } 145 else if (svga_shader_too_large(svga, variant)) { 146 /* too big, use dummy shader */ 147 debug_printf("Shader too large (%u bytes)," 148 " using dummy shader instead.\n", 149 (unsigned) (variant->nr_tokens 150 * sizeof(variant->tokens[0]))); 151 /* Free the too-large variant */ 152 svga_destroy_shader_variant(svga, variant); 153 /* Use simple pass-through shader instead */ 154 variant = get_compiled_dummy_shader(svga, fs, key); 155 } 156 157 if (!variant) { 158 return PIPE_ERROR; 159 } 160 161 ret = svga_define_shader(svga, variant); 162 if (ret != PIPE_OK) { 163 svga_destroy_shader_variant(svga, variant); 164 return ret; 165 } 166 167 *out_variant = variant; 168 169 /* insert variant at head of linked list */ 170 variant->next = fs->base.variants; 171 fs->base.variants = variant; 172 173 return PIPE_OK; 174} 175 176 177/* SVGA_NEW_TEXTURE_BINDING 178 * SVGA_NEW_RAST 179 * SVGA_NEW_NEED_SWTNL 180 * SVGA_NEW_SAMPLER 181 */ 182static enum pipe_error 183make_fs_key(const struct svga_context *svga, 184 struct svga_fragment_shader *fs, 185 struct svga_compile_key *key) 186{ 187 const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT; 188 unsigned i; 189 190 memset(key, 0, sizeof *key); 191 192 memcpy(key->generic_remap_table, fs->generic_remap_table, 193 sizeof(fs->generic_remap_table)); 194 195 /* SVGA_NEW_GS, SVGA_NEW_VS 196 */ 197 if (svga->curr.gs) { 198 key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; 199 } else { 200 key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; 201 } 202 203 /* Only need fragment shader fixup for twoside lighting if doing 204 * hwtnl. Otherwise the draw module does the whole job for us. 205 * 206 * SVGA_NEW_SWTNL 207 */ 208 if (!svga->state.sw.need_swtnl) { 209 /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE 210 */ 211 key->fs.light_twoside = svga->curr.rast->templ.light_twoside; 212 key->fs.front_ccw = svga->curr.rast->templ.front_ccw; 213 key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && 214 svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); 215 key->fs.aa_point = (svga->curr.rast->templ.point_smooth && 216 svga->curr.reduced_prim == PIPE_PRIM_POINTS && 217 (svga->curr.rast->pointsize > 1.0 || 218 svga->curr.vs->base.info.writes_psize)); 219 if (key->fs.aa_point) { 220 assert(svga->curr.gs != NULL); 221 assert(svga->curr.gs->aa_point_coord_index != -1); 222 key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; 223 } 224 } 225 226 /* The blend workaround for simulating logicop xor behaviour 227 * requires that the incoming fragment color be white. This change 228 * achieves that by creating a variant of the current fragment 229 * shader that overrides all output colors with 1,1,1,1 230 * 231 * This will work for most shaders, including those containing 232 * TEXKIL and/or depth-write. However, it will break on the 233 * combination of xor-logicop plus alphatest. 234 * 235 * Ultimately, we could implement alphatest in the shader using 236 * texkil prior to overriding the outgoing fragment color. 237 * 238 * SVGA_NEW_BLEND 239 */ 240 key->fs.white_fragments = svga->curr.blend->need_white_fragments; 241 242 key->fs.alpha_to_one = svga->curr.blend->alpha_to_one; 243 244#ifdef DEBUG 245 /* 246 * We expect a consistent set of samplers and sampler views. 247 * Do some debug checks/warnings here. 248 */ 249 { 250 static boolean warned = FALSE; 251 unsigned i, n = MAX2(svga->curr.num_sampler_views[shader], 252 svga->curr.num_samplers[shader]); 253 /* Only warn once to prevent too much debug output */ 254 if (!warned) { 255 if (svga->curr.num_sampler_views[shader] != 256 svga->curr.num_samplers[shader]) { 257 debug_printf("svga: mismatched number of sampler views (%u) " 258 "vs. samplers (%u)\n", 259 svga->curr.num_sampler_views[shader], 260 svga->curr.num_samplers[shader]); 261 } 262 for (i = 0; i < n; i++) { 263 if ((svga->curr.sampler_views[shader][i] == NULL) != 264 (svga->curr.sampler[shader][i] == NULL)) 265 debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n", 266 i, svga->curr.sampler_views[shader][i], 267 i, svga->curr.sampler[shader][i]); 268 } 269 warned = TRUE; 270 } 271 } 272#endif 273 274 /* XXX: want to limit this to the textures that the shader actually 275 * refers to. 276 * 277 * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER 278 */ 279 svga_init_shader_key_common(svga, shader, key); 280 281 for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { 282 struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; 283 const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; 284 if (view) { 285 struct pipe_resource *tex = view->texture; 286 if (tex->target != PIPE_BUFFER) { 287 struct svga_texture *stex = svga_texture(tex); 288 SVGA3dSurfaceFormat format = stex->key.format; 289 290 if (!svga_have_vgpu10(svga) && 291 (format == SVGA3D_Z_D16 || 292 format == SVGA3D_Z_D24X8 || 293 format == SVGA3D_Z_D24S8)) { 294 /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, 295 * or SVGA3D_Z_D24S8 surface, we'll automatically get 296 * shadow comparison. But we only get LEQUAL mode. 297 * Set TEX_COMPARE_NONE here so we don't emit the extra FS 298 * code for shadow comparison. 299 */ 300 key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; 301 key->tex[i].compare_func = PIPE_FUNC_NEVER; 302 /* These depth formats _only_ support comparison mode and 303 * not ordinary sampling so warn if the later is expected. 304 */ 305 if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { 306 debug_warn_once("Unsupported shadow compare mode"); 307 } 308 /* The shader translation code can emit code to 309 * handle ALWAYS and NEVER compare functions 310 */ 311 else if (sampler->compare_func == PIPE_FUNC_ALWAYS || 312 sampler->compare_func == PIPE_FUNC_NEVER) { 313 key->tex[i].compare_mode = sampler->compare_mode; 314 key->tex[i].compare_func = sampler->compare_func; 315 } 316 else if (sampler->compare_func != PIPE_FUNC_LEQUAL) { 317 debug_warn_once("Unsupported shadow compare function"); 318 } 319 } 320 else { 321 /* For other texture formats, just use the compare func/mode 322 * as-is. Should be no-ops for color textures. For depth 323 * textures, we do not get automatic depth compare. We have 324 * to do it ourselves in the shader. And we don't get PCF. 325 */ 326 key->tex[i].compare_mode = sampler->compare_mode; 327 key->tex[i].compare_func = sampler->compare_func; 328 } 329 } 330 } 331 } 332 333 /* sprite coord gen state */ 334 key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; 335 336 key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode 337 == PIPE_SPRITE_COORD_LOWER_LEFT); 338 339 key->fs.flatshade = svga->curr.rast->templ.flatshade; 340 341 /* SVGA_NEW_DEPTH_STENCIL_ALPHA */ 342 if (svga_have_vgpu10(svga)) { 343 /* Alpha testing is not supported in integer-valued render targets. */ 344 if (svga_has_any_integer_cbufs(svga)) { 345 key->fs.alpha_func = SVGA3D_CMP_ALWAYS; 346 key->fs.alpha_ref = 0; 347 } 348 else { 349 key->fs.alpha_func = svga->curr.depth->alphafunc; 350 key->fs.alpha_ref = svga->curr.depth->alpharef; 351 } 352 } 353 354 /* SVGA_NEW_FRAME_BUFFER | SVGA_NEW_BLEND */ 355 if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] || 356 svga->curr.blend->need_white_fragments) { 357 /* Replicate color0 output (or white) to N colorbuffers */ 358 key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; 359 } 360 361 return PIPE_OK; 362} 363 364 365/** 366 * svga_reemit_fs_bindings - Reemit the fragment shader bindings 367 */ 368enum pipe_error 369svga_reemit_fs_bindings(struct svga_context *svga) 370{ 371 enum pipe_error ret; 372 373 assert(svga->rebind.flags.fs); 374 assert(svga_have_gb_objects(svga)); 375 376 if (!svga->state.hw_draw.fs) 377 return PIPE_OK; 378 379 if (!svga_need_to_rebind_resources(svga)) { 380 ret = svga->swc->resource_rebind(svga->swc, NULL, 381 svga->state.hw_draw.fs->gb_shader, 382 SVGA_RELOC_READ); 383 } 384 else { 385 if (svga_have_vgpu10(svga)) 386 ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, 387 svga->state.hw_draw.fs->gb_shader, 388 svga->state.hw_draw.fs->id); 389 else 390 ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, 391 svga->state.hw_draw.fs->gb_shader); 392 } 393 394 if (ret != PIPE_OK) 395 return ret; 396 397 svga->rebind.flags.fs = FALSE; 398 return PIPE_OK; 399} 400 401 402 403static enum pipe_error 404emit_hw_fs(struct svga_context *svga, unsigned dirty) 405{ 406 struct svga_shader_variant *variant = NULL; 407 enum pipe_error ret = PIPE_OK; 408 struct svga_fragment_shader *fs = svga->curr.fs; 409 struct svga_compile_key key; 410 411 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITFS); 412 413 /* Disable rasterization if rasterizer_discard flag is set or 414 * vs/gs does not output position. 415 */ 416 svga->disable_rasterizer = 417 svga->curr.rast->templ.rasterizer_discard || 418 (svga->curr.gs && !svga->curr.gs->base.info.writes_position) || 419 (!svga->curr.gs && !svga->curr.vs->base.info.writes_position); 420 421 /* Set FS to NULL when rasterization is to be disabled */ 422 if (svga->disable_rasterizer) { 423 /* Set FS to NULL if it has not been done */ 424 if (svga->state.hw_draw.fs) { 425 ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); 426 if (ret != PIPE_OK) 427 goto done; 428 } 429 svga->rebind.flags.fs = FALSE; 430 svga->state.hw_draw.fs = NULL; 431 goto done; 432 } 433 434 /* SVGA_NEW_BLEND 435 * SVGA_NEW_TEXTURE_BINDING 436 * SVGA_NEW_RAST 437 * SVGA_NEW_NEED_SWTNL 438 * SVGA_NEW_SAMPLER 439 * SVGA_NEW_FRAME_BUFFER 440 * SVGA_NEW_DEPTH_STENCIL_ALPHA 441 * SVGA_NEW_VS 442 */ 443 ret = make_fs_key(svga, fs, &key); 444 if (ret != PIPE_OK) 445 goto done; 446 447 variant = svga_search_shader_key(&fs->base, &key); 448 if (!variant) { 449 ret = compile_fs(svga, fs, &key, &variant); 450 if (ret != PIPE_OK) 451 goto done; 452 } 453 454 assert(variant); 455 456 if (variant != svga->state.hw_draw.fs) { 457 ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant); 458 if (ret != PIPE_OK) 459 goto done; 460 461 svga->rebind.flags.fs = FALSE; 462 463 svga->dirty |= SVGA_NEW_FS_VARIANT; 464 svga->state.hw_draw.fs = variant; 465 } 466 467done: 468 SVGA_STATS_TIME_POP(svga_sws(svga)); 469 return ret; 470} 471 472struct svga_tracked_state svga_hw_fs = 473{ 474 "fragment shader (hwtnl)", 475 (SVGA_NEW_FS | 476 SVGA_NEW_GS | 477 SVGA_NEW_VS | 478 SVGA_NEW_TEXTURE_BINDING | 479 SVGA_NEW_NEED_SWTNL | 480 SVGA_NEW_RAST | 481 SVGA_NEW_STIPPLE | 482 SVGA_NEW_REDUCED_PRIMITIVE | 483 SVGA_NEW_SAMPLER | 484 SVGA_NEW_FRAME_BUFFER | 485 SVGA_NEW_DEPTH_STENCIL_ALPHA | 486 SVGA_NEW_BLEND), 487 emit_hw_fs 488}; 489 490 491 492