1/************************************************************************** 2 * 3 * Copyright 2009-2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 *
 **************************************************************************/

#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_from_mesa.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_sanity.h"
#include "util/u_debug.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_bitmask.h"
#include "GL/gl.h"
#include "compiler/shader_info.h"

/* One 32-bit TGSI token viewed through every possible token layout.
 * Emission code below writes whichever member matches the token being
 * built; `value` is used to zero-initialize a token in one store.
 */
union tgsi_any_token {
   struct tgsi_header header;
   struct tgsi_processor processor;
   struct tgsi_token token;
   struct tgsi_property prop;
   struct tgsi_property_data prop_data;
   struct tgsi_declaration decl;
   struct tgsi_declaration_range decl_range;
   struct tgsi_declaration_dimension decl_dim;
   struct tgsi_declaration_interp decl_interp;
   struct tgsi_declaration_image decl_image;
   struct tgsi_declaration_semantic decl_semantic;
   struct tgsi_declaration_sampler_view decl_sampler_view;
   struct tgsi_declaration_array array;
   struct tgsi_immediate imm;
   union tgsi_immediate_data imm_data;
   struct tgsi_instruction insn;
   struct tgsi_instruction_label insn_label;
   struct tgsi_instruction_texture insn_texture;
   struct tgsi_instruction_memory insn_memory;
   struct tgsi_texture_offset insn_texture_offset;
   struct tgsi_src_register src;
   struct tgsi_ind_register ind;
   struct tgsi_dimension dim;
   struct tgsi_dst_register dst;
   unsigned value;
};


/* Growable token buffer.  `size` is the capacity in tokens, `count` the
 * number of tokens emitted so far, and `order` the log2 of the capacity
 * used by tokens_expand() to double the allocation.
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;
   unsigned size;
   unsigned order;
   unsigned count;
};

/* Fixed-size tables below; exceeding any of these limits puts the
 * program into the error state (see set_bad()).
 */
#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS)
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_HW_ATOMIC_RANGE 32
#define UREG_MAX_IMMEDIATE 4096
#define UREG_MAX_ADDR 3
#define UREG_MAX_ARRAY_TEMPS 256

/* Per-constant-buffer list of referenced constant index ranges,
 * accumulated so declarations can be emitted at the end.
 */
struct const_decl {
   struct {
      unsigned first;
      unsigned last;
   } constant_range[UREG_MAX_CONSTANT_RANGE];
   unsigned nr_constant_ranges;
};

/* Per-buffer list of hardware atomic counter ranges. */
struct hw_atomic_decl {
   struct {
      unsigned first;
      unsigned last;
      unsigned array_id;
   } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
   unsigned nr_hw_atomic_ranges;
};

/* Two independent token streams: declarations and instructions. */
#define DOMAIN_DECL 0
#define DOMAIN_INSN 1

/* Accumulated state for a shader being assembled through the ureg API.
 * Register declarations are recorded in the tables below as they are
 * requested, and turned into TGSI declaration tokens when the program
 * is finalized.
 */
struct ureg_program
{
   enum pipe_shader_type processor;
   bool supports_any_inout_decl_range;
   int next_shader_processor;

   struct ureg_input_decl {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      enum tgsi_interpolate_mode interp;
      unsigned char usage_mask;
      enum tgsi_interpolate_loc interp_location;
      unsigned first;       /* first register of the (array) declaration */
      unsigned last;        /* last register, == first for non-arrays */
      unsigned array_id;
   } input[UREG_MAX_INPUT];
   unsigned nr_inputs, nr_input_regs;

   /* Bitmask of declared VS input slots, 32 slots per word. */
   unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];

   struct {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
   } system_value[UREG_MAX_SYSTEM_VALUE];
   unsigned nr_system_values;

   struct ureg_output_decl {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      unsigned streams;
      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
      unsigned first;
      unsigned last;
      unsigned array_id;
      boolean invariant;
   } output[UREG_MAX_OUTPUT];
   unsigned nr_outputs, nr_output_regs;

   /* De-duplicated immediate constants; `nr` counts used components. */
   struct {
      union {
         float f[4];
         unsigned u[4];
         int i[4];
      } value;
      unsigned nr;
      unsigned type;
   } immediate[UREG_MAX_IMMEDIATE];
   unsigned nr_immediates;

   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
   unsigned nr_samplers;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum tgsi_return_type return_type_x;
      enum tgsi_return_type return_type_y;
      enum tgsi_return_type return_type_z;
      enum tgsi_return_type return_type_w;
   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned nr_sampler_views;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum pipe_format format;
      boolean wr;
      boolean raw;
   } image[PIPE_MAX_SHADER_IMAGES];
   unsigned nr_images;

   struct {
      unsigned index;
      bool atomic;
   } buffer[PIPE_MAX_SHADER_BUFFERS];
   unsigned nr_buffers;

   /* Temporary-register bookkeeping: which temps are free for reuse,
    * which are "local" (not preserved across subroutine calls), and
    * where declaration ranges must be split.
    */
   struct util_bitmask *free_temps;
   struct util_bitmask *local_temps;
   struct util_bitmask *decl_temps;
   unsigned nr_temps;

   /* First register of each declared temp array; ArrayID is index+1. */
   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
   unsigned nr_array_temps;

   struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];

   struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];

   unsigned properties[TGSI_PROPERTY_COUNT];

   unsigned nr_addrs;
   unsigned nr_instructions;

   struct ureg_tokens domain[2];

   bool use_memory[TGSI_MEMORY_TYPE_COUNT];
};

/* Shared fallback storage handed out once a token stream hits an
 * allocation failure; writes land here harmlessly until finalization
 * notices the error state.
 */
static union tgsi_any_token error_tokens[32];

/* Put a token stream into the error state, releasing its real buffer. */
static void tokens_error( struct ureg_tokens *tokens )
{
   if (tokens->tokens && tokens->tokens != error_tokens)
      FREE(tokens->tokens);

   tokens->tokens = error_tokens;
   tokens->size = ARRAY_SIZE(error_tokens);
   tokens->count = 0;
}


/* Grow a token stream so that at least `count` more tokens fit.
 * Capacity doubles via `order`; on allocation failure the stream is
 * switched to the error-token state.
 */
static void tokens_expand( struct ureg_tokens *tokens,
                           unsigned count )
{
   unsigned old_size = tokens->size * sizeof(unsigned);

   if (tokens->tokens == error_tokens) {
      return;
   }

   while (tokens->count + count > tokens->size) {
      tokens->size = (1 << ++tokens->order);
   }

   tokens->tokens = REALLOC(tokens->tokens,
                            old_size,
                            tokens->size * sizeof(unsigned));
   if (tokens->tokens == NULL) {
      tokens_error(tokens);
   }
}

/* Mark the whole program as failed.  Only domain[0] is flagged; callers
 * of ureg_finalize presumably check that stream -- NOTE(review): verify
 * that the instruction domain cannot independently hold a stale buffer.
 */
static void set_bad( struct ureg_program *ureg )
{
   tokens_error(&ureg->domain[0]);
}



/* Reserve `count` consecutive tokens in the given domain and return a
 * pointer to the first.  The pointer is valid only until the next
 * get_tokens() call, which may realloc the buffer.
 */
static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
                                         unsigned domain,
                                         unsigned count )
{
   struct ureg_tokens *tokens = &ureg->domain[domain];
   union tgsi_any_token *result;

   if (tokens->count + count > tokens->size)
      tokens_expand(tokens, count);

   result = &tokens->tokens[tokens->count];
   tokens->count += count;
   return result;
}


/* Re-fetch a previously emitted token (e.g. for later patching).
 * Returns scratch storage when the stream is in the error state.
 */
static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
                                             unsigned domain,
                                             unsigned nr )
{
   if (ureg->domain[domain].tokens == error_tokens)
      return &error_tokens[0];

   return &ureg->domain[domain].tokens[nr];
}


/* Record a shader property to be emitted with the declarations. */
void
ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
{
   assert(name < ARRAY_SIZE(ureg->properties));
   ureg->properties[name] = value;
}

/* Declare (or re-reference) a fragment shader input at an explicit
 * register location.  Matching semantic+array_id declarations are
 * merged by OR-ing usage masks; a semantic match with a different
 * array_id must not overlap in components.  On table overflow the
 * program is marked bad; note `i` then still indexes one past the used
 * entries in the returned register.
 */
struct ureg_src
ureg_DECL_fs_input_centroid_layout(struct ureg_program *ureg,
                       enum tgsi_semantic semantic_name,
                       unsigned semantic_index,
                       enum tgsi_interpolate_mode interp_mode,
                       enum tgsi_interpolate_loc interp_location,
                       unsigned index,
                       unsigned usage_mask,
                       unsigned array_id,
                       unsigned array_size)
{
   unsigned i;

   assert(usage_mask != 0);
   assert(usage_mask <= TGSI_WRITEMASK_XYZW);

   for (i = 0; i < ureg->nr_inputs; i++) {
      if (ureg->input[i].semantic_name == semantic_name &&
          ureg->input[i].semantic_index == semantic_index) {
         assert(ureg->input[i].interp == interp_mode);
         assert(ureg->input[i].interp_location == interp_location);
         if (ureg->input[i].array_id == array_id) {
            ureg->input[i].usage_mask |= usage_mask;
            goto out;
         }
         assert((ureg->input[i].usage_mask & usage_mask) == 0);
      }
   }

   if (ureg->nr_inputs < UREG_MAX_INPUT) {
      assert(array_size >= 1);
      ureg->input[i].semantic_name = semantic_name;
      ureg->input[i].semantic_index = semantic_index;
      ureg->input[i].interp = interp_mode;
      ureg->input[i].interp_location = interp_location;
      ureg->input[i].first = index;
      ureg->input[i].last = index + array_size - 1;
      ureg->input[i].array_id = array_id;
      ureg->input[i].usage_mask = usage_mask;
      ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size);
      ureg->nr_inputs++;
   } else {
      set_bad(ureg);
   }

out:
   /* `i` indexes either the matched entry or the newly created one. */
   return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
                                  array_id);
}

/* Convenience wrapper: declare an FS input at the next free register
 * with full XYZW usage.
 */
struct ureg_src
ureg_DECL_fs_input_centroid(struct ureg_program *ureg,
                       enum tgsi_semantic semantic_name,
                       unsigned semantic_index,
                       enum tgsi_interpolate_mode interp_mode,
                       enum tgsi_interpolate_loc interp_location,
                       unsigned array_id,
                       unsigned array_size)
{
   return ureg_DECL_fs_input_centroid_layout(ureg,
         semantic_name, semantic_index, interp_mode,
         interp_location,
         ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size);
}


/* Declare a vertex shader input attribute slot (recorded as a bit in
 * vs_inputs, not in the generic input table).
 */
struct ureg_src
ureg_DECL_vs_input( struct ureg_program *ureg,
                    unsigned index )
{
   assert(ureg->processor == PIPE_SHADER_VERTEX);
   assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));

   ureg->vs_inputs[index/32] |= 1 << (index % 32);
   return ureg_src_register( TGSI_FILE_INPUT, index );
}


/* Declare a non-FS input at an explicit location; interpolation fields
 * are don't-cares and filled with CONSTANT/CENTER.
 */
struct ureg_src
ureg_DECL_input_layout(struct ureg_program *ureg,
                enum tgsi_semantic semantic_name,
                unsigned semantic_index,
                unsigned index,
                unsigned usage_mask,
                unsigned array_id,
                unsigned array_size)
{
   return ureg_DECL_fs_input_centroid_layout(ureg,
         semantic_name, semantic_index,
         TGSI_INTERPOLATE_CONSTANT, TGSI_INTERPOLATE_LOC_CENTER,
         index, usage_mask, array_id, array_size);
}


/* Declare a non-FS input at the next free register. */
struct ureg_src
ureg_DECL_input(struct ureg_program *ureg,
                enum tgsi_semantic semantic_name,
                unsigned semantic_index,
                unsigned array_id,
                unsigned array_size)
{
   return ureg_DECL_fs_input_centroid(ureg, semantic_name, semantic_index,
                                      TGSI_INTERPOLATE_CONSTANT,
                                      TGSI_INTERPOLATE_LOC_CENTER,
                                      array_id, array_size);
}


/* Declare (or re-reference) a system value register.
 * NOTE(review): if the table is full, set_bad() is called but `i` is
 * left at nr_system_values (== UREG_MAX_SYSTEM_VALUE) when building the
 * returned register -- harmless only because the program is already bad.
 */
struct ureg_src
ureg_DECL_system_value(struct ureg_program *ureg,
                       enum tgsi_semantic semantic_name,
                       unsigned semantic_index)
{
   unsigned i;

   for (i = 0; i < ureg->nr_system_values; i++) {
      if (ureg->system_value[i].semantic_name == semantic_name &&
          ureg->system_value[i].semantic_index == semantic_index) {
         goto out;
      }
   }

   if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
      ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
      ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
      i = ureg->nr_system_values;
      ureg->nr_system_values++;
   } else {
      set_bad(ureg);
   }

out:
   return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
}


/* Declare (or re-reference) an output at an explicit register location.
 * `streams` packs 2-bit stream indices per component; the asserts check
 * each referenced component is also in usage_mask.  Matching
 * semantic+array_id entries are merged; overflow marks the program bad
 * and falls back to entry 0 so the `streams` OR below stays in bounds.
 */
struct ureg_dst
ureg_DECL_output_layout(struct ureg_program *ureg,
                        enum tgsi_semantic semantic_name,
                        unsigned semantic_index,
                        unsigned streams,
                        unsigned index,
                        unsigned usage_mask,
                        unsigned array_id,
                        unsigned array_size,
                        boolean invariant)
{
   unsigned i;

   assert(usage_mask != 0);
   assert(!(streams & 0x03) || (usage_mask & 1));
   assert(!(streams & 0x0c) || (usage_mask & 2));
   assert(!(streams & 0x30) || (usage_mask & 4));
   assert(!(streams & 0xc0) || (usage_mask & 8));

   for (i = 0; i < ureg->nr_outputs; i++) {
      if (ureg->output[i].semantic_name == semantic_name &&
          ureg->output[i].semantic_index == semantic_index) {
         if (ureg->output[i].array_id == array_id) {
            ureg->output[i].usage_mask |= usage_mask;
            goto out;
         }
         assert((ureg->output[i].usage_mask & usage_mask) == 0);
      }
   }

   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
      ureg->output[i].semantic_name = semantic_name;
      ureg->output[i].semantic_index = semantic_index;
      ureg->output[i].usage_mask = usage_mask;
      ureg->output[i].first = index;
      ureg->output[i].last = index + array_size - 1;
      ureg->output[i].array_id = array_id;
      ureg->output[i].invariant = invariant;
      ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
      ureg->nr_outputs++;
   }
   else {
      set_bad( ureg );
      i = 0;
   }

out:
   ureg->output[i].streams |= streams;

   return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
                                  array_id);
}


/* Declare an output at the next free register, stream 0. */
struct ureg_dst
ureg_DECL_output_masked(struct ureg_program *ureg,
                        unsigned name,
                        unsigned index,
                        unsigned usage_mask,
                        unsigned array_id,
                        unsigned array_size)
{
   return ureg_DECL_output_layout(ureg, name, index, 0,
                                  ureg->nr_output_regs, usage_mask, array_id,
                                  array_size, FALSE);
}


/* Declare a full-XYZW, non-array output. */
struct ureg_dst
ureg_DECL_output(struct ureg_program *ureg,
                 enum tgsi_semantic name,
                 unsigned index)
{
   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
                                  0, 1);
}

/* Declare a full-XYZW output array. */
struct ureg_dst
ureg_DECL_output_array(struct ureg_program *ureg,
                       enum tgsi_semantic semantic_name,
                       unsigned semantic_index,
                       unsigned array_id,
                       unsigned array_size)
{
   return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
                                  TGSI_WRITEMASK_XYZW,
                                  array_id, array_size);
}


/* Returns a new constant register.  Keep track of which have been
 * referred to so that we can emit decls later.
 *
 * Constant operands declared with this function must be addressed
 * with a two-dimensional index.
 *
 * There is nothing in this code to bind this constant to any tracked
 * value or manage any constant_buffer contents -- that's the
 * responsibility of the calling code.
 */
void
ureg_DECL_constant2D(struct ureg_program *ureg,
                     unsigned first,
                     unsigned last,
                     unsigned index2D)
{
   /* NOTE(review): decl is computed before the range assert below;
    * consider asserting index2D first so debug builds catch an
    * out-of-range buffer id before the pointer arithmetic.
    */
   struct const_decl *decl = &ureg->const_decls[index2D];

   assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);

   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
      uint i = decl->nr_constant_ranges++;

      decl->constant_range[i].first = first;
      decl->constant_range[i].last = last;
   }
}


/* A one-dimensional, deprecated version of ureg_DECL_constant2D().
 *
 * Constant operands declared with this function must be addressed
 * with a one-dimensional index.
 */
struct ureg_src
ureg_DECL_constant(struct ureg_program *ureg,
                   unsigned index)
{
   struct const_decl *decl = &ureg->const_decls[0];
   unsigned minconst = index, maxconst = index;
   unsigned i;

   /* Inside existing range?
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].first <= index &&
          decl->constant_range[i].last >= index) {
         goto out;
      }
   }

   /* Extend existing range?
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].last == index - 1) {
         decl->constant_range[i].last = index;
         goto out;
      }

      if (decl->constant_range[i].first == index + 1) {
         decl->constant_range[i].first = index;
         goto out;
      }

      /* Track overall bounds in case we have to collapse below. */
      minconst = MIN2(minconst, decl->constant_range[i].first);
      maxconst = MAX2(maxconst, decl->constant_range[i].last);
   }

   /* Create new range?
    */
   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
      i = decl->nr_constant_ranges++;
      decl->constant_range[i].first = index;
      decl->constant_range[i].last = index;
      goto out;
   }

   /* Collapse all ranges down to one:
    */
   i = 0;
   decl->constant_range[0].first = minconst;
   decl->constant_range[0].last = maxconst;
   decl->nr_constant_ranges = 1;

out:
   assert(i < decl->nr_constant_ranges);
   assert(decl->constant_range[i].first <= index);
   assert(decl->constant_range[i].last >= index);

   struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index);
   return ureg_src_dimension(src, 0);
}


/* Returns a new hw atomic register.  Keep track of which have been
 * referred to so that we can emit decls later.
 */
void
ureg_DECL_hw_atomic(struct ureg_program *ureg,
                    unsigned first,
                    unsigned last,
                    unsigned buffer_id,
                    unsigned array_id)
{
   struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];

   if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
      uint i = decl->nr_hw_atomic_ranges++;

      decl->hw_atomic_range[i].first = first;
      decl->hw_atomic_range[i].last = last;
      decl->hw_atomic_range[i].array_id = array_id;
   } else {
      set_bad(ureg);
   }
}

/* Hand out a temporary register, preferring a previously released one
 * whose local/non-local flag matches.  Declaration boundaries are
 * recorded in decl_temps wherever the local flag flips so ranges can be
 * emitted with the right LOCAL attribute later.
 */
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
                                        boolean local )
{
   unsigned i;

   /* Look for a released temporary.
    */
   for (i = util_bitmask_get_first_index(ureg->free_temps);
        i != UTIL_BITMASK_INVALID_INDEX;
        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
      if (util_bitmask_get(ureg->local_temps, i) == local)
         break;
   }

   /* Or allocate a new one.
    */
   if (i == UTIL_BITMASK_INVALID_INDEX) {
      i = ureg->nr_temps++;

      if (local)
         util_bitmask_set(ureg->local_temps, i);

      /* Start a new declaration when the local flag changes */
      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
         util_bitmask_set(ureg->decl_temps, i);
   }

   util_bitmask_clear(ureg->free_temps, i);

   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
}

/* Allocate a plain (non-local) temporary. */
struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
{
   return alloc_temporary(ureg, FALSE);
}

/* Allocate a local temporary (not preserved across subroutine calls). */
struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
{
   return alloc_temporary(ureg, TRUE);
}

/* Allocate `size` consecutive temporaries as an indexable array.
 * The array gets its own declaration range; ArrayID is 1-based, and 0
 * (no array) is kept if the array-temp table is full.
 */
struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
                                           unsigned size,
                                           boolean local )
{
   unsigned i = ureg->nr_temps;
   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );

   if (local)
      util_bitmask_set(ureg->local_temps, i);

   /* Always start a new declaration at the start */
   util_bitmask_set(ureg->decl_temps, i);

   ureg->nr_temps += size;

   /* and also at the end of the array */
   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);

   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
      ureg->array_temps[ureg->nr_array_temps++] = i;
      dst.ArrayID = ureg->nr_array_temps;
   }

   return dst;
}

/* Return a temporary to the free pool for reuse.  Non-temporary
 * registers are ignored so callers can release unconditionally.
 */
void ureg_release_temporary( struct ureg_program *ureg,
                             struct ureg_dst tmp )
{
   if(tmp.File == TGSI_FILE_TEMPORARY)
      util_bitmask_set(ureg->free_temps, tmp.Index);
}


/* Allocate a new address register.
 */
struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
{
   if (ureg->nr_addrs < UREG_MAX_ADDR)
      return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );

   assert( 0 );
   return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
}

/* Allocate a new sampler.
 */
struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
                                   unsigned nr )
{
   unsigned i;

   /* Re-use an existing declaration for the same unit. */
   for (i = 0; i < ureg->nr_samplers; i++)
      if (ureg->sampler[i].Index == (int)nr)
         return ureg->sampler[i];

   if (i < PIPE_MAX_SAMPLERS) {
      ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
      ureg->nr_samplers++;
      return ureg->sampler[i];
   }

   /* Table full: debug-assert and fall back to entry 0. */
   assert( 0 );
   return ureg->sampler[0];
}

/*
 * Allocate a new shader sampler view.
 */
struct ureg_src
ureg_DECL_sampler_view(struct ureg_program *ureg,
                       unsigned index,
                       enum tgsi_texture_type target,
                       enum tgsi_return_type return_type_x,
                       enum tgsi_return_type return_type_y,
                       enum tgsi_return_type return_type_z,
                       enum tgsi_return_type return_type_w)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
   uint i;

   /* Already declared?  Target/return types of the first declaration
    * win; later calls with the same index are not cross-checked.
    */
   for (i = 0; i < ureg->nr_sampler_views; i++) {
      if (ureg->sampler_view[i].index == index) {
         return reg;
      }
   }

   if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
      ureg->sampler_view[i].index = index;
      ureg->sampler_view[i].target = target;
      ureg->sampler_view[i].return_type_x = return_type_x;
      ureg->sampler_view[i].return_type_y = return_type_y;
      ureg->sampler_view[i].return_type_z = return_type_z;
      ureg->sampler_view[i].return_type_w = return_type_w;
      ureg->nr_sampler_views++;
      return reg;
   }

   assert(0);
   return reg;
}

/* Allocate a new image.
 */
struct ureg_src
ureg_DECL_image(struct ureg_program *ureg,
                unsigned index,
                enum tgsi_texture_type target,
                enum pipe_format format,
                boolean wr,
                boolean raw)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
   unsigned i;

   /* Already declared? */
   for (i = 0; i < ureg->nr_images; i++)
      if (ureg->image[i].index == index)
         return reg;

   if (i < PIPE_MAX_SHADER_IMAGES) {
      ureg->image[i].index = index;
      ureg->image[i].target = target;
      ureg->image[i].wr = wr;
      ureg->image[i].raw = raw;
      ureg->image[i].format = format;
      ureg->nr_images++;
      return reg;
   }

   assert(0);
   return reg;
}

/* Allocate a new buffer.
 */
struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
                                 bool atomic)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
   unsigned i;

   /* Already declared? */
   for (i = 0; i < ureg->nr_buffers; i++)
      if (ureg->buffer[i].index == nr)
         return reg;

   if (i < PIPE_MAX_SHADER_BUFFERS) {
      ureg->buffer[i].index = nr;
      ureg->buffer[i].atomic = atomic;
      ureg->nr_buffers++;
      return reg;
   }

   assert(0);
   return reg;
}

/* Allocate a memory area.
 */
struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
                                 unsigned memory_type)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);

   ureg->use_memory[memory_type] = true;
   return reg;
}

/* Try to place `nr` dwords (pairs form 64-bit values) into the existing
 * immediate vector v2[0..*pnr2), expanding it up to 4 components if
 * needed.  On success updates *pnr2 and writes a component swizzle
 * (2 bits per source component) into *swizzle; returns FALSE and leaves
 * *pnr2 unchanged if the values don't fit.
 */
static int
match_or_expand_immediate64( const unsigned *v,
                             unsigned nr,
                             unsigned *v2,
                             unsigned *pnr2,
                             unsigned *swizzle )
{
   unsigned nr2 = *pnr2;
   unsigned i, j;
   *swizzle = 0;

   /* 64-bit values occupy two dwords, hence the stride-2 loops. */
   for (i = 0; i < nr; i += 2) {
      boolean found = FALSE;

      for (j = 0; j < nr2 && !found; j += 2) {
         if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
            *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
            found = TRUE;
         }
      }
      if (!found) {
         if ((nr2) >= 4) {
            return FALSE;
         }

         v2[nr2] = v[i];
         v2[nr2 + 1] = v[i + 1];

         *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2));
         nr2 += 2;
      }
   }

   /* Actually expand immediate only when fully succeeded.
    */
   *pnr2 = nr2;
   return TRUE;
}

/* 32-bit variant of the above; dispatches to the 64-bit version for
 * double/64-bit integer immediate types.
 */
static int
match_or_expand_immediate( const unsigned *v,
                           int type,
                           unsigned nr,
                           unsigned *v2,
                           unsigned *pnr2,
                           unsigned *swizzle )
{
   unsigned nr2 = *pnr2;
   unsigned i, j;

   if (type == TGSI_IMM_FLOAT64 ||
       type == TGSI_IMM_UINT64 ||
       type == TGSI_IMM_INT64)
      return match_or_expand_immediate64(v, nr, v2, pnr2, swizzle);

   *swizzle = 0;

   for (i = 0; i < nr; i++) {
      boolean found = FALSE;

      for (j = 0; j < nr2 && !found; j++) {
         if (v[i] == v2[j]) {
            *swizzle |= j << (i * 2);
            found = TRUE;
         }
      }

      if (!found) {
         if (nr2 >= 4) {
            return FALSE;
         }

         v2[nr2] = v[i];
         *swizzle |= nr2 << (i * 2);
         nr2++;
      }
   }

   /* Actually expand immediate only when fully succeeded.
    */
   *pnr2 = nr2;
   return TRUE;
}


/* De-duplicating immediate declaration: pack the `nr` components of `v`
 * into an existing immediate of the same type if they fit, otherwise
 * start a new one.  Returns a swizzled src so callers see the
 * components in their original order.
 */
static struct ureg_src
decl_immediate( struct ureg_program *ureg,
                const unsigned *v,
                unsigned nr,
                unsigned type )
{
   unsigned i, j;
   unsigned swizzle = 0;

   /* Could do a first pass where we examine all existing immediates
    * without expanding.
    */

   for (i = 0; i < ureg->nr_immediates; i++) {
      if (ureg->immediate[i].type != type) {
         continue;
      }
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
      i = ureg->nr_immediates++;
      ureg->immediate[i].type = type;
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   set_bad(ureg);

out:
   /* Make sure that all referenced elements are from this immediate.
    * Has the effect of making size-one immediates into scalars.
    */
   if (type == TGSI_IMM_FLOAT64 ||
       type == TGSI_IMM_UINT64 ||
       type == TGSI_IMM_INT64) {
      for (j = nr; j < 4; j+=2) {
         swizzle |= (swizzle & 0xf) << (j * 2);
      }
   } else {
      for (j = nr; j < 4; j++) {
         swizzle |= (swizzle & 0x3) << (j * 2);
      }
   }
   return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
                       (swizzle >> 0) & 0x3,
                       (swizzle >> 2) & 0x3,
                       (swizzle >> 4) & 0x3,
                       (swizzle >> 6) & 0x3);
}


/* Declare a float immediate (up to 4 components). */
struct ureg_src
ureg_DECL_immediate( struct ureg_program *ureg,
                     const float *v,
                     unsigned nr )
{
   union {
      float f[4];
      unsigned u[4];
   } fu;
   unsigned int i;

   for (i = 0; i < nr; i++) {
      fu.f[i] = v[i];
   }

   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
}

/* Declare a double immediate; `nr` counts dwords (2 per double). */
struct ureg_src
ureg_DECL_immediate_f64( struct ureg_program *ureg,
                         const double *v,
                         unsigned nr )
{
   union {
      unsigned u[4];
      double d[2];
   } fu;
   unsigned int i;

   assert((nr / 2) < 3);
   for (i = 0; i < nr / 2; i++) {
      fu.d[i] = v[i];
   }

   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
}

/* Declare an unsigned 32-bit immediate. */
struct ureg_src
ureg_DECL_immediate_uint( struct ureg_program *ureg,
                          const unsigned *v,
                          unsigned nr )
{
   return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
}


/* Declare a block of consecutive uint immediates without
 * de-duplication; returns the first immediate register of the block.
 */
struct ureg_src
ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
                                const unsigned *v,
                                unsigned nr )
{
   uint index;
   uint i;

   if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
      set_bad(ureg);
      return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
   }

   index = ureg->nr_immediates;
   ureg->nr_immediates += (nr + 3) / 4;

   for (i = index; i < ureg->nr_immediates; i++) {
      /* Last vector of the block may hold fewer than 4 components. */
      ureg->immediate[i].type = TGSI_IMM_UINT32;
      ureg->immediate[i].nr = nr > 4 ? 4 : nr;
      memcpy(ureg->immediate[i].value.u,
             &v[(i - index) * 4],
             ureg->immediate[i].nr * sizeof(uint));
      nr -= 4;
   }

   return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
}


/* Declare a signed 32-bit immediate. */
struct ureg_src
ureg_DECL_immediate_int( struct ureg_program *ureg,
                         const int *v,
                         unsigned nr )
{
   return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
}

/* Declare an unsigned 64-bit immediate; `nr` counts dwords. */
struct ureg_src
ureg_DECL_immediate_uint64( struct ureg_program *ureg,
                            const uint64_t *v,
                            unsigned nr )
{
   union {
      unsigned u[4];
      uint64_t u64[2];
   } fu;
   unsigned int i;

   assert((nr / 2) < 3);
   for (i = 0; i < nr / 2; i++) {
      fu.u64[i] = v[i];
   }

   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
}

/* Declare a signed 64-bit immediate; `nr` counts dwords. */
struct ureg_src
ureg_DECL_immediate_int64( struct ureg_program *ureg,
                           const int64_t *v,
                           unsigned nr )
{
   union {
      unsigned u[4];
      int64_t i64[2];
   } fu;
   unsigned int i;

   assert((nr / 2) < 3);
   for (i = 0; i < nr / 2; i++) {
      fu.i64[i] = v[i];
   }

   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
}

/* Emit the 1-3 tokens encoding a source operand: the base register,
 * optionally an indirect-addressing token, and optionally a dimension
 * token (itself possibly followed by its own indirect token).
 */
void
ureg_emit_src( struct ureg_program *ureg,
               struct ureg_src src )
{
   unsigned size = 1 + (src.Indirect ? 1 : 0) +
                   (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   assert(src.File != TGSI_FILE_NULL);
   assert(src.File < TGSI_FILE_COUNT);

   out[n].value = 0;
   out[n].src.File = src.File;
   out[n].src.SwizzleX = src.SwizzleX;
   out[n].src.SwizzleY = src.SwizzleY;
   out[n].src.SwizzleZ = src.SwizzleZ;
   out[n].src.SwizzleW = src.SwizzleW;
   out[n].src.Index = src.Index;
   out[n].src.Negate = src.Negate;
   out[0].src.Absolute = src.Absolute;
   n++;

   if (src.Indirect) {
      out[0].src.Indirect = 1;
      out[n].value = 0;
      out[n].ind.File = src.IndirectFile;
      out[n].ind.Swizzle = src.IndirectSwizzle;
      out[n].ind.Index = src.IndirectIndex;
      /* Drivers that can't handle arbitrary in/out decl ranges get
       * ArrayID 0 for input/output files.
       */
      if (!ureg->supports_any_inout_decl_range &&
          (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = src.ArrayID;
      n++;
   }

   if (src.Dimension) {
      out[0].src.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (src.DimIndirect) {
         out[n].dim.Indirect = 1;
         out[n].dim.Index = src.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = src.DimIndFile;
         out[n].ind.Swizzle = src.DimIndSwizzle;
         out[n].ind.Index = src.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = src.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = src.DimensionIndex;
      }
      n++;
   }

   assert(n == size);
}


/* Emit the 1-3 tokens encoding a destination operand; mirrors
 * ureg_emit_src() for the dst register layout.
 */
void
ureg_emit_dst( struct ureg_program *ureg,
               struct ureg_dst dst )
{
   unsigned size = 1 + (dst.Indirect ? 1 : 0) +
                   (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   /* These files can never be written. */
   assert(dst.File != TGSI_FILE_NULL);
   assert(dst.File != TGSI_FILE_SAMPLER);
   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
   assert(dst.File != TGSI_FILE_IMMEDIATE);
   assert(dst.File < TGSI_FILE_COUNT);

   out[n].value = 0;
   out[n].dst.File = dst.File;
   out[n].dst.WriteMask = dst.WriteMask;
   out[n].dst.Indirect = dst.Indirect;
   out[n].dst.Index = dst.Index;
   n++;

   if (dst.Indirect) {
      out[n].value = 0;
      out[n].ind.File = dst.IndirectFile;
      out[n].ind.Swizzle = dst.IndirectSwizzle;
      out[n].ind.Index = dst.IndirectIndex;
      if (!ureg->supports_any_inout_decl_range &&
          (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = dst.ArrayID;
      n++;
   }

   if (dst.Dimension) {
      out[0].dst.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (dst.DimIndirect) {
         out[n].dim.Indirect = 1;
         out[n].dim.Index = dst.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = dst.DimIndFile;
         out[n].ind.Swizzle = dst.DimIndSwizzle;
         out[n].ind.Index = dst.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = dst.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = dst.DimensionIndex;
      }
      n++;
   }

   assert(n == size);
}


/* Debug-only check that operand counts match the opcode's metadata. */
static void validate( enum tgsi_opcode opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifndef NDEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
   assert(info);
   if (info) {
      assert(nr_dst == info->num_dst);
      assert(nr_src == info->num_src);
   }
#endif
}
1252struct ureg_emit_insn_result 1253ureg_emit_insn(struct ureg_program *ureg, 1254 enum tgsi_opcode opcode, 1255 boolean saturate, 1256 unsigned precise, 1257 unsigned num_dst, 1258 unsigned num_src) 1259{ 1260 union tgsi_any_token *out; 1261 uint count = 1; 1262 struct ureg_emit_insn_result result; 1263 1264 validate( opcode, num_dst, num_src ); 1265 1266 out = get_tokens( ureg, DOMAIN_INSN, count ); 1267 out[0].insn = tgsi_default_instruction(); 1268 out[0].insn.Opcode = opcode; 1269 out[0].insn.Saturate = saturate; 1270 out[0].insn.Precise = precise; 1271 out[0].insn.NumDstRegs = num_dst; 1272 out[0].insn.NumSrcRegs = num_src; 1273 1274 result.insn_token = ureg->domain[DOMAIN_INSN].count - count; 1275 result.extended_token = result.insn_token; 1276 1277 ureg->nr_instructions++; 1278 1279 return result; 1280} 1281 1282 1283/** 1284 * Emit a label token. 1285 * \param label_token returns a token number indicating where the label 1286 * needs to be patched later. Later, this value should be passed to the 1287 * ureg_fixup_label() function. 1288 */ 1289void 1290ureg_emit_label(struct ureg_program *ureg, 1291 unsigned extended_token, 1292 unsigned *label_token ) 1293{ 1294 union tgsi_any_token *out, *insn; 1295 1296 if (!label_token) 1297 return; 1298 1299 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 1300 out[0].value = 0; 1301 1302 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 1303 insn->insn.Label = 1; 1304 1305 *label_token = ureg->domain[DOMAIN_INSN].count - 1; 1306} 1307 1308/* Will return a number which can be used in a label to point to the 1309 * next instruction to be emitted. 1310 */ 1311unsigned 1312ureg_get_instruction_number( struct ureg_program *ureg ) 1313{ 1314 return ureg->nr_instructions; 1315} 1316 1317/* Patch a given label (expressed as a token number) to point to a 1318 * given instruction (expressed as an instruction number). 
1319 */ 1320void 1321ureg_fixup_label(struct ureg_program *ureg, 1322 unsigned label_token, 1323 unsigned instruction_number ) 1324{ 1325 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); 1326 1327 out->insn_label.Label = instruction_number; 1328} 1329 1330 1331void 1332ureg_emit_texture(struct ureg_program *ureg, 1333 unsigned extended_token, 1334 enum tgsi_texture_type target, 1335 enum tgsi_return_type return_type, unsigned num_offsets) 1336{ 1337 union tgsi_any_token *out, *insn; 1338 1339 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 1340 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 1341 1342 insn->insn.Texture = 1; 1343 1344 out[0].value = 0; 1345 out[0].insn_texture.Texture = target; 1346 out[0].insn_texture.NumOffsets = num_offsets; 1347 out[0].insn_texture.ReturnType = return_type; 1348} 1349 1350void 1351ureg_emit_texture_offset(struct ureg_program *ureg, 1352 const struct tgsi_texture_offset *offset) 1353{ 1354 union tgsi_any_token *out; 1355 1356 out = get_tokens( ureg, DOMAIN_INSN, 1); 1357 1358 out[0].value = 0; 1359 out[0].insn_texture_offset = *offset; 1360} 1361 1362void 1363ureg_emit_memory(struct ureg_program *ureg, 1364 unsigned extended_token, 1365 unsigned qualifier, 1366 enum tgsi_texture_type texture, 1367 enum pipe_format format) 1368{ 1369 union tgsi_any_token *out, *insn; 1370 1371 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 1372 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 1373 1374 insn->insn.Memory = 1; 1375 1376 out[0].value = 0; 1377 out[0].insn_memory.Qualifier = qualifier; 1378 out[0].insn_memory.Texture = texture; 1379 out[0].insn_memory.Format = format; 1380} 1381 1382void 1383ureg_fixup_insn_size(struct ureg_program *ureg, 1384 unsigned insn ) 1385{ 1386 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); 1387 1388 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION); 1389 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; 1390} 1391 1392 
1393void 1394ureg_insn(struct ureg_program *ureg, 1395 enum tgsi_opcode opcode, 1396 const struct ureg_dst *dst, 1397 unsigned nr_dst, 1398 const struct ureg_src *src, 1399 unsigned nr_src, 1400 unsigned precise ) 1401{ 1402 struct ureg_emit_insn_result insn; 1403 unsigned i; 1404 boolean saturate; 1405 1406 if (nr_dst && ureg_dst_is_empty(dst[0])) { 1407 return; 1408 } 1409 1410 saturate = nr_dst ? dst[0].Saturate : FALSE; 1411 1412 insn = ureg_emit_insn(ureg, 1413 opcode, 1414 saturate, 1415 precise, 1416 nr_dst, 1417 nr_src); 1418 1419 for (i = 0; i < nr_dst; i++) 1420 ureg_emit_dst( ureg, dst[i] ); 1421 1422 for (i = 0; i < nr_src; i++) 1423 ureg_emit_src( ureg, src[i] ); 1424 1425 ureg_fixup_insn_size( ureg, insn.insn_token ); 1426} 1427 1428void 1429ureg_tex_insn(struct ureg_program *ureg, 1430 enum tgsi_opcode opcode, 1431 const struct ureg_dst *dst, 1432 unsigned nr_dst, 1433 enum tgsi_texture_type target, 1434 enum tgsi_return_type return_type, 1435 const struct tgsi_texture_offset *texoffsets, 1436 unsigned nr_offset, 1437 const struct ureg_src *src, 1438 unsigned nr_src ) 1439{ 1440 struct ureg_emit_insn_result insn; 1441 unsigned i; 1442 boolean saturate; 1443 1444 if (nr_dst && ureg_dst_is_empty(dst[0])) { 1445 return; 1446 } 1447 1448 saturate = nr_dst ? 
dst[0].Saturate : FALSE; 1449 1450 insn = ureg_emit_insn(ureg, 1451 opcode, 1452 saturate, 1453 0, 1454 nr_dst, 1455 nr_src); 1456 1457 ureg_emit_texture( ureg, insn.extended_token, target, return_type, 1458 nr_offset ); 1459 1460 for (i = 0; i < nr_offset; i++) 1461 ureg_emit_texture_offset( ureg, &texoffsets[i]); 1462 1463 for (i = 0; i < nr_dst; i++) 1464 ureg_emit_dst( ureg, dst[i] ); 1465 1466 for (i = 0; i < nr_src; i++) 1467 ureg_emit_src( ureg, src[i] ); 1468 1469 ureg_fixup_insn_size( ureg, insn.insn_token ); 1470} 1471 1472 1473void 1474ureg_memory_insn(struct ureg_program *ureg, 1475 enum tgsi_opcode opcode, 1476 const struct ureg_dst *dst, 1477 unsigned nr_dst, 1478 const struct ureg_src *src, 1479 unsigned nr_src, 1480 unsigned qualifier, 1481 enum tgsi_texture_type texture, 1482 enum pipe_format format) 1483{ 1484 struct ureg_emit_insn_result insn; 1485 unsigned i; 1486 1487 insn = ureg_emit_insn(ureg, 1488 opcode, 1489 FALSE, 1490 0, 1491 nr_dst, 1492 nr_src); 1493 1494 ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format); 1495 1496 for (i = 0; i < nr_dst; i++) 1497 ureg_emit_dst(ureg, dst[i]); 1498 1499 for (i = 0; i < nr_src; i++) 1500 ureg_emit_src(ureg, src[i]); 1501 1502 ureg_fixup_insn_size(ureg, insn.insn_token); 1503} 1504 1505 1506static void 1507emit_decl_semantic(struct ureg_program *ureg, 1508 unsigned file, 1509 unsigned first, 1510 unsigned last, 1511 enum tgsi_semantic semantic_name, 1512 unsigned semantic_index, 1513 unsigned streams, 1514 unsigned usage_mask, 1515 unsigned array_id, 1516 boolean invariant) 1517{ 1518 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 
4 : 3); 1519 1520 out[0].value = 0; 1521 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1522 out[0].decl.NrTokens = 3; 1523 out[0].decl.File = file; 1524 out[0].decl.UsageMask = usage_mask; 1525 out[0].decl.Semantic = 1; 1526 out[0].decl.Array = array_id != 0; 1527 out[0].decl.Invariant = invariant; 1528 1529 out[1].value = 0; 1530 out[1].decl_range.First = first; 1531 out[1].decl_range.Last = last; 1532 1533 out[2].value = 0; 1534 out[2].decl_semantic.Name = semantic_name; 1535 out[2].decl_semantic.Index = semantic_index; 1536 out[2].decl_semantic.StreamX = streams & 3; 1537 out[2].decl_semantic.StreamY = (streams >> 2) & 3; 1538 out[2].decl_semantic.StreamZ = (streams >> 4) & 3; 1539 out[2].decl_semantic.StreamW = (streams >> 6) & 3; 1540 1541 if (array_id) { 1542 out[3].value = 0; 1543 out[3].array.ArrayID = array_id; 1544 } 1545} 1546 1547static void 1548emit_decl_atomic_2d(struct ureg_program *ureg, 1549 unsigned first, 1550 unsigned last, 1551 unsigned index2D, 1552 unsigned array_id) 1553{ 1554 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 
4 : 3); 1555 1556 out[0].value = 0; 1557 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1558 out[0].decl.NrTokens = 3; 1559 out[0].decl.File = TGSI_FILE_HW_ATOMIC; 1560 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1561 out[0].decl.Dimension = 1; 1562 out[0].decl.Array = array_id != 0; 1563 1564 out[1].value = 0; 1565 out[1].decl_range.First = first; 1566 out[1].decl_range.Last = last; 1567 1568 out[2].value = 0; 1569 out[2].decl_dim.Index2D = index2D; 1570 1571 if (array_id) { 1572 out[3].value = 0; 1573 out[3].array.ArrayID = array_id; 1574 } 1575} 1576 1577static void 1578emit_decl_fs(struct ureg_program *ureg, 1579 unsigned file, 1580 unsigned first, 1581 unsigned last, 1582 enum tgsi_semantic semantic_name, 1583 unsigned semantic_index, 1584 enum tgsi_interpolate_mode interpolate, 1585 enum tgsi_interpolate_loc interpolate_location, 1586 unsigned array_id, 1587 unsigned usage_mask) 1588{ 1589 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 1590 array_id ? 5 : 4); 1591 1592 out[0].value = 0; 1593 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1594 out[0].decl.NrTokens = 4; 1595 out[0].decl.File = file; 1596 out[0].decl.UsageMask = usage_mask; 1597 out[0].decl.Interpolate = 1; 1598 out[0].decl.Semantic = 1; 1599 out[0].decl.Array = array_id != 0; 1600 1601 out[1].value = 0; 1602 out[1].decl_range.First = first; 1603 out[1].decl_range.Last = last; 1604 1605 out[2].value = 0; 1606 out[2].decl_interp.Interpolate = interpolate; 1607 out[2].decl_interp.Location = interpolate_location; 1608 1609 out[3].value = 0; 1610 out[3].decl_semantic.Name = semantic_name; 1611 out[3].decl_semantic.Index = semantic_index; 1612 1613 if (array_id) { 1614 out[4].value = 0; 1615 out[4].array.ArrayID = array_id; 1616 } 1617} 1618 1619static void 1620emit_decl_temps( struct ureg_program *ureg, 1621 unsigned first, unsigned last, 1622 boolean local, 1623 unsigned arrayid ) 1624{ 1625 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 1626 arrayid ? 
3 : 2 ); 1627 1628 out[0].value = 0; 1629 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1630 out[0].decl.NrTokens = 2; 1631 out[0].decl.File = TGSI_FILE_TEMPORARY; 1632 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1633 out[0].decl.Local = local; 1634 1635 out[1].value = 0; 1636 out[1].decl_range.First = first; 1637 out[1].decl_range.Last = last; 1638 1639 if (arrayid) { 1640 out[0].decl.Array = 1; 1641 out[2].value = 0; 1642 out[2].array.ArrayID = arrayid; 1643 } 1644} 1645 1646static void emit_decl_range( struct ureg_program *ureg, 1647 unsigned file, 1648 unsigned first, 1649 unsigned count ) 1650{ 1651 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1652 1653 out[0].value = 0; 1654 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1655 out[0].decl.NrTokens = 2; 1656 out[0].decl.File = file; 1657 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1658 out[0].decl.Semantic = 0; 1659 1660 out[1].value = 0; 1661 out[1].decl_range.First = first; 1662 out[1].decl_range.Last = first + count - 1; 1663} 1664 1665static void 1666emit_decl_range2D(struct ureg_program *ureg, 1667 unsigned file, 1668 unsigned first, 1669 unsigned last, 1670 unsigned index2D) 1671{ 1672 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1673 1674 out[0].value = 0; 1675 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1676 out[0].decl.NrTokens = 3; 1677 out[0].decl.File = file; 1678 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1679 out[0].decl.Dimension = 1; 1680 1681 out[1].value = 0; 1682 out[1].decl_range.First = first; 1683 out[1].decl_range.Last = last; 1684 1685 out[2].value = 0; 1686 out[2].decl_dim.Index2D = index2D; 1687} 1688 1689static void 1690emit_decl_sampler_view(struct ureg_program *ureg, 1691 unsigned index, 1692 enum tgsi_texture_type target, 1693 enum tgsi_return_type return_type_x, 1694 enum tgsi_return_type return_type_y, 1695 enum tgsi_return_type return_type_z, 1696 enum tgsi_return_type return_type_w ) 1697{ 1698 union tgsi_any_token *out = 
get_tokens(ureg, DOMAIN_DECL, 3); 1699 1700 out[0].value = 0; 1701 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1702 out[0].decl.NrTokens = 3; 1703 out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; 1704 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1705 1706 out[1].value = 0; 1707 out[1].decl_range.First = index; 1708 out[1].decl_range.Last = index; 1709 1710 out[2].value = 0; 1711 out[2].decl_sampler_view.Resource = target; 1712 out[2].decl_sampler_view.ReturnTypeX = return_type_x; 1713 out[2].decl_sampler_view.ReturnTypeY = return_type_y; 1714 out[2].decl_sampler_view.ReturnTypeZ = return_type_z; 1715 out[2].decl_sampler_view.ReturnTypeW = return_type_w; 1716} 1717 1718static void 1719emit_decl_image(struct ureg_program *ureg, 1720 unsigned index, 1721 enum tgsi_texture_type target, 1722 enum pipe_format format, 1723 boolean wr, 1724 boolean raw) 1725{ 1726 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1727 1728 out[0].value = 0; 1729 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1730 out[0].decl.NrTokens = 3; 1731 out[0].decl.File = TGSI_FILE_IMAGE; 1732 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1733 1734 out[1].value = 0; 1735 out[1].decl_range.First = index; 1736 out[1].decl_range.Last = index; 1737 1738 out[2].value = 0; 1739 out[2].decl_image.Resource = target; 1740 out[2].decl_image.Writable = wr; 1741 out[2].decl_image.Raw = raw; 1742 out[2].decl_image.Format = format; 1743} 1744 1745static void 1746emit_decl_buffer(struct ureg_program *ureg, 1747 unsigned index, 1748 bool atomic) 1749{ 1750 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1751 1752 out[0].value = 0; 1753 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1754 out[0].decl.NrTokens = 2; 1755 out[0].decl.File = TGSI_FILE_BUFFER; 1756 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1757 out[0].decl.Atomic = atomic; 1758 1759 out[1].value = 0; 1760 out[1].decl_range.First = index; 1761 out[1].decl_range.Last = index; 1762} 1763 1764static void 1765emit_decl_memory(struct 
ureg_program *ureg, unsigned memory_type) 1766{ 1767 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1768 1769 out[0].value = 0; 1770 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1771 out[0].decl.NrTokens = 2; 1772 out[0].decl.File = TGSI_FILE_MEMORY; 1773 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1774 out[0].decl.MemType = memory_type; 1775 1776 out[1].value = 0; 1777 out[1].decl_range.First = memory_type; 1778 out[1].decl_range.Last = memory_type; 1779} 1780 1781static void 1782emit_immediate( struct ureg_program *ureg, 1783 const unsigned *v, 1784 unsigned type ) 1785{ 1786 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); 1787 1788 out[0].value = 0; 1789 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; 1790 out[0].imm.NrTokens = 5; 1791 out[0].imm.DataType = type; 1792 out[0].imm.Padding = 0; 1793 1794 out[1].imm_data.Uint = v[0]; 1795 out[2].imm_data.Uint = v[1]; 1796 out[3].imm_data.Uint = v[2]; 1797 out[4].imm_data.Uint = v[3]; 1798} 1799 1800static void 1801emit_property(struct ureg_program *ureg, 1802 unsigned name, 1803 unsigned data) 1804{ 1805 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1806 1807 out[0].value = 0; 1808 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; 1809 out[0].prop.NrTokens = 2; 1810 out[0].prop.PropertyName = name; 1811 1812 out[1].prop_data.Data = data; 1813} 1814 1815static int 1816input_sort(const void *in_a, const void *in_b) 1817{ 1818 const struct ureg_input_decl *a = in_a, *b = in_b; 1819 1820 return a->first - b->first; 1821} 1822 1823static int 1824output_sort(const void *in_a, const void *in_b) 1825{ 1826 const struct ureg_output_decl *a = in_a, *b = in_b; 1827 1828 return a->first - b->first; 1829} 1830 1831static void emit_decls( struct ureg_program *ureg ) 1832{ 1833 unsigned i,j; 1834 1835 for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) 1836 if (ureg->properties[i] != ~0u) 1837 emit_property(ureg, i, ureg->properties[i]); 1838 1839 /* While not required by TGSI spec, 
virglrenderer has a dependency on the 1840 * inputs being sorted. 1841 */ 1842 qsort(ureg->input, ureg->nr_inputs, sizeof(ureg->input[0]), input_sort); 1843 1844 if (ureg->processor == PIPE_SHADER_VERTEX) { 1845 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 1846 if (ureg->vs_inputs[i/32] & (1u << (i%32))) { 1847 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); 1848 } 1849 } 1850 } else if (ureg->processor == PIPE_SHADER_FRAGMENT) { 1851 if (ureg->supports_any_inout_decl_range) { 1852 for (i = 0; i < ureg->nr_inputs; i++) { 1853 emit_decl_fs(ureg, 1854 TGSI_FILE_INPUT, 1855 ureg->input[i].first, 1856 ureg->input[i].last, 1857 ureg->input[i].semantic_name, 1858 ureg->input[i].semantic_index, 1859 ureg->input[i].interp, 1860 ureg->input[i].interp_location, 1861 ureg->input[i].array_id, 1862 ureg->input[i].usage_mask); 1863 } 1864 } 1865 else { 1866 for (i = 0; i < ureg->nr_inputs; i++) { 1867 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { 1868 emit_decl_fs(ureg, 1869 TGSI_FILE_INPUT, 1870 j, j, 1871 ureg->input[i].semantic_name, 1872 ureg->input[i].semantic_index + 1873 (j - ureg->input[i].first), 1874 ureg->input[i].interp, 1875 ureg->input[i].interp_location, 0, 1876 ureg->input[i].usage_mask); 1877 } 1878 } 1879 } 1880 } else { 1881 if (ureg->supports_any_inout_decl_range) { 1882 for (i = 0; i < ureg->nr_inputs; i++) { 1883 emit_decl_semantic(ureg, 1884 TGSI_FILE_INPUT, 1885 ureg->input[i].first, 1886 ureg->input[i].last, 1887 ureg->input[i].semantic_name, 1888 ureg->input[i].semantic_index, 1889 0, 1890 TGSI_WRITEMASK_XYZW, 1891 ureg->input[i].array_id, 1892 FALSE); 1893 } 1894 } 1895 else { 1896 for (i = 0; i < ureg->nr_inputs; i++) { 1897 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { 1898 emit_decl_semantic(ureg, 1899 TGSI_FILE_INPUT, 1900 j, j, 1901 ureg->input[i].semantic_name, 1902 ureg->input[i].semantic_index + 1903 (j - ureg->input[i].first), 1904 0, 1905 TGSI_WRITEMASK_XYZW, 0, FALSE); 1906 } 1907 } 1908 } 1909 } 1910 1911 for 
(i = 0; i < ureg->nr_system_values; i++) { 1912 emit_decl_semantic(ureg, 1913 TGSI_FILE_SYSTEM_VALUE, 1914 i, 1915 i, 1916 ureg->system_value[i].semantic_name, 1917 ureg->system_value[i].semantic_index, 1918 0, 1919 TGSI_WRITEMASK_XYZW, 0, FALSE); 1920 } 1921 1922 /* While not required by TGSI spec, virglrenderer has a dependency on the 1923 * outputs being sorted. 1924 */ 1925 qsort(ureg->output, ureg->nr_outputs, sizeof(ureg->output[0]), output_sort); 1926 1927 if (ureg->supports_any_inout_decl_range) { 1928 for (i = 0; i < ureg->nr_outputs; i++) { 1929 emit_decl_semantic(ureg, 1930 TGSI_FILE_OUTPUT, 1931 ureg->output[i].first, 1932 ureg->output[i].last, 1933 ureg->output[i].semantic_name, 1934 ureg->output[i].semantic_index, 1935 ureg->output[i].streams, 1936 ureg->output[i].usage_mask, 1937 ureg->output[i].array_id, 1938 ureg->output[i].invariant); 1939 } 1940 } 1941 else { 1942 for (i = 0; i < ureg->nr_outputs; i++) { 1943 for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) { 1944 emit_decl_semantic(ureg, 1945 TGSI_FILE_OUTPUT, 1946 j, j, 1947 ureg->output[i].semantic_name, 1948 ureg->output[i].semantic_index + 1949 (j - ureg->output[i].first), 1950 ureg->output[i].streams, 1951 ureg->output[i].usage_mask, 1952 0, 1953 ureg->output[i].invariant); 1954 } 1955 } 1956 } 1957 1958 for (i = 0; i < ureg->nr_samplers; i++) { 1959 emit_decl_range( ureg, 1960 TGSI_FILE_SAMPLER, 1961 ureg->sampler[i].Index, 1 ); 1962 } 1963 1964 for (i = 0; i < ureg->nr_sampler_views; i++) { 1965 emit_decl_sampler_view(ureg, 1966 ureg->sampler_view[i].index, 1967 ureg->sampler_view[i].target, 1968 ureg->sampler_view[i].return_type_x, 1969 ureg->sampler_view[i].return_type_y, 1970 ureg->sampler_view[i].return_type_z, 1971 ureg->sampler_view[i].return_type_w); 1972 } 1973 1974 for (i = 0; i < ureg->nr_images; i++) { 1975 emit_decl_image(ureg, 1976 ureg->image[i].index, 1977 ureg->image[i].target, 1978 ureg->image[i].format, 1979 ureg->image[i].wr, 1980 ureg->image[i].raw); 
1981 } 1982 1983 for (i = 0; i < ureg->nr_buffers; i++) { 1984 emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic); 1985 } 1986 1987 for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) { 1988 if (ureg->use_memory[i]) 1989 emit_decl_memory(ureg, i); 1990 } 1991 1992 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 1993 struct const_decl *decl = &ureg->const_decls[i]; 1994 1995 if (decl->nr_constant_ranges) { 1996 uint j; 1997 1998 for (j = 0; j < decl->nr_constant_ranges; j++) { 1999 emit_decl_range2D(ureg, 2000 TGSI_FILE_CONSTANT, 2001 decl->constant_range[j].first, 2002 decl->constant_range[j].last, 2003 i); 2004 } 2005 } 2006 } 2007 2008 for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) { 2009 struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i]; 2010 2011 if (decl->nr_hw_atomic_ranges) { 2012 uint j; 2013 2014 for (j = 0; j < decl->nr_hw_atomic_ranges; j++) { 2015 emit_decl_atomic_2d(ureg, 2016 decl->hw_atomic_range[j].first, 2017 decl->hw_atomic_range[j].last, 2018 i, 2019 decl->hw_atomic_range[j].array_id); 2020 } 2021 } 2022 } 2023 2024 if (ureg->nr_temps) { 2025 unsigned array = 0; 2026 for (i = 0; i < ureg->nr_temps;) { 2027 boolean local = util_bitmask_get(ureg->local_temps, i); 2028 unsigned first = i; 2029 i = util_bitmask_get_next_index(ureg->decl_temps, i + 1); 2030 if (i == UTIL_BITMASK_INVALID_INDEX) 2031 i = ureg->nr_temps; 2032 2033 if (array < ureg->nr_array_temps && ureg->array_temps[array] == first) 2034 emit_decl_temps( ureg, first, i - 1, local, ++array ); 2035 else 2036 emit_decl_temps( ureg, first, i - 1, local, 0 ); 2037 } 2038 } 2039 2040 if (ureg->nr_addrs) { 2041 emit_decl_range( ureg, 2042 TGSI_FILE_ADDRESS, 2043 0, ureg->nr_addrs ); 2044 } 2045 2046 for (i = 0; i < ureg->nr_immediates; i++) { 2047 emit_immediate( ureg, 2048 ureg->immediate[i].value.u, 2049 ureg->immediate[i].type ); 2050 } 2051} 2052 2053/* Append the instruction tokens onto the declarations to build a 2054 * contiguous stream suitable to send to the 
driver. 2055 */ 2056static void copy_instructions( struct ureg_program *ureg ) 2057{ 2058 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; 2059 union tgsi_any_token *out = get_tokens( ureg, 2060 DOMAIN_DECL, 2061 nr_tokens ); 2062 2063 memcpy(out, 2064 ureg->domain[DOMAIN_INSN].tokens, 2065 nr_tokens * sizeof out[0] ); 2066} 2067 2068 2069static void 2070fixup_header_size(struct ureg_program *ureg) 2071{ 2072 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); 2073 2074 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; 2075} 2076 2077 2078static void 2079emit_header( struct ureg_program *ureg ) 2080{ 2081 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 2082 2083 out[0].header.HeaderSize = 2; 2084 out[0].header.BodySize = 0; 2085 2086 out[1].processor.Processor = ureg->processor; 2087 out[1].processor.Padding = 0; 2088} 2089 2090 2091const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) 2092{ 2093 const struct tgsi_token *tokens; 2094 2095 switch (ureg->processor) { 2096 case PIPE_SHADER_VERTEX: 2097 case PIPE_SHADER_TESS_EVAL: 2098 ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER, 2099 ureg->next_shader_processor == -1 ? 2100 PIPE_SHADER_FRAGMENT : 2101 ureg->next_shader_processor); 2102 break; 2103 default: 2104 ; /* nothing */ 2105 } 2106 2107 emit_header( ureg ); 2108 emit_decls( ureg ); 2109 copy_instructions( ureg ); 2110 fixup_header_size( ureg ); 2111 2112 if (ureg->domain[0].tokens == error_tokens || 2113 ureg->domain[1].tokens == error_tokens) { 2114 debug_printf("%s: error in generated shader\n", __FUNCTION__); 2115 assert(0); 2116 return NULL; 2117 } 2118 2119 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 2120 2121 if (0) { 2122 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, 2123 ureg->domain[DOMAIN_DECL].count); 2124 tgsi_dump( tokens, 0 ); 2125 } 2126 2127#if DEBUG 2128 /* tgsi_sanity doesn't seem to return if there are too many constants. 
*/ 2129 bool too_many_constants = false; 2130 for (unsigned i = 0; i < ARRAY_SIZE(ureg->const_decls); i++) { 2131 for (unsigned j = 0; j < ureg->const_decls[i].nr_constant_ranges; j++) { 2132 if (ureg->const_decls[i].constant_range[j].last > 4096) { 2133 too_many_constants = true; 2134 break; 2135 } 2136 } 2137 } 2138 2139 if (tokens && !too_many_constants && !tgsi_sanity_check(tokens)) { 2140 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); 2141 tgsi_dump(tokens, 0); 2142 assert(0); 2143 } 2144#endif 2145 2146 2147 return tokens; 2148} 2149 2150 2151void *ureg_create_shader( struct ureg_program *ureg, 2152 struct pipe_context *pipe, 2153 const struct pipe_stream_output_info *so ) 2154{ 2155 struct pipe_shader_state state = {0}; 2156 2157 pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg)); 2158 if(!state.tokens) 2159 return NULL; 2160 2161 if (so) 2162 state.stream_output = *so; 2163 2164 switch (ureg->processor) { 2165 case PIPE_SHADER_VERTEX: 2166 return pipe->create_vs_state(pipe, &state); 2167 case PIPE_SHADER_TESS_CTRL: 2168 return pipe->create_tcs_state(pipe, &state); 2169 case PIPE_SHADER_TESS_EVAL: 2170 return pipe->create_tes_state(pipe, &state); 2171 case PIPE_SHADER_GEOMETRY: 2172 return pipe->create_gs_state(pipe, &state); 2173 case PIPE_SHADER_FRAGMENT: 2174 return pipe->create_fs_state(pipe, &state); 2175 default: 2176 return NULL; 2177 } 2178} 2179 2180 2181const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, 2182 unsigned *nr_tokens ) 2183{ 2184 const struct tgsi_token *tokens; 2185 2186 ureg_finalize(ureg); 2187 2188 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 2189 2190 if (nr_tokens) 2191 *nr_tokens = ureg->domain[DOMAIN_DECL].count; 2192 2193 ureg->domain[DOMAIN_DECL].tokens = 0; 2194 ureg->domain[DOMAIN_DECL].size = 0; 2195 ureg->domain[DOMAIN_DECL].order = 0; 2196 ureg->domain[DOMAIN_DECL].count = 0; 2197 2198 return tokens; 2199} 2200 2201 2202void ureg_free_tokens( const struct tgsi_token 
*tokens ) 2203{ 2204 FREE((struct tgsi_token *)tokens); 2205} 2206 2207 2208struct ureg_program * 2209ureg_create(enum pipe_shader_type processor) 2210{ 2211 return ureg_create_with_screen(processor, NULL); 2212} 2213 2214 2215struct ureg_program * 2216ureg_create_with_screen(enum pipe_shader_type processor, 2217 struct pipe_screen *screen) 2218{ 2219 uint i; 2220 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); 2221 if (!ureg) 2222 goto no_ureg; 2223 2224 ureg->processor = processor; 2225 ureg->supports_any_inout_decl_range = 2226 screen && 2227 screen->get_shader_param(screen, processor, 2228 PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0; 2229 ureg->next_shader_processor = -1; 2230 2231 for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) 2232 ureg->properties[i] = ~0; 2233 2234 ureg->free_temps = util_bitmask_create(); 2235 if (ureg->free_temps == NULL) 2236 goto no_free_temps; 2237 2238 ureg->local_temps = util_bitmask_create(); 2239 if (ureg->local_temps == NULL) 2240 goto no_local_temps; 2241 2242 ureg->decl_temps = util_bitmask_create(); 2243 if (ureg->decl_temps == NULL) 2244 goto no_decl_temps; 2245 2246 return ureg; 2247 2248no_decl_temps: 2249 util_bitmask_destroy(ureg->local_temps); 2250no_local_temps: 2251 util_bitmask_destroy(ureg->free_temps); 2252no_free_temps: 2253 FREE(ureg); 2254no_ureg: 2255 return NULL; 2256} 2257 2258 2259void 2260ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor) 2261{ 2262 ureg->next_shader_processor = processor; 2263} 2264 2265 2266unsigned 2267ureg_get_nr_outputs( const struct ureg_program *ureg ) 2268{ 2269 if (!ureg) 2270 return 0; 2271 return ureg->nr_outputs; 2272} 2273 2274static void 2275ureg_setup_clipdist_info(struct ureg_program *ureg, 2276 const struct shader_info *info) 2277{ 2278 if (info->clip_distance_array_size) 2279 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, 2280 info->clip_distance_array_size); 2281 if (info->cull_distance_array_size) 2282 
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED, 2283 info->cull_distance_array_size); 2284} 2285 2286static void 2287ureg_setup_tess_ctrl_shader(struct ureg_program *ureg, 2288 const struct shader_info *info) 2289{ 2290 ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 2291 info->tess.tcs_vertices_out); 2292} 2293 2294static void 2295ureg_setup_tess_eval_shader(struct ureg_program *ureg, 2296 const struct shader_info *info) 2297{ 2298 if (info->tess.primitive_mode == GL_ISOLINES) 2299 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES); 2300 else 2301 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, 2302 info->tess.primitive_mode); 2303 2304 STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL); 2305 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 == 2306 PIPE_TESS_SPACING_FRACTIONAL_ODD); 2307 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 == 2308 PIPE_TESS_SPACING_FRACTIONAL_EVEN); 2309 2310 ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, 2311 (info->tess.spacing + 1) % 3); 2312 2313 ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW, 2314 !info->tess.ccw); 2315 ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, 2316 info->tess.point_mode); 2317} 2318 2319static void 2320ureg_setup_geometry_shader(struct ureg_program *ureg, 2321 const struct shader_info *info) 2322{ 2323 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, 2324 info->gs.input_primitive); 2325 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, 2326 info->gs.output_primitive); 2327 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 2328 info->gs.vertices_out); 2329 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 2330 info->gs.invocations); 2331} 2332 2333static void 2334ureg_setup_fragment_shader(struct ureg_program *ureg, 2335 const struct shader_info *info) 2336{ 2337 if (info->fs.early_fragment_tests || info->fs.post_depth_coverage) { 2338 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); 2339 2340 if (info->fs.post_depth_coverage) 
2341 ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1); 2342 } 2343 2344 if (info->fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) { 2345 switch (info->fs.depth_layout) { 2346 case FRAG_DEPTH_LAYOUT_ANY: 2347 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, 2348 TGSI_FS_DEPTH_LAYOUT_ANY); 2349 break; 2350 case FRAG_DEPTH_LAYOUT_GREATER: 2351 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, 2352 TGSI_FS_DEPTH_LAYOUT_GREATER); 2353 break; 2354 case FRAG_DEPTH_LAYOUT_LESS: 2355 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, 2356 TGSI_FS_DEPTH_LAYOUT_LESS); 2357 break; 2358 case FRAG_DEPTH_LAYOUT_UNCHANGED: 2359 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, 2360 TGSI_FS_DEPTH_LAYOUT_UNCHANGED); 2361 break; 2362 default: 2363 assert(0); 2364 } 2365 } 2366 2367 if (info->fs.advanced_blend_modes) { 2368 ureg_property(ureg, TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED, 2369 info->fs.advanced_blend_modes); 2370 } 2371} 2372 2373static void 2374ureg_setup_compute_shader(struct ureg_program *ureg, 2375 const struct shader_info *info) 2376{ 2377 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 2378 info->workgroup_size[0]); 2379 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 2380 info->workgroup_size[1]); 2381 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 2382 info->workgroup_size[2]); 2383 2384 if (info->shared_size) 2385 ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); 2386} 2387 2388void 2389ureg_setup_shader_info(struct ureg_program *ureg, 2390 const struct shader_info *info) 2391{ 2392 if (info->layer_viewport_relative) 2393 ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1); 2394 2395 switch (info->stage) { 2396 case MESA_SHADER_VERTEX: 2397 ureg_setup_clipdist_info(ureg, info); 2398 ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage)); 2399 break; 2400 case MESA_SHADER_TESS_CTRL: 2401 ureg_setup_tess_ctrl_shader(ureg, info); 2402 break; 2403 case MESA_SHADER_TESS_EVAL: 2404 
ureg_setup_tess_eval_shader(ureg, info); 2405 ureg_setup_clipdist_info(ureg, info); 2406 ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage)); 2407 break; 2408 case MESA_SHADER_GEOMETRY: 2409 ureg_setup_geometry_shader(ureg, info); 2410 ureg_setup_clipdist_info(ureg, info); 2411 break; 2412 case MESA_SHADER_FRAGMENT: 2413 ureg_setup_fragment_shader(ureg, info); 2414 break; 2415 case MESA_SHADER_COMPUTE: 2416 ureg_setup_compute_shader(ureg, info); 2417 break; 2418 default: 2419 break; 2420 } 2421} 2422 2423 2424void ureg_destroy( struct ureg_program *ureg ) 2425{ 2426 unsigned i; 2427 2428 for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) { 2429 if (ureg->domain[i].tokens && 2430 ureg->domain[i].tokens != error_tokens) 2431 FREE(ureg->domain[i].tokens); 2432 } 2433 2434 util_bitmask_destroy(ureg->free_temps); 2435 util_bitmask_destroy(ureg->local_temps); 2436 util_bitmask_destroy(ureg->decl_temps); 2437 2438 FREE(ureg); 2439} 2440