tgsi_ureg.c revision af69d88d
1/************************************************************************** 2 * 3 * Copyright 2009-2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_context.h" 30#include "pipe/p_state.h" 31#include "tgsi/tgsi_ureg.h" 32#include "tgsi/tgsi_build.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_dump.h" 35#include "tgsi/tgsi_sanity.h" 36#include "util/u_debug.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include "util/u_bitmask.h" 40 41union tgsi_any_token { 42 struct tgsi_header header; 43 struct tgsi_processor processor; 44 struct tgsi_token token; 45 struct tgsi_property prop; 46 struct tgsi_property_data prop_data; 47 struct tgsi_declaration decl; 48 struct tgsi_declaration_range decl_range; 49 struct tgsi_declaration_dimension decl_dim; 50 struct tgsi_declaration_interp decl_interp; 51 struct tgsi_declaration_semantic decl_semantic; 52 struct tgsi_declaration_sampler_view decl_sampler_view; 53 struct tgsi_declaration_array array; 54 struct tgsi_immediate imm; 55 union tgsi_immediate_data imm_data; 56 struct tgsi_instruction insn; 57 struct tgsi_instruction_predicate insn_predicate; 58 struct tgsi_instruction_label insn_label; 59 struct tgsi_instruction_texture insn_texture; 60 struct tgsi_texture_offset insn_texture_offset; 61 struct tgsi_src_register src; 62 struct tgsi_ind_register ind; 63 struct tgsi_dimension dim; 64 struct tgsi_dst_register dst; 65 unsigned value; 66}; 67 68 69struct ureg_tokens { 70 union tgsi_any_token *tokens; 71 unsigned size; 72 unsigned order; 73 unsigned count; 74}; 75 76#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS 77#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS 78#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS 79#define UREG_MAX_CONSTANT_RANGE 32 80#define UREG_MAX_IMMEDIATE 4096 81#define UREG_MAX_ADDR 3 82#define UREG_MAX_PRED 1 83#define UREG_MAX_ARRAY_TEMPS 256 84 85struct const_decl { 86 struct { 87 unsigned first; 88 unsigned last; 89 } constant_range[UREG_MAX_CONSTANT_RANGE]; 90 unsigned nr_constant_ranges; 91}; 92 93#define DOMAIN_DECL 0 94#define DOMAIN_INSN 1 95 96struct ureg_program 97{ 98 unsigned processor; 99 struct pipe_context *pipe; 100 101 struct { 102 unsigned semantic_name; 103 unsigned semantic_index; 104 unsigned interp; 105 unsigned char cylindrical_wrap; 106 unsigned interp_location; 107 } fs_input[UREG_MAX_INPUT]; 108 unsigned nr_fs_inputs; 109 110 unsigned vs_inputs[UREG_MAX_INPUT/32]; 111 112 struct { 113 unsigned index; 114 unsigned semantic_name; 115 unsigned semantic_index; 116 } gs_input[UREG_MAX_INPUT]; 117 unsigned nr_gs_inputs; 118 119 struct { 120 unsigned index; 121 unsigned semantic_name; 122 unsigned semantic_index; 123 } system_value[UREG_MAX_SYSTEM_VALUE]; 124 unsigned nr_system_values; 125 126 struct { 127 unsigned semantic_name; 128 unsigned semantic_index; 129 unsigned usage_mask; /* = TGSI_WRITEMASK_* */ 130 } output[UREG_MAX_OUTPUT]; 131 unsigned nr_outputs; 132 133 struct { 134 union { 135 float f[4]; 136 unsigned u[4]; 137 int i[4]; 138 } value; 139 unsigned nr; 140 unsigned type; 141 } immediate[UREG_MAX_IMMEDIATE]; 142 unsigned nr_immediates; 143 144 struct ureg_src sampler[PIPE_MAX_SAMPLERS]; 145 unsigned nr_samplers; 146 147 struct { 148 unsigned index; 149 unsigned target; 150 unsigned return_type_x; 151 unsigned return_type_y; 152 unsigned return_type_z; 153 unsigned return_type_w; 154 } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 155 unsigned nr_sampler_views; 156 157 struct util_bitmask *free_temps; 158 struct util_bitmask *local_temps; 159 struct util_bitmask *decl_temps; 160 unsigned nr_temps; 161 162 unsigned array_temps[UREG_MAX_ARRAY_TEMPS]; 163 unsigned nr_array_temps; 164 165 struct const_decl const_decls; 166 struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; 167 168 unsigned property_gs_input_prim; 169 unsigned property_gs_output_prim; 170 unsigned property_gs_max_vertices; 171 unsigned property_gs_invocations; 172 unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ 173 unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ 174 unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */ 175 unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */ 176 boolean property_vs_window_space_position; /* TGSI_VS_WINDOW_SPACE_POSITION */ 177 178 unsigned nr_addrs; 179 unsigned nr_preds; 180 unsigned nr_instructions; 181 182 struct ureg_tokens domain[2]; 183}; 184 185static union tgsi_any_token error_tokens[32]; 186 187static void tokens_error( struct ureg_tokens *tokens ) 188{ 189 if (tokens->tokens && tokens->tokens != error_tokens) 190 FREE(tokens->tokens); 191 192 tokens->tokens = error_tokens; 193 tokens->size = Elements(error_tokens); 194 tokens->count = 0; 195} 196 197 198static void tokens_expand( struct ureg_tokens *tokens, 199 unsigned count ) 200{ 201 unsigned old_size = tokens->size * sizeof(unsigned); 202 203 if (tokens->tokens == error_tokens) { 204 return; 205 } 206 207 while (tokens->count + count > tokens->size) { 208 tokens->size = (1 << ++tokens->order); 209 } 210 211 tokens->tokens = REALLOC(tokens->tokens, 212 old_size, 213 tokens->size * sizeof(unsigned)); 214 if (tokens->tokens == NULL) { 215 tokens_error(tokens); 216 } 217} 218 219static void set_bad( struct ureg_program *ureg ) 220{ 221 tokens_error(&ureg->domain[0]); 222} 223 224 225 226static union tgsi_any_token *get_tokens( struct ureg_program *ureg, 227 unsigned domain, 228 unsigned count ) 229{ 230 struct ureg_tokens *tokens = &ureg->domain[domain]; 231 union tgsi_any_token *result; 232 233 if (tokens->count + count > tokens->size) 234 tokens_expand(tokens, count); 235 236 result = &tokens->tokens[tokens->count]; 237 tokens->count += count; 238 return result; 239} 240 241 242static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, 243 unsigned domain, 244 unsigned nr ) 245{ 246 if (ureg->domain[domain].tokens == error_tokens) 247 return &error_tokens[0]; 248 249 return &ureg->domain[domain].tokens[nr]; 250} 251 252 253 254static INLINE struct ureg_dst 255ureg_dst_register( unsigned file, 256 unsigned index ) 257{ 258 struct ureg_dst dst; 259 260 dst.File = file; 261 dst.WriteMask = TGSI_WRITEMASK_XYZW; 262 dst.Indirect = 0; 263 dst.IndirectFile = TGSI_FILE_NULL; 264 dst.IndirectIndex = 0; 265 dst.IndirectSwizzle = 0; 266 dst.Saturate = 0; 267 dst.Predicate = 0; 268 dst.PredNegate = 0; 269 dst.PredSwizzleX = TGSI_SWIZZLE_X; 270 dst.PredSwizzleY = TGSI_SWIZZLE_Y; 271 dst.PredSwizzleZ = TGSI_SWIZZLE_Z; 272 dst.PredSwizzleW = TGSI_SWIZZLE_W; 273 dst.Index = index; 274 dst.ArrayID = 0; 275 276 return dst; 277} 278 279 280void 281ureg_property_gs_input_prim(struct ureg_program *ureg, 282 unsigned input_prim) 283{ 284 ureg->property_gs_input_prim = input_prim; 285} 286 287void 288ureg_property_gs_output_prim(struct ureg_program *ureg, 289 unsigned output_prim) 290{ 291 ureg->property_gs_output_prim = output_prim; 292} 293 294void 295ureg_property_gs_max_vertices(struct ureg_program *ureg, 296 unsigned max_vertices) 297{ 298 ureg->property_gs_max_vertices = max_vertices; 299} 300void 301ureg_property_gs_invocations(struct ureg_program *ureg, 302 unsigned invocations) 303{ 304 ureg->property_gs_invocations = invocations; 305} 306 307void 308ureg_property_fs_coord_origin(struct ureg_program *ureg, 309 unsigned fs_coord_origin) 310{ 311 ureg->property_fs_coord_origin = fs_coord_origin; 312} 313 314void 315ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, 316 unsigned fs_coord_pixel_center) 317{ 318 ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; 319} 320 321void 322ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, 323 unsigned fs_color0_writes_all_cbufs) 324{ 325 ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs; 326} 327 328void 329ureg_property_fs_depth_layout(struct ureg_program *ureg, 330 unsigned fs_depth_layout) 331{ 332 ureg->property_fs_depth_layout = fs_depth_layout; 333} 334 335void 336ureg_property_vs_window_space_position(struct ureg_program *ureg, 337 boolean vs_window_space_position) 338{ 339 ureg->property_vs_window_space_position = vs_window_space_position; 340} 341 342struct ureg_src 343ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, 344 unsigned semantic_name, 345 unsigned semantic_index, 346 unsigned interp_mode, 347 unsigned cylindrical_wrap, 348 unsigned interp_location) 349{ 350 unsigned i; 351 352 for (i = 0; i < ureg->nr_fs_inputs; i++) { 353 if (ureg->fs_input[i].semantic_name == semantic_name && 354 ureg->fs_input[i].semantic_index == semantic_index) { 355 goto out; 356 } 357 } 358 359 if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { 360 ureg->fs_input[i].semantic_name = semantic_name; 361 ureg->fs_input[i].semantic_index = semantic_index; 362 ureg->fs_input[i].interp = interp_mode; 363 ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; 364 ureg->fs_input[i].interp_location = interp_location; 365 ureg->nr_fs_inputs++; 366 } else { 367 set_bad(ureg); 368 } 369 370out: 371 return ureg_src_register(TGSI_FILE_INPUT, i); 372} 373 374 375struct ureg_src 376ureg_DECL_vs_input( struct ureg_program *ureg, 377 unsigned index ) 378{ 379 assert(ureg->processor == TGSI_PROCESSOR_VERTEX); 380 381 ureg->vs_inputs[index/32] |= 1 << (index % 32); 382 return ureg_src_register( TGSI_FILE_INPUT, index ); 383} 384 385 386struct ureg_src 387ureg_DECL_gs_input(struct ureg_program *ureg, 388 unsigned index, 389 unsigned semantic_name, 390 unsigned semantic_index) 391{ 392 if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { 393 ureg->gs_input[ureg->nr_gs_inputs].index = index; 394 ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name; 395 ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index; 396 ureg->nr_gs_inputs++; 397 } else { 398 set_bad(ureg); 399 } 400 401 /* XXX: Add suport for true 2D input registers. */ 402 return ureg_src_register(TGSI_FILE_INPUT, index); 403} 404 405 406struct ureg_src 407ureg_DECL_system_value(struct ureg_program *ureg, 408 unsigned index, 409 unsigned semantic_name, 410 unsigned semantic_index) 411{ 412 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { 413 ureg->system_value[ureg->nr_system_values].index = index; 414 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; 415 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; 416 ureg->nr_system_values++; 417 } else { 418 set_bad(ureg); 419 } 420 421 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); 422} 423 424 425struct ureg_dst 426ureg_DECL_output_masked( struct ureg_program *ureg, 427 unsigned name, 428 unsigned index, 429 unsigned usage_mask ) 430{ 431 unsigned i; 432 433 assert(usage_mask != 0); 434 435 for (i = 0; i < ureg->nr_outputs; i++) { 436 if (ureg->output[i].semantic_name == name && 437 ureg->output[i].semantic_index == index) { 438 ureg->output[i].usage_mask |= usage_mask; 439 goto out; 440 } 441 } 442 443 if (ureg->nr_outputs < UREG_MAX_OUTPUT) { 444 ureg->output[i].semantic_name = name; 445 ureg->output[i].semantic_index = index; 446 ureg->output[i].usage_mask = usage_mask; 447 ureg->nr_outputs++; 448 } 449 else { 450 set_bad( ureg ); 451 } 452 453out: 454 return ureg_dst_register( TGSI_FILE_OUTPUT, i ); 455} 456 457 458struct ureg_dst 459ureg_DECL_output( struct ureg_program *ureg, 460 unsigned name, 461 unsigned index ) 462{ 463 return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW); 464} 465 466 467/* Returns a new constant register. Keep track of which have been 468 * referred to so that we can emit decls later. 469 * 470 * Constant operands declared with this function must be addressed 471 * with a two-dimensional index. 472 * 473 * There is nothing in this code to bind this constant to any tracked 474 * value or manage any constant_buffer contents -- that's the 475 * resposibility of the calling code. 476 */ 477void 478ureg_DECL_constant2D(struct ureg_program *ureg, 479 unsigned first, 480 unsigned last, 481 unsigned index2D) 482{ 483 struct const_decl *decl = &ureg->const_decls2D[index2D]; 484 485 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); 486 487 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 488 uint i = decl->nr_constant_ranges++; 489 490 decl->constant_range[i].first = first; 491 decl->constant_range[i].last = last; 492 } 493} 494 495 496/* A one-dimensional, depricated version of ureg_DECL_constant2D(). 497 * 498 * Constant operands declared with this function must be addressed 499 * with a one-dimensional index. 500 */ 501struct ureg_src 502ureg_DECL_constant(struct ureg_program *ureg, 503 unsigned index) 504{ 505 struct const_decl *decl = &ureg->const_decls; 506 unsigned minconst = index, maxconst = index; 507 unsigned i; 508 509 /* Inside existing range? 510 */ 511 for (i = 0; i < decl->nr_constant_ranges; i++) { 512 if (decl->constant_range[i].first <= index && 513 decl->constant_range[i].last >= index) { 514 goto out; 515 } 516 } 517 518 /* Extend existing range? 519 */ 520 for (i = 0; i < decl->nr_constant_ranges; i++) { 521 if (decl->constant_range[i].last == index - 1) { 522 decl->constant_range[i].last = index; 523 goto out; 524 } 525 526 if (decl->constant_range[i].first == index + 1) { 527 decl->constant_range[i].first = index; 528 goto out; 529 } 530 531 minconst = MIN2(minconst, decl->constant_range[i].first); 532 maxconst = MAX2(maxconst, decl->constant_range[i].last); 533 } 534 535 /* Create new range? 536 */ 537 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 538 i = decl->nr_constant_ranges++; 539 decl->constant_range[i].first = index; 540 decl->constant_range[i].last = index; 541 goto out; 542 } 543 544 /* Collapse all ranges down to one: 545 */ 546 i = 0; 547 decl->constant_range[0].first = minconst; 548 decl->constant_range[0].last = maxconst; 549 decl->nr_constant_ranges = 1; 550 551out: 552 assert(i < decl->nr_constant_ranges); 553 assert(decl->constant_range[i].first <= index); 554 assert(decl->constant_range[i].last >= index); 555 return ureg_src_register(TGSI_FILE_CONSTANT, index); 556} 557 558static struct ureg_dst alloc_temporary( struct ureg_program *ureg, 559 boolean local ) 560{ 561 unsigned i; 562 563 /* Look for a released temporary. 564 */ 565 for (i = util_bitmask_get_first_index(ureg->free_temps); 566 i != UTIL_BITMASK_INVALID_INDEX; 567 i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) { 568 if (util_bitmask_get(ureg->local_temps, i) == local) 569 break; 570 } 571 572 /* Or allocate a new one. 573 */ 574 if (i == UTIL_BITMASK_INVALID_INDEX) { 575 i = ureg->nr_temps++; 576 577 if (local) 578 util_bitmask_set(ureg->local_temps, i); 579 580 /* Start a new declaration when the local flag changes */ 581 if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local) 582 util_bitmask_set(ureg->decl_temps, i); 583 } 584 585 util_bitmask_clear(ureg->free_temps, i); 586 587 return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); 588} 589 590struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) 591{ 592 return alloc_temporary(ureg, FALSE); 593} 594 595struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg ) 596{ 597 return alloc_temporary(ureg, TRUE); 598} 599 600struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg, 601 unsigned size, 602 boolean local ) 603{ 604 unsigned i = ureg->nr_temps; 605 struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i ); 606 607 if (local) 608 util_bitmask_set(ureg->local_temps, i); 609 610 /* Always start a new declaration at the start */ 611 util_bitmask_set(ureg->decl_temps, i); 612 613 ureg->nr_temps += size; 614 615 /* and also at the end of the array */ 616 util_bitmask_set(ureg->decl_temps, ureg->nr_temps); 617 618 if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) { 619 ureg->array_temps[ureg->nr_array_temps++] = i; 620 dst.ArrayID = ureg->nr_array_temps; 621 } 622 623 return dst; 624} 625 626void ureg_release_temporary( struct ureg_program *ureg, 627 struct ureg_dst tmp ) 628{ 629 if(tmp.File == TGSI_FILE_TEMPORARY) 630 util_bitmask_set(ureg->free_temps, tmp.Index); 631} 632 633 634/* Allocate a new address register. 635 */ 636struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) 637{ 638 if (ureg->nr_addrs < UREG_MAX_ADDR) 639 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); 640 641 assert( 0 ); 642 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); 643} 644 645/* Allocate a new predicate register. 646 */ 647struct ureg_dst 648ureg_DECL_predicate(struct ureg_program *ureg) 649{ 650 if (ureg->nr_preds < UREG_MAX_PRED) { 651 return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); 652 } 653 654 assert(0); 655 return ureg_dst_register(TGSI_FILE_PREDICATE, 0); 656} 657 658/* Allocate a new sampler. 659 */ 660struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, 661 unsigned nr ) 662{ 663 unsigned i; 664 665 for (i = 0; i < ureg->nr_samplers; i++) 666 if (ureg->sampler[i].Index == nr) 667 return ureg->sampler[i]; 668 669 if (i < PIPE_MAX_SAMPLERS) { 670 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); 671 ureg->nr_samplers++; 672 return ureg->sampler[i]; 673 } 674 675 assert( 0 ); 676 return ureg->sampler[0]; 677} 678 679/* 680 * Allocate a new shader sampler view. 681 */ 682struct ureg_src 683ureg_DECL_sampler_view(struct ureg_program *ureg, 684 unsigned index, 685 unsigned target, 686 unsigned return_type_x, 687 unsigned return_type_y, 688 unsigned return_type_z, 689 unsigned return_type_w) 690{ 691 struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index); 692 uint i; 693 694 for (i = 0; i < ureg->nr_sampler_views; i++) { 695 if (ureg->sampler_view[i].index == index) { 696 return reg; 697 } 698 } 699 700 if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) { 701 ureg->sampler_view[i].index = index; 702 ureg->sampler_view[i].target = target; 703 ureg->sampler_view[i].return_type_x = return_type_x; 704 ureg->sampler_view[i].return_type_y = return_type_y; 705 ureg->sampler_view[i].return_type_z = return_type_z; 706 ureg->sampler_view[i].return_type_w = return_type_w; 707 ureg->nr_sampler_views++; 708 return reg; 709 } 710 711 assert(0); 712 return reg; 713} 714 715static int 716match_or_expand_immediate( const unsigned *v, 717 unsigned nr, 718 unsigned *v2, 719 unsigned *pnr2, 720 unsigned *swizzle ) 721{ 722 unsigned nr2 = *pnr2; 723 unsigned i, j; 724 725 *swizzle = 0; 726 727 for (i = 0; i < nr; i++) { 728 boolean found = FALSE; 729 730 for (j = 0; j < nr2 && !found; j++) { 731 if (v[i] == v2[j]) { 732 *swizzle |= j << (i * 2); 733 found = TRUE; 734 } 735 } 736 737 if (!found) { 738 if (nr2 >= 4) { 739 return FALSE; 740 } 741 742 v2[nr2] = v[i]; 743 *swizzle |= nr2 << (i * 2); 744 nr2++; 745 } 746 } 747 748 /* Actually expand immediate only when fully succeeded. 749 */ 750 *pnr2 = nr2; 751 return TRUE; 752} 753 754 755static struct ureg_src 756decl_immediate( struct ureg_program *ureg, 757 const unsigned *v, 758 unsigned nr, 759 unsigned type ) 760{ 761 unsigned i, j; 762 unsigned swizzle = 0; 763 764 /* Could do a first pass where we examine all existing immediates 765 * without expanding. 766 */ 767 768 for (i = 0; i < ureg->nr_immediates; i++) { 769 if (ureg->immediate[i].type != type) { 770 continue; 771 } 772 if (match_or_expand_immediate(v, 773 nr, 774 ureg->immediate[i].value.u, 775 &ureg->immediate[i].nr, 776 &swizzle)) { 777 goto out; 778 } 779 } 780 781 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { 782 i = ureg->nr_immediates++; 783 ureg->immediate[i].type = type; 784 if (match_or_expand_immediate(v, 785 nr, 786 ureg->immediate[i].value.u, 787 &ureg->immediate[i].nr, 788 &swizzle)) { 789 goto out; 790 } 791 } 792 793 set_bad(ureg); 794 795out: 796 /* Make sure that all referenced elements are from this immediate. 797 * Has the effect of making size-one immediates into scalars. 798 */ 799 for (j = nr; j < 4; j++) { 800 swizzle |= (swizzle & 0x3) << (j * 2); 801 } 802 803 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), 804 (swizzle >> 0) & 0x3, 805 (swizzle >> 2) & 0x3, 806 (swizzle >> 4) & 0x3, 807 (swizzle >> 6) & 0x3); 808} 809 810 811struct ureg_src 812ureg_DECL_immediate( struct ureg_program *ureg, 813 const float *v, 814 unsigned nr ) 815{ 816 union { 817 float f[4]; 818 unsigned u[4]; 819 } fu; 820 unsigned int i; 821 822 for (i = 0; i < nr; i++) { 823 fu.f[i] = v[i]; 824 } 825 826 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); 827} 828 829 830struct ureg_src 831ureg_DECL_immediate_uint( struct ureg_program *ureg, 832 const unsigned *v, 833 unsigned nr ) 834{ 835 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); 836} 837 838 839struct ureg_src 840ureg_DECL_immediate_block_uint( struct ureg_program *ureg, 841 const unsigned *v, 842 unsigned nr ) 843{ 844 uint index; 845 uint i; 846 847 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { 848 set_bad(ureg); 849 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); 850 } 851 852 index = ureg->nr_immediates; 853 ureg->nr_immediates += (nr + 3) / 4; 854 855 for (i = index; i < ureg->nr_immediates; i++) { 856 ureg->immediate[i].type = TGSI_IMM_UINT32; 857 ureg->immediate[i].nr = nr > 4 ? 4 : nr; 858 memcpy(ureg->immediate[i].value.u, 859 &v[(i - index) * 4], 860 ureg->immediate[i].nr * sizeof(uint)); 861 nr -= 4; 862 } 863 864 return ureg_src_register(TGSI_FILE_IMMEDIATE, index); 865} 866 867 868struct ureg_src 869ureg_DECL_immediate_int( struct ureg_program *ureg, 870 const int *v, 871 unsigned nr ) 872{ 873 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); 874} 875 876 877void 878ureg_emit_src( struct ureg_program *ureg, 879 struct ureg_src src ) 880{ 881 unsigned size = 1 + (src.Indirect ? 1 : 0) + 882 (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0); 883 884 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 885 unsigned n = 0; 886 887 assert(src.File != TGSI_FILE_NULL); 888 assert(src.File < TGSI_FILE_COUNT); 889 890 out[n].value = 0; 891 out[n].src.File = src.File; 892 out[n].src.SwizzleX = src.SwizzleX; 893 out[n].src.SwizzleY = src.SwizzleY; 894 out[n].src.SwizzleZ = src.SwizzleZ; 895 out[n].src.SwizzleW = src.SwizzleW; 896 out[n].src.Index = src.Index; 897 out[n].src.Negate = src.Negate; 898 out[0].src.Absolute = src.Absolute; 899 n++; 900 901 if (src.Indirect) { 902 out[0].src.Indirect = 1; 903 out[n].value = 0; 904 out[n].ind.File = src.IndirectFile; 905 out[n].ind.Swizzle = src.IndirectSwizzle; 906 out[n].ind.Index = src.IndirectIndex; 907 out[n].ind.ArrayID = src.ArrayID; 908 n++; 909 } 910 911 if (src.Dimension) { 912 out[0].src.Dimension = 1; 913 out[n].dim.Dimension = 0; 914 out[n].dim.Padding = 0; 915 if (src.DimIndirect) { 916 out[n].dim.Indirect = 1; 917 out[n].dim.Index = src.DimensionIndex; 918 n++; 919 out[n].value = 0; 920 out[n].ind.File = src.DimIndFile; 921 out[n].ind.Swizzle = src.DimIndSwizzle; 922 out[n].ind.Index = src.DimIndIndex; 923 out[n].ind.ArrayID = src.ArrayID; 924 } else { 925 out[n].dim.Indirect = 0; 926 out[n].dim.Index = src.DimensionIndex; 927 } 928 n++; 929 } 930 931 assert(n == size); 932} 933 934 935void 936ureg_emit_dst( struct ureg_program *ureg, 937 struct ureg_dst dst ) 938{ 939 unsigned size = (1 + 940 (dst.Indirect ? 1 : 0)); 941 942 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 943 unsigned n = 0; 944 945 assert(dst.File != TGSI_FILE_NULL); 946 assert(dst.File != TGSI_FILE_CONSTANT); 947 assert(dst.File != TGSI_FILE_INPUT); 948 assert(dst.File != TGSI_FILE_SAMPLER); 949 assert(dst.File != TGSI_FILE_SAMPLER_VIEW); 950 assert(dst.File != TGSI_FILE_IMMEDIATE); 951 assert(dst.File < TGSI_FILE_COUNT); 952 953 out[n].value = 0; 954 out[n].dst.File = dst.File; 955 out[n].dst.WriteMask = dst.WriteMask; 956 out[n].dst.Indirect = dst.Indirect; 957 out[n].dst.Index = dst.Index; 958 n++; 959 960 if (dst.Indirect) { 961 out[n].value = 0; 962 out[n].ind.File = dst.IndirectFile; 963 out[n].ind.Swizzle = dst.IndirectSwizzle; 964 out[n].ind.Index = dst.IndirectIndex; 965 out[n].ind.ArrayID = dst.ArrayID; 966 n++; 967 } 968 969 assert(n == size); 970} 971 972 973static void validate( unsigned opcode, 974 unsigned nr_dst, 975 unsigned nr_src ) 976{ 977#ifdef DEBUG 978 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); 979 assert(info); 980 if(info) { 981 assert(nr_dst == info->num_dst); 982 assert(nr_src == info->num_src); 983 } 984#endif 985} 986 987struct ureg_emit_insn_result 988ureg_emit_insn(struct ureg_program *ureg, 989 unsigned opcode, 990 boolean saturate, 991 boolean predicate, 992 boolean pred_negate, 993 unsigned pred_swizzle_x, 994 unsigned pred_swizzle_y, 995 unsigned pred_swizzle_z, 996 unsigned pred_swizzle_w, 997 unsigned num_dst, 998 unsigned num_src ) 999{ 1000 union tgsi_any_token *out; 1001 uint count = predicate ? 2 : 1; 1002 struct ureg_emit_insn_result result; 1003 1004 validate( opcode, num_dst, num_src ); 1005 1006 out = get_tokens( ureg, DOMAIN_INSN, count ); 1007 out[0].insn = tgsi_default_instruction(); 1008 out[0].insn.Opcode = opcode; 1009 out[0].insn.Saturate = saturate; 1010 out[0].insn.NumDstRegs = num_dst; 1011 out[0].insn.NumSrcRegs = num_src; 1012 1013 result.insn_token = ureg->domain[DOMAIN_INSN].count - count; 1014 result.extended_token = result.insn_token; 1015 1016 if (predicate) { 1017 out[0].insn.Predicate = 1; 1018 out[1].insn_predicate = tgsi_default_instruction_predicate(); 1019 out[1].insn_predicate.Negate = pred_negate; 1020 out[1].insn_predicate.SwizzleX = pred_swizzle_x; 1021 out[1].insn_predicate.SwizzleY = pred_swizzle_y; 1022 out[1].insn_predicate.SwizzleZ = pred_swizzle_z; 1023 out[1].insn_predicate.SwizzleW = pred_swizzle_w; 1024 } 1025 1026 ureg->nr_instructions++; 1027 1028 return result; 1029} 1030 1031 1032void 1033ureg_emit_label(struct ureg_program *ureg, 1034 unsigned extended_token, 1035 unsigned *label_token ) 1036{ 1037 union tgsi_any_token *out, *insn; 1038 1039 if(!label_token) 1040 return; 1041 1042 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 1043 out[0].value = 0; 1044 1045 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 1046 insn->insn.Label = 1; 1047 1048 *label_token = ureg->domain[DOMAIN_INSN].count - 1; 1049} 1050 1051/* Will return a number which can be used in a label to point to the 1052 * next instruction to be emitted. 1053 */ 1054unsigned 1055ureg_get_instruction_number( struct ureg_program *ureg ) 1056{ 1057 return ureg->nr_instructions; 1058} 1059 1060/* Patch a given label (expressed as a token number) to point to a 1061 * given instruction (expressed as an instruction number). 1062 */ 1063void 1064ureg_fixup_label(struct ureg_program *ureg, 1065 unsigned label_token, 1066 unsigned instruction_number ) 1067{ 1068 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); 1069 1070 out->insn_label.Label = instruction_number; 1071} 1072 1073 1074void 1075ureg_emit_texture(struct ureg_program *ureg, 1076 unsigned extended_token, 1077 unsigned target, unsigned num_offsets) 1078{ 1079 union tgsi_any_token *out, *insn; 1080 1081 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 1082 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 1083 1084 insn->insn.Texture = 1; 1085 1086 out[0].value = 0; 1087 out[0].insn_texture.Texture = target; 1088 out[0].insn_texture.NumOffsets = num_offsets; 1089} 1090 1091void 1092ureg_emit_texture_offset(struct ureg_program *ureg, 1093 const struct tgsi_texture_offset *offset) 1094{ 1095 union tgsi_any_token *out; 1096 1097 out = get_tokens( ureg, DOMAIN_INSN, 1); 1098 1099 out[0].value = 0; 1100 out[0].insn_texture_offset = *offset; 1101 1102} 1103 1104 1105void 1106ureg_fixup_insn_size(struct ureg_program *ureg, 1107 unsigned insn ) 1108{ 1109 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); 1110 1111 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION); 1112 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; 1113} 1114 1115 1116void 1117ureg_insn(struct ureg_program *ureg, 1118 unsigned opcode, 1119 const struct ureg_dst *dst, 1120 unsigned nr_dst, 1121 const struct ureg_src *src, 1122 unsigned nr_src ) 1123{ 1124 struct ureg_emit_insn_result insn; 1125 unsigned i; 1126 boolean saturate; 1127 boolean predicate; 1128 boolean negate = FALSE; 1129 unsigned swizzle[4] = { 0 }; 1130 1131 if (nr_dst && ureg_dst_is_empty(dst[0])) { 1132 return; 1133 } 1134 1135 saturate = nr_dst ? dst[0].Saturate : FALSE; 1136 predicate = nr_dst ? dst[0].Predicate : FALSE; 1137 if (predicate) { 1138 negate = dst[0].PredNegate; 1139 swizzle[0] = dst[0].PredSwizzleX; 1140 swizzle[1] = dst[0].PredSwizzleY; 1141 swizzle[2] = dst[0].PredSwizzleZ; 1142 swizzle[3] = dst[0].PredSwizzleW; 1143 } 1144 1145 insn = ureg_emit_insn(ureg, 1146 opcode, 1147 saturate, 1148 predicate, 1149 negate, 1150 swizzle[0], 1151 swizzle[1], 1152 swizzle[2], 1153 swizzle[3], 1154 nr_dst, 1155 nr_src); 1156 1157 for (i = 0; i < nr_dst; i++) 1158 ureg_emit_dst( ureg, dst[i] ); 1159 1160 for (i = 0; i < nr_src; i++) 1161 ureg_emit_src( ureg, src[i] ); 1162 1163 ureg_fixup_insn_size( ureg, insn.insn_token ); 1164} 1165 1166void 1167ureg_tex_insn(struct ureg_program *ureg, 1168 unsigned opcode, 1169 const struct ureg_dst *dst, 1170 unsigned nr_dst, 1171 unsigned target, 1172 const struct tgsi_texture_offset *texoffsets, 1173 unsigned nr_offset, 1174 const struct ureg_src *src, 1175 unsigned nr_src ) 1176{ 1177 struct ureg_emit_insn_result insn; 1178 unsigned i; 1179 boolean saturate; 1180 boolean predicate; 1181 boolean negate = FALSE; 1182 unsigned swizzle[4] = { 0 }; 1183 1184 if (nr_dst && ureg_dst_is_empty(dst[0])) { 1185 return; 1186 } 1187 1188 saturate = nr_dst ? dst[0].Saturate : FALSE; 1189 predicate = nr_dst ? dst[0].Predicate : FALSE; 1190 if (predicate) { 1191 negate = dst[0].PredNegate; 1192 swizzle[0] = dst[0].PredSwizzleX; 1193 swizzle[1] = dst[0].PredSwizzleY; 1194 swizzle[2] = dst[0].PredSwizzleZ; 1195 swizzle[3] = dst[0].PredSwizzleW; 1196 } 1197 1198 insn = ureg_emit_insn(ureg, 1199 opcode, 1200 saturate, 1201 predicate, 1202 negate, 1203 swizzle[0], 1204 swizzle[1], 1205 swizzle[2], 1206 swizzle[3], 1207 nr_dst, 1208 nr_src); 1209 1210 ureg_emit_texture( ureg, insn.extended_token, target, nr_offset ); 1211 1212 for (i = 0; i < nr_offset; i++) 1213 ureg_emit_texture_offset( ureg, &texoffsets[i]); 1214 1215 for (i = 0; i < nr_dst; i++) 1216 ureg_emit_dst( ureg, dst[i] ); 1217 1218 for (i = 0; i < nr_src; i++) 1219 ureg_emit_src( ureg, src[i] ); 1220 1221 ureg_fixup_insn_size( ureg, insn.insn_token ); 1222} 1223 1224 1225void 1226ureg_label_insn(struct ureg_program *ureg, 1227 unsigned opcode, 1228 const struct ureg_src *src, 1229 unsigned nr_src, 1230 unsigned *label_token ) 1231{ 1232 struct ureg_emit_insn_result insn; 1233 unsigned i; 1234 1235 insn = ureg_emit_insn(ureg, 1236 opcode, 1237 FALSE, 1238 FALSE, 1239 FALSE, 1240 TGSI_SWIZZLE_X, 1241 TGSI_SWIZZLE_Y, 1242 TGSI_SWIZZLE_Z, 1243 TGSI_SWIZZLE_W, 1244 0, 1245 nr_src); 1246 1247 ureg_emit_label( ureg, insn.extended_token, label_token ); 1248 1249 for (i = 0; i < nr_src; i++) 1250 ureg_emit_src( ureg, src[i] ); 1251 1252 ureg_fixup_insn_size( ureg, insn.insn_token ); 1253} 1254 1255 1256static void 1257emit_decl_semantic(struct ureg_program *ureg, 1258 unsigned file, 1259 unsigned index, 1260 unsigned semantic_name, 1261 unsigned semantic_index, 1262 unsigned usage_mask) 1263{ 1264 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1265 1266 out[0].value = 0; 1267 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1268 out[0].decl.NrTokens = 3; 1269 out[0].decl.File = file; 1270 out[0].decl.UsageMask = usage_mask; 1271 out[0].decl.Semantic = 1; 1272 1273 out[1].value = 0; 1274 out[1].decl_range.First = index; 1275 out[1].decl_range.Last = index; 1276 1277 out[2].value = 0; 1278 out[2].decl_semantic.Name = semantic_name; 1279 out[2].decl_semantic.Index = semantic_index; 1280} 1281 1282 1283static void 1284emit_decl_fs(struct ureg_program *ureg, 1285 unsigned file, 1286 unsigned index, 1287 unsigned semantic_name, 1288 unsigned semantic_index, 1289 unsigned interpolate, 1290 unsigned cylindrical_wrap, 1291 unsigned interpolate_location) 1292{ 1293 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4); 1294 1295 out[0].value = 0; 1296 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1297 out[0].decl.NrTokens = 4; 1298 out[0].decl.File = file; 1299 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ 1300 out[0].decl.Interpolate = 1; 1301 out[0].decl.Semantic = 1; 1302 1303 out[1].value = 0; 1304 out[1].decl_range.First = index; 1305 out[1].decl_range.Last = index; 1306 1307 out[2].value = 0; 1308 out[2].decl_interp.Interpolate = interpolate; 1309 out[2].decl_interp.CylindricalWrap = cylindrical_wrap; 1310 out[2].decl_interp.Location = interpolate_location; 1311 1312 out[3].value = 0; 1313 out[3].decl_semantic.Name = semantic_name; 1314 out[3].decl_semantic.Index = semantic_index; 1315} 1316 1317static void 1318emit_decl_temps( struct ureg_program *ureg, 1319 unsigned first, unsigned last, 1320 boolean local, 1321 unsigned arrayid ) 1322{ 1323 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 1324 arrayid ? 3 : 2 ); 1325 1326 out[0].value = 0; 1327 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1328 out[0].decl.NrTokens = 2; 1329 out[0].decl.File = TGSI_FILE_TEMPORARY; 1330 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1331 out[0].decl.Local = local; 1332 1333 out[1].value = 0; 1334 out[1].decl_range.First = first; 1335 out[1].decl_range.Last = last; 1336 1337 if (arrayid) { 1338 out[0].decl.Array = 1; 1339 out[2].value = 0; 1340 out[2].array.ArrayID = arrayid; 1341 } 1342} 1343 1344static void emit_decl_range( struct ureg_program *ureg, 1345 unsigned file, 1346 unsigned first, 1347 unsigned count ) 1348{ 1349 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1350 1351 out[0].value = 0; 1352 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1353 out[0].decl.NrTokens = 2; 1354 out[0].decl.File = file; 1355 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1356 out[0].decl.Semantic = 0; 1357 1358 out[1].value = 0; 1359 out[1].decl_range.First = first; 1360 out[1].decl_range.Last = first + count - 1; 1361} 1362 1363static void 1364emit_decl_range2D(struct ureg_program *ureg, 1365 unsigned file, 1366 unsigned first, 1367 unsigned last, 1368 unsigned index2D) 1369{ 1370 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1371 1372 out[0].value = 0; 1373 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1374 out[0].decl.NrTokens = 3; 1375 out[0].decl.File = file; 1376 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; 1377 out[0].decl.Dimension = 1; 1378 1379 out[1].value = 0; 1380 out[1].decl_range.First = first; 1381 out[1].decl_range.Last = last; 1382 1383 out[2].value = 0; 1384 out[2].decl_dim.Index2D = index2D; 1385} 1386 1387static void 1388emit_decl_sampler_view(struct ureg_program *ureg, 1389 unsigned index, 1390 unsigned target, 1391 unsigned return_type_x, 1392 unsigned return_type_y, 1393 unsigned return_type_z, 1394 unsigned return_type_w ) 1395{ 1396 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1397 1398 out[0].value = 0; 1399 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1400 out[0].decl.NrTokens = 3; 1401 out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; 1402 out[0].decl.UsageMask = 0xf; 1403 1404 out[1].value = 0; 1405 out[1].decl_range.First = index; 1406 out[1].decl_range.Last = index; 1407 1408 out[2].value = 0; 1409 out[2].decl_sampler_view.Resource = target; 1410 out[2].decl_sampler_view.ReturnTypeX = return_type_x; 1411 out[2].decl_sampler_view.ReturnTypeY = return_type_y; 1412 out[2].decl_sampler_view.ReturnTypeZ = return_type_z; 1413 out[2].decl_sampler_view.ReturnTypeW = return_type_w; 1414} 1415 1416static void 1417emit_immediate( struct ureg_program *ureg, 1418 const unsigned *v, 1419 unsigned type ) 1420{ 1421 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); 1422 1423 out[0].value = 0; 1424 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; 1425 out[0].imm.NrTokens = 5; 1426 out[0].imm.DataType = type; 1427 out[0].imm.Padding = 0; 1428 1429 out[1].imm_data.Uint = v[0]; 1430 out[2].imm_data.Uint = v[1]; 1431 out[3].imm_data.Uint = v[2]; 1432 out[4].imm_data.Uint = v[3]; 1433} 1434 1435static void 1436emit_property(struct ureg_program *ureg, 1437 unsigned name, 1438 unsigned data) 1439{ 1440 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1441 1442 out[0].value = 0; 1443 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; 1444 out[0].prop.NrTokens = 2; 1445 out[0].prop.PropertyName = name; 1446 1447 out[1].prop_data.Data = data; 1448} 1449 1450 1451static void emit_decls( struct ureg_program *ureg ) 1452{ 1453 unsigned i; 1454 1455 if (ureg->property_gs_input_prim != ~0) { 1456 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1457 1458 emit_property(ureg, 1459 TGSI_PROPERTY_GS_INPUT_PRIM, 1460 ureg->property_gs_input_prim); 1461 } 1462 1463 if (ureg->property_gs_output_prim != ~0) { 1464 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1465 1466 emit_property(ureg, 1467 TGSI_PROPERTY_GS_OUTPUT_PRIM, 1468 ureg->property_gs_output_prim); 1469 } 1470 1471 if (ureg->property_gs_max_vertices != ~0) { 1472 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1473 1474 emit_property(ureg, 1475 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1476 ureg->property_gs_max_vertices); 1477 } 1478 1479 if (ureg->property_gs_invocations != ~0) { 1480 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1481 1482 emit_property(ureg, 1483 TGSI_PROPERTY_GS_INVOCATIONS, 1484 ureg->property_gs_invocations); 1485 } 1486 1487 if (ureg->property_fs_coord_origin) { 1488 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1489 1490 emit_property(ureg, 1491 TGSI_PROPERTY_FS_COORD_ORIGIN, 1492 ureg->property_fs_coord_origin); 1493 } 1494 1495 if (ureg->property_fs_coord_pixel_center) { 1496 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1497 1498 emit_property(ureg, 1499 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 1500 ureg->property_fs_coord_pixel_center); 1501 } 1502 1503 if (ureg->property_fs_color0_writes_all_cbufs) { 1504 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1505 1506 emit_property(ureg, 1507 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1508 ureg->property_fs_color0_writes_all_cbufs); 1509 } 1510 1511 if (ureg->property_fs_depth_layout) { 1512 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1513 1514 emit_property(ureg, 1515 TGSI_PROPERTY_FS_DEPTH_LAYOUT, 1516 ureg->property_fs_depth_layout); 1517 } 1518 1519 if (ureg->property_vs_window_space_position) { 1520 assert(ureg->processor == TGSI_PROCESSOR_VERTEX); 1521 1522 emit_property(ureg, 1523 TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, 1524 ureg->property_vs_window_space_position); 1525 } 1526 1527 if (ureg->processor == TGSI_PROCESSOR_VERTEX) { 1528 for (i = 0; i < UREG_MAX_INPUT; i++) { 1529 if (ureg->vs_inputs[i/32] & (1 << (i%32))) { 1530 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); 1531 } 1532 } 1533 } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { 1534 for (i = 0; i < ureg->nr_fs_inputs; i++) { 1535 emit_decl_fs(ureg, 1536 TGSI_FILE_INPUT, 1537 i, 1538 ureg->fs_input[i].semantic_name, 1539 ureg->fs_input[i].semantic_index, 1540 ureg->fs_input[i].interp, 1541 ureg->fs_input[i].cylindrical_wrap, 1542 ureg->fs_input[i].interp_location); 1543 } 1544 } else { 1545 for (i = 0; i < ureg->nr_gs_inputs; i++) { 1546 emit_decl_semantic(ureg, 1547 TGSI_FILE_INPUT, 1548 ureg->gs_input[i].index, 1549 ureg->gs_input[i].semantic_name, 1550 ureg->gs_input[i].semantic_index, 1551 TGSI_WRITEMASK_XYZW); 1552 } 1553 } 1554 1555 for (i = 0; i < ureg->nr_system_values; i++) { 1556 emit_decl_semantic(ureg, 1557 TGSI_FILE_SYSTEM_VALUE, 1558 ureg->system_value[i].index, 1559 ureg->system_value[i].semantic_name, 1560 ureg->system_value[i].semantic_index, 1561 TGSI_WRITEMASK_XYZW); 1562 } 1563 1564 for (i = 0; i < ureg->nr_outputs; i++) { 1565 emit_decl_semantic(ureg, 1566 TGSI_FILE_OUTPUT, 1567 i, 1568 ureg->output[i].semantic_name, 1569 ureg->output[i].semantic_index, 1570 ureg->output[i].usage_mask); 1571 } 1572 1573 for (i = 0; i < ureg->nr_samplers; i++) { 1574 emit_decl_range( ureg, 1575 TGSI_FILE_SAMPLER, 1576 ureg->sampler[i].Index, 1 ); 1577 } 1578 1579 for (i = 0; i < ureg->nr_sampler_views; i++) { 1580 emit_decl_sampler_view(ureg, 1581 ureg->sampler_view[i].index, 1582 ureg->sampler_view[i].target, 1583 ureg->sampler_view[i].return_type_x, 1584 ureg->sampler_view[i].return_type_y, 1585 ureg->sampler_view[i].return_type_z, 1586 ureg->sampler_view[i].return_type_w); 1587 } 1588 1589 if (ureg->const_decls.nr_constant_ranges) { 1590 for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { 1591 emit_decl_range(ureg, 1592 TGSI_FILE_CONSTANT, 1593 ureg->const_decls.constant_range[i].first, 1594 ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); 1595 } 1596 } 1597 1598 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 1599 struct const_decl *decl = &ureg->const_decls2D[i]; 1600 1601 if (decl->nr_constant_ranges) { 1602 uint j; 1603 1604 for (j = 0; j < decl->nr_constant_ranges; j++) { 1605 emit_decl_range2D(ureg, 1606 TGSI_FILE_CONSTANT, 1607 decl->constant_range[j].first, 1608 decl->constant_range[j].last, 1609 i); 1610 } 1611 } 1612 } 1613 1614 if (ureg->nr_temps) { 1615 unsigned array = 0; 1616 for (i = 0; i < ureg->nr_temps;) { 1617 boolean local = util_bitmask_get(ureg->local_temps, i); 1618 unsigned first = i; 1619 i = util_bitmask_get_next_index(ureg->decl_temps, i + 1); 1620 if (i == UTIL_BITMASK_INVALID_INDEX) 1621 i = ureg->nr_temps; 1622 1623 if (array < ureg->nr_array_temps && ureg->array_temps[array] == first) 1624 emit_decl_temps( ureg, first, i - 1, local, ++array ); 1625 else 1626 emit_decl_temps( ureg, first, i - 1, local, 0 ); 1627 } 1628 } 1629 1630 if (ureg->nr_addrs) { 1631 emit_decl_range( ureg, 1632 TGSI_FILE_ADDRESS, 1633 0, ureg->nr_addrs ); 1634 } 1635 1636 if (ureg->nr_preds) { 1637 emit_decl_range(ureg, 1638 TGSI_FILE_PREDICATE, 1639 0, 1640 ureg->nr_preds); 1641 } 1642 1643 for (i = 0; i < ureg->nr_immediates; i++) { 1644 emit_immediate( ureg, 1645 ureg->immediate[i].value.u, 1646 ureg->immediate[i].type ); 1647 } 1648} 1649 1650/* Append the instruction tokens onto the declarations to build a 1651 * contiguous stream suitable to send to the driver. 1652 */ 1653static void copy_instructions( struct ureg_program *ureg ) 1654{ 1655 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; 1656 union tgsi_any_token *out = get_tokens( ureg, 1657 DOMAIN_DECL, 1658 nr_tokens ); 1659 1660 memcpy(out, 1661 ureg->domain[DOMAIN_INSN].tokens, 1662 nr_tokens * sizeof out[0] ); 1663} 1664 1665 1666static void 1667fixup_header_size(struct ureg_program *ureg) 1668{ 1669 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); 1670 1671 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; 1672} 1673 1674 1675static void 1676emit_header( struct ureg_program *ureg ) 1677{ 1678 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1679 1680 out[0].header.HeaderSize = 2; 1681 out[0].header.BodySize = 0; 1682 1683 out[1].processor.Processor = ureg->processor; 1684 out[1].processor.Padding = 0; 1685} 1686 1687 1688const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) 1689{ 1690 const struct tgsi_token *tokens; 1691 1692 emit_header( ureg ); 1693 emit_decls( ureg ); 1694 copy_instructions( ureg ); 1695 fixup_header_size( ureg ); 1696 1697 if (ureg->domain[0].tokens == error_tokens || 1698 ureg->domain[1].tokens == error_tokens) { 1699 debug_printf("%s: error in generated shader\n", __FUNCTION__); 1700 assert(0); 1701 return NULL; 1702 } 1703 1704 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1705 1706 if (0) { 1707 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, 1708 ureg->domain[DOMAIN_DECL].count); 1709 tgsi_dump( tokens, 0 ); 1710 } 1711 1712#if DEBUG 1713 if (tokens && !tgsi_sanity_check(tokens)) { 1714 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); 1715 tgsi_dump(tokens, 0); 1716 assert(0); 1717 } 1718#endif 1719 1720 1721 return tokens; 1722} 1723 1724 1725void *ureg_create_shader( struct ureg_program *ureg, 1726 struct pipe_context *pipe, 1727 const struct pipe_stream_output_info *so ) 1728{ 1729 struct pipe_shader_state state; 1730 1731 state.tokens = ureg_finalize(ureg); 1732 if(!state.tokens) 1733 return NULL; 1734 1735 if (so) 1736 state.stream_output = *so; 1737 else 1738 memset(&state.stream_output, 0, sizeof(state.stream_output)); 1739 1740 if (ureg->processor == TGSI_PROCESSOR_VERTEX) 1741 return pipe->create_vs_state( pipe, &state ); 1742 else 1743 return pipe->create_fs_state( pipe, &state ); 1744} 1745 1746 1747const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, 1748 unsigned *nr_tokens ) 1749{ 1750 const struct tgsi_token *tokens; 1751 1752 ureg_finalize(ureg); 1753 1754 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1755 1756 if (nr_tokens) 1757 *nr_tokens = ureg->domain[DOMAIN_DECL].size; 1758 1759 ureg->domain[DOMAIN_DECL].tokens = 0; 1760 ureg->domain[DOMAIN_DECL].size = 0; 1761 ureg->domain[DOMAIN_DECL].order = 0; 1762 ureg->domain[DOMAIN_DECL].count = 0; 1763 1764 return tokens; 1765} 1766 1767 1768void ureg_free_tokens( const struct tgsi_token *tokens ) 1769{ 1770 FREE((struct tgsi_token *)tokens); 1771} 1772 1773 1774struct ureg_program *ureg_create( unsigned processor ) 1775{ 1776 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); 1777 if (ureg == NULL) 1778 goto no_ureg; 1779 1780 ureg->processor = processor; 1781 ureg->property_gs_input_prim = ~0; 1782 ureg->property_gs_output_prim = ~0; 1783 ureg->property_gs_max_vertices = ~0; 1784 ureg->property_gs_invocations = ~0; 1785 1786 ureg->free_temps = util_bitmask_create(); 1787 if (ureg->free_temps == NULL) 1788 goto no_free_temps; 1789 1790 ureg->local_temps = util_bitmask_create(); 1791 if (ureg->local_temps == NULL) 1792 goto no_local_temps; 1793 1794 ureg->decl_temps = util_bitmask_create(); 1795 if (ureg->decl_temps == NULL) 1796 goto no_decl_temps; 1797 1798 return ureg; 1799 1800no_decl_temps: 1801 util_bitmask_destroy(ureg->local_temps); 1802no_local_temps: 1803 util_bitmask_destroy(ureg->free_temps); 1804no_free_temps: 1805 FREE(ureg); 1806no_ureg: 1807 return NULL; 1808} 1809 1810 1811unsigned 1812ureg_get_nr_outputs( const struct ureg_program *ureg ) 1813{ 1814 if (!ureg) 1815 return 0; 1816 return ureg->nr_outputs; 1817} 1818 1819 1820void ureg_destroy( struct ureg_program *ureg ) 1821{ 1822 unsigned i; 1823 1824 for (i = 0; i < Elements(ureg->domain); i++) { 1825 if (ureg->domain[i].tokens && 1826 ureg->domain[i].tokens != error_tokens) 1827 FREE(ureg->domain[i].tokens); 1828 } 1829 1830 util_bitmask_destroy(ureg->free_temps); 1831 util_bitmask_destroy(ureg->local_temps); 1832 util_bitmask_destroy(ureg->decl_temps); 1833 1834 FREE(ureg); 1835} 1836