tgsi_ureg.c revision af69d88d
1/**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_context.h"
30#include "pipe/p_state.h"
31#include "tgsi/tgsi_ureg.h"
32#include "tgsi/tgsi_build.h"
33#include "tgsi/tgsi_info.h"
34#include "tgsi/tgsi_dump.h"
35#include "tgsi/tgsi_sanity.h"
36#include "util/u_debug.h"
37#include "util/u_memory.h"
38#include "util/u_math.h"
39#include "util/u_bitmask.h"
40
/**
 * One 32-bit-style token slot of a ureg token buffer, viewable either as
 * one of the TGSI token layouts listed below or as a raw unsigned value.
 *
 * The emit helpers in this file clear a slot through .value and then fill
 * in the bitfields of the specific view they are writing.
 */
41union tgsi_any_token {
42   struct tgsi_header header;
43   struct tgsi_processor processor;
44   struct tgsi_token token;
45   struct tgsi_property prop;
46   struct tgsi_property_data prop_data;
47   struct tgsi_declaration decl;
48   struct tgsi_declaration_range decl_range;
49   struct tgsi_declaration_dimension decl_dim;
50   struct tgsi_declaration_interp decl_interp;
51   struct tgsi_declaration_semantic decl_semantic;
52   struct tgsi_declaration_sampler_view decl_sampler_view;
53   struct tgsi_declaration_array array;
54   struct tgsi_immediate imm;
55   union  tgsi_immediate_data imm_data;
56   struct tgsi_instruction insn;
57   struct tgsi_instruction_predicate insn_predicate;
58   struct tgsi_instruction_label insn_label;
59   struct tgsi_instruction_texture insn_texture;
60   struct tgsi_texture_offset insn_texture_offset;
61   struct tgsi_src_register src;
62   struct tgsi_ind_register ind;
63   struct tgsi_dimension dim;
64   struct tgsi_dst_register dst;
   /* Raw view of the whole slot; assigning 0 clears every bitfield. */
65   unsigned value;
66};
67
68
/**
 * Growable token buffer for one domain of a ureg program.
 */
69struct ureg_tokens {
   /* Heap buffer, or error_tokens once the domain is in the error state. */
70   union tgsi_any_token *tokens;
   /* Capacity of the buffer, counted in tokens. */
71   unsigned size;
   /* Growth exponent: tokens_expand() sets size to 1 << order. */
72   unsigned order;
   /* Number of tokens handed out so far by get_tokens(). */
73   unsigned count;
74};
75
/*
 * Capacities of the fixed-size bookkeeping arrays in struct ureg_program.
 * Each limit bounds the array (or counter) of the matching name below.
 */
76#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
77#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
78#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS
79#define UREG_MAX_CONSTANT_RANGE 32
80#define UREG_MAX_IMMEDIATE 4096
81#define UREG_MAX_ADDR 3
82#define UREG_MAX_PRED 1
83#define UREG_MAX_ARRAY_TEMPS 256
84
/**
 * Set of declared constant ranges.  Each entry is an inclusive
 * [first, last] span of constant indices; nr_constant_ranges is the
 * number of entries of constant_range[] currently in use.
 */
85struct const_decl {
86   struct {
87      unsigned first;
88      unsigned last;
89   } constant_range[UREG_MAX_CONSTANT_RANGE];
90   unsigned nr_constant_ranges;
91};
92
/*
 * Indices into ureg_program::domain[].  DOMAIN_INSN receives the tokens
 * written by the ureg_emit_*() helpers; DOMAIN_DECL is presumably the
 * buffer for declaration tokens (that code is not in this part of the
 * file -- confirm).  set_bad() flags errors on domain 0, i.e. DOMAIN_DECL.
 */
93#define DOMAIN_DECL 0
94#define DOMAIN_INSN 1
95
/**
 * State of a shader program under construction.
 *
 * The ureg_DECL_*() functions record declarations in the fixed-size
 * tables below (each paired with an nr_* counter of used entries), the
 * ureg_property_*() functions store property values, and the
 * ureg_emit_*() functions append instruction tokens to
 * domain[DOMAIN_INSN].
 */
96struct ureg_program
97{
98   unsigned processor;
99   struct pipe_context *pipe;
100
   /* Fragment shader inputs, keyed by (semantic_name, semantic_index). */
101   struct {
102      unsigned semantic_name;
103      unsigned semantic_index;
104      unsigned interp;
105      unsigned char cylindrical_wrap;
106      unsigned interp_location;
107   } fs_input[UREG_MAX_INPUT];
108   unsigned nr_fs_inputs;
109
   /* Bitset of declared vertex shader input indices, 32 per word. */
110   unsigned vs_inputs[UREG_MAX_INPUT/32];
111
112   struct {
113      unsigned index;
114      unsigned semantic_name;
115      unsigned semantic_index;
116   } gs_input[UREG_MAX_INPUT];
117   unsigned nr_gs_inputs;
118
119   struct {
120      unsigned index;
121      unsigned semantic_name;
122      unsigned semantic_index;
123   } system_value[UREG_MAX_SYSTEM_VALUE];
124   unsigned nr_system_values;
125
   /* Outputs, keyed by (semantic_name, semantic_index). */
126   struct {
127      unsigned semantic_name;
128      unsigned semantic_index;
129      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
130   } output[UREG_MAX_OUTPUT];
131   unsigned nr_outputs;
132
   /* Immediates: up to four components each; nr is the number in use. */
133   struct {
134      union {
135         float f[4];
136         unsigned u[4];
137         int i[4];
138      } value;
139      unsigned nr;
140      unsigned type;
141   } immediate[UREG_MAX_IMMEDIATE];
142   unsigned nr_immediates;
143
144   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
145   unsigned nr_samplers;
146
147   struct {
148      unsigned index;
149      unsigned target;
150      unsigned return_type_x;
151      unsigned return_type_y;
152      unsigned return_type_z;
153      unsigned return_type_w;
154   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
155   unsigned nr_sampler_views;
156
   /*
    * Temporary register bookkeeping, one bit per temporary index:
    * free_temps  - released and available for reuse,
    * local_temps - allocated with the "local" flag,
    * decl_temps  - index at which a new declaration starts.
    */
157   struct util_bitmask *free_temps;
158   struct util_bitmask *local_temps;
159   struct util_bitmask *decl_temps;
160   unsigned nr_temps;
161
   /* First temporary index of each array temporary; ArrayID is slot + 1. */
162   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
163   unsigned nr_array_temps;
164
   /* One-dimensional constant ranges, and per-buffer (2D) ranges. */
165   struct const_decl const_decls;
166   struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
167
168   unsigned property_gs_input_prim;
169   unsigned property_gs_output_prim;
170   unsigned property_gs_max_vertices;
171   unsigned property_gs_invocations;
172   unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */
173   unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */
174   unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */
175   unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */
176   boolean property_vs_window_space_position; /* TGSI_VS_WINDOW_SPACE_POSITION */
177
178   unsigned nr_addrs;
179   unsigned nr_preds;
   /* Incremented by ureg_emit_insn(); used as the next instruction number. */
180   unsigned nr_instructions;
181
   /* Token buffers, indexed by DOMAIN_DECL / DOMAIN_INSN. */
182   struct ureg_tokens domain[2];
183};
184
/*
 * Shared scratch buffer that a token domain is redirected to by
 * tokens_error().  Pointer identity with this array is how the rest of
 * the file recognises a domain that is in the error state.
 */
185static union tgsi_any_token error_tokens[32];
186
187static void tokens_error( struct ureg_tokens *tokens )
188{
189   if (tokens->tokens && tokens->tokens != error_tokens)
190      FREE(tokens->tokens);
191
192   tokens->tokens = error_tokens;
193   tokens->size = Elements(error_tokens);
194   tokens->count = 0;
195}
196
197
198static void tokens_expand( struct ureg_tokens *tokens,
199                           unsigned count )
200{
201   unsigned old_size = tokens->size * sizeof(unsigned);
202
203   if (tokens->tokens == error_tokens) {
204      return;
205   }
206
207   while (tokens->count + count > tokens->size) {
208      tokens->size = (1 << ++tokens->order);
209   }
210
211   tokens->tokens = REALLOC(tokens->tokens,
212                            old_size,
213                            tokens->size * sizeof(unsigned));
214   if (tokens->tokens == NULL) {
215      tokens_error(tokens);
216   }
217}
218
219static void set_bad( struct ureg_program *ureg )
220{
221   tokens_error(&ureg->domain[0]);
222}
223
224
225
226static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
227                                         unsigned domain,
228                                         unsigned count )
229{
230   struct ureg_tokens *tokens = &ureg->domain[domain];
231   union tgsi_any_token *result;
232
233   if (tokens->count + count > tokens->size)
234      tokens_expand(tokens, count);
235
236   result = &tokens->tokens[tokens->count];
237   tokens->count += count;
238   return result;
239}
240
241
242static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
243                                            unsigned domain,
244                                            unsigned nr )
245{
246   if (ureg->domain[domain].tokens == error_tokens)
247      return &error_tokens[0];
248
249   return &ureg->domain[domain].tokens[nr];
250}
251
252
253
254static INLINE struct ureg_dst
255ureg_dst_register( unsigned file,
256                   unsigned index )
257{
258   struct ureg_dst dst;
259
260   dst.File      = file;
261   dst.WriteMask = TGSI_WRITEMASK_XYZW;
262   dst.Indirect  = 0;
263   dst.IndirectFile = TGSI_FILE_NULL;
264   dst.IndirectIndex = 0;
265   dst.IndirectSwizzle = 0;
266   dst.Saturate  = 0;
267   dst.Predicate = 0;
268   dst.PredNegate = 0;
269   dst.PredSwizzleX = TGSI_SWIZZLE_X;
270   dst.PredSwizzleY = TGSI_SWIZZLE_Y;
271   dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
272   dst.PredSwizzleW = TGSI_SWIZZLE_W;
273   dst.Index     = index;
274   dst.ArrayID = 0;
275
276   return dst;
277}
278
279
280void
281ureg_property_gs_input_prim(struct ureg_program *ureg,
282                            unsigned input_prim)
283{
284   ureg->property_gs_input_prim = input_prim;
285}
286
287void
288ureg_property_gs_output_prim(struct ureg_program *ureg,
289                             unsigned output_prim)
290{
291   ureg->property_gs_output_prim = output_prim;
292}
293
294void
295ureg_property_gs_max_vertices(struct ureg_program *ureg,
296                              unsigned max_vertices)
297{
298   ureg->property_gs_max_vertices = max_vertices;
299}
300void
301ureg_property_gs_invocations(struct ureg_program *ureg,
302                             unsigned invocations)
303{
304   ureg->property_gs_invocations = invocations;
305}
306
307void
308ureg_property_fs_coord_origin(struct ureg_program *ureg,
309                            unsigned fs_coord_origin)
310{
311   ureg->property_fs_coord_origin = fs_coord_origin;
312}
313
314void
315ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
316                            unsigned fs_coord_pixel_center)
317{
318   ureg->property_fs_coord_pixel_center = fs_coord_pixel_center;
319}
320
321void
322ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg,
323                            unsigned fs_color0_writes_all_cbufs)
324{
325   ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs;
326}
327
328void
329ureg_property_fs_depth_layout(struct ureg_program *ureg,
330                              unsigned fs_depth_layout)
331{
332   ureg->property_fs_depth_layout = fs_depth_layout;
333}
334
335void
336ureg_property_vs_window_space_position(struct ureg_program *ureg,
337                                       boolean vs_window_space_position)
338{
339   ureg->property_vs_window_space_position = vs_window_space_position;
340}
341
342struct ureg_src
343ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
344                       unsigned semantic_name,
345                       unsigned semantic_index,
346                       unsigned interp_mode,
347                       unsigned cylindrical_wrap,
348                       unsigned interp_location)
349{
350   unsigned i;
351
352   for (i = 0; i < ureg->nr_fs_inputs; i++) {
353      if (ureg->fs_input[i].semantic_name == semantic_name &&
354          ureg->fs_input[i].semantic_index == semantic_index) {
355         goto out;
356      }
357   }
358
359   if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
360      ureg->fs_input[i].semantic_name = semantic_name;
361      ureg->fs_input[i].semantic_index = semantic_index;
362      ureg->fs_input[i].interp = interp_mode;
363      ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
364      ureg->fs_input[i].interp_location = interp_location;
365      ureg->nr_fs_inputs++;
366   } else {
367      set_bad(ureg);
368   }
369
370out:
371   return ureg_src_register(TGSI_FILE_INPUT, i);
372}
373
374
375struct ureg_src
376ureg_DECL_vs_input( struct ureg_program *ureg,
377                    unsigned index )
378{
379   assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
380
381   ureg->vs_inputs[index/32] |= 1 << (index % 32);
382   return ureg_src_register( TGSI_FILE_INPUT, index );
383}
384
385
386struct ureg_src
387ureg_DECL_gs_input(struct ureg_program *ureg,
388                   unsigned index,
389                   unsigned semantic_name,
390                   unsigned semantic_index)
391{
392   if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
393      ureg->gs_input[ureg->nr_gs_inputs].index = index;
394      ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name;
395      ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index;
396      ureg->nr_gs_inputs++;
397   } else {
398      set_bad(ureg);
399   }
400
401   /* XXX: Add suport for true 2D input registers. */
402   return ureg_src_register(TGSI_FILE_INPUT, index);
403}
404
405
406struct ureg_src
407ureg_DECL_system_value(struct ureg_program *ureg,
408                       unsigned index,
409                       unsigned semantic_name,
410                       unsigned semantic_index)
411{
412   if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
413      ureg->system_value[ureg->nr_system_values].index = index;
414      ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
415      ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
416      ureg->nr_system_values++;
417   } else {
418      set_bad(ureg);
419   }
420
421   return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
422}
423
424
425struct ureg_dst
426ureg_DECL_output_masked( struct ureg_program *ureg,
427                         unsigned name,
428                         unsigned index,
429                         unsigned usage_mask )
430{
431   unsigned i;
432
433   assert(usage_mask != 0);
434
435   for (i = 0; i < ureg->nr_outputs; i++) {
436      if (ureg->output[i].semantic_name == name &&
437          ureg->output[i].semantic_index == index) {
438         ureg->output[i].usage_mask |= usage_mask;
439         goto out;
440      }
441   }
442
443   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
444      ureg->output[i].semantic_name = name;
445      ureg->output[i].semantic_index = index;
446      ureg->output[i].usage_mask = usage_mask;
447      ureg->nr_outputs++;
448   }
449   else {
450      set_bad( ureg );
451   }
452
453out:
454   return ureg_dst_register( TGSI_FILE_OUTPUT, i );
455}
456
457
458struct ureg_dst
459ureg_DECL_output( struct ureg_program *ureg,
460                  unsigned name,
461                  unsigned index )
462{
463   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW);
464}
465
466
467/* Returns a new constant register.  Keep track of which have been
468 * referred to so that we can emit decls later.
469 *
470 * Constant operands declared with this function must be addressed
471 * with a two-dimensional index.
472 *
473 * There is nothing in this code to bind this constant to any tracked
474 * value or manage any constant_buffer contents -- that's the
475 * resposibility of the calling code.
476 */
477void
478ureg_DECL_constant2D(struct ureg_program *ureg,
479                     unsigned first,
480                     unsigned last,
481                     unsigned index2D)
482{
483   struct const_decl *decl = &ureg->const_decls2D[index2D];
484
485   assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
486
487   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
488      uint i = decl->nr_constant_ranges++;
489
490      decl->constant_range[i].first = first;
491      decl->constant_range[i].last = last;
492   }
493}
494
495
496/* A one-dimensional, depricated version of ureg_DECL_constant2D().
497 *
498 * Constant operands declared with this function must be addressed
499 * with a one-dimensional index.
500 */
501struct ureg_src
502ureg_DECL_constant(struct ureg_program *ureg,
503                   unsigned index)
504{
505   struct const_decl *decl = &ureg->const_decls;
506   unsigned minconst = index, maxconst = index;
507   unsigned i;
508
509   /* Inside existing range?
510    */
511   for (i = 0; i < decl->nr_constant_ranges; i++) {
512      if (decl->constant_range[i].first <= index &&
513          decl->constant_range[i].last >= index) {
514         goto out;
515      }
516   }
517
518   /* Extend existing range?
519    */
520   for (i = 0; i < decl->nr_constant_ranges; i++) {
521      if (decl->constant_range[i].last == index - 1) {
522         decl->constant_range[i].last = index;
523         goto out;
524      }
525
526      if (decl->constant_range[i].first == index + 1) {
527         decl->constant_range[i].first = index;
528         goto out;
529      }
530
531      minconst = MIN2(minconst, decl->constant_range[i].first);
532      maxconst = MAX2(maxconst, decl->constant_range[i].last);
533   }
534
535   /* Create new range?
536    */
537   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
538      i = decl->nr_constant_ranges++;
539      decl->constant_range[i].first = index;
540      decl->constant_range[i].last = index;
541      goto out;
542   }
543
544   /* Collapse all ranges down to one:
545    */
546   i = 0;
547   decl->constant_range[0].first = minconst;
548   decl->constant_range[0].last = maxconst;
549   decl->nr_constant_ranges = 1;
550
551out:
552   assert(i < decl->nr_constant_ranges);
553   assert(decl->constant_range[i].first <= index);
554   assert(decl->constant_range[i].last >= index);
555   return ureg_src_register(TGSI_FILE_CONSTANT, index);
556}
557
558static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
559                                        boolean local )
560{
561   unsigned i;
562
563   /* Look for a released temporary.
564    */
565   for (i = util_bitmask_get_first_index(ureg->free_temps);
566        i != UTIL_BITMASK_INVALID_INDEX;
567        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
568      if (util_bitmask_get(ureg->local_temps, i) == local)
569         break;
570   }
571
572   /* Or allocate a new one.
573    */
574   if (i == UTIL_BITMASK_INVALID_INDEX) {
575      i = ureg->nr_temps++;
576
577      if (local)
578         util_bitmask_set(ureg->local_temps, i);
579
580      /* Start a new declaration when the local flag changes */
581      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
582         util_bitmask_set(ureg->decl_temps, i);
583   }
584
585   util_bitmask_clear(ureg->free_temps, i);
586
587   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
588}
589
590struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
591{
592   return alloc_temporary(ureg, FALSE);
593}
594
595struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
596{
597   return alloc_temporary(ureg, TRUE);
598}
599
600struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
601                                           unsigned size,
602                                           boolean local )
603{
604   unsigned i = ureg->nr_temps;
605   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
606
607   if (local)
608      util_bitmask_set(ureg->local_temps, i);
609
610   /* Always start a new declaration at the start */
611   util_bitmask_set(ureg->decl_temps, i);
612
613   ureg->nr_temps += size;
614
615   /* and also at the end of the array */
616   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
617
618   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
619      ureg->array_temps[ureg->nr_array_temps++] = i;
620      dst.ArrayID = ureg->nr_array_temps;
621   }
622
623   return dst;
624}
625
626void ureg_release_temporary( struct ureg_program *ureg,
627                             struct ureg_dst tmp )
628{
629   if(tmp.File == TGSI_FILE_TEMPORARY)
630      util_bitmask_set(ureg->free_temps, tmp.Index);
631}
632
633
634/* Allocate a new address register.
635 */
636struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
637{
638   if (ureg->nr_addrs < UREG_MAX_ADDR)
639      return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
640
641   assert( 0 );
642   return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
643}
644
645/* Allocate a new predicate register.
646 */
647struct ureg_dst
648ureg_DECL_predicate(struct ureg_program *ureg)
649{
650   if (ureg->nr_preds < UREG_MAX_PRED) {
651      return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++);
652   }
653
654   assert(0);
655   return ureg_dst_register(TGSI_FILE_PREDICATE, 0);
656}
657
658/* Allocate a new sampler.
659 */
660struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
661                                   unsigned nr )
662{
663   unsigned i;
664
665   for (i = 0; i < ureg->nr_samplers; i++)
666      if (ureg->sampler[i].Index == nr)
667         return ureg->sampler[i];
668
669   if (i < PIPE_MAX_SAMPLERS) {
670      ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
671      ureg->nr_samplers++;
672      return ureg->sampler[i];
673   }
674
675   assert( 0 );
676   return ureg->sampler[0];
677}
678
679/*
680 * Allocate a new shader sampler view.
681 */
682struct ureg_src
683ureg_DECL_sampler_view(struct ureg_program *ureg,
684                       unsigned index,
685                       unsigned target,
686                       unsigned return_type_x,
687                       unsigned return_type_y,
688                       unsigned return_type_z,
689                       unsigned return_type_w)
690{
691   struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
692   uint i;
693
694   for (i = 0; i < ureg->nr_sampler_views; i++) {
695      if (ureg->sampler_view[i].index == index) {
696         return reg;
697      }
698   }
699
700   if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
701      ureg->sampler_view[i].index = index;
702      ureg->sampler_view[i].target = target;
703      ureg->sampler_view[i].return_type_x = return_type_x;
704      ureg->sampler_view[i].return_type_y = return_type_y;
705      ureg->sampler_view[i].return_type_z = return_type_z;
706      ureg->sampler_view[i].return_type_w = return_type_w;
707      ureg->nr_sampler_views++;
708      return reg;
709   }
710
711   assert(0);
712   return reg;
713}
714
/**
 * Try to express the `nr` values in `v` as a swizzle of the immediate
 * `v2`, appending values to `v2` where necessary.
 *
 * For each value of `v` the first equal component already in `v2` is
 * used; a value that is not present is appended as long as `v2` holds
 * fewer than four components.  Two bits of *swizzle per value of `v`
 * (value i in bits 2i..2i+1) receive the chosen component number.
 *
 * \param v        values wanted
 * \param nr       number of values in `v`
 * \param v2       components of the existing immediate (room for 4)
 * \param pnr2     in: components in use in `v2`; out: updated count,
 *                 written only on success
 * \param swizzle  out: packed component selection; partially filled on
 *                 failure
 * \return 1 (TRUE) on success, 0 (FALSE) if `v2` has no room left
 */
static int
match_or_expand_immediate( const unsigned *v,
                           unsigned nr,
                           unsigned *v2,
                           unsigned *pnr2,
                           unsigned *swizzle )
{
   unsigned used = *pnr2;
   unsigned src;

   *swizzle = 0;

   for (src = 0; src < nr; src++) {
      unsigned slot = 0;

      /* First existing component equal to the wanted value, if any. */
      while (slot < used && v2[slot] != v[src]) {
         slot++;
      }

      if (slot == used) {
         /* Not present: append it, provided there is room. */
         if (used >= 4) {
            return 0;
         }

         v2[used] = v[src];
         used++;
      }

      *swizzle |= slot << (src * 2);
   }

   /* Actually expand immediate only when fully succeeded.
    */
   *pnr2 = used;
   return 1;
}
753
754
755static struct ureg_src
756decl_immediate( struct ureg_program *ureg,
757                const unsigned *v,
758                unsigned nr,
759                unsigned type )
760{
761   unsigned i, j;
762   unsigned swizzle = 0;
763
764   /* Could do a first pass where we examine all existing immediates
765    * without expanding.
766    */
767
768   for (i = 0; i < ureg->nr_immediates; i++) {
769      if (ureg->immediate[i].type != type) {
770         continue;
771      }
772      if (match_or_expand_immediate(v,
773                                    nr,
774                                    ureg->immediate[i].value.u,
775                                    &ureg->immediate[i].nr,
776                                    &swizzle)) {
777         goto out;
778      }
779   }
780
781   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
782      i = ureg->nr_immediates++;
783      ureg->immediate[i].type = type;
784      if (match_or_expand_immediate(v,
785                                    nr,
786                                    ureg->immediate[i].value.u,
787                                    &ureg->immediate[i].nr,
788                                    &swizzle)) {
789         goto out;
790      }
791   }
792
793   set_bad(ureg);
794
795out:
796   /* Make sure that all referenced elements are from this immediate.
797    * Has the effect of making size-one immediates into scalars.
798    */
799   for (j = nr; j < 4; j++) {
800      swizzle |= (swizzle & 0x3) << (j * 2);
801   }
802
803   return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
804                       (swizzle >> 0) & 0x3,
805                       (swizzle >> 2) & 0x3,
806                       (swizzle >> 4) & 0x3,
807                       (swizzle >> 6) & 0x3);
808}
809
810
811struct ureg_src
812ureg_DECL_immediate( struct ureg_program *ureg,
813                     const float *v,
814                     unsigned nr )
815{
816   union {
817      float f[4];
818      unsigned u[4];
819   } fu;
820   unsigned int i;
821
822   for (i = 0; i < nr; i++) {
823      fu.f[i] = v[i];
824   }
825
826   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
827}
828
829
830struct ureg_src
831ureg_DECL_immediate_uint( struct ureg_program *ureg,
832                          const unsigned *v,
833                          unsigned nr )
834{
835   return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
836}
837
838
839struct ureg_src
840ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
841                                const unsigned *v,
842                                unsigned nr )
843{
844   uint index;
845   uint i;
846
847   if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
848      set_bad(ureg);
849      return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
850   }
851
852   index = ureg->nr_immediates;
853   ureg->nr_immediates += (nr + 3) / 4;
854
855   for (i = index; i < ureg->nr_immediates; i++) {
856      ureg->immediate[i].type = TGSI_IMM_UINT32;
857      ureg->immediate[i].nr = nr > 4 ? 4 : nr;
858      memcpy(ureg->immediate[i].value.u,
859             &v[(i - index) * 4],
860             ureg->immediate[i].nr * sizeof(uint));
861      nr -= 4;
862   }
863
864   return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
865}
866
867
868struct ureg_src
869ureg_DECL_immediate_int( struct ureg_program *ureg,
870                         const int *v,
871                         unsigned nr )
872{
873   return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
874}
875
876
877void
878ureg_emit_src( struct ureg_program *ureg,
879               struct ureg_src src )
880{
881   unsigned size = 1 + (src.Indirect ? 1 : 0) +
882                   (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);
883
884   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
885   unsigned n = 0;
886
887   assert(src.File != TGSI_FILE_NULL);
888   assert(src.File < TGSI_FILE_COUNT);
889
890   out[n].value = 0;
891   out[n].src.File = src.File;
892   out[n].src.SwizzleX = src.SwizzleX;
893   out[n].src.SwizzleY = src.SwizzleY;
894   out[n].src.SwizzleZ = src.SwizzleZ;
895   out[n].src.SwizzleW = src.SwizzleW;
896   out[n].src.Index = src.Index;
897   out[n].src.Negate = src.Negate;
898   out[0].src.Absolute = src.Absolute;
899   n++;
900
901   if (src.Indirect) {
902      out[0].src.Indirect = 1;
903      out[n].value = 0;
904      out[n].ind.File = src.IndirectFile;
905      out[n].ind.Swizzle = src.IndirectSwizzle;
906      out[n].ind.Index = src.IndirectIndex;
907      out[n].ind.ArrayID = src.ArrayID;
908      n++;
909   }
910
911   if (src.Dimension) {
912      out[0].src.Dimension = 1;
913      out[n].dim.Dimension = 0;
914      out[n].dim.Padding = 0;
915      if (src.DimIndirect) {
916         out[n].dim.Indirect = 1;
917         out[n].dim.Index = src.DimensionIndex;
918         n++;
919         out[n].value = 0;
920         out[n].ind.File = src.DimIndFile;
921         out[n].ind.Swizzle = src.DimIndSwizzle;
922         out[n].ind.Index = src.DimIndIndex;
923         out[n].ind.ArrayID = src.ArrayID;
924      } else {
925         out[n].dim.Indirect = 0;
926         out[n].dim.Index = src.DimensionIndex;
927      }
928      n++;
929   }
930
931   assert(n == size);
932}
933
934
935void
936ureg_emit_dst( struct ureg_program *ureg,
937               struct ureg_dst dst )
938{
939   unsigned size = (1 +
940                    (dst.Indirect ? 1 : 0));
941
942   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
943   unsigned n = 0;
944
945   assert(dst.File != TGSI_FILE_NULL);
946   assert(dst.File != TGSI_FILE_CONSTANT);
947   assert(dst.File != TGSI_FILE_INPUT);
948   assert(dst.File != TGSI_FILE_SAMPLER);
949   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
950   assert(dst.File != TGSI_FILE_IMMEDIATE);
951   assert(dst.File < TGSI_FILE_COUNT);
952
953   out[n].value = 0;
954   out[n].dst.File = dst.File;
955   out[n].dst.WriteMask = dst.WriteMask;
956   out[n].dst.Indirect = dst.Indirect;
957   out[n].dst.Index = dst.Index;
958   n++;
959
960   if (dst.Indirect) {
961      out[n].value = 0;
962      out[n].ind.File = dst.IndirectFile;
963      out[n].ind.Swizzle = dst.IndirectSwizzle;
964      out[n].ind.Index = dst.IndirectIndex;
965      out[n].ind.ArrayID = dst.ArrayID;
966      n++;
967   }
968
969   assert(n == size);
970}
971
972
/**
 * Debug-build sanity check: assert that the opcode is known and that the
 * operand counts match what the opcode table says.  Does nothing unless
 * DEBUG is defined.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );

   assert(info);
   if (!info) {
      return;
   }

   assert(nr_dst == info->num_dst);
   assert(nr_src == info->num_src);
#endif
}
986
987struct ureg_emit_insn_result
988ureg_emit_insn(struct ureg_program *ureg,
989               unsigned opcode,
990               boolean saturate,
991               boolean predicate,
992               boolean pred_negate,
993               unsigned pred_swizzle_x,
994               unsigned pred_swizzle_y,
995               unsigned pred_swizzle_z,
996               unsigned pred_swizzle_w,
997               unsigned num_dst,
998               unsigned num_src )
999{
1000   union tgsi_any_token *out;
1001   uint count = predicate ? 2 : 1;
1002   struct ureg_emit_insn_result result;
1003
1004   validate( opcode, num_dst, num_src );
1005
1006   out = get_tokens( ureg, DOMAIN_INSN, count );
1007   out[0].insn = tgsi_default_instruction();
1008   out[0].insn.Opcode = opcode;
1009   out[0].insn.Saturate = saturate;
1010   out[0].insn.NumDstRegs = num_dst;
1011   out[0].insn.NumSrcRegs = num_src;
1012
1013   result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
1014   result.extended_token = result.insn_token;
1015
1016   if (predicate) {
1017      out[0].insn.Predicate = 1;
1018      out[1].insn_predicate = tgsi_default_instruction_predicate();
1019      out[1].insn_predicate.Negate = pred_negate;
1020      out[1].insn_predicate.SwizzleX = pred_swizzle_x;
1021      out[1].insn_predicate.SwizzleY = pred_swizzle_y;
1022      out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
1023      out[1].insn_predicate.SwizzleW = pred_swizzle_w;
1024   }
1025
1026   ureg->nr_instructions++;
1027
1028   return result;
1029}
1030
1031
1032void
1033ureg_emit_label(struct ureg_program *ureg,
1034                unsigned extended_token,
1035                unsigned *label_token )
1036{
1037   union tgsi_any_token *out, *insn;
1038
1039   if(!label_token)
1040      return;
1041
1042   out = get_tokens( ureg, DOMAIN_INSN, 1 );
1043   out[0].value = 0;
1044
1045   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1046   insn->insn.Label = 1;
1047
1048   *label_token = ureg->domain[DOMAIN_INSN].count - 1;
1049}
1050
1051/* Will return a number which can be used in a label to point to the
1052 * next instruction to be emitted.
1053 */
1054unsigned
1055ureg_get_instruction_number( struct ureg_program *ureg )
1056{
1057   return ureg->nr_instructions;
1058}
1059
1060/* Patch a given label (expressed as a token number) to point to a
1061 * given instruction (expressed as an instruction number).
1062 */
1063void
1064ureg_fixup_label(struct ureg_program *ureg,
1065                 unsigned label_token,
1066                 unsigned instruction_number )
1067{
1068   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
1069
1070   out->insn_label.Label = instruction_number;
1071}
1072
1073
1074void
1075ureg_emit_texture(struct ureg_program *ureg,
1076                  unsigned extended_token,
1077                  unsigned target, unsigned num_offsets)
1078{
1079   union tgsi_any_token *out, *insn;
1080
1081   out = get_tokens( ureg, DOMAIN_INSN, 1 );
1082   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1083
1084   insn->insn.Texture = 1;
1085
1086   out[0].value = 0;
1087   out[0].insn_texture.Texture = target;
1088   out[0].insn_texture.NumOffsets = num_offsets;
1089}
1090
1091void
1092ureg_emit_texture_offset(struct ureg_program *ureg,
1093                         const struct tgsi_texture_offset *offset)
1094{
1095   union tgsi_any_token *out;
1096
1097   out = get_tokens( ureg, DOMAIN_INSN, 1);
1098
1099   out[0].value = 0;
1100   out[0].insn_texture_offset = *offset;
1101
1102}
1103
1104
1105void
1106ureg_fixup_insn_size(struct ureg_program *ureg,
1107                     unsigned insn )
1108{
1109   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
1110
1111   assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
1112   out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
1113}
1114
1115
1116void
1117ureg_insn(struct ureg_program *ureg,
1118          unsigned opcode,
1119          const struct ureg_dst *dst,
1120          unsigned nr_dst,
1121          const struct ureg_src *src,
1122          unsigned nr_src )
1123{
1124   struct ureg_emit_insn_result insn;
1125   unsigned i;
1126   boolean saturate;
1127   boolean predicate;
1128   boolean negate = FALSE;
1129   unsigned swizzle[4] = { 0 };
1130
1131   if (nr_dst && ureg_dst_is_empty(dst[0])) {
1132      return;
1133   }
1134
1135   saturate = nr_dst ? dst[0].Saturate : FALSE;
1136   predicate = nr_dst ? dst[0].Predicate : FALSE;
1137   if (predicate) {
1138      negate = dst[0].PredNegate;
1139      swizzle[0] = dst[0].PredSwizzleX;
1140      swizzle[1] = dst[0].PredSwizzleY;
1141      swizzle[2] = dst[0].PredSwizzleZ;
1142      swizzle[3] = dst[0].PredSwizzleW;
1143   }
1144
1145   insn = ureg_emit_insn(ureg,
1146                         opcode,
1147                         saturate,
1148                         predicate,
1149                         negate,
1150                         swizzle[0],
1151                         swizzle[1],
1152                         swizzle[2],
1153                         swizzle[3],
1154                         nr_dst,
1155                         nr_src);
1156
1157   for (i = 0; i < nr_dst; i++)
1158      ureg_emit_dst( ureg, dst[i] );
1159
1160   for (i = 0; i < nr_src; i++)
1161      ureg_emit_src( ureg, src[i] );
1162
1163   ureg_fixup_insn_size( ureg, insn.insn_token );
1164}
1165
1166void
1167ureg_tex_insn(struct ureg_program *ureg,
1168              unsigned opcode,
1169              const struct ureg_dst *dst,
1170              unsigned nr_dst,
1171              unsigned target,
1172              const struct tgsi_texture_offset *texoffsets,
1173              unsigned nr_offset,
1174              const struct ureg_src *src,
1175              unsigned nr_src )
1176{
1177   struct ureg_emit_insn_result insn;
1178   unsigned i;
1179   boolean saturate;
1180   boolean predicate;
1181   boolean negate = FALSE;
1182   unsigned swizzle[4] = { 0 };
1183
1184   if (nr_dst && ureg_dst_is_empty(dst[0])) {
1185      return;
1186   }
1187
1188   saturate = nr_dst ? dst[0].Saturate : FALSE;
1189   predicate = nr_dst ? dst[0].Predicate : FALSE;
1190   if (predicate) {
1191      negate = dst[0].PredNegate;
1192      swizzle[0] = dst[0].PredSwizzleX;
1193      swizzle[1] = dst[0].PredSwizzleY;
1194      swizzle[2] = dst[0].PredSwizzleZ;
1195      swizzle[3] = dst[0].PredSwizzleW;
1196   }
1197
1198   insn = ureg_emit_insn(ureg,
1199                         opcode,
1200                         saturate,
1201                         predicate,
1202                         negate,
1203                         swizzle[0],
1204                         swizzle[1],
1205                         swizzle[2],
1206                         swizzle[3],
1207                         nr_dst,
1208                         nr_src);
1209
1210   ureg_emit_texture( ureg, insn.extended_token, target, nr_offset );
1211
1212   for (i = 0; i < nr_offset; i++)
1213      ureg_emit_texture_offset( ureg, &texoffsets[i]);
1214
1215   for (i = 0; i < nr_dst; i++)
1216      ureg_emit_dst( ureg, dst[i] );
1217
1218   for (i = 0; i < nr_src; i++)
1219      ureg_emit_src( ureg, src[i] );
1220
1221   ureg_fixup_insn_size( ureg, insn.insn_token );
1222}
1223
1224
1225void
1226ureg_label_insn(struct ureg_program *ureg,
1227                unsigned opcode,
1228                const struct ureg_src *src,
1229                unsigned nr_src,
1230                unsigned *label_token )
1231{
1232   struct ureg_emit_insn_result insn;
1233   unsigned i;
1234
1235   insn = ureg_emit_insn(ureg,
1236                         opcode,
1237                         FALSE,
1238                         FALSE,
1239                         FALSE,
1240                         TGSI_SWIZZLE_X,
1241                         TGSI_SWIZZLE_Y,
1242                         TGSI_SWIZZLE_Z,
1243                         TGSI_SWIZZLE_W,
1244                         0,
1245                         nr_src);
1246
1247   ureg_emit_label( ureg, insn.extended_token, label_token );
1248
1249   for (i = 0; i < nr_src; i++)
1250      ureg_emit_src( ureg, src[i] );
1251
1252   ureg_fixup_insn_size( ureg, insn.insn_token );
1253}
1254
1255
1256static void
1257emit_decl_semantic(struct ureg_program *ureg,
1258                   unsigned file,
1259                   unsigned index,
1260                   unsigned semantic_name,
1261                   unsigned semantic_index,
1262                   unsigned usage_mask)
1263{
1264   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1265
1266   out[0].value = 0;
1267   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1268   out[0].decl.NrTokens = 3;
1269   out[0].decl.File = file;
1270   out[0].decl.UsageMask = usage_mask;
1271   out[0].decl.Semantic = 1;
1272
1273   out[1].value = 0;
1274   out[1].decl_range.First = index;
1275   out[1].decl_range.Last = index;
1276
1277   out[2].value = 0;
1278   out[2].decl_semantic.Name = semantic_name;
1279   out[2].decl_semantic.Index = semantic_index;
1280}
1281
1282
1283static void
1284emit_decl_fs(struct ureg_program *ureg,
1285             unsigned file,
1286             unsigned index,
1287             unsigned semantic_name,
1288             unsigned semantic_index,
1289             unsigned interpolate,
1290             unsigned cylindrical_wrap,
1291             unsigned interpolate_location)
1292{
1293   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4);
1294
1295   out[0].value = 0;
1296   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1297   out[0].decl.NrTokens = 4;
1298   out[0].decl.File = file;
1299   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
1300   out[0].decl.Interpolate = 1;
1301   out[0].decl.Semantic = 1;
1302
1303   out[1].value = 0;
1304   out[1].decl_range.First = index;
1305   out[1].decl_range.Last = index;
1306
1307   out[2].value = 0;
1308   out[2].decl_interp.Interpolate = interpolate;
1309   out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
1310   out[2].decl_interp.Location = interpolate_location;
1311
1312   out[3].value = 0;
1313   out[3].decl_semantic.Name = semantic_name;
1314   out[3].decl_semantic.Index = semantic_index;
1315}
1316
1317static void
1318emit_decl_temps( struct ureg_program *ureg,
1319                 unsigned first, unsigned last,
1320                 boolean local,
1321                 unsigned arrayid )
1322{
1323   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
1324                                           arrayid ? 3 : 2 );
1325
1326   out[0].value = 0;
1327   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1328   out[0].decl.NrTokens = 2;
1329   out[0].decl.File = TGSI_FILE_TEMPORARY;
1330   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1331   out[0].decl.Local = local;
1332
1333   out[1].value = 0;
1334   out[1].decl_range.First = first;
1335   out[1].decl_range.Last = last;
1336
1337   if (arrayid) {
1338      out[0].decl.Array = 1;
1339      out[2].value = 0;
1340      out[2].array.ArrayID = arrayid;
1341   }
1342}
1343
1344static void emit_decl_range( struct ureg_program *ureg,
1345                             unsigned file,
1346                             unsigned first,
1347                             unsigned count )
1348{
1349   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1350
1351   out[0].value = 0;
1352   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1353   out[0].decl.NrTokens = 2;
1354   out[0].decl.File = file;
1355   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1356   out[0].decl.Semantic = 0;
1357
1358   out[1].value = 0;
1359   out[1].decl_range.First = first;
1360   out[1].decl_range.Last = first + count - 1;
1361}
1362
1363static void
1364emit_decl_range2D(struct ureg_program *ureg,
1365                  unsigned file,
1366                  unsigned first,
1367                  unsigned last,
1368                  unsigned index2D)
1369{
1370   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1371
1372   out[0].value = 0;
1373   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1374   out[0].decl.NrTokens = 3;
1375   out[0].decl.File = file;
1376   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1377   out[0].decl.Dimension = 1;
1378
1379   out[1].value = 0;
1380   out[1].decl_range.First = first;
1381   out[1].decl_range.Last = last;
1382
1383   out[2].value = 0;
1384   out[2].decl_dim.Index2D = index2D;
1385}
1386
1387static void
1388emit_decl_sampler_view(struct ureg_program *ureg,
1389                       unsigned index,
1390                       unsigned target,
1391                       unsigned return_type_x,
1392                       unsigned return_type_y,
1393                       unsigned return_type_z,
1394                       unsigned return_type_w )
1395{
1396   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1397
1398   out[0].value = 0;
1399   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1400   out[0].decl.NrTokens = 3;
1401   out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
1402   out[0].decl.UsageMask = 0xf;
1403
1404   out[1].value = 0;
1405   out[1].decl_range.First = index;
1406   out[1].decl_range.Last = index;
1407
1408   out[2].value = 0;
1409   out[2].decl_sampler_view.Resource    = target;
1410   out[2].decl_sampler_view.ReturnTypeX = return_type_x;
1411   out[2].decl_sampler_view.ReturnTypeY = return_type_y;
1412   out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
1413   out[2].decl_sampler_view.ReturnTypeW = return_type_w;
1414}
1415
1416static void
1417emit_immediate( struct ureg_program *ureg,
1418                const unsigned *v,
1419                unsigned type )
1420{
1421   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1422
1423   out[0].value = 0;
1424   out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1425   out[0].imm.NrTokens = 5;
1426   out[0].imm.DataType = type;
1427   out[0].imm.Padding = 0;
1428
1429   out[1].imm_data.Uint = v[0];
1430   out[2].imm_data.Uint = v[1];
1431   out[3].imm_data.Uint = v[2];
1432   out[4].imm_data.Uint = v[3];
1433}
1434
1435static void
1436emit_property(struct ureg_program *ureg,
1437              unsigned name,
1438              unsigned data)
1439{
1440   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1441
1442   out[0].value = 0;
1443   out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
1444   out[0].prop.NrTokens = 2;
1445   out[0].prop.PropertyName = name;
1446
1447   out[1].prop_data.Data = data;
1448}
1449
1450
1451static void emit_decls( struct ureg_program *ureg )
1452{
1453   unsigned i;
1454
1455   if (ureg->property_gs_input_prim != ~0) {
1456      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1457
1458      emit_property(ureg,
1459                    TGSI_PROPERTY_GS_INPUT_PRIM,
1460                    ureg->property_gs_input_prim);
1461   }
1462
1463   if (ureg->property_gs_output_prim != ~0) {
1464      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1465
1466      emit_property(ureg,
1467                    TGSI_PROPERTY_GS_OUTPUT_PRIM,
1468                    ureg->property_gs_output_prim);
1469   }
1470
1471   if (ureg->property_gs_max_vertices != ~0) {
1472      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1473
1474      emit_property(ureg,
1475                    TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1476                    ureg->property_gs_max_vertices);
1477   }
1478
1479   if (ureg->property_gs_invocations != ~0) {
1480      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1481
1482      emit_property(ureg,
1483                    TGSI_PROPERTY_GS_INVOCATIONS,
1484                    ureg->property_gs_invocations);
1485   }
1486
1487   if (ureg->property_fs_coord_origin) {
1488      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1489
1490      emit_property(ureg,
1491                    TGSI_PROPERTY_FS_COORD_ORIGIN,
1492                    ureg->property_fs_coord_origin);
1493   }
1494
1495   if (ureg->property_fs_coord_pixel_center) {
1496      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1497
1498      emit_property(ureg,
1499                    TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
1500                    ureg->property_fs_coord_pixel_center);
1501   }
1502
1503   if (ureg->property_fs_color0_writes_all_cbufs) {
1504      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1505
1506      emit_property(ureg,
1507                    TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS,
1508                    ureg->property_fs_color0_writes_all_cbufs);
1509   }
1510
1511   if (ureg->property_fs_depth_layout) {
1512      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1513
1514      emit_property(ureg,
1515                    TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1516                    ureg->property_fs_depth_layout);
1517   }
1518
1519   if (ureg->property_vs_window_space_position) {
1520      assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
1521
1522      emit_property(ureg,
1523                    TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION,
1524                    ureg->property_vs_window_space_position);
1525   }
1526
1527   if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
1528      for (i = 0; i < UREG_MAX_INPUT; i++) {
1529         if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
1530            emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
1531         }
1532      }
1533   } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
1534      for (i = 0; i < ureg->nr_fs_inputs; i++) {
1535         emit_decl_fs(ureg,
1536                      TGSI_FILE_INPUT,
1537                      i,
1538                      ureg->fs_input[i].semantic_name,
1539                      ureg->fs_input[i].semantic_index,
1540                      ureg->fs_input[i].interp,
1541                      ureg->fs_input[i].cylindrical_wrap,
1542                      ureg->fs_input[i].interp_location);
1543      }
1544   } else {
1545      for (i = 0; i < ureg->nr_gs_inputs; i++) {
1546         emit_decl_semantic(ureg,
1547                            TGSI_FILE_INPUT,
1548                            ureg->gs_input[i].index,
1549                            ureg->gs_input[i].semantic_name,
1550                            ureg->gs_input[i].semantic_index,
1551                            TGSI_WRITEMASK_XYZW);
1552      }
1553   }
1554
1555   for (i = 0; i < ureg->nr_system_values; i++) {
1556      emit_decl_semantic(ureg,
1557                         TGSI_FILE_SYSTEM_VALUE,
1558                         ureg->system_value[i].index,
1559                         ureg->system_value[i].semantic_name,
1560                         ureg->system_value[i].semantic_index,
1561                         TGSI_WRITEMASK_XYZW);
1562   }
1563
1564   for (i = 0; i < ureg->nr_outputs; i++) {
1565      emit_decl_semantic(ureg,
1566                         TGSI_FILE_OUTPUT,
1567                         i,
1568                         ureg->output[i].semantic_name,
1569                         ureg->output[i].semantic_index,
1570                         ureg->output[i].usage_mask);
1571   }
1572
1573   for (i = 0; i < ureg->nr_samplers; i++) {
1574      emit_decl_range( ureg,
1575                       TGSI_FILE_SAMPLER,
1576                       ureg->sampler[i].Index, 1 );
1577   }
1578
1579   for (i = 0; i < ureg->nr_sampler_views; i++) {
1580      emit_decl_sampler_view(ureg,
1581                             ureg->sampler_view[i].index,
1582                             ureg->sampler_view[i].target,
1583                             ureg->sampler_view[i].return_type_x,
1584                             ureg->sampler_view[i].return_type_y,
1585                             ureg->sampler_view[i].return_type_z,
1586                             ureg->sampler_view[i].return_type_w);
1587   }
1588
1589   if (ureg->const_decls.nr_constant_ranges) {
1590      for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
1591         emit_decl_range(ureg,
1592                         TGSI_FILE_CONSTANT,
1593                         ureg->const_decls.constant_range[i].first,
1594                         ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1);
1595      }
1596   }
1597
1598   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
1599      struct const_decl *decl = &ureg->const_decls2D[i];
1600
1601      if (decl->nr_constant_ranges) {
1602         uint j;
1603
1604         for (j = 0; j < decl->nr_constant_ranges; j++) {
1605            emit_decl_range2D(ureg,
1606                              TGSI_FILE_CONSTANT,
1607                              decl->constant_range[j].first,
1608                              decl->constant_range[j].last,
1609                              i);
1610         }
1611      }
1612   }
1613
1614   if (ureg->nr_temps) {
1615      unsigned array = 0;
1616      for (i = 0; i < ureg->nr_temps;) {
1617         boolean local = util_bitmask_get(ureg->local_temps, i);
1618         unsigned first = i;
1619         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
1620         if (i == UTIL_BITMASK_INVALID_INDEX)
1621            i = ureg->nr_temps;
1622
1623         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
1624            emit_decl_temps( ureg, first, i - 1, local, ++array );
1625         else
1626            emit_decl_temps( ureg, first, i - 1, local, 0 );
1627      }
1628   }
1629
1630   if (ureg->nr_addrs) {
1631      emit_decl_range( ureg,
1632                       TGSI_FILE_ADDRESS,
1633                       0, ureg->nr_addrs );
1634   }
1635
1636   if (ureg->nr_preds) {
1637      emit_decl_range(ureg,
1638                      TGSI_FILE_PREDICATE,
1639                      0,
1640                      ureg->nr_preds);
1641   }
1642
1643   for (i = 0; i < ureg->nr_immediates; i++) {
1644      emit_immediate( ureg,
1645                      ureg->immediate[i].value.u,
1646                      ureg->immediate[i].type );
1647   }
1648}
1649
1650/* Append the instruction tokens onto the declarations to build a
1651 * contiguous stream suitable to send to the driver.
1652 */
1653static void copy_instructions( struct ureg_program *ureg )
1654{
1655   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
1656   union tgsi_any_token *out = get_tokens( ureg,
1657                                           DOMAIN_DECL,
1658                                           nr_tokens );
1659
1660   memcpy(out,
1661          ureg->domain[DOMAIN_INSN].tokens,
1662          nr_tokens * sizeof out[0] );
1663}
1664
1665
1666static void
1667fixup_header_size(struct ureg_program *ureg)
1668{
1669   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
1670
1671   out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
1672}
1673
1674
1675static void
1676emit_header( struct ureg_program *ureg )
1677{
1678   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1679
1680   out[0].header.HeaderSize = 2;
1681   out[0].header.BodySize = 0;
1682
1683   out[1].processor.Processor = ureg->processor;
1684   out[1].processor.Padding = 0;
1685}
1686
1687
1688const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
1689{
1690   const struct tgsi_token *tokens;
1691
1692   emit_header( ureg );
1693   emit_decls( ureg );
1694   copy_instructions( ureg );
1695   fixup_header_size( ureg );
1696
1697   if (ureg->domain[0].tokens == error_tokens ||
1698       ureg->domain[1].tokens == error_tokens) {
1699      debug_printf("%s: error in generated shader\n", __FUNCTION__);
1700      assert(0);
1701      return NULL;
1702   }
1703
1704   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1705
1706   if (0) {
1707      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
1708                   ureg->domain[DOMAIN_DECL].count);
1709      tgsi_dump( tokens, 0 );
1710   }
1711
1712#if DEBUG
1713   if (tokens && !tgsi_sanity_check(tokens)) {
1714      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
1715      tgsi_dump(tokens, 0);
1716      assert(0);
1717   }
1718#endif
1719
1720
1721   return tokens;
1722}
1723
1724
1725void *ureg_create_shader( struct ureg_program *ureg,
1726                          struct pipe_context *pipe,
1727                          const struct pipe_stream_output_info *so )
1728{
1729   struct pipe_shader_state state;
1730
1731   state.tokens = ureg_finalize(ureg);
1732   if(!state.tokens)
1733      return NULL;
1734
1735   if (so)
1736      state.stream_output = *so;
1737   else
1738      memset(&state.stream_output, 0, sizeof(state.stream_output));
1739
1740   if (ureg->processor == TGSI_PROCESSOR_VERTEX)
1741      return pipe->create_vs_state( pipe, &state );
1742   else
1743      return pipe->create_fs_state( pipe, &state );
1744}
1745
1746
1747const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
1748                                          unsigned *nr_tokens )
1749{
1750   const struct tgsi_token *tokens;
1751
1752   ureg_finalize(ureg);
1753
1754   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1755
1756   if (nr_tokens)
1757      *nr_tokens = ureg->domain[DOMAIN_DECL].size;
1758
1759   ureg->domain[DOMAIN_DECL].tokens = 0;
1760   ureg->domain[DOMAIN_DECL].size = 0;
1761   ureg->domain[DOMAIN_DECL].order = 0;
1762   ureg->domain[DOMAIN_DECL].count = 0;
1763
1764   return tokens;
1765}
1766
1767
/**
 * Release a token stream previously obtained from ureg_get_tokens().
 *
 * \param tokens  stream to free; constness is cast away only to hand the
 *                pointer to FREE()
 */
void ureg_free_tokens( const struct tgsi_token *tokens )
{
   struct tgsi_token *owned = (struct tgsi_token *)tokens;

   FREE(owned);
}
1772
1773
1774struct ureg_program *ureg_create( unsigned processor )
1775{
1776   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
1777   if (ureg == NULL)
1778      goto no_ureg;
1779
1780   ureg->processor = processor;
1781   ureg->property_gs_input_prim = ~0;
1782   ureg->property_gs_output_prim = ~0;
1783   ureg->property_gs_max_vertices = ~0;
1784   ureg->property_gs_invocations = ~0;
1785
1786   ureg->free_temps = util_bitmask_create();
1787   if (ureg->free_temps == NULL)
1788      goto no_free_temps;
1789
1790   ureg->local_temps = util_bitmask_create();
1791   if (ureg->local_temps == NULL)
1792      goto no_local_temps;
1793
1794   ureg->decl_temps = util_bitmask_create();
1795   if (ureg->decl_temps == NULL)
1796      goto no_decl_temps;
1797
1798   return ureg;
1799
1800no_decl_temps:
1801   util_bitmask_destroy(ureg->local_temps);
1802no_local_temps:
1803   util_bitmask_destroy(ureg->free_temps);
1804no_free_temps:
1805   FREE(ureg);
1806no_ureg:
1807   return NULL;
1808}
1809
1810
1811unsigned
1812ureg_get_nr_outputs( const struct ureg_program *ureg )
1813{
1814   if (!ureg)
1815      return 0;
1816   return ureg->nr_outputs;
1817}
1818
1819
1820void ureg_destroy( struct ureg_program *ureg )
1821{
1822   unsigned i;
1823
1824   for (i = 0; i < Elements(ureg->domain); i++) {
1825      if (ureg->domain[i].tokens &&
1826          ureg->domain[i].tokens != error_tokens)
1827         FREE(ureg->domain[i].tokens);
1828   }
1829
1830   util_bitmask_destroy(ureg->free_temps);
1831   util_bitmask_destroy(ureg->local_temps);
1832   util_bitmask_destroy(ureg->decl_temps);
1833
1834   FREE(ureg);
1835}
1836