1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_debug.h"
59#include "lp_bld_sample.h"
60
61
62/**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66static LLVMValueRef
67swizzle_aos(struct lp_build_tgsi_context *bld_base,
68            LLVMValueRef a,
69            unsigned swizzle_x,
70            unsigned swizzle_y,
71            unsigned swizzle_z,
72            unsigned swizzle_w)
73{
74   unsigned char swizzles[4];
75   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77   assert(swizzle_x < 4);
78   assert(swizzle_y < 4);
79   assert(swizzle_z < 4);
80   assert(swizzle_w < 4);
81
82   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88}
89
90
91static LLVMValueRef
92swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                   LLVMValueRef a,
94                   unsigned chan)
95{
96   chan = bld->swizzles[chan];
97   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98}
99
100
101static LLVMValueRef
102emit_fetch_constant(
103   struct lp_build_tgsi_context * bld_base,
104   const struct tgsi_full_src_register * reg,
105   enum tgsi_opcode_type stype,
106   unsigned swizzle)
107{
108   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110   struct lp_type type = bld_base->base.type;
111   LLVMValueRef res;
112   unsigned chan;
113
114   assert(!reg->Register.Indirect);
115
116   /*
117    * Get the constants components
118    */
119
120   res = bld->bld_base.base.undef;
121   for (chan = 0; chan < 4; ++chan) {
122      LLVMValueRef index;
123      LLVMValueRef scalar_ptr;
124      LLVMValueRef scalar;
125      LLVMValueRef swizzle;
126
127      index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                   reg->Register.Index * 4 + chan);
129
130      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136      /*
137       * NOTE: constants array is always assumed to be RGBA
138       */
139
140      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                     bld->swizzles[chan]);
142
143      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144   }
145
146   /*
147    * Broadcast the first quaternion to all others.
148    *
149    * XXX: could be factored into a reusable function.
150    */
151
152   if (type.length > 4) {
153      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154      unsigned i;
155
156      for (chan = 0; chan < 4; ++chan) {
157         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158      }
159
160      for (i = 4; i < type.length; ++i) {
161         shuffles[i] = shuffles[i % 4];
162      }
163
164      res = LLVMBuildShuffleVector(builder,
165                                   res, bld->bld_base.base.undef,
166                                   LLVMConstVector(shuffles, type.length),
167                                   "");
168   }
169   return res;
170}
171
172static LLVMValueRef
173emit_fetch_immediate(
174   struct lp_build_tgsi_context * bld_base,
175   const struct tgsi_full_src_register * reg,
176   enum tgsi_opcode_type stype,
177   unsigned swizzle)
178{
179   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180   LLVMValueRef res = bld->immediates[reg->Register.Index];
181   assert(res);
182   return res;
183}
184
185static LLVMValueRef
186emit_fetch_input(
187   struct lp_build_tgsi_context * bld_base,
188   const struct tgsi_full_src_register * reg,
189   enum tgsi_opcode_type stype,
190   unsigned swizzle)
191{
192   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193   LLVMValueRef res = bld->inputs[reg->Register.Index];
194   assert(!reg->Register.Indirect);
195   assert(res);
196   return res;
197}
198
199static LLVMValueRef
200emit_fetch_temporary(
201   struct lp_build_tgsi_context * bld_base,
202   const struct tgsi_full_src_register * reg,
203   enum tgsi_opcode_type stype,
204   unsigned swizzle)
205{
206   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210   assert(!reg->Register.Indirect);
211   if (!res)
212      return bld->bld_base.base.undef;
213
214   return res;
215}
216
217/**
218 * Register store.
219 */
220void
221lp_emit_store_aos(
222   struct lp_build_tgsi_aos_context *bld,
223   const struct tgsi_full_instruction *inst,
224   unsigned index,
225   LLVMValueRef value)
226{
227   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229   LLVMValueRef mask = NULL;
230   LLVMValueRef ptr;
231
232   /*
233    * Saturate the value
234    */
235   if (inst->Instruction.Saturate) {
236      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238   }
239
240   /*
241    * Translate the register file
242    */
243
244   assert(!reg->Register.Indirect);
245
246   switch (reg->Register.File) {
247   case TGSI_FILE_OUTPUT:
248      ptr = bld->outputs[reg->Register.Index];
249      break;
250
251   case TGSI_FILE_TEMPORARY:
252      ptr = bld->temps[reg->Register.Index];
253      break;
254
255   case TGSI_FILE_ADDRESS:
256      ptr = bld->addr[reg->Indirect.Index];
257      break;
258
259   default:
260      assert(0);
261      return;
262   }
263
264   if (!ptr)
265      return;
266
267   /*
268    * Writemask
269    */
270
271   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
272      LLVMValueRef writemask;
273
274      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
275                                                   bld->bld_base.base.type,
276                                                   reg->Register.WriteMask,
277                                                   TGSI_NUM_CHANNELS,
278                                                   bld->swizzles);
279
280      if (mask) {
281         mask = LLVMBuildAnd(builder, mask, writemask, "");
282      } else {
283         mask = writemask;
284      }
285   }
286
287   if (mask) {
288      LLVMValueRef orig_value;
289
290      orig_value = LLVMBuildLoad(builder, ptr, "");
291      value = lp_build_select(&bld->bld_base.base,
292                              mask, value, orig_value);
293   }
294
295   LLVMBuildStore(builder, value, ptr);
296}
297
298
299/**
300 * High-level instruction translators.
301 */
302
303static LLVMValueRef
304emit_tex(struct lp_build_tgsi_aos_context *bld,
305         const struct tgsi_full_instruction *inst,
306         enum lp_build_tex_modifier modifier)
307{
308   unsigned target;
309   unsigned unit;
310   LLVMValueRef coords;
311   struct lp_derivatives derivs = { {NULL}, {NULL} };
312
313   if (!bld->sampler) {
314      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
315      return bld->bld_base.base.undef;
316   }
317
318   target = inst->Texture.Texture;
319
320   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
321
322   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
323      /* probably not going to work */
324      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
325      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
326      unit = inst->Src[3].Register.Index;
327   }
328   else {
329      unit = inst->Src[1].Register.Index;
330   }
331   return bld->sampler->emit_fetch_texel(bld->sampler,
332                                         &bld->bld_base.base,
333                                         target, unit,
334                                         coords, derivs,
335                                         modifier);
336}
337
338
339static LLVMValueRef
340emit_sample(struct lp_build_tgsi_aos_context *bld,
341            const struct tgsi_full_instruction *inst,
342            enum lp_build_tex_modifier modifier)
343{
344   unsigned target;
345   unsigned unit;
346   LLVMValueRef coords;
347   struct lp_derivatives derivs = { {NULL}, {NULL} };
348
349   if (!bld->sampler) {
350      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
351      return bld->bld_base.base.undef;
352   }
353
354   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
355
356   /* ignore modifiers, can't handle different sampler / sampler view, etc... */
357   unit = inst->Src[1].Register.Index;
358   assert(inst->Src[2].Register.Index == unit);
359
360   target = bld->sv[unit].Resource;
361
362   return bld->sampler->emit_fetch_texel(bld->sampler,
363                                         &bld->bld_base.base,
364                                         target, unit,
365                                         coords, derivs,
366                                         modifier);
367}
368
369
370void
371lp_emit_declaration_aos(
372   struct lp_build_tgsi_aos_context *bld,
373   const struct tgsi_full_declaration *decl)
374{
375   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
376   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
377
378   unsigned first = decl->Range.First;
379   unsigned last = decl->Range.Last;
380   unsigned idx;
381
382   for (idx = first; idx <= last; ++idx) {
383      switch (decl->Declaration.File) {
384      case TGSI_FILE_TEMPORARY:
385         assert(idx < LP_MAX_INLINED_TEMPS);
386         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
387            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
388            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
389                                                     vec_type, array_size, "");
390         } else {
391            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
392         }
393         break;
394
395      case TGSI_FILE_OUTPUT:
396         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
397         break;
398
399      case TGSI_FILE_ADDRESS:
400         assert(idx < LP_MAX_TGSI_ADDRS);
401         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
402         break;
403
404      case TGSI_FILE_SAMPLER_VIEW:
405         /*
406          * The target stored here MUST match whatever there actually
407          * is in the set sampler views (what about return type?).
408          */
409         assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
410         for (idx = first; idx <= last; ++idx) {
411            bld->sv[idx] = decl->SamplerView;
412         }
413         break;
414
415      default:
416         /* don't need to declare other vars */
417         break;
418      }
419   }
420}
421
422
423/**
424 * Emit LLVM for one TGSI instruction.
425 * \param return TRUE for success, FALSE otherwise
426 */
427boolean
428lp_emit_instruction_aos(
429   struct lp_build_tgsi_aos_context *bld,
430   const struct tgsi_full_instruction *inst,
431   const struct tgsi_opcode_info *info,
432   int *pc)
433{
434   LLVMValueRef src0, src1, src2;
435   LLVMValueRef tmp0;
436   LLVMValueRef dst0 = NULL;
437
438   /*
439    * Stores and write masks are handled in a general fashion after the long
440    * instruction opcode switch statement.
441    *
442    * Although not stricitly necessary, we avoid generating instructions for
443    * channels which won't be stored, in cases where's that easy. For some
444    * complex instructions, like texture sampling, it is more convenient to
445    * assume a full writemask and then let LLVM optimization passes eliminate
446    * redundant code.
447    */
448
449   (*pc)++;
450
451   assert(info->num_dst <= 1);
452   if (info->num_dst) {
453      dst0 = bld->bld_base.base.undef;
454   }
455
456   switch (inst->Instruction.Opcode) {
457   case TGSI_OPCODE_ARL:
458      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
459      dst0 = lp_build_floor(&bld->bld_base.base, src0);
460      break;
461
462   case TGSI_OPCODE_MOV:
463      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
464      break;
465
466   case TGSI_OPCODE_LIT:
467      return FALSE;
468
469   case TGSI_OPCODE_RCP:
470   /* TGSI_OPCODE_RECIP */
471      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
472      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
473      break;
474
475   case TGSI_OPCODE_RSQ:
476   /* TGSI_OPCODE_RECIPSQRT */
477      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
478      tmp0 = lp_build_abs(&bld->bld_base.base, src0);
479      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
480      break;
481
482   case TGSI_OPCODE_EXP:
483      return FALSE;
484
485   case TGSI_OPCODE_LOG:
486      return FALSE;
487
488   case TGSI_OPCODE_MUL:
489      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
491      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
492      break;
493
494   case TGSI_OPCODE_ADD:
495      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
497      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
498      break;
499
500   case TGSI_OPCODE_DP3:
501   /* TGSI_OPCODE_DOT3 */
502      return FALSE;
503
504   case TGSI_OPCODE_DP4:
505   /* TGSI_OPCODE_DOT4 */
506      return FALSE;
507
508   case TGSI_OPCODE_DST:
509      return FALSE;
510
511   case TGSI_OPCODE_MIN:
512      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
513      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
514      dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
515      break;
516
517   case TGSI_OPCODE_MAX:
518      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
519      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
520      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
521      break;
522
523   case TGSI_OPCODE_SLT:
524   /* TGSI_OPCODE_SETLT */
525      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
526      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
527      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
528      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
529      break;
530
531   case TGSI_OPCODE_SGE:
532   /* TGSI_OPCODE_SETGE */
533      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
534      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
535      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
536      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
537      break;
538
539   case TGSI_OPCODE_MAD:
540   /* TGSI_OPCODE_MADD */
541      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
544      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
545      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
546      break;
547
548   case TGSI_OPCODE_LRP:
549      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
552      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
553      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
554      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
555      break;
556
557   case TGSI_OPCODE_FRC:
558      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
560      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
561      break;
562
563   case TGSI_OPCODE_FLR:
564      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565      dst0 = lp_build_floor(&bld->bld_base.base, src0);
566      break;
567
568   case TGSI_OPCODE_ROUND:
569      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
570      dst0 = lp_build_round(&bld->bld_base.base, src0);
571      break;
572
573   case TGSI_OPCODE_EX2:
574      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
575      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
576      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
577      break;
578
579   case TGSI_OPCODE_LG2:
580      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
581      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
582      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
583      break;
584
585   case TGSI_OPCODE_POW:
586      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
587      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
588      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
590      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
591      break;
592
593   case TGSI_OPCODE_COS:
594      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
595      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
596      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
597      break;
598
599   case TGSI_OPCODE_DDX:
600      return FALSE;
601
602   case TGSI_OPCODE_DDY:
603      return FALSE;
604
605   case TGSI_OPCODE_KILL:
606      return FALSE;
607
608   case TGSI_OPCODE_KILL_IF:
609      return FALSE;
610
611   case TGSI_OPCODE_PK2H:
612      return FALSE;
613      break;
614
615   case TGSI_OPCODE_PK2US:
616      return FALSE;
617      break;
618
619   case TGSI_OPCODE_PK4B:
620      return FALSE;
621      break;
622
623   case TGSI_OPCODE_PK4UB:
624      return FALSE;
625
626   case TGSI_OPCODE_SEQ:
627      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
628      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
629      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
630      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
631      break;
632
633   case TGSI_OPCODE_SGT:
634      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
635      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
636      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
637      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
638      break;
639
640   case TGSI_OPCODE_SIN:
641      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
642      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
643      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
644      break;
645
646   case TGSI_OPCODE_SLE:
647      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
648      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
649      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
650      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
651      break;
652
653   case TGSI_OPCODE_SNE:
654      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
655      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
656      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
657      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
658      break;
659
660   case TGSI_OPCODE_TEX:
661      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
662      break;
663
664   case TGSI_OPCODE_TXD:
665      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
666      break;
667
668   case TGSI_OPCODE_UP2H:
669      /* deprecated */
670      assert (0);
671      return FALSE;
672      break;
673
674   case TGSI_OPCODE_UP2US:
675      /* deprecated */
676      assert(0);
677      return FALSE;
678      break;
679
680   case TGSI_OPCODE_UP4B:
681      /* deprecated */
682      assert(0);
683      return FALSE;
684      break;
685
686   case TGSI_OPCODE_UP4UB:
687      /* deprecated */
688      assert(0);
689      return FALSE;
690      break;
691
692   case TGSI_OPCODE_ARR:
693      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
694      dst0 = lp_build_round(&bld->bld_base.base, src0);
695      break;
696
697   case TGSI_OPCODE_CAL:
698      return FALSE;
699
700   case TGSI_OPCODE_RET:
701      /* safe to ignore at end */
702      break;
703
704   case TGSI_OPCODE_END:
705      *pc = -1;
706      break;
707
708   case TGSI_OPCODE_SSG:
709   /* TGSI_OPCODE_SGN */
710      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
712      break;
713
714   case TGSI_OPCODE_CMP:
715      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
716      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
717      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
718      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
719      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
720      break;
721
722   case TGSI_OPCODE_TXB:
723      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
724      break;
725
726   case TGSI_OPCODE_DIV:
727      assert(0);
728      return FALSE;
729      break;
730
731   case TGSI_OPCODE_DP2:
732      return FALSE;
733
734   case TGSI_OPCODE_TXL:
735      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
736      break;
737
738   case TGSI_OPCODE_TXP:
739      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
740      break;
741
742   case TGSI_OPCODE_BRK:
743      return FALSE;
744
745   case TGSI_OPCODE_IF:
746   case TGSI_OPCODE_UIF:
747      return FALSE;
748
749   case TGSI_OPCODE_BGNLOOP:
750      return FALSE;
751
752   case TGSI_OPCODE_BGNSUB:
753      return FALSE;
754
755   case TGSI_OPCODE_ELSE:
756      return FALSE;
757
758   case TGSI_OPCODE_ENDIF:
759      return FALSE;
760
761   case TGSI_OPCODE_ENDLOOP:
762      return FALSE;
763
764   case TGSI_OPCODE_ENDSUB:
765      return FALSE;
766
767   case TGSI_OPCODE_CEIL:
768      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
769      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
770      break;
771
772   case TGSI_OPCODE_I2F:
773      assert(0);
774      return FALSE;
775      break;
776
777   case TGSI_OPCODE_NOT:
778      assert(0);
779      return FALSE;
780      break;
781
782   case TGSI_OPCODE_TRUNC:
783      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
784      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
785      break;
786
787   case TGSI_OPCODE_SHL:
788      assert(0);
789      return FALSE;
790      break;
791
792   case TGSI_OPCODE_ISHR:
793      assert(0);
794      return FALSE;
795      break;
796
797   case TGSI_OPCODE_AND:
798      assert(0);
799      return FALSE;
800      break;
801
802   case TGSI_OPCODE_OR:
803      assert(0);
804      return FALSE;
805      break;
806
807   case TGSI_OPCODE_MOD:
808      assert(0);
809      return FALSE;
810      break;
811
812   case TGSI_OPCODE_XOR:
813      assert(0);
814      return FALSE;
815      break;
816
817   case TGSI_OPCODE_TXF:
818      assert(0);
819      return FALSE;
820      break;
821
822   case TGSI_OPCODE_TXQ:
823      assert(0);
824      return FALSE;
825      break;
826
827   case TGSI_OPCODE_CONT:
828      return FALSE;
829
830   case TGSI_OPCODE_EMIT:
831      return FALSE;
832      break;
833
834   case TGSI_OPCODE_ENDPRIM:
835      return FALSE;
836      break;
837
838   case TGSI_OPCODE_NOP:
839      break;
840
841   case TGSI_OPCODE_SAMPLE:
842      dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
843      break;
844
845   default:
846      return FALSE;
847   }
848
849   if (info->num_dst) {
850      lp_emit_store_aos(bld, inst, 0, dst0);
851   }
852
853   return TRUE;
854}
855
856
857void
858lp_build_tgsi_aos(struct gallivm_state *gallivm,
859                  const struct tgsi_token *tokens,
860                  struct lp_type type,
861                  const unsigned char swizzles[4],
862                  LLVMValueRef consts_ptr,
863                  const LLVMValueRef *inputs,
864                  LLVMValueRef *outputs,
865                  const struct lp_build_sampler_aos *sampler,
866                  const struct tgsi_shader_info *info)
867{
868   struct lp_build_tgsi_aos_context bld;
869   struct tgsi_parse_context parse;
870   uint num_immediates = 0;
871   unsigned chan;
872   int pc = 0;
873
874   /* Setup build context */
875   memset(&bld, 0, sizeof bld);
876   lp_build_context_init(&bld.bld_base.base, gallivm, type);
877   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
878   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
879   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
880
881   for (chan = 0; chan < 4; ++chan) {
882      bld.swizzles[chan] = swizzles[chan];
883      bld.inv_swizzles[swizzles[chan]] = chan;
884   }
885
886   bld.inputs = inputs;
887   bld.outputs = outputs;
888   bld.consts_ptr = consts_ptr;
889   bld.sampler = sampler;
890   bld.indirect_files = info->indirect_files;
891   bld.bld_base.emit_swizzle = swizzle_aos;
892   bld.bld_base.info = info;
893
894   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
895   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
896   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
897   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
898
899   /* Set opcode actions */
900   lp_set_default_actions_cpu(&bld.bld_base);
901
902   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
903      return;
904   }
905
906   tgsi_parse_init(&parse, tokens);
907
908   while (!tgsi_parse_end_of_tokens(&parse)) {
909      tgsi_parse_token(&parse);
910
911      switch(parse.FullToken.Token.Type) {
912      case TGSI_TOKEN_TYPE_DECLARATION:
913         /* Inputs already interpolated */
914         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
915         break;
916
917      case TGSI_TOKEN_TYPE_INSTRUCTION:
918         /* save expanded instruction */
919         lp_bld_tgsi_add_instruction(&bld.bld_base,
920                                     &parse.FullToken.FullInstruction);
921         break;
922
923      case TGSI_TOKEN_TYPE_IMMEDIATE:
924         /* simply copy the immediate values into the next immediates[] slot */
925         {
926            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
927            float imm[4];
928            assert(size <= 4);
929            assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
930            for (chan = 0; chan < 4; ++chan) {
931               imm[chan] = 0.0f;
932            }
933            for (chan = 0; chan < size; ++chan) {
934               unsigned swizzle = bld.swizzles[chan];
935               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
936            }
937            bld.immediates[num_immediates] =
938                     lp_build_const_aos(gallivm, type,
939                                        imm[0], imm[1], imm[2], imm[3],
940                                        NULL);
941            num_immediates++;
942         }
943         break;
944
945      case TGSI_TOKEN_TYPE_PROPERTY:
946         break;
947
948      default:
949         assert(0);
950      }
951   }
952
953   while (pc != -1) {
954      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
955      const struct tgsi_opcode_info *opcode_info =
956         tgsi_get_opcode_info(instr->Instruction.Opcode);
957      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
958         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
959                       tgsi_get_opcode_name(instr->Instruction.Opcode));
960   }
961
962   if (0) {
963      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
964      LLVMValueRef function = LLVMGetBasicBlockParent(block);
965      debug_printf("11111111111111111111111111111 \n");
966      tgsi_dump(tokens, 0);
967      lp_debug_dump_value(function);
968      debug_printf("2222222222222222222222222222 \n");
969   }
970   tgsi_parse_free(&parse);
971   FREE(bld.bld_base.instructions);
972
973   if (0) {
974      LLVMModuleRef module = LLVMGetGlobalParent(
975         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
976      LLVMDumpModule(module);
977   }
978
979}
980
981