1/**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * Copyright 2007-2008 VMware, Inc.
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30/**
31 * @file
32 * TGSI to LLVM IR translation.
33 *
34 * @author Jose Fonseca <jfonseca@vmware.com>
35 * @author Tom Stellard <thomas.stellard@amd.com>
36 *
37 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
38 * Brian Paul, and others.
39 */
40
41
42#include "lp_bld_tgsi_action.h"
43
44#include "lp_bld_tgsi.h"
45#include "lp_bld_arit.h"
46#include "lp_bld_bitarit.h"
47#include "lp_bld_const.h"
48#include "lp_bld_conv.h"
49#include "lp_bld_gather.h"
50#include "lp_bld_logic.h"
51#include "lp_bld_pack.h"
52
53#include "tgsi/tgsi_exec.h"
54
/* XXX: The CPU-only defaults should be replaced by generic ones.  In most
 * cases, the CPU defaults are just wrappers around a function in
 * lp_bld_arit.c; those functions should be inlined here, and the CPU-specific
 * code should be removed from this file and placed elsewhere.
 */
60
61/* Default actions */
62
63/* Generic fetch_arg functions */
64
65static void scalar_unary_fetch_args(
66   struct lp_build_tgsi_context * bld_base,
67   struct lp_build_emit_data * emit_data)
68{
69   /* src0.x */
70   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
71   emit_data->arg_count = 1;
72   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
73}
74
75static void scalar_binary_fetch_args(
76   struct lp_build_tgsi_context * bld_base,
77   struct lp_build_emit_data * emit_data)
78{
79   /* src0.x */
80   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
81                                            0, TGSI_CHAN_X);
82   /* src1.x */
83   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
84                                            1, TGSI_CHAN_X);
85   emit_data->arg_count = 2;
86   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
87}
88
89/* TGSI_OPCODE_ADD */
90static void
91add_emit(
92   const struct lp_build_tgsi_action * action,
93   struct lp_build_tgsi_context * bld_base,
94   struct lp_build_emit_data * emit_data)
95{
96   emit_data->output[emit_data->chan] = LLVMBuildFAdd(
97                                bld_base->base.gallivm->builder,
98                                emit_data->args[0], emit_data->args[1], "");
99}
100
101/* TGSI_OPCODE_ARR */
102static void
103arr_emit(
104   const struct lp_build_tgsi_action * action,
105   struct lp_build_tgsi_context * bld_base,
106   struct lp_build_emit_data * emit_data)
107{
108   LLVMValueRef tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND, emit_data->args[0]);
109   emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
110							bld_base->uint_bld.vec_type, "");
111}
112
113/* DP* Helper */
114
115static void
116dp_fetch_args(
117   struct lp_build_tgsi_context * bld_base,
118   struct lp_build_emit_data * emit_data,
119   unsigned dp_components)
120{
121   unsigned chan, src;
122   for (src = 0; src < 2; src++) {
123      for (chan = 0; chan < dp_components; chan++) {
124         emit_data->args[(src * dp_components) + chan] =
125                     lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
126      }
127   }
128   emit_data->dst_type = bld_base->base.elem_type;
129}
130
/**
 * TGSI_OPCODE_DP2
 *
 * fetch_args callback: loads two channels from each of src0 and src1 through
 * dp_fetch_args(), giving args[0..1] = src0 and args[2..3] = src1.
 */
static void
dp2_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 2;

   dp_fetch_args(bld_base, emit_data, num_components);
}
139
140static void
141dp2_emit(
142   const struct lp_build_tgsi_action * action,
143   struct lp_build_tgsi_context * bld_base,
144   struct lp_build_emit_data * emit_data)
145{
146   LLVMValueRef tmp0, tmp1;
147   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
148                                    emit_data->args[0] /* src0.x */,
149                                    emit_data->args[2] /* src1.x */);
150   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
151                                    emit_data->args[1] /* src0.y */,
152                                    emit_data->args[3] /* src1.y */);
153   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
154                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
155}
156
/* Action table for the two-component dot product: dp2_fetch_args loads the
 * operands and dp2_emit builds the result.  The initializer is positional.
 */
static struct lp_build_tgsi_action dp2_action = {
   dp2_fetch_args,	 /* fetch_args */
   dp2_emit	 /* emit */
};
161
/**
 * TGSI_OPCODE_DP3
 *
 * fetch_args callback: loads three channels from each of src0 and src1
 * through dp_fetch_args(), giving args[0..2] = src0 and args[3..5] = src1.
 */
static void
dp3_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 3;

   dp_fetch_args(bld_base, emit_data, num_components);
}
170
171static void
172dp3_emit(
173   const struct lp_build_tgsi_action * action,
174   struct lp_build_tgsi_context * bld_base,
175   struct lp_build_emit_data * emit_data)
176{
177   LLVMValueRef tmp0, tmp1;
178   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
179                                    emit_data->args[0] /* src0.x */,
180                                    emit_data->args[3] /* src1.x */);
181   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
182                                    emit_data->args[1] /* src0.y */,
183                                    emit_data->args[4] /* src1.y */);
184   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
185   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
186                                    emit_data->args[2] /* src0.z */,
187                                    emit_data->args[5] /* src1.z */);
188   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
189                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
190}
191
/* Action table for the three-component dot product: dp3_fetch_args loads the
 * operands and dp3_emit builds the result.  The initializer is positional.
 */
static struct lp_build_tgsi_action dp3_action = {
   dp3_fetch_args,	 /* fetch_args */
   dp3_emit	 /* emit */
};
196
/* TGSI_OPCODE_DP4 */
198
/**
 * fetch_args callback for the four-component dot product: loads four
 * channels from each of src0 and src1 through dp_fetch_args(), giving
 * args[0..3] = src0 and args[4..7] = src1.
 */
static void
dp4_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 4;

   dp_fetch_args(bld_base, emit_data, num_components);
}
206
207static void
208dp4_emit(
209   const struct lp_build_tgsi_action * action,
210   struct lp_build_tgsi_context * bld_base,
211   struct lp_build_emit_data * emit_data)
212{
213   LLVMValueRef tmp0, tmp1;
214   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
215                                    emit_data->args[0] /* src0.x */,
216                                    emit_data->args[4] /* src1.x */);
217   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
218                                    emit_data->args[1] /* src0.y */,
219                                    emit_data->args[5] /* src1.y */);
220   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
221   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
222                                    emit_data->args[2] /* src0.z */,
223                                    emit_data->args[6] /* src1.z */);
224   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
225   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
226                                    emit_data->args[3] /* src0.w */,
227                                    emit_data->args[7] /* src1.w */);
228   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
229                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
230}
231
/* Action table for the four-component dot product: dp4_fetch_args loads the
 * operands and dp4_emit builds the result.  The initializer is positional.
 */
static struct lp_build_tgsi_action dp4_action = {
   dp4_fetch_args,	 /* fetch_args */
   dp4_emit	 /* emit */
};
236
237/* TGSI_OPCODE_DST */
238static void
239dst_fetch_args(
240   struct lp_build_tgsi_context * bld_base,
241   struct lp_build_emit_data * emit_data)
242{
243   /* src0.y */
244   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
245                                            0, TGSI_CHAN_Y);
246   /* src0.z */
247   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
248                                            0, TGSI_CHAN_Z);
249   /* src1.y */
250   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
251                                            1, TGSI_CHAN_Y);
252   /* src1.w */
253   emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
254                                            1, TGSI_CHAN_W);
255}
256
257static void
258dst_emit(
259   const struct lp_build_tgsi_action * action,
260   struct lp_build_tgsi_context * bld_base,
261   struct lp_build_emit_data * emit_data)
262{
263   /* dst.x */
264   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
265
266   /* dst.y */
267   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
268                                          TGSI_OPCODE_MUL,
269                                          emit_data->args[0] /* src0.y */,
270                                          emit_data->args[2] /* src1.y */);
271   /* dst.z */
272   emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
273
274   /* dst.w */
275   emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
276}
277
/* Action table for TGSI_OPCODE_DST: dst_fetch_args loads the four needed
 * source channels and dst_emit writes all four output channels.  The
 * initializer is positional.
 */
static struct lp_build_tgsi_action dst_action = {
   dst_fetch_args,	 /* fetch_args */
   dst_emit	 /* emit */
};
282
283/* TGSI_OPCODE_END */
284static void
285end_emit(
286   const struct lp_build_tgsi_action * action,
287   struct lp_build_tgsi_context * bld_base,
288   struct lp_build_emit_data * emit_data)
289{
290   bld_base->pc = -1;
291}
292
293/* TGSI_OPCODE_EXP */
294
295static void
296exp_emit(
297   const struct lp_build_tgsi_action * action,
298   struct lp_build_tgsi_context * bld_base,
299   struct lp_build_emit_data * emit_data)
300{
301   LLVMValueRef floor_x;
302
303   /* floor( src0.x ) */
304   floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
305                                      emit_data->args[0]);
306
307   /* 2 ^ floor( src0.x ) */
308   emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
309                                       TGSI_OPCODE_EX2, floor_x);
310
311   /* src0.x - floor( src0.x ) */
312   emit_data->output[TGSI_CHAN_Y] =
313      lp_build_sub(&bld_base->base, emit_data->args[0] /* src0.x */, floor_x);
314
315   /* 2 ^ src0.x */
316   emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
317                             TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
318
319   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
320}
321
/* Action table for TGSI_OPCODE_EXP: scalar_unary_fetch_args loads src0.x and
 * exp_emit writes all four output channels.  Unlike most tables in this
 * file it has external linkage (presumably declared in a header -- confirm).
 */
const struct lp_build_tgsi_action exp_action = {
   scalar_unary_fetch_args,	 /* fetch_args */
   exp_emit	 /* emit */
};
326
327/* TGSI_OPCODE_FRC */
328
329static void
330frc_emit(
331   const struct lp_build_tgsi_action * action,
332   struct lp_build_tgsi_context * bld_base,
333   struct lp_build_emit_data * emit_data)
334{
335   LLVMValueRef tmp;
336   tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
337                                  emit_data->args[0]);
338   emit_data->output[emit_data->chan] =
339      lp_build_sub(&bld_base->base, emit_data->args[0], tmp);
340}
341
342/* TGSI_OPCODE_KILL_IF */
343
344static void
345kil_fetch_args(
346   struct lp_build_tgsi_context * bld_base,
347   struct lp_build_emit_data * emit_data)
348{
349   /* src0.x */
350   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
351                                            0, TGSI_CHAN_X);
352   /* src0.y */
353   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
354                                            0, TGSI_CHAN_Y);
355   /* src0.z */
356   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
357                                            0, TGSI_CHAN_Z);
358   /* src0.w */
359   emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
360                                            0, TGSI_CHAN_W);
361   emit_data->arg_count = 4;
362   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
363}
364
365/* TGSI_OPCODE_KILL */
366
367static void
368kilp_fetch_args(
369   struct lp_build_tgsi_context * bld_base,
370   struct lp_build_emit_data * emit_data)
371{
372   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
373}
374
375/* TGSI_OPCODE_LIT */
376
377static void
378lit_fetch_args(
379   struct lp_build_tgsi_context * bld_base,
380   struct lp_build_emit_data * emit_data)
381{
382   /* src0.x */
383   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
384   /* src0.y */
385   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
386   /* src0.w */
387   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
388   emit_data->arg_count = 3;
389}
390
391static void
392lit_emit(
393   const struct lp_build_tgsi_action * action,
394   struct lp_build_tgsi_context * bld_base,
395   struct lp_build_emit_data * emit_data)
396{
397   LLVMValueRef tmp0, tmp1, tmp2;
398
399   /* dst.x */
400   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
401
402   /* dst. y */
403   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
404                                               TGSI_OPCODE_MAX,
405                                               emit_data->args[0] /* src0.x */,
406                                               bld_base->base.zero);
407
408   /* dst.z */
409   /* XMM[1] = SrcReg[0].yyyy */
410   tmp1 = emit_data->args[1];
411   /* XMM[1] = max(XMM[1], 0) */
412   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
413                                    tmp1, bld_base->base.zero);
414   /* XMM[2] = SrcReg[0].wwww */
415   tmp2 = emit_data->args[2];
416   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
417                                    tmp1, tmp2);
418   tmp0 = emit_data->args[0];
419   emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
420                                             TGSI_OPCODE_CMP,
421                                             tmp0, bld_base->base.zero, tmp1);
422   /* dst.w */
423   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
424}
425
/* Action table for TGSI_OPCODE_LIT: lit_fetch_args loads src0.x/y/w and
 * lit_emit writes all four output channels.  The initializer is positional.
 */
static struct lp_build_tgsi_action lit_action = {
   lit_fetch_args,	 /* fetch_args */
   lit_emit	 /* emit */
};
430
431/* TGSI_OPCODE_LOG */
432
433static void
434log_emit(
435   const struct lp_build_tgsi_action * action,
436   struct lp_build_tgsi_context * bld_base,
437   struct lp_build_emit_data * emit_data)
438{
439
440   LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
441
442   /* abs( src0.x) */
443   abs_x = lp_build_abs(&bld_base->base, emit_data->args[0] /* src0.x */);
444
445   /* log( abs( src0.x ) ) */
446   log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
447                                        abs_x);
448
449   /* floor( log( abs( src0.x ) ) ) */
450   flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
451                                            log_abs_x);
452   /* dst.x */
453   emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
454
455   /* dst.y */
456   ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
457                                                flr_log_abs_x);
458
459   /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
460   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
461                                    TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
462
463   /* dst.x */
464   emit_data->output[TGSI_CHAN_Z] = log_abs_x;
465
466   /* dst.w */
467   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
468}
469
/* Action table for TGSI_OPCODE_LOG: scalar_unary_fetch_args loads src0.x and
 * log_emit writes all four output channels.  The initializer is positional.
 */
static struct lp_build_tgsi_action log_action = {
   scalar_unary_fetch_args,	 /* fetch_args */
   log_emit	 /* emit */
};
474
475/* TGSI_OPCODE_PK2H */
476
477static void
478pk2h_fetch_args(
479   struct lp_build_tgsi_context * bld_base,
480   struct lp_build_emit_data * emit_data)
481{
482   /* src0.x */
483   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
484                                            0, TGSI_CHAN_X);
485   /* src0.y */
486   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
487                                            0, TGSI_CHAN_Y);
488}
489
490static void
491pk2h_emit(
492   const struct lp_build_tgsi_action *action,
493   struct lp_build_tgsi_context *bld_base,
494   struct lp_build_emit_data *emit_data)
495{
496   struct gallivm_state *gallivm = bld_base->base.gallivm;
497   struct lp_type f16i_t;
498   LLVMValueRef lo, hi, res;
499
500   f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
501   lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
502   hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
503   /* maybe some interleave doubling vector width would be useful... */
504   lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
505   hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
506   res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
507
508   emit_data->output[emit_data->chan] = res;
509}
510
/* Action table for TGSI_OPCODE_PK2H: pk2h_fetch_args loads src0.x/y and
 * pk2h_emit packs them as half floats.  The initializer is positional.
 */
static struct lp_build_tgsi_action pk2h_action = {
   pk2h_fetch_args, /* fetch_args */
   pk2h_emit        /* emit */
};
515
516/* TGSI_OPCODE_UP2H */
517
518static void
519up2h_emit(
520   const struct lp_build_tgsi_action *action,
521   struct lp_build_tgsi_context *bld_base,
522   struct lp_build_emit_data *emit_data)
523{
524   struct gallivm_state *gallivm = bld_base->base.gallivm;
525   LLVMBuilderRef builder = gallivm->builder;
526   LLVMContextRef context = gallivm->context;
527   LLVMValueRef lo, hi, res[2], arg;
528   unsigned nr = bld_base->base.type.length;
529   LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
530
531   arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
532   lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
533   hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
534   res[0] = lp_build_half_to_float(gallivm, lo);
535   res[1] = lp_build_half_to_float(gallivm, hi);
536
537   emit_data->output[0] = emit_data->output[2] = res[0];
538   emit_data->output[1] = emit_data->output[3] = res[1];
539}
540
/* Action table for TGSI_OPCODE_UP2H: scalar_unary_fetch_args loads src0.x
 * and up2h_emit unpacks it into all four output channels.  The initializer
 * is positional.
 */
static struct lp_build_tgsi_action up2h_action = {
   scalar_unary_fetch_args, /* fetch_args */
   up2h_emit                /* emit */
};
545
546/* TGSI_OPCODE_LRP */
547
548static void
549lrp_emit(
550   const struct lp_build_tgsi_action * action,
551   struct lp_build_tgsi_context * bld_base,
552   struct lp_build_emit_data * emit_data)
553{
554   struct lp_build_context *bld = &bld_base->base;
555   LLVMValueRef inv, a, b;
556
557   /* This uses the correct version: (1 - t)*a + t*b
558    *
559    * An alternative version is "a + t*(b-a)". The problem is this version
560    * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
561    * because of the floating-point rounding.
562    */
563   inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
564   a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
565   b = lp_build_mul(bld, emit_data->args[2], inv);
566   emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
567}
568
569/* TGSI_OPCODE_MAD */
570
571static void
572mad_emit(
573   const struct lp_build_tgsi_action * action,
574   struct lp_build_tgsi_context * bld_base,
575   struct lp_build_emit_data * emit_data)
576{
577   LLVMValueRef tmp;
578   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
579                                   emit_data->args[0],
580                                   emit_data->args[1]);
581   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
582                                       TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
583}
584
585/* TGSI_OPCODE_MOV */
586
587static void
588mov_emit(
589   const struct lp_build_tgsi_action * action,
590   struct lp_build_tgsi_context * bld_base,
591   struct lp_build_emit_data * emit_data)
592{
593   emit_data->output[emit_data->chan] = emit_data->args[0];
594}
595
596/* TGSI_OPCODE_MUL */
597static void
598mul_emit(
599   const struct lp_build_tgsi_action * action,
600   struct lp_build_tgsi_context * bld_base,
601   struct lp_build_emit_data * emit_data)
602{
603   emit_data->output[emit_data->chan] = LLVMBuildFMul(
604                                   bld_base->base.gallivm->builder,
605                                   emit_data->args[0], emit_data->args[1], "");
606}
607
608/*.TGSI_OPCODE_DIV.*/
609static void fdiv_emit(
610   const struct lp_build_tgsi_action * action,
611   struct lp_build_tgsi_context * bld_base,
612   struct lp_build_emit_data * emit_data)
613{
614   emit_data->output[emit_data->chan] = LLVMBuildFDiv(
615                                   bld_base->base.gallivm->builder,
616                                   emit_data->args[0], emit_data->args[1], "");
617}
618
619/*.TGSI_OPCODE_RCP.*/
620static void rcp_emit(
621   const struct lp_build_tgsi_action * action,
622   struct lp_build_tgsi_context * bld_base,
623   struct lp_build_emit_data * emit_data)
624{
625   LLVMValueRef one;
626   one = lp_build_const_float(bld_base->base.gallivm, 1.0f);
627   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
628                                   TGSI_OPCODE_DIV, one, emit_data->args[0]);
629}
630
631/* TGSI_OPCODE_POW */
632
633static void
634pow_emit(
635   const struct lp_build_tgsi_action * action,
636   struct lp_build_tgsi_context * bld_base,
637   struct lp_build_emit_data * emit_data)
638{
639   emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
640                                   emit_data->args[0], emit_data->args[1]);
641}
642
/* Action table for TGSI_OPCODE_POW: scalar_binary_fetch_args loads src0.x
 * and src1.x, and pow_emit builds the result.  The initializer is
 * positional.
 */
static struct lp_build_tgsi_action pow_action = {
   scalar_binary_fetch_args,	 /* fetch_args */
   pow_emit	 /* emit */
};
647
648/* TGSI_OPCODE_RSQ */
649
650static void
651rsq_emit(
652   const struct lp_build_tgsi_action * action,
653   struct lp_build_tgsi_context * bld_base,
654   struct lp_build_emit_data * emit_data)
655{
656   if (bld_base->rsq_action.emit) {
657      bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
658   } else {
659      emit_data->output[emit_data->chan] = bld_base->base.undef;
660   }
661}
662
/* Action table for TGSI_OPCODE_RSQ: scalar_unary_fetch_args loads src0.x and
 * rsq_emit forwards to the context's own rsq_action.  It has external
 * linkage (presumably declared in a header -- confirm).
 */
const struct lp_build_tgsi_action rsq_action = {
   scalar_unary_fetch_args,	 /* fetch_args */
   rsq_emit	 /* emit */

};
668
669/* TGSI_OPCODE_SQRT */
670
671static void
672sqrt_emit(
673   const struct lp_build_tgsi_action * action,
674   struct lp_build_tgsi_context * bld_base,
675   struct lp_build_emit_data * emit_data)
676{
677   if (bld_base->sqrt_action.emit) {
678      bld_base->sqrt_action.emit(&bld_base->sqrt_action, bld_base, emit_data);
679   } else {
680      emit_data->output[emit_data->chan] = bld_base->base.undef;
681   }
682}
683
/* Action table for TGSI_OPCODE_SQRT: scalar_unary_fetch_args loads src0.x
 * and sqrt_emit forwards to the context's own sqrt_action.  It has external
 * linkage (presumably declared in a header -- confirm).
 */
const struct lp_build_tgsi_action sqrt_action = {
   scalar_unary_fetch_args,	 /* fetch_args */
   sqrt_emit	 /* emit */
};
688
689/* TGSI_OPCODE_F2U */
690static void
691f2u_emit(
692   const struct lp_build_tgsi_action * action,
693   struct lp_build_tgsi_context * bld_base,
694   struct lp_build_emit_data * emit_data)
695{
696   emit_data->output[emit_data->chan] =
697      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
698                      emit_data->args[0],
699                      bld_base->base.int_vec_type, "");
700}
701
702/* TGSI_OPCODE_U2F */
703static void
704u2f_emit(
705   const struct lp_build_tgsi_action * action,
706   struct lp_build_tgsi_context * bld_base,
707   struct lp_build_emit_data * emit_data)
708{
709   emit_data->output[emit_data->chan] =
710      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
711                      emit_data->args[0],
712                      bld_base->base.vec_type, "");
713}
714
715static void
716umad_emit(
717   const struct lp_build_tgsi_action * action,
718   struct lp_build_tgsi_context * bld_base,
719   struct lp_build_emit_data * emit_data)
720{
721   LLVMValueRef tmp;
722   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMUL,
723                                   emit_data->args[0],
724                                   emit_data->args[1]);
725   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
726                                       TGSI_OPCODE_UADD, tmp, emit_data->args[2]);
727}
728
729/* TGSI_OPCODE_UMUL */
730static void
731umul_emit(
732   const struct lp_build_tgsi_action * action,
733   struct lp_build_tgsi_context * bld_base,
734   struct lp_build_emit_data * emit_data)
735{
736   emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint_bld,
737                                   emit_data->args[0], emit_data->args[1]);
738}
739
740/* TGSI_OPCODE_IMUL_HI */
741static void
742imul_hi_emit(
743   const struct lp_build_tgsi_action * action,
744   struct lp_build_tgsi_context * bld_base,
745   struct lp_build_emit_data * emit_data)
746{
747   struct lp_build_context *int_bld = &bld_base->int_bld;
748   LLVMValueRef hi_bits;
749
750   assert(int_bld->type.width == 32);
751
752   /* low result bits are tossed away */
753   lp_build_mul_32_lohi(int_bld, emit_data->args[0],
754                        emit_data->args[1], &hi_bits);
755   emit_data->output[emit_data->chan] = hi_bits;
756}
757
758static void
759imul_hi_emit_cpu(
760   const struct lp_build_tgsi_action * action,
761   struct lp_build_tgsi_context * bld_base,
762   struct lp_build_emit_data * emit_data)
763{
764   struct lp_build_context *int_bld = &bld_base->int_bld;
765   LLVMValueRef hi_bits;
766
767   assert(int_bld->type.width == 32);
768
769   /* low result bits are tossed away */
770   lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
771                            emit_data->args[1], &hi_bits);
772   emit_data->output[emit_data->chan] = hi_bits;
773}
774
775/* TGSI_OPCODE_UMUL_HI */
776static void
777umul_hi_emit(
778   const struct lp_build_tgsi_action * action,
779   struct lp_build_tgsi_context * bld_base,
780   struct lp_build_emit_data * emit_data)
781{
782   struct lp_build_context *uint_bld = &bld_base->uint_bld;
783   LLVMValueRef hi_bits;
784
785   assert(uint_bld->type.width == 32);
786
787   /* low result bits are tossed away */
788   lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
789                        emit_data->args[1], &hi_bits);
790   emit_data->output[emit_data->chan] = hi_bits;
791}
792
793static void
794umul_hi_emit_cpu(
795   const struct lp_build_tgsi_action * action,
796   struct lp_build_tgsi_context * bld_base,
797   struct lp_build_emit_data * emit_data)
798{
799   struct lp_build_context *uint_bld = &bld_base->uint_bld;
800   LLVMValueRef hi_bits;
801
802   assert(uint_bld->type.width == 32);
803
804   /* low result bits are tossed away */
805   lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
806                            emit_data->args[1], &hi_bits);
807   emit_data->output[emit_data->chan] = hi_bits;
808}
809
810/* TGSI_OPCODE_MAX */
811static void fmax_emit(
812   const struct lp_build_tgsi_action * action,
813   struct lp_build_tgsi_context * bld_base,
814   struct lp_build_emit_data * emit_data)
815{
816   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
817   emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
818                                   LLVMBuildFCmp(builder, LLVMRealUGE,
819                                   emit_data->args[0], emit_data->args[1], ""),
820                                   emit_data->args[0], emit_data->args[1], "");
821}
822
823/* TGSI_OPCODE_MIN */
824static void fmin_emit(
825   const struct lp_build_tgsi_action * action,
826   struct lp_build_tgsi_context * bld_base,
827   struct lp_build_emit_data * emit_data)
828{
829   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
830   emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
831                                   LLVMBuildFCmp(builder, LLVMRealUGE,
832                                   emit_data->args[0], emit_data->args[1], ""),
833                                   emit_data->args[1], emit_data->args[0], "");
834}
835
836/* TGSI_OPCODE_D2F */
837static void
838d2f_emit(
839   const struct lp_build_tgsi_action * action,
840   struct lp_build_tgsi_context * bld_base,
841   struct lp_build_emit_data * emit_data)
842{
843   emit_data->output[emit_data->chan] =
844      LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
845                      emit_data->args[0],
846                       bld_base->base.vec_type, "");
847}
848
849/* TGSI_OPCODE_D2I */
850static void
851d2i_emit(
852   const struct lp_build_tgsi_action * action,
853   struct lp_build_tgsi_context * bld_base,
854   struct lp_build_emit_data * emit_data)
855{
856   emit_data->output[emit_data->chan] =
857      LLVMBuildFPToSI(bld_base->base.gallivm->builder,
858                      emit_data->args[0],
859                      bld_base->base.int_vec_type, "");
860}
861
862/* TGSI_OPCODE_D2U */
863static void
864d2u_emit(
865   const struct lp_build_tgsi_action * action,
866   struct lp_build_tgsi_context * bld_base,
867   struct lp_build_emit_data * emit_data)
868{
869   emit_data->output[emit_data->chan] =
870      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
871                      emit_data->args[0],
872                      bld_base->base.int_vec_type, "");
873}
874
875/* TGSI_OPCODE_F2D */
876static void
877f2d_emit(
878   const struct lp_build_tgsi_action * action,
879   struct lp_build_tgsi_context * bld_base,
880   struct lp_build_emit_data * emit_data)
881{
882   emit_data->output[emit_data->chan] =
883      LLVMBuildFPExt(bld_base->base.gallivm->builder,
884                      emit_data->args[0],
885                      bld_base->dbl_bld.vec_type, "");
886}
887
888/* TGSI_OPCODE_U2D */
889static void
890u2d_emit(
891   const struct lp_build_tgsi_action * action,
892   struct lp_build_tgsi_context * bld_base,
893   struct lp_build_emit_data * emit_data)
894{
895   emit_data->output[emit_data->chan] =
896      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
897                      emit_data->args[0],
898                      bld_base->dbl_bld.vec_type, "");
899}
900
901/* TGSI_OPCODE_I2D */
902static void
903i2d_emit(
904   const struct lp_build_tgsi_action * action,
905   struct lp_build_tgsi_context * bld_base,
906   struct lp_build_emit_data * emit_data)
907{
908   emit_data->output[emit_data->chan] =
909      LLVMBuildSIToFP(bld_base->base.gallivm->builder,
910                      emit_data->args[0],
911                      bld_base->dbl_bld.vec_type, "");
912}
913
914/* TGSI_OPCODE_DMAD */
915static void
916dmad_emit(
917   const struct lp_build_tgsi_action * action,
918   struct lp_build_tgsi_context * bld_base,
919   struct lp_build_emit_data * emit_data)
920{
921   LLVMValueRef tmp;
922   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
923                                   emit_data->args[0],
924                                   emit_data->args[1]);
925   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
926                                       TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
927}
928
929/*.TGSI_OPCODE_DRCP.*/
930static void drcp_emit(
931   const struct lp_build_tgsi_action * action,
932   struct lp_build_tgsi_context * bld_base,
933   struct lp_build_emit_data * emit_data)
934{
935   LLVMValueRef one;
936   one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
937   emit_data->output[emit_data->chan] = LLVMBuildFDiv(
938      bld_base->base.gallivm->builder,
939      one, emit_data->args[0], "");
940}
941
942/* TGSI_OPCODE_DFRAC */
943static void dfrac_emit(
944   const struct lp_build_tgsi_action * action,
945   struct lp_build_tgsi_context * bld_base,
946   struct lp_build_emit_data * emit_data)
947{
948   LLVMValueRef tmp;
949   tmp = lp_build_floor(&bld_base->dbl_bld,
950			emit_data->args[0]);
951   emit_data->output[emit_data->chan] =  LLVMBuildFSub(bld_base->base.gallivm->builder,
952                                                       emit_data->args[0], tmp, "");
953}
954
955static void
956u64mul_emit(
957   const struct lp_build_tgsi_action * action,
958   struct lp_build_tgsi_context * bld_base,
959   struct lp_build_emit_data * emit_data)
960{
961   emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
962                                   emit_data->args[0], emit_data->args[1]);
963}
964
965static void
966u64mod_emit_cpu(
967   const struct lp_build_tgsi_action * action,
968   struct lp_build_tgsi_context * bld_base,
969   struct lp_build_emit_data * emit_data)
970{
971   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
972   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
973                                        PIPE_FUNC_EQUAL, emit_data->args[1],
974                                        bld_base->uint64_bld.zero);
975   /* We want to make sure that we never divide/mod by zero to not
976    * generate sigfpe. We don't want to crash just because the
977    * shader is doing something weird. */
978   LLVMValueRef divisor = LLVMBuildOr(builder,
979                                      div_mask,
980                                      emit_data->args[1], "");
981   LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
982                                      emit_data->args[0], divisor);
983   /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
984   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
985                                                    div_mask,
986                                                    result, "");
987}
988
989static void
990i64mod_emit_cpu(
991   const struct lp_build_tgsi_action * action,
992   struct lp_build_tgsi_context * bld_base,
993   struct lp_build_emit_data * emit_data)
994{
995   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
996   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
997                                        PIPE_FUNC_EQUAL, emit_data->args[1],
998                                        bld_base->uint64_bld.zero);
999   /* We want to make sure that we never divide/mod by zero to not
1000    * generate sigfpe. We don't want to crash just because the
1001    * shader is doing something weird. */
1002   LLVMValueRef divisor = LLVMBuildOr(builder,
1003                                      div_mask,
1004                                      emit_data->args[1], "");
1005   LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
1006                                      emit_data->args[0], divisor);
1007   /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1008   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1009                                                    div_mask,
1010                                                    result, "");
1011}
1012
1013static void
1014u64div_emit_cpu(
1015   const struct lp_build_tgsi_action * action,
1016   struct lp_build_tgsi_context * bld_base,
1017   struct lp_build_emit_data * emit_data)
1018{
1019
1020   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1021   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
1022                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1023                                        bld_base->uint64_bld.zero);
1024   /* We want to make sure that we never divide/mod by zero to not
1025    * generate sigfpe. We don't want to crash just because the
1026    * shader is doing something weird. */
1027   LLVMValueRef divisor = LLVMBuildOr(builder,
1028                                      div_mask,
1029                                      emit_data->args[1], "");
1030   LLVMValueRef result = LLVMBuildUDiv(builder,
1031				       emit_data->args[0], divisor, "");
1032   /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1033   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1034                                                    div_mask,
1035                                                    result, "");
1036}
1037
1038static void
1039i64div_emit_cpu(
1040   const struct lp_build_tgsi_action * action,
1041   struct lp_build_tgsi_context * bld_base,
1042   struct lp_build_emit_data * emit_data)
1043{
1044
1045   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1046   LLVMValueRef div_mask = lp_build_cmp(&bld_base->int64_bld,
1047                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1048                                        bld_base->int64_bld.zero);
1049   /* We want to make sure that we never divide/mod by zero to not
1050    * generate sigfpe. We don't want to crash just because the
1051    * shader is doing something weird. */
1052   LLVMValueRef divisor = LLVMBuildOr(builder,
1053                                      div_mask,
1054                                      emit_data->args[1], "");
1055   LLVMValueRef result = LLVMBuildSDiv(builder,
1056				       emit_data->args[0], divisor, "");
1057   /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1058   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1059                                                    div_mask,
1060                                                    result, "");
1061}
1062
1063static void
1064f2u64_emit(
1065   const struct lp_build_tgsi_action * action,
1066   struct lp_build_tgsi_context * bld_base,
1067   struct lp_build_emit_data * emit_data)
1068{
1069   emit_data->output[emit_data->chan] =
1070      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1071                      emit_data->args[0],
1072                      bld_base->uint64_bld.vec_type, "");
1073}
1074
1075static void
1076f2i64_emit(
1077   const struct lp_build_tgsi_action * action,
1078   struct lp_build_tgsi_context * bld_base,
1079   struct lp_build_emit_data * emit_data)
1080{
1081   emit_data->output[emit_data->chan] =
1082      LLVMBuildFPToSI(bld_base->base.gallivm->builder,
1083                      emit_data->args[0],
1084                      bld_base->int64_bld.vec_type, "");
1085}
1086
1087static void
1088u2i64_emit(
1089   const struct lp_build_tgsi_action * action,
1090   struct lp_build_tgsi_context * bld_base,
1091   struct lp_build_emit_data * emit_data)
1092{
1093   emit_data->output[emit_data->chan] =
1094      LLVMBuildZExt(bld_base->base.gallivm->builder,
1095                      emit_data->args[0],
1096                      bld_base->uint64_bld.vec_type, "");
1097}
1098
1099static void
1100i2i64_emit(
1101   const struct lp_build_tgsi_action * action,
1102   struct lp_build_tgsi_context * bld_base,
1103   struct lp_build_emit_data * emit_data)
1104{
1105   emit_data->output[emit_data->chan] =
1106      LLVMBuildSExt(bld_base->base.gallivm->builder,
1107                      emit_data->args[0],
1108                      bld_base->int64_bld.vec_type, "");
1109}
1110
1111static void
1112i642f_emit(
1113   const struct lp_build_tgsi_action * action,
1114   struct lp_build_tgsi_context * bld_base,
1115   struct lp_build_emit_data * emit_data)
1116{
1117   emit_data->output[emit_data->chan] =
1118      LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1119                      emit_data->args[0],
1120                      bld_base->base.vec_type, "");
1121}
1122
1123static void
1124u642f_emit(
1125   const struct lp_build_tgsi_action * action,
1126   struct lp_build_tgsi_context * bld_base,
1127   struct lp_build_emit_data * emit_data)
1128{
1129   emit_data->output[emit_data->chan] =
1130      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1131                      emit_data->args[0],
1132                      bld_base->base.vec_type, "");
1133}
1134
1135static void
1136i642d_emit(
1137   const struct lp_build_tgsi_action * action,
1138   struct lp_build_tgsi_context * bld_base,
1139   struct lp_build_emit_data * emit_data)
1140{
1141   emit_data->output[emit_data->chan] =
1142      LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1143                      emit_data->args[0],
1144                      bld_base->dbl_bld.vec_type, "");
1145}
1146
1147static void
1148u642d_emit(
1149   const struct lp_build_tgsi_action * action,
1150   struct lp_build_tgsi_context * bld_base,
1151   struct lp_build_emit_data * emit_data)
1152{
1153   emit_data->output[emit_data->chan] =
1154      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1155                      emit_data->args[0],
1156                      bld_base->dbl_bld.vec_type, "");
1157}
1158
1159void
1160lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
1161{
1162   bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
1163   bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
1164   bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
1165   bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
1166   bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
1167   bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
1168   bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
1169   bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
1170   bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
1171   bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
1172   bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
1173   bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
1174
1175   bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
1176   bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
1177   bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
1178   bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
1179   bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
1180   bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
1181   bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
1182   bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
1183   bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
1184   bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
1185   bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
1186
1187   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
1188   bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
1189   bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
1190   bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
1191   bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
1192   bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
1193   bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
1194   bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
1195   bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
1196   bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
1197
1198   bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
1199   bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
1200   bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
1201   bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
1202   bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
1203   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
1204   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
1205
1206   bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
1207   bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
1208
1209   bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
1210   bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
1211   bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
1212   bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
1213   bld_base->op_actions[TGSI_OPCODE_DDIV].emit = fdiv_emit;
1214
1215   bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
1216   bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
1217   bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
1218
1219   bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
1220   bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
1221   bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
1222
1223   bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
1224
1225   bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
1226   bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
1227
1228   bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit;
1229
1230   bld_base->op_actions[TGSI_OPCODE_F2I64].emit = f2i64_emit;
1231   bld_base->op_actions[TGSI_OPCODE_F2U64].emit = f2u64_emit;
1232
1233   bld_base->op_actions[TGSI_OPCODE_D2I64].emit = f2i64_emit;
1234   bld_base->op_actions[TGSI_OPCODE_D2U64].emit = f2u64_emit;
1235
1236   bld_base->op_actions[TGSI_OPCODE_I2I64].emit = i2i64_emit;
1237   bld_base->op_actions[TGSI_OPCODE_U2I64].emit = u2i64_emit;
1238
1239   bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1240   bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1241
1242   bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1243   bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1244
1245   bld_base->op_actions[TGSI_OPCODE_I642D].emit = i642d_emit;
1246   bld_base->op_actions[TGSI_OPCODE_U642D].emit = u642d_emit;
1247
1248}
1249
1250/* CPU Only default actions */
1251
1252/* These actions are CPU only, because they could potentially output SSE
1253 * intrinsics.
1254 */
1255
1256/* TGSI_OPCODE_ADD (CPU Only) */
1257static void
1258add_emit_cpu(
1259   const struct lp_build_tgsi_action * action,
1260   struct lp_build_tgsi_context * bld_base,
1261   struct lp_build_emit_data * emit_data)
1262{
1263   emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
1264                                   emit_data->args[0], emit_data->args[1]);
1265}
1266
1267/* TGSI_OPCODE_AND (CPU Only) */
1268static void
1269and_emit_cpu(
1270   const struct lp_build_tgsi_action * action,
1271   struct lp_build_tgsi_context * bld_base,
1272   struct lp_build_emit_data * emit_data)
1273{
1274   emit_data->output[emit_data->chan] = lp_build_and(&bld_base->uint_bld,
1275                                   emit_data->args[0], emit_data->args[1]);
1276}
1277
1278/* TGSI_OPCODE_ARL (CPU Only) */
1279static void
1280arl_emit_cpu(
1281   const struct lp_build_tgsi_action * action,
1282   struct lp_build_tgsi_context * bld_base,
1283   struct lp_build_emit_data * emit_data)
1284{
1285   LLVMValueRef tmp;
1286   tmp = lp_build_floor(&bld_base->base,
1287			emit_data->args[0]);
1288   emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
1289							bld_base->uint_bld.vec_type, "");
1290}
1291
1292/* TGSI_OPCODE_ARR (CPU Only) */
1293static void
1294arr_emit_cpu(
1295   const struct lp_build_tgsi_action * action,
1296   struct lp_build_tgsi_context * bld_base,
1297   struct lp_build_emit_data * emit_data)
1298{
1299   emit_data->output[emit_data->chan] = lp_build_iround(&bld_base->base, emit_data->args[0]);
1300}
1301
1302/* TGSI_OPCODE_CEIL (CPU Only) */
1303static void
1304ceil_emit_cpu(
1305   const struct lp_build_tgsi_action * action,
1306   struct lp_build_tgsi_context * bld_base,
1307   struct lp_build_emit_data * emit_data)
1308{
1309   emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
1310                                                      emit_data->args[0]);
1311}
1312
1313/* TGSI_OPCODE_CMP (CPU Only) */
1314static void
1315cmp_emit_cpu(
1316   const struct lp_build_tgsi_action * action,
1317   struct lp_build_tgsi_context * bld_base,
1318   struct lp_build_emit_data * emit_data)
1319{
1320   LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
1321                                   emit_data->args[0], bld_base->base.zero);
1322   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1323                                cond, emit_data->args[1], emit_data->args[2]);
1324}
1325
1326/* TGSI_OPCODE_UCMP (CPU Only) */
1327static void
1328ucmp_emit_cpu(
1329   const struct lp_build_tgsi_action * action,
1330   struct lp_build_tgsi_context * bld_base,
1331   struct lp_build_emit_data * emit_data)
1332{
1333   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1334   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1335   LLVMValueRef unsigned_cond =
1336      LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
1337   LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
1338                                    unsigned_cond,
1339                                    uint_bld->zero);
1340   emit_data->output[emit_data->chan] =
1341      lp_build_select(&bld_base->base,
1342                      cond, emit_data->args[1], emit_data->args[2]);
1343}
1344
1345/* TGSI_OPCODE_COS (CPU Only) */
1346static void
1347cos_emit_cpu(
1348   const struct lp_build_tgsi_action * action,
1349   struct lp_build_tgsi_context * bld_base,
1350   struct lp_build_emit_data * emit_data)
1351{
1352   emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
1353                                                       emit_data->args[0]);
1354}
1355
1356/* TGSI_OPCODE_DIV (CPU Only) */
1357static void
1358div_emit_cpu(
1359   const struct lp_build_tgsi_action * action,
1360   struct lp_build_tgsi_context * bld_base,
1361   struct lp_build_emit_data * emit_data)
1362{
1363   emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
1364                                   emit_data->args[0], emit_data->args[1]);
1365}
1366
1367/* TGSI_OPCODE_EX2 (CPU Only) */
1368static void
1369ex2_emit_cpu(
1370   const struct lp_build_tgsi_action * action,
1371   struct lp_build_tgsi_context * bld_base,
1372   struct lp_build_emit_data * emit_data)
1373{
1374   emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
1375                                                        emit_data->args[0]);
1376}
1377
1378/* TGSI_OPCODE_F2I (CPU Only) */
1379static void
1380f2i_emit_cpu(
1381   const struct lp_build_tgsi_action * action,
1382   struct lp_build_tgsi_context * bld_base,
1383   struct lp_build_emit_data * emit_data)
1384{
1385   emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
1386                                                        emit_data->args[0]);
1387}
1388
1389/* TGSI_OPCODE_FSET Helper (CPU Only) */
1390static void
1391fset_emit_cpu(
1392   const struct lp_build_tgsi_action * action,
1393   struct lp_build_tgsi_context * bld_base,
1394   struct lp_build_emit_data * emit_data,
1395   unsigned pipe_func)
1396{
1397   LLVMValueRef cond;
1398
1399   if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1400      cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1401                                  emit_data->args[0], emit_data->args[1]);
1402   }
1403   else {
1404      cond = lp_build_cmp(&bld_base->base, pipe_func,
1405                          emit_data->args[0], emit_data->args[1]);
1406
1407   }
1408   emit_data->output[emit_data->chan] = cond;
1409}
1410
1411
1412/* TGSI_OPCODE_FSEQ (CPU Only) */
1413static void
1414fseq_emit_cpu(
1415   const struct lp_build_tgsi_action * action,
1416   struct lp_build_tgsi_context * bld_base,
1417   struct lp_build_emit_data * emit_data)
1418{
1419   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1420}
1421
1422/* TGSI_OPCODE_ISGE (CPU Only) */
1423static void
1424fsge_emit_cpu(
1425   const struct lp_build_tgsi_action * action,
1426   struct lp_build_tgsi_context * bld_base,
1427   struct lp_build_emit_data * emit_data)
1428{
1429   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1430}
1431
1432/* TGSI_OPCODE_ISLT (CPU Only) */
1433static void
1434fslt_emit_cpu(
1435   const struct lp_build_tgsi_action * action,
1436   struct lp_build_tgsi_context * bld_base,
1437   struct lp_build_emit_data * emit_data)
1438{
1439   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1440}
1441
1442/* TGSI_OPCODE_USNE (CPU Only) */
1443
1444static void
1445fsne_emit_cpu(
1446   const struct lp_build_tgsi_action * action,
1447   struct lp_build_tgsi_context * bld_base,
1448   struct lp_build_emit_data * emit_data)
1449{
1450   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1451}
1452
1453/* TGSI_OPCODE_FLR (CPU Only) */
1454
1455static void
1456flr_emit_cpu(
1457   const struct lp_build_tgsi_action * action,
1458   struct lp_build_tgsi_context * bld_base,
1459   struct lp_build_emit_data * emit_data)
1460{
1461   emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
1462                                                         emit_data->args[0]);
1463}
1464
1465/* TGSI_OPCODE_I2F (CPU Only) */
1466static void
1467i2f_emit_cpu(
1468   const struct lp_build_tgsi_action * action,
1469   struct lp_build_tgsi_context * bld_base,
1470   struct lp_build_emit_data * emit_data)
1471{
1472   emit_data->output[emit_data->chan] = lp_build_int_to_float(&bld_base->base,
1473                                                              emit_data->args[0]);
1474}
1475
1476/* TGSI_OPCODE_IABS (CPU Only) */
1477static void
1478iabs_emit_cpu(
1479   const struct lp_build_tgsi_action * action,
1480   struct lp_build_tgsi_context * bld_base,
1481   struct lp_build_emit_data * emit_data)
1482{
1483   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
1484                                                       emit_data->args[0]);
1485}
1486
1487/* TGSI_OPCODE_IDIV (CPU Only) */
1488static void
1489idiv_emit_cpu(
1490   const struct lp_build_tgsi_action * action,
1491   struct lp_build_tgsi_context * bld_base,
1492   struct lp_build_emit_data * emit_data)
1493{
1494   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1495   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1496                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1497                                        bld_base->uint_bld.zero);
1498   /* We want to make sure that we never divide/mod by zero to not
1499    * generate sigfpe. We don't want to crash just because the
1500    * shader is doing something weird. */
1501   LLVMValueRef divisor = LLVMBuildOr(builder,
1502                                      div_mask,
1503                                      emit_data->args[1], "");
1504   LLVMValueRef result = lp_build_div(&bld_base->int_bld,
1505                                      emit_data->args[0], divisor);
1506   LLVMValueRef not_div_mask = LLVMBuildNot(builder,
1507                                            div_mask,"");
1508   /* idiv by zero doesn't have a guaranteed return value chose 0 for now. */
1509   emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1510                                                     not_div_mask,
1511                                                     result, "");
1512}
1513
1514/* TGSI_OPCODE_INEG (CPU Only) */
1515static void
1516ineg_emit_cpu(
1517   const struct lp_build_tgsi_action * action,
1518   struct lp_build_tgsi_context * bld_base,
1519   struct lp_build_emit_data * emit_data)
1520{
1521   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int_bld,
1522                                                     bld_base->int_bld.zero,
1523                                                     emit_data->args[0]);
1524}
1525
1526/* TGSI_OPCODE_ISET Helper (CPU Only) */
1527static void
1528iset_emit_cpu(
1529   const struct lp_build_tgsi_action * action,
1530   struct lp_build_tgsi_context * bld_base,
1531   struct lp_build_emit_data * emit_data,
1532   unsigned pipe_func)
1533{
1534   LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
1535                                    emit_data->args[0], emit_data->args[1]);
1536   emit_data->output[emit_data->chan] = cond;
1537}
1538
1539/* TGSI_OPCODE_IMAX (CPU Only) */
1540static void
1541imax_emit_cpu(
1542   const struct lp_build_tgsi_action * action,
1543   struct lp_build_tgsi_context * bld_base,
1544   struct lp_build_emit_data * emit_data)
1545{
1546   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int_bld,
1547                                   emit_data->args[0], emit_data->args[1]);
1548}
1549
1550/* TGSI_OPCODE_IMIN (CPU Only) */
1551static void
1552imin_emit_cpu(
1553   const struct lp_build_tgsi_action * action,
1554   struct lp_build_tgsi_context * bld_base,
1555   struct lp_build_emit_data * emit_data)
1556{
1557   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int_bld,
1558                                   emit_data->args[0], emit_data->args[1]);
1559}
1560
1561/* TGSI_OPCODE_ISGE (CPU Only) */
1562static void
1563isge_emit_cpu(
1564   const struct lp_build_tgsi_action * action,
1565   struct lp_build_tgsi_context * bld_base,
1566   struct lp_build_emit_data * emit_data)
1567{
1568   iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1569}
1570
1571/* TGSI_OPCODE_ISHR (CPU Only) */
1572static void
1573ishr_emit_cpu(
1574   const struct lp_build_tgsi_action * action,
1575   struct lp_build_tgsi_context * bld_base,
1576   struct lp_build_emit_data * emit_data)
1577{
1578   struct lp_build_context *int_bld = &bld_base->int_bld;
1579   LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
1580                                          int_bld->type.width - 1);
1581   LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
1582   emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
1583                                                     masked_count);
1584}
1585
1586/* TGSI_OPCODE_ISLT (CPU Only) */
1587static void
1588islt_emit_cpu(
1589   const struct lp_build_tgsi_action * action,
1590   struct lp_build_tgsi_context * bld_base,
1591   struct lp_build_emit_data * emit_data)
1592{
1593   iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1594}
1595
1596
1597/* TGSI_OPCODE_ISSG (CPU Only) */
1598static void
1599issg_emit_cpu(
1600   const struct lp_build_tgsi_action * action,
1601   struct lp_build_tgsi_context * bld_base,
1602   struct lp_build_emit_data * emit_data)
1603{
1604   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
1605                                                       emit_data->args[0]);
1606}
1607
1608/* TGSI_OPCODE_LG2 (CPU Only) */
1609static void
1610lg2_emit_cpu(
1611   const struct lp_build_tgsi_action * action,
1612   struct lp_build_tgsi_context * bld_base,
1613   struct lp_build_emit_data * emit_data)
1614{
1615   emit_data->output[emit_data->chan] = lp_build_log2_safe(&bld_base->base,
1616                                                           emit_data->args[0]);
1617}
1618
1619/* TGSI_OPCODE_LOG (CPU Only) */
1620static void
1621log_emit_cpu(
1622   const struct lp_build_tgsi_action * action,
1623   struct lp_build_tgsi_context * bld_base,
1624   struct lp_build_emit_data * emit_data)
1625{
1626   LLVMValueRef p_floor_log2;
1627   LLVMValueRef p_exp;
1628   LLVMValueRef p_log2;
1629   LLVMValueRef src0 = emit_data->args[0];
1630
1631   lp_build_log2_approx(&bld_base->base, src0,
1632                        &p_exp, &p_floor_log2, &p_log2, FALSE);
1633
1634   emit_data->output[TGSI_CHAN_X] = p_floor_log2;
1635
1636   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
1637                                             TGSI_OPCODE_DIV,
1638                                             src0, p_exp);
1639   emit_data->output[TGSI_CHAN_Z] = p_log2;
1640
1641   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
1642
1643}
1644
1645/* TGSI_OPCODE_MAD (CPU Only) */
1646
1647static void
1648mad_emit_cpu(
1649   const struct lp_build_tgsi_action * action,
1650   struct lp_build_tgsi_context * bld_base,
1651   struct lp_build_emit_data * emit_data)
1652{
1653   emit_data->output[emit_data->chan] =
1654      lp_build_mad(&bld_base->base,
1655                   emit_data->args[0], emit_data->args[1], emit_data->args[2]);
1656}
1657
1658/* TGSI_OPCODE_MAX (CPU Only) */
1659
1660static void
1661max_emit_cpu(
1662   const struct lp_build_tgsi_action * action,
1663   struct lp_build_tgsi_context * bld_base,
1664   struct lp_build_emit_data * emit_data)
1665{
1666   emit_data->output[emit_data->chan] =
1667      lp_build_max_ext(&bld_base->base,
1668                       emit_data->args[0], emit_data->args[1],
1669                       GALLIVM_NAN_RETURN_OTHER);
1670}
1671
1672/* TGSI_OPCODE_MIN (CPU Only) */
1673static void
1674min_emit_cpu(
1675   const struct lp_build_tgsi_action * action,
1676   struct lp_build_tgsi_context * bld_base,
1677   struct lp_build_emit_data * emit_data)
1678{
1679   emit_data->output[emit_data->chan] =
1680      lp_build_min_ext(&bld_base->base,
1681                       emit_data->args[0], emit_data->args[1],
1682                       GALLIVM_NAN_RETURN_OTHER);
1683}
1684
1685/* TGSI_OPCODE_MOD (CPU Only) */
1686static void
1687mod_emit_cpu(
1688   const struct lp_build_tgsi_action * action,
1689   struct lp_build_tgsi_context * bld_base,
1690   struct lp_build_emit_data * emit_data)
1691{
1692   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1693   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1694                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1695                                        bld_base->uint_bld.zero);
1696   /* We want to make sure that we never divide/mod by zero to not
1697    * generate sigfpe. We don't want to crash just because the
1698    * shader is doing something weird. */
1699   LLVMValueRef divisor = LLVMBuildOr(builder,
1700                                      div_mask,
1701                                      emit_data->args[1], "");
1702   LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
1703                                      emit_data->args[0], divisor);
1704   /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1705   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1706                                                    div_mask,
1707                                                    result, "");
1708}
1709
1710/* TGSI_OPCODE_NOT */
1711static void
1712not_emit_cpu(
1713   const struct lp_build_tgsi_action * action,
1714   struct lp_build_tgsi_context * bld_base,
1715   struct lp_build_emit_data * emit_data)
1716{
1717   emit_data->output[emit_data->chan] = lp_build_not(&bld_base->uint_bld,
1718                                                     emit_data->args[0]);
1719}
1720
1721/* TGSI_OPCODE_OR (CPU Only) */
1722static void
1723or_emit_cpu(
1724   const struct lp_build_tgsi_action * action,
1725   struct lp_build_tgsi_context * bld_base,
1726   struct lp_build_emit_data * emit_data)
1727{
1728   emit_data->output[emit_data->chan] = lp_build_or(&bld_base->uint_bld,
1729                                   emit_data->args[0], emit_data->args[1]);
1730}
1731
1732/* TGSI_OPCODE_POW (CPU Only) */
1733static void
1734pow_emit_cpu(
1735   const struct lp_build_tgsi_action * action,
1736   struct lp_build_tgsi_context * bld_base,
1737   struct lp_build_emit_data * emit_data)
1738{
1739   emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
1740                                   emit_data->args[0], emit_data->args[1]);
1741}
1742
1743
1744/* TGSI_OPCODE_RCP (CPU Only) */
1745
1746static void
1747rcp_emit_cpu(
1748   const struct lp_build_tgsi_action * action,
1749   struct lp_build_tgsi_context * bld_base,
1750   struct lp_build_emit_data * emit_data)
1751{
1752   emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
1753                                                       emit_data->args[0]);
1754}
1755
1756/* Reciprical squareroot (CPU Only) */
1757static void
1758recip_sqrt_emit_cpu(
1759   const struct lp_build_tgsi_action * action,
1760   struct lp_build_tgsi_context * bld_base,
1761   struct lp_build_emit_data * emit_data)
1762{
1763   emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1764                                                         emit_data->args[0]);
1765}
1766
1767static void
1768sqrt_emit_cpu(
1769   const struct lp_build_tgsi_action * action,
1770   struct lp_build_tgsi_context * bld_base,
1771   struct lp_build_emit_data * emit_data)
1772{
1773   emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->base,
1774                                                      emit_data->args[0]);
1775}
1776
1777
1778/* TGSI_OPCODE_ROUND (CPU Only) */
1779static void
1780round_emit_cpu(
1781   const struct lp_build_tgsi_action * action,
1782   struct lp_build_tgsi_context * bld_base,
1783   struct lp_build_emit_data * emit_data)
1784{
1785   emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1786                                                         emit_data->args[0]);
1787}
1788
1789/* TGSI_OPCODE_SET Helper (CPU Only) */
1790
1791static void
1792set_emit_cpu(
1793   const struct lp_build_tgsi_action * action,
1794   struct lp_build_tgsi_context * bld_base,
1795   struct lp_build_emit_data * emit_data,
1796   unsigned pipe_func)
1797{
1798   LLVMValueRef cond;
1799
1800   if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1801      cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1802                                  emit_data->args[0], emit_data->args[1]);
1803   }
1804   else {
1805      cond = lp_build_cmp(&bld_base->base, pipe_func,
1806                          emit_data->args[0], emit_data->args[1]);
1807
1808   }
1809   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1810                                          cond,
1811                                          bld_base->base.one,
1812                                          bld_base->base.zero);
1813}
1814
1815/* TGSI_OPCODE_SEQ (CPU Only) */
1816
1817static void
1818seq_emit_cpu(
1819   const struct lp_build_tgsi_action * action,
1820   struct lp_build_tgsi_context * bld_base,
1821   struct lp_build_emit_data * emit_data)
1822{
1823   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1824}
1825
1826/* TGSI_OPCODE_SGE (CPU Only) */
1827static void
1828sge_emit_cpu(
1829   const struct lp_build_tgsi_action * action,
1830   struct lp_build_tgsi_context * bld_base,
1831   struct lp_build_emit_data * emit_data)
1832{
1833   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1834}
1835
1836/* TGSI_OPCODE_SGT (CPU Only)*/
1837
1838static void
1839sgt_emit_cpu(
1840   const struct lp_build_tgsi_action * action,
1841   struct lp_build_tgsi_context * bld_base,
1842   struct lp_build_emit_data * emit_data)
1843{
1844   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
1845}
1846
1847/* TGSI_OPCODE_SHL (CPU Only) */
1848static void
1849shl_emit_cpu(
1850   const struct lp_build_tgsi_action * action,
1851   struct lp_build_tgsi_context * bld_base,
1852   struct lp_build_emit_data * emit_data)
1853{
1854   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1855   LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1856                                          uint_bld->type.width - 1);
1857   LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1858   emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
1859                                                     masked_count);
1860}
1861
1862/* TGSI_OPCODE_SIN (CPU Only) */
1863static void
1864sin_emit_cpu(
1865   const struct lp_build_tgsi_action * action,
1866   struct lp_build_tgsi_context * bld_base,
1867   struct lp_build_emit_data * emit_data)
1868{
1869   emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
1870                                                       emit_data->args[0]);
1871}
1872
1873/* TGSI_OPCODE_SLE (CPU Only) */
1874static void
1875sle_emit_cpu(
1876   const struct lp_build_tgsi_action * action,
1877   struct lp_build_tgsi_context * bld_base,
1878   struct lp_build_emit_data * emit_data)
1879{
1880   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
1881}
1882
1883/* TGSI_OPCODE_SLT (CPU Only) */
1884static void
1885slt_emit_cpu(
1886   const struct lp_build_tgsi_action * action,
1887   struct lp_build_tgsi_context * bld_base,
1888   struct lp_build_emit_data * emit_data)
1889{
1890   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1891}
1892
1893/* TGSI_OPCODE_SNE (CPU Only) */
1894
1895static void
1896sne_emit_cpu(
1897   const struct lp_build_tgsi_action * action,
1898   struct lp_build_tgsi_context * bld_base,
1899   struct lp_build_emit_data * emit_data)
1900{
1901   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1902}
1903
1904/* TGSI_OPCODE_SSG (CPU Only) */
1905
1906static void
1907ssg_emit_cpu(
1908   const struct lp_build_tgsi_action * action,
1909   struct lp_build_tgsi_context * bld_base,
1910   struct lp_build_emit_data * emit_data)
1911{
1912   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
1913                                                       emit_data->args[0]);
1914}
1915
1916/* TGSI_OPCODE_TRUNC (CPU Only) */
1917
1918static void
1919trunc_emit_cpu(
1920   const struct lp_build_tgsi_action * action,
1921   struct lp_build_tgsi_context * bld_base,
1922   struct lp_build_emit_data * emit_data)
1923{
1924   emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
1925                                                         emit_data->args[0]);
1926}
1927
1928/* TGSI_OPCODE_UADD (CPU Only) */
1929static void
1930uadd_emit_cpu(
1931   const struct lp_build_tgsi_action * action,
1932   struct lp_build_tgsi_context * bld_base,
1933   struct lp_build_emit_data * emit_data)
1934{
1935   emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint_bld,
1936                                   emit_data->args[0], emit_data->args[1]);
1937}
1938
1939/* TGSI_OPCODE_UDIV (CPU Only) */
1940static void
1941udiv_emit_cpu(
1942   const struct lp_build_tgsi_action * action,
1943   struct lp_build_tgsi_context * bld_base,
1944   struct lp_build_emit_data * emit_data)
1945{
1946
1947   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1948   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1949                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1950                                        bld_base->uint_bld.zero);
1951   /* We want to make sure that we never divide/mod by zero to not
1952    * generate sigfpe. We don't want to crash just because the
1953    * shader is doing something weird. */
1954   LLVMValueRef divisor = LLVMBuildOr(builder,
1955                                      div_mask,
1956                                      emit_data->args[1], "");
1957   LLVMValueRef result = lp_build_div(&bld_base->uint_bld,
1958                                      emit_data->args[0], divisor);
1959   /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1960   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1961                                                    div_mask,
1962                                                    result, "");
1963}
1964
1965/* TGSI_OPCODE_UMAX (CPU Only) */
1966static void
1967umax_emit_cpu(
1968   const struct lp_build_tgsi_action * action,
1969   struct lp_build_tgsi_context * bld_base,
1970   struct lp_build_emit_data * emit_data)
1971{
1972   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint_bld,
1973                                   emit_data->args[0], emit_data->args[1]);
1974}
1975
1976/* TGSI_OPCODE_UMIN (CPU Only) */
1977static void
1978umin_emit_cpu(
1979   const struct lp_build_tgsi_action * action,
1980   struct lp_build_tgsi_context * bld_base,
1981   struct lp_build_emit_data * emit_data)
1982{
1983   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint_bld,
1984                                   emit_data->args[0], emit_data->args[1]);
1985}
1986
1987/* TGSI_OPCODE_UMOD (CPU Only) */
1988static void
1989umod_emit_cpu(
1990   const struct lp_build_tgsi_action * action,
1991   struct lp_build_tgsi_context * bld_base,
1992   struct lp_build_emit_data * emit_data)
1993{
1994   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1995   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1996                                        PIPE_FUNC_EQUAL, emit_data->args[1],
1997                                        bld_base->uint_bld.zero);
1998   /* We want to make sure that we never divide/mod by zero to not
1999    * generate sigfpe. We don't want to crash just because the
2000    * shader is doing something weird. */
2001   LLVMValueRef divisor = LLVMBuildOr(builder,
2002                                      div_mask,
2003                                      emit_data->args[1], "");
2004   LLVMValueRef result = lp_build_mod(&bld_base->uint_bld,
2005                                      emit_data->args[0], divisor);
2006   /* umod by zero is guaranteed to return 0xffffffff */
2007   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
2008                                                    div_mask,
2009                                                    result, "");
2010}
2011
2012/* TGSI_OPCODE_USET Helper (CPU Only) */
2013static void
2014uset_emit_cpu(
2015   const struct lp_build_tgsi_action * action,
2016   struct lp_build_tgsi_context * bld_base,
2017   struct lp_build_emit_data * emit_data,
2018   unsigned pipe_func)
2019{
2020   LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
2021                                    emit_data->args[0], emit_data->args[1]);
2022   emit_data->output[emit_data->chan] = cond;
2023}
2024
2025
2026/* TGSI_OPCODE_USEQ (CPU Only) */
2027static void
2028useq_emit_cpu(
2029   const struct lp_build_tgsi_action * action,
2030   struct lp_build_tgsi_context * bld_base,
2031   struct lp_build_emit_data * emit_data)
2032{
2033   uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2034}
2035
2036/* TGSI_OPCODE_ISGE (CPU Only) */
2037static void
2038usge_emit_cpu(
2039   const struct lp_build_tgsi_action * action,
2040   struct lp_build_tgsi_context * bld_base,
2041   struct lp_build_emit_data * emit_data)
2042{
2043   uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2044}
2045
2046/* TGSI_OPCODE_USHR (CPU Only) */
2047static void
2048ushr_emit_cpu(
2049   const struct lp_build_tgsi_action * action,
2050   struct lp_build_tgsi_context * bld_base,
2051   struct lp_build_emit_data * emit_data)
2052{
2053   struct lp_build_context *uint_bld = &bld_base->uint_bld;
2054   LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2055                                          uint_bld->type.width - 1);
2056   LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2057   emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2058                                                     masked_count);
2059}
2060
2061/* TGSI_OPCODE_ISLT (CPU Only) */
2062static void
2063uslt_emit_cpu(
2064   const struct lp_build_tgsi_action * action,
2065   struct lp_build_tgsi_context * bld_base,
2066   struct lp_build_emit_data * emit_data)
2067{
2068   uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2069}
2070
2071/* TGSI_OPCODE_USNE (CPU Only) */
2072
2073static void
2074usne_emit_cpu(
2075   const struct lp_build_tgsi_action * action,
2076   struct lp_build_tgsi_context * bld_base,
2077   struct lp_build_emit_data * emit_data)
2078{
2079   uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2080}
2081
2082/* TGSI_OPCODE_XOR */
2083static void
2084xor_emit_cpu(
2085   const struct lp_build_tgsi_action * action,
2086   struct lp_build_tgsi_context * bld_base,
2087   struct lp_build_emit_data * emit_data)
2088{
2089   emit_data->output[emit_data->chan] = lp_build_xor(&bld_base->uint_bld,
2090                                                     emit_data->args[0],
2091                                                     emit_data->args[1]);
2092}
2093
2094/* TGSI_OPCODE_DABS (CPU Only) */
2095static void
2096dabs_emit_cpu(
2097   const struct lp_build_tgsi_action * action,
2098   struct lp_build_tgsi_context * bld_base,
2099   struct lp_build_emit_data * emit_data)
2100{
2101   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
2102                                                       emit_data->args[0]);
2103}
2104
2105/* TGSI_OPCODE_DNEG (CPU Only) */
2106static void
2107dneg_emit_cpu(
2108   const struct lp_build_tgsi_action * action,
2109   struct lp_build_tgsi_context * bld_base,
2110   struct lp_build_emit_data * emit_data)
2111{
2112   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
2113                                                     bld_base->dbl_bld.zero,
2114                                                     emit_data->args[0]);
2115}
2116
2117/* TGSI_OPCODE_DSET Helper (CPU Only) */
2118static void
2119dset_emit_cpu(
2120   const struct lp_build_tgsi_action * action,
2121   struct lp_build_tgsi_context * bld_base,
2122   struct lp_build_emit_data * emit_data,
2123   unsigned pipe_func)
2124{
2125   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2126   LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
2127                                    emit_data->args[0], emit_data->args[1]);
2128   /* arguments were 64 bit but store as 32 bit */
2129   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2130   emit_data->output[emit_data->chan] = cond;
2131}
2132
2133/* TGSI_OPCODE_DSEQ (CPU Only) */
2134static void
2135dseq_emit_cpu(
2136   const struct lp_build_tgsi_action * action,
2137   struct lp_build_tgsi_context * bld_base,
2138   struct lp_build_emit_data * emit_data)
2139{
2140   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2141}
2142
2143/* TGSI_OPCODE_DSGE (CPU Only) */
2144static void
2145dsge_emit_cpu(
2146   const struct lp_build_tgsi_action * action,
2147   struct lp_build_tgsi_context * bld_base,
2148   struct lp_build_emit_data * emit_data)
2149{
2150   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2151}
2152
2153/* TGSI_OPCODE_DSLT (CPU Only) */
2154static void
2155dslt_emit_cpu(
2156   const struct lp_build_tgsi_action * action,
2157   struct lp_build_tgsi_context * bld_base,
2158   struct lp_build_emit_data * emit_data)
2159{
2160   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2161}
2162
2163/* TGSI_OPCODE_DSNE (CPU Only) */
2164static void
2165dsne_emit_cpu(
2166   const struct lp_build_tgsi_action * action,
2167   struct lp_build_tgsi_context * bld_base,
2168   struct lp_build_emit_data * emit_data)
2169{
2170   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2171}
2172
2173/* Double Reciprocal squareroot (CPU Only) */
2174static void
2175drecip_sqrt_emit_cpu(
2176   const struct lp_build_tgsi_action * action,
2177   struct lp_build_tgsi_context * bld_base,
2178   struct lp_build_emit_data * emit_data)
2179{
2180   emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
2181                                                         emit_data->args[0]);
2182}
2183
2184/* Double Squareroot (CPU Only) */
2185static void
2186dsqrt_emit_cpu(
2187   const struct lp_build_tgsi_action * action,
2188   struct lp_build_tgsi_context * bld_base,
2189   struct lp_build_emit_data * emit_data)
2190{
2191   emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
2192                                                      emit_data->args[0]);
2193}
2194
2195static void
2196i64abs_emit_cpu(
2197   const struct lp_build_tgsi_action * action,
2198   struct lp_build_tgsi_context * bld_base,
2199   struct lp_build_emit_data * emit_data)
2200{
2201   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
2202                                                       emit_data->args[0]);
2203}
2204
2205static void
2206i64ssg_emit_cpu(
2207   const struct lp_build_tgsi_action * action,
2208   struct lp_build_tgsi_context * bld_base,
2209   struct lp_build_emit_data * emit_data)
2210{
2211   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
2212                                                       emit_data->args[0]);
2213}
2214
2215static void
2216i64neg_emit_cpu(
2217   const struct lp_build_tgsi_action * action,
2218   struct lp_build_tgsi_context * bld_base,
2219   struct lp_build_emit_data * emit_data)
2220{
2221   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
2222                                                     bld_base->int64_bld.zero,
2223                                                     emit_data->args[0]);
2224}
2225
2226static void
2227u64set_emit_cpu(
2228   const struct lp_build_tgsi_action * action,
2229   struct lp_build_tgsi_context * bld_base,
2230   struct lp_build_emit_data * emit_data,
2231   unsigned pipe_func)
2232{
2233   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2234   LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
2235                                    emit_data->args[0], emit_data->args[1]);
2236   /* arguments were 64 bit but store as 32 bit */
2237   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2238   emit_data->output[emit_data->chan] = cond;
2239}
2240
2241static void
2242u64seq_emit_cpu(
2243   const struct lp_build_tgsi_action * action,
2244   struct lp_build_tgsi_context * bld_base,
2245   struct lp_build_emit_data * emit_data)
2246{
2247   u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2248}
2249
2250static void
2251u64sne_emit_cpu(
2252   const struct lp_build_tgsi_action * action,
2253   struct lp_build_tgsi_context * bld_base,
2254   struct lp_build_emit_data * emit_data)
2255{
2256   u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2257}
2258
2259static void
2260u64slt_emit_cpu(
2261   const struct lp_build_tgsi_action * action,
2262   struct lp_build_tgsi_context * bld_base,
2263   struct lp_build_emit_data * emit_data)
2264{
2265   u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2266}
2267
2268static void
2269u64sge_emit_cpu(
2270   const struct lp_build_tgsi_action * action,
2271   struct lp_build_tgsi_context * bld_base,
2272   struct lp_build_emit_data * emit_data)
2273{
2274   u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2275}
2276
2277static void
2278i64set_emit_cpu(
2279   const struct lp_build_tgsi_action * action,
2280   struct lp_build_tgsi_context * bld_base,
2281   struct lp_build_emit_data * emit_data,
2282   unsigned pipe_func)
2283{
2284   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2285   LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
2286                                    emit_data->args[0], emit_data->args[1]);
2287   /* arguments were 64 bit but store as 32 bit */
2288   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2289   emit_data->output[emit_data->chan] = cond;
2290}
2291
2292static void
2293i64slt_emit_cpu(
2294   const struct lp_build_tgsi_action * action,
2295   struct lp_build_tgsi_context * bld_base,
2296   struct lp_build_emit_data * emit_data)
2297{
2298   i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2299}
2300
2301static void
2302i64sge_emit_cpu(
2303   const struct lp_build_tgsi_action * action,
2304   struct lp_build_tgsi_context * bld_base,
2305   struct lp_build_emit_data * emit_data)
2306{
2307   i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2308}
2309
2310static void
2311u64max_emit_cpu(
2312   const struct lp_build_tgsi_action * action,
2313   struct lp_build_tgsi_context * bld_base,
2314   struct lp_build_emit_data * emit_data)
2315{
2316   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
2317                                   emit_data->args[0], emit_data->args[1]);
2318}
2319
2320static void
2321u64min_emit_cpu(
2322   const struct lp_build_tgsi_action * action,
2323   struct lp_build_tgsi_context * bld_base,
2324   struct lp_build_emit_data * emit_data)
2325{
2326   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
2327                                   emit_data->args[0], emit_data->args[1]);
2328}
2329
2330static void
2331i64max_emit_cpu(
2332   const struct lp_build_tgsi_action * action,
2333   struct lp_build_tgsi_context * bld_base,
2334   struct lp_build_emit_data * emit_data)
2335{
2336   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
2337                                   emit_data->args[0], emit_data->args[1]);
2338}
2339
2340static void
2341i64min_emit_cpu(
2342   const struct lp_build_tgsi_action * action,
2343   struct lp_build_tgsi_context * bld_base,
2344   struct lp_build_emit_data * emit_data)
2345{
2346   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
2347                                   emit_data->args[0], emit_data->args[1]);
2348}
2349
2350static void
2351u64add_emit_cpu(
2352   const struct lp_build_tgsi_action * action,
2353   struct lp_build_tgsi_context * bld_base,
2354   struct lp_build_emit_data * emit_data)
2355{
2356   emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
2357                                   emit_data->args[0], emit_data->args[1]);
2358}
2359
2360static void
2361u64shl_emit_cpu(
2362   const struct lp_build_tgsi_action * action,
2363   struct lp_build_tgsi_context * bld_base,
2364   struct lp_build_emit_data * emit_data)
2365{
2366   struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2367   LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2368                                          uint_bld->type.width - 1);
2369   LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2370   emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
2371                                                     masked_count);
2372}
2373
2374static void
2375i64shr_emit_cpu(
2376   const struct lp_build_tgsi_action * action,
2377   struct lp_build_tgsi_context * bld_base,
2378   struct lp_build_emit_data * emit_data)
2379{
2380   struct lp_build_context *int_bld = &bld_base->int64_bld;
2381   LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
2382                                          int_bld->type.width - 1);
2383   LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
2384   emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
2385                                                     masked_count);
2386}
2387
2388static void
2389u64shr_emit_cpu(
2390   const struct lp_build_tgsi_action * action,
2391   struct lp_build_tgsi_context * bld_base,
2392   struct lp_build_emit_data * emit_data)
2393{
2394   struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2395   LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2396                                          uint_bld->type.width - 1);
2397   LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2398   emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2399                                                     masked_count);
2400}
2401static void bfi_emit_cpu(const struct lp_build_tgsi_action *action,
2402                         struct lp_build_tgsi_context *bld_base,
2403                         struct lp_build_emit_data *emit_data) {
2404  /*
2405   * def bfi(base, insert, offset, bits):
2406   *   if offset < 0 or bits < 0 or offset + bits > 32:
2407   *     return undefined
2408   *   # << defined such that mask == ~0 when bits == 32, offset == 0
2409   *   mask = ((1 << bits) - 1) << offset
2410   *   return ((insert << offset) & mask) | (base & ~mask)
2411   */
2412  struct lp_build_context *uint_bld = &bld_base->uint_bld;
2413  LLVMValueRef one_shl_bits_dec_one = lp_build_sub(
2414      uint_bld, lp_build_shl(uint_bld, uint_bld->one, emit_data->args[3]),
2415      uint_bld->one);
2416  LLVMValueRef mask =
2417      lp_build_shl(uint_bld, one_shl_bits_dec_one, emit_data->args[2]);
2418  LLVMValueRef insert_shl_offset =
2419      lp_build_shl(uint_bld, emit_data->args[1], emit_data->args[2]);
2420  LLVMValueRef insert_shl_offset_and_mask =
2421      lp_build_and(uint_bld, insert_shl_offset, mask);
2422  LLVMValueRef base_and_not_mask =
2423      lp_build_and(uint_bld, emit_data->args[0], lp_build_not(uint_bld, mask));
2424
2425  emit_data->output[emit_data->chan] =
2426      lp_build_or(uint_bld, insert_shl_offset_and_mask, base_and_not_mask);
2427}
2428
2429static void lsb_emit_cpu(const struct lp_build_tgsi_action *action,
2430                         struct lp_build_tgsi_context *bld_base,
2431                         struct lp_build_emit_data *emit_data) {
2432  struct lp_build_context *uint_bld = &bld_base->int_bld;
2433
2434  LLVMValueRef result = lp_build_cttz(uint_bld, emit_data->args[0]);
2435  LLVMValueRef cond =
2436      lp_build_cmp(uint_bld, PIPE_FUNC_LESS, result,
2437                   lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32));
2438  emit_data->output[emit_data->chan] = lp_build_select(
2439      uint_bld, cond, result,
2440      lp_build_const_vec(uint_bld->gallivm, uint_bld->type, -1));
2441}
2442
2443static void umsb_emit_cpu(const struct lp_build_tgsi_action *action,
2444                          struct lp_build_tgsi_context *bld_base,
2445                          struct lp_build_emit_data *emit_data) {
2446  struct lp_build_context *uint_bld = &bld_base->int_bld;
2447  emit_data->output[emit_data->chan] = lp_build_sub(
2448      uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 31),
2449      lp_build_ctlz(uint_bld, emit_data->args[0]));
2450}
2451
2452static void imsb_emit_cpu(const struct lp_build_tgsi_action *action,
2453                          struct lp_build_tgsi_context *bld_base,
2454                          struct lp_build_emit_data *emit_data) {
2455  struct lp_build_context *uint_bld = &bld_base->int_bld;
2456
2457  LLVMValueRef cond =
2458      lp_build_cmp(uint_bld, PIPE_FUNC_LESS, emit_data->args[0],
2459                   lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0));
2460  emit_data->args[0] = lp_build_select(
2461      uint_bld, cond, lp_build_not(uint_bld, emit_data->args[0]),
2462      emit_data->args[0]);
2463  umsb_emit_cpu(action, bld_base, emit_data);
2464}
2465
2466static void popc_emit_cpu(const struct lp_build_tgsi_action *action,
2467                          struct lp_build_tgsi_context *bld_base,
2468                          struct lp_build_emit_data *emit_data) {
2469  struct lp_build_context *uint_bld = &bld_base->int_bld;
2470  emit_data->output[emit_data->chan] =
2471      lp_build_popcount(uint_bld, emit_data->args[0]);
2472}
2473
2474static void ibfe_emit_cpu(const struct lp_build_tgsi_action *action,
2475                          struct lp_build_tgsi_context *bld_base,
2476                          struct lp_build_emit_data *emit_data) {
2477  /* def ibfe(value, offset, bits):
2478   *   if offset < 0 or bits < 0 or offset + bits > 32:
2479   *     return undefined
2480   *   if bits == 0: return 0
2481   *   # Note: >> sign-extends
2482   *   return (value << (32 - offset - bits)) >> (32 - bits)
2483   */
2484  struct lp_build_context *uint_bld = &bld_base->int_bld;
2485
2486  LLVMValueRef r_32_sub_bits = lp_build_sub(
2487      uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32),
2488      emit_data->args[2]);
2489  LLVMValueRef temp1 =
2490      lp_build_sub(uint_bld, r_32_sub_bits, emit_data->args[1]);
2491  LLVMValueRef temp2 = lp_build_shl(uint_bld, emit_data->args[0], temp1);
2492  LLVMValueRef cond =
2493      lp_build_cmp(uint_bld, PIPE_FUNC_EQUAL, emit_data->args[2],
2494                   lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0));
2495  emit_data->output[emit_data->chan] = lp_build_select(
2496      uint_bld, cond, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0),
2497      lp_build_shr(uint_bld, temp2, r_32_sub_bits));
2498}
2499
2500static void ubfe_emit_cpu(const struct lp_build_tgsi_action *action,
2501                          struct lp_build_tgsi_context *bld_base,
2502                          struct lp_build_emit_data *emit_data) {
2503  /* def ubfe(value, offset, bits):
2504   *   if offset < 0 or bits < 0 or offset + bits > 32:
2505   *     return undefined
2506   *   if bits == 0: return 0
2507   *   # Note: >> does not sign-extend
2508   *   return (value << (32 - offset - bits)) >> (32 - bits)
2509   */
2510  struct lp_build_context *uint_bld = &bld_base->uint_bld;
2511
2512  LLVMValueRef r_32_sub_bits = lp_build_sub(
2513      uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32),
2514      emit_data->args[2]);
2515  LLVMValueRef temp1 =
2516      lp_build_sub(uint_bld, r_32_sub_bits, emit_data->args[1]);
2517  LLVMValueRef temp2 = lp_build_shl(uint_bld, emit_data->args[0], temp1);
2518  emit_data->output[emit_data->chan] =
2519      lp_build_shr(uint_bld, temp2, r_32_sub_bits);
2520}
2521
2522static void brev_emit_cpu(const struct lp_build_tgsi_action *action,
2523                          struct lp_build_tgsi_context *bld_base,
2524                          struct lp_build_emit_data *emit_data) {
2525  struct lp_build_context *uint_bld = &bld_base->uint_bld;
2526  emit_data->output[emit_data->chan] =
2527      lp_build_bitfield_reverse(uint_bld, emit_data->args[0]);
2528}
2529
2530void
2531lp_set_default_actions_cpu(
2532   struct lp_build_tgsi_context * bld_base)
2533{
2534   lp_set_default_actions(bld_base);
2535   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
2536   bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
2537   bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
2538   bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
2539   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
2540   bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
2541   bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
2542   bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
2543   bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
2544   bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
2545   bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
2546   bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
2547   bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
2548   bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
2549   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
2550
2551   bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
2552   bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
2553   bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
2554   bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
2555   bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
2556   bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
2557   bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
2558   bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
2559   bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
2560   bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
2561   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
2562   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
2563
2564   bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
2565   bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
2566   bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu;
2567   bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
2568   bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
2569   bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
2570   bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
2571   bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
2572   bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
2573   bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
2574   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
2575   bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
2576   bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
2577   bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
2578   bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
2579   bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu;
2580   bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
2581   bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
2582   bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
2583   bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
2584   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
2585
2586   bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
2587   bld_base->sqrt_action.emit = sqrt_emit_cpu;
2588
2589   bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu;
2590   bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu;
2591   bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu;
2592   bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu;
2593   bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu;
2594   bld_base->op_actions[TGSI_OPCODE_UMOD].emit = umod_emit_cpu;
2595   bld_base->op_actions[TGSI_OPCODE_USEQ].emit = useq_emit_cpu;
2596   bld_base->op_actions[TGSI_OPCODE_USGE].emit = usge_emit_cpu;
2597   bld_base->op_actions[TGSI_OPCODE_USHR].emit = ushr_emit_cpu;
2598   bld_base->op_actions[TGSI_OPCODE_USLT].emit = uslt_emit_cpu;
2599   bld_base->op_actions[TGSI_OPCODE_USNE].emit = usne_emit_cpu;
2600
2601   bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
2602
2603   bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
2604   bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
2605   bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
2606   bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
2607   bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
2608   bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
2609
2610   bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
2611   bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
2612
2613   bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
2614   bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
2615   bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
2616
2617   bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
2618   bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
2619   bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
2620   bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
2621   bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
2622   bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
2623
2624   bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
2625   bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
2626   bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
2627   bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
2628
2629   bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
2630   bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
2631   bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
2632   bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
2633   bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
2634
2635   bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
2636   bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
2637   bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
2638
2639   bld_base->op_actions[TGSI_OPCODE_BFI].emit = bfi_emit_cpu;
2640   bld_base->op_actions[TGSI_OPCODE_POPC].emit = popc_emit_cpu;
2641   bld_base->op_actions[TGSI_OPCODE_LSB].emit = lsb_emit_cpu;
2642   bld_base->op_actions[TGSI_OPCODE_IMSB].emit = imsb_emit_cpu;
2643   bld_base->op_actions[TGSI_OPCODE_UMSB].emit = umsb_emit_cpu;
2644   bld_base->op_actions[TGSI_OPCODE_IBFE].emit = ibfe_emit_cpu;
2645   bld_base->op_actions[TGSI_OPCODE_UBFE].emit = ubfe_emit_cpu;
2646   bld_base->op_actions[TGSI_OPCODE_BREV].emit = brev_emit_cpu;
2647
2648}
2649