14a49301eSmrg/**************************************************************************
24a49301eSmrg *
3af69d88dSmrg * Copyright 2007-2008 VMware, Inc.
44a49301eSmrg * All Rights Reserved.
5cdc920a0Smrg * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
64a49301eSmrg *
74a49301eSmrg * Permission is hereby granted, free of charge, to any person obtaining a
84a49301eSmrg * copy of this software and associated documentation files (the
94a49301eSmrg * "Software"), to deal in the Software without restriction, including
104a49301eSmrg * without limitation the rights to use, copy, modify, merge, publish,
114a49301eSmrg * distribute, sub license, and/or sell copies of the Software, and to
124a49301eSmrg * permit persons to whom the Software is furnished to do so, subject to
134a49301eSmrg * the following conditions:
144a49301eSmrg *
154a49301eSmrg * The above copyright notice and this permission notice (including the
164a49301eSmrg * next paragraph) shall be included in all copies or substantial portions
174a49301eSmrg * of the Software.
184a49301eSmrg *
194a49301eSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
204a49301eSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
214a49301eSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
234a49301eSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
244a49301eSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
254a49301eSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
264a49301eSmrg *
274a49301eSmrg **************************************************************************/
284a49301eSmrg
294a49301eSmrg/**
304a49301eSmrg * TGSI interpreter/executor.
314a49301eSmrg *
324a49301eSmrg * Flow control information:
334a49301eSmrg *
344a49301eSmrg * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
354a49301eSmrg * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
364a49301eSmrg * care since a condition may be true for some quad components but false
374a49301eSmrg * for other components.
384a49301eSmrg *
394a49301eSmrg * We basically execute all statements (even if they're in the part of
404a49301eSmrg * an IF/ELSE clause that's "not taken") and use a special mask to
414a49301eSmrg * control writing to destination registers.  This is the ExecMask.
424a49301eSmrg * See store_dest().
434a49301eSmrg *
444a49301eSmrg * The ExecMask is computed from three other masks (CondMask, LoopMask and
454a49301eSmrg * ContMask) which are controlled by the flow control instructions (namely:
464a49301eSmrg * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
474a49301eSmrg *
484a49301eSmrg *
494a49301eSmrg * Authors:
504a49301eSmrg *   Michal Krol
514a49301eSmrg *   Brian Paul
524a49301eSmrg */
534a49301eSmrg
544a49301eSmrg#include "pipe/p_compiler.h"
554a49301eSmrg#include "pipe/p_state.h"
564a49301eSmrg#include "pipe/p_shader_tokens.h"
574a49301eSmrg#include "tgsi/tgsi_dump.h"
584a49301eSmrg#include "tgsi/tgsi_parse.h"
594a49301eSmrg#include "tgsi/tgsi_util.h"
604a49301eSmrg#include "tgsi_exec.h"
617ec681f3Smrg#include "util/compiler.h"
627ec681f3Smrg#include "util/half_float.h"
634a49301eSmrg#include "util/u_memory.h"
644a49301eSmrg#include "util/u_math.h"
6501e04c3fSmrg#include "util/rounding.h"
664a49301eSmrg
674a49301eSmrg
/* NOTE(review): no use of this switch is visible in this part of the file;
 * presumably a non-zero value enables extra execution debugging -- confirm
 * against the code that tests it.
 */
68af69d88dSmrg#define DEBUG_EXECUTION 0
69af69d88dSmrg
70af69d88dSmrg
/* Channel indices of the four entries of a quad.  The derivative helpers
 * (micro_ddx, micro_ddx_fine, micro_ddy, micro_ddy_fine) use them to pick
 * the entries they subtract from each other.
 */
714a49301eSmrg#define TILE_TOP_LEFT     0
724a49301eSmrg#define TILE_TOP_RIGHT    1
734a49301eSmrg#define TILE_BOTTOM_LEFT  2
744a49301eSmrg#define TILE_BOTTOM_RIGHT 3
754a49301eSmrg
/**
 * One 64-bit value per quad entry, viewable as a double, as a pair of
 * unsigned words, or as an unsigned/signed 64-bit integer.
 *
 * The 64-bit comparison helpers in this file store their result only in
 * word [0] of the \c u view and leave word [1] untouched.
 *
 * NOTE(review): ALIGN16 is defined outside this file; presumably it requests
 * 16-byte alignment -- confirm.
 */
7601e04c3fSmrgunion tgsi_double_channel {
7701e04c3fSmrg   double d[TGSI_QUAD_SIZE];
7801e04c3fSmrg   unsigned u[TGSI_QUAD_SIZE][2];
7901e04c3fSmrg   uint64_t u64[TGSI_QUAD_SIZE];
8001e04c3fSmrg   int64_t i64[TGSI_QUAD_SIZE];
817ec681f3Smrg} ALIGN16;
8201e04c3fSmrg
/**
 * A pair of 64-bit channels.
 *
 * NOTE(review): the member names suggest that \c xy holds the value built
 * from a register's X/Y components and \c zw the one built from Z/W; the
 * code that fills them is not visible here -- confirm.
 */
837ec681f3Smrgstruct ALIGN16 tgsi_double_vector {
8401e04c3fSmrg   union tgsi_double_channel xy;
8501e04c3fSmrg   union tgsi_double_channel zw;
8601e04c3fSmrg};
8701e04c3fSmrg
88cdc920a0Smrgstatic void
89cdc920a0Smrgmicro_abs(union tgsi_exec_channel *dst,
90cdc920a0Smrg          const union tgsi_exec_channel *src)
91cdc920a0Smrg{
92cdc920a0Smrg   dst->f[0] = fabsf(src->f[0]);
93cdc920a0Smrg   dst->f[1] = fabsf(src->f[1]);
94cdc920a0Smrg   dst->f[2] = fabsf(src->f[2]);
95cdc920a0Smrg   dst->f[3] = fabsf(src->f[3]);
96cdc920a0Smrg}
97cdc920a0Smrg
98cdc920a0Smrgstatic void
99cdc920a0Smrgmicro_arl(union tgsi_exec_channel *dst,
100cdc920a0Smrg          const union tgsi_exec_channel *src)
101cdc920a0Smrg{
102cdc920a0Smrg   dst->i[0] = (int)floorf(src->f[0]);
103cdc920a0Smrg   dst->i[1] = (int)floorf(src->f[1]);
104cdc920a0Smrg   dst->i[2] = (int)floorf(src->f[2]);
105cdc920a0Smrg   dst->i[3] = (int)floorf(src->f[3]);
106cdc920a0Smrg}
107cdc920a0Smrg
108cdc920a0Smrgstatic void
109cdc920a0Smrgmicro_arr(union tgsi_exec_channel *dst,
110cdc920a0Smrg          const union tgsi_exec_channel *src)
111cdc920a0Smrg{
112cdc920a0Smrg   dst->i[0] = (int)floorf(src->f[0] + 0.5f);
113cdc920a0Smrg   dst->i[1] = (int)floorf(src->f[1] + 0.5f);
114cdc920a0Smrg   dst->i[2] = (int)floorf(src->f[2] + 0.5f);
115cdc920a0Smrg   dst->i[3] = (int)floorf(src->f[3] + 0.5f);
116cdc920a0Smrg}
117cdc920a0Smrg
118cdc920a0Smrgstatic void
119cdc920a0Smrgmicro_ceil(union tgsi_exec_channel *dst,
120cdc920a0Smrg           const union tgsi_exec_channel *src)
121cdc920a0Smrg{
122cdc920a0Smrg   dst->f[0] = ceilf(src->f[0]);
123cdc920a0Smrg   dst->f[1] = ceilf(src->f[1]);
124cdc920a0Smrg   dst->f[2] = ceilf(src->f[2]);
125cdc920a0Smrg   dst->f[3] = ceilf(src->f[3]);
126cdc920a0Smrg}
127cdc920a0Smrg
128cdc920a0Smrgstatic void
129cdc920a0Smrgmicro_cmp(union tgsi_exec_channel *dst,
130cdc920a0Smrg          const union tgsi_exec_channel *src0,
131cdc920a0Smrg          const union tgsi_exec_channel *src1,
132cdc920a0Smrg          const union tgsi_exec_channel *src2)
133cdc920a0Smrg{
134cdc920a0Smrg   dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
135cdc920a0Smrg   dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
136cdc920a0Smrg   dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
137cdc920a0Smrg   dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
138cdc920a0Smrg}
139cdc920a0Smrg
140cdc920a0Smrgstatic void
141cdc920a0Smrgmicro_cos(union tgsi_exec_channel *dst,
142cdc920a0Smrg          const union tgsi_exec_channel *src)
143cdc920a0Smrg{
144cdc920a0Smrg   dst->f[0] = cosf(src->f[0]);
145cdc920a0Smrg   dst->f[1] = cosf(src->f[1]);
146cdc920a0Smrg   dst->f[2] = cosf(src->f[2]);
147cdc920a0Smrg   dst->f[3] = cosf(src->f[3]);
148cdc920a0Smrg}
149cdc920a0Smrg
15001e04c3fSmrgstatic void
15101e04c3fSmrgmicro_d2f(union tgsi_exec_channel *dst,
15201e04c3fSmrg          const union tgsi_double_channel *src)
15301e04c3fSmrg{
15401e04c3fSmrg   dst->f[0] = (float)src->d[0];
15501e04c3fSmrg   dst->f[1] = (float)src->d[1];
15601e04c3fSmrg   dst->f[2] = (float)src->d[2];
15701e04c3fSmrg   dst->f[3] = (float)src->d[3];
15801e04c3fSmrg}
15901e04c3fSmrg
16001e04c3fSmrgstatic void
16101e04c3fSmrgmicro_d2i(union tgsi_exec_channel *dst,
16201e04c3fSmrg          const union tgsi_double_channel *src)
16301e04c3fSmrg{
16401e04c3fSmrg   dst->i[0] = (int)src->d[0];
16501e04c3fSmrg   dst->i[1] = (int)src->d[1];
16601e04c3fSmrg   dst->i[2] = (int)src->d[2];
16701e04c3fSmrg   dst->i[3] = (int)src->d[3];
16801e04c3fSmrg}
16901e04c3fSmrg
17001e04c3fSmrgstatic void
17101e04c3fSmrgmicro_d2u(union tgsi_exec_channel *dst,
17201e04c3fSmrg          const union tgsi_double_channel *src)
17301e04c3fSmrg{
17401e04c3fSmrg   dst->u[0] = (unsigned)src->d[0];
17501e04c3fSmrg   dst->u[1] = (unsigned)src->d[1];
17601e04c3fSmrg   dst->u[2] = (unsigned)src->d[2];
17701e04c3fSmrg   dst->u[3] = (unsigned)src->d[3];
17801e04c3fSmrg}
17901e04c3fSmrgstatic void
18001e04c3fSmrgmicro_dabs(union tgsi_double_channel *dst,
18101e04c3fSmrg           const union tgsi_double_channel *src)
18201e04c3fSmrg{
18301e04c3fSmrg   dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
18401e04c3fSmrg   dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
18501e04c3fSmrg   dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
18601e04c3fSmrg   dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
18701e04c3fSmrg}
18801e04c3fSmrg
18901e04c3fSmrgstatic void
19001e04c3fSmrgmicro_dadd(union tgsi_double_channel *dst,
19101e04c3fSmrg          const union tgsi_double_channel *src)
19201e04c3fSmrg{
19301e04c3fSmrg   dst->d[0] = src[0].d[0] + src[1].d[0];
19401e04c3fSmrg   dst->d[1] = src[0].d[1] + src[1].d[1];
19501e04c3fSmrg   dst->d[2] = src[0].d[2] + src[1].d[2];
19601e04c3fSmrg   dst->d[3] = src[0].d[3] + src[1].d[3];
19701e04c3fSmrg}
19801e04c3fSmrg
19901e04c3fSmrgstatic void
20001e04c3fSmrgmicro_ddiv(union tgsi_double_channel *dst,
20101e04c3fSmrg          const union tgsi_double_channel *src)
20201e04c3fSmrg{
20301e04c3fSmrg   dst->d[0] = src[0].d[0] / src[1].d[0];
20401e04c3fSmrg   dst->d[1] = src[0].d[1] / src[1].d[1];
20501e04c3fSmrg   dst->d[2] = src[0].d[2] / src[1].d[2];
20601e04c3fSmrg   dst->d[3] = src[0].d[3] / src[1].d[3];
20701e04c3fSmrg}
20801e04c3fSmrg
209cdc920a0Smrgstatic void
210cdc920a0Smrgmicro_ddx(union tgsi_exec_channel *dst,
211cdc920a0Smrg          const union tgsi_exec_channel *src)
212cdc920a0Smrg{
213cdc920a0Smrg   dst->f[0] =
214cdc920a0Smrg   dst->f[1] =
215cdc920a0Smrg   dst->f[2] =
216cdc920a0Smrg   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
217cdc920a0Smrg}
218cdc920a0Smrg
2197ec681f3Smrgstatic void
2207ec681f3Smrgmicro_ddx_fine(union tgsi_exec_channel *dst,
2217ec681f3Smrg          const union tgsi_exec_channel *src)
2227ec681f3Smrg{
2237ec681f3Smrg   dst->f[0] =
2247ec681f3Smrg   dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
2257ec681f3Smrg   dst->f[2] =
2267ec681f3Smrg   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
2277ec681f3Smrg}
2287ec681f3Smrg
2297ec681f3Smrg
230cdc920a0Smrgstatic void
231cdc920a0Smrgmicro_ddy(union tgsi_exec_channel *dst,
232cdc920a0Smrg          const union tgsi_exec_channel *src)
233cdc920a0Smrg{
234cdc920a0Smrg   dst->f[0] =
235cdc920a0Smrg   dst->f[1] =
236cdc920a0Smrg   dst->f[2] =
237cdc920a0Smrg   dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
238cdc920a0Smrg}
239cdc920a0Smrg
2407ec681f3Smrgstatic void
2417ec681f3Smrgmicro_ddy_fine(union tgsi_exec_channel *dst,
2427ec681f3Smrg          const union tgsi_exec_channel *src)
2437ec681f3Smrg{
2447ec681f3Smrg   dst->f[0] =
2457ec681f3Smrg   dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
2467ec681f3Smrg   dst->f[1] =
2477ec681f3Smrg   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
2487ec681f3Smrg}
2497ec681f3Smrg
25001e04c3fSmrgstatic void
25101e04c3fSmrgmicro_dmul(union tgsi_double_channel *dst,
25201e04c3fSmrg           const union tgsi_double_channel *src)
25301e04c3fSmrg{
25401e04c3fSmrg   dst->d[0] = src[0].d[0] * src[1].d[0];
25501e04c3fSmrg   dst->d[1] = src[0].d[1] * src[1].d[1];
25601e04c3fSmrg   dst->d[2] = src[0].d[2] * src[1].d[2];
25701e04c3fSmrg   dst->d[3] = src[0].d[3] * src[1].d[3];
25801e04c3fSmrg}
25901e04c3fSmrg
26001e04c3fSmrgstatic void
26101e04c3fSmrgmicro_dmax(union tgsi_double_channel *dst,
26201e04c3fSmrg           const union tgsi_double_channel *src)
26301e04c3fSmrg{
2647ec681f3Smrg   dst->d[0] = fmax(src[0].d[0], src[1].d[0]);
2657ec681f3Smrg   dst->d[1] = fmax(src[0].d[1], src[1].d[1]);
2667ec681f3Smrg   dst->d[2] = fmax(src[0].d[2], src[1].d[2]);
2677ec681f3Smrg   dst->d[3] = fmax(src[0].d[3], src[1].d[3]);
26801e04c3fSmrg}
26901e04c3fSmrg
27001e04c3fSmrgstatic void
27101e04c3fSmrgmicro_dmin(union tgsi_double_channel *dst,
27201e04c3fSmrg           const union tgsi_double_channel *src)
27301e04c3fSmrg{
2747ec681f3Smrg   dst->d[0] = fmin(src[0].d[0], src[1].d[0]);
2757ec681f3Smrg   dst->d[1] = fmin(src[0].d[1], src[1].d[1]);
2767ec681f3Smrg   dst->d[2] = fmin(src[0].d[2], src[1].d[2]);
2777ec681f3Smrg   dst->d[3] = fmin(src[0].d[3], src[1].d[3]);
27801e04c3fSmrg}
27901e04c3fSmrg
28001e04c3fSmrgstatic void
28101e04c3fSmrgmicro_dneg(union tgsi_double_channel *dst,
28201e04c3fSmrg           const union tgsi_double_channel *src)
28301e04c3fSmrg{
28401e04c3fSmrg   dst->d[0] = -src->d[0];
28501e04c3fSmrg   dst->d[1] = -src->d[1];
28601e04c3fSmrg   dst->d[2] = -src->d[2];
28701e04c3fSmrg   dst->d[3] = -src->d[3];
28801e04c3fSmrg}
28901e04c3fSmrg
29001e04c3fSmrgstatic void
29101e04c3fSmrgmicro_dslt(union tgsi_double_channel *dst,
29201e04c3fSmrg           const union tgsi_double_channel *src)
29301e04c3fSmrg{
29401e04c3fSmrg   dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
29501e04c3fSmrg   dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
29601e04c3fSmrg   dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
29701e04c3fSmrg   dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
29801e04c3fSmrg}
29901e04c3fSmrg
30001e04c3fSmrgstatic void
30101e04c3fSmrgmicro_dsne(union tgsi_double_channel *dst,
30201e04c3fSmrg           const union tgsi_double_channel *src)
30301e04c3fSmrg{
30401e04c3fSmrg   dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
30501e04c3fSmrg   dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
30601e04c3fSmrg   dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
30701e04c3fSmrg   dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
30801e04c3fSmrg}
30901e04c3fSmrg
31001e04c3fSmrgstatic void
31101e04c3fSmrgmicro_dsge(union tgsi_double_channel *dst,
31201e04c3fSmrg           const union tgsi_double_channel *src)
31301e04c3fSmrg{
31401e04c3fSmrg   dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
31501e04c3fSmrg   dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
31601e04c3fSmrg   dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
31701e04c3fSmrg   dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
31801e04c3fSmrg}
31901e04c3fSmrg
32001e04c3fSmrgstatic void
32101e04c3fSmrgmicro_dseq(union tgsi_double_channel *dst,
32201e04c3fSmrg           const union tgsi_double_channel *src)
32301e04c3fSmrg{
32401e04c3fSmrg   dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
32501e04c3fSmrg   dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
32601e04c3fSmrg   dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
32701e04c3fSmrg   dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
32801e04c3fSmrg}
32901e04c3fSmrg
33001e04c3fSmrgstatic void
33101e04c3fSmrgmicro_drcp(union tgsi_double_channel *dst,
33201e04c3fSmrg           const union tgsi_double_channel *src)
33301e04c3fSmrg{
33401e04c3fSmrg   dst->d[0] = 1.0 / src->d[0];
33501e04c3fSmrg   dst->d[1] = 1.0 / src->d[1];
33601e04c3fSmrg   dst->d[2] = 1.0 / src->d[2];
33701e04c3fSmrg   dst->d[3] = 1.0 / src->d[3];
33801e04c3fSmrg}
33901e04c3fSmrg
34001e04c3fSmrgstatic void
34101e04c3fSmrgmicro_dsqrt(union tgsi_double_channel *dst,
34201e04c3fSmrg            const union tgsi_double_channel *src)
34301e04c3fSmrg{
34401e04c3fSmrg   dst->d[0] = sqrt(src->d[0]);
34501e04c3fSmrg   dst->d[1] = sqrt(src->d[1]);
34601e04c3fSmrg   dst->d[2] = sqrt(src->d[2]);
34701e04c3fSmrg   dst->d[3] = sqrt(src->d[3]);
34801e04c3fSmrg}
34901e04c3fSmrg
35001e04c3fSmrgstatic void
35101e04c3fSmrgmicro_drsq(union tgsi_double_channel *dst,
35201e04c3fSmrg          const union tgsi_double_channel *src)
35301e04c3fSmrg{
35401e04c3fSmrg   dst->d[0] = 1.0 / sqrt(src->d[0]);
35501e04c3fSmrg   dst->d[1] = 1.0 / sqrt(src->d[1]);
35601e04c3fSmrg   dst->d[2] = 1.0 / sqrt(src->d[2]);
35701e04c3fSmrg   dst->d[3] = 1.0 / sqrt(src->d[3]);
35801e04c3fSmrg}
35901e04c3fSmrg
36001e04c3fSmrgstatic void
36101e04c3fSmrgmicro_dmad(union tgsi_double_channel *dst,
36201e04c3fSmrg           const union tgsi_double_channel *src)
36301e04c3fSmrg{
36401e04c3fSmrg   dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
36501e04c3fSmrg   dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
36601e04c3fSmrg   dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
36701e04c3fSmrg   dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
36801e04c3fSmrg}
36901e04c3fSmrg
37001e04c3fSmrgstatic void
37101e04c3fSmrgmicro_dfrac(union tgsi_double_channel *dst,
37201e04c3fSmrg            const union tgsi_double_channel *src)
37301e04c3fSmrg{
37401e04c3fSmrg   dst->d[0] = src->d[0] - floor(src->d[0]);
37501e04c3fSmrg   dst->d[1] = src->d[1] - floor(src->d[1]);
37601e04c3fSmrg   dst->d[2] = src->d[2] - floor(src->d[2]);
37701e04c3fSmrg   dst->d[3] = src->d[3] - floor(src->d[3]);
37801e04c3fSmrg}
37901e04c3fSmrg
3807ec681f3Smrgstatic void
3817ec681f3Smrgmicro_dflr(union tgsi_double_channel *dst,
3827ec681f3Smrg           const union tgsi_double_channel *src)
3837ec681f3Smrg{
3847ec681f3Smrg   dst->d[0] = floor(src->d[0]);
3857ec681f3Smrg   dst->d[1] = floor(src->d[1]);
3867ec681f3Smrg   dst->d[2] = floor(src->d[2]);
3877ec681f3Smrg   dst->d[3] = floor(src->d[3]);
3887ec681f3Smrg}
3897ec681f3Smrg
39001e04c3fSmrgstatic void
39101e04c3fSmrgmicro_dldexp(union tgsi_double_channel *dst,
39201e04c3fSmrg             const union tgsi_double_channel *src0,
39301e04c3fSmrg             union tgsi_exec_channel *src1)
39401e04c3fSmrg{
39501e04c3fSmrg   dst->d[0] = ldexp(src0->d[0], src1->i[0]);
39601e04c3fSmrg   dst->d[1] = ldexp(src0->d[1], src1->i[1]);
39701e04c3fSmrg   dst->d[2] = ldexp(src0->d[2], src1->i[2]);
39801e04c3fSmrg   dst->d[3] = ldexp(src0->d[3], src1->i[3]);
39901e04c3fSmrg}
40001e04c3fSmrg
40101e04c3fSmrgstatic void
40201e04c3fSmrgmicro_dfracexp(union tgsi_double_channel *dst,
40301e04c3fSmrg               union tgsi_exec_channel *dst_exp,
40401e04c3fSmrg               const union tgsi_double_channel *src)
40501e04c3fSmrg{
40601e04c3fSmrg   dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
40701e04c3fSmrg   dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
40801e04c3fSmrg   dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
40901e04c3fSmrg   dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
41001e04c3fSmrg}
41101e04c3fSmrg
412cdc920a0Smrgstatic void
413cdc920a0Smrgmicro_exp2(union tgsi_exec_channel *dst,
414cdc920a0Smrg           const union tgsi_exec_channel *src)
415cdc920a0Smrg{
416cdc920a0Smrg#if DEBUG
417cdc920a0Smrg   /* Inf is okay for this instruction, so clamp it to silence assertions. */
418cdc920a0Smrg   uint i;
419cdc920a0Smrg   union tgsi_exec_channel clamped;
420cdc920a0Smrg
421cdc920a0Smrg   for (i = 0; i < 4; i++) {
422cdc920a0Smrg      if (src->f[i] > 127.99999f) {
423cdc920a0Smrg         clamped.f[i] = 127.99999f;
424cdc920a0Smrg      } else if (src->f[i] < -126.99999f) {
425cdc920a0Smrg         clamped.f[i] = -126.99999f;
426cdc920a0Smrg      } else {
427cdc920a0Smrg         clamped.f[i] = src->f[i];
428cdc920a0Smrg      }
429cdc920a0Smrg   }
430cdc920a0Smrg   src = &clamped;
431cdc920a0Smrg#endif /* DEBUG */
432cdc920a0Smrg
433cdc920a0Smrg   dst->f[0] = powf(2.0f, src->f[0]);
434cdc920a0Smrg   dst->f[1] = powf(2.0f, src->f[1]);
435cdc920a0Smrg   dst->f[2] = powf(2.0f, src->f[2]);
436cdc920a0Smrg   dst->f[3] = powf(2.0f, src->f[3]);
437cdc920a0Smrg}
438cdc920a0Smrg
43901e04c3fSmrgstatic void
44001e04c3fSmrgmicro_f2d(union tgsi_double_channel *dst,
44101e04c3fSmrg          const union tgsi_exec_channel *src)
44201e04c3fSmrg{
44301e04c3fSmrg   dst->d[0] = (double)src->f[0];
44401e04c3fSmrg   dst->d[1] = (double)src->f[1];
44501e04c3fSmrg   dst->d[2] = (double)src->f[2];
44601e04c3fSmrg   dst->d[3] = (double)src->f[3];
44701e04c3fSmrg}
44801e04c3fSmrg
449cdc920a0Smrgstatic void
450cdc920a0Smrgmicro_flr(union tgsi_exec_channel *dst,
451cdc920a0Smrg          const union tgsi_exec_channel *src)
452cdc920a0Smrg{
453cdc920a0Smrg   dst->f[0] = floorf(src->f[0]);
454cdc920a0Smrg   dst->f[1] = floorf(src->f[1]);
455cdc920a0Smrg   dst->f[2] = floorf(src->f[2]);
456cdc920a0Smrg   dst->f[3] = floorf(src->f[3]);
457cdc920a0Smrg}
458cdc920a0Smrg
459cdc920a0Smrgstatic void
460cdc920a0Smrgmicro_frc(union tgsi_exec_channel *dst,
461cdc920a0Smrg          const union tgsi_exec_channel *src)
462cdc920a0Smrg{
463cdc920a0Smrg   dst->f[0] = src->f[0] - floorf(src->f[0]);
464cdc920a0Smrg   dst->f[1] = src->f[1] - floorf(src->f[1]);
465cdc920a0Smrg   dst->f[2] = src->f[2] - floorf(src->f[2]);
466cdc920a0Smrg   dst->f[3] = src->f[3] - floorf(src->f[3]);
467cdc920a0Smrg}
468cdc920a0Smrg
46901e04c3fSmrgstatic void
47001e04c3fSmrgmicro_i2d(union tgsi_double_channel *dst,
47101e04c3fSmrg          const union tgsi_exec_channel *src)
47201e04c3fSmrg{
47301e04c3fSmrg   dst->d[0] = (double)src->i[0];
47401e04c3fSmrg   dst->d[1] = (double)src->i[1];
47501e04c3fSmrg   dst->d[2] = (double)src->i[2];
47601e04c3fSmrg   dst->d[3] = (double)src->i[3];
47701e04c3fSmrg}
47801e04c3fSmrg
479cdc920a0Smrgstatic void
480cdc920a0Smrgmicro_iabs(union tgsi_exec_channel *dst,
481cdc920a0Smrg           const union tgsi_exec_channel *src)
482cdc920a0Smrg{
483cdc920a0Smrg   dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
484cdc920a0Smrg   dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
485cdc920a0Smrg   dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
486cdc920a0Smrg   dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
487cdc920a0Smrg}
488cdc920a0Smrg
489cdc920a0Smrgstatic void
490cdc920a0Smrgmicro_ineg(union tgsi_exec_channel *dst,
491cdc920a0Smrg           const union tgsi_exec_channel *src)
492cdc920a0Smrg{
493cdc920a0Smrg   dst->i[0] = -src->i[0];
494cdc920a0Smrg   dst->i[1] = -src->i[1];
495cdc920a0Smrg   dst->i[2] = -src->i[2];
496cdc920a0Smrg   dst->i[3] = -src->i[3];
497cdc920a0Smrg}
498cdc920a0Smrg
499cdc920a0Smrgstatic void
500cdc920a0Smrgmicro_lg2(union tgsi_exec_channel *dst,
501cdc920a0Smrg          const union tgsi_exec_channel *src)
502cdc920a0Smrg{
503cdc920a0Smrg   dst->f[0] = logf(src->f[0]) * 1.442695f;
504cdc920a0Smrg   dst->f[1] = logf(src->f[1]) * 1.442695f;
505cdc920a0Smrg   dst->f[2] = logf(src->f[2]) * 1.442695f;
506cdc920a0Smrg   dst->f[3] = logf(src->f[3]) * 1.442695f;
507cdc920a0Smrg}
508cdc920a0Smrg
509cdc920a0Smrgstatic void
510cdc920a0Smrgmicro_lrp(union tgsi_exec_channel *dst,
511cdc920a0Smrg          const union tgsi_exec_channel *src0,
512cdc920a0Smrg          const union tgsi_exec_channel *src1,
513cdc920a0Smrg          const union tgsi_exec_channel *src2)
514cdc920a0Smrg{
515cdc920a0Smrg   dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
516cdc920a0Smrg   dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
517cdc920a0Smrg   dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
518cdc920a0Smrg   dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
519cdc920a0Smrg}
520cdc920a0Smrg
521cdc920a0Smrgstatic void
522cdc920a0Smrgmicro_mad(union tgsi_exec_channel *dst,
523cdc920a0Smrg          const union tgsi_exec_channel *src0,
524cdc920a0Smrg          const union tgsi_exec_channel *src1,
525cdc920a0Smrg          const union tgsi_exec_channel *src2)
526cdc920a0Smrg{
527cdc920a0Smrg   dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
528cdc920a0Smrg   dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
529cdc920a0Smrg   dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
530cdc920a0Smrg   dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
531cdc920a0Smrg}
532cdc920a0Smrg
533cdc920a0Smrgstatic void
534cdc920a0Smrgmicro_mov(union tgsi_exec_channel *dst,
535cdc920a0Smrg          const union tgsi_exec_channel *src)
536cdc920a0Smrg{
537cdc920a0Smrg   dst->u[0] = src->u[0];
538cdc920a0Smrg   dst->u[1] = src->u[1];
539cdc920a0Smrg   dst->u[2] = src->u[2];
540cdc920a0Smrg   dst->u[3] = src->u[3];
541cdc920a0Smrg}
542cdc920a0Smrg
543cdc920a0Smrgstatic void
544cdc920a0Smrgmicro_rcp(union tgsi_exec_channel *dst,
545cdc920a0Smrg          const union tgsi_exec_channel *src)
546cdc920a0Smrg{
547cdc920a0Smrg#if 0 /* for debugging */
548cdc920a0Smrg   assert(src->f[0] != 0.0f);
549cdc920a0Smrg   assert(src->f[1] != 0.0f);
550cdc920a0Smrg   assert(src->f[2] != 0.0f);
551cdc920a0Smrg   assert(src->f[3] != 0.0f);
552cdc920a0Smrg#endif
553cdc920a0Smrg   dst->f[0] = 1.0f / src->f[0];
554cdc920a0Smrg   dst->f[1] = 1.0f / src->f[1];
555cdc920a0Smrg   dst->f[2] = 1.0f / src->f[2];
556cdc920a0Smrg   dst->f[3] = 1.0f / src->f[3];
557cdc920a0Smrg}
558cdc920a0Smrg
559cdc920a0Smrgstatic void
560cdc920a0Smrgmicro_rnd(union tgsi_exec_channel *dst,
561cdc920a0Smrg          const union tgsi_exec_channel *src)
562cdc920a0Smrg{
56301e04c3fSmrg   dst->f[0] = _mesa_roundevenf(src->f[0]);
56401e04c3fSmrg   dst->f[1] = _mesa_roundevenf(src->f[1]);
56501e04c3fSmrg   dst->f[2] = _mesa_roundevenf(src->f[2]);
56601e04c3fSmrg   dst->f[3] = _mesa_roundevenf(src->f[3]);
567cdc920a0Smrg}
568cdc920a0Smrg
569cdc920a0Smrgstatic void
570cdc920a0Smrgmicro_rsq(union tgsi_exec_channel *dst,
571cdc920a0Smrg          const union tgsi_exec_channel *src)
572cdc920a0Smrg{
573cdc920a0Smrg#if 0 /* for debugging */
574cdc920a0Smrg   assert(src->f[0] != 0.0f);
575cdc920a0Smrg   assert(src->f[1] != 0.0f);
576cdc920a0Smrg   assert(src->f[2] != 0.0f);
577cdc920a0Smrg   assert(src->f[3] != 0.0f);
578cdc920a0Smrg#endif
579af69d88dSmrg   dst->f[0] = 1.0f / sqrtf(src->f[0]);
580af69d88dSmrg   dst->f[1] = 1.0f / sqrtf(src->f[1]);
581af69d88dSmrg   dst->f[2] = 1.0f / sqrtf(src->f[2]);
582af69d88dSmrg   dst->f[3] = 1.0f / sqrtf(src->f[3]);
583af69d88dSmrg}
584af69d88dSmrg
585af69d88dSmrgstatic void
586af69d88dSmrgmicro_sqrt(union tgsi_exec_channel *dst,
587af69d88dSmrg           const union tgsi_exec_channel *src)
588af69d88dSmrg{
589af69d88dSmrg   dst->f[0] = sqrtf(src->f[0]);
590af69d88dSmrg   dst->f[1] = sqrtf(src->f[1]);
591af69d88dSmrg   dst->f[2] = sqrtf(src->f[2]);
592af69d88dSmrg   dst->f[3] = sqrtf(src->f[3]);
593cdc920a0Smrg}
594cdc920a0Smrg
595cdc920a0Smrgstatic void
596cdc920a0Smrgmicro_seq(union tgsi_exec_channel *dst,
597cdc920a0Smrg          const union tgsi_exec_channel *src0,
598cdc920a0Smrg          const union tgsi_exec_channel *src1)
599cdc920a0Smrg{
600cdc920a0Smrg   dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
601cdc920a0Smrg   dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
602cdc920a0Smrg   dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
603cdc920a0Smrg   dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
604cdc920a0Smrg}
605cdc920a0Smrg
606cdc920a0Smrgstatic void
607cdc920a0Smrgmicro_sge(union tgsi_exec_channel *dst,
608cdc920a0Smrg          const union tgsi_exec_channel *src0,
609cdc920a0Smrg          const union tgsi_exec_channel *src1)
610cdc920a0Smrg{
611cdc920a0Smrg   dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
612cdc920a0Smrg   dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
613cdc920a0Smrg   dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
614cdc920a0Smrg   dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
615cdc920a0Smrg}
616cdc920a0Smrg
617cdc920a0Smrgstatic void
618cdc920a0Smrgmicro_sgn(union tgsi_exec_channel *dst,
619cdc920a0Smrg          const union tgsi_exec_channel *src)
620cdc920a0Smrg{
621cdc920a0Smrg   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
622cdc920a0Smrg   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
623cdc920a0Smrg   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
624cdc920a0Smrg   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
625cdc920a0Smrg}
626cdc920a0Smrg
627af69d88dSmrgstatic void
628af69d88dSmrgmicro_isgn(union tgsi_exec_channel *dst,
629af69d88dSmrg          const union tgsi_exec_channel *src)
630af69d88dSmrg{
631af69d88dSmrg   dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
632af69d88dSmrg   dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
633af69d88dSmrg   dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
634af69d88dSmrg   dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
635af69d88dSmrg}
636af69d88dSmrg
637cdc920a0Smrgstatic void
638cdc920a0Smrgmicro_sgt(union tgsi_exec_channel *dst,
639cdc920a0Smrg          const union tgsi_exec_channel *src0,
640cdc920a0Smrg          const union tgsi_exec_channel *src1)
641cdc920a0Smrg{
642cdc920a0Smrg   dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
643cdc920a0Smrg   dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
644cdc920a0Smrg   dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
645cdc920a0Smrg   dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
646cdc920a0Smrg}
647cdc920a0Smrg
648cdc920a0Smrgstatic void
649cdc920a0Smrgmicro_sin(union tgsi_exec_channel *dst,
650cdc920a0Smrg          const union tgsi_exec_channel *src)
651cdc920a0Smrg{
652cdc920a0Smrg   dst->f[0] = sinf(src->f[0]);
653cdc920a0Smrg   dst->f[1] = sinf(src->f[1]);
654cdc920a0Smrg   dst->f[2] = sinf(src->f[2]);
655cdc920a0Smrg   dst->f[3] = sinf(src->f[3]);
656cdc920a0Smrg}
657cdc920a0Smrg
658cdc920a0Smrgstatic void
659cdc920a0Smrgmicro_sle(union tgsi_exec_channel *dst,
660cdc920a0Smrg          const union tgsi_exec_channel *src0,
661cdc920a0Smrg          const union tgsi_exec_channel *src1)
662cdc920a0Smrg{
663cdc920a0Smrg   dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
664cdc920a0Smrg   dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
665cdc920a0Smrg   dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
666cdc920a0Smrg   dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
667cdc920a0Smrg}
668cdc920a0Smrg
669cdc920a0Smrgstatic void
670cdc920a0Smrgmicro_slt(union tgsi_exec_channel *dst,
671cdc920a0Smrg          const union tgsi_exec_channel *src0,
672cdc920a0Smrg          const union tgsi_exec_channel *src1)
673cdc920a0Smrg{
674cdc920a0Smrg   dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
675cdc920a0Smrg   dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
676cdc920a0Smrg   dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
677cdc920a0Smrg   dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
678cdc920a0Smrg}
679cdc920a0Smrg
680cdc920a0Smrgstatic void
681cdc920a0Smrgmicro_sne(union tgsi_exec_channel *dst,
682cdc920a0Smrg          const union tgsi_exec_channel *src0,
683cdc920a0Smrg          const union tgsi_exec_channel *src1)
684cdc920a0Smrg{
685cdc920a0Smrg   dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
686cdc920a0Smrg   dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
687cdc920a0Smrg   dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
688cdc920a0Smrg   dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
689cdc920a0Smrg}
690cdc920a0Smrg
6913464ebd5Sriastradhstatic void
69201e04c3fSmrgmicro_trunc(union tgsi_exec_channel *dst,
69301e04c3fSmrg            const union tgsi_exec_channel *src)
69401e04c3fSmrg{
69501e04c3fSmrg   dst->f[0] = truncf(src->f[0]);
69601e04c3fSmrg   dst->f[1] = truncf(src->f[1]);
69701e04c3fSmrg   dst->f[2] = truncf(src->f[2]);
69801e04c3fSmrg   dst->f[3] = truncf(src->f[3]);
69901e04c3fSmrg}
70001e04c3fSmrg
70101e04c3fSmrgstatic void
70201e04c3fSmrgmicro_u2d(union tgsi_double_channel *dst,
70301e04c3fSmrg          const union tgsi_exec_channel *src)
7043464ebd5Sriastradh{
70501e04c3fSmrg   dst->d[0] = (double)src->u[0];
70601e04c3fSmrg   dst->d[1] = (double)src->u[1];
70701e04c3fSmrg   dst->d[2] = (double)src->u[2];
70801e04c3fSmrg   dst->d[3] = (double)src->u[3];
7093464ebd5Sriastradh}
7103464ebd5Sriastradh
7113464ebd5Sriastradhstatic void
71201e04c3fSmrgmicro_i64abs(union tgsi_double_channel *dst,
71301e04c3fSmrg             const union tgsi_double_channel *src)
7143464ebd5Sriastradh{
71501e04c3fSmrg   dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
71601e04c3fSmrg   dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
71701e04c3fSmrg   dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
71801e04c3fSmrg   dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
7193464ebd5Sriastradh}
7203464ebd5Sriastradh
721cdc920a0Smrgstatic void
72201e04c3fSmrgmicro_i64sgn(union tgsi_double_channel *dst,
72301e04c3fSmrg             const union tgsi_double_channel *src)
72401e04c3fSmrg{
72501e04c3fSmrg   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
72601e04c3fSmrg   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
72701e04c3fSmrg   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
72801e04c3fSmrg   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
72901e04c3fSmrg}
73001e04c3fSmrg
73101e04c3fSmrgstatic void
73201e04c3fSmrgmicro_i64neg(union tgsi_double_channel *dst,
73301e04c3fSmrg             const union tgsi_double_channel *src)
73401e04c3fSmrg{
73501e04c3fSmrg   dst->i64[0] = -src->i64[0];
73601e04c3fSmrg   dst->i64[1] = -src->i64[1];
73701e04c3fSmrg   dst->i64[2] = -src->i64[2];
73801e04c3fSmrg   dst->i64[3] = -src->i64[3];
73901e04c3fSmrg}
74001e04c3fSmrg
74101e04c3fSmrgstatic void
74201e04c3fSmrgmicro_u64seq(union tgsi_double_channel *dst,
74301e04c3fSmrg           const union tgsi_double_channel *src)
74401e04c3fSmrg{
74501e04c3fSmrg   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
74601e04c3fSmrg   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
74701e04c3fSmrg   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
74801e04c3fSmrg   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
74901e04c3fSmrg}
75001e04c3fSmrg
75101e04c3fSmrgstatic void
75201e04c3fSmrgmicro_u64sne(union tgsi_double_channel *dst,
75301e04c3fSmrg             const union tgsi_double_channel *src)
75401e04c3fSmrg{
75501e04c3fSmrg   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
75601e04c3fSmrg   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
75701e04c3fSmrg   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
75801e04c3fSmrg   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
75901e04c3fSmrg}
76001e04c3fSmrg
76101e04c3fSmrgstatic void
76201e04c3fSmrgmicro_i64slt(union tgsi_double_channel *dst,
76301e04c3fSmrg             const union tgsi_double_channel *src)
76401e04c3fSmrg{
76501e04c3fSmrg   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
76601e04c3fSmrg   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
76701e04c3fSmrg   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
76801e04c3fSmrg   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
76901e04c3fSmrg}
77001e04c3fSmrg
77101e04c3fSmrgstatic void
77201e04c3fSmrgmicro_u64slt(union tgsi_double_channel *dst,
77301e04c3fSmrg             const union tgsi_double_channel *src)
77401e04c3fSmrg{
77501e04c3fSmrg   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
77601e04c3fSmrg   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
77701e04c3fSmrg   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
77801e04c3fSmrg   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
77901e04c3fSmrg}
78001e04c3fSmrg
78101e04c3fSmrgstatic void
78201e04c3fSmrgmicro_i64sge(union tgsi_double_channel *dst,
78301e04c3fSmrg           const union tgsi_double_channel *src)
78401e04c3fSmrg{
78501e04c3fSmrg   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
78601e04c3fSmrg   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
78701e04c3fSmrg   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
78801e04c3fSmrg   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
78901e04c3fSmrg}
79001e04c3fSmrg
79101e04c3fSmrgstatic void
79201e04c3fSmrgmicro_u64sge(union tgsi_double_channel *dst,
79301e04c3fSmrg             const union tgsi_double_channel *src)
79401e04c3fSmrg{
79501e04c3fSmrg   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
79601e04c3fSmrg   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
79701e04c3fSmrg   dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
79801e04c3fSmrg   dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
79901e04c3fSmrg}
80001e04c3fSmrg
80101e04c3fSmrgstatic void
80201e04c3fSmrgmicro_u64max(union tgsi_double_channel *dst,
80301e04c3fSmrg             const union tgsi_double_channel *src)
80401e04c3fSmrg{
80501e04c3fSmrg   dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
80601e04c3fSmrg   dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
80701e04c3fSmrg   dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
80801e04c3fSmrg   dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
80901e04c3fSmrg}
81001e04c3fSmrg
81101e04c3fSmrgstatic void
81201e04c3fSmrgmicro_i64max(union tgsi_double_channel *dst,
81301e04c3fSmrg             const union tgsi_double_channel *src)
81401e04c3fSmrg{
81501e04c3fSmrg   dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
81601e04c3fSmrg   dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
81701e04c3fSmrg   dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
81801e04c3fSmrg   dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
81901e04c3fSmrg}
82001e04c3fSmrg
82101e04c3fSmrgstatic void
82201e04c3fSmrgmicro_u64min(union tgsi_double_channel *dst,
82301e04c3fSmrg             const union tgsi_double_channel *src)
82401e04c3fSmrg{
82501e04c3fSmrg   dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
82601e04c3fSmrg   dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
82701e04c3fSmrg   dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
82801e04c3fSmrg   dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
82901e04c3fSmrg}
83001e04c3fSmrg
83101e04c3fSmrgstatic void
83201e04c3fSmrgmicro_i64min(union tgsi_double_channel *dst,
83301e04c3fSmrg             const union tgsi_double_channel *src)
83401e04c3fSmrg{
83501e04c3fSmrg   dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
83601e04c3fSmrg   dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
83701e04c3fSmrg   dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
83801e04c3fSmrg   dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
83901e04c3fSmrg}
84001e04c3fSmrg
84101e04c3fSmrgstatic void
84201e04c3fSmrgmicro_u64add(union tgsi_double_channel *dst,
84301e04c3fSmrg             const union tgsi_double_channel *src)
84401e04c3fSmrg{
84501e04c3fSmrg   dst->u64[0] = src[0].u64[0] + src[1].u64[0];
84601e04c3fSmrg   dst->u64[1] = src[0].u64[1] + src[1].u64[1];
84701e04c3fSmrg   dst->u64[2] = src[0].u64[2] + src[1].u64[2];
84801e04c3fSmrg   dst->u64[3] = src[0].u64[3] + src[1].u64[3];
84901e04c3fSmrg}
85001e04c3fSmrg
85101e04c3fSmrgstatic void
85201e04c3fSmrgmicro_u64mul(union tgsi_double_channel *dst,
85301e04c3fSmrg             const union tgsi_double_channel *src)
85401e04c3fSmrg{
85501e04c3fSmrg   dst->u64[0] = src[0].u64[0] * src[1].u64[0];
85601e04c3fSmrg   dst->u64[1] = src[0].u64[1] * src[1].u64[1];
85701e04c3fSmrg   dst->u64[2] = src[0].u64[2] * src[1].u64[2];
85801e04c3fSmrg   dst->u64[3] = src[0].u64[3] * src[1].u64[3];
85901e04c3fSmrg}
86001e04c3fSmrg
86101e04c3fSmrgstatic void
86201e04c3fSmrgmicro_u64div(union tgsi_double_channel *dst,
86301e04c3fSmrg             const union tgsi_double_channel *src)
86401e04c3fSmrg{
86501e04c3fSmrg   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
86601e04c3fSmrg   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
86701e04c3fSmrg   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
86801e04c3fSmrg   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
86901e04c3fSmrg}
87001e04c3fSmrg
87101e04c3fSmrgstatic void
87201e04c3fSmrgmicro_i64div(union tgsi_double_channel *dst,
87301e04c3fSmrg             const union tgsi_double_channel *src)
87401e04c3fSmrg{
87501e04c3fSmrg   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
87601e04c3fSmrg   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
87701e04c3fSmrg   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
87801e04c3fSmrg   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
87901e04c3fSmrg}
88001e04c3fSmrg
88101e04c3fSmrgstatic void
88201e04c3fSmrgmicro_u64mod(union tgsi_double_channel *dst,
88301e04c3fSmrg             const union tgsi_double_channel *src)
88401e04c3fSmrg{
88501e04c3fSmrg   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
88601e04c3fSmrg   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
88701e04c3fSmrg   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
88801e04c3fSmrg   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
88901e04c3fSmrg}
89001e04c3fSmrg
89101e04c3fSmrgstatic void
89201e04c3fSmrgmicro_i64mod(union tgsi_double_channel *dst,
89301e04c3fSmrg             const union tgsi_double_channel *src)
89401e04c3fSmrg{
89501e04c3fSmrg   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
89601e04c3fSmrg   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
89701e04c3fSmrg   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
89801e04c3fSmrg   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
89901e04c3fSmrg}
90001e04c3fSmrg
90101e04c3fSmrgstatic void
90201e04c3fSmrgmicro_u64shl(union tgsi_double_channel *dst,
90301e04c3fSmrg             const union tgsi_double_channel *src0,
90401e04c3fSmrg             union tgsi_exec_channel *src1)
90501e04c3fSmrg{
90601e04c3fSmrg   unsigned masked_count;
90701e04c3fSmrg   masked_count = src1->u[0] & 0x3f;
90801e04c3fSmrg   dst->u64[0] = src0->u64[0] << masked_count;
90901e04c3fSmrg   masked_count = src1->u[1] & 0x3f;
91001e04c3fSmrg   dst->u64[1] = src0->u64[1] << masked_count;
91101e04c3fSmrg   masked_count = src1->u[2] & 0x3f;
91201e04c3fSmrg   dst->u64[2] = src0->u64[2] << masked_count;
91301e04c3fSmrg   masked_count = src1->u[3] & 0x3f;
91401e04c3fSmrg   dst->u64[3] = src0->u64[3] << masked_count;
91501e04c3fSmrg}
91601e04c3fSmrg
91701e04c3fSmrgstatic void
91801e04c3fSmrgmicro_i64shr(union tgsi_double_channel *dst,
91901e04c3fSmrg             const union tgsi_double_channel *src0,
92001e04c3fSmrg             union tgsi_exec_channel *src1)
921cdc920a0Smrg{
92201e04c3fSmrg   unsigned masked_count;
92301e04c3fSmrg   masked_count = src1->u[0] & 0x3f;
92401e04c3fSmrg   dst->i64[0] = src0->i64[0] >> masked_count;
92501e04c3fSmrg   masked_count = src1->u[1] & 0x3f;
92601e04c3fSmrg   dst->i64[1] = src0->i64[1] >> masked_count;
92701e04c3fSmrg   masked_count = src1->u[2] & 0x3f;
92801e04c3fSmrg   dst->i64[2] = src0->i64[2] >> masked_count;
92901e04c3fSmrg   masked_count = src1->u[3] & 0x3f;
93001e04c3fSmrg   dst->i64[3] = src0->i64[3] >> masked_count;
931cdc920a0Smrg}
932cdc920a0Smrg
93301e04c3fSmrgstatic void
93401e04c3fSmrgmicro_u64shr(union tgsi_double_channel *dst,
93501e04c3fSmrg             const union tgsi_double_channel *src0,
93601e04c3fSmrg             union tgsi_exec_channel *src1)
93701e04c3fSmrg{
93801e04c3fSmrg   unsigned masked_count;
93901e04c3fSmrg   masked_count = src1->u[0] & 0x3f;
94001e04c3fSmrg   dst->u64[0] = src0->u64[0] >> masked_count;
94101e04c3fSmrg   masked_count = src1->u[1] & 0x3f;
94201e04c3fSmrg   dst->u64[1] = src0->u64[1] >> masked_count;
94301e04c3fSmrg   masked_count = src1->u[2] & 0x3f;
94401e04c3fSmrg   dst->u64[2] = src0->u64[2] >> masked_count;
94501e04c3fSmrg   masked_count = src1->u[3] & 0x3f;
94601e04c3fSmrg   dst->u64[3] = src0->u64[3] >> masked_count;
94701e04c3fSmrg}
948cdc920a0Smrg
/**
 * Data type tags: float, signed/unsigned 32-bit integer, double and
 * signed/unsigned 64-bit integer.  The numeric values are spelled out
 * explicitly; they match the implicit numbering starting at zero.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5
};
957cdc920a0Smrg
/**
 * Recompute the execution mask: ExecMask becomes the bitwise AND of
 * CondMask, LoopMask, ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer to the machine.  It is parenthesized in the expansion
 * so that any pointer expression (e.g. &machine) can be passed, and it is
 * evaluated several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
9614a49301eSmrg
9624a49301eSmrg
9634a49301eSmrgstatic const union tgsi_exec_channel ZeroVec =
9644a49301eSmrg   { { 0.0, 0.0, 0.0, 0.0 } };
9654a49301eSmrg
966cdc920a0Smrgstatic const union tgsi_exec_channel OneVec = {
967cdc920a0Smrg   {1.0f, 1.0f, 1.0f, 1.0f}
968cdc920a0Smrg};
969cdc920a0Smrg
9703464ebd5Sriastradhstatic const union tgsi_exec_channel P128Vec = {
9713464ebd5Sriastradh   {128.0f, 128.0f, 128.0f, 128.0f}
9723464ebd5Sriastradh};
9733464ebd5Sriastradh
9743464ebd5Sriastradhstatic const union tgsi_exec_channel M128Vec = {
9753464ebd5Sriastradh   {-128.0f, -128.0f, -128.0f, -128.0f}
9763464ebd5Sriastradh};
9773464ebd5Sriastradh
9784a49301eSmrg
979cdc920a0Smrg/**
980cdc920a0Smrg * Assert that none of the float values in 'chan' are infinite or NaN.
981cdc920a0Smrg * NaN and Inf may occur normally during program execution and should
982cdc920a0Smrg * not lead to crashes, etc.  But when debugging, it's helpful to catch
983cdc920a0Smrg * them.
984cdc920a0Smrg */
98501e04c3fSmrgstatic inline void
9864a49301eSmrgcheck_inf_or_nan(const union tgsi_exec_channel *chan)
9874a49301eSmrg{
988cdc920a0Smrg   assert(!util_is_inf_or_nan((chan)->f[0]));
989cdc920a0Smrg   assert(!util_is_inf_or_nan((chan)->f[1]));
990cdc920a0Smrg   assert(!util_is_inf_or_nan((chan)->f[2]));
991cdc920a0Smrg   assert(!util_is_inf_or_nan((chan)->f[3]));
9924a49301eSmrg}
9934a49301eSmrg
9944a49301eSmrg
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
10034a49301eSmrg
10044a49301eSmrg
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of the machine, one line
 * per component (X, Y, Z, W), each line showing the four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char names[] = "XYZW";
   int comp;

   debug_printf("Temp[%u] =\n", index);
   for (comp = 0; comp < 4; comp++) {
      const union tgsi_exec_channel *chan = &mach->Temps[index].xyzw[comp];

      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   names[comp],
                   chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
   }
}
#endif
10224a49301eSmrg
10234a49301eSmrg
10243464ebd5Sriastradhvoid
10253464ebd5Sriastradhtgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
10263464ebd5Sriastradh                               unsigned num_bufs,
10273464ebd5Sriastradh                               const void **bufs,
10283464ebd5Sriastradh                               const unsigned *buf_sizes)
10293464ebd5Sriastradh{
10303464ebd5Sriastradh   unsigned i;
10313464ebd5Sriastradh
10323464ebd5Sriastradh   for (i = 0; i < num_bufs; i++) {
10333464ebd5Sriastradh      mach->Consts[i] = bufs[i];
10343464ebd5Sriastradh      mach->ConstsSize[i] = buf_sizes[i];
10353464ebd5Sriastradh   }
10363464ebd5Sriastradh}
10373464ebd5Sriastradh
10384a49301eSmrg/**
10394a49301eSmrg * Initialize machine state by expanding tokens to full instructions,
10404a49301eSmrg * allocating temporary storage, setting up constants, etc.
10414a49301eSmrg * After this, we can call tgsi_exec_machine_run() many times.
10424a49301eSmrg */
10434a49301eSmrgvoid
10444a49301eSmrgtgsi_exec_machine_bind_shader(
10454a49301eSmrg   struct tgsi_exec_machine *mach,
10464a49301eSmrg   const struct tgsi_token *tokens,
104701e04c3fSmrg   struct tgsi_sampler *sampler,
104801e04c3fSmrg   struct tgsi_image *image,
104901e04c3fSmrg   struct tgsi_buffer *buffer)
10504a49301eSmrg{
10514a49301eSmrg   uint k;
10524a49301eSmrg   struct tgsi_parse_context parse;
10534a49301eSmrg   struct tgsi_full_instruction *instructions;
10544a49301eSmrg   struct tgsi_full_declaration *declarations;
10554a49301eSmrg   uint maxInstructions = 10, numInstructions = 0;
10564a49301eSmrg   uint maxDeclarations = 10, numDeclarations = 0;
10574a49301eSmrg
10584a49301eSmrg#if 0
10594a49301eSmrg   tgsi_dump(tokens, 0);
10604a49301eSmrg#endif
10614a49301eSmrg
10624a49301eSmrg   mach->Tokens = tokens;
1063af69d88dSmrg   mach->Sampler = sampler;
106401e04c3fSmrg   mach->Image = image;
106501e04c3fSmrg   mach->Buffer = buffer;
10664a49301eSmrg
10673464ebd5Sriastradh   if (!tokens) {
10683464ebd5Sriastradh      /* unbind and free all */
1069af69d88dSmrg      FREE(mach->Declarations);
10703464ebd5Sriastradh      mach->Declarations = NULL;
10713464ebd5Sriastradh      mach->NumDeclarations = 0;
10723464ebd5Sriastradh
1073af69d88dSmrg      FREE(mach->Instructions);
10743464ebd5Sriastradh      mach->Instructions = NULL;
10753464ebd5Sriastradh      mach->NumInstructions = 0;
10763464ebd5Sriastradh
10773464ebd5Sriastradh      return;
10783464ebd5Sriastradh   }
10793464ebd5Sriastradh
10804a49301eSmrg   k = tgsi_parse_init (&parse, mach->Tokens);
10814a49301eSmrg   if (k != TGSI_PARSE_OK) {
10824a49301eSmrg      debug_printf( "Problem parsing!\n" );
10834a49301eSmrg      return;
10844a49301eSmrg   }
10854a49301eSmrg
10864a49301eSmrg   mach->ImmLimit = 0;
1087af69d88dSmrg   mach->NumOutputs = 0;
10883464ebd5Sriastradh
108901e04c3fSmrg   for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
109001e04c3fSmrg      mach->SysSemanticToIndex[k] = -1;
109101e04c3fSmrg
109201e04c3fSmrg   if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
10933464ebd5Sriastradh       !mach->UsedGeometryShader) {
1094af69d88dSmrg      struct tgsi_exec_vector *inputs;
1095af69d88dSmrg      struct tgsi_exec_vector *outputs;
1096af69d88dSmrg
1097af69d88dSmrg      inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1098af69d88dSmrg                            TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1099af69d88dSmrg                            16);
11003464ebd5Sriastradh
11013464ebd5Sriastradh      if (!inputs)
11023464ebd5Sriastradh         return;
1103af69d88dSmrg
1104af69d88dSmrg      outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1105af69d88dSmrg                             TGSI_MAX_TOTAL_VERTICES, 16);
1106af69d88dSmrg
11073464ebd5Sriastradh      if (!outputs) {
11083464ebd5Sriastradh         align_free(inputs);
11093464ebd5Sriastradh         return;
11103464ebd5Sriastradh      }
11113464ebd5Sriastradh
11123464ebd5Sriastradh      align_free(mach->Inputs);
11133464ebd5Sriastradh      align_free(mach->Outputs);
11143464ebd5Sriastradh
11153464ebd5Sriastradh      mach->Inputs = inputs;
11163464ebd5Sriastradh      mach->Outputs = outputs;
11173464ebd5Sriastradh      mach->UsedGeometryShader = TRUE;
11183464ebd5Sriastradh   }
11194a49301eSmrg
11204a49301eSmrg   declarations = (struct tgsi_full_declaration *)
11214a49301eSmrg      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
11224a49301eSmrg
11234a49301eSmrg   if (!declarations) {
11244a49301eSmrg      return;
11254a49301eSmrg   }
11264a49301eSmrg
11274a49301eSmrg   instructions = (struct tgsi_full_instruction *)
11284a49301eSmrg      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
11294a49301eSmrg
11304a49301eSmrg   if (!instructions) {
11314a49301eSmrg      FREE( declarations );
11324a49301eSmrg      return;
11334a49301eSmrg   }
11344a49301eSmrg
11354a49301eSmrg   while( !tgsi_parse_end_of_tokens( &parse ) ) {
11364a49301eSmrg      uint i;
11374a49301eSmrg
11384a49301eSmrg      tgsi_parse_token( &parse );
11394a49301eSmrg      switch( parse.FullToken.Token.Type ) {
11404a49301eSmrg      case TGSI_TOKEN_TYPE_DECLARATION:
11414a49301eSmrg         /* save expanded declaration */
11424a49301eSmrg         if (numDeclarations == maxDeclarations) {
11434a49301eSmrg            declarations = REALLOC(declarations,
11444a49301eSmrg                                   maxDeclarations
11454a49301eSmrg                                   * sizeof(struct tgsi_full_declaration),
11464a49301eSmrg                                   (maxDeclarations + 10)
11474a49301eSmrg                                   * sizeof(struct tgsi_full_declaration));
11484a49301eSmrg            maxDeclarations += 10;
11494a49301eSmrg         }
11507ec681f3Smrg         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
11517ec681f3Smrg            mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
115201e04c3fSmrg         else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
115301e04c3fSmrg            const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
115401e04c3fSmrg            mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
115501e04c3fSmrg         }
115601e04c3fSmrg
11574a49301eSmrg         memcpy(declarations + numDeclarations,
11584a49301eSmrg                &parse.FullToken.FullDeclaration,
11594a49301eSmrg                sizeof(declarations[0]));
11604a49301eSmrg         numDeclarations++;
11614a49301eSmrg         break;
11624a49301eSmrg
11634a49301eSmrg      case TGSI_TOKEN_TYPE_IMMEDIATE:
11644a49301eSmrg         {
11654a49301eSmrg            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
11664a49301eSmrg            assert( size <= 4 );
116701e04c3fSmrg            if (mach->ImmLimit >= mach->ImmsReserved) {
116801e04c3fSmrg               unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
116901e04c3fSmrg               float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
117001e04c3fSmrg               if (imms) {
117101e04c3fSmrg                  mach->ImmsReserved = newReserved;
117201e04c3fSmrg                  mach->Imms = imms;
117301e04c3fSmrg               } else {
117401e04c3fSmrg                  debug_printf("Unable to (re)allocate space for immidiate constants\n");
117501e04c3fSmrg                  break;
117601e04c3fSmrg               }
117701e04c3fSmrg            }
11784a49301eSmrg
11794a49301eSmrg            for( i = 0; i < size; i++ ) {
11804a49301eSmrg               mach->Imms[mach->ImmLimit][i] =
11814a49301eSmrg		  parse.FullToken.FullImmediate.u[i].Float;
11824a49301eSmrg            }
11834a49301eSmrg            mach->ImmLimit += 1;
11844a49301eSmrg         }
11854a49301eSmrg         break;
11864a49301eSmrg
11874a49301eSmrg      case TGSI_TOKEN_TYPE_INSTRUCTION:
11884a49301eSmrg
11894a49301eSmrg         /* save expanded instruction */
11904a49301eSmrg         if (numInstructions == maxInstructions) {
11914a49301eSmrg            instructions = REALLOC(instructions,
11924a49301eSmrg                                   maxInstructions
11934a49301eSmrg                                   * sizeof(struct tgsi_full_instruction),
11944a49301eSmrg                                   (maxInstructions + 10)
11954a49301eSmrg                                   * sizeof(struct tgsi_full_instruction));
11964a49301eSmrg            maxInstructions += 10;
11974a49301eSmrg         }
11984a49301eSmrg
11994a49301eSmrg         memcpy(instructions + numInstructions,
12004a49301eSmrg                &parse.FullToken.FullInstruction,
12014a49301eSmrg                sizeof(instructions[0]));
12024a49301eSmrg
12034a49301eSmrg         numInstructions++;
12044a49301eSmrg         break;
12054a49301eSmrg
1206cdc920a0Smrg      case TGSI_TOKEN_TYPE_PROPERTY:
120701e04c3fSmrg         if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1208af69d88dSmrg            if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1209af69d88dSmrg               mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1210af69d88dSmrg            }
1211af69d88dSmrg         }
1212cdc920a0Smrg         break;
1213cdc920a0Smrg
12144a49301eSmrg      default:
12154a49301eSmrg         assert( 0 );
12164a49301eSmrg      }
12174a49301eSmrg   }
12184a49301eSmrg   tgsi_parse_free (&parse);
12194a49301eSmrg
1220af69d88dSmrg   FREE(mach->Declarations);
12214a49301eSmrg   mach->Declarations = declarations;
12224a49301eSmrg   mach->NumDeclarations = numDeclarations;
12234a49301eSmrg
1224af69d88dSmrg   FREE(mach->Instructions);
12254a49301eSmrg   mach->Instructions = instructions;
12264a49301eSmrg   mach->NumInstructions = numInstructions;
12274a49301eSmrg}
12284a49301eSmrg
12294a49301eSmrg
12304a49301eSmrgstruct tgsi_exec_machine *
123101e04c3fSmrgtgsi_exec_machine_create(enum pipe_shader_type shader_type)
12324a49301eSmrg{
12334a49301eSmrg   struct tgsi_exec_machine *mach;
12344a49301eSmrg
12354a49301eSmrg   mach = align_malloc( sizeof *mach, 16 );
12364a49301eSmrg   if (!mach)
12374a49301eSmrg      goto fail;
12384a49301eSmrg
12394a49301eSmrg   memset(mach, 0, sizeof(*mach));
12404a49301eSmrg
124101e04c3fSmrg   mach->ShaderType = shader_type;
12424a49301eSmrg
124301e04c3fSmrg   if (shader_type != PIPE_SHADER_COMPUTE) {
124401e04c3fSmrg      mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
124501e04c3fSmrg      mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
124601e04c3fSmrg      if (!mach->Inputs || !mach->Outputs)
124701e04c3fSmrg         goto fail;
124801e04c3fSmrg   }
12493464ebd5Sriastradh
1250361fc4cbSmaya   if (shader_type == PIPE_SHADER_FRAGMENT) {
1251361fc4cbSmaya      mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1252361fc4cbSmaya      if (!mach->InputSampleOffsetApply)
1253361fc4cbSmaya         goto fail;
1254361fc4cbSmaya   }
1255361fc4cbSmaya
12564a49301eSmrg#ifdef DEBUG
12574a49301eSmrg   /* silence warnings */
12584a49301eSmrg   (void) print_chan;
12594a49301eSmrg   (void) print_temp;
12604a49301eSmrg#endif
12614a49301eSmrg
12624a49301eSmrg   return mach;
12634a49301eSmrg
12644a49301eSmrgfail:
12653464ebd5Sriastradh   if (mach) {
1266361fc4cbSmaya      align_free(mach->InputSampleOffsetApply);
12673464ebd5Sriastradh      align_free(mach->Inputs);
12683464ebd5Sriastradh      align_free(mach->Outputs);
12693464ebd5Sriastradh      align_free(mach);
12703464ebd5Sriastradh   }
12714a49301eSmrg   return NULL;
12724a49301eSmrg}
12734a49301eSmrg
12744a49301eSmrg
12754a49301eSmrgvoid
12764a49301eSmrgtgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
12774a49301eSmrg{
12784a49301eSmrg   if (mach) {
1279af69d88dSmrg      FREE(mach->Instructions);
1280af69d88dSmrg      FREE(mach->Declarations);
128101e04c3fSmrg      FREE(mach->Imms);
12824a49301eSmrg
1283361fc4cbSmaya      align_free(mach->InputSampleOffsetApply);
12843464ebd5Sriastradh      align_free(mach->Inputs);
12853464ebd5Sriastradh      align_free(mach->Outputs);
12863464ebd5Sriastradh
12873464ebd5Sriastradh      align_free(mach);
12883464ebd5Sriastradh   }
12894a49301eSmrg}
12904a49301eSmrg
12914a49301eSmrgstatic void
1292cdc920a0Smrgmicro_add(union tgsi_exec_channel *dst,
1293cdc920a0Smrg          const union tgsi_exec_channel *src0,
1294cdc920a0Smrg          const union tgsi_exec_channel *src1)
12954a49301eSmrg{
12964a49301eSmrg   dst->f[0] = src0->f[0] + src1->f[0];
12974a49301eSmrg   dst->f[1] = src0->f[1] + src1->f[1];
12984a49301eSmrg   dst->f[2] = src0->f[2] + src1->f[2];
12994a49301eSmrg   dst->f[3] = src0->f[3] + src1->f[3];
13004a49301eSmrg}
13014a49301eSmrg
13024a49301eSmrgstatic void
1303cdc920a0Smrgmicro_div(
13044a49301eSmrg   union tgsi_exec_channel *dst,
13054a49301eSmrg   const union tgsi_exec_channel *src0,
13064a49301eSmrg   const union tgsi_exec_channel *src1 )
13074a49301eSmrg{
1308cdc920a0Smrg   if (src1->f[0] != 0) {
1309cdc920a0Smrg      dst->f[0] = src0->f[0] / src1->f[0];
1310cdc920a0Smrg   }
1311cdc920a0Smrg   if (src1->f[1] != 0) {
1312cdc920a0Smrg      dst->f[1] = src0->f[1] / src1->f[1];
1313cdc920a0Smrg   }
1314cdc920a0Smrg   if (src1->f[2] != 0) {
1315cdc920a0Smrg      dst->f[2] = src0->f[2] / src1->f[2];
1316cdc920a0Smrg   }
1317cdc920a0Smrg   if (src1->f[3] != 0) {
1318cdc920a0Smrg      dst->f[3] = src0->f[3] / src1->f[3];
1319cdc920a0Smrg   }
13204a49301eSmrg}
13214a49301eSmrg
13224a49301eSmrgstatic void
1323cdc920a0Smrgmicro_lt(
13244a49301eSmrg   union tgsi_exec_channel *dst,
1325cdc920a0Smrg   const union tgsi_exec_channel *src0,
1326cdc920a0Smrg   const union tgsi_exec_channel *src1,
1327cdc920a0Smrg   const union tgsi_exec_channel *src2,
1328cdc920a0Smrg   const union tgsi_exec_channel *src3 )
13294a49301eSmrg{
1330cdc920a0Smrg   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1331cdc920a0Smrg   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1332cdc920a0Smrg   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1333cdc920a0Smrg   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
13344a49301eSmrg}
13354a49301eSmrg
13364a49301eSmrgstatic void
1337cdc920a0Smrgmicro_max(union tgsi_exec_channel *dst,
1338cdc920a0Smrg          const union tgsi_exec_channel *src0,
1339cdc920a0Smrg          const union tgsi_exec_channel *src1)
13404a49301eSmrg{
13417ec681f3Smrg   dst->f[0] = fmaxf(src0->f[0], src1->f[0]);
13427ec681f3Smrg   dst->f[1] = fmaxf(src0->f[1], src1->f[1]);
13437ec681f3Smrg   dst->f[2] = fmaxf(src0->f[2], src1->f[2]);
13447ec681f3Smrg   dst->f[3] = fmaxf(src0->f[3], src1->f[3]);
13454a49301eSmrg}
13464a49301eSmrg
13474a49301eSmrgstatic void
1348cdc920a0Smrgmicro_min(union tgsi_exec_channel *dst,
1349cdc920a0Smrg          const union tgsi_exec_channel *src0,
1350cdc920a0Smrg          const union tgsi_exec_channel *src1)
13514a49301eSmrg{
13527ec681f3Smrg   dst->f[0] = fminf(src0->f[0], src1->f[0]);
13537ec681f3Smrg   dst->f[1] = fminf(src0->f[1], src1->f[1]);
13547ec681f3Smrg   dst->f[2] = fminf(src0->f[2], src1->f[2]);
13557ec681f3Smrg   dst->f[3] = fminf(src0->f[3], src1->f[3]);
13564a49301eSmrg}
13574a49301eSmrg
13584a49301eSmrgstatic void
1359cdc920a0Smrgmicro_mul(union tgsi_exec_channel *dst,
1360cdc920a0Smrg          const union tgsi_exec_channel *src0,
1361cdc920a0Smrg          const union tgsi_exec_channel *src1)
13624a49301eSmrg{
1363cdc920a0Smrg   dst->f[0] = src0->f[0] * src1->f[0];
1364cdc920a0Smrg   dst->f[1] = src0->f[1] * src1->f[1];
1365cdc920a0Smrg   dst->f[2] = src0->f[2] * src1->f[2];
1366cdc920a0Smrg   dst->f[3] = src0->f[3] * src1->f[3];
13674a49301eSmrg}
13684a49301eSmrg
13694a49301eSmrgstatic void
1370cdc920a0Smrgmicro_neg(
13714a49301eSmrg   union tgsi_exec_channel *dst,
1372cdc920a0Smrg   const union tgsi_exec_channel *src )
13734a49301eSmrg{
1374cdc920a0Smrg   dst->f[0] = -src->f[0];
1375cdc920a0Smrg   dst->f[1] = -src->f[1];
1376cdc920a0Smrg   dst->f[2] = -src->f[2];
1377cdc920a0Smrg   dst->f[3] = -src->f[3];
13784a49301eSmrg}
13794a49301eSmrg
13804a49301eSmrgstatic void
1381cdc920a0Smrgmicro_pow(
13824a49301eSmrg   union tgsi_exec_channel *dst,
1383cdc920a0Smrg   const union tgsi_exec_channel *src0,
1384cdc920a0Smrg   const union tgsi_exec_channel *src1 )
13854a49301eSmrg{
1386cdc920a0Smrg   dst->f[0] = powf( src0->f[0], src1->f[0] );
1387cdc920a0Smrg   dst->f[1] = powf( src0->f[1], src1->f[1] );
1388cdc920a0Smrg   dst->f[2] = powf( src0->f[2], src1->f[2] );
1389cdc920a0Smrg   dst->f[3] = powf( src0->f[3], src1->f[3] );
13904a49301eSmrg}
13914a49301eSmrg
139201e04c3fSmrgstatic void
139301e04c3fSmrgmicro_ldexp(union tgsi_exec_channel *dst,
139401e04c3fSmrg            const union tgsi_exec_channel *src0,
139501e04c3fSmrg            const union tgsi_exec_channel *src1)
139601e04c3fSmrg{
139701e04c3fSmrg   dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
139801e04c3fSmrg   dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
139901e04c3fSmrg   dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
140001e04c3fSmrg   dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
140101e04c3fSmrg}
140201e04c3fSmrg
14034a49301eSmrgstatic void
1404cdc920a0Smrgmicro_sub(union tgsi_exec_channel *dst,
1405cdc920a0Smrg          const union tgsi_exec_channel *src0,
1406cdc920a0Smrg          const union tgsi_exec_channel *src1)
14074a49301eSmrg{
1408cdc920a0Smrg   dst->f[0] = src0->f[0] - src1->f[0];
1409cdc920a0Smrg   dst->f[1] = src0->f[1] - src1->f[1];
1410cdc920a0Smrg   dst->f[2] = src0->f[2] - src1->f[2];
1411cdc920a0Smrg   dst->f[3] = src0->f[3] - src1->f[3];
14124a49301eSmrg}
14134a49301eSmrg
14144a49301eSmrgstatic void
1415cdc920a0Smrgfetch_src_file_channel(const struct tgsi_exec_machine *mach,
1416cdc920a0Smrg                       const uint file,
1417cdc920a0Smrg                       const uint swizzle,
1418cdc920a0Smrg                       const union tgsi_exec_channel *index,
1419cdc920a0Smrg                       const union tgsi_exec_channel *index2D,
1420cdc920a0Smrg                       union tgsi_exec_channel *chan)
14214a49301eSmrg{
14224a49301eSmrg   uint i;
14234a49301eSmrg
14243464ebd5Sriastradh   assert(swizzle < 4);
14253464ebd5Sriastradh
1426cdc920a0Smrg   switch (file) {
1427cdc920a0Smrg   case TGSI_FILE_CONSTANT:
1428af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
14297ec681f3Smrg         /* NOTE: copying the const value as a uint instead of float */
14307ec681f3Smrg         const uint constbuf = index2D->i[i];
14317ec681f3Smrg         const unsigned pos = index->i[i] * 4 + swizzle;
14327ec681f3Smrg         /* const buffer bounds check */
14337ec681f3Smrg         if (pos >= mach->ConstsSize[constbuf] / 4) {
14347ec681f3Smrg            if (0) {
14357ec681f3Smrg               /* Debug: print warning */
14367ec681f3Smrg               static int count = 0;
14377ec681f3Smrg               if (count++ < 100)
14387ec681f3Smrg                  debug_printf("TGSI Exec: const buffer index %d"
14397ec681f3Smrg                                 " out of bounds\n", pos);
14407ec681f3Smrg            }
1441cdc920a0Smrg            chan->u[i] = 0;
1442cdc920a0Smrg         } else {
14433464ebd5Sriastradh            const uint *buf = (const uint *)mach->Consts[constbuf];
14447ec681f3Smrg            chan->u[i] = buf[pos];
1445cdc920a0Smrg         }
14464a49301eSmrg      }
1447cdc920a0Smrg      break;
14484a49301eSmrg
1449cdc920a0Smrg   case TGSI_FILE_INPUT:
1450af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
14513464ebd5Sriastradh         /*
145201e04c3fSmrg         if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
14533464ebd5Sriastradh            debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
14543464ebd5Sriastradh                         index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
14553464ebd5Sriastradh                         index2D->i[i], index->i[i]);
14563464ebd5Sriastradh                         }*/
14573464ebd5Sriastradh         int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
14583464ebd5Sriastradh         assert(pos >= 0);
14593464ebd5Sriastradh         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
14603464ebd5Sriastradh         chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
14613464ebd5Sriastradh      }
14623464ebd5Sriastradh      break;
14633464ebd5Sriastradh
1464cdc920a0Smrg   case TGSI_FILE_SYSTEM_VALUE:
1465af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
146601e04c3fSmrg         chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1467cdc920a0Smrg      }
1468cdc920a0Smrg      break;
14694a49301eSmrg
1470cdc920a0Smrg   case TGSI_FILE_TEMPORARY:
1471af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1472cdc920a0Smrg         assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1473cdc920a0Smrg         assert(index2D->i[i] == 0);
14744a49301eSmrg
1475cdc920a0Smrg         chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1476cdc920a0Smrg      }
1477cdc920a0Smrg      break;
14784a49301eSmrg
1479cdc920a0Smrg   case TGSI_FILE_IMMEDIATE:
1480af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1481cdc920a0Smrg         assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1482cdc920a0Smrg         assert(index2D->i[i] == 0);
14834a49301eSmrg
1484cdc920a0Smrg         chan->f[i] = mach->Imms[index->i[i]][swizzle];
1485cdc920a0Smrg      }
1486cdc920a0Smrg      break;
14874a49301eSmrg
1488cdc920a0Smrg   case TGSI_FILE_ADDRESS:
1489af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
14907ec681f3Smrg         assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs));
1491cdc920a0Smrg         assert(index2D->i[i] == 0);
14924a49301eSmrg
1493cdc920a0Smrg         chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1494cdc920a0Smrg      }
1495cdc920a0Smrg      break;
14964a49301eSmrg
1497cdc920a0Smrg   case TGSI_FILE_OUTPUT:
1498cdc920a0Smrg      /* vertex/fragment output vars can be read too */
1499af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1500cdc920a0Smrg         assert(index->i[i] >= 0);
1501cdc920a0Smrg         assert(index2D->i[i] == 0);
15024a49301eSmrg
1503cdc920a0Smrg         chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1504cdc920a0Smrg      }
1505cdc920a0Smrg      break;
15064a49301eSmrg
1507cdc920a0Smrg   default:
1508cdc920a0Smrg      assert(0);
1509af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1510cdc920a0Smrg         chan->u[i] = 0;
1511cdc920a0Smrg      }
1512cdc920a0Smrg   }
15134a49301eSmrg}
15144a49301eSmrg
15154a49301eSmrgstatic void
1516361fc4cbSmayaget_index_registers(const struct tgsi_exec_machine *mach,
1517361fc4cbSmaya                    const struct tgsi_full_src_register *reg,
1518361fc4cbSmaya                    union tgsi_exec_channel *index,
1519361fc4cbSmaya                    union tgsi_exec_channel *index2D)
15204a49301eSmrg{
1521cdc920a0Smrg   uint swizzle;
15224a49301eSmrg
1523cdc920a0Smrg   /* We start with a direct index into a register file.
1524cdc920a0Smrg    *
1525cdc920a0Smrg    *    file[1],
1526cdc920a0Smrg    *    where:
1527cdc920a0Smrg    *       file = Register.File
1528cdc920a0Smrg    *       [1] = Register.Index
1529cdc920a0Smrg    */
1530361fc4cbSmaya   index->i[0] =
1531361fc4cbSmaya   index->i[1] =
1532361fc4cbSmaya   index->i[2] =
1533361fc4cbSmaya   index->i[3] = reg->Register.Index;
15344a49301eSmrg
1535cdc920a0Smrg   /* There is an extra source register that indirectly subscripts
1536cdc920a0Smrg    * a register file. The direct index now becomes an offset
1537cdc920a0Smrg    * that is being added to the indirect register.
1538cdc920a0Smrg    *
1539cdc920a0Smrg    *    file[ind[2].x+1],
1540cdc920a0Smrg    *    where:
1541cdc920a0Smrg    *       ind = Indirect.File
1542cdc920a0Smrg    *       [2] = Indirect.Index
1543cdc920a0Smrg    *       .x = Indirect.SwizzleX
1544cdc920a0Smrg    */
1545cdc920a0Smrg   if (reg->Register.Indirect) {
1546cdc920a0Smrg      union tgsi_exec_channel index2;
1547cdc920a0Smrg      union tgsi_exec_channel indir_index;
1548cdc920a0Smrg      const uint execmask = mach->ExecMask;
1549cdc920a0Smrg      uint i;
15504a49301eSmrg
1551cdc920a0Smrg      /* which address register (always zero now) */
1552cdc920a0Smrg      index2.i[0] =
1553cdc920a0Smrg      index2.i[1] =
1554cdc920a0Smrg      index2.i[2] =
1555cdc920a0Smrg      index2.i[3] = reg->Indirect.Index;
1556cdc920a0Smrg      /* get current value of address register[swizzle] */
1557af69d88dSmrg      swizzle = reg->Indirect.Swizzle;
1558cdc920a0Smrg      fetch_src_file_channel(mach,
1559cdc920a0Smrg                             reg->Indirect.File,
1560cdc920a0Smrg                             swizzle,
1561cdc920a0Smrg                             &index2,
1562cdc920a0Smrg                             &ZeroVec,
1563cdc920a0Smrg                             &indir_index);
1564cdc920a0Smrg
1565cdc920a0Smrg      /* add value of address register to the offset */
1566361fc4cbSmaya      index->i[0] += indir_index.i[0];
1567361fc4cbSmaya      index->i[1] += indir_index.i[1];
1568361fc4cbSmaya      index->i[2] += indir_index.i[2];
1569361fc4cbSmaya      index->i[3] += indir_index.i[3];
1570cdc920a0Smrg
1571cdc920a0Smrg      /* for disabled execution channels, zero-out the index to
1572cdc920a0Smrg       * avoid using a potential garbage value.
1573cdc920a0Smrg       */
1574af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1575cdc920a0Smrg         if ((execmask & (1 << i)) == 0)
1576361fc4cbSmaya            index->i[i] = 0;
1577cdc920a0Smrg      }
1578cdc920a0Smrg   }
1579cdc920a0Smrg
1580cdc920a0Smrg   /* There is an extra source register that is a second
1581cdc920a0Smrg    * subscript to a register file. Effectively it means that
1582cdc920a0Smrg    * the register file is actually a 2D array of registers.
1583cdc920a0Smrg    *
1584cdc920a0Smrg    *    file[3][1],
1585cdc920a0Smrg    *    where:
1586cdc920a0Smrg    *       [3] = Dimension.Index
1587cdc920a0Smrg    */
1588cdc920a0Smrg   if (reg->Register.Dimension) {
1589361fc4cbSmaya      index2D->i[0] =
1590361fc4cbSmaya      index2D->i[1] =
1591361fc4cbSmaya      index2D->i[2] =
1592361fc4cbSmaya      index2D->i[3] = reg->Dimension.Index;
1593cdc920a0Smrg
1594cdc920a0Smrg      /* Again, the second subscript index can be addressed indirectly
1595cdc920a0Smrg       * identically to the first one.
1596cdc920a0Smrg       * Nothing stops us from indirectly addressing the indirect register,
1597cdc920a0Smrg       * but there is no need for that, so we won't exercise it.
1598cdc920a0Smrg       *
1599cdc920a0Smrg       *    file[ind[4].y+3][1],
1600cdc920a0Smrg       *    where:
1601cdc920a0Smrg       *       ind = DimIndirect.File
1602cdc920a0Smrg       *       [4] = DimIndirect.Index
1603cdc920a0Smrg       *       .y = DimIndirect.SwizzleX
1604cdc920a0Smrg       */
1605cdc920a0Smrg      if (reg->Dimension.Indirect) {
1606cdc920a0Smrg         union tgsi_exec_channel index2;
1607cdc920a0Smrg         union tgsi_exec_channel indir_index;
1608cdc920a0Smrg         const uint execmask = mach->ExecMask;
1609cdc920a0Smrg         uint i;
1610cdc920a0Smrg
1611cdc920a0Smrg         index2.i[0] =
1612cdc920a0Smrg         index2.i[1] =
1613cdc920a0Smrg         index2.i[2] =
1614cdc920a0Smrg         index2.i[3] = reg->DimIndirect.Index;
1615cdc920a0Smrg
1616af69d88dSmrg         swizzle = reg->DimIndirect.Swizzle;
1617cdc920a0Smrg         fetch_src_file_channel(mach,
1618cdc920a0Smrg                                reg->DimIndirect.File,
1619cdc920a0Smrg                                swizzle,
1620cdc920a0Smrg                                &index2,
1621cdc920a0Smrg                                &ZeroVec,
1622cdc920a0Smrg                                &indir_index);
1623cdc920a0Smrg
1624361fc4cbSmaya         index2D->i[0] += indir_index.i[0];
1625361fc4cbSmaya         index2D->i[1] += indir_index.i[1];
1626361fc4cbSmaya         index2D->i[2] += indir_index.i[2];
1627361fc4cbSmaya         index2D->i[3] += indir_index.i[3];
1628cdc920a0Smrg
1629cdc920a0Smrg         /* for disabled execution channels, zero-out the index to
1630cdc920a0Smrg          * avoid using a potential garbage value.
1631cdc920a0Smrg          */
1632af69d88dSmrg         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1633cdc920a0Smrg            if ((execmask & (1 << i)) == 0) {
1634361fc4cbSmaya               index2D->i[i] = 0;
1635cdc920a0Smrg            }
1636cdc920a0Smrg         }
1637cdc920a0Smrg      }
1638cdc920a0Smrg
1639cdc920a0Smrg      /* If by any chance there was a need for a 3D array of register
1640cdc920a0Smrg       * files, we would have to check whether Dimension is followed
1641cdc920a0Smrg       * by a dimension register and continue the saga.
1642cdc920a0Smrg       */
1643cdc920a0Smrg   } else {
1644361fc4cbSmaya      index2D->i[0] =
1645361fc4cbSmaya      index2D->i[1] =
1646361fc4cbSmaya      index2D->i[2] =
1647361fc4cbSmaya      index2D->i[3] = 0;
1648cdc920a0Smrg   }
1649361fc4cbSmaya}
1650361fc4cbSmaya
1651361fc4cbSmaya
1652361fc4cbSmayastatic void
1653361fc4cbSmayafetch_source_d(const struct tgsi_exec_machine *mach,
1654361fc4cbSmaya               union tgsi_exec_channel *chan,
1655361fc4cbSmaya               const struct tgsi_full_src_register *reg,
1656361fc4cbSmaya	       const uint chan_index)
1657361fc4cbSmaya{
1658361fc4cbSmaya   union tgsi_exec_channel index;
1659361fc4cbSmaya   union tgsi_exec_channel index2D;
1660361fc4cbSmaya   uint swizzle;
1661361fc4cbSmaya
1662361fc4cbSmaya   get_index_registers(mach, reg, &index, &index2D);
1663361fc4cbSmaya
1664cdc920a0Smrg
1665cdc920a0Smrg   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1666cdc920a0Smrg   fetch_src_file_channel(mach,
1667cdc920a0Smrg                          reg->Register.File,
1668cdc920a0Smrg                          swizzle,
1669cdc920a0Smrg                          &index,
1670cdc920a0Smrg                          &index2D,
1671cdc920a0Smrg                          chan);
167201e04c3fSmrg}
167301e04c3fSmrg
167401e04c3fSmrgstatic void
167501e04c3fSmrgfetch_source(const struct tgsi_exec_machine *mach,
167601e04c3fSmrg             union tgsi_exec_channel *chan,
167701e04c3fSmrg             const struct tgsi_full_src_register *reg,
167801e04c3fSmrg             const uint chan_index,
167901e04c3fSmrg             enum tgsi_exec_datatype src_datatype)
168001e04c3fSmrg{
168101e04c3fSmrg   fetch_source_d(mach, chan, reg, chan_index);
1682cdc920a0Smrg
1683cdc920a0Smrg   if (reg->Register.Absolute) {
16847ec681f3Smrg      assert(src_datatype == TGSI_EXEC_DATA_FLOAT);
16857ec681f3Smrg      micro_abs(chan, chan);
1686cdc920a0Smrg   }
1687cdc920a0Smrg
1688cdc920a0Smrg   if (reg->Register.Negate) {
1689cdc920a0Smrg      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1690cdc920a0Smrg         micro_neg(chan, chan);
1691cdc920a0Smrg      } else {
1692cdc920a0Smrg         micro_ineg(chan, chan);
1693cdc920a0Smrg      }
1694cdc920a0Smrg   }
16954a49301eSmrg}
16964a49301eSmrg
169701e04c3fSmrgstatic union tgsi_exec_channel *
169801e04c3fSmrgstore_dest_dstret(struct tgsi_exec_machine *mach,
169901e04c3fSmrg                 const union tgsi_exec_channel *chan,
170001e04c3fSmrg                 const struct tgsi_full_dst_register *reg,
17017ec681f3Smrg                 uint chan_index)
17024a49301eSmrg{
170301e04c3fSmrg   static union tgsi_exec_channel null;
1704cdc920a0Smrg   union tgsi_exec_channel *dst;
1705cdc920a0Smrg   int offset = 0;  /* indirection offset */
1706cdc920a0Smrg   int index;
1707cdc920a0Smrg
1708cdc920a0Smrg
1709cdc920a0Smrg   /* There is an extra source register that indirectly subscripts
1710cdc920a0Smrg    * a register file. The direct index now becomes an offset
1711cdc920a0Smrg    * that is being added to the indirect register.
1712cdc920a0Smrg    *
1713cdc920a0Smrg    *    file[ind[2].x+1],
1714cdc920a0Smrg    *    where:
1715cdc920a0Smrg    *       ind = Indirect.File
1716cdc920a0Smrg    *       [2] = Indirect.Index
1717cdc920a0Smrg    *       .x = Indirect.SwizzleX
1718cdc920a0Smrg    */
1719cdc920a0Smrg   if (reg->Register.Indirect) {
1720cdc920a0Smrg      union tgsi_exec_channel index;
1721cdc920a0Smrg      union tgsi_exec_channel indir_index;
1722cdc920a0Smrg      uint swizzle;
1723cdc920a0Smrg
1724cdc920a0Smrg      /* which address register (always zero for now) */
1725cdc920a0Smrg      index.i[0] =
1726cdc920a0Smrg      index.i[1] =
1727cdc920a0Smrg      index.i[2] =
1728cdc920a0Smrg      index.i[3] = reg->Indirect.Index;
1729cdc920a0Smrg
1730cdc920a0Smrg      /* get current value of address register[swizzle] */
1731af69d88dSmrg      swizzle = reg->Indirect.Swizzle;
1732cdc920a0Smrg
1733cdc920a0Smrg      /* fetch values from the address/indirection register */
1734cdc920a0Smrg      fetch_src_file_channel(mach,
1735cdc920a0Smrg                             reg->Indirect.File,
1736cdc920a0Smrg                             swizzle,
1737cdc920a0Smrg                             &index,
1738cdc920a0Smrg                             &ZeroVec,
1739cdc920a0Smrg                             &indir_index);
1740cdc920a0Smrg
1741cdc920a0Smrg      /* save indirection offset */
1742cdc920a0Smrg      offset = indir_index.i[0];
1743cdc920a0Smrg   }
1744cdc920a0Smrg
1745cdc920a0Smrg   switch (reg->Register.File) {
1746cdc920a0Smrg   case TGSI_FILE_NULL:
1747cdc920a0Smrg      dst = &null;
1748cdc920a0Smrg      break;
17494a49301eSmrg
1750cdc920a0Smrg   case TGSI_FILE_OUTPUT:
17517ec681f3Smrg      index = mach->OutputVertexOffset + reg->Register.Index;
1752cdc920a0Smrg      dst = &mach->Outputs[offset + index].xyzw[chan_index];
17534a49301eSmrg#if 0
1754af69d88dSmrg      debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1755af69d88dSmrg                   mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1756af69d88dSmrg                   reg->Register.Index);
175701e04c3fSmrg      if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1758af69d88dSmrg         debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1759af69d88dSmrg         for (i = 0; i < TGSI_QUAD_SIZE; i++)
1760cdc920a0Smrg            if (execmask & (1 << i))
1761af69d88dSmrg               debug_printf("%f, ", chan->f[i]);
1762af69d88dSmrg         debug_printf(")\n");
1763cdc920a0Smrg      }
17644a49301eSmrg#endif
1765cdc920a0Smrg      break;
17664a49301eSmrg
1767cdc920a0Smrg   case TGSI_FILE_TEMPORARY:
1768cdc920a0Smrg      index = reg->Register.Index;
1769cdc920a0Smrg      assert( index < TGSI_EXEC_NUM_TEMPS );
1770cdc920a0Smrg      dst = &mach->Temps[offset + index].xyzw[chan_index];
1771cdc920a0Smrg      break;
1772cdc920a0Smrg
17733464ebd5Sriastradh   case TGSI_FILE_ADDRESS:
17743464ebd5Sriastradh      index = reg->Register.Index;
17757ec681f3Smrg      assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs));
17763464ebd5Sriastradh      dst = &mach->Addrs[index].xyzw[chan_index];
1777cdc920a0Smrg      break;
1778cdc920a0Smrg
1779cdc920a0Smrg   default:
17807ec681f3Smrg      unreachable("Bad destination file");
1781cdc920a0Smrg   }
1782cdc920a0Smrg
178301e04c3fSmrg   return dst;
178401e04c3fSmrg}
1785cdc920a0Smrg
178601e04c3fSmrgstatic void
178701e04c3fSmrgstore_dest_double(struct tgsi_exec_machine *mach,
178801e04c3fSmrg                 const union tgsi_exec_channel *chan,
178901e04c3fSmrg                 const struct tgsi_full_dst_register *reg,
17907ec681f3Smrg                 uint chan_index)
179101e04c3fSmrg{
179201e04c3fSmrg   union tgsi_exec_channel *dst;
179301e04c3fSmrg   const uint execmask = mach->ExecMask;
179401e04c3fSmrg   int i;
1795cdc920a0Smrg
17967ec681f3Smrg   dst = store_dest_dstret(mach, chan, reg, chan_index);
179701e04c3fSmrg   if (!dst)
179801e04c3fSmrg      return;
1799cdc920a0Smrg
180001e04c3fSmrg   /* doubles path */
180101e04c3fSmrg   for (i = 0; i < TGSI_QUAD_SIZE; i++)
180201e04c3fSmrg      if (execmask & (1 << i))
180301e04c3fSmrg         dst->i[i] = chan->i[i];
180401e04c3fSmrg}
1805cdc920a0Smrg
180601e04c3fSmrgstatic void
180701e04c3fSmrgstore_dest(struct tgsi_exec_machine *mach,
180801e04c3fSmrg           const union tgsi_exec_channel *chan,
180901e04c3fSmrg           const struct tgsi_full_dst_register *reg,
181001e04c3fSmrg           const struct tgsi_full_instruction *inst,
18117ec681f3Smrg           uint chan_index)
181201e04c3fSmrg{
181301e04c3fSmrg   union tgsi_exec_channel *dst;
181401e04c3fSmrg   const uint execmask = mach->ExecMask;
181501e04c3fSmrg   int i;
181601e04c3fSmrg
18177ec681f3Smrg   dst = store_dest_dstret(mach, chan, reg, chan_index);
181801e04c3fSmrg   if (!dst)
181901e04c3fSmrg      return;
1820cdc920a0Smrg
182101e04c3fSmrg   if (!inst->Instruction.Saturate) {
1822af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1823cdc920a0Smrg         if (execmask & (1 << i))
1824cdc920a0Smrg            dst->i[i] = chan->i[i];
182501e04c3fSmrg   }
182601e04c3fSmrg   else {
1827af69d88dSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++)
18287ec681f3Smrg         if (execmask & (1 << i))
18297ec681f3Smrg            dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f);
1830cdc920a0Smrg   }
18314a49301eSmrg}
18324a49301eSmrg
/*
 * Shorthands for fetch_source().  Both expect variables named `mach` and
 * `inst` to be in scope at the point of use.
 *
 *    VAL    pointer to the channel that receives the values
 *    INDEX  index of the source operand within inst->Src[]
 *    CHAN   channel to fetch
 */

/* fetch with the data type given as TGSI_EXEC_DATA_FLOAT */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/* fetch with the data type given as TGSI_EXEC_DATA_INT */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1838af69d88dSmrg
1839cdc920a0Smrg
1840cdc920a0Smrg/**
1841cdc920a0Smrg * Execute ARB-style KIL which is predicated by a src register.
1842cdc920a0Smrg * Kill fragment if any of the four values is less than zero.
1843cdc920a0Smrg */
18444a49301eSmrgstatic void
1845af69d88dSmrgexec_kill_if(struct tgsi_exec_machine *mach,
1846af69d88dSmrg             const struct tgsi_full_instruction *inst)
18474a49301eSmrg{
1848cdc920a0Smrg   uint uniquemask;
1849cdc920a0Smrg   uint chan_index;
1850cdc920a0Smrg   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1851cdc920a0Smrg   union tgsi_exec_channel r[1];
1852cdc920a0Smrg
1853cdc920a0Smrg   /* This mask stores component bits that were already tested. */
1854cdc920a0Smrg   uniquemask = 0;
1855cdc920a0Smrg
1856cdc920a0Smrg   for (chan_index = 0; chan_index < 4; chan_index++)
1857cdc920a0Smrg   {
1858cdc920a0Smrg      uint swizzle;
1859cdc920a0Smrg      uint i;
1860cdc920a0Smrg
1861cdc920a0Smrg      /* unswizzle channel */
1862cdc920a0Smrg      swizzle = tgsi_util_get_full_src_register_swizzle (
1863cdc920a0Smrg                        &inst->Src[0],
1864cdc920a0Smrg                        chan_index);
1865cdc920a0Smrg
1866cdc920a0Smrg      /* check if the component has not been already tested */
1867cdc920a0Smrg      if (uniquemask & (1 << swizzle))
1868cdc920a0Smrg         continue;
1869cdc920a0Smrg      uniquemask |= 1 << swizzle;
18704a49301eSmrg
1871cdc920a0Smrg      FETCH(&r[0], 0, chan_index);
1872cdc920a0Smrg      for (i = 0; i < 4; i++)
1873cdc920a0Smrg         if (r[0].f[i] < 0.0f)
1874cdc920a0Smrg            kilmask |= 1 << i;
1875cdc920a0Smrg   }
18764a49301eSmrg
1877af69d88dSmrg   /* restrict to fragments currently executing */
1878af69d88dSmrg   kilmask &= mach->ExecMask;
1879af69d88dSmrg
18807ec681f3Smrg   mach->KillMask |= kilmask;
18814a49301eSmrg}
18824a49301eSmrg
1883cdc920a0Smrg/**
1884af69d88dSmrg * Unconditional fragment kill/discard.
1885cdc920a0Smrg */
18864a49301eSmrgstatic void
188701e04c3fSmrgexec_kill(struct tgsi_exec_machine *mach)
18884a49301eSmrg{
18897ec681f3Smrg   /* kill fragment for all fragments currently executing.
18907ec681f3Smrg    * bit 0 = pixel 0, bit 1 = pixel 1, etc.
18917ec681f3Smrg    */
18927ec681f3Smrg   mach->KillMask |= mach->ExecMask;
18934a49301eSmrg}
18944a49301eSmrg
18954a49301eSmrgstatic void
1896361fc4cbSmayaemit_vertex(struct tgsi_exec_machine *mach,
1897361fc4cbSmaya            const struct tgsi_full_instruction *inst)
18984a49301eSmrg{
1899361fc4cbSmaya   union tgsi_exec_channel r[1];
1900361fc4cbSmaya   unsigned stream_id;
19017ec681f3Smrg   unsigned prim_count;
1902cdc920a0Smrg   /* FIXME: check for exec mask correctly
1903cdc920a0Smrg   unsigned i;
1904af69d88dSmrg   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1905cdc920a0Smrg         if ((mach->ExecMask & (1 << i)))
1906cdc920a0Smrg   */
1907361fc4cbSmaya   IFETCH(&r[0], 0, TGSI_CHAN_X);
1908361fc4cbSmaya   stream_id = r[0].u[0];
19097ec681f3Smrg   prim_count = mach->OutputPrimCount[stream_id];
1910cdc920a0Smrg   if (mach->ExecMask) {
19117ec681f3Smrg      if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices)
1912af69d88dSmrg         return;
1913af69d88dSmrg
19147ec681f3Smrg      if (mach->Primitives[stream_id][prim_count] == 0)
19157ec681f3Smrg         mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset;
19167ec681f3Smrg      mach->OutputVertexOffset += mach->NumOutputs;
19177ec681f3Smrg      mach->Primitives[stream_id][prim_count]++;
1918cdc920a0Smrg   }
19194a49301eSmrg}
19204a49301eSmrg
19214a49301eSmrgstatic void
1922361fc4cbSmayaemit_primitive(struct tgsi_exec_machine *mach,
1923361fc4cbSmaya               const struct tgsi_full_instruction *inst)
19244a49301eSmrg{
1925361fc4cbSmaya   unsigned *prim_count;
1926361fc4cbSmaya   union tgsi_exec_channel r[1];
1927361fc4cbSmaya   unsigned stream_id = 0;
1928cdc920a0Smrg   /* FIXME: check for exec mask correctly
1929cdc920a0Smrg   unsigned i;
1930af69d88dSmrg   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1931cdc920a0Smrg         if ((mach->ExecMask & (1 << i)))
1932cdc920a0Smrg   */
1933361fc4cbSmaya   if (inst) {
1934361fc4cbSmaya      IFETCH(&r[0], 0, TGSI_CHAN_X);
1935361fc4cbSmaya      stream_id = r[0].u[0];
1936361fc4cbSmaya   }
19377ec681f3Smrg   prim_count = &mach->OutputPrimCount[stream_id];
1938cdc920a0Smrg   if (mach->ExecMask) {
1939cdc920a0Smrg      ++(*prim_count);
19407ec681f3Smrg      debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES);
1941361fc4cbSmaya      mach->Primitives[stream_id][*prim_count] = 0;
1942cdc920a0Smrg   }
19434a49301eSmrg}
19444a49301eSmrg
19453464ebd5Sriastradhstatic void
19463464ebd5Sriastradhconditional_emit_primitive(struct tgsi_exec_machine *mach)
19473464ebd5Sriastradh{
194801e04c3fSmrg   if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
19497ec681f3Smrg      int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]];
19503464ebd5Sriastradh      if (emitted_verts) {
1951361fc4cbSmaya         emit_primitive(mach, NULL);
19523464ebd5Sriastradh      }
19533464ebd5Sriastradh   }
19543464ebd5Sriastradh}
19553464ebd5Sriastradh
19563464ebd5Sriastradh
1957cdc920a0Smrg/*
1958cdc920a0Smrg * Fetch four texture samples using STR texture coordinates.
1959cdc920a0Smrg */
19604a49301eSmrgstatic void
1961cdc920a0Smrgfetch_texel( struct tgsi_sampler *sampler,
1962af69d88dSmrg             const unsigned sview_idx,
1963af69d88dSmrg             const unsigned sampler_idx,
1964cdc920a0Smrg             const union tgsi_exec_channel *s,
1965cdc920a0Smrg             const union tgsi_exec_channel *t,
1966cdc920a0Smrg             const union tgsi_exec_channel *p,
1967cdc920a0Smrg             const union tgsi_exec_channel *c0,
1968af69d88dSmrg             const union tgsi_exec_channel *c1,
1969af69d88dSmrg             float derivs[3][2][TGSI_QUAD_SIZE],
1970af69d88dSmrg             const int8_t offset[3],
1971cdc920a0Smrg             enum tgsi_sampler_control control,
1972cdc920a0Smrg             union tgsi_exec_channel *r,
1973cdc920a0Smrg             union tgsi_exec_channel *g,
1974cdc920a0Smrg             union tgsi_exec_channel *b,
1975cdc920a0Smrg             union tgsi_exec_channel *a )
19764a49301eSmrg{
1977cdc920a0Smrg   uint j;
1978af69d88dSmrg   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
19794a49301eSmrg
1980af69d88dSmrg   /* FIXME: handle explicit derivs, offsets */
1981af69d88dSmrg   sampler->get_samples(sampler, sview_idx, sampler_idx,
1982af69d88dSmrg                        s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
19834a49301eSmrg
1984cdc920a0Smrg   for (j = 0; j < 4; j++) {
1985cdc920a0Smrg      r->f[j] = rgba[0][j];
1986cdc920a0Smrg      g->f[j] = rgba[1][j];
1987cdc920a0Smrg      b->f[j] = rgba[2][j];
1988cdc920a0Smrg      a->f[j] = rgba[3][j];
1989cdc920a0Smrg   }
19904a49301eSmrg}
19914a49301eSmrg
19924a49301eSmrg
/*
 * Values for the `modifier` argument of exec_tex().
 * TEX_MODIFIER_NONE means no extra modifier value is fetched.
 */
#define TEX_MODIFIER_NONE         0
#define TEX_MODIFIER_PROJECTED    1
#define TEX_MODIFIER_LOD_BIAS     2
#define TEX_MODIFIER_EXPLICIT_LOD 3
#define TEX_MODIFIER_LEVEL_ZERO   4
#define TEX_MODIFIER_GATHER       5
1999af69d88dSmrg
2000af69d88dSmrg/*
2001af69d88dSmrg * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2002af69d88dSmrg */
2003af69d88dSmrgstatic void
2004af69d88dSmrgfetch_texel_offsets(struct tgsi_exec_machine *mach,
2005af69d88dSmrg                    const struct tgsi_full_instruction *inst,
2006af69d88dSmrg                    int8_t offsets[3])
2007af69d88dSmrg{
2008af69d88dSmrg   if (inst->Texture.NumOffsets == 1) {
2009af69d88dSmrg      union tgsi_exec_channel index;
2010af69d88dSmrg      union tgsi_exec_channel offset[3];
2011af69d88dSmrg      index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
201201e04c3fSmrg      fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2013af69d88dSmrg                             inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
201401e04c3fSmrg      fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2015af69d88dSmrg                             inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
201601e04c3fSmrg      fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2017af69d88dSmrg                             inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2018af69d88dSmrg     offsets[0] = offset[0].i[0];
2019af69d88dSmrg     offsets[1] = offset[1].i[0];
2020af69d88dSmrg     offsets[2] = offset[2].i[0];
2021af69d88dSmrg   } else {
2022af69d88dSmrg     assert(inst->Texture.NumOffsets == 0);
2023af69d88dSmrg     offsets[0] = offsets[1] = offsets[2] = 0;
2024af69d88dSmrg   }
2025af69d88dSmrg}
2026af69d88dSmrg
2027af69d88dSmrg
2028af69d88dSmrg/*
2029af69d88dSmrg * Fetch dx and dy values for one channel (s, t or r).
2030af69d88dSmrg * Put dx values into one float array, dy values into another.
2031af69d88dSmrg */
2032af69d88dSmrgstatic void
2033af69d88dSmrgfetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2034af69d88dSmrg                           const struct tgsi_full_instruction *inst,
2035af69d88dSmrg                           unsigned regdsrcx,
2036af69d88dSmrg                           unsigned chan,
2037af69d88dSmrg                           float derivs[2][TGSI_QUAD_SIZE])
2038af69d88dSmrg{
2039af69d88dSmrg   union tgsi_exec_channel d;
2040af69d88dSmrg   FETCH(&d, regdsrcx, chan);
2041af69d88dSmrg   derivs[0][0] = d.f[0];
2042af69d88dSmrg   derivs[0][1] = d.f[1];
2043af69d88dSmrg   derivs[0][2] = d.f[2];
2044af69d88dSmrg   derivs[0][3] = d.f[3];
2045af69d88dSmrg   FETCH(&d, regdsrcx + 1, chan);
2046af69d88dSmrg   derivs[1][0] = d.f[0];
2047af69d88dSmrg   derivs[1][1] = d.f[1];
2048af69d88dSmrg   derivs[1][2] = d.f[2];
2049af69d88dSmrg   derivs[1][3] = d.f[3];
2050af69d88dSmrg}
2051cdc920a0Smrg
205201e04c3fSmrgstatic uint
205301e04c3fSmrgfetch_sampler_unit(struct tgsi_exec_machine *mach,
205401e04c3fSmrg                   const struct tgsi_full_instruction *inst,
205501e04c3fSmrg                   uint sampler)
205601e04c3fSmrg{
205701e04c3fSmrg   uint unit = 0;
205801e04c3fSmrg   int i;
205901e04c3fSmrg   if (inst->Src[sampler].Register.Indirect) {
206001e04c3fSmrg      const struct tgsi_full_src_register *reg = &inst->Src[sampler];
206101e04c3fSmrg      union tgsi_exec_channel indir_index, index2;
206201e04c3fSmrg      const uint execmask = mach->ExecMask;
206301e04c3fSmrg      index2.i[0] =
206401e04c3fSmrg      index2.i[1] =
206501e04c3fSmrg      index2.i[2] =
206601e04c3fSmrg      index2.i[3] = reg->Indirect.Index;
206701e04c3fSmrg
206801e04c3fSmrg      fetch_src_file_channel(mach,
206901e04c3fSmrg                             reg->Indirect.File,
207001e04c3fSmrg                             reg->Indirect.Swizzle,
207101e04c3fSmrg                             &index2,
207201e04c3fSmrg                             &ZeroVec,
207301e04c3fSmrg                             &indir_index);
207401e04c3fSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
207501e04c3fSmrg         if (execmask & (1 << i)) {
207601e04c3fSmrg            unit = inst->Src[sampler].Register.Index + indir_index.i[i];
207701e04c3fSmrg            break;
207801e04c3fSmrg         }
207901e04c3fSmrg      }
208001e04c3fSmrg
208101e04c3fSmrg   } else {
208201e04c3fSmrg      unit = inst->Src[sampler].Register.Index;
208301e04c3fSmrg   }
208401e04c3fSmrg   return unit;
208501e04c3fSmrg}
2086cdc920a0Smrg
2087af69d88dSmrg/*
2088af69d88dSmrg * execute a texture instruction.
2089af69d88dSmrg *
209001e04c3fSmrg * modifier is used to control the channel routing for the
2091af69d88dSmrg * instruction variants like proj, lod, and texture with lod bias.
2092af69d88dSmrg * sampler indicates which src register the sampler is contained in.
2093af69d88dSmrg */
20944a49301eSmrgstatic void
2095cdc920a0Smrgexec_tex(struct tgsi_exec_machine *mach,
2096cdc920a0Smrg         const struct tgsi_full_instruction *inst,
2097af69d88dSmrg         uint modifier, uint sampler)
20984a49301eSmrg{
2099af69d88dSmrg   const union tgsi_exec_channel *args[5], *proj = NULL;
2100af69d88dSmrg   union tgsi_exec_channel r[5];
210101e04c3fSmrg   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
21023464ebd5Sriastradh   uint chan;
210301e04c3fSmrg   uint unit;
2104af69d88dSmrg   int8_t offsets[3];
2105af69d88dSmrg   int dim, shadow_ref, i;
2106cdc920a0Smrg
210701e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, sampler);
2108af69d88dSmrg   /* always fetch all 3 offsets, overkill but keeps code simple */
2109af69d88dSmrg   fetch_texel_offsets(mach, inst, offsets);
2110cdc920a0Smrg
2111af69d88dSmrg   assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2112af69d88dSmrg   assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2113cdc920a0Smrg
211401e04c3fSmrg   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
211501e04c3fSmrg   shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2116cdc920a0Smrg
2117af69d88dSmrg   assert(dim <= 4);
2118af69d88dSmrg   if (shadow_ref >= 0)
211901e04c3fSmrg      assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2120cdc920a0Smrg
2121af69d88dSmrg   /* fetch modifier to the last argument */
2122af69d88dSmrg   if (modifier != TEX_MODIFIER_NONE) {
212301e04c3fSmrg      const int last = ARRAY_SIZE(args) - 1;
2124cdc920a0Smrg
2125af69d88dSmrg      /* fetch modifier from src0.w or src1.x */
2126af69d88dSmrg      if (sampler == 1) {
2127af69d88dSmrg         assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2128af69d88dSmrg         FETCH(&r[last], 0, TGSI_CHAN_W);
2129af69d88dSmrg      }
2130af69d88dSmrg      else {
2131af69d88dSmrg         FETCH(&r[last], 1, TGSI_CHAN_X);
2132cdc920a0Smrg      }
2133cdc920a0Smrg
2134af69d88dSmrg      if (modifier != TEX_MODIFIER_PROJECTED) {
2135af69d88dSmrg         args[last] = &r[last];
2136af69d88dSmrg      }
2137af69d88dSmrg      else {
2138af69d88dSmrg         proj = &r[last];
2139af69d88dSmrg         args[last] = &ZeroVec;
21403464ebd5Sriastradh      }
21413464ebd5Sriastradh
2142af69d88dSmrg      /* point unused arguments to zero vector */
2143af69d88dSmrg      for (i = dim; i < last; i++)
2144af69d88dSmrg         args[i] = &ZeroVec;
21453464ebd5Sriastradh
2146af69d88dSmrg      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
214701e04c3fSmrg         control = TGSI_SAMPLER_LOD_EXPLICIT;
2148af69d88dSmrg      else if (modifier == TEX_MODIFIER_LOD_BIAS)
214901e04c3fSmrg         control = TGSI_SAMPLER_LOD_BIAS;
215001e04c3fSmrg      else if (modifier == TEX_MODIFIER_GATHER)
215101e04c3fSmrg         control = TGSI_SAMPLER_GATHER;
2152af69d88dSmrg   }
2153af69d88dSmrg   else {
215401e04c3fSmrg      for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2155af69d88dSmrg         args[i] = &ZeroVec;
2156af69d88dSmrg   }
21573464ebd5Sriastradh
2158af69d88dSmrg   /* fetch coordinates */
2159af69d88dSmrg   for (i = 0; i < dim; i++) {
2160af69d88dSmrg      FETCH(&r[i], 0, TGSI_CHAN_X + i);
21613464ebd5Sriastradh
2162af69d88dSmrg      if (proj)
2163af69d88dSmrg         micro_div(&r[i], &r[i], proj);
21643464ebd5Sriastradh
2165af69d88dSmrg      args[i] = &r[i];
2166af69d88dSmrg   }
2167cdc920a0Smrg
2168af69d88dSmrg   /* fetch reference value */
2169af69d88dSmrg   if (shadow_ref >= 0) {
2170af69d88dSmrg      FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2171cdc920a0Smrg
2172af69d88dSmrg      if (proj)
2173af69d88dSmrg         micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2174cdc920a0Smrg
2175af69d88dSmrg      args[shadow_ref] = &r[shadow_ref];
2176cdc920a0Smrg   }
2177cdc920a0Smrg
2178af69d88dSmrg   fetch_texel(mach->Sampler, unit, unit,
2179af69d88dSmrg         args[0], args[1], args[2], args[3], args[4],
2180af69d88dSmrg         NULL, offsets, control,
2181af69d88dSmrg         &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2182af69d88dSmrg
2183af69d88dSmrg#if 0
2184af69d88dSmrg   debug_printf("fetch r: %g %g %g %g\n",
2185af69d88dSmrg         r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2186af69d88dSmrg   debug_printf("fetch g: %g %g %g %g\n",
2187af69d88dSmrg         r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2188af69d88dSmrg   debug_printf("fetch b: %g %g %g %g\n",
2189af69d88dSmrg         r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2190af69d88dSmrg   debug_printf("fetch a: %g %g %g %g\n",
2191af69d88dSmrg         r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2192af69d88dSmrg#endif
2193af69d88dSmrg
2194af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
21953464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
21967ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
21973464ebd5Sriastradh      }
2198cdc920a0Smrg   }
21994a49301eSmrg}
22004a49301eSmrg
220101e04c3fSmrgstatic void
220201e04c3fSmrgexec_lodq(struct tgsi_exec_machine *mach,
220301e04c3fSmrg          const struct tgsi_full_instruction *inst)
220401e04c3fSmrg{
220501e04c3fSmrg   uint resource_unit, sampler_unit;
220601e04c3fSmrg   unsigned dim;
220701e04c3fSmrg   unsigned i;
220801e04c3fSmrg   union tgsi_exec_channel coords[4];
220901e04c3fSmrg   const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
221001e04c3fSmrg   union tgsi_exec_channel r[2];
221101e04c3fSmrg
221201e04c3fSmrg   resource_unit = fetch_sampler_unit(mach, inst, 1);
221301e04c3fSmrg   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
221401e04c3fSmrg      uint target = mach->SamplerViews[resource_unit].Resource;
221501e04c3fSmrg      dim = tgsi_util_get_texture_coord_dim(target);
221601e04c3fSmrg      sampler_unit = fetch_sampler_unit(mach, inst, 2);
221701e04c3fSmrg   } else {
221801e04c3fSmrg      dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
221901e04c3fSmrg      sampler_unit = resource_unit;
222001e04c3fSmrg   }
222101e04c3fSmrg   assert(dim <= ARRAY_SIZE(coords));
222201e04c3fSmrg   /* fetch coordinates */
222301e04c3fSmrg   for (i = 0; i < dim; i++) {
222401e04c3fSmrg      FETCH(&coords[i], 0, TGSI_CHAN_X + i);
222501e04c3fSmrg      args[i] = &coords[i];
222601e04c3fSmrg   }
222701e04c3fSmrg   for (i = dim; i < ARRAY_SIZE(coords); i++) {
222801e04c3fSmrg      args[i] = &ZeroVec;
222901e04c3fSmrg   }
223001e04c3fSmrg   mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
223101e04c3fSmrg                            args[0]->f,
223201e04c3fSmrg                            args[1]->f,
223301e04c3fSmrg                            args[2]->f,
223401e04c3fSmrg                            args[3]->f,
223501e04c3fSmrg                            TGSI_SAMPLER_LOD_NONE,
223601e04c3fSmrg                            r[0].f,
223701e04c3fSmrg                            r[1].f);
223801e04c3fSmrg
223901e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
22407ec681f3Smrg      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
224101e04c3fSmrg   }
224201e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
22437ec681f3Smrg      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
224401e04c3fSmrg   }
224501e04c3fSmrg   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
224601e04c3fSmrg      unsigned char swizzles[4];
224701e04c3fSmrg      unsigned chan;
224801e04c3fSmrg      swizzles[0] = inst->Src[1].Register.SwizzleX;
224901e04c3fSmrg      swizzles[1] = inst->Src[1].Register.SwizzleY;
225001e04c3fSmrg      swizzles[2] = inst->Src[1].Register.SwizzleZ;
225101e04c3fSmrg      swizzles[3] = inst->Src[1].Register.SwizzleW;
225201e04c3fSmrg
225301e04c3fSmrg      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
225401e04c3fSmrg         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
225501e04c3fSmrg            if (swizzles[chan] >= 2) {
225601e04c3fSmrg               store_dest(mach, &ZeroVec,
22577ec681f3Smrg                          &inst->Dst[0], inst, chan);
225801e04c3fSmrg            } else {
225901e04c3fSmrg               store_dest(mach, &r[swizzles[chan]],
22607ec681f3Smrg                          &inst->Dst[0], inst, chan);
226101e04c3fSmrg            }
226201e04c3fSmrg         }
226301e04c3fSmrg      }
226401e04c3fSmrg   } else {
226501e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
22667ec681f3Smrg         store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
226701e04c3fSmrg      }
226801e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
22697ec681f3Smrg         store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
227001e04c3fSmrg      }
227101e04c3fSmrg   }
227201e04c3fSmrg}
2273af69d88dSmrg
22744a49301eSmrgstatic void
2275cdc920a0Smrgexec_txd(struct tgsi_exec_machine *mach,
2276cdc920a0Smrg         const struct tgsi_full_instruction *inst)
22774a49301eSmrg{
2278cdc920a0Smrg   union tgsi_exec_channel r[4];
2279af69d88dSmrg   float derivs[3][2][TGSI_QUAD_SIZE];
22803464ebd5Sriastradh   uint chan;
228101e04c3fSmrg   uint unit;
2282af69d88dSmrg   int8_t offsets[3];
2283cdc920a0Smrg
228401e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 3);
2285af69d88dSmrg   /* always fetch all 3 offsets, overkill but keeps code simple */
2286af69d88dSmrg   fetch_texel_offsets(mach, inst, offsets);
2287cdc920a0Smrg
2288cdc920a0Smrg   switch (inst->Texture.Texture) {
2289cdc920a0Smrg   case TGSI_TEXTURE_1D:
2290af69d88dSmrg      FETCH(&r[0], 0, TGSI_CHAN_X);
2291af69d88dSmrg
2292af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2293af69d88dSmrg
2294af69d88dSmrg      fetch_texel(mach->Sampler, unit, unit,
2295af69d88dSmrg                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
229601e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2297af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2298af69d88dSmrg      break;
2299af69d88dSmrg
2300cdc920a0Smrg   case TGSI_TEXTURE_SHADOW1D:
2301af69d88dSmrg   case TGSI_TEXTURE_1D_ARRAY:
2302af69d88dSmrg   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2303af69d88dSmrg      /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2304af69d88dSmrg      FETCH(&r[0], 0, TGSI_CHAN_X);
2305af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2306af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2307cdc920a0Smrg
2308af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2309cdc920a0Smrg
2310af69d88dSmrg      fetch_texel(mach->Sampler, unit, unit,
2311af69d88dSmrg                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
231201e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2313cdc920a0Smrg                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2314cdc920a0Smrg      break;
2315cdc920a0Smrg
2316cdc920a0Smrg   case TGSI_TEXTURE_2D:
2317cdc920a0Smrg   case TGSI_TEXTURE_RECT:
2318af69d88dSmrg      FETCH(&r[0], 0, TGSI_CHAN_X);
2319af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2320af69d88dSmrg
2321af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2322af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2323af69d88dSmrg
2324af69d88dSmrg      fetch_texel(mach->Sampler, unit, unit,
2325af69d88dSmrg                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
232601e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2327af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2328af69d88dSmrg      break;
2329cdc920a0Smrg
2330cdc920a0Smrg
2331af69d88dSmrg   case TGSI_TEXTURE_SHADOW2D:
2332af69d88dSmrg   case TGSI_TEXTURE_SHADOWRECT:
2333af69d88dSmrg   case TGSI_TEXTURE_2D_ARRAY:
2334af69d88dSmrg   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2335af69d88dSmrg      /* only SHADOW2D_ARRAY actually needs W */
2336af69d88dSmrg      FETCH(&r[0], 0, TGSI_CHAN_X);
2337af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2338af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2339af69d88dSmrg      FETCH(&r[3], 0, TGSI_CHAN_W);
2340af69d88dSmrg
2341af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2342af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2343af69d88dSmrg
2344af69d88dSmrg      fetch_texel(mach->Sampler, unit, unit,
2345af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
234601e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2347cdc920a0Smrg                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2348cdc920a0Smrg      break;
2349cdc920a0Smrg
2350cdc920a0Smrg   case TGSI_TEXTURE_3D:
2351cdc920a0Smrg   case TGSI_TEXTURE_CUBE:
2352af69d88dSmrg   case TGSI_TEXTURE_CUBE_ARRAY:
235301e04c3fSmrg   case TGSI_TEXTURE_SHADOWCUBE:
235401e04c3fSmrg      /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2355af69d88dSmrg      FETCH(&r[0], 0, TGSI_CHAN_X);
2356af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2357af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2358af69d88dSmrg      FETCH(&r[3], 0, TGSI_CHAN_W);
2359af69d88dSmrg
2360af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2361af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2362af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2363af69d88dSmrg
2364af69d88dSmrg      fetch_texel(mach->Sampler, unit, unit,
2365af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
236601e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2367af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2368cdc920a0Smrg      break;
2369cdc920a0Smrg
2370cdc920a0Smrg   default:
2371cdc920a0Smrg      assert(0);
2372cdc920a0Smrg   }
2373cdc920a0Smrg
2374af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
23753464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
23767ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
23773464ebd5Sriastradh      }
23783464ebd5Sriastradh   }
23793464ebd5Sriastradh}
23803464ebd5Sriastradh
23813464ebd5Sriastradh
2382af69d88dSmrgstatic void
2383af69d88dSmrgexec_txf(struct tgsi_exec_machine *mach,
2384af69d88dSmrg         const struct tgsi_full_instruction *inst)
2385af69d88dSmrg{
2386af69d88dSmrg   union tgsi_exec_channel r[4];
2387af69d88dSmrg   uint chan;
238801e04c3fSmrg   uint unit;
2389af69d88dSmrg   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2390af69d88dSmrg   int j;
2391af69d88dSmrg   int8_t offsets[3];
2392af69d88dSmrg   unsigned target;
2393af69d88dSmrg
239401e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 1);
2395af69d88dSmrg   /* always fetch all 3 offsets, overkill but keeps code simple */
2396af69d88dSmrg   fetch_texel_offsets(mach, inst, offsets);
2397af69d88dSmrg
2398af69d88dSmrg   IFETCH(&r[3], 0, TGSI_CHAN_W);
2399af69d88dSmrg
240001e04c3fSmrg   if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
240101e04c3fSmrg       inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2402af69d88dSmrg      target = mach->SamplerViews[unit].Resource;
2403af69d88dSmrg   }
2404af69d88dSmrg   else {
2405af69d88dSmrg      target = inst->Texture.Texture;
2406af69d88dSmrg   }
2407af69d88dSmrg   switch(target) {
2408af69d88dSmrg   case TGSI_TEXTURE_3D:
2409af69d88dSmrg   case TGSI_TEXTURE_2D_ARRAY:
2410af69d88dSmrg   case TGSI_TEXTURE_SHADOW2D_ARRAY:
241101e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2412af69d88dSmrg      IFETCH(&r[2], 0, TGSI_CHAN_Z);
24137ec681f3Smrg      FALLTHROUGH;
2414af69d88dSmrg   case TGSI_TEXTURE_2D:
2415af69d88dSmrg   case TGSI_TEXTURE_RECT:
2416af69d88dSmrg   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2417af69d88dSmrg   case TGSI_TEXTURE_SHADOW2D:
2418af69d88dSmrg   case TGSI_TEXTURE_SHADOWRECT:
2419af69d88dSmrg   case TGSI_TEXTURE_1D_ARRAY:
242001e04c3fSmrg   case TGSI_TEXTURE_2D_MSAA:
2421af69d88dSmrg      IFETCH(&r[1], 0, TGSI_CHAN_Y);
24227ec681f3Smrg      FALLTHROUGH;
2423af69d88dSmrg   case TGSI_TEXTURE_BUFFER:
2424af69d88dSmrg   case TGSI_TEXTURE_1D:
2425af69d88dSmrg   case TGSI_TEXTURE_SHADOW1D:
2426af69d88dSmrg      IFETCH(&r[0], 0, TGSI_CHAN_X);
2427af69d88dSmrg      break;
2428af69d88dSmrg   default:
2429af69d88dSmrg      assert(0);
2430af69d88dSmrg      break;
2431af69d88dSmrg   }
2432af69d88dSmrg
2433af69d88dSmrg   mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2434af69d88dSmrg                            offsets, rgba);
2435af69d88dSmrg
2436af69d88dSmrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2437af69d88dSmrg      r[0].f[j] = rgba[0][j];
2438af69d88dSmrg      r[1].f[j] = rgba[1][j];
2439af69d88dSmrg      r[2].f[j] = rgba[2][j];
2440af69d88dSmrg      r[3].f[j] = rgba[3][j];
2441af69d88dSmrg   }
2442af69d88dSmrg
244301e04c3fSmrg   if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
244401e04c3fSmrg       inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2445af69d88dSmrg      unsigned char swizzles[4];
2446af69d88dSmrg      swizzles[0] = inst->Src[1].Register.SwizzleX;
2447af69d88dSmrg      swizzles[1] = inst->Src[1].Register.SwizzleY;
2448af69d88dSmrg      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449af69d88dSmrg      swizzles[3] = inst->Src[1].Register.SwizzleW;
2450af69d88dSmrg
2451af69d88dSmrg      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2452af69d88dSmrg         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2453af69d88dSmrg            store_dest(mach, &r[swizzles[chan]],
24547ec681f3Smrg                       &inst->Dst[0], inst, chan);
2455af69d88dSmrg         }
2456af69d88dSmrg      }
2457af69d88dSmrg   }
2458af69d88dSmrg   else {
2459af69d88dSmrg      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2460af69d88dSmrg         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
24617ec681f3Smrg            store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2462af69d88dSmrg         }
2463af69d88dSmrg      }
2464af69d88dSmrg   }
2465af69d88dSmrg}
2466af69d88dSmrg
2467af69d88dSmrgstatic void
2468af69d88dSmrgexec_txq(struct tgsi_exec_machine *mach,
2469af69d88dSmrg         const struct tgsi_full_instruction *inst)
2470af69d88dSmrg{
2471af69d88dSmrg   int result[4];
2472af69d88dSmrg   union tgsi_exec_channel r[4], src;
2473af69d88dSmrg   uint chan;
247401e04c3fSmrg   uint unit;
2475af69d88dSmrg   int i,j;
2476af69d88dSmrg
247701e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 1);
247801e04c3fSmrg
2479af69d88dSmrg   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2480af69d88dSmrg
2481af69d88dSmrg   /* XXX: This interface can't return per-pixel values */
2482af69d88dSmrg   mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2483af69d88dSmrg
2484af69d88dSmrg   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2485af69d88dSmrg      for (j = 0; j < 4; j++) {
2486af69d88dSmrg         r[j].i[i] = result[j];
2487af69d88dSmrg      }
2488af69d88dSmrg   }
2489af69d88dSmrg
2490af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2491af69d88dSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
24927ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2493af69d88dSmrg      }
2494af69d88dSmrg   }
2495af69d88dSmrg}
24963464ebd5Sriastradh
24973464ebd5Sriastradhstatic void
24983464ebd5Sriastradhexec_sample(struct tgsi_exec_machine *mach,
24993464ebd5Sriastradh            const struct tgsi_full_instruction *inst,
2500af69d88dSmrg            uint modifier, boolean compare)
25013464ebd5Sriastradh{
25023464ebd5Sriastradh   const uint resource_unit = inst->Src[1].Register.Index;
25033464ebd5Sriastradh   const uint sampler_unit = inst->Src[2].Register.Index;
250401e04c3fSmrg   union tgsi_exec_channel r[5], c1;
25053464ebd5Sriastradh   const union tgsi_exec_channel *lod = &ZeroVec;
250601e04c3fSmrg   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
25073464ebd5Sriastradh   uint chan;
2508af69d88dSmrg   unsigned char swizzles[4];
2509af69d88dSmrg   int8_t offsets[3];
25103464ebd5Sriastradh
2511af69d88dSmrg   /* always fetch all 3 offsets, overkill but keeps code simple */
2512af69d88dSmrg   fetch_texel_offsets(mach, inst, offsets);
25133464ebd5Sriastradh
2514af69d88dSmrg   assert(modifier != TEX_MODIFIER_PROJECTED);
2515af69d88dSmrg
2516af69d88dSmrg   if (modifier != TEX_MODIFIER_NONE) {
2517af69d88dSmrg      if (modifier == TEX_MODIFIER_LOD_BIAS) {
2518af69d88dSmrg         FETCH(&c1, 3, TGSI_CHAN_X);
2519af69d88dSmrg         lod = &c1;
252001e04c3fSmrg         control = TGSI_SAMPLER_LOD_BIAS;
2521af69d88dSmrg      }
2522af69d88dSmrg      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2523af69d88dSmrg         FETCH(&c1, 3, TGSI_CHAN_X);
2524af69d88dSmrg         lod = &c1;
252501e04c3fSmrg         control = TGSI_SAMPLER_LOD_EXPLICIT;
252601e04c3fSmrg      }
252701e04c3fSmrg      else if (modifier == TEX_MODIFIER_GATHER) {
252801e04c3fSmrg         control = TGSI_SAMPLER_GATHER;
2529af69d88dSmrg      }
2530af69d88dSmrg      else {
2531af69d88dSmrg         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
253201e04c3fSmrg         control = TGSI_SAMPLER_LOD_ZERO;
25333464ebd5Sriastradh      }
25343464ebd5Sriastradh   }
25353464ebd5Sriastradh
2536af69d88dSmrg   FETCH(&r[0], 0, TGSI_CHAN_X);
25373464ebd5Sriastradh
2538af69d88dSmrg   switch (mach->SamplerViews[resource_unit].Resource) {
25393464ebd5Sriastradh   case TGSI_TEXTURE_1D:
2540af69d88dSmrg      if (compare) {
2541af69d88dSmrg         FETCH(&r[2], 3, TGSI_CHAN_X);
2542af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2543af69d88dSmrg                     &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2544af69d88dSmrg                     NULL, offsets, control,
2545af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2546af69d88dSmrg      }
2547af69d88dSmrg      else {
2548af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2549af69d88dSmrg                     &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2550af69d88dSmrg                     NULL, offsets, control,
2551af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
25523464ebd5Sriastradh      }
25533464ebd5Sriastradh      break;
25543464ebd5Sriastradh
2555af69d88dSmrg   case TGSI_TEXTURE_1D_ARRAY:
25563464ebd5Sriastradh   case TGSI_TEXTURE_2D:
25573464ebd5Sriastradh   case TGSI_TEXTURE_RECT:
2558af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2559af69d88dSmrg      if (compare) {
2560af69d88dSmrg         FETCH(&r[2], 3, TGSI_CHAN_X);
2561af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2562af69d88dSmrg                     &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2563af69d88dSmrg                     NULL, offsets, control,
2564af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2565af69d88dSmrg      }
2566af69d88dSmrg      else {
2567af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2568af69d88dSmrg                     &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2569af69d88dSmrg                     NULL, offsets, control,
2570af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
25713464ebd5Sriastradh      }
25723464ebd5Sriastradh      break;
25733464ebd5Sriastradh
2574af69d88dSmrg   case TGSI_TEXTURE_2D_ARRAY:
25753464ebd5Sriastradh   case TGSI_TEXTURE_3D:
25763464ebd5Sriastradh   case TGSI_TEXTURE_CUBE:
2577af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2578af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2579af69d88dSmrg      if(compare) {
2580af69d88dSmrg         FETCH(&r[3], 3, TGSI_CHAN_X);
2581af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2582af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3], lod,
2583af69d88dSmrg                     NULL, offsets, control,
2584af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);
2585af69d88dSmrg      }
2586af69d88dSmrg      else {
2587af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2588af69d88dSmrg                     &r[0], &r[1], &r[2], &ZeroVec, lod,
2589af69d88dSmrg                     NULL, offsets, control,
2590af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);
25913464ebd5Sriastradh      }
2592af69d88dSmrg      break;
25933464ebd5Sriastradh
2594af69d88dSmrg   case TGSI_TEXTURE_CUBE_ARRAY:
2595af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2596af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2597af69d88dSmrg      FETCH(&r[3], 0, TGSI_CHAN_W);
2598af69d88dSmrg      if(compare) {
2599af69d88dSmrg         FETCH(&r[4], 3, TGSI_CHAN_X);
2600af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2601af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3], &r[4],
2602af69d88dSmrg                     NULL, offsets, control,
2603af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);
2604af69d88dSmrg      }
2605af69d88dSmrg      else {
2606af69d88dSmrg         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2607af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3], lod,
2608af69d88dSmrg                     NULL, offsets, control,
2609af69d88dSmrg                     &r[0], &r[1], &r[2], &r[3]);
2610af69d88dSmrg      }
26113464ebd5Sriastradh      break;
26123464ebd5Sriastradh
2613af69d88dSmrg
26143464ebd5Sriastradh   default:
26153464ebd5Sriastradh      assert(0);
26163464ebd5Sriastradh   }
26173464ebd5Sriastradh
2618af69d88dSmrg   swizzles[0] = inst->Src[1].Register.SwizzleX;
2619af69d88dSmrg   swizzles[1] = inst->Src[1].Register.SwizzleY;
2620af69d88dSmrg   swizzles[2] = inst->Src[1].Register.SwizzleZ;
2621af69d88dSmrg   swizzles[3] = inst->Src[1].Register.SwizzleW;
2622af69d88dSmrg
2623af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
26243464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2625af69d88dSmrg         store_dest(mach, &r[swizzles[chan]],
26267ec681f3Smrg                    &inst->Dst[0], inst, chan);
26273464ebd5Sriastradh      }
26283464ebd5Sriastradh   }
26293464ebd5Sriastradh}
26303464ebd5Sriastradh
26313464ebd5Sriastradhstatic void
26323464ebd5Sriastradhexec_sample_d(struct tgsi_exec_machine *mach,
26333464ebd5Sriastradh              const struct tgsi_full_instruction *inst)
26343464ebd5Sriastradh{
26353464ebd5Sriastradh   const uint resource_unit = inst->Src[1].Register.Index;
26363464ebd5Sriastradh   const uint sampler_unit = inst->Src[2].Register.Index;
26373464ebd5Sriastradh   union tgsi_exec_channel r[4];
2638af69d88dSmrg   float derivs[3][2][TGSI_QUAD_SIZE];
26393464ebd5Sriastradh   uint chan;
2640af69d88dSmrg   unsigned char swizzles[4];
2641af69d88dSmrg   int8_t offsets[3];
2642af69d88dSmrg
2643af69d88dSmrg   /* always fetch all 3 offsets, overkill but keeps code simple */
2644af69d88dSmrg   fetch_texel_offsets(mach, inst, offsets);
2645af69d88dSmrg
2646af69d88dSmrg   FETCH(&r[0], 0, TGSI_CHAN_X);
26473464ebd5Sriastradh
2648af69d88dSmrg   switch (mach->SamplerViews[resource_unit].Resource) {
26493464ebd5Sriastradh   case TGSI_TEXTURE_1D:
2650af69d88dSmrg   case TGSI_TEXTURE_1D_ARRAY:
2651af69d88dSmrg      /* only 1D array actually needs Y */
2652af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
26533464ebd5Sriastradh
2654af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
26553464ebd5Sriastradh
2656af69d88dSmrg      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2657af69d88dSmrg                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
265801e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
26593464ebd5Sriastradh                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
26603464ebd5Sriastradh      break;
26613464ebd5Sriastradh
26623464ebd5Sriastradh   case TGSI_TEXTURE_2D:
26633464ebd5Sriastradh   case TGSI_TEXTURE_RECT:
2664af69d88dSmrg   case TGSI_TEXTURE_2D_ARRAY:
2665af69d88dSmrg      /* only 2D array actually needs Z */
2666af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2667af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
26683464ebd5Sriastradh
2669af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2670af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
26713464ebd5Sriastradh
2672af69d88dSmrg      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2673af69d88dSmrg                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
267401e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
26753464ebd5Sriastradh                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
26763464ebd5Sriastradh      break;
26773464ebd5Sriastradh
26783464ebd5Sriastradh   case TGSI_TEXTURE_3D:
26793464ebd5Sriastradh   case TGSI_TEXTURE_CUBE:
2680af69d88dSmrg   case TGSI_TEXTURE_CUBE_ARRAY:
2681af69d88dSmrg      /* only cube array actually needs W */
2682af69d88dSmrg      FETCH(&r[1], 0, TGSI_CHAN_Y);
2683af69d88dSmrg      FETCH(&r[2], 0, TGSI_CHAN_Z);
2684af69d88dSmrg      FETCH(&r[3], 0, TGSI_CHAN_W);
2685af69d88dSmrg
2686af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2687af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2688af69d88dSmrg      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2689af69d88dSmrg
2690af69d88dSmrg      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2691af69d88dSmrg                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
269201e04c3fSmrg                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
26933464ebd5Sriastradh                  &r[0], &r[1], &r[2], &r[3]);
26943464ebd5Sriastradh      break;
26953464ebd5Sriastradh
26963464ebd5Sriastradh   default:
26973464ebd5Sriastradh      assert(0);
26983464ebd5Sriastradh   }
26993464ebd5Sriastradh
2700af69d88dSmrg   swizzles[0] = inst->Src[1].Register.SwizzleX;
2701af69d88dSmrg   swizzles[1] = inst->Src[1].Register.SwizzleY;
2702af69d88dSmrg   swizzles[2] = inst->Src[1].Register.SwizzleZ;
2703af69d88dSmrg   swizzles[3] = inst->Src[1].Register.SwizzleW;
2704af69d88dSmrg
2705af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
27063464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2707af69d88dSmrg         store_dest(mach, &r[swizzles[chan]],
27087ec681f3Smrg                    &inst->Dst[0], inst, chan);
27093464ebd5Sriastradh      }
2710cdc920a0Smrg   }
27114a49301eSmrg}
27124a49301eSmrg
2713cdc920a0Smrg
2714cdc920a0Smrg/**
2715cdc920a0Smrg * Evaluate a constant-valued coefficient at the position of the
2716cdc920a0Smrg * current quad.
2717cdc920a0Smrg */
27184a49301eSmrgstatic void
2719cdc920a0Smrgeval_constant_coef(
2720cdc920a0Smrg   struct tgsi_exec_machine *mach,
2721cdc920a0Smrg   unsigned attrib,
2722cdc920a0Smrg   unsigned chan )
27234a49301eSmrg{
2724cdc920a0Smrg   unsigned i;
2725cdc920a0Smrg
2726af69d88dSmrg   for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2727cdc920a0Smrg      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2728cdc920a0Smrg   }
27294a49301eSmrg}
27304a49301eSmrg
2731361fc4cbSmayastatic void
2732361fc4cbSmayainterp_constant_offset(
2733361fc4cbSmaya      UNUSED const struct tgsi_exec_machine *mach,
2734361fc4cbSmaya      UNUSED unsigned attrib,
2735361fc4cbSmaya      UNUSED unsigned chan,
2736361fc4cbSmaya      UNUSED float ofs_x,
2737361fc4cbSmaya      UNUSED float ofs_y,
2738361fc4cbSmaya      UNUSED union tgsi_exec_channel *out_chan)
2739361fc4cbSmaya{
2740361fc4cbSmaya}
2741361fc4cbSmaya
2742cdc920a0Smrg/**
2743cdc920a0Smrg * Evaluate a linear-valued coefficient at the position of the
2744cdc920a0Smrg * current quad.
2745cdc920a0Smrg */
27464a49301eSmrgstatic void
2747361fc4cbSmayainterp_linear_offset(
2748361fc4cbSmaya      const struct tgsi_exec_machine *mach,
2749361fc4cbSmaya      unsigned attrib,
2750361fc4cbSmaya      unsigned chan,
2751361fc4cbSmaya      float ofs_x,
2752361fc4cbSmaya      float ofs_y,
2753361fc4cbSmaya      union tgsi_exec_channel *out_chan)
2754361fc4cbSmaya{
2755361fc4cbSmaya   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2756361fc4cbSmaya   const float dady = mach->InterpCoefs[attrib].dady[chan];
2757361fc4cbSmaya   const float delta = ofs_x * dadx + ofs_y * dady;
2758361fc4cbSmaya   out_chan->f[0] += delta;
2759361fc4cbSmaya   out_chan->f[1] += delta;
2760361fc4cbSmaya   out_chan->f[2] += delta;
2761361fc4cbSmaya   out_chan->f[3] += delta;
2762361fc4cbSmaya}
2763361fc4cbSmaya
2764361fc4cbSmayastatic void
2765361fc4cbSmayaeval_linear_coef(struct tgsi_exec_machine *mach,
2766361fc4cbSmaya                 unsigned attrib,
2767361fc4cbSmaya                 unsigned chan)
27684a49301eSmrg{
2769cdc920a0Smrg   const float x = mach->QuadPos.xyzw[0].f[0];
2770cdc920a0Smrg   const float y = mach->QuadPos.xyzw[1].f[0];
2771cdc920a0Smrg   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2772cdc920a0Smrg   const float dady = mach->InterpCoefs[attrib].dady[chan];
2773cdc920a0Smrg   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2774361fc4cbSmaya
2775cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2776cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2777cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2778cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
27794a49301eSmrg}
27804a49301eSmrg
2781cdc920a0Smrg/**
2782cdc920a0Smrg * Evaluate a perspective-valued coefficient at the position of the
2783cdc920a0Smrg * current quad.
2784cdc920a0Smrg */
2785361fc4cbSmaya
2786361fc4cbSmayastatic void
2787361fc4cbSmayainterp_perspective_offset(
2788361fc4cbSmaya   const struct tgsi_exec_machine *mach,
2789361fc4cbSmaya   unsigned attrib,
2790361fc4cbSmaya   unsigned chan,
2791361fc4cbSmaya   float ofs_x,
2792361fc4cbSmaya   float ofs_y,
2793361fc4cbSmaya   union tgsi_exec_channel *out_chan)
2794361fc4cbSmaya{
2795361fc4cbSmaya   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2796361fc4cbSmaya   const float dady = mach->InterpCoefs[attrib].dady[chan];
2797361fc4cbSmaya   const float *w = mach->QuadPos.xyzw[3].f;
2798361fc4cbSmaya   const float delta = ofs_x * dadx + ofs_y * dady;
2799361fc4cbSmaya   out_chan->f[0] += delta / w[0];
2800361fc4cbSmaya   out_chan->f[1] += delta / w[1];
2801361fc4cbSmaya   out_chan->f[2] += delta / w[2];
2802361fc4cbSmaya   out_chan->f[3] += delta / w[3];
2803361fc4cbSmaya}
2804361fc4cbSmaya
28054a49301eSmrgstatic void
2806cdc920a0Smrgeval_perspective_coef(
2807cdc920a0Smrg   struct tgsi_exec_machine *mach,
2808cdc920a0Smrg   unsigned attrib,
2809cdc920a0Smrg   unsigned chan )
28104a49301eSmrg{
2811cdc920a0Smrg   const float x = mach->QuadPos.xyzw[0].f[0];
2812cdc920a0Smrg   const float y = mach->QuadPos.xyzw[1].f[0];
2813cdc920a0Smrg   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2814cdc920a0Smrg   const float dady = mach->InterpCoefs[attrib].dady[chan];
2815cdc920a0Smrg   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2816cdc920a0Smrg   const float *w = mach->QuadPos.xyzw[3].f;
2817cdc920a0Smrg   /* divide by W here */
2818cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2819cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2820cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2821cdc920a0Smrg   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
28224a49301eSmrg}
2823cdc920a0Smrg
2824cdc920a0Smrg
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' into mach->Inputs.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
28294a49301eSmrg
28304a49301eSmrgstatic void
2831cdc920a0Smrgexec_declaration(struct tgsi_exec_machine *mach,
2832cdc920a0Smrg                 const struct tgsi_full_declaration *decl)
28334a49301eSmrg{
2834af69d88dSmrg   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2835af69d88dSmrg      mach->SamplerViews[decl->Range.First] = decl->SamplerView;
28363464ebd5Sriastradh      return;
28373464ebd5Sriastradh   }
28383464ebd5Sriastradh
283901e04c3fSmrg   if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
28403464ebd5Sriastradh      if (decl->Declaration.File == TGSI_FILE_INPUT) {
2841cdc920a0Smrg         uint first, last, mask;
28424a49301eSmrg
2843cdc920a0Smrg         first = decl->Range.First;
2844cdc920a0Smrg         last = decl->Range.Last;
2845cdc920a0Smrg         mask = decl->Declaration.UsageMask;
28464a49301eSmrg
28473464ebd5Sriastradh         /* XXX we could remove this special-case code since
28483464ebd5Sriastradh          * mach->InterpCoefs[first].a0 should already have the
28493464ebd5Sriastradh          * front/back-face value.  But we should first update the
28503464ebd5Sriastradh          * ureg code to emit the right UsageMask value (WRITEMASK_X).
28513464ebd5Sriastradh          * Then, we could remove the tgsi_exec_machine::Face field.
28523464ebd5Sriastradh          */
28533464ebd5Sriastradh         /* XXX make FACE a system value */
2854cdc920a0Smrg         if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2855cdc920a0Smrg            uint i;
28564a49301eSmrg
2857cdc920a0Smrg            assert(decl->Semantic.Index == 0);
2858cdc920a0Smrg            assert(first == last);
28594a49301eSmrg
2860af69d88dSmrg            for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2861cdc920a0Smrg               mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2862cdc920a0Smrg            }
2863cdc920a0Smrg         } else {
2864cdc920a0Smrg            eval_coef_func eval;
2865361fc4cbSmaya            apply_sample_offset_func interp;
2866cdc920a0Smrg            uint i, j;
2867cdc920a0Smrg
2868af69d88dSmrg            switch (decl->Interp.Interpolate) {
2869cdc920a0Smrg            case TGSI_INTERPOLATE_CONSTANT:
2870cdc920a0Smrg               eval = eval_constant_coef;
2871361fc4cbSmaya               interp = interp_constant_offset;
2872cdc920a0Smrg               break;
2873cdc920a0Smrg
2874cdc920a0Smrg            case TGSI_INTERPOLATE_LINEAR:
2875cdc920a0Smrg               eval = eval_linear_coef;
2876361fc4cbSmaya               interp = interp_linear_offset;
2877cdc920a0Smrg               break;
2878cdc920a0Smrg
2879cdc920a0Smrg            case TGSI_INTERPOLATE_PERSPECTIVE:
2880cdc920a0Smrg               eval = eval_perspective_coef;
2881361fc4cbSmaya               interp = interp_perspective_offset;
2882cdc920a0Smrg               break;
2883cdc920a0Smrg
2884af69d88dSmrg            case TGSI_INTERPOLATE_COLOR:
2885af69d88dSmrg               eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
28867ec681f3Smrg               interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
2887af69d88dSmrg               break;
2888af69d88dSmrg
2889cdc920a0Smrg            default:
2890cdc920a0Smrg               assert(0);
2891cdc920a0Smrg               return;
2892cdc920a0Smrg            }
28934a49301eSmrg
2894361fc4cbSmaya            for (i = first; i <= last; i++)
2895361fc4cbSmaya               mach->InputSampleOffsetApply[i] = interp;
2896361fc4cbSmaya
2897af69d88dSmrg            for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2898cdc920a0Smrg               if (mask & (1 << j)) {
2899cdc920a0Smrg                  for (i = first; i <= last; i++) {
2900cdc920a0Smrg                     eval(mach, i, j);
2901cdc920a0Smrg                  }
2902cdc920a0Smrg               }
2903cdc920a0Smrg            }
2904cdc920a0Smrg         }
2905af69d88dSmrg
2906af69d88dSmrg         if (DEBUG_EXECUTION) {
2907af69d88dSmrg            uint i, j;
2908af69d88dSmrg            for (i = first; i <= last; ++i) {
2909af69d88dSmrg               debug_printf("IN[%2u] = ", i);
2910af69d88dSmrg               for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2911af69d88dSmrg                  if (j > 0) {
2912af69d88dSmrg                     debug_printf("         ");
2913af69d88dSmrg                  }
2914af69d88dSmrg                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2915af69d88dSmrg                               mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2916af69d88dSmrg                               mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2917af69d88dSmrg                               mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2918af69d88dSmrg                               mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2919af69d88dSmrg               }
2920af69d88dSmrg            }
2921af69d88dSmrg         }
2922cdc920a0Smrg      }
2923cdc920a0Smrg   }
29243464ebd5Sriastradh
2925cdc920a0Smrg}
29264a49301eSmrg
/** One-operand channel operation: writes dst from src. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
29294a49301eSmrg
2930cdc920a0Smrgstatic void
2931cdc920a0Smrgexec_scalar_unary(struct tgsi_exec_machine *mach,
2932cdc920a0Smrg                  const struct tgsi_full_instruction *inst,
2933cdc920a0Smrg                  micro_unary_op op,
2934cdc920a0Smrg                  enum tgsi_exec_datatype src_datatype)
2935cdc920a0Smrg{
2936cdc920a0Smrg   unsigned int chan;
2937cdc920a0Smrg   union tgsi_exec_channel src;
2938cdc920a0Smrg   union tgsi_exec_channel dst;
29394a49301eSmrg
2940af69d88dSmrg   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2941cdc920a0Smrg   op(&dst, &src);
2942af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2943cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
29447ec681f3Smrg         store_dest(mach, &dst, &inst->Dst[0], inst, chan);
29454a49301eSmrg      }
29464a49301eSmrg   }
29474a49301eSmrg}
29484a49301eSmrg
29494a49301eSmrgstatic void
2950cdc920a0Smrgexec_vector_unary(struct tgsi_exec_machine *mach,
2951cdc920a0Smrg                  const struct tgsi_full_instruction *inst,
2952cdc920a0Smrg                  micro_unary_op op,
2953cdc920a0Smrg                  enum tgsi_exec_datatype src_datatype)
29544a49301eSmrg{
2955cdc920a0Smrg   unsigned int chan;
2956cdc920a0Smrg   struct tgsi_exec_vector dst;
29574a49301eSmrg
2958af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2959cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2960cdc920a0Smrg         union tgsi_exec_channel src;
29614a49301eSmrg
2962cdc920a0Smrg         fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2963cdc920a0Smrg         op(&dst.xyzw[chan], &src);
29644a49301eSmrg      }
29654a49301eSmrg   }
2966af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2967cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
29687ec681f3Smrg         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
29694a49301eSmrg      }
2970cdc920a0Smrg   }
2971cdc920a0Smrg}
29724a49301eSmrg
/** Two-operand channel operation: writes dst from src0 and src1. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
29764a49301eSmrg
29773464ebd5Sriastradhstatic void
29783464ebd5Sriastradhexec_scalar_binary(struct tgsi_exec_machine *mach,
29793464ebd5Sriastradh                   const struct tgsi_full_instruction *inst,
29803464ebd5Sriastradh                   micro_binary_op op,
29813464ebd5Sriastradh                   enum tgsi_exec_datatype src_datatype)
29823464ebd5Sriastradh{
29833464ebd5Sriastradh   unsigned int chan;
29843464ebd5Sriastradh   union tgsi_exec_channel src[2];
29853464ebd5Sriastradh   union tgsi_exec_channel dst;
29863464ebd5Sriastradh
2987af69d88dSmrg   fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2988af69d88dSmrg   fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
29893464ebd5Sriastradh   op(&dst, &src[0], &src[1]);
2990af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
29913464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
29927ec681f3Smrg         store_dest(mach, &dst, &inst->Dst[0], inst, chan);
29933464ebd5Sriastradh      }
29943464ebd5Sriastradh   }
29953464ebd5Sriastradh}
29963464ebd5Sriastradh
2997cdc920a0Smrgstatic void
2998cdc920a0Smrgexec_vector_binary(struct tgsi_exec_machine *mach,
2999cdc920a0Smrg                   const struct tgsi_full_instruction *inst,
3000cdc920a0Smrg                   micro_binary_op op,
3001cdc920a0Smrg                   enum tgsi_exec_datatype src_datatype)
3002cdc920a0Smrg{
3003cdc920a0Smrg   unsigned int chan;
3004cdc920a0Smrg   struct tgsi_exec_vector dst;
30054a49301eSmrg
3006af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3007cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3008cdc920a0Smrg         union tgsi_exec_channel src[2];
30094a49301eSmrg
3010cdc920a0Smrg         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3011cdc920a0Smrg         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3012cdc920a0Smrg         op(&dst.xyzw[chan], &src[0], &src[1]);
30134a49301eSmrg      }
30144a49301eSmrg   }
3015af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3016cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
30177ec681f3Smrg         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3018cdc920a0Smrg      }
3019cdc920a0Smrg   }
3020cdc920a0Smrg}
30214a49301eSmrg
/** Three-operand channel operation: writes dst from src0, src1 and src2. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
30264a49301eSmrg
3027cdc920a0Smrgstatic void
3028cdc920a0Smrgexec_vector_trinary(struct tgsi_exec_machine *mach,
3029cdc920a0Smrg                    const struct tgsi_full_instruction *inst,
3030cdc920a0Smrg                    micro_trinary_op op,
3031cdc920a0Smrg                    enum tgsi_exec_datatype src_datatype)
3032cdc920a0Smrg{
3033cdc920a0Smrg   unsigned int chan;
3034cdc920a0Smrg   struct tgsi_exec_vector dst;
30354a49301eSmrg
3036af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3037cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3038cdc920a0Smrg         union tgsi_exec_channel src[3];
30394a49301eSmrg
3040cdc920a0Smrg         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3041cdc920a0Smrg         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3042cdc920a0Smrg         fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3043cdc920a0Smrg         op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3044cdc920a0Smrg      }
30454a49301eSmrg   }
3046af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3047af69d88dSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
30487ec681f3Smrg         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3049af69d88dSmrg      }
3050af69d88dSmrg   }
3051af69d88dSmrg}
3052af69d88dSmrg
/** Four-operand channel operation: writes dst from src0 through src3. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
3058af69d88dSmrg
3059af69d88dSmrgstatic void
3060af69d88dSmrgexec_vector_quaternary(struct tgsi_exec_machine *mach,
3061af69d88dSmrg                       const struct tgsi_full_instruction *inst,
3062af69d88dSmrg                       micro_quaternary_op op,
3063af69d88dSmrg                       enum tgsi_exec_datatype src_datatype)
3064af69d88dSmrg{
3065af69d88dSmrg   unsigned int chan;
3066af69d88dSmrg   struct tgsi_exec_vector dst;
3067af69d88dSmrg
3068af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3069af69d88dSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3070af69d88dSmrg         union tgsi_exec_channel src[4];
3071af69d88dSmrg
3072af69d88dSmrg         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3073af69d88dSmrg         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3074af69d88dSmrg         fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3075af69d88dSmrg         fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3076af69d88dSmrg         op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3077af69d88dSmrg      }
3078af69d88dSmrg   }
3079af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3080cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
30817ec681f3Smrg         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3082cdc920a0Smrg      }
30834a49301eSmrg   }
30844a49301eSmrg}
30854a49301eSmrg
30864a49301eSmrgstatic void
3087cdc920a0Smrgexec_dp3(struct tgsi_exec_machine *mach,
3088cdc920a0Smrg         const struct tgsi_full_instruction *inst)
30894a49301eSmrg{
3090cdc920a0Smrg   unsigned int chan;
3091cdc920a0Smrg   union tgsi_exec_channel arg[3];
30924a49301eSmrg
3093af69d88dSmrg   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3094af69d88dSmrg   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3095cdc920a0Smrg   micro_mul(&arg[2], &arg[0], &arg[1]);
30964a49301eSmrg
3097af69d88dSmrg   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3098cdc920a0Smrg      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3099cdc920a0Smrg      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3100cdc920a0Smrg      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
31014a49301eSmrg   }
31024a49301eSmrg
3103af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3104cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
31057ec681f3Smrg         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3106cdc920a0Smrg      }
31074a49301eSmrg   }
3108cdc920a0Smrg}
31094a49301eSmrg
3110cdc920a0Smrgstatic void
3111cdc920a0Smrgexec_dp4(struct tgsi_exec_machine *mach,
3112cdc920a0Smrg         const struct tgsi_full_instruction *inst)
3113cdc920a0Smrg{
3114cdc920a0Smrg   unsigned int chan;
3115cdc920a0Smrg   union tgsi_exec_channel arg[3];
31164a49301eSmrg
3117af69d88dSmrg   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3118af69d88dSmrg   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3119cdc920a0Smrg   micro_mul(&arg[2], &arg[0], &arg[1]);
3120cdc920a0Smrg
3121af69d88dSmrg   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3122cdc920a0Smrg      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3123cdc920a0Smrg      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3124cdc920a0Smrg      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3125cdc920a0Smrg   }
3126cdc920a0Smrg
3127af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3128cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
31297ec681f3Smrg         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3130cdc920a0Smrg      }
31314a49301eSmrg   }
31324a49301eSmrg}
31334a49301eSmrg
3134cdc920a0Smrgstatic void
313501e04c3fSmrgexec_dp2(struct tgsi_exec_machine *mach,
313601e04c3fSmrg         const struct tgsi_full_instruction *inst)
3137cdc920a0Smrg{
3138cdc920a0Smrg   unsigned int chan;
3139cdc920a0Smrg   union tgsi_exec_channel arg[3];
31404a49301eSmrg
3141af69d88dSmrg   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3142af69d88dSmrg   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3143cdc920a0Smrg   micro_mul(&arg[2], &arg[0], &arg[1]);
31444a49301eSmrg
3145af69d88dSmrg   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3146af69d88dSmrg   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
314701e04c3fSmrg   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3148cdc920a0Smrg
3149af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3150cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
31517ec681f3Smrg         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3152cdc920a0Smrg      }
3153cdc920a0Smrg   }
3154cdc920a0Smrg}
31554a49301eSmrg
31564a49301eSmrgstatic void
315701e04c3fSmrgexec_pk2h(struct tgsi_exec_machine *mach,
315801e04c3fSmrg          const struct tgsi_full_instruction *inst)
31594a49301eSmrg{
316001e04c3fSmrg   unsigned chan;
316101e04c3fSmrg   union tgsi_exec_channel arg[2], dst;
31624a49301eSmrg
3163af69d88dSmrg   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
316401e04c3fSmrg   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
316501e04c3fSmrg   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
31667ec681f3Smrg      dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
31677ec681f3Smrg         (_mesa_float_to_half(arg[1].f[chan]) << 16);
316801e04c3fSmrg   }
3169af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3170cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
31717ec681f3Smrg         store_dest(mach, &dst, &inst->Dst[0], inst, chan);
3172cdc920a0Smrg      }
31734a49301eSmrg   }
3174cdc920a0Smrg}
31754a49301eSmrg
3176cdc920a0Smrgstatic void
317701e04c3fSmrgexec_up2h(struct tgsi_exec_machine *mach,
317801e04c3fSmrg          const struct tgsi_full_instruction *inst)
3179cdc920a0Smrg{
318001e04c3fSmrg   unsigned chan;
318101e04c3fSmrg   union tgsi_exec_channel arg, dst[2];
3182cdc920a0Smrg
318301e04c3fSmrg   fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
318401e04c3fSmrg   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
31857ec681f3Smrg      dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
31867ec681f3Smrg      dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
318701e04c3fSmrg   }
3188af69d88dSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3189cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
31907ec681f3Smrg         store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan);
3191cdc920a0Smrg      }
3192cdc920a0Smrg   }
31934a49301eSmrg}
31944a49301eSmrg
31954a49301eSmrgstatic void
319601e04c3fSmrgmicro_ucmp(union tgsi_exec_channel *dst,
319701e04c3fSmrg           const union tgsi_exec_channel *src0,
319801e04c3fSmrg           const union tgsi_exec_channel *src1,
319901e04c3fSmrg           const union tgsi_exec_channel *src2)
320001e04c3fSmrg{
320101e04c3fSmrg   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
320201e04c3fSmrg   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
320301e04c3fSmrg   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
320401e04c3fSmrg   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
320501e04c3fSmrg}
320601e04c3fSmrg
320701e04c3fSmrgstatic void
320801e04c3fSmrgexec_ucmp(struct tgsi_exec_machine *mach,
32094a49301eSmrg          const struct tgsi_full_instruction *inst)
32104a49301eSmrg{
3211cdc920a0Smrg   unsigned int chan;
321201e04c3fSmrg   struct tgsi_exec_vector dst;
3213cdc920a0Smrg
321401e04c3fSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
321501e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
321601e04c3fSmrg         union tgsi_exec_channel src[3];
3217cdc920a0Smrg
321801e04c3fSmrg         fetch_source(mach, &src[0], &inst->Src[0], chan,
321901e04c3fSmrg                      TGSI_EXEC_DATA_UINT);
322001e04c3fSmrg         fetch_source(mach, &src[1], &inst->Src[1], chan,
322101e04c3fSmrg                      TGSI_EXEC_DATA_FLOAT);
322201e04c3fSmrg         fetch_source(mach, &src[2], &inst->Src[2], chan,
322301e04c3fSmrg                      TGSI_EXEC_DATA_FLOAT);
322401e04c3fSmrg         micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
322501e04c3fSmrg      }
3226cdc920a0Smrg   }
322701e04c3fSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3228cdc920a0Smrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
32297ec681f3Smrg         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3230cdc920a0Smrg      }
3231cdc920a0Smrg   }
32324a49301eSmrg}
32334a49301eSmrg
3234cdc920a0Smrgstatic void
323501e04c3fSmrgexec_dst(struct tgsi_exec_machine *mach,
323601e04c3fSmrg         const struct tgsi_full_instruction *inst)
3237cdc920a0Smrg{
323801e04c3fSmrg   union tgsi_exec_channel r[2];
323901e04c3fSmrg   union tgsi_exec_channel d[4];
3240cdc920a0Smrg
324101e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
324201e04c3fSmrg      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
324301e04c3fSmrg      fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
324401e04c3fSmrg      micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
324501e04c3fSmrg   }
324601e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
324701e04c3fSmrg      fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
324801e04c3fSmrg   }
324901e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
325001e04c3fSmrg      fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3251cdc920a0Smrg   }
3252cdc920a0Smrg
325301e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
32547ec681f3Smrg      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
325501e04c3fSmrg   }
325601e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
32577ec681f3Smrg      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
325801e04c3fSmrg   }
325901e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
32607ec681f3Smrg      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
326101e04c3fSmrg   }
3262cdc920a0Smrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
32637ec681f3Smrg      store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W);
3264cdc920a0Smrg   }
3265cdc920a0Smrg}
32664a49301eSmrg
32673464ebd5Sriastradhstatic void
326801e04c3fSmrgexec_log(struct tgsi_exec_machine *mach,
32693464ebd5Sriastradh         const struct tgsi_full_instruction *inst)
32703464ebd5Sriastradh{
327101e04c3fSmrg   union tgsi_exec_channel r[3];
32723464ebd5Sriastradh
327301e04c3fSmrg   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
327401e04c3fSmrg   micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
327501e04c3fSmrg   micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
327601e04c3fSmrg   micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
327701e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
32787ec681f3Smrg      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
327901e04c3fSmrg   }
328001e04c3fSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
328101e04c3fSmrg      micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
328201e04c3fSmrg      micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
32837ec681f3Smrg      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y);
32843464ebd5Sriastradh   }
32853464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
32867ec681f3Smrg      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z);
32873464ebd5Sriastradh   }
32883464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
32897ec681f3Smrg      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
32903464ebd5Sriastradh   }
32913464ebd5Sriastradh}
32923464ebd5Sriastradh
32933464ebd5Sriastradhstatic void
329401e04c3fSmrgexec_exp(struct tgsi_exec_machine *mach,
32953464ebd5Sriastradh         const struct tgsi_full_instruction *inst)
32963464ebd5Sriastradh{
329701e04c3fSmrg   union tgsi_exec_channel r[3];
329801e04c3fSmrg
329901e04c3fSmrg   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
330001e04c3fSmrg   micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
33013464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
330201e04c3fSmrg      micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
33037ec681f3Smrg      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X);
33043464ebd5Sriastradh   }
33053464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
330601e04c3fSmrg      micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
33077ec681f3Smrg      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y);
33083464ebd5Sriastradh   }
33093464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
331001e04c3fSmrg      micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
33117ec681f3Smrg      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z);
33123464ebd5Sriastradh   }
33133464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
33147ec681f3Smrg      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
33153464ebd5Sriastradh   }
33163464ebd5Sriastradh}
33173464ebd5Sriastradh
33183464ebd5Sriastradhstatic void
331901e04c3fSmrgexec_lit(struct tgsi_exec_machine *mach,
33203464ebd5Sriastradh         const struct tgsi_full_instruction *inst)
33213464ebd5Sriastradh{
332201e04c3fSmrg   union tgsi_exec_channel r[3];
332301e04c3fSmrg   union tgsi_exec_channel d[3];
33243464ebd5Sriastradh
33253464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3326af69d88dSmrg      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
33273464ebd5Sriastradh      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3328af69d88dSmrg         fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
33293464ebd5Sriastradh         micro_max(&r[1], &r[1], &ZeroVec);
33303464ebd5Sriastradh
3331af69d88dSmrg         fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
33323464ebd5Sriastradh         micro_min(&r[2], &r[2], &P128Vec);
33333464ebd5Sriastradh         micro_max(&r[2], &r[2], &M128Vec);
33343464ebd5Sriastradh         micro_pow(&r[1], &r[1], &r[2]);
3335af69d88dSmrg         micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
33367ec681f3Smrg         store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3337af69d88dSmrg      }
3338af69d88dSmrg      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3339af69d88dSmrg         micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
33407ec681f3Smrg         store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
33413464ebd5Sriastradh      }
33423464ebd5Sriastradh   }
3343af69d88dSmrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
33447ec681f3Smrg      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3345af69d88dSmrg   }
3346af69d88dSmrg
33473464ebd5Sriastradh   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
33487ec681f3Smrg      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
33493464ebd5Sriastradh   }
33503464ebd5Sriastradh}
33513464ebd5Sriastradh
33524a49301eSmrgstatic void
3353cdc920a0Smrgexec_break(struct tgsi_exec_machine *mach)
33544a49301eSmrg{
3355cdc920a0Smrg   if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3356cdc920a0Smrg      /* turn off loop channels for each enabled exec channel */
3357cdc920a0Smrg      mach->LoopMask &= ~mach->ExecMask;
3358cdc920a0Smrg      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3359cdc920a0Smrg      UPDATE_EXEC_MASK(mach);
3360cdc920a0Smrg   } else {
3361cdc920a0Smrg      assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
33624a49301eSmrg
3363cdc920a0Smrg      mach->Switch.mask = 0x0;
33644a49301eSmrg
3365cdc920a0Smrg      UPDATE_EXEC_MASK(mach);
33664a49301eSmrg   }
33674a49301eSmrg}
33684a49301eSmrg
3369cdc920a0Smrgstatic void
3370cdc920a0Smrgexec_switch(struct tgsi_exec_machine *mach,
3371cdc920a0Smrg            const struct tgsi_full_instruction *inst)
3372cdc920a0Smrg{
3373cdc920a0Smrg   assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3374cdc920a0Smrg   assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3375cdc920a0Smrg
3376cdc920a0Smrg   mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3377af69d88dSmrg   fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3378cdc920a0Smrg   mach->Switch.mask = 0x0;
3379cdc920a0Smrg   mach->Switch.defaultMask = 0x0;
3380cdc920a0Smrg
3381cdc920a0Smrg   mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3382cdc920a0Smrg   mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3383cdc920a0Smrg
3384cdc920a0Smrg   UPDATE_EXEC_MASK(mach);
3385cdc920a0Smrg}
33864a49301eSmrg
33874a49301eSmrgstatic void
3388cdc920a0Smrgexec_case(struct tgsi_exec_machine *mach,
3389cdc920a0Smrg          const struct tgsi_full_instruction *inst)
33904a49301eSmrg{
3391cdc920a0Smrg   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3392cdc920a0Smrg   union tgsi_exec_channel src;
3393cdc920a0Smrg   uint mask = 0;
33944a49301eSmrg
3395af69d88dSmrg   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
33964a49301eSmrg
3397cdc920a0Smrg   if (mach->Switch.selector.u[0] == src.u[0]) {
3398cdc920a0Smrg      mask |= 0x1;
3399cdc920a0Smrg   }
3400cdc920a0Smrg   if (mach->Switch.selector.u[1] == src.u[1]) {
3401cdc920a0Smrg      mask |= 0x2;
3402cdc920a0Smrg   }
3403cdc920a0Smrg   if (mach->Switch.selector.u[2] == src.u[2]) {
3404cdc920a0Smrg      mask |= 0x4;
3405cdc920a0Smrg   }
3406cdc920a0Smrg   if (mach->Switch.selector.u[3] == src.u[3]) {
3407cdc920a0Smrg      mask |= 0x8;
3408cdc920a0Smrg   }
34094a49301eSmrg
341001e04c3fSmrg   mach->Switch.defaultMask |= mask;
341101e04c3fSmrg
341201e04c3fSmrg   mach->Switch.mask |= mask & prevMask;
341301e04c3fSmrg
341401e04c3fSmrg   UPDATE_EXEC_MASK(mach);
341501e04c3fSmrg}
341601e04c3fSmrg
341701e04c3fSmrg/* FIXME: this will only work if default is last */
341801e04c3fSmrgstatic void
341901e04c3fSmrgexec_default(struct tgsi_exec_machine *mach)
342001e04c3fSmrg{
342101e04c3fSmrg   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
342201e04c3fSmrg
342301e04c3fSmrg   mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
342401e04c3fSmrg
342501e04c3fSmrg   UPDATE_EXEC_MASK(mach);
342601e04c3fSmrg}
342701e04c3fSmrg
342801e04c3fSmrgstatic void
342901e04c3fSmrgexec_endswitch(struct tgsi_exec_machine *mach)
343001e04c3fSmrg{
343101e04c3fSmrg   mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
343201e04c3fSmrg   mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
343301e04c3fSmrg
343401e04c3fSmrg   UPDATE_EXEC_MASK(mach);
343501e04c3fSmrg}
343601e04c3fSmrg
/**
 * Micro-op producing one double channel from double source(s).  In this
 * file it is called both with a pointer to a single source channel and
 * with an array of two or three source channels.
 */
typedef void (* micro_dop)(union tgsi_double_channel *dst,
                           const union tgsi_double_channel *src);

/**
 * Micro-op producing one double channel from a double channel (src0) and
 * a regular exec channel (src1).
 */
typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
                               const union tgsi_double_channel *src0,
                               union tgsi_exec_channel *src1);

/**
 * Micro-op producing one double channel from a regular exec channel.
 */
typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
                             const union tgsi_exec_channel *src);

/**
 * Micro-op producing one regular exec channel from a double channel.
 */
typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
                             const union tgsi_double_channel *src);
344901e04c3fSmrg
345001e04c3fSmrgstatic void
345101e04c3fSmrgfetch_double_channel(struct tgsi_exec_machine *mach,
345201e04c3fSmrg                     union tgsi_double_channel *chan,
345301e04c3fSmrg                     const struct tgsi_full_src_register *reg,
345401e04c3fSmrg                     uint chan_0,
345501e04c3fSmrg                     uint chan_1)
345601e04c3fSmrg{
345701e04c3fSmrg   union tgsi_exec_channel src[2];
345801e04c3fSmrg   uint i;
345901e04c3fSmrg
346001e04c3fSmrg   fetch_source_d(mach, &src[0], reg, chan_0);
346101e04c3fSmrg   fetch_source_d(mach, &src[1], reg, chan_1);
346201e04c3fSmrg
346301e04c3fSmrg   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
346401e04c3fSmrg      chan->u[i][0] = src[0].u[i];
346501e04c3fSmrg      chan->u[i][1] = src[1].u[i];
346601e04c3fSmrg   }
34677ec681f3Smrg   assert(!reg->Register.Absolute);
34687ec681f3Smrg   assert(!reg->Register.Negate);
346901e04c3fSmrg}
347001e04c3fSmrg
347101e04c3fSmrgstatic void
347201e04c3fSmrgstore_double_channel(struct tgsi_exec_machine *mach,
347301e04c3fSmrg                     const union tgsi_double_channel *chan,
347401e04c3fSmrg                     const struct tgsi_full_dst_register *reg,
347501e04c3fSmrg                     const struct tgsi_full_instruction *inst,
347601e04c3fSmrg                     uint chan_0,
347701e04c3fSmrg                     uint chan_1)
347801e04c3fSmrg{
347901e04c3fSmrg   union tgsi_exec_channel dst[2];
348001e04c3fSmrg   uint i;
348101e04c3fSmrg   union tgsi_double_channel temp;
348201e04c3fSmrg   const uint execmask = mach->ExecMask;
348301e04c3fSmrg
348401e04c3fSmrg   if (!inst->Instruction.Saturate) {
348501e04c3fSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++)
348601e04c3fSmrg         if (execmask & (1 << i)) {
348701e04c3fSmrg            dst[0].u[i] = chan->u[i][0];
348801e04c3fSmrg            dst[1].u[i] = chan->u[i][1];
348901e04c3fSmrg         }
349001e04c3fSmrg   }
349101e04c3fSmrg   else {
349201e04c3fSmrg      for (i = 0; i < TGSI_QUAD_SIZE; i++)
349301e04c3fSmrg         if (execmask & (1 << i)) {
34947ec681f3Smrg            if (chan->d[i] < 0.0 || isnan(chan->d[i]))
349501e04c3fSmrg               temp.d[i] = 0.0;
349601e04c3fSmrg            else if (chan->d[i] > 1.0)
349701e04c3fSmrg               temp.d[i] = 1.0;
349801e04c3fSmrg            else
349901e04c3fSmrg               temp.d[i] = chan->d[i];
350001e04c3fSmrg
350101e04c3fSmrg            dst[0].u[i] = temp.u[i][0];
350201e04c3fSmrg            dst[1].u[i] = temp.u[i][1];
350301e04c3fSmrg         }
350401e04c3fSmrg   }
350501e04c3fSmrg
35067ec681f3Smrg   store_dest_double(mach, &dst[0], reg, chan_0);
350701e04c3fSmrg   if (chan_1 != (unsigned)-1)
35087ec681f3Smrg      store_dest_double(mach, &dst[1], reg, chan_1);
350901e04c3fSmrg}
351001e04c3fSmrg
351101e04c3fSmrgstatic void
351201e04c3fSmrgexec_double_unary(struct tgsi_exec_machine *mach,
351301e04c3fSmrg                  const struct tgsi_full_instruction *inst,
351401e04c3fSmrg                  micro_dop op)
351501e04c3fSmrg{
351601e04c3fSmrg   union tgsi_double_channel src;
351701e04c3fSmrg   union tgsi_double_channel dst;
351801e04c3fSmrg
351901e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
352001e04c3fSmrg      fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
352101e04c3fSmrg      op(&dst, &src);
352201e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
352301e04c3fSmrg   }
352401e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
352501e04c3fSmrg      fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
352601e04c3fSmrg      op(&dst, &src);
352701e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
352801e04c3fSmrg   }
352901e04c3fSmrg}
353001e04c3fSmrg
353101e04c3fSmrgstatic void
353201e04c3fSmrgexec_double_binary(struct tgsi_exec_machine *mach,
353301e04c3fSmrg                   const struct tgsi_full_instruction *inst,
353401e04c3fSmrg                   micro_dop op,
353501e04c3fSmrg                   enum tgsi_exec_datatype dst_datatype)
353601e04c3fSmrg{
353701e04c3fSmrg   union tgsi_double_channel src[2];
353801e04c3fSmrg   union tgsi_double_channel dst;
353901e04c3fSmrg   int first_dest_chan, second_dest_chan;
354001e04c3fSmrg   int wmask;
354101e04c3fSmrg
354201e04c3fSmrg   wmask = inst->Dst[0].Register.WriteMask;
354301e04c3fSmrg   /* these are & because of the way DSLT etc store their destinations */
354401e04c3fSmrg   if (wmask & TGSI_WRITEMASK_XY) {
354501e04c3fSmrg      first_dest_chan = TGSI_CHAN_X;
354601e04c3fSmrg      second_dest_chan = TGSI_CHAN_Y;
354701e04c3fSmrg      if (dst_datatype == TGSI_EXEC_DATA_UINT) {
354801e04c3fSmrg         first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
354901e04c3fSmrg         second_dest_chan = -1;
355001e04c3fSmrg      }
355101e04c3fSmrg
355201e04c3fSmrg      fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
355301e04c3fSmrg      fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
355401e04c3fSmrg      op(&dst, src);
355501e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
355601e04c3fSmrg   }
355701e04c3fSmrg
355801e04c3fSmrg   if (wmask & TGSI_WRITEMASK_ZW) {
355901e04c3fSmrg      first_dest_chan = TGSI_CHAN_Z;
356001e04c3fSmrg      second_dest_chan = TGSI_CHAN_W;
356101e04c3fSmrg      if (dst_datatype == TGSI_EXEC_DATA_UINT) {
356201e04c3fSmrg         first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
356301e04c3fSmrg         second_dest_chan = -1;
356401e04c3fSmrg      }
356501e04c3fSmrg
356601e04c3fSmrg      fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
356701e04c3fSmrg      fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
356801e04c3fSmrg      op(&dst, src);
356901e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
357001e04c3fSmrg   }
357101e04c3fSmrg}
357201e04c3fSmrg
357301e04c3fSmrgstatic void
357401e04c3fSmrgexec_double_trinary(struct tgsi_exec_machine *mach,
357501e04c3fSmrg                    const struct tgsi_full_instruction *inst,
357601e04c3fSmrg                    micro_dop op)
357701e04c3fSmrg{
357801e04c3fSmrg   union tgsi_double_channel src[3];
357901e04c3fSmrg   union tgsi_double_channel dst;
358001e04c3fSmrg
358101e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
358201e04c3fSmrg      fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
358301e04c3fSmrg      fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
358401e04c3fSmrg      fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
358501e04c3fSmrg      op(&dst, src);
358601e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
358701e04c3fSmrg   }
358801e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
358901e04c3fSmrg      fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
359001e04c3fSmrg      fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
359101e04c3fSmrg      fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
359201e04c3fSmrg      op(&dst, src);
359301e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
359401e04c3fSmrg   }
359501e04c3fSmrg}
359601e04c3fSmrg
359701e04c3fSmrgstatic void
359801e04c3fSmrgexec_dldexp(struct tgsi_exec_machine *mach,
359901e04c3fSmrg            const struct tgsi_full_instruction *inst)
360001e04c3fSmrg{
360101e04c3fSmrg   union tgsi_double_channel src0;
360201e04c3fSmrg   union tgsi_exec_channel src1;
360301e04c3fSmrg   union tgsi_double_channel dst;
360401e04c3fSmrg   int wmask;
360501e04c3fSmrg
360601e04c3fSmrg   wmask = inst->Dst[0].Register.WriteMask;
360701e04c3fSmrg   if (wmask & TGSI_WRITEMASK_XY) {
360801e04c3fSmrg      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
360901e04c3fSmrg      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
361001e04c3fSmrg      micro_dldexp(&dst, &src0, &src1);
361101e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
361201e04c3fSmrg   }
361301e04c3fSmrg
361401e04c3fSmrg   if (wmask & TGSI_WRITEMASK_ZW) {
361501e04c3fSmrg      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
361601e04c3fSmrg      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
361701e04c3fSmrg      micro_dldexp(&dst, &src0, &src1);
361801e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
361901e04c3fSmrg   }
362001e04c3fSmrg}
362101e04c3fSmrg
362201e04c3fSmrgstatic void
362301e04c3fSmrgexec_dfracexp(struct tgsi_exec_machine *mach,
362401e04c3fSmrg              const struct tgsi_full_instruction *inst)
362501e04c3fSmrg{
362601e04c3fSmrg   union tgsi_double_channel src;
362701e04c3fSmrg   union tgsi_double_channel dst;
362801e04c3fSmrg   union tgsi_exec_channel dst_exp;
362901e04c3fSmrg
363001e04c3fSmrg   fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
363101e04c3fSmrg   micro_dfracexp(&dst, &dst_exp, &src);
363201e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
363301e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
363401e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
363501e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
363601e04c3fSmrg   for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
363701e04c3fSmrg      if (inst->Dst[1].Register.WriteMask & (1 << chan))
36387ec681f3Smrg         store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan);
363901e04c3fSmrg   }
364001e04c3fSmrg}
364101e04c3fSmrg
364201e04c3fSmrgstatic void
364301e04c3fSmrgexec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
364401e04c3fSmrg            const struct tgsi_full_instruction *inst,
364501e04c3fSmrg            micro_dop_sop op)
364601e04c3fSmrg{
364701e04c3fSmrg   union tgsi_double_channel src0;
364801e04c3fSmrg   union tgsi_exec_channel src1;
364901e04c3fSmrg   union tgsi_double_channel dst;
365001e04c3fSmrg   int wmask;
365101e04c3fSmrg
365201e04c3fSmrg   wmask = inst->Dst[0].Register.WriteMask;
365301e04c3fSmrg   if (wmask & TGSI_WRITEMASK_XY) {
365401e04c3fSmrg      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
365501e04c3fSmrg      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
365601e04c3fSmrg      op(&dst, &src0, &src1);
365701e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
365801e04c3fSmrg   }
365901e04c3fSmrg
366001e04c3fSmrg   if (wmask & TGSI_WRITEMASK_ZW) {
366101e04c3fSmrg      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
366201e04c3fSmrg      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
366301e04c3fSmrg      op(&dst, &src0, &src1);
366401e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
366501e04c3fSmrg   }
366601e04c3fSmrg}
366701e04c3fSmrg
366801e04c3fSmrgstatic int
366901e04c3fSmrgget_image_coord_dim(unsigned tgsi_tex)
367001e04c3fSmrg{
367101e04c3fSmrg   int dim;
367201e04c3fSmrg   switch (tgsi_tex) {
367301e04c3fSmrg   case TGSI_TEXTURE_BUFFER:
367401e04c3fSmrg   case TGSI_TEXTURE_1D:
367501e04c3fSmrg      dim = 1;
367601e04c3fSmrg      break;
367701e04c3fSmrg   case TGSI_TEXTURE_2D:
367801e04c3fSmrg   case TGSI_TEXTURE_RECT:
367901e04c3fSmrg   case TGSI_TEXTURE_1D_ARRAY:
368001e04c3fSmrg   case TGSI_TEXTURE_2D_MSAA:
368101e04c3fSmrg      dim = 2;
368201e04c3fSmrg      break;
368301e04c3fSmrg   case TGSI_TEXTURE_3D:
368401e04c3fSmrg   case TGSI_TEXTURE_CUBE:
368501e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY:
368601e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY_MSAA:
368701e04c3fSmrg   case TGSI_TEXTURE_CUBE_ARRAY:
368801e04c3fSmrg      dim = 3;
368901e04c3fSmrg      break;
369001e04c3fSmrg   default:
369101e04c3fSmrg      assert(!"unknown texture target");
369201e04c3fSmrg      dim = 0;
369301e04c3fSmrg      break;
369401e04c3fSmrg   }
369501e04c3fSmrg
369601e04c3fSmrg   return dim;
369701e04c3fSmrg}
369801e04c3fSmrg
369901e04c3fSmrgstatic int
370001e04c3fSmrgget_image_coord_sample(unsigned tgsi_tex)
370101e04c3fSmrg{
370201e04c3fSmrg   int sample = 0;
370301e04c3fSmrg   switch (tgsi_tex) {
370401e04c3fSmrg   case TGSI_TEXTURE_2D_MSAA:
370501e04c3fSmrg      sample = 3;
370601e04c3fSmrg      break;
370701e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY_MSAA:
370801e04c3fSmrg      sample = 4;
370901e04c3fSmrg      break;
371001e04c3fSmrg   default:
371101e04c3fSmrg      break;
371201e04c3fSmrg   }
371301e04c3fSmrg   return sample;
371401e04c3fSmrg}
371501e04c3fSmrg
371601e04c3fSmrgstatic void
371701e04c3fSmrgexec_load_img(struct tgsi_exec_machine *mach,
371801e04c3fSmrg              const struct tgsi_full_instruction *inst)
371901e04c3fSmrg{
372001e04c3fSmrg   union tgsi_exec_channel r[4], sample_r;
372101e04c3fSmrg   uint unit;
372201e04c3fSmrg   int sample;
372301e04c3fSmrg   int i, j;
372401e04c3fSmrg   int dim;
372501e04c3fSmrg   uint chan;
372601e04c3fSmrg   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
372701e04c3fSmrg   struct tgsi_image_params params;
372801e04c3fSmrg
372901e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 0);
373001e04c3fSmrg   dim = get_image_coord_dim(inst->Memory.Texture);
373101e04c3fSmrg   sample = get_image_coord_sample(inst->Memory.Texture);
373201e04c3fSmrg   assert(dim <= 3);
373301e04c3fSmrg
37347ec681f3Smrg   params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
373501e04c3fSmrg   params.unit = unit;
373601e04c3fSmrg   params.tgsi_tex_instr = inst->Memory.Texture;
373701e04c3fSmrg   params.format = inst->Memory.Format;
373801e04c3fSmrg
373901e04c3fSmrg   for (i = 0; i < dim; i++) {
374001e04c3fSmrg      IFETCH(&r[i], 1, TGSI_CHAN_X + i);
374101e04c3fSmrg   }
374201e04c3fSmrg
374301e04c3fSmrg   if (sample)
374401e04c3fSmrg      IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
374501e04c3fSmrg
374601e04c3fSmrg   mach->Image->load(mach->Image, &params,
374701e04c3fSmrg                     r[0].i, r[1].i, r[2].i, sample_r.i,
374801e04c3fSmrg                     rgba);
374901e04c3fSmrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375001e04c3fSmrg      r[0].f[j] = rgba[0][j];
375101e04c3fSmrg      r[1].f[j] = rgba[1][j];
375201e04c3fSmrg      r[2].f[j] = rgba[2][j];
375301e04c3fSmrg      r[3].f[j] = rgba[3][j];
375401e04c3fSmrg   }
375501e04c3fSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
375601e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
37577ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
375801e04c3fSmrg      }
375901e04c3fSmrg   }
376001e04c3fSmrg}
376101e04c3fSmrg
376201e04c3fSmrgstatic void
37637ec681f3Smrgexec_load_membuf(struct tgsi_exec_machine *mach,
37647ec681f3Smrg                 const struct tgsi_full_instruction *inst)
376501e04c3fSmrg{
37667ec681f3Smrg   uint32_t unit = fetch_sampler_unit(mach, inst, 0);
376701e04c3fSmrg
37687ec681f3Smrg   uint32_t size;
37697ec681f3Smrg   const char *ptr;
37707ec681f3Smrg   switch (inst->Src[0].Register.File) {
37717ec681f3Smrg   case TGSI_FILE_MEMORY:
37727ec681f3Smrg      ptr = mach->LocalMem;
37737ec681f3Smrg      size = mach->LocalMemSize;
37747ec681f3Smrg      break;
377501e04c3fSmrg
37767ec681f3Smrg   case TGSI_FILE_BUFFER:
37777ec681f3Smrg      ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
37787ec681f3Smrg      break;
377901e04c3fSmrg
37807ec681f3Smrg   case TGSI_FILE_CONSTANT:
37817ec681f3Smrg      if (unit < ARRAY_SIZE(mach->Consts)) {
37827ec681f3Smrg         ptr = mach->Consts[unit];
37837ec681f3Smrg         size = mach->ConstsSize[unit];
37847ec681f3Smrg      } else {
37857ec681f3Smrg         ptr = NULL;
37867ec681f3Smrg         size = 0;
378701e04c3fSmrg      }
37887ec681f3Smrg      break;
378901e04c3fSmrg
37907ec681f3Smrg   default:
37917ec681f3Smrg      unreachable("unsupported TGSI_OPCODE_LOAD file");
37927ec681f3Smrg   }
379301e04c3fSmrg
37947ec681f3Smrg   union tgsi_exec_channel offset;
37957ec681f3Smrg   IFETCH(&offset, 1, TGSI_CHAN_X);
379601e04c3fSmrg
37977ec681f3Smrg   assert(inst->Dst[0].Register.WriteMask);
37987ec681f3Smrg   uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4;
379901e04c3fSmrg
38007ec681f3Smrg   union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS];
38017ec681f3Smrg   memset(&rgba, 0, sizeof(rgba));
38027ec681f3Smrg   for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
38037ec681f3Smrg      if (size >= load_size && offset.u[j] <= (size - load_size)) {
38047ec681f3Smrg         for (int chan = 0; chan < load_size / 4; chan++)
38057ec681f3Smrg            rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4);
380601e04c3fSmrg      }
380701e04c3fSmrg   }
380801e04c3fSmrg
38097ec681f3Smrg   for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
381001e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
38117ec681f3Smrg         store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan);
381201e04c3fSmrg      }
381301e04c3fSmrg   }
381401e04c3fSmrg}
381501e04c3fSmrg
381601e04c3fSmrgstatic void
381701e04c3fSmrgexec_load(struct tgsi_exec_machine *mach,
381801e04c3fSmrg          const struct tgsi_full_instruction *inst)
381901e04c3fSmrg{
382001e04c3fSmrg   if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
382101e04c3fSmrg      exec_load_img(mach, inst);
38227ec681f3Smrg   else
38237ec681f3Smrg      exec_load_membuf(mach, inst);
382401e04c3fSmrg}
382501e04c3fSmrg
3826361fc4cbSmayastatic uint
3827361fc4cbSmayafetch_store_img_unit(struct tgsi_exec_machine *mach,
3828361fc4cbSmaya                     const struct tgsi_full_dst_register *dst)
3829361fc4cbSmaya{
3830361fc4cbSmaya   uint unit = 0;
3831361fc4cbSmaya   int i;
3832361fc4cbSmaya   if (dst->Register.Indirect) {
3833361fc4cbSmaya      union tgsi_exec_channel indir_index, index2;
3834361fc4cbSmaya      const uint execmask = mach->ExecMask;
3835361fc4cbSmaya      index2.i[0] =
3836361fc4cbSmaya      index2.i[1] =
3837361fc4cbSmaya      index2.i[2] =
3838361fc4cbSmaya      index2.i[3] = dst->Indirect.Index;
3839361fc4cbSmaya
3840361fc4cbSmaya      fetch_src_file_channel(mach,
3841361fc4cbSmaya                             dst->Indirect.File,
3842361fc4cbSmaya                             dst->Indirect.Swizzle,
3843361fc4cbSmaya                             &index2,
3844361fc4cbSmaya                             &ZeroVec,
3845361fc4cbSmaya                             &indir_index);
3846361fc4cbSmaya      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3847361fc4cbSmaya         if (execmask & (1 << i)) {
3848361fc4cbSmaya            unit = dst->Register.Index + indir_index.i[i];
3849361fc4cbSmaya            break;
3850361fc4cbSmaya         }
3851361fc4cbSmaya      }
3852361fc4cbSmaya   } else {
3853361fc4cbSmaya      unit = dst->Register.Index;
3854361fc4cbSmaya   }
3855361fc4cbSmaya   return unit;
3856361fc4cbSmaya}
3857361fc4cbSmaya
385801e04c3fSmrgstatic void
385901e04c3fSmrgexec_store_img(struct tgsi_exec_machine *mach,
386001e04c3fSmrg               const struct tgsi_full_instruction *inst)
386101e04c3fSmrg{
386201e04c3fSmrg   union tgsi_exec_channel r[3], sample_r;
386301e04c3fSmrg   union tgsi_exec_channel value[4];
386401e04c3fSmrg   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
386501e04c3fSmrg   struct tgsi_image_params params;
386601e04c3fSmrg   int dim;
386701e04c3fSmrg   int sample;
386801e04c3fSmrg   int i, j;
386901e04c3fSmrg   uint unit;
3870361fc4cbSmaya   unit = fetch_store_img_unit(mach, &inst->Dst[0]);
387101e04c3fSmrg   dim = get_image_coord_dim(inst->Memory.Texture);
387201e04c3fSmrg   sample = get_image_coord_sample(inst->Memory.Texture);
387301e04c3fSmrg   assert(dim <= 3);
387401e04c3fSmrg
38757ec681f3Smrg   params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
387601e04c3fSmrg   params.unit = unit;
387701e04c3fSmrg   params.tgsi_tex_instr = inst->Memory.Texture;
387801e04c3fSmrg   params.format = inst->Memory.Format;
387901e04c3fSmrg
388001e04c3fSmrg   for (i = 0; i < dim; i++) {
388101e04c3fSmrg      IFETCH(&r[i], 0, TGSI_CHAN_X + i);
388201e04c3fSmrg   }
388301e04c3fSmrg
388401e04c3fSmrg   for (i = 0; i < 4; i++) {
388501e04c3fSmrg      FETCH(&value[i], 1, TGSI_CHAN_X + i);
388601e04c3fSmrg   }
388701e04c3fSmrg   if (sample)
388801e04c3fSmrg      IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
388901e04c3fSmrg
389001e04c3fSmrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389101e04c3fSmrg      rgba[0][j] = value[0].f[j];
389201e04c3fSmrg      rgba[1][j] = value[1].f[j];
389301e04c3fSmrg      rgba[2][j] = value[2].f[j];
389401e04c3fSmrg      rgba[3][j] = value[3].f[j];
389501e04c3fSmrg   }
389601e04c3fSmrg
389701e04c3fSmrg   mach->Image->store(mach->Image, &params,
389801e04c3fSmrg                      r[0].i, r[1].i, r[2].i, sample_r.i,
389901e04c3fSmrg                      rgba);
390001e04c3fSmrg}
390101e04c3fSmrg
390201e04c3fSmrgstatic void
390301e04c3fSmrgexec_store_buf(struct tgsi_exec_machine *mach,
390401e04c3fSmrg               const struct tgsi_full_instruction *inst)
390501e04c3fSmrg{
39067ec681f3Smrg   uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]);
39077ec681f3Smrg   uint32_t size;
39087ec681f3Smrg   char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
390901e04c3fSmrg
39107ec681f3Smrg   int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
391101e04c3fSmrg
39127ec681f3Smrg   union tgsi_exec_channel offset;
39137ec681f3Smrg   IFETCH(&offset, 0, TGSI_CHAN_X);
391401e04c3fSmrg
39157ec681f3Smrg   union tgsi_exec_channel value[4];
39167ec681f3Smrg   for (int i = 0; i < 4; i++)
391701e04c3fSmrg      FETCH(&value[i], 1, TGSI_CHAN_X + i);
391801e04c3fSmrg
39197ec681f3Smrg   for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
39207ec681f3Smrg      if (!(execmask & (1 << j)))
39217ec681f3Smrg         continue;
39227ec681f3Smrg      if (size < offset.u[j])
39237ec681f3Smrg         continue;
392401e04c3fSmrg
39257ec681f3Smrg      uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]);
39267ec681f3Smrg      uint32_t size_avail = size - offset.u[j];
39277ec681f3Smrg
39287ec681f3Smrg      for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) {
39297ec681f3Smrg         if (inst->Dst[0].Register.WriteMask & (1 << chan))
39307ec681f3Smrg            memcpy(&invocation_ptr[chan], &value[chan].u[j], 4);
39317ec681f3Smrg      }
39327ec681f3Smrg   }
393301e04c3fSmrg}
393401e04c3fSmrg
393501e04c3fSmrgstatic void
393601e04c3fSmrgexec_store_mem(struct tgsi_exec_machine *mach,
393701e04c3fSmrg               const struct tgsi_full_instruction *inst)
393801e04c3fSmrg{
393901e04c3fSmrg   union tgsi_exec_channel r[3];
394001e04c3fSmrg   union tgsi_exec_channel value[4];
394101e04c3fSmrg   uint i, chan;
394201e04c3fSmrg   char *ptr = mach->LocalMem;
39437ec681f3Smrg   int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
394401e04c3fSmrg
394501e04c3fSmrg   IFETCH(&r[0], 0, TGSI_CHAN_X);
394601e04c3fSmrg
394701e04c3fSmrg   for (i = 0; i < 4; i++) {
394801e04c3fSmrg      FETCH(&value[i], 1, TGSI_CHAN_X + i);
394901e04c3fSmrg   }
395001e04c3fSmrg
395101e04c3fSmrg   if (r[0].u[0] >= mach->LocalMemSize)
395201e04c3fSmrg      return;
395301e04c3fSmrg   ptr += r[0].u[0];
395401e04c3fSmrg
395501e04c3fSmrg   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
395601e04c3fSmrg      if (execmask & (1 << i)) {
395701e04c3fSmrg         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
395801e04c3fSmrg            if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
395901e04c3fSmrg               memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
396001e04c3fSmrg            }
396101e04c3fSmrg         }
396201e04c3fSmrg      }
396301e04c3fSmrg   }
396401e04c3fSmrg}
396501e04c3fSmrg
396601e04c3fSmrgstatic void
396701e04c3fSmrgexec_store(struct tgsi_exec_machine *mach,
396801e04c3fSmrg           const struct tgsi_full_instruction *inst)
396901e04c3fSmrg{
397001e04c3fSmrg   if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
397101e04c3fSmrg      exec_store_img(mach, inst);
397201e04c3fSmrg   else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
397301e04c3fSmrg      exec_store_buf(mach, inst);
397401e04c3fSmrg   else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
397501e04c3fSmrg      exec_store_mem(mach, inst);
397601e04c3fSmrg}
397701e04c3fSmrg
397801e04c3fSmrgstatic void
397901e04c3fSmrgexec_atomop_img(struct tgsi_exec_machine *mach,
398001e04c3fSmrg                const struct tgsi_full_instruction *inst)
398101e04c3fSmrg{
398201e04c3fSmrg   union tgsi_exec_channel r[4], sample_r;
398301e04c3fSmrg   union tgsi_exec_channel value[4], value2[4];
398401e04c3fSmrg   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
398501e04c3fSmrg   float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
398601e04c3fSmrg   struct tgsi_image_params params;
398701e04c3fSmrg   int dim;
398801e04c3fSmrg   int sample;
398901e04c3fSmrg   int i, j;
399001e04c3fSmrg   uint unit, chan;
399101e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 0);
399201e04c3fSmrg   dim = get_image_coord_dim(inst->Memory.Texture);
399301e04c3fSmrg   sample = get_image_coord_sample(inst->Memory.Texture);
399401e04c3fSmrg   assert(dim <= 3);
399501e04c3fSmrg
39967ec681f3Smrg   params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
399701e04c3fSmrg   params.unit = unit;
399801e04c3fSmrg   params.tgsi_tex_instr = inst->Memory.Texture;
399901e04c3fSmrg   params.format = inst->Memory.Format;
400001e04c3fSmrg
400101e04c3fSmrg   for (i = 0; i < dim; i++) {
400201e04c3fSmrg      IFETCH(&r[i], 1, TGSI_CHAN_X + i);
400301e04c3fSmrg   }
400401e04c3fSmrg
400501e04c3fSmrg   for (i = 0; i < 4; i++) {
400601e04c3fSmrg      FETCH(&value[i], 2, TGSI_CHAN_X + i);
400701e04c3fSmrg      if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
400801e04c3fSmrg         FETCH(&value2[i], 3, TGSI_CHAN_X + i);
400901e04c3fSmrg   }
401001e04c3fSmrg   if (sample)
401101e04c3fSmrg      IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
401201e04c3fSmrg
401301e04c3fSmrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
401401e04c3fSmrg      rgba[0][j] = value[0].f[j];
401501e04c3fSmrg      rgba[1][j] = value[1].f[j];
401601e04c3fSmrg      rgba[2][j] = value[2].f[j];
401701e04c3fSmrg      rgba[3][j] = value[3].f[j];
401801e04c3fSmrg   }
401901e04c3fSmrg   if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
402001e04c3fSmrg      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
402101e04c3fSmrg         rgba2[0][j] = value2[0].f[j];
402201e04c3fSmrg         rgba2[1][j] = value2[1].f[j];
402301e04c3fSmrg         rgba2[2][j] = value2[2].f[j];
402401e04c3fSmrg         rgba2[3][j] = value2[3].f[j];
402501e04c3fSmrg      }
402601e04c3fSmrg   }
402701e04c3fSmrg
402801e04c3fSmrg   mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
402901e04c3fSmrg                   r[0].i, r[1].i, r[2].i, sample_r.i,
403001e04c3fSmrg                   rgba, rgba2);
403101e04c3fSmrg
403201e04c3fSmrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403301e04c3fSmrg      r[0].f[j] = rgba[0][j];
403401e04c3fSmrg      r[1].f[j] = rgba[1][j];
403501e04c3fSmrg      r[2].f[j] = rgba[2][j];
403601e04c3fSmrg      r[3].f[j] = rgba[3][j];
403701e04c3fSmrg   }
403801e04c3fSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
403901e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
40407ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
404101e04c3fSmrg      }
404201e04c3fSmrg   }
404301e04c3fSmrg}
404401e04c3fSmrg
404501e04c3fSmrgstatic void
40467ec681f3Smrgexec_atomop_membuf(struct tgsi_exec_machine *mach,
40477ec681f3Smrg                   const struct tgsi_full_instruction *inst)
404801e04c3fSmrg{
40497ec681f3Smrg   union tgsi_exec_channel offset, r0, r1;
40507ec681f3Smrg   uint chan, i;
40517ec681f3Smrg   int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
40527ec681f3Smrg   IFETCH(&offset, 1, TGSI_CHAN_X);
405301e04c3fSmrg
40547ec681f3Smrg   if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X))
40557ec681f3Smrg      return;
405601e04c3fSmrg
40577ec681f3Smrg   void *ptr[TGSI_QUAD_SIZE];
40587ec681f3Smrg   if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
40597ec681f3Smrg      uint32_t unit = fetch_sampler_unit(mach, inst, 0);
40607ec681f3Smrg      uint32_t size;
40617ec681f3Smrg      char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);
40627ec681f3Smrg      for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
40637ec681f3Smrg         if (likely(size >= 4 && offset.u[i] <= size - 4))
40647ec681f3Smrg            ptr[i] = buffer + offset.u[i];
40657ec681f3Smrg         else
40667ec681f3Smrg            ptr[i] = NULL;
406701e04c3fSmrg      }
40687ec681f3Smrg   } else {
40697ec681f3Smrg      assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);
407001e04c3fSmrg
40717ec681f3Smrg      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
40727ec681f3Smrg         if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
40737ec681f3Smrg            ptr[i] = (char *)mach->LocalMem + offset.u[i];
40747ec681f3Smrg         else
40757ec681f3Smrg            ptr[i] = NULL;
407601e04c3fSmrg      }
407701e04c3fSmrg   }
407801e04c3fSmrg
40797ec681f3Smrg   FETCH(&r0, 2, TGSI_CHAN_X);
40807ec681f3Smrg   if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
40817ec681f3Smrg      FETCH(&r1, 3, TGSI_CHAN_X);
408201e04c3fSmrg
40837ec681f3Smrg   /* The load/op/store sequence has to happen inside the loop since ptr
40847ec681f3Smrg    * may have the same ptr in some of the invocations.
40857ec681f3Smrg    */
40867ec681f3Smrg   for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
40877ec681f3Smrg      if (!(execmask & (1 << i)))
40887ec681f3Smrg         continue;
408901e04c3fSmrg
40907ec681f3Smrg      uint32_t val = 0;
40917ec681f3Smrg      if (ptr[i]) {
40927ec681f3Smrg         memcpy(&val, ptr[i], sizeof(val));
409301e04c3fSmrg
40947ec681f3Smrg         uint32_t result;
40957ec681f3Smrg         switch (inst->Instruction.Opcode) {
40967ec681f3Smrg         case TGSI_OPCODE_ATOMUADD:
40977ec681f3Smrg            result = val + r0.u[i];
40987ec681f3Smrg            break;
40997ec681f3Smrg         case TGSI_OPCODE_ATOMXOR:
41007ec681f3Smrg            result = val ^ r0.u[i];
41017ec681f3Smrg            break;
41027ec681f3Smrg         case TGSI_OPCODE_ATOMOR:
41037ec681f3Smrg            result = val | r0.u[i];
41047ec681f3Smrg            break;
41057ec681f3Smrg         case TGSI_OPCODE_ATOMAND:
41067ec681f3Smrg            result = val & r0.u[i];
41077ec681f3Smrg            break;
41087ec681f3Smrg         case TGSI_OPCODE_ATOMUMIN:
41097ec681f3Smrg            result = MIN2(val, r0.u[i]);
41107ec681f3Smrg            break;
41117ec681f3Smrg         case TGSI_OPCODE_ATOMUMAX:
41127ec681f3Smrg            result = MAX2(val, r0.u[i]);
41137ec681f3Smrg            break;
41147ec681f3Smrg         case TGSI_OPCODE_ATOMIMIN:
41157ec681f3Smrg            result = MIN2((int32_t)val, r0.i[i]);
41167ec681f3Smrg            break;
41177ec681f3Smrg         case TGSI_OPCODE_ATOMIMAX:
41187ec681f3Smrg            result = MAX2((int32_t)val, r0.i[i]);
41197ec681f3Smrg            break;
41207ec681f3Smrg         case TGSI_OPCODE_ATOMXCHG:
41217ec681f3Smrg            result = r0.u[i];
41227ec681f3Smrg            break;
41237ec681f3Smrg         case TGSI_OPCODE_ATOMCAS:
41247ec681f3Smrg            if (val == r0.u[i])
41257ec681f3Smrg               result = r1.u[i];
41267ec681f3Smrg            else
41277ec681f3Smrg               result = val;
41287ec681f3Smrg            break;
41297ec681f3Smrg         case TGSI_OPCODE_ATOMFADD:
41307ec681f3Smrg               result = fui(uif(val) + r0.f[i]);
41317ec681f3Smrg            break;
41327ec681f3Smrg         default:
41337ec681f3Smrg            unreachable("bad atomic op");
41347ec681f3Smrg         }
41357ec681f3Smrg         memcpy(ptr[i], &result, sizeof(result));
413601e04c3fSmrg      }
41377ec681f3Smrg
41387ec681f3Smrg      r0.u[i] = val;
413901e04c3fSmrg   }
41407ec681f3Smrg
41417ec681f3Smrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
41427ec681f3Smrg      store_dest(mach, &r0, &inst->Dst[0], inst, chan);
414301e04c3fSmrg}
414401e04c3fSmrg
414501e04c3fSmrgstatic void
414601e04c3fSmrgexec_atomop(struct tgsi_exec_machine *mach,
414701e04c3fSmrg            const struct tgsi_full_instruction *inst)
414801e04c3fSmrg{
414901e04c3fSmrg   if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
415001e04c3fSmrg      exec_atomop_img(mach, inst);
41517ec681f3Smrg   else
41527ec681f3Smrg      exec_atomop_membuf(mach, inst);
415301e04c3fSmrg}
415401e04c3fSmrg
415501e04c3fSmrgstatic void
415601e04c3fSmrgexec_resq_img(struct tgsi_exec_machine *mach,
415701e04c3fSmrg              const struct tgsi_full_instruction *inst)
415801e04c3fSmrg{
415901e04c3fSmrg   int result[4];
416001e04c3fSmrg   union tgsi_exec_channel r[4];
416101e04c3fSmrg   uint unit;
416201e04c3fSmrg   int i, chan, j;
416301e04c3fSmrg   struct tgsi_image_params params;
416401e04c3fSmrg
416501e04c3fSmrg   unit = fetch_sampler_unit(mach, inst, 0);
416601e04c3fSmrg
41677ec681f3Smrg   params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
416801e04c3fSmrg   params.unit = unit;
416901e04c3fSmrg   params.tgsi_tex_instr = inst->Memory.Texture;
417001e04c3fSmrg   params.format = inst->Memory.Format;
417101e04c3fSmrg
417201e04c3fSmrg   mach->Image->get_dims(mach->Image, &params, result);
417301e04c3fSmrg
417401e04c3fSmrg   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
417501e04c3fSmrg      for (j = 0; j < 4; j++) {
417601e04c3fSmrg         r[j].i[i] = result[j];
417701e04c3fSmrg      }
417801e04c3fSmrg   }
417901e04c3fSmrg
418001e04c3fSmrg   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
418101e04c3fSmrg      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
41827ec681f3Smrg         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
418301e04c3fSmrg      }
418401e04c3fSmrg   }
418501e04c3fSmrg}
418601e04c3fSmrg
418701e04c3fSmrgstatic void
418801e04c3fSmrgexec_resq_buf(struct tgsi_exec_machine *mach,
418901e04c3fSmrg              const struct tgsi_full_instruction *inst)
419001e04c3fSmrg{
41917ec681f3Smrg   uint32_t unit = fetch_sampler_unit(mach, inst, 0);
41927ec681f3Smrg   uint32_t size;
41937ec681f3Smrg   (void)mach->Buffer->lookup(mach->Buffer, unit, &size);
419401e04c3fSmrg
41957ec681f3Smrg   union tgsi_exec_channel r;
41967ec681f3Smrg   for (int i = 0; i < TGSI_QUAD_SIZE; i++)
41977ec681f3Smrg      r.i[i] = size;
419801e04c3fSmrg
41997ec681f3Smrg   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
42007ec681f3Smrg      for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
42017ec681f3Smrg         store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X);
420201e04c3fSmrg      }
420301e04c3fSmrg   }
420401e04c3fSmrg}
420501e04c3fSmrg
420601e04c3fSmrgstatic void
420701e04c3fSmrgexec_resq(struct tgsi_exec_machine *mach,
420801e04c3fSmrg          const struct tgsi_full_instruction *inst)
420901e04c3fSmrg{
421001e04c3fSmrg   if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
421101e04c3fSmrg      exec_resq_img(mach, inst);
421201e04c3fSmrg   else
421301e04c3fSmrg      exec_resq_buf(mach, inst);
421401e04c3fSmrg}
421501e04c3fSmrg
421601e04c3fSmrgstatic void
421701e04c3fSmrgmicro_f2u64(union tgsi_double_channel *dst,
421801e04c3fSmrg            const union tgsi_exec_channel *src)
421901e04c3fSmrg{
422001e04c3fSmrg   dst->u64[0] = (uint64_t)src->f[0];
422101e04c3fSmrg   dst->u64[1] = (uint64_t)src->f[1];
422201e04c3fSmrg   dst->u64[2] = (uint64_t)src->f[2];
422301e04c3fSmrg   dst->u64[3] = (uint64_t)src->f[3];
422401e04c3fSmrg}
422501e04c3fSmrg
422601e04c3fSmrgstatic void
422701e04c3fSmrgmicro_f2i64(union tgsi_double_channel *dst,
422801e04c3fSmrg            const union tgsi_exec_channel *src)
422901e04c3fSmrg{
423001e04c3fSmrg   dst->i64[0] = (int64_t)src->f[0];
423101e04c3fSmrg   dst->i64[1] = (int64_t)src->f[1];
423201e04c3fSmrg   dst->i64[2] = (int64_t)src->f[2];
423301e04c3fSmrg   dst->i64[3] = (int64_t)src->f[3];
423401e04c3fSmrg}
423501e04c3fSmrg
423601e04c3fSmrgstatic void
423701e04c3fSmrgmicro_u2i64(union tgsi_double_channel *dst,
423801e04c3fSmrg            const union tgsi_exec_channel *src)
423901e04c3fSmrg{
424001e04c3fSmrg   dst->u64[0] = (uint64_t)src->u[0];
424101e04c3fSmrg   dst->u64[1] = (uint64_t)src->u[1];
424201e04c3fSmrg   dst->u64[2] = (uint64_t)src->u[2];
424301e04c3fSmrg   dst->u64[3] = (uint64_t)src->u[3];
424401e04c3fSmrg}
424501e04c3fSmrg
424601e04c3fSmrgstatic void
424701e04c3fSmrgmicro_i2i64(union tgsi_double_channel *dst,
424801e04c3fSmrg            const union tgsi_exec_channel *src)
424901e04c3fSmrg{
425001e04c3fSmrg   dst->i64[0] = (int64_t)src->i[0];
425101e04c3fSmrg   dst->i64[1] = (int64_t)src->i[1];
425201e04c3fSmrg   dst->i64[2] = (int64_t)src->i[2];
425301e04c3fSmrg   dst->i64[3] = (int64_t)src->i[3];
425401e04c3fSmrg}
425501e04c3fSmrg
425601e04c3fSmrgstatic void
425701e04c3fSmrgmicro_d2u64(union tgsi_double_channel *dst,
425801e04c3fSmrg           const union tgsi_double_channel *src)
425901e04c3fSmrg{
426001e04c3fSmrg   dst->u64[0] = (uint64_t)src->d[0];
426101e04c3fSmrg   dst->u64[1] = (uint64_t)src->d[1];
426201e04c3fSmrg   dst->u64[2] = (uint64_t)src->d[2];
426301e04c3fSmrg   dst->u64[3] = (uint64_t)src->d[3];
426401e04c3fSmrg}
42654a49301eSmrg
426601e04c3fSmrgstatic void
426701e04c3fSmrgmicro_d2i64(union tgsi_double_channel *dst,
426801e04c3fSmrg           const union tgsi_double_channel *src)
426901e04c3fSmrg{
427001e04c3fSmrg   dst->i64[0] = (int64_t)src->d[0];
427101e04c3fSmrg   dst->i64[1] = (int64_t)src->d[1];
427201e04c3fSmrg   dst->i64[2] = (int64_t)src->d[2];
427301e04c3fSmrg   dst->i64[3] = (int64_t)src->d[3];
427401e04c3fSmrg}
42754a49301eSmrg
427601e04c3fSmrgstatic void
427701e04c3fSmrgmicro_u642d(union tgsi_double_channel *dst,
427801e04c3fSmrg           const union tgsi_double_channel *src)
427901e04c3fSmrg{
428001e04c3fSmrg   dst->d[0] = (double)src->u64[0];
428101e04c3fSmrg   dst->d[1] = (double)src->u64[1];
428201e04c3fSmrg   dst->d[2] = (double)src->u64[2];
428301e04c3fSmrg   dst->d[3] = (double)src->u64[3];
4284cdc920a0Smrg}
42854a49301eSmrg
4286cdc920a0Smrgstatic void
428701e04c3fSmrgmicro_i642d(union tgsi_double_channel *dst,
428801e04c3fSmrg           const union tgsi_double_channel *src)
4289cdc920a0Smrg{
429001e04c3fSmrg   dst->d[0] = (double)src->i64[0];
429101e04c3fSmrg   dst->d[1] = (double)src->i64[1];
429201e04c3fSmrg   dst->d[2] = (double)src->i64[2];
429301e04c3fSmrg   dst->d[3] = (double)src->i64[3];
429401e04c3fSmrg}
42954a49301eSmrg
429601e04c3fSmrgstatic void
429701e04c3fSmrgmicro_u642f(union tgsi_exec_channel *dst,
429801e04c3fSmrg            const union tgsi_double_channel *src)
429901e04c3fSmrg{
430001e04c3fSmrg   dst->f[0] = (float)src->u64[0];
430101e04c3fSmrg   dst->f[1] = (float)src->u64[1];
430201e04c3fSmrg   dst->f[2] = (float)src->u64[2];
430301e04c3fSmrg   dst->f[3] = (float)src->u64[3];
430401e04c3fSmrg}
43054a49301eSmrg
430601e04c3fSmrgstatic void
430701e04c3fSmrgmicro_i642f(union tgsi_exec_channel *dst,
430801e04c3fSmrg            const union tgsi_double_channel *src)
430901e04c3fSmrg{
431001e04c3fSmrg   dst->f[0] = (float)src->i64[0];
431101e04c3fSmrg   dst->f[1] = (float)src->i64[1];
431201e04c3fSmrg   dst->f[2] = (float)src->i64[2];
431301e04c3fSmrg   dst->f[3] = (float)src->i64[3];
4314cdc920a0Smrg}
43154a49301eSmrg
4316cdc920a0Smrgstatic void
431701e04c3fSmrgexec_t_2_64(struct tgsi_exec_machine *mach,
431801e04c3fSmrg          const struct tgsi_full_instruction *inst,
431901e04c3fSmrg          micro_dop_s op,
432001e04c3fSmrg          enum tgsi_exec_datatype src_datatype)
4321cdc920a0Smrg{
432201e04c3fSmrg   union tgsi_exec_channel src;
432301e04c3fSmrg   union tgsi_double_channel dst;
43244a49301eSmrg
432501e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
432601e04c3fSmrg      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
432701e04c3fSmrg      op(&dst, &src);
432801e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
432901e04c3fSmrg   }
433001e04c3fSmrg   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
433101e04c3fSmrg      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
433201e04c3fSmrg      op(&dst, &src);
433301e04c3fSmrg      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
433401e04c3fSmrg   }
433501e04c3fSmrg}
433601e04c3fSmrg
433701e04c3fSmrgstatic void
433801e04c3fSmrgexec_64_2_t(struct tgsi_exec_machine *mach,
433901e04c3fSmrg            const struct tgsi_full_instruction *inst,
43407ec681f3Smrg            micro_sop_d op)
434101e04c3fSmrg{
434201e04c3fSmrg   union tgsi_double_channel src;
434301e04c3fSmrg   union tgsi_exec_channel dst;
434401e04c3fSmrg   int wm = inst->Dst[0].Register.WriteMask;
434501e04c3fSmrg   int i;
434601e04c3fSmrg   int bit;
434701e04c3fSmrg   for (i = 0; i < 2; i++) {
434801e04c3fSmrg      bit = ffs(wm);
434901e04c3fSmrg      if (bit) {
435001e04c3fSmrg         wm &= ~(1 << (bit - 1));
435101e04c3fSmrg         if (i == 0)
435201e04c3fSmrg            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
435301e04c3fSmrg         else
435401e04c3fSmrg            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
435501e04c3fSmrg         op(&dst, &src);
43567ec681f3Smrg         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1);
435701e04c3fSmrg      }
435801e04c3fSmrg   }
4359cdc920a0Smrg}
43604a49301eSmrg
4361cdc920a0Smrgstatic void
4362cdc920a0Smrgmicro_i2f(union tgsi_exec_channel *dst,
4363cdc920a0Smrg          const union tgsi_exec_channel *src)
4364cdc920a0Smrg{
4365cdc920a0Smrg   dst->f[0] = (float)src->i[0];
4366cdc920a0Smrg   dst->f[1] = (float)src->i[1];
4367cdc920a0Smrg   dst->f[2] = (float)src->i[2];
4368cdc920a0Smrg   dst->f[3] = (float)src->i[3];
4369cdc920a0Smrg}
43704a49301eSmrg
4371cdc920a0Smrgstatic void
4372cdc920a0Smrgmicro_not(union tgsi_exec_channel *dst,
4373cdc920a0Smrg          const union tgsi_exec_channel *src)
4374cdc920a0Smrg{
4375cdc920a0Smrg   dst->u[0] = ~src->u[0];
4376cdc920a0Smrg   dst->u[1] = ~src->u[1];
4377cdc920a0Smrg   dst->u[2] = ~src->u[2];
4378cdc920a0Smrg   dst->u[3] = ~src->u[3];
4379cdc920a0Smrg}
43804a49301eSmrg
4381cdc920a0Smrgstatic void
4382cdc920a0Smrgmicro_shl(union tgsi_exec_channel *dst,
4383cdc920a0Smrg          const union tgsi_exec_channel *src0,
4384cdc920a0Smrg          const union tgsi_exec_channel *src1)
4385cdc920a0Smrg{
4386af69d88dSmrg   unsigned masked_count;
4387af69d88dSmrg   masked_count = src1->u[0] & 0x1f;
4388af69d88dSmrg   dst->u[0] = src0->u[0] << masked_count;
4389af69d88dSmrg   masked_count = src1->u[1] & 0x1f;
4390af69d88dSmrg   dst->u[1] = src0->u[1] << masked_count;
4391af69d88dSmrg   masked_count = src1->u[2] & 0x1f;
4392af69d88dSmrg   dst->u[2] = src0->u[2] << masked_count;
4393af69d88dSmrg   masked_count = src1->u[3] & 0x1f;
4394af69d88dSmrg   dst->u[3] = src0->u[3] << masked_count;
4395cdc920a0Smrg}
43964a49301eSmrg
4397cdc920a0Smrgstatic void
4398cdc920a0Smrgmicro_and(union tgsi_exec_channel *dst,
4399cdc920a0Smrg          const union tgsi_exec_channel *src0,
4400cdc920a0Smrg          const union tgsi_exec_channel *src1)
4401cdc920a0Smrg{
4402cdc920a0Smrg   dst->u[0] = src0->u[0] & src1->u[0];
4403cdc920a0Smrg   dst->u[1] = src0->u[1] & src1->u[1];
4404cdc920a0Smrg   dst->u[2] = src0->u[2] & src1->u[2];
4405cdc920a0Smrg   dst->u[3] = src0->u[3] & src1->u[3];
4406cdc920a0Smrg}
44074a49301eSmrg
4408cdc920a0Smrgstatic void
4409cdc920a0Smrgmicro_or(union tgsi_exec_channel *dst,
4410cdc920a0Smrg         const union tgsi_exec_channel *src0,
4411cdc920a0Smrg         const union tgsi_exec_channel *src1)
4412cdc920a0Smrg{
4413cdc920a0Smrg   dst->u[0] = src0->u[0] | src1->u[0];
4414cdc920a0Smrg   dst->u[1] = src0->u[1] | src1->u[1];
4415cdc920a0Smrg   dst->u[2] = src0->u[2] | src1->u[2];
4416cdc920a0Smrg   dst->u[3] = src0->u[3] | src1->u[3];
4417cdc920a0Smrg}
44184a49301eSmrg
4419cdc920a0Smrgstatic void
4420cdc920a0Smrgmicro_xor(union tgsi_exec_channel *dst,
4421cdc920a0Smrg          const union tgsi_exec_channel *src0,
4422cdc920a0Smrg          const union tgsi_exec_channel *src1)
4423cdc920a0Smrg{
4424cdc920a0Smrg   dst->u[0] = src0->u[0] ^ src1->u[0];
4425cdc920a0Smrg   dst->u[1] = src0->u[1] ^ src1->u[1];
4426cdc920a0Smrg   dst->u[2] = src0->u[2] ^ src1->u[2];
4427cdc920a0Smrg   dst->u[3] = src0->u[3] ^ src1->u[3];
4428cdc920a0Smrg}
44294a49301eSmrg
4430af69d88dSmrgstatic void
4431af69d88dSmrgmicro_mod(union tgsi_exec_channel *dst,
4432af69d88dSmrg          const union tgsi_exec_channel *src0,
4433af69d88dSmrg          const union tgsi_exec_channel *src1)
4434af69d88dSmrg{
443501e04c3fSmrg   dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
443601e04c3fSmrg   dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
443701e04c3fSmrg   dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
443801e04c3fSmrg   dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4439af69d88dSmrg}
4440af69d88dSmrg
4441cdc920a0Smrgstatic void
4442cdc920a0Smrgmicro_f2i(union tgsi_exec_channel *dst,
4443cdc920a0Smrg          const union tgsi_exec_channel *src)
4444cdc920a0Smrg{
4445cdc920a0Smrg   dst->i[0] = (int)src->f[0];
4446cdc920a0Smrg   dst->i[1] = (int)src->f[1];
4447cdc920a0Smrg   dst->i[2] = (int)src->f[2];
4448cdc920a0Smrg   dst->i[3] = (int)src->f[3];
4449cdc920a0Smrg}
44504a49301eSmrg
4451af69d88dSmrgstatic void
4452af69d88dSmrgmicro_fseq(union tgsi_exec_channel *dst,
4453af69d88dSmrg           const union tgsi_exec_channel *src0,
4454af69d88dSmrg           const union tgsi_exec_channel *src1)
4455af69d88dSmrg{
4456af69d88dSmrg   dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4457af69d88dSmrg   dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4458af69d88dSmrg   dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4459af69d88dSmrg   dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4460af69d88dSmrg}
4461af69d88dSmrg
4462af69d88dSmrgstatic void
4463af69d88dSmrgmicro_fsge(union tgsi_exec_channel *dst,
4464af69d88dSmrg           const union tgsi_exec_channel *src0,
4465af69d88dSmrg           const union tgsi_exec_channel *src1)
4466af69d88dSmrg{
4467af69d88dSmrg   dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4468af69d88dSmrg   dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4469af69d88dSmrg   dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4470af69d88dSmrg   dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4471af69d88dSmrg}
4472af69d88dSmrg
4473af69d88dSmrgstatic void
4474af69d88dSmrgmicro_fslt(union tgsi_exec_channel *dst,
4475af69d88dSmrg           const union tgsi_exec_channel *src0,
4476af69d88dSmrg           const union tgsi_exec_channel *src1)
4477af69d88dSmrg{
4478af69d88dSmrg   dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4479af69d88dSmrg   dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4480af69d88dSmrg   dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4481af69d88dSmrg   dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4482af69d88dSmrg}
4483af69d88dSmrg
4484af69d88dSmrgstatic void
4485af69d88dSmrgmicro_fsne(union tgsi_exec_channel *dst,
4486af69d88dSmrg           const union tgsi_exec_channel *src0,
4487af69d88dSmrg           const union tgsi_exec_channel *src1)
4488af69d88dSmrg{
4489af69d88dSmrg   dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4490af69d88dSmrg   dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4491af69d88dSmrg   dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4492af69d88dSmrg   dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4493af69d88dSmrg}
4494af69d88dSmrg
4495cdc920a0Smrgstatic void
4496cdc920a0Smrgmicro_idiv(union tgsi_exec_channel *dst,
4497cdc920a0Smrg           const union tgsi_exec_channel *src0,
4498cdc920a0Smrg           const union tgsi_exec_channel *src1)
4499cdc920a0Smrg{
4500af69d88dSmrg   dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4501af69d88dSmrg   dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4502af69d88dSmrg   dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4503af69d88dSmrg   dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
45044a49301eSmrg}
45054a49301eSmrg
4506cdc920a0Smrgstatic void
4507cdc920a0Smrgmicro_imax(union tgsi_exec_channel *dst,
4508cdc920a0Smrg           const union tgsi_exec_channel *src0,
4509cdc920a0Smrg           const union tgsi_exec_channel *src1)
4510cdc920a0Smrg{
4511cdc920a0Smrg   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4512cdc920a0Smrg   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4513cdc920a0Smrg   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4514cdc920a0Smrg   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4515cdc920a0Smrg}
45164a49301eSmrg
45174a49301eSmrgstatic void
4518cdc920a0Smrgmicro_imin(union tgsi_exec_channel *dst,
4519cdc920a0Smrg           const union tgsi_exec_channel *src0,
4520cdc920a0Smrg           const union tgsi_exec_channel *src1)
45214a49301eSmrg{
4522cdc920a0Smrg   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4523cdc920a0Smrg   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4524cdc920a0Smrg   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4525cdc920a0Smrg   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4526cdc920a0Smrg}
45274a49301eSmrg
4528cdc920a0Smrgstatic void
4529cdc920a0Smrgmicro_isge(union tgsi_exec_channel *dst,
4530cdc920a0Smrg           const union tgsi_exec_channel *src0,
4531cdc920a0Smrg           const union tgsi_exec_channel *src1)
4532cdc920a0Smrg{
4533cdc920a0Smrg   dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4534cdc920a0Smrg   dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4535cdc920a0Smrg   dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4536cdc920a0Smrg   dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
45374a49301eSmrg}
45384a49301eSmrg
45394a49301eSmrgstatic void
4540cdc920a0Smrgmicro_ishr(union tgsi_exec_channel *dst,
4541cdc920a0Smrg           const union tgsi_exec_channel *src0,
4542cdc920a0Smrg           const union tgsi_exec_channel *src1)
45434a49301eSmrg{
4544af69d88dSmrg   unsigned masked_count;
4545af69d88dSmrg   masked_count = src1->i[0] & 0x1f;
4546af69d88dSmrg   dst->i[0] = src0->i[0] >> masked_count;
4547af69d88dSmrg   masked_count = src1->i[1] & 0x1f;
4548af69d88dSmrg   dst->i[1] = src0->i[1] >> masked_count;
4549af69d88dSmrg   masked_count = src1->i[2] & 0x1f;
4550af69d88dSmrg   dst->i[2] = src0->i[2] >> masked_count;
4551af69d88dSmrg   masked_count = src1->i[3] & 0x1f;
4552af69d88dSmrg   dst->i[3] = src0->i[3] >> masked_count;
4553cdc920a0Smrg}
4554cdc920a0Smrg
4555cdc920a0Smrgstatic void
4556cdc920a0Smrgmicro_islt(union tgsi_exec_channel *dst,
4557cdc920a0Smrg           const union tgsi_exec_channel *src0,
4558cdc920a0Smrg           const union tgsi_exec_channel *src1)
4559cdc920a0Smrg{
4560cdc920a0Smrg   dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4561cdc920a0Smrg   dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4562cdc920a0Smrg   dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4563cdc920a0Smrg   dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4564cdc920a0Smrg}
4565cdc920a0Smrg
4566cdc920a0Smrgstatic void
4567cdc920a0Smrgmicro_f2u(union tgsi_exec_channel *dst,
4568cdc920a0Smrg          const union tgsi_exec_channel *src)
4569cdc920a0Smrg{
4570cdc920a0Smrg   dst->u[0] = (uint)src->f[0];
4571cdc920a0Smrg   dst->u[1] = (uint)src->f[1];
4572cdc920a0Smrg   dst->u[2] = (uint)src->f[2];
4573cdc920a0Smrg   dst->u[3] = (uint)src->f[3];
4574cdc920a0Smrg}
4575cdc920a0Smrg
4576cdc920a0Smrgstatic void
4577cdc920a0Smrgmicro_u2f(union tgsi_exec_channel *dst,
4578cdc920a0Smrg          const union tgsi_exec_channel *src)
4579cdc920a0Smrg{
4580cdc920a0Smrg   dst->f[0] = (float)src->u[0];
4581cdc920a0Smrg   dst->f[1] = (float)src->u[1];
4582cdc920a0Smrg   dst->f[2] = (float)src->u[2];
4583cdc920a0Smrg   dst->f[3] = (float)src->u[3];
45844a49301eSmrg}
45854a49301eSmrg
45864a49301eSmrgstatic void
4587cdc920a0Smrgmicro_uadd(union tgsi_exec_channel *dst,
4588cdc920a0Smrg           const union tgsi_exec_channel *src0,
4589cdc920a0Smrg           const union tgsi_exec_channel *src1)
45904a49301eSmrg{
4591cdc920a0Smrg   dst->u[0] = src0->u[0] + src1->u[0];
4592cdc920a0Smrg   dst->u[1] = src0->u[1] + src1->u[1];
4593cdc920a0Smrg   dst->u[2] = src0->u[2] + src1->u[2];
4594cdc920a0Smrg   dst->u[3] = src0->u[3] + src1->u[3];
45954a49301eSmrg}
45964a49301eSmrg
4597cdc920a0Smrgstatic void
4598cdc920a0Smrgmicro_udiv(union tgsi_exec_channel *dst,
4599cdc920a0Smrg           const union tgsi_exec_channel *src0,
4600cdc920a0Smrg           const union tgsi_exec_channel *src1)
4601cdc920a0Smrg{
4602af69d88dSmrg   dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4603af69d88dSmrg   dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4604af69d88dSmrg   dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4605af69d88dSmrg   dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4606cdc920a0Smrg}
46074a49301eSmrg
4608cdc920a0Smrgstatic void
4609cdc920a0Smrgmicro_umad(union tgsi_exec_channel *dst,
4610cdc920a0Smrg           const union tgsi_exec_channel *src0,
4611cdc920a0Smrg           const union tgsi_exec_channel *src1,
4612cdc920a0Smrg           const union tgsi_exec_channel *src2)
4613cdc920a0Smrg{
4614cdc920a0Smrg   dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4615cdc920a0Smrg   dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4616cdc920a0Smrg   dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4617cdc920a0Smrg   dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4618cdc920a0Smrg}
46194a49301eSmrg
46204a49301eSmrgstatic void
4621cdc920a0Smrgmicro_umax(union tgsi_exec_channel *dst,
4622cdc920a0Smrg           const union tgsi_exec_channel *src0,
4623cdc920a0Smrg           const union tgsi_exec_channel *src1)
46244a49301eSmrg{
4625cdc920a0Smrg   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4626cdc920a0Smrg   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4627cdc920a0Smrg   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4628cdc920a0Smrg   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4629cdc920a0Smrg}
46304a49301eSmrg
4631cdc920a0Smrgstatic void
4632cdc920a0Smrgmicro_umin(union tgsi_exec_channel *dst,
4633cdc920a0Smrg           const union tgsi_exec_channel *src0,
4634cdc920a0Smrg           const union tgsi_exec_channel *src1)
4635cdc920a0Smrg{
4636cdc920a0Smrg   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4637cdc920a0Smrg   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4638cdc920a0Smrg   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4639cdc920a0Smrg   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4640cdc920a0Smrg}
46414a49301eSmrg
4642cdc920a0Smrgstatic void
4643cdc920a0Smrgmicro_umod(union tgsi_exec_channel *dst,
4644cdc920a0Smrg           const union tgsi_exec_channel *src0,
4645cdc920a0Smrg           const union tgsi_exec_channel *src1)
4646cdc920a0Smrg{
4647af69d88dSmrg   dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4648af69d88dSmrg   dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4649af69d88dSmrg   dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4650af69d88dSmrg   dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4651cdc920a0Smrg}
46524a49301eSmrg
4653cdc920a0Smrgstatic void
4654cdc920a0Smrgmicro_umul(union tgsi_exec_channel *dst,
4655cdc920a0Smrg           const union tgsi_exec_channel *src0,
4656cdc920a0Smrg           const union tgsi_exec_channel *src1)
4657cdc920a0Smrg{
4658cdc920a0Smrg   dst->u[0] = src0->u[0] * src1->u[0];
4659cdc920a0Smrg   dst->u[1] = src0->u[1] * src1->u[1];
4660cdc920a0Smrg   dst->u[2] = src0->u[2] * src1->u[2];
4661cdc920a0Smrg   dst->u[3] = src0->u[3] * src1->u[3];
4662cdc920a0Smrg}
46634a49301eSmrg
4664af69d88dSmrgstatic void
4665af69d88dSmrgmicro_imul_hi(union tgsi_exec_channel *dst,
4666af69d88dSmrg              const union tgsi_exec_channel *src0,
4667af69d88dSmrg              const union tgsi_exec_channel *src1)
4668af69d88dSmrg{
4669af69d88dSmrg#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4670af69d88dSmrg   dst->i[0] = I64M(src0->i[0], src1->i[0]);
4671af69d88dSmrg   dst->i[1] = I64M(src0->i[1], src1->i[1]);
4672af69d88dSmrg   dst->i[2] = I64M(src0->i[2], src1->i[2]);
4673af69d88dSmrg   dst->i[3] = I64M(src0->i[3], src1->i[3]);
4674af69d88dSmrg#undef I64M
4675af69d88dSmrg}
4676af69d88dSmrg
4677af69d88dSmrgstatic void
4678af69d88dSmrgmicro_umul_hi(union tgsi_exec_channel *dst,
4679af69d88dSmrg              const union tgsi_exec_channel *src0,
4680af69d88dSmrg              const union tgsi_exec_channel *src1)
4681af69d88dSmrg{
4682af69d88dSmrg#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4683af69d88dSmrg   dst->u[0] = U64M(src0->u[0], src1->u[0]);
4684af69d88dSmrg   dst->u[1] = U64M(src0->u[1], src1->u[1]);
4685af69d88dSmrg   dst->u[2] = U64M(src0->u[2], src1->u[2]);
4686af69d88dSmrg   dst->u[3] = U64M(src0->u[3], src1->u[3]);
4687af69d88dSmrg#undef U64M
4688af69d88dSmrg}
4689af69d88dSmrg
4690cdc920a0Smrgstatic void
4691cdc920a0Smrgmicro_useq(union tgsi_exec_channel *dst,
4692cdc920a0Smrg           const union tgsi_exec_channel *src0,
4693cdc920a0Smrg           const union tgsi_exec_channel *src1)
4694cdc920a0Smrg{
4695cdc920a0Smrg   dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4696cdc920a0Smrg   dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4697cdc920a0Smrg   dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4698cdc920a0Smrg   dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4699cdc920a0Smrg}
47004a49301eSmrg
4701cdc920a0Smrgstatic void
4702cdc920a0Smrgmicro_usge(union tgsi_exec_channel *dst,
4703cdc920a0Smrg           const union tgsi_exec_channel *src0,
4704cdc920a0Smrg           const union tgsi_exec_channel *src1)
4705cdc920a0Smrg{
4706cdc920a0Smrg   dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4707cdc920a0Smrg   dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4708cdc920a0Smrg   dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4709cdc920a0Smrg   dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4710cdc920a0Smrg}
47114a49301eSmrg
4712cdc920a0Smrgstatic void
4713cdc920a0Smrgmicro_ushr(union tgsi_exec_channel *dst,
4714cdc920a0Smrg           const union tgsi_exec_channel *src0,
4715cdc920a0Smrg           const union tgsi_exec_channel *src1)
4716cdc920a0Smrg{
4717af69d88dSmrg   unsigned masked_count;
4718af69d88dSmrg   masked_count = src1->u[0] & 0x1f;
4719af69d88dSmrg   dst->u[0] = src0->u[0] >> masked_count;
4720af69d88dSmrg   masked_count = src1->u[1] & 0x1f;
4721af69d88dSmrg   dst->u[1] = src0->u[1] >> masked_count;
4722af69d88dSmrg   masked_count = src1->u[2] & 0x1f;
4723af69d88dSmrg   dst->u[2] = src0->u[2] >> masked_count;
4724af69d88dSmrg   masked_count = src1->u[3] & 0x1f;
4725af69d88dSmrg   dst->u[3] = src0->u[3] >> masked_count;
4726cdc920a0Smrg}
47274a49301eSmrg
4728cdc920a0Smrgstatic void
4729cdc920a0Smrgmicro_uslt(union tgsi_exec_channel *dst,
4730cdc920a0Smrg           const union tgsi_exec_channel *src0,
4731cdc920a0Smrg           const union tgsi_exec_channel *src1)
4732cdc920a0Smrg{
4733cdc920a0Smrg   dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4734cdc920a0Smrg   dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4735cdc920a0Smrg   dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4736cdc920a0Smrg   dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4737cdc920a0Smrg}
47384a49301eSmrg
4739cdc920a0Smrgstatic void
4740cdc920a0Smrgmicro_usne(union tgsi_exec_channel *dst,
4741cdc920a0Smrg           const union tgsi_exec_channel *src0,
4742cdc920a0Smrg           const union tgsi_exec_channel *src1)
4743cdc920a0Smrg{
4744cdc920a0Smrg   dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4745cdc920a0Smrg   dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4746cdc920a0Smrg   dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4747cdc920a0Smrg   dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
47484a49301eSmrg}
47494a49301eSmrg
4750af69d88dSmrgstatic void
4751af69d88dSmrgmicro_uarl(union tgsi_exec_channel *dst,
4752af69d88dSmrg           const union tgsi_exec_channel *src)
4753af69d88dSmrg{
4754af69d88dSmrg   dst->i[0] = src->u[0];
4755af69d88dSmrg   dst->i[1] = src->u[1];
4756af69d88dSmrg   dst->i[2] = src->u[2];
4757af69d88dSmrg   dst->i[3] = src->u[3];
4758af69d88dSmrg}
4759af69d88dSmrg
4760af69d88dSmrg/**
4761af69d88dSmrg * Signed bitfield extract (i.e. sign-extend the extracted bits)
4762af69d88dSmrg */
4763af69d88dSmrgstatic void
4764af69d88dSmrgmicro_ibfe(union tgsi_exec_channel *dst,
4765af69d88dSmrg           const union tgsi_exec_channel *src0,
4766af69d88dSmrg           const union tgsi_exec_channel *src1,
4767af69d88dSmrg           const union tgsi_exec_channel *src2)
4768af69d88dSmrg{
4769af69d88dSmrg   int i;
4770af69d88dSmrg   for (i = 0; i < 4; i++) {
4771361fc4cbSmaya      int width = src2->i[i];
4772af69d88dSmrg      int offset = src1->i[i] & 0x1f;
4773361fc4cbSmaya      if (width == 32 && offset == 0) {
4774361fc4cbSmaya         dst->i[i] = src0->i[i];
4775361fc4cbSmaya         continue;
4776361fc4cbSmaya      }
4777361fc4cbSmaya      width &= 0x1f;
4778af69d88dSmrg      if (width == 0)
4779af69d88dSmrg         dst->i[i] = 0;
4780af69d88dSmrg      else if (width + offset < 32)
4781af69d88dSmrg         dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
4782af69d88dSmrg      else
4783af69d88dSmrg         dst->i[i] = src0->i[i] >> offset;
4784af69d88dSmrg   }
4785af69d88dSmrg}
4786af69d88dSmrg
4787af69d88dSmrg/**
4788af69d88dSmrg * Unsigned bitfield extract
4789af69d88dSmrg */
4790af69d88dSmrgstatic void
4791af69d88dSmrgmicro_ubfe(union tgsi_exec_channel *dst,
4792af69d88dSmrg           const union tgsi_exec_channel *src0,
4793af69d88dSmrg           const union tgsi_exec_channel *src1,
4794af69d88dSmrg           const union tgsi_exec_channel *src2)
4795af69d88dSmrg{
4796af69d88dSmrg   int i;
4797af69d88dSmrg   for (i = 0; i < 4; i++) {
4798361fc4cbSmaya      int width = src2->u[i];
4799af69d88dSmrg      int offset = src1->u[i] & 0x1f;
4800361fc4cbSmaya      if (width == 32 && offset == 0) {
4801361fc4cbSmaya         dst->u[i] = src0->u[i];
4802361fc4cbSmaya         continue;
4803361fc4cbSmaya      }
4804361fc4cbSmaya      width &= 0x1f;
4805af69d88dSmrg      if (width == 0)
4806af69d88dSmrg         dst->u[i] = 0;
4807af69d88dSmrg      else if (width + offset < 32)
4808af69d88dSmrg         dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
4809af69d88dSmrg      else
4810af69d88dSmrg         dst->u[i] = src0->u[i] >> offset;
4811af69d88dSmrg   }
4812af69d88dSmrg}
4813af69d88dSmrg
4814af69d88dSmrg/**
4815af69d88dSmrg * Bitfield insert: copy low bits from src1 into a region of src0.
4816af69d88dSmrg */
4817af69d88dSmrgstatic void
4818af69d88dSmrgmicro_bfi(union tgsi_exec_channel *dst,
4819af69d88dSmrg          const union tgsi_exec_channel *src0,
4820af69d88dSmrg          const union tgsi_exec_channel *src1,
4821af69d88dSmrg          const union tgsi_exec_channel *src2,
4822af69d88dSmrg          const union tgsi_exec_channel *src3)
4823af69d88dSmrg{
4824af69d88dSmrg   int i;
4825af69d88dSmrg   for (i = 0; i < 4; i++) {
4826361fc4cbSmaya      int width = src3->u[i];
4827af69d88dSmrg      int offset = src2->u[i] & 0x1f;
4828361fc4cbSmaya      if (width == 32) {
4829361fc4cbSmaya         dst->u[i] = src1->u[i];
4830361fc4cbSmaya      } else {
4831361fc4cbSmaya         int bitmask = ((1 << width) - 1) << offset;
4832361fc4cbSmaya         dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
4833361fc4cbSmaya      }
4834af69d88dSmrg   }
4835af69d88dSmrg}
4836af69d88dSmrg
4837af69d88dSmrgstatic void
4838af69d88dSmrgmicro_brev(union tgsi_exec_channel *dst,
4839af69d88dSmrg           const union tgsi_exec_channel *src)
4840af69d88dSmrg{
4841af69d88dSmrg   dst->u[0] = util_bitreverse(src->u[0]);
4842af69d88dSmrg   dst->u[1] = util_bitreverse(src->u[1]);
4843af69d88dSmrg   dst->u[2] = util_bitreverse(src->u[2]);
4844af69d88dSmrg   dst->u[3] = util_bitreverse(src->u[3]);
4845af69d88dSmrg}
4846af69d88dSmrg
4847af69d88dSmrgstatic void
4848af69d88dSmrgmicro_popc(union tgsi_exec_channel *dst,
4849af69d88dSmrg           const union tgsi_exec_channel *src)
4850af69d88dSmrg{
4851af69d88dSmrg   dst->u[0] = util_bitcount(src->u[0]);
4852af69d88dSmrg   dst->u[1] = util_bitcount(src->u[1]);
4853af69d88dSmrg   dst->u[2] = util_bitcount(src->u[2]);
4854af69d88dSmrg   dst->u[3] = util_bitcount(src->u[3]);
4855af69d88dSmrg}
4856af69d88dSmrg
4857af69d88dSmrgstatic void
4858af69d88dSmrgmicro_lsb(union tgsi_exec_channel *dst,
4859af69d88dSmrg          const union tgsi_exec_channel *src)
4860af69d88dSmrg{
4861af69d88dSmrg   dst->i[0] = ffs(src->u[0]) - 1;
4862af69d88dSmrg   dst->i[1] = ffs(src->u[1]) - 1;
4863af69d88dSmrg   dst->i[2] = ffs(src->u[2]) - 1;
4864af69d88dSmrg   dst->i[3] = ffs(src->u[3]) - 1;
4865af69d88dSmrg}
4866af69d88dSmrg
4867af69d88dSmrgstatic void
4868af69d88dSmrgmicro_imsb(union tgsi_exec_channel *dst,
4869af69d88dSmrg           const union tgsi_exec_channel *src)
4870af69d88dSmrg{
4871af69d88dSmrg   dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
4872af69d88dSmrg   dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
4873af69d88dSmrg   dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
4874af69d88dSmrg   dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
4875af69d88dSmrg}
4876af69d88dSmrg
4877af69d88dSmrgstatic void
4878af69d88dSmrgmicro_umsb(union tgsi_exec_channel *dst,
4879af69d88dSmrg           const union tgsi_exec_channel *src)
4880af69d88dSmrg{
4881af69d88dSmrg   dst->i[0] = util_last_bit(src->u[0]) - 1;
4882af69d88dSmrg   dst->i[1] = util_last_bit(src->u[1]) - 1;
4883af69d88dSmrg   dst->i[2] = util_last_bit(src->u[2]) - 1;
4884af69d88dSmrg   dst->i[3] = util_last_bit(src->u[3]) - 1;
4885af69d88dSmrg}
4886af69d88dSmrg
4887361fc4cbSmaya
4888361fc4cbSmayastatic void
4889361fc4cbSmayaexec_interp_at_sample(struct tgsi_exec_machine *mach,
4890361fc4cbSmaya                      const struct tgsi_full_instruction *inst)
4891361fc4cbSmaya{
4892361fc4cbSmaya   union tgsi_exec_channel index;
4893361fc4cbSmaya   union tgsi_exec_channel index2D;
4894361fc4cbSmaya   union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4895361fc4cbSmaya   const struct tgsi_full_src_register *reg = &inst->Src[0];
4896361fc4cbSmaya
4897361fc4cbSmaya   assert(reg->Register.File == TGSI_FILE_INPUT);
4898361fc4cbSmaya   assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
4899361fc4cbSmaya
4900361fc4cbSmaya   get_index_registers(mach, reg, &index, &index2D);
4901361fc4cbSmaya   float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
4902361fc4cbSmaya
4903361fc4cbSmaya   /* Short cut: sample 0 is like a normal fetch */
4904361fc4cbSmaya   for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4905361fc4cbSmaya      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4906361fc4cbSmaya         continue;
4907361fc4cbSmaya
4908361fc4cbSmaya      fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4909361fc4cbSmaya                             &result[chan]);
4910361fc4cbSmaya      if (sample != 0.0f) {
4911361fc4cbSmaya
4912361fc4cbSmaya      /* TODO: define the samples > 0, but so far we only do fake MSAA */
4913361fc4cbSmaya         float x = 0;
4914361fc4cbSmaya         float y = 0;
4915361fc4cbSmaya
4916361fc4cbSmaya         unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
4917361fc4cbSmaya         assert(pos >= 0);
4918361fc4cbSmaya         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
4919361fc4cbSmaya         mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
4920361fc4cbSmaya      }
49217ec681f3Smrg      store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4922361fc4cbSmaya   }
4923361fc4cbSmaya}
4924361fc4cbSmaya
4925361fc4cbSmaya
4926361fc4cbSmayastatic void
4927361fc4cbSmayaexec_interp_at_offset(struct tgsi_exec_machine *mach,
4928361fc4cbSmaya                      const struct tgsi_full_instruction *inst)
4929361fc4cbSmaya{
4930361fc4cbSmaya   union tgsi_exec_channel index;
4931361fc4cbSmaya   union tgsi_exec_channel index2D;
4932361fc4cbSmaya   union tgsi_exec_channel ofsx;
4933361fc4cbSmaya   union tgsi_exec_channel ofsy;
4934361fc4cbSmaya   const struct tgsi_full_src_register *reg = &inst->Src[0];
4935361fc4cbSmaya
4936361fc4cbSmaya   assert(reg->Register.File == TGSI_FILE_INPUT);
4937361fc4cbSmaya
4938361fc4cbSmaya   get_index_registers(mach, reg, &index, &index2D);
4939361fc4cbSmaya   unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
4940361fc4cbSmaya
4941361fc4cbSmaya   fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
4942361fc4cbSmaya   fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
4943361fc4cbSmaya
4944361fc4cbSmaya   for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4945361fc4cbSmaya      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4946361fc4cbSmaya         continue;
4947361fc4cbSmaya      union tgsi_exec_channel result;
4948361fc4cbSmaya      fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
4949361fc4cbSmaya      mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
49507ec681f3Smrg      store_dest(mach, &result, &inst->Dst[0], inst, chan);
4951361fc4cbSmaya   }
4952361fc4cbSmaya}
4953361fc4cbSmaya
4954361fc4cbSmaya
4955361fc4cbSmayastatic void
4956361fc4cbSmayaexec_interp_at_centroid(struct tgsi_exec_machine *mach,
4957361fc4cbSmaya                        const struct tgsi_full_instruction *inst)
4958361fc4cbSmaya{
4959361fc4cbSmaya   union tgsi_exec_channel index;
4960361fc4cbSmaya   union tgsi_exec_channel index2D;
4961361fc4cbSmaya   union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4962361fc4cbSmaya   const struct tgsi_full_src_register *reg = &inst->Src[0];
4963361fc4cbSmaya
4964361fc4cbSmaya   assert(reg->Register.File == TGSI_FILE_INPUT);
4965361fc4cbSmaya   get_index_registers(mach, reg, &index, &index2D);
4966361fc4cbSmaya
4967361fc4cbSmaya   for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4968361fc4cbSmaya      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4969361fc4cbSmaya         continue;
4970361fc4cbSmaya
4971361fc4cbSmaya      /* Here we should add the change to use a sample that lies within the
4972361fc4cbSmaya       * primitive (Section 15.2):
4973361fc4cbSmaya       *
4974361fc4cbSmaya       * "When interpolating variables declared using centroid in ,
4975361fc4cbSmaya       * the variable is sampled at a location within the pixel covered
4976361fc4cbSmaya       * by the primitive generating the fragment.
4977361fc4cbSmaya       * ...
4978361fc4cbSmaya       * The built-in functions interpolateAtCentroid ... will sample
4979361fc4cbSmaya       * variables as though they were declared with the centroid ...
4980361fc4cbSmaya       * qualifier[s]."
4981361fc4cbSmaya       *
4982361fc4cbSmaya       * Since we only support 1 sample currently, this is just a pass-through.
4983361fc4cbSmaya       */
4984361fc4cbSmaya      fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4985361fc4cbSmaya                             &result[chan]);
49867ec681f3Smrg      store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4987361fc4cbSmaya   }
4988361fc4cbSmaya
4989361fc4cbSmaya}
4990361fc4cbSmaya
4991361fc4cbSmaya
499201e04c3fSmrg/**
499301e04c3fSmrg * Execute a TGSI instruction.
499401e04c3fSmrg * Returns TRUE if a barrier instruction is hit,
499501e04c3fSmrg * otherwise FALSE.
499601e04c3fSmrg */
499701e04c3fSmrgstatic boolean
49984a49301eSmrgexec_instruction(
49994a49301eSmrg   struct tgsi_exec_machine *mach,
50004a49301eSmrg   const struct tgsi_full_instruction *inst,
50014a49301eSmrg   int *pc )
50024a49301eSmrg{
5003cdc920a0Smrg   union tgsi_exec_channel r[10];
50044a49301eSmrg
50054a49301eSmrg   (*pc)++;
50064a49301eSmrg
50074a49301eSmrg   switch (inst->Instruction.Opcode) {
50084a49301eSmrg   case TGSI_OPCODE_ARL:
50097ec681f3Smrg      exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT);
50104a49301eSmrg      break;
50114a49301eSmrg
50124a49301eSmrg   case TGSI_OPCODE_MOV:
50137ec681f3Smrg      exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT);
50144a49301eSmrg      break;
50154a49301eSmrg
50164a49301eSmrg   case TGSI_OPCODE_LIT:
50173464ebd5Sriastradh      exec_lit(mach, inst);
50184a49301eSmrg      break;
50194a49301eSmrg
50204a49301eSmrg   case TGSI_OPCODE_RCP:
50217ec681f3Smrg      exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT);
50224a49301eSmrg      break;
50234a49301eSmrg
50244a49301eSmrg   case TGSI_OPCODE_RSQ:
50257ec681f3Smrg      exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT);
50264a49301eSmrg      break;
50274a49301eSmrg
50284a49301eSmrg   case TGSI_OPCODE_EXP:
50293464ebd5Sriastradh      exec_exp(mach, inst);
50304a49301eSmrg      break;
50314a49301eSmrg
50324a49301eSmrg   case TGSI_OPCODE_LOG:
50333464ebd5Sriastradh      exec_log(mach, inst);
50344a49301eSmrg      break;
50354a49301eSmrg
50364a49301eSmrg   case TGSI_OPCODE_MUL:
50377ec681f3Smrg      exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT);
50384a49301eSmrg      break;
50394a49301eSmrg
50404a49301eSmrg   case TGSI_OPCODE_ADD:
50417ec681f3Smrg      exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT);
50424a49301eSmrg      break;
50434a49301eSmrg
50444a49301eSmrg   case TGSI_OPCODE_DP3:
5045cdc920a0Smrg      exec_dp3(mach, inst);
50464a49301eSmrg      break;
50474a49301eSmrg
5048cdc920a0Smrg   case TGSI_OPCODE_DP4:
5049cdc920a0Smrg      exec_dp4(mach, inst);
50504a49301eSmrg      break;
50514a49301eSmrg
50524a49301eSmrg   case TGSI_OPCODE_DST:
50533464ebd5Sriastradh      exec_dst(mach, inst);
50544a49301eSmrg      break;
50554a49301eSmrg
50564a49301eSmrg   case TGSI_OPCODE_MIN:
50577ec681f3Smrg      exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT);
50584a49301eSmrg      break;
50594a49301eSmrg
50604a49301eSmrg   case TGSI_OPCODE_MAX:
50617ec681f3Smrg      exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT);
50624a49301eSmrg      break;
50634a49301eSmrg
50644a49301eSmrg   case TGSI_OPCODE_SLT:
50657ec681f3Smrg      exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT);
50664a49301eSmrg      break;
50674a49301eSmrg
50684a49301eSmrg   case TGSI_OPCODE_SGE:
50697ec681f3Smrg      exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT);
50704a49301eSmrg      break;
50714a49301eSmrg
50724a49301eSmrg   case TGSI_OPCODE_MAD:
50737ec681f3Smrg      exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT);
50744a49301eSmrg      break;
50754a49301eSmrg
50764a49301eSmrg   case TGSI_OPCODE_LRP:
50777ec681f3Smrg      exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT);
50784a49301eSmrg      break;
50794a49301eSmrg
5080af69d88dSmrg   case TGSI_OPCODE_SQRT:
50817ec681f3Smrg      exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT);
5082af69d88dSmrg      break;
5083af69d88dSmrg
50844a49301eSmrg   case TGSI_OPCODE_FRC:
50857ec681f3Smrg      exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT);
50864a49301eSmrg      break;
50874a49301eSmrg
5088cdc920a0Smrg   case TGSI_OPCODE_FLR:
50897ec681f3Smrg      exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT);
50904a49301eSmrg      break;
50914a49301eSmrg
50924a49301eSmrg   case TGSI_OPCODE_ROUND:
50937ec681f3Smrg      exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT);
50944a49301eSmrg      break;
50954a49301eSmrg
50964a49301eSmrg   case TGSI_OPCODE_EX2:
50977ec681f3Smrg      exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT);
50984a49301eSmrg      break;
50994a49301eSmrg
51004a49301eSmrg   case TGSI_OPCODE_LG2:
51017ec681f3Smrg      exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT);
51024a49301eSmrg      break;
51034a49301eSmrg
51044a49301eSmrg   case TGSI_OPCODE_POW:
51057ec681f3Smrg      exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT);
51064a49301eSmrg      break;
51074a49301eSmrg
510801e04c3fSmrg   case TGSI_OPCODE_LDEXP:
51097ec681f3Smrg      exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT);
51104a49301eSmrg      break;
51114a49301eSmrg
5112cdc920a0Smrg   case TGSI_OPCODE_COS:
51137ec681f3Smrg      exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT);
51147ec681f3Smrg      break;
51157ec681f3Smrg
51167ec681f3Smrg   case TGSI_OPCODE_DDX_FINE:
51177ec681f3Smrg      exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT);
51184a49301eSmrg      break;
51194a49301eSmrg
51204a49301eSmrg   case TGSI_OPCODE_DDX:
51217ec681f3Smrg      exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT);
51227ec681f3Smrg      break;
51237ec681f3Smrg
51247ec681f3Smrg   case TGSI_OPCODE_DDY_FINE:
51257ec681f3Smrg      exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT);
51264a49301eSmrg      break;
51274a49301eSmrg
51284a49301eSmrg   case TGSI_OPCODE_DDY:
51297ec681f3Smrg      exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT);
51304a49301eSmrg      break;
51314a49301eSmrg
5132af69d88dSmrg   case TGSI_OPCODE_KILL:
513301e04c3fSmrg      exec_kill (mach);
51344a49301eSmrg      break;
51354a49301eSmrg
5136af69d88dSmrg   case TGSI_OPCODE_KILL_IF:
5137af69d88dSmrg      exec_kill_if (mach, inst);
51384a49301eSmrg      break;
51394a49301eSmrg
51404a49301eSmrg   case TGSI_OPCODE_PK2H:
514101e04c3fSmrg      exec_pk2h(mach, inst);
51424a49301eSmrg      break;
51434a49301eSmrg
51444a49301eSmrg   case TGSI_OPCODE_PK2US:
51454a49301eSmrg      assert (0);
51464a49301eSmrg      break;
51474a49301eSmrg
51484a49301eSmrg   case TGSI_OPCODE_PK4B:
51494a49301eSmrg      assert (0);
51504a49301eSmrg      break;
51514a49301eSmrg
51524a49301eSmrg   case TGSI_OPCODE_PK4UB:
51534a49301eSmrg      assert (0);
51544a49301eSmrg      break;
51554a49301eSmrg
51564a49301eSmrg   case TGSI_OPCODE_SEQ:
51577ec681f3Smrg      exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT);
51584a49301eSmrg      break;
51594a49301eSmrg
51604a49301eSmrg   case TGSI_OPCODE_SGT:
51617ec681f3Smrg      exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT);
51624a49301eSmrg      break;
51634a49301eSmrg
51644a49301eSmrg   case TGSI_OPCODE_SIN:
51657ec681f3Smrg      exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT);
51664a49301eSmrg      break;
51674a49301eSmrg
51684a49301eSmrg   case TGSI_OPCODE_SLE:
51697ec681f3Smrg      exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT);
51704a49301eSmrg      break;
51714a49301eSmrg
51724a49301eSmrg   case TGSI_OPCODE_SNE:
51737ec681f3Smrg      exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT);
51744a49301eSmrg      break;
51754a49301eSmrg
51764a49301eSmrg   case TGSI_OPCODE_TEX:
51774a49301eSmrg      /* simple texture lookup */
51784a49301eSmrg      /* src[0] = texcoord */
51794a49301eSmrg      /* src[1] = sampler unit */
5180af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
51814a49301eSmrg      break;
51824a49301eSmrg
51834a49301eSmrg   case TGSI_OPCODE_TXB:
51844a49301eSmrg      /* Texture lookup with lod bias */
51854a49301eSmrg      /* src[0] = texcoord (src[0].w = LOD bias) */
51864a49301eSmrg      /* src[1] = sampler unit */
5187af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
51884a49301eSmrg      break;
51894a49301eSmrg
51904a49301eSmrg   case TGSI_OPCODE_TXD:
      /* Texture lookup with explicit partial derivatives */
51924a49301eSmrg      /* src[0] = texcoord */
51934a49301eSmrg      /* src[1] = d[strq]/dx */
51944a49301eSmrg      /* src[2] = d[strq]/dy */
51954a49301eSmrg      /* src[3] = sampler unit */
5196cdc920a0Smrg      exec_txd(mach, inst);
51974a49301eSmrg      break;
51984a49301eSmrg
51994a49301eSmrg   case TGSI_OPCODE_TXL:
      /* Texture lookup with explicit LOD */
52014a49301eSmrg      /* src[0] = texcoord (src[0].w = LOD) */
52024a49301eSmrg      /* src[1] = sampler unit */
5203af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
52044a49301eSmrg      break;
52054a49301eSmrg
52064a49301eSmrg   case TGSI_OPCODE_TXP:
52074a49301eSmrg      /* Texture lookup with projection */
52084a49301eSmrg      /* src[0] = texcoord (src[0].w = projection) */
52094a49301eSmrg      /* src[1] = sampler unit */
5210af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
52114a49301eSmrg      break;
52124a49301eSmrg
521301e04c3fSmrg   case TGSI_OPCODE_TG4:
521401e04c3fSmrg      /* src[0] = texcoord */
521501e04c3fSmrg      /* src[1] = component */
521601e04c3fSmrg      /* src[2] = sampler unit */
521701e04c3fSmrg      exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
521801e04c3fSmrg      break;
521901e04c3fSmrg
522001e04c3fSmrg   case TGSI_OPCODE_LODQ:
522101e04c3fSmrg      /* src[0] = texcoord */
522201e04c3fSmrg      /* src[1] = sampler unit */
522301e04c3fSmrg      exec_lodq(mach, inst);
522401e04c3fSmrg      break;
522501e04c3fSmrg
52264a49301eSmrg   case TGSI_OPCODE_UP2H:
522701e04c3fSmrg      exec_up2h(mach, inst);
52284a49301eSmrg      break;
52294a49301eSmrg
52304a49301eSmrg   case TGSI_OPCODE_UP2US:
52314a49301eSmrg      assert (0);
52324a49301eSmrg      break;
52334a49301eSmrg
52344a49301eSmrg   case TGSI_OPCODE_UP4B:
52354a49301eSmrg      assert (0);
52364a49301eSmrg      break;
52374a49301eSmrg
52384a49301eSmrg   case TGSI_OPCODE_UP4UB:
52394a49301eSmrg      assert (0);
52404a49301eSmrg      break;
52414a49301eSmrg
5242cdc920a0Smrg   case TGSI_OPCODE_ARR:
52437ec681f3Smrg      exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT);
5244cdc920a0Smrg      break;
5245cdc920a0Smrg
52464a49301eSmrg   case TGSI_OPCODE_CAL:
52474a49301eSmrg      /* skip the call if no execution channels are enabled */
52484a49301eSmrg      if (mach->ExecMask) {
52494a49301eSmrg         /* do the call */
52504a49301eSmrg
52514a49301eSmrg         /* First, record the depths of the execution stacks.
52524a49301eSmrg          * This is important for deeply nested/looped return statements.
52534a49301eSmrg          * We have to unwind the stacks by the correct amount.  For a
52544a49301eSmrg          * real code generator, we could determine the number of entries
52554a49301eSmrg          * to pop off each stack with simple static analysis and avoid
52564a49301eSmrg          * implementing this data structure at run time.
52574a49301eSmrg          */
52584a49301eSmrg         mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
52594a49301eSmrg         mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
52604a49301eSmrg         mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5261cdc920a0Smrg         mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5262cdc920a0Smrg         mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
52634a49301eSmrg         /* note that PC was already incremented above */
52644a49301eSmrg         mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
52654a49301eSmrg
52664a49301eSmrg         mach->CallStackTop++;
52674a49301eSmrg
52684a49301eSmrg         /* Second, push the Cond, Loop, Cont, Func stacks */
52694a49301eSmrg         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
52704a49301eSmrg         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
52714a49301eSmrg         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5272cdc920a0Smrg         assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5273cdc920a0Smrg         assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
52744a49301eSmrg         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5275cdc920a0Smrg
5276cdc920a0Smrg         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5277cdc920a0Smrg         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5278cdc920a0Smrg         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5279cdc920a0Smrg         mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5280cdc920a0Smrg         mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
52814a49301eSmrg         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
52824a49301eSmrg
528301e04c3fSmrg         /* Finally, jump to the subroutine.  The label is a pointer
528401e04c3fSmrg          * (an instruction number) to the BGNSUB instruction.
528501e04c3fSmrg          */
5286cdc920a0Smrg         *pc = inst->Label.Label;
528701e04c3fSmrg         assert(mach->Instructions[*pc].Instruction.Opcode
528801e04c3fSmrg                == TGSI_OPCODE_BGNSUB);
52894a49301eSmrg      }
52904a49301eSmrg      break;
52914a49301eSmrg
52924a49301eSmrg   case TGSI_OPCODE_RET:
52934a49301eSmrg      mach->FuncMask &= ~mach->ExecMask;
52944a49301eSmrg      UPDATE_EXEC_MASK(mach);
52954a49301eSmrg
52964a49301eSmrg      if (mach->FuncMask == 0x0) {
         /* really return now (otherwise, keep executing) */
52984a49301eSmrg
52994a49301eSmrg         if (mach->CallStackTop == 0) {
53004a49301eSmrg            /* returning from main() */
53013464ebd5Sriastradh            mach->CondStackTop = 0;
53023464ebd5Sriastradh            mach->LoopStackTop = 0;
530301e04c3fSmrg            mach->ContStackTop = 0;
530401e04c3fSmrg            mach->LoopLabelStackTop = 0;
530501e04c3fSmrg            mach->SwitchStackTop = 0;
530601e04c3fSmrg            mach->BreakStackTop = 0;
53074a49301eSmrg            *pc = -1;
530801e04c3fSmrg            return FALSE;
53094a49301eSmrg         }
53104a49301eSmrg
53114a49301eSmrg         assert(mach->CallStackTop > 0);
53124a49301eSmrg         mach->CallStackTop--;
53134a49301eSmrg
53144a49301eSmrg         mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
53154a49301eSmrg         mach->CondMask = mach->CondStack[mach->CondStackTop];
53164a49301eSmrg
53174a49301eSmrg         mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
53184a49301eSmrg         mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
53194a49301eSmrg
53204a49301eSmrg         mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
53214a49301eSmrg         mach->ContMask = mach->ContStack[mach->ContStackTop];
53224a49301eSmrg
5323cdc920a0Smrg         mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5324cdc920a0Smrg         mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5325cdc920a0Smrg
5326cdc920a0Smrg         mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5327cdc920a0Smrg         mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5328cdc920a0Smrg
53294a49301eSmrg         assert(mach->FuncStackTop > 0);
53304a49301eSmrg         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
53314a49301eSmrg
53324a49301eSmrg         *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
53334a49301eSmrg
53344a49301eSmrg         UPDATE_EXEC_MASK(mach);
53354a49301eSmrg      }
53364a49301eSmrg      break;
53374a49301eSmrg
53384a49301eSmrg   case TGSI_OPCODE_SSG:
53397ec681f3Smrg      exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT);
53404a49301eSmrg      break;
53414a49301eSmrg
53424a49301eSmrg   case TGSI_OPCODE_CMP:
53437ec681f3Smrg      exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT);
53444a49301eSmrg      break;
53454a49301eSmrg
53464a49301eSmrg   case TGSI_OPCODE_DIV:
53477ec681f3Smrg      exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT);
53484a49301eSmrg      break;
53494a49301eSmrg
53504a49301eSmrg   case TGSI_OPCODE_DP2:
5351cdc920a0Smrg      exec_dp2(mach, inst);
53524a49301eSmrg      break;
53534a49301eSmrg
53544a49301eSmrg   case TGSI_OPCODE_IF:
53554a49301eSmrg      /* push CondMask */
53564a49301eSmrg      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
53574a49301eSmrg      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5358af69d88dSmrg      FETCH( &r[0], 0, TGSI_CHAN_X );
53597ec681f3Smrg      for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
53607ec681f3Smrg         if (!r[0].f[i])
53617ec681f3Smrg            mach->CondMask &= ~(1 << i);
5362af69d88dSmrg      }
5363af69d88dSmrg      UPDATE_EXEC_MASK(mach);
53647ec681f3Smrg      /* If no channels are taking the then branch, jump to ELSE. */
53657ec681f3Smrg      if (!mach->CondMask)
53667ec681f3Smrg         *pc = inst->Label.Label;
5367af69d88dSmrg      break;
5368af69d88dSmrg
5369af69d88dSmrg   case TGSI_OPCODE_UIF:
5370af69d88dSmrg      /* push CondMask */
5371af69d88dSmrg      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5372af69d88dSmrg      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5373af69d88dSmrg      IFETCH( &r[0], 0, TGSI_CHAN_X );
53747ec681f3Smrg      for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
53757ec681f3Smrg         if (!r[0].u[i])
53767ec681f3Smrg            mach->CondMask &= ~(1 << i);
53774a49301eSmrg      }
53784a49301eSmrg      UPDATE_EXEC_MASK(mach);
53797ec681f3Smrg      /* If no channels are taking the then branch, jump to ELSE. */
53807ec681f3Smrg      if (!mach->CondMask)
53817ec681f3Smrg         *pc = inst->Label.Label;
53824a49301eSmrg      break;
53834a49301eSmrg
53844a49301eSmrg   case TGSI_OPCODE_ELSE:
53854a49301eSmrg      /* invert CondMask wrt previous mask */
53864a49301eSmrg      {
53874a49301eSmrg         uint prevMask;
53884a49301eSmrg         assert(mach->CondStackTop > 0);
53894a49301eSmrg         prevMask = mach->CondStack[mach->CondStackTop - 1];
53904a49301eSmrg         mach->CondMask = ~mach->CondMask & prevMask;
53914a49301eSmrg         UPDATE_EXEC_MASK(mach);
53927ec681f3Smrg
53937ec681f3Smrg         /* If no channels are taking ELSE, jump to ENDIF */
53947ec681f3Smrg         if (!mach->CondMask)
53957ec681f3Smrg            *pc = inst->Label.Label;
53964a49301eSmrg      }
53974a49301eSmrg      break;
53984a49301eSmrg
53994a49301eSmrg   case TGSI_OPCODE_ENDIF:
54004a49301eSmrg      /* pop CondMask */
54014a49301eSmrg      assert(mach->CondStackTop > 0);
54024a49301eSmrg      mach->CondMask = mach->CondStack[--mach->CondStackTop];
54034a49301eSmrg      UPDATE_EXEC_MASK(mach);
54044a49301eSmrg      break;
54054a49301eSmrg
54064a49301eSmrg   case TGSI_OPCODE_END:
54073464ebd5Sriastradh      /* make sure we end primitives which haven't
54083464ebd5Sriastradh       * been explicitly emitted */
54093464ebd5Sriastradh      conditional_emit_primitive(mach);
54104a49301eSmrg      /* halt execution */
54114a49301eSmrg      *pc = -1;
54124a49301eSmrg      break;
54134a49301eSmrg
54144a49301eSmrg   case TGSI_OPCODE_CEIL:
54157ec681f3Smrg      exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT);
54164a49301eSmrg      break;
54174a49301eSmrg
54184a49301eSmrg   case TGSI_OPCODE_I2F:
54197ec681f3Smrg      exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT);
54204a49301eSmrg      break;
54214a49301eSmrg
54224a49301eSmrg   case TGSI_OPCODE_NOT:
54237ec681f3Smrg      exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT);
54244a49301eSmrg      break;
54254a49301eSmrg
54264a49301eSmrg   case TGSI_OPCODE_TRUNC:
54277ec681f3Smrg      exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT);
54284a49301eSmrg      break;
54294a49301eSmrg
54304a49301eSmrg   case TGSI_OPCODE_SHL:
54317ec681f3Smrg      exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT);
54324a49301eSmrg      break;
54334a49301eSmrg
54344a49301eSmrg   case TGSI_OPCODE_AND:
54357ec681f3Smrg      exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT);
54364a49301eSmrg      break;
54374a49301eSmrg
54384a49301eSmrg   case TGSI_OPCODE_OR:
54397ec681f3Smrg      exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT);
54404a49301eSmrg      break;
54414a49301eSmrg
54424a49301eSmrg   case TGSI_OPCODE_MOD:
54437ec681f3Smrg      exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT);
54444a49301eSmrg      break;
54454a49301eSmrg
54464a49301eSmrg   case TGSI_OPCODE_XOR:
54477ec681f3Smrg      exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT);
54484a49301eSmrg      break;
54494a49301eSmrg
54504a49301eSmrg   case TGSI_OPCODE_TXF:
5451af69d88dSmrg      exec_txf(mach, inst);
54524a49301eSmrg      break;
54534a49301eSmrg
54544a49301eSmrg   case TGSI_OPCODE_TXQ:
5455af69d88dSmrg      exec_txq(mach, inst);
54564a49301eSmrg      break;
54574a49301eSmrg
54584a49301eSmrg   case TGSI_OPCODE_EMIT:
5459361fc4cbSmaya      emit_vertex(mach, inst);
54604a49301eSmrg      break;
54614a49301eSmrg
54624a49301eSmrg   case TGSI_OPCODE_ENDPRIM:
5463361fc4cbSmaya      emit_primitive(mach, inst);
54644a49301eSmrg      break;
54654a49301eSmrg
54664a49301eSmrg   case TGSI_OPCODE_BGNLOOP:
54674a49301eSmrg      /* push LoopMask and ContMasks */
54684a49301eSmrg      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
54694a49301eSmrg      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
54704a49301eSmrg      assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5471cdc920a0Smrg      assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5472cdc920a0Smrg
5473cdc920a0Smrg      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5474cdc920a0Smrg      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
54754a49301eSmrg      mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5476cdc920a0Smrg      mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5477cdc920a0Smrg      mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
54784a49301eSmrg      break;
54794a49301eSmrg
54804a49301eSmrg   case TGSI_OPCODE_ENDLOOP:
54814a49301eSmrg      /* Restore ContMask, but don't pop */
54824a49301eSmrg      assert(mach->ContStackTop > 0);
54834a49301eSmrg      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
54844a49301eSmrg      UPDATE_EXEC_MASK(mach);
54854a49301eSmrg      if (mach->ExecMask) {
54864a49301eSmrg         /* repeat loop: jump to instruction just past BGNLOOP */
54874a49301eSmrg         assert(mach->LoopLabelStackTop > 0);
54884a49301eSmrg         *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
54894a49301eSmrg      }
54904a49301eSmrg      else {
54914a49301eSmrg         /* exit loop: pop LoopMask */
54924a49301eSmrg         assert(mach->LoopStackTop > 0);
54934a49301eSmrg         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
54944a49301eSmrg         /* pop ContMask */
54954a49301eSmrg         assert(mach->ContStackTop > 0);
54964a49301eSmrg         mach->ContMask = mach->ContStack[--mach->ContStackTop];
54974a49301eSmrg         assert(mach->LoopLabelStackTop > 0);
54984a49301eSmrg         --mach->LoopLabelStackTop;
5499cdc920a0Smrg
5500cdc920a0Smrg         mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
55014a49301eSmrg      }
55024a49301eSmrg      UPDATE_EXEC_MASK(mach);
55034a49301eSmrg      break;
55044a49301eSmrg
55054a49301eSmrg   case TGSI_OPCODE_BRK:
5506cdc920a0Smrg      exec_break(mach);
55074a49301eSmrg      break;
55084a49301eSmrg
55094a49301eSmrg   case TGSI_OPCODE_CONT:
55104a49301eSmrg      /* turn off cont channels for each enabled exec channel */
55114a49301eSmrg      mach->ContMask &= ~mach->ExecMask;
55124a49301eSmrg      /* Todo: if mach->LoopMask == 0, jump to end of loop */
55134a49301eSmrg      UPDATE_EXEC_MASK(mach);
55144a49301eSmrg      break;
55154a49301eSmrg
55164a49301eSmrg   case TGSI_OPCODE_BGNSUB:
55174a49301eSmrg      /* no-op */
55184a49301eSmrg      break;
55194a49301eSmrg
55204a49301eSmrg   case TGSI_OPCODE_ENDSUB:
5521cdc920a0Smrg      /*
5522cdc920a0Smrg       * XXX: This really should be a no-op. We should never reach this opcode.
5523cdc920a0Smrg       */
5524cdc920a0Smrg
5525cdc920a0Smrg      assert(mach->CallStackTop > 0);
5526cdc920a0Smrg      mach->CallStackTop--;
5527cdc920a0Smrg
5528cdc920a0Smrg      mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5529cdc920a0Smrg      mach->CondMask = mach->CondStack[mach->CondStackTop];
5530cdc920a0Smrg
5531cdc920a0Smrg      mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5532cdc920a0Smrg      mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5533cdc920a0Smrg
5534cdc920a0Smrg      mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5535cdc920a0Smrg      mach->ContMask = mach->ContStack[mach->ContStackTop];
5536cdc920a0Smrg
5537cdc920a0Smrg      mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5538cdc920a0Smrg      mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5539cdc920a0Smrg
5540cdc920a0Smrg      mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5541cdc920a0Smrg      mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5542cdc920a0Smrg
5543cdc920a0Smrg      assert(mach->FuncStackTop > 0);
5544cdc920a0Smrg      mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5545cdc920a0Smrg
5546cdc920a0Smrg      *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5547cdc920a0Smrg
5548cdc920a0Smrg      UPDATE_EXEC_MASK(mach);
55494a49301eSmrg      break;
55504a49301eSmrg
55514a49301eSmrg   case TGSI_OPCODE_NOP:
55524a49301eSmrg      break;
55534a49301eSmrg
5554cdc920a0Smrg   case TGSI_OPCODE_F2I:
55557ec681f3Smrg      exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT);
5556cdc920a0Smrg      break;
5557cdc920a0Smrg
5558af69d88dSmrg   case TGSI_OPCODE_FSEQ:
55597ec681f3Smrg      exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT);
5560af69d88dSmrg      break;
5561af69d88dSmrg
5562af69d88dSmrg   case TGSI_OPCODE_FSGE:
55637ec681f3Smrg      exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT);
5564af69d88dSmrg      break;
5565af69d88dSmrg
5566af69d88dSmrg   case TGSI_OPCODE_FSLT:
55677ec681f3Smrg      exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT);
5568af69d88dSmrg      break;
5569af69d88dSmrg
5570af69d88dSmrg   case TGSI_OPCODE_FSNE:
55717ec681f3Smrg      exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT);
5572af69d88dSmrg      break;
5573af69d88dSmrg
5574cdc920a0Smrg   case TGSI_OPCODE_IDIV:
55757ec681f3Smrg      exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT);
5576cdc920a0Smrg      break;
5577cdc920a0Smrg
5578cdc920a0Smrg   case TGSI_OPCODE_IMAX:
55797ec681f3Smrg      exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT);
5580cdc920a0Smrg      break;
5581cdc920a0Smrg
5582cdc920a0Smrg   case TGSI_OPCODE_IMIN:
55837ec681f3Smrg      exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT);
5584cdc920a0Smrg      break;
5585cdc920a0Smrg
5586cdc920a0Smrg   case TGSI_OPCODE_INEG:
55877ec681f3Smrg      exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT);
5588cdc920a0Smrg      break;
5589cdc920a0Smrg
5590cdc920a0Smrg   case TGSI_OPCODE_ISGE:
55917ec681f3Smrg      exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT);
5592cdc920a0Smrg      break;
5593cdc920a0Smrg
5594cdc920a0Smrg   case TGSI_OPCODE_ISHR:
55957ec681f3Smrg      exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT);
5596cdc920a0Smrg      break;
5597cdc920a0Smrg
5598cdc920a0Smrg   case TGSI_OPCODE_ISLT:
55997ec681f3Smrg      exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT);
5600cdc920a0Smrg      break;
5601cdc920a0Smrg
5602cdc920a0Smrg   case TGSI_OPCODE_F2U:
56037ec681f3Smrg      exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT);
5604cdc920a0Smrg      break;
5605cdc920a0Smrg
5606cdc920a0Smrg   case TGSI_OPCODE_U2F:
56077ec681f3Smrg      exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT);
5608cdc920a0Smrg      break;
5609cdc920a0Smrg
5610cdc920a0Smrg   case TGSI_OPCODE_UADD:
56117ec681f3Smrg      exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT);
5612cdc920a0Smrg      break;
5613cdc920a0Smrg
5614cdc920a0Smrg   case TGSI_OPCODE_UDIV:
56157ec681f3Smrg      exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT);
5616cdc920a0Smrg      break;
5617cdc920a0Smrg
5618cdc920a0Smrg   case TGSI_OPCODE_UMAD:
56197ec681f3Smrg      exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT);
5620cdc920a0Smrg      break;
5621cdc920a0Smrg
5622cdc920a0Smrg   case TGSI_OPCODE_UMAX:
56237ec681f3Smrg      exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT);
5624cdc920a0Smrg      break;
5625cdc920a0Smrg
5626cdc920a0Smrg   case TGSI_OPCODE_UMIN:
56277ec681f3Smrg      exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT);
5628cdc920a0Smrg      break;
5629cdc920a0Smrg
5630cdc920a0Smrg   case TGSI_OPCODE_UMOD:
56317ec681f3Smrg      exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT);
5632cdc920a0Smrg      break;
5633cdc920a0Smrg
5634cdc920a0Smrg   case TGSI_OPCODE_UMUL:
56357ec681f3Smrg      exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT);
5636cdc920a0Smrg      break;
5637cdc920a0Smrg
5638af69d88dSmrg   case TGSI_OPCODE_IMUL_HI:
56397ec681f3Smrg      exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT);
5640af69d88dSmrg      break;
5641af69d88dSmrg
5642af69d88dSmrg   case TGSI_OPCODE_UMUL_HI:
56437ec681f3Smrg      exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT);
5644af69d88dSmrg      break;
5645af69d88dSmrg
5646cdc920a0Smrg   case TGSI_OPCODE_USEQ:
56477ec681f3Smrg      exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT);
5648cdc920a0Smrg      break;
5649cdc920a0Smrg
5650cdc920a0Smrg   case TGSI_OPCODE_USGE:
56517ec681f3Smrg      exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT);
5652cdc920a0Smrg      break;
5653cdc920a0Smrg
5654cdc920a0Smrg   case TGSI_OPCODE_USHR:
56557ec681f3Smrg      exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT);
5656cdc920a0Smrg      break;
5657cdc920a0Smrg
5658cdc920a0Smrg   case TGSI_OPCODE_USLT:
56597ec681f3Smrg      exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT);
5660cdc920a0Smrg      break;
5661cdc920a0Smrg
5662cdc920a0Smrg   case TGSI_OPCODE_USNE:
56637ec681f3Smrg      exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT);
5664cdc920a0Smrg      break;
5665cdc920a0Smrg
5666cdc920a0Smrg   case TGSI_OPCODE_SWITCH:
5667cdc920a0Smrg      exec_switch(mach, inst);
5668cdc920a0Smrg      break;
5669cdc920a0Smrg
5670cdc920a0Smrg   case TGSI_OPCODE_CASE:
5671cdc920a0Smrg      exec_case(mach, inst);
5672cdc920a0Smrg      break;
5673cdc920a0Smrg
5674cdc920a0Smrg   case TGSI_OPCODE_DEFAULT:
5675cdc920a0Smrg      exec_default(mach);
5676cdc920a0Smrg      break;
5677cdc920a0Smrg
5678cdc920a0Smrg   case TGSI_OPCODE_ENDSWITCH:
5679cdc920a0Smrg      exec_endswitch(mach);
5680cdc920a0Smrg      break;
5681cdc920a0Smrg
5682af69d88dSmrg   case TGSI_OPCODE_SAMPLE_I:
5683af69d88dSmrg      exec_txf(mach, inst);
56843464ebd5Sriastradh      break;
56853464ebd5Sriastradh
5686af69d88dSmrg   case TGSI_OPCODE_SAMPLE_I_MS:
568701e04c3fSmrg      exec_txf(mach, inst);
56883464ebd5Sriastradh      break;
56893464ebd5Sriastradh
56903464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE:
5691af69d88dSmrg      exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
56923464ebd5Sriastradh      break;
56933464ebd5Sriastradh
56943464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_B:
5695af69d88dSmrg      exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
56963464ebd5Sriastradh      break;
56973464ebd5Sriastradh
56983464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_C:
5699af69d88dSmrg      exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
57003464ebd5Sriastradh      break;
57013464ebd5Sriastradh
57023464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_C_LZ:
5703af69d88dSmrg      exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
57043464ebd5Sriastradh      break;
57053464ebd5Sriastradh
57063464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_D:
57073464ebd5Sriastradh      exec_sample_d(mach, inst);
57083464ebd5Sriastradh      break;
57093464ebd5Sriastradh
57103464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_L:
5711af69d88dSmrg      exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
57123464ebd5Sriastradh      break;
57133464ebd5Sriastradh
57143464ebd5Sriastradh   case TGSI_OPCODE_GATHER4:
571501e04c3fSmrg      exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
57163464ebd5Sriastradh      break;
57173464ebd5Sriastradh
5718af69d88dSmrg   case TGSI_OPCODE_SVIEWINFO:
5719af69d88dSmrg      exec_txq(mach, inst);
57203464ebd5Sriastradh      break;
57213464ebd5Sriastradh
57223464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_POS:
57233464ebd5Sriastradh      assert(0);
57243464ebd5Sriastradh      break;
57253464ebd5Sriastradh
57263464ebd5Sriastradh   case TGSI_OPCODE_SAMPLE_INFO:
57273464ebd5Sriastradh      assert(0);
57283464ebd5Sriastradh      break;
57293464ebd5Sriastradh
573001e04c3fSmrg   case TGSI_OPCODE_LOD:
573101e04c3fSmrg      exec_lodq(mach, inst);
573201e04c3fSmrg      break;
573301e04c3fSmrg
5734af69d88dSmrg   case TGSI_OPCODE_UARL:
57357ec681f3Smrg      exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT);
5736af69d88dSmrg      break;
5737af69d88dSmrg
5738af69d88dSmrg   case TGSI_OPCODE_UCMP:
573901e04c3fSmrg      exec_ucmp(mach, inst);
5740af69d88dSmrg      break;
5741af69d88dSmrg
5742af69d88dSmrg   case TGSI_OPCODE_IABS:
57437ec681f3Smrg      exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT);
5744af69d88dSmrg      break;
5745af69d88dSmrg
5746af69d88dSmrg   case TGSI_OPCODE_ISSG:
57477ec681f3Smrg      exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT);
5748af69d88dSmrg      break;
5749af69d88dSmrg
5750af69d88dSmrg   case TGSI_OPCODE_TEX2:
5751af69d88dSmrg      /* simple texture lookup */
5752af69d88dSmrg      /* src[0] = texcoord */
5753af69d88dSmrg      /* src[1] = compare */
5754af69d88dSmrg      /* src[2] = sampler unit */
5755af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5756af69d88dSmrg      break;
5757af69d88dSmrg   case TGSI_OPCODE_TXB2:
5758af69d88dSmrg      /* simple texture lookup */
5759af69d88dSmrg      /* src[0] = texcoord */
5760af69d88dSmrg      /* src[1] = bias */
5761af69d88dSmrg      /* src[2] = sampler unit */
5762af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5763af69d88dSmrg      break;
5764af69d88dSmrg   case TGSI_OPCODE_TXL2:
5765af69d88dSmrg      /* simple texture lookup */
5766af69d88dSmrg      /* src[0] = texcoord */
5767af69d88dSmrg      /* src[1] = lod */
5768af69d88dSmrg      /* src[2] = sampler unit */
5769af69d88dSmrg      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5770af69d88dSmrg      break;
5771af69d88dSmrg
5772af69d88dSmrg   case TGSI_OPCODE_IBFE:
57737ec681f3Smrg      exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT);
5774af69d88dSmrg      break;
5775af69d88dSmrg   case TGSI_OPCODE_UBFE:
57767ec681f3Smrg      exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT);
5777af69d88dSmrg      break;
5778af69d88dSmrg   case TGSI_OPCODE_BFI:
57797ec681f3Smrg      exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT);
5780af69d88dSmrg      break;
5781af69d88dSmrg   case TGSI_OPCODE_BREV:
57827ec681f3Smrg      exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT);
5783af69d88dSmrg      break;
5784af69d88dSmrg   case TGSI_OPCODE_POPC:
57857ec681f3Smrg      exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT);
5786af69d88dSmrg      break;
5787af69d88dSmrg   case TGSI_OPCODE_LSB:
57887ec681f3Smrg      exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT);
5789af69d88dSmrg      break;
5790af69d88dSmrg   case TGSI_OPCODE_IMSB:
57917ec681f3Smrg      exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT);
5792af69d88dSmrg      break;
5793af69d88dSmrg   case TGSI_OPCODE_UMSB:
57947ec681f3Smrg      exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT);
5795af69d88dSmrg      break;
579601e04c3fSmrg
579701e04c3fSmrg   case TGSI_OPCODE_F2D:
579801e04c3fSmrg      exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
579901e04c3fSmrg      break;
580001e04c3fSmrg
580101e04c3fSmrg   case TGSI_OPCODE_D2F:
58027ec681f3Smrg      exec_64_2_t(mach, inst, micro_d2f);
580301e04c3fSmrg      break;
580401e04c3fSmrg
580501e04c3fSmrg   case TGSI_OPCODE_DABS:
580601e04c3fSmrg      exec_double_unary(mach, inst, micro_dabs);
580701e04c3fSmrg      break;
580801e04c3fSmrg
580901e04c3fSmrg   case TGSI_OPCODE_DNEG:
581001e04c3fSmrg      exec_double_unary(mach, inst, micro_dneg);
581101e04c3fSmrg      break;
581201e04c3fSmrg
581301e04c3fSmrg   case TGSI_OPCODE_DADD:
581401e04c3fSmrg      exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
581501e04c3fSmrg      break;
581601e04c3fSmrg
581701e04c3fSmrg   case TGSI_OPCODE_DDIV:
581801e04c3fSmrg      exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
581901e04c3fSmrg      break;
582001e04c3fSmrg
582101e04c3fSmrg   case TGSI_OPCODE_DMUL:
582201e04c3fSmrg      exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
582301e04c3fSmrg      break;
582401e04c3fSmrg
582501e04c3fSmrg   case TGSI_OPCODE_DMAX:
582601e04c3fSmrg      exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
582701e04c3fSmrg      break;
582801e04c3fSmrg
582901e04c3fSmrg   case TGSI_OPCODE_DMIN:
583001e04c3fSmrg      exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
583101e04c3fSmrg      break;
583201e04c3fSmrg
583301e04c3fSmrg   case TGSI_OPCODE_DSLT:
583401e04c3fSmrg      exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
583501e04c3fSmrg      break;
583601e04c3fSmrg
583701e04c3fSmrg   case TGSI_OPCODE_DSGE:
583801e04c3fSmrg      exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
583901e04c3fSmrg      break;
584001e04c3fSmrg
584101e04c3fSmrg   case TGSI_OPCODE_DSEQ:
584201e04c3fSmrg      exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
584301e04c3fSmrg      break;
584401e04c3fSmrg
584501e04c3fSmrg   case TGSI_OPCODE_DSNE:
584601e04c3fSmrg      exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
584701e04c3fSmrg      break;
584801e04c3fSmrg
584901e04c3fSmrg   case TGSI_OPCODE_DRCP:
585001e04c3fSmrg      exec_double_unary(mach, inst, micro_drcp);
585101e04c3fSmrg      break;
585201e04c3fSmrg
585301e04c3fSmrg   case TGSI_OPCODE_DSQRT:
585401e04c3fSmrg      exec_double_unary(mach, inst, micro_dsqrt);
585501e04c3fSmrg      break;
585601e04c3fSmrg
585701e04c3fSmrg   case TGSI_OPCODE_DRSQ:
585801e04c3fSmrg      exec_double_unary(mach, inst, micro_drsq);
585901e04c3fSmrg      break;
586001e04c3fSmrg
586101e04c3fSmrg   case TGSI_OPCODE_DMAD:
586201e04c3fSmrg      exec_double_trinary(mach, inst, micro_dmad);
586301e04c3fSmrg      break;
586401e04c3fSmrg
586501e04c3fSmrg   case TGSI_OPCODE_DFRAC:
586601e04c3fSmrg      exec_double_unary(mach, inst, micro_dfrac);
586701e04c3fSmrg      break;
586801e04c3fSmrg
58697ec681f3Smrg   case TGSI_OPCODE_DFLR:
58707ec681f3Smrg      exec_double_unary(mach, inst, micro_dflr);
58717ec681f3Smrg      break;
58727ec681f3Smrg
587301e04c3fSmrg   case TGSI_OPCODE_DLDEXP:
587401e04c3fSmrg      exec_dldexp(mach, inst);
587501e04c3fSmrg      break;
587601e04c3fSmrg
587701e04c3fSmrg   case TGSI_OPCODE_DFRACEXP:
587801e04c3fSmrg      exec_dfracexp(mach, inst);
587901e04c3fSmrg      break;
588001e04c3fSmrg
588101e04c3fSmrg   case TGSI_OPCODE_I2D:
58827ec681f3Smrg      exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT);
588301e04c3fSmrg      break;
588401e04c3fSmrg
588501e04c3fSmrg   case TGSI_OPCODE_D2I:
58867ec681f3Smrg      exec_64_2_t(mach, inst, micro_d2i);
588701e04c3fSmrg      break;
588801e04c3fSmrg
588901e04c3fSmrg   case TGSI_OPCODE_U2D:
58907ec681f3Smrg      exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT);
589101e04c3fSmrg      break;
589201e04c3fSmrg
589301e04c3fSmrg   case TGSI_OPCODE_D2U:
58947ec681f3Smrg      exec_64_2_t(mach, inst, micro_d2u);
589501e04c3fSmrg      break;
589601e04c3fSmrg
589701e04c3fSmrg   case TGSI_OPCODE_LOAD:
589801e04c3fSmrg      exec_load(mach, inst);
589901e04c3fSmrg      break;
590001e04c3fSmrg
590101e04c3fSmrg   case TGSI_OPCODE_STORE:
590201e04c3fSmrg      exec_store(mach, inst);
590301e04c3fSmrg      break;
590401e04c3fSmrg
590501e04c3fSmrg   case TGSI_OPCODE_ATOMUADD:
590601e04c3fSmrg   case TGSI_OPCODE_ATOMXCHG:
590701e04c3fSmrg   case TGSI_OPCODE_ATOMCAS:
590801e04c3fSmrg   case TGSI_OPCODE_ATOMAND:
590901e04c3fSmrg   case TGSI_OPCODE_ATOMOR:
591001e04c3fSmrg   case TGSI_OPCODE_ATOMXOR:
591101e04c3fSmrg   case TGSI_OPCODE_ATOMUMIN:
591201e04c3fSmrg   case TGSI_OPCODE_ATOMUMAX:
591301e04c3fSmrg   case TGSI_OPCODE_ATOMIMIN:
591401e04c3fSmrg   case TGSI_OPCODE_ATOMIMAX:
5915361fc4cbSmaya   case TGSI_OPCODE_ATOMFADD:
591601e04c3fSmrg      exec_atomop(mach, inst);
591701e04c3fSmrg      break;
591801e04c3fSmrg
591901e04c3fSmrg   case TGSI_OPCODE_RESQ:
592001e04c3fSmrg      exec_resq(mach, inst);
592101e04c3fSmrg      break;
592201e04c3fSmrg   case TGSI_OPCODE_BARRIER:
592301e04c3fSmrg   case TGSI_OPCODE_MEMBAR:
592401e04c3fSmrg      return TRUE;
592501e04c3fSmrg      break;
592601e04c3fSmrg
592701e04c3fSmrg   case TGSI_OPCODE_I64ABS:
592801e04c3fSmrg      exec_double_unary(mach, inst, micro_i64abs);
592901e04c3fSmrg      break;
593001e04c3fSmrg
593101e04c3fSmrg   case TGSI_OPCODE_I64SSG:
593201e04c3fSmrg      exec_double_unary(mach, inst, micro_i64sgn);
593301e04c3fSmrg      break;
593401e04c3fSmrg
593501e04c3fSmrg   case TGSI_OPCODE_I64NEG:
593601e04c3fSmrg      exec_double_unary(mach, inst, micro_i64neg);
593701e04c3fSmrg      break;
593801e04c3fSmrg
593901e04c3fSmrg   case TGSI_OPCODE_U64SEQ:
594001e04c3fSmrg      exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
594101e04c3fSmrg      break;
594201e04c3fSmrg
594301e04c3fSmrg   case TGSI_OPCODE_U64SNE:
594401e04c3fSmrg      exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
594501e04c3fSmrg      break;
594601e04c3fSmrg
594701e04c3fSmrg   case TGSI_OPCODE_I64SLT:
594801e04c3fSmrg      exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
594901e04c3fSmrg      break;
595001e04c3fSmrg   case TGSI_OPCODE_U64SLT:
595101e04c3fSmrg      exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
595201e04c3fSmrg      break;
595301e04c3fSmrg
595401e04c3fSmrg   case TGSI_OPCODE_I64SGE:
595501e04c3fSmrg      exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
595601e04c3fSmrg      break;
595701e04c3fSmrg   case TGSI_OPCODE_U64SGE:
595801e04c3fSmrg      exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
595901e04c3fSmrg      break;
596001e04c3fSmrg
596101e04c3fSmrg   case TGSI_OPCODE_I64MIN:
596201e04c3fSmrg      exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
596301e04c3fSmrg      break;
596401e04c3fSmrg   case TGSI_OPCODE_U64MIN:
596501e04c3fSmrg      exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
596601e04c3fSmrg      break;
596701e04c3fSmrg   case TGSI_OPCODE_I64MAX:
596801e04c3fSmrg      exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
596901e04c3fSmrg      break;
597001e04c3fSmrg   case TGSI_OPCODE_U64MAX:
597101e04c3fSmrg      exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
597201e04c3fSmrg      break;
597301e04c3fSmrg   case TGSI_OPCODE_U64ADD:
597401e04c3fSmrg      exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
597501e04c3fSmrg      break;
597601e04c3fSmrg   case TGSI_OPCODE_U64MUL:
597701e04c3fSmrg      exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
597801e04c3fSmrg      break;
597901e04c3fSmrg   case TGSI_OPCODE_U64SHL:
598001e04c3fSmrg      exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
598101e04c3fSmrg      break;
598201e04c3fSmrg   case TGSI_OPCODE_I64SHR:
598301e04c3fSmrg      exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
598401e04c3fSmrg      break;
598501e04c3fSmrg   case TGSI_OPCODE_U64SHR:
598601e04c3fSmrg      exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
598701e04c3fSmrg      break;
598801e04c3fSmrg   case TGSI_OPCODE_U64DIV:
598901e04c3fSmrg      exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
599001e04c3fSmrg      break;
599101e04c3fSmrg   case TGSI_OPCODE_I64DIV:
599201e04c3fSmrg      exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
599301e04c3fSmrg      break;
599401e04c3fSmrg   case TGSI_OPCODE_U64MOD:
599501e04c3fSmrg      exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
599601e04c3fSmrg      break;
599701e04c3fSmrg   case TGSI_OPCODE_I64MOD:
599801e04c3fSmrg      exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
599901e04c3fSmrg      break;
600001e04c3fSmrg
600101e04c3fSmrg   case TGSI_OPCODE_F2U64:
600201e04c3fSmrg      exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
600301e04c3fSmrg      break;
600401e04c3fSmrg
600501e04c3fSmrg   case TGSI_OPCODE_F2I64:
600601e04c3fSmrg      exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
600701e04c3fSmrg      break;
600801e04c3fSmrg
600901e04c3fSmrg   case TGSI_OPCODE_U2I64:
601001e04c3fSmrg      exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
601101e04c3fSmrg      break;
601201e04c3fSmrg   case TGSI_OPCODE_I2I64:
601301e04c3fSmrg      exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
601401e04c3fSmrg      break;
601501e04c3fSmrg
601601e04c3fSmrg   case TGSI_OPCODE_D2U64:
601701e04c3fSmrg      exec_double_unary(mach, inst, micro_d2u64);
601801e04c3fSmrg      break;
601901e04c3fSmrg
602001e04c3fSmrg   case TGSI_OPCODE_D2I64:
602101e04c3fSmrg      exec_double_unary(mach, inst, micro_d2i64);
602201e04c3fSmrg      break;
602301e04c3fSmrg
602401e04c3fSmrg   case TGSI_OPCODE_U642F:
60257ec681f3Smrg      exec_64_2_t(mach, inst, micro_u642f);
602601e04c3fSmrg      break;
602701e04c3fSmrg   case TGSI_OPCODE_I642F:
60287ec681f3Smrg      exec_64_2_t(mach, inst, micro_i642f);
602901e04c3fSmrg      break;
603001e04c3fSmrg
603101e04c3fSmrg   case TGSI_OPCODE_U642D:
603201e04c3fSmrg      exec_double_unary(mach, inst, micro_u642d);
603301e04c3fSmrg      break;
603401e04c3fSmrg   case TGSI_OPCODE_I642D:
603501e04c3fSmrg      exec_double_unary(mach, inst, micro_i642d);
603601e04c3fSmrg      break;
6037361fc4cbSmaya   case TGSI_OPCODE_INTERP_SAMPLE:
6038361fc4cbSmaya      exec_interp_at_sample(mach, inst);
6039361fc4cbSmaya      break;
6040361fc4cbSmaya   case TGSI_OPCODE_INTERP_OFFSET:
6041361fc4cbSmaya      exec_interp_at_offset(mach, inst);
6042361fc4cbSmaya      break;
6043361fc4cbSmaya   case TGSI_OPCODE_INTERP_CENTROID:
6044361fc4cbSmaya      exec_interp_at_centroid(mach, inst);
6045361fc4cbSmaya      break;
60464a49301eSmrg   default:
60474a49301eSmrg      assert( 0 );
60484a49301eSmrg   }
604901e04c3fSmrg   return FALSE;
60504a49301eSmrg}
60514a49301eSmrg
605201e04c3fSmrgstatic void
605301e04c3fSmrgtgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
60544a49301eSmrg{
6055af69d88dSmrg   uint default_mask = 0xf;
60564a49301eSmrg
60577ec681f3Smrg   mach->KillMask = 0;
60587ec681f3Smrg   mach->OutputVertexOffset = 0;
6059af69d88dSmrg
606001e04c3fSmrg   if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6061361fc4cbSmaya      for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
60627ec681f3Smrg         mach->OutputPrimCount[i] = 0;
6063361fc4cbSmaya         mach->Primitives[i][0] = 0;
6064361fc4cbSmaya      }
6065af69d88dSmrg      /* GS runs on a single primitive for now */
6066af69d88dSmrg      default_mask = 0x1;
6067af69d88dSmrg   }
6068af69d88dSmrg
606901e04c3fSmrg   if (mach->NonHelperMask == 0)
607001e04c3fSmrg      mach->NonHelperMask = default_mask;
6071af69d88dSmrg   mach->CondMask = default_mask;
6072af69d88dSmrg   mach->LoopMask = default_mask;
6073af69d88dSmrg   mach->ContMask = default_mask;
6074af69d88dSmrg   mach->FuncMask = default_mask;
6075af69d88dSmrg   mach->ExecMask = default_mask;
60764a49301eSmrg
6077af69d88dSmrg   mach->Switch.mask = default_mask;
6078cdc920a0Smrg
60794a49301eSmrg   assert(mach->CondStackTop == 0);
60804a49301eSmrg   assert(mach->LoopStackTop == 0);
60814a49301eSmrg   assert(mach->ContStackTop == 0);
6082cdc920a0Smrg   assert(mach->SwitchStackTop == 0);
6083cdc920a0Smrg   assert(mach->BreakStackTop == 0);
60844a49301eSmrg   assert(mach->CallStackTop == 0);
608501e04c3fSmrg}
608601e04c3fSmrg
608701e04c3fSmrg/**
608801e04c3fSmrg * Run TGSI interpreter.
608901e04c3fSmrg * \return bitmask of "alive" quad components
609001e04c3fSmrg */
609101e04c3fSmrguint
609201e04c3fSmrgtgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
609301e04c3fSmrg{
609401e04c3fSmrg   uint i;
609501e04c3fSmrg
609601e04c3fSmrg   mach->pc = start_pc;
60974a49301eSmrg
609801e04c3fSmrg   if (!start_pc) {
609901e04c3fSmrg      tgsi_exec_machine_setup_masks(mach);
61004a49301eSmrg
610101e04c3fSmrg      /* execute declarations (interpolants) */
610201e04c3fSmrg      for (i = 0; i < mach->NumDeclarations; i++) {
610301e04c3fSmrg         exec_declaration( mach, mach->Declarations+i );
610401e04c3fSmrg      }
61054a49301eSmrg   }
61064a49301eSmrg
6107cdc920a0Smrg   {
6108cdc920a0Smrg#if DEBUG_EXECUTION
61097ec681f3Smrg      struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS];
6110cdc920a0Smrg      struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6111cdc920a0Smrg      uint inst = 1;
6112cdc920a0Smrg
611301e04c3fSmrg      if (!start_pc) {
611401e04c3fSmrg         memset(mach->Temps, 0, sizeof(temps));
611501e04c3fSmrg         if (mach->Outputs)
611601e04c3fSmrg            memset(mach->Outputs, 0, sizeof(outputs));
611701e04c3fSmrg         memset(temps, 0, sizeof(temps));
611801e04c3fSmrg         memset(outputs, 0, sizeof(outputs));
611901e04c3fSmrg      }
6120cdc920a0Smrg#endif
6121cdc920a0Smrg
6122cdc920a0Smrg      /* execute instructions, until pc is set to -1 */
612301e04c3fSmrg      while (mach->pc != -1) {
612401e04c3fSmrg         boolean barrier_hit;
6125cdc920a0Smrg#if DEBUG_EXECUTION
6126cdc920a0Smrg         uint i;
6127cdc920a0Smrg
612801e04c3fSmrg         tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6129cdc920a0Smrg#endif
6130cdc920a0Smrg
613101e04c3fSmrg         assert(mach->pc < (int) mach->NumInstructions);
613201e04c3fSmrg         barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
613301e04c3fSmrg
613401e04c3fSmrg         /* for compute shaders if we hit a barrier return now for later rescheduling */
613501e04c3fSmrg         if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
613601e04c3fSmrg            return 0;
6137cdc920a0Smrg
6138cdc920a0Smrg#if DEBUG_EXECUTION
61397ec681f3Smrg         for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
6140cdc920a0Smrg            if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6141cdc920a0Smrg               uint j;
6142cdc920a0Smrg
6143cdc920a0Smrg               memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6144cdc920a0Smrg               debug_printf("TEMP[%2u] = ", i);
6145cdc920a0Smrg               for (j = 0; j < 4; j++) {
6146cdc920a0Smrg                  if (j > 0) {
6147cdc920a0Smrg                     debug_printf("           ");
6148cdc920a0Smrg                  }
6149cdc920a0Smrg                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6150cdc920a0Smrg                               temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6151cdc920a0Smrg                               temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6152cdc920a0Smrg                               temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6153cdc920a0Smrg                               temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6154cdc920a0Smrg               }
6155cdc920a0Smrg            }
6156cdc920a0Smrg         }
615701e04c3fSmrg         if (mach->Outputs) {
615801e04c3fSmrg            for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
615901e04c3fSmrg               if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
616001e04c3fSmrg                  uint j;
616101e04c3fSmrg
616201e04c3fSmrg                  memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
616301e04c3fSmrg                  debug_printf("OUT[%2u] =  ", i);
616401e04c3fSmrg                  for (j = 0; j < 4; j++) {
616501e04c3fSmrg                     if (j > 0) {
616601e04c3fSmrg                        debug_printf("           ");
616701e04c3fSmrg                     }
616801e04c3fSmrg                     debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
616901e04c3fSmrg                                  outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
617001e04c3fSmrg                                  outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
617101e04c3fSmrg                                  outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
617201e04c3fSmrg                                  outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6173cdc920a0Smrg                  }
6174cdc920a0Smrg               }
6175cdc920a0Smrg            }
6176cdc920a0Smrg         }
6177cdc920a0Smrg#endif
6178cdc920a0Smrg      }
61794a49301eSmrg   }
61804a49301eSmrg
61814a49301eSmrg#if 0
61824a49301eSmrg   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
618301e04c3fSmrg   if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
61844a49301eSmrg      /*
61854a49301eSmrg       * Scale back depth component.
61864a49301eSmrg       */
61874a49301eSmrg      for (i = 0; i < 4; i++)
61884a49301eSmrg         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
61894a49301eSmrg   }
61904a49301eSmrg#endif
61914a49301eSmrg
61923464ebd5Sriastradh   /* Strictly speaking, these assertions aren't really needed but they
61933464ebd5Sriastradh    * can potentially catch some bugs in the control flow code.
61943464ebd5Sriastradh    */
6195cdc920a0Smrg   assert(mach->CondStackTop == 0);
6196cdc920a0Smrg   assert(mach->LoopStackTop == 0);
6197cdc920a0Smrg   assert(mach->ContStackTop == 0);
6198cdc920a0Smrg   assert(mach->SwitchStackTop == 0);
6199cdc920a0Smrg   assert(mach->BreakStackTop == 0);
6200cdc920a0Smrg   assert(mach->CallStackTop == 0);
6201cdc920a0Smrg
62027ec681f3Smrg   return ~mach->KillMask;
62034a49301eSmrg}
6204