14a49301eSmrg/************************************************************************** 24a49301eSmrg * 3af69d88dSmrg * Copyright 2007-2008 VMware, Inc. 44a49301eSmrg * All Rights Reserved. 5cdc920a0Smrg * Copyright 2009-2010 VMware, Inc. All rights Reserved. 64a49301eSmrg * 74a49301eSmrg * Permission is hereby granted, free of charge, to any person obtaining a 84a49301eSmrg * copy of this software and associated documentation files (the 94a49301eSmrg * "Software"), to deal in the Software without restriction, including 104a49301eSmrg * without limitation the rights to use, copy, modify, merge, publish, 114a49301eSmrg * distribute, sub license, and/or sell copies of the Software, and to 124a49301eSmrg * permit persons to whom the Software is furnished to do so, subject to 134a49301eSmrg * the following conditions: 144a49301eSmrg * 154a49301eSmrg * The above copyright notice and this permission notice (including the 164a49301eSmrg * next paragraph) shall be included in all copies or substantial portions 174a49301eSmrg * of the Software. 184a49301eSmrg * 194a49301eSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 204a49301eSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 214a49301eSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 234a49301eSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 244a49301eSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 254a49301eSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 264a49301eSmrg * 274a49301eSmrg **************************************************************************/ 284a49301eSmrg 294a49301eSmrg/** 304a49301eSmrg * TGSI interpreter/executor. 314a49301eSmrg * 324a49301eSmrg * Flow control information: 334a49301eSmrg * 344a49301eSmrg * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 354a49301eSmrg * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 364a49301eSmrg * care since a condition may be true for some quad components but false 374a49301eSmrg * for other components. 384a49301eSmrg * 394a49301eSmrg * We basically execute all statements (even if they're in the part of 404a49301eSmrg * an IF/ELSE clause that's "not taken") and use a special mask to 414a49301eSmrg * control writing to destination registers. This is the ExecMask. 424a49301eSmrg * See store_dest(). 434a49301eSmrg * 444a49301eSmrg * The ExecMask is computed from three other masks (CondMask, LoopMask and 454a49301eSmrg * ContMask) which are controlled by the flow control instructions (namely: 464a49301eSmrg * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 474a49301eSmrg * 484a49301eSmrg * 494a49301eSmrg * Authors: 504a49301eSmrg * Michal Krol 514a49301eSmrg * Brian Paul 524a49301eSmrg */ 534a49301eSmrg 544a49301eSmrg#include "pipe/p_compiler.h" 554a49301eSmrg#include "pipe/p_state.h" 564a49301eSmrg#include "pipe/p_shader_tokens.h" 574a49301eSmrg#include "tgsi/tgsi_dump.h" 584a49301eSmrg#include "tgsi/tgsi_parse.h" 594a49301eSmrg#include "tgsi/tgsi_util.h" 604a49301eSmrg#include "tgsi_exec.h" 617ec681f3Smrg#include "util/compiler.h" 627ec681f3Smrg#include "util/half_float.h" 634a49301eSmrg#include "util/u_memory.h" 644a49301eSmrg#include "util/u_math.h" 6501e04c3fSmrg#include "util/rounding.h" 664a49301eSmrg 674a49301eSmrg 68af69d88dSmrg#define DEBUG_EXECUTION 0 69af69d88dSmrg 70af69d88dSmrg 714a49301eSmrg#define TILE_TOP_LEFT 0 724a49301eSmrg#define TILE_TOP_RIGHT 1 734a49301eSmrg#define TILE_BOTTOM_LEFT 2 744a49301eSmrg#define TILE_BOTTOM_RIGHT 3 754a49301eSmrg 7601e04c3fSmrgunion tgsi_double_channel { 7701e04c3fSmrg double d[TGSI_QUAD_SIZE]; 7801e04c3fSmrg unsigned u[TGSI_QUAD_SIZE][2]; 7901e04c3fSmrg uint64_t u64[TGSI_QUAD_SIZE]; 8001e04c3fSmrg int64_t i64[TGSI_QUAD_SIZE]; 817ec681f3Smrg} ALIGN16; 8201e04c3fSmrg 837ec681f3Smrgstruct ALIGN16 tgsi_double_vector { 8401e04c3fSmrg union tgsi_double_channel xy; 8501e04c3fSmrg union tgsi_double_channel zw; 8601e04c3fSmrg}; 8701e04c3fSmrg 88cdc920a0Smrgstatic void 89cdc920a0Smrgmicro_abs(union tgsi_exec_channel *dst, 90cdc920a0Smrg const union tgsi_exec_channel *src) 91cdc920a0Smrg{ 92cdc920a0Smrg dst->f[0] = fabsf(src->f[0]); 93cdc920a0Smrg dst->f[1] = fabsf(src->f[1]); 94cdc920a0Smrg dst->f[2] = fabsf(src->f[2]); 95cdc920a0Smrg dst->f[3] = fabsf(src->f[3]); 96cdc920a0Smrg} 97cdc920a0Smrg 98cdc920a0Smrgstatic void 99cdc920a0Smrgmicro_arl(union tgsi_exec_channel *dst, 100cdc920a0Smrg const union tgsi_exec_channel *src) 101cdc920a0Smrg{ 102cdc920a0Smrg dst->i[0] = (int)floorf(src->f[0]); 103cdc920a0Smrg dst->i[1] = (int)floorf(src->f[1]); 104cdc920a0Smrg dst->i[2] = (int)floorf(src->f[2]); 105cdc920a0Smrg dst->i[3] = (int)floorf(src->f[3]); 106cdc920a0Smrg} 107cdc920a0Smrg 108cdc920a0Smrgstatic void 109cdc920a0Smrgmicro_arr(union tgsi_exec_channel *dst, 110cdc920a0Smrg const union tgsi_exec_channel *src) 111cdc920a0Smrg{ 112cdc920a0Smrg dst->i[0] = (int)floorf(src->f[0] + 0.5f); 113cdc920a0Smrg dst->i[1] = (int)floorf(src->f[1] + 0.5f); 114cdc920a0Smrg dst->i[2] = (int)floorf(src->f[2] + 0.5f); 115cdc920a0Smrg dst->i[3] = (int)floorf(src->f[3] + 0.5f); 116cdc920a0Smrg} 117cdc920a0Smrg 118cdc920a0Smrgstatic void 119cdc920a0Smrgmicro_ceil(union tgsi_exec_channel *dst, 120cdc920a0Smrg const union tgsi_exec_channel *src) 121cdc920a0Smrg{ 122cdc920a0Smrg dst->f[0] = ceilf(src->f[0]); 123cdc920a0Smrg dst->f[1] = ceilf(src->f[1]); 124cdc920a0Smrg dst->f[2] = ceilf(src->f[2]); 125cdc920a0Smrg dst->f[3] = ceilf(src->f[3]); 126cdc920a0Smrg} 127cdc920a0Smrg 128cdc920a0Smrgstatic void 129cdc920a0Smrgmicro_cmp(union tgsi_exec_channel *dst, 130cdc920a0Smrg const union tgsi_exec_channel *src0, 131cdc920a0Smrg const union tgsi_exec_channel *src1, 132cdc920a0Smrg const union tgsi_exec_channel *src2) 133cdc920a0Smrg{ 134cdc920a0Smrg dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 135cdc920a0Smrg dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 136cdc920a0Smrg dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 137cdc920a0Smrg dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 138cdc920a0Smrg} 139cdc920a0Smrg 140cdc920a0Smrgstatic void 141cdc920a0Smrgmicro_cos(union tgsi_exec_channel *dst, 142cdc920a0Smrg const union tgsi_exec_channel *src) 143cdc920a0Smrg{ 144cdc920a0Smrg dst->f[0] = cosf(src->f[0]); 145cdc920a0Smrg dst->f[1] = cosf(src->f[1]); 146cdc920a0Smrg dst->f[2] = cosf(src->f[2]); 147cdc920a0Smrg dst->f[3] = cosf(src->f[3]); 148cdc920a0Smrg} 149cdc920a0Smrg 15001e04c3fSmrgstatic void 15101e04c3fSmrgmicro_d2f(union tgsi_exec_channel *dst, 15201e04c3fSmrg const union tgsi_double_channel *src) 15301e04c3fSmrg{ 15401e04c3fSmrg dst->f[0] = (float)src->d[0]; 15501e04c3fSmrg dst->f[1] = (float)src->d[1]; 15601e04c3fSmrg dst->f[2] = (float)src->d[2]; 15701e04c3fSmrg dst->f[3] = (float)src->d[3]; 15801e04c3fSmrg} 15901e04c3fSmrg 16001e04c3fSmrgstatic void 16101e04c3fSmrgmicro_d2i(union tgsi_exec_channel *dst, 16201e04c3fSmrg const union tgsi_double_channel *src) 16301e04c3fSmrg{ 16401e04c3fSmrg dst->i[0] = (int)src->d[0]; 16501e04c3fSmrg dst->i[1] = (int)src->d[1]; 16601e04c3fSmrg dst->i[2] = (int)src->d[2]; 16701e04c3fSmrg dst->i[3] = (int)src->d[3]; 16801e04c3fSmrg} 16901e04c3fSmrg 17001e04c3fSmrgstatic void 17101e04c3fSmrgmicro_d2u(union tgsi_exec_channel *dst, 17201e04c3fSmrg const union tgsi_double_channel *src) 17301e04c3fSmrg{ 17401e04c3fSmrg dst->u[0] = (unsigned)src->d[0]; 17501e04c3fSmrg dst->u[1] = (unsigned)src->d[1]; 17601e04c3fSmrg dst->u[2] = (unsigned)src->d[2]; 17701e04c3fSmrg dst->u[3] = (unsigned)src->d[3]; 17801e04c3fSmrg} 17901e04c3fSmrgstatic void 18001e04c3fSmrgmicro_dabs(union tgsi_double_channel *dst, 18101e04c3fSmrg const union tgsi_double_channel *src) 18201e04c3fSmrg{ 18301e04c3fSmrg dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 18401e04c3fSmrg dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 18501e04c3fSmrg dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 18601e04c3fSmrg dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3]; 18701e04c3fSmrg} 18801e04c3fSmrg 18901e04c3fSmrgstatic void 19001e04c3fSmrgmicro_dadd(union tgsi_double_channel *dst, 19101e04c3fSmrg const union tgsi_double_channel *src) 19201e04c3fSmrg{ 19301e04c3fSmrg dst->d[0] = src[0].d[0] + src[1].d[0]; 19401e04c3fSmrg dst->d[1] = src[0].d[1] + src[1].d[1]; 19501e04c3fSmrg dst->d[2] = src[0].d[2] + src[1].d[2]; 19601e04c3fSmrg dst->d[3] = src[0].d[3] + src[1].d[3]; 19701e04c3fSmrg} 19801e04c3fSmrg 19901e04c3fSmrgstatic void 20001e04c3fSmrgmicro_ddiv(union tgsi_double_channel *dst, 20101e04c3fSmrg const union tgsi_double_channel *src) 20201e04c3fSmrg{ 20301e04c3fSmrg dst->d[0] = src[0].d[0] / src[1].d[0]; 20401e04c3fSmrg dst->d[1] = src[0].d[1] / src[1].d[1]; 20501e04c3fSmrg dst->d[2] = src[0].d[2] / src[1].d[2]; 20601e04c3fSmrg dst->d[3] = src[0].d[3] / src[1].d[3]; 20701e04c3fSmrg} 20801e04c3fSmrg 209cdc920a0Smrgstatic void 210cdc920a0Smrgmicro_ddx(union tgsi_exec_channel *dst, 211cdc920a0Smrg const union tgsi_exec_channel *src) 212cdc920a0Smrg{ 213cdc920a0Smrg dst->f[0] = 214cdc920a0Smrg dst->f[1] = 215cdc920a0Smrg dst->f[2] = 216cdc920a0Smrg dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 217cdc920a0Smrg} 218cdc920a0Smrg 2197ec681f3Smrgstatic void 2207ec681f3Smrgmicro_ddx_fine(union tgsi_exec_channel *dst, 2217ec681f3Smrg const union tgsi_exec_channel *src) 2227ec681f3Smrg{ 2237ec681f3Smrg dst->f[0] = 2247ec681f3Smrg dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT]; 2257ec681f3Smrg dst->f[2] = 2267ec681f3Smrg dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 2277ec681f3Smrg} 2287ec681f3Smrg 2297ec681f3Smrg 230cdc920a0Smrgstatic void 231cdc920a0Smrgmicro_ddy(union tgsi_exec_channel *dst, 232cdc920a0Smrg const union tgsi_exec_channel *src) 233cdc920a0Smrg{ 234cdc920a0Smrg dst->f[0] = 235cdc920a0Smrg dst->f[1] = 236cdc920a0Smrg dst->f[2] = 237cdc920a0Smrg dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 238cdc920a0Smrg} 239cdc920a0Smrg 2407ec681f3Smrgstatic void 2417ec681f3Smrgmicro_ddy_fine(union tgsi_exec_channel *dst, 2427ec681f3Smrg const union tgsi_exec_channel *src) 2437ec681f3Smrg{ 2447ec681f3Smrg dst->f[0] = 2457ec681f3Smrg dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 2467ec681f3Smrg dst->f[1] = 2477ec681f3Smrg dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT]; 2487ec681f3Smrg} 2497ec681f3Smrg 25001e04c3fSmrgstatic void 25101e04c3fSmrgmicro_dmul(union tgsi_double_channel *dst, 25201e04c3fSmrg const union tgsi_double_channel *src) 25301e04c3fSmrg{ 25401e04c3fSmrg dst->d[0] = src[0].d[0] * src[1].d[0]; 25501e04c3fSmrg dst->d[1] = src[0].d[1] * src[1].d[1]; 25601e04c3fSmrg dst->d[2] = src[0].d[2] * src[1].d[2]; 25701e04c3fSmrg dst->d[3] = src[0].d[3] * src[1].d[3]; 25801e04c3fSmrg} 25901e04c3fSmrg 26001e04c3fSmrgstatic void 26101e04c3fSmrgmicro_dmax(union tgsi_double_channel *dst, 26201e04c3fSmrg const union tgsi_double_channel *src) 26301e04c3fSmrg{ 2647ec681f3Smrg dst->d[0] = fmax(src[0].d[0], src[1].d[0]); 2657ec681f3Smrg dst->d[1] = fmax(src[0].d[1], src[1].d[1]); 2667ec681f3Smrg dst->d[2] = fmax(src[0].d[2], src[1].d[2]); 2677ec681f3Smrg dst->d[3] = fmax(src[0].d[3], src[1].d[3]); 26801e04c3fSmrg} 26901e04c3fSmrg 27001e04c3fSmrgstatic void 27101e04c3fSmrgmicro_dmin(union tgsi_double_channel *dst, 27201e04c3fSmrg const union tgsi_double_channel *src) 27301e04c3fSmrg{ 2747ec681f3Smrg dst->d[0] = fmin(src[0].d[0], src[1].d[0]); 2757ec681f3Smrg dst->d[1] = fmin(src[0].d[1], src[1].d[1]); 2767ec681f3Smrg dst->d[2] = fmin(src[0].d[2], src[1].d[2]); 2777ec681f3Smrg dst->d[3] = fmin(src[0].d[3], src[1].d[3]); 27801e04c3fSmrg} 27901e04c3fSmrg 28001e04c3fSmrgstatic void 28101e04c3fSmrgmicro_dneg(union tgsi_double_channel *dst, 28201e04c3fSmrg const union tgsi_double_channel *src) 28301e04c3fSmrg{ 28401e04c3fSmrg dst->d[0] = -src->d[0]; 28501e04c3fSmrg dst->d[1] = -src->d[1]; 28601e04c3fSmrg dst->d[2] = -src->d[2]; 28701e04c3fSmrg dst->d[3] = -src->d[3]; 28801e04c3fSmrg} 28901e04c3fSmrg 29001e04c3fSmrgstatic void 29101e04c3fSmrgmicro_dslt(union tgsi_double_channel *dst, 29201e04c3fSmrg const union tgsi_double_channel *src) 29301e04c3fSmrg{ 29401e04c3fSmrg dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 29501e04c3fSmrg dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 29601e04c3fSmrg dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 29701e04c3fSmrg dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 29801e04c3fSmrg} 29901e04c3fSmrg 30001e04c3fSmrgstatic void 30101e04c3fSmrgmicro_dsne(union tgsi_double_channel *dst, 30201e04c3fSmrg const union tgsi_double_channel *src) 30301e04c3fSmrg{ 30401e04c3fSmrg dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 30501e04c3fSmrg dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 30601e04c3fSmrg dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 30701e04c3fSmrg dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 30801e04c3fSmrg} 30901e04c3fSmrg 31001e04c3fSmrgstatic void 31101e04c3fSmrgmicro_dsge(union tgsi_double_channel *dst, 31201e04c3fSmrg const union tgsi_double_channel *src) 31301e04c3fSmrg{ 31401e04c3fSmrg dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 31501e04c3fSmrg dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 31601e04c3fSmrg dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 31701e04c3fSmrg dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 31801e04c3fSmrg} 31901e04c3fSmrg 32001e04c3fSmrgstatic void 32101e04c3fSmrgmicro_dseq(union tgsi_double_channel *dst, 32201e04c3fSmrg const union tgsi_double_channel *src) 32301e04c3fSmrg{ 32401e04c3fSmrg dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 32501e04c3fSmrg dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 32601e04c3fSmrg dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 32701e04c3fSmrg dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 32801e04c3fSmrg} 32901e04c3fSmrg 33001e04c3fSmrgstatic void 33101e04c3fSmrgmicro_drcp(union tgsi_double_channel *dst, 33201e04c3fSmrg const union tgsi_double_channel *src) 33301e04c3fSmrg{ 33401e04c3fSmrg dst->d[0] = 1.0 / src->d[0]; 33501e04c3fSmrg dst->d[1] = 1.0 / src->d[1]; 33601e04c3fSmrg dst->d[2] = 1.0 / src->d[2]; 33701e04c3fSmrg dst->d[3] = 1.0 / src->d[3]; 33801e04c3fSmrg} 33901e04c3fSmrg 34001e04c3fSmrgstatic void 34101e04c3fSmrgmicro_dsqrt(union tgsi_double_channel *dst, 34201e04c3fSmrg const union tgsi_double_channel *src) 34301e04c3fSmrg{ 34401e04c3fSmrg dst->d[0] = sqrt(src->d[0]); 34501e04c3fSmrg dst->d[1] = sqrt(src->d[1]); 34601e04c3fSmrg dst->d[2] = sqrt(src->d[2]); 34701e04c3fSmrg dst->d[3] = sqrt(src->d[3]); 34801e04c3fSmrg} 34901e04c3fSmrg 35001e04c3fSmrgstatic void 35101e04c3fSmrgmicro_drsq(union tgsi_double_channel *dst, 35201e04c3fSmrg const union tgsi_double_channel *src) 35301e04c3fSmrg{ 35401e04c3fSmrg dst->d[0] = 1.0 / sqrt(src->d[0]); 35501e04c3fSmrg dst->d[1] = 1.0 / sqrt(src->d[1]); 35601e04c3fSmrg dst->d[2] = 1.0 / sqrt(src->d[2]); 35701e04c3fSmrg dst->d[3] = 1.0 / sqrt(src->d[3]); 35801e04c3fSmrg} 35901e04c3fSmrg 36001e04c3fSmrgstatic void 36101e04c3fSmrgmicro_dmad(union tgsi_double_channel *dst, 36201e04c3fSmrg const union tgsi_double_channel *src) 36301e04c3fSmrg{ 36401e04c3fSmrg dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 36501e04c3fSmrg dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 36601e04c3fSmrg dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 36701e04c3fSmrg dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 36801e04c3fSmrg} 36901e04c3fSmrg 37001e04c3fSmrgstatic void 37101e04c3fSmrgmicro_dfrac(union tgsi_double_channel *dst, 37201e04c3fSmrg const union tgsi_double_channel *src) 37301e04c3fSmrg{ 37401e04c3fSmrg dst->d[0] = src->d[0] - floor(src->d[0]); 37501e04c3fSmrg dst->d[1] = src->d[1] - floor(src->d[1]); 37601e04c3fSmrg dst->d[2] = src->d[2] - floor(src->d[2]); 37701e04c3fSmrg dst->d[3] = src->d[3] - floor(src->d[3]); 37801e04c3fSmrg} 37901e04c3fSmrg 3807ec681f3Smrgstatic void 3817ec681f3Smrgmicro_dflr(union tgsi_double_channel *dst, 3827ec681f3Smrg const union tgsi_double_channel *src) 3837ec681f3Smrg{ 3847ec681f3Smrg dst->d[0] = floor(src->d[0]); 3857ec681f3Smrg dst->d[1] = floor(src->d[1]); 3867ec681f3Smrg dst->d[2] = floor(src->d[2]); 3877ec681f3Smrg dst->d[3] = floor(src->d[3]); 3887ec681f3Smrg} 3897ec681f3Smrg 39001e04c3fSmrgstatic void 39101e04c3fSmrgmicro_dldexp(union tgsi_double_channel *dst, 39201e04c3fSmrg const union tgsi_double_channel *src0, 39301e04c3fSmrg union tgsi_exec_channel *src1) 39401e04c3fSmrg{ 39501e04c3fSmrg dst->d[0] = ldexp(src0->d[0], src1->i[0]); 39601e04c3fSmrg dst->d[1] = ldexp(src0->d[1], src1->i[1]); 39701e04c3fSmrg dst->d[2] = ldexp(src0->d[2], src1->i[2]); 39801e04c3fSmrg dst->d[3] = ldexp(src0->d[3], src1->i[3]); 39901e04c3fSmrg} 40001e04c3fSmrg 40101e04c3fSmrgstatic void 40201e04c3fSmrgmicro_dfracexp(union tgsi_double_channel *dst, 40301e04c3fSmrg union tgsi_exec_channel *dst_exp, 40401e04c3fSmrg const union tgsi_double_channel *src) 40501e04c3fSmrg{ 40601e04c3fSmrg dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 40701e04c3fSmrg dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 40801e04c3fSmrg dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 40901e04c3fSmrg dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 41001e04c3fSmrg} 41101e04c3fSmrg 412cdc920a0Smrgstatic void 413cdc920a0Smrgmicro_exp2(union tgsi_exec_channel *dst, 414cdc920a0Smrg const union tgsi_exec_channel *src) 415cdc920a0Smrg{ 416cdc920a0Smrg#if DEBUG 417cdc920a0Smrg /* Inf is okay for this instruction, so clamp it to silence assertions. */ 418cdc920a0Smrg uint i; 419cdc920a0Smrg union tgsi_exec_channel clamped; 420cdc920a0Smrg 421cdc920a0Smrg for (i = 0; i < 4; i++) { 422cdc920a0Smrg if (src->f[i] > 127.99999f) { 423cdc920a0Smrg clamped.f[i] = 127.99999f; 424cdc920a0Smrg } else if (src->f[i] < -126.99999f) { 425cdc920a0Smrg clamped.f[i] = -126.99999f; 426cdc920a0Smrg } else { 427cdc920a0Smrg clamped.f[i] = src->f[i]; 428cdc920a0Smrg } 429cdc920a0Smrg } 430cdc920a0Smrg src = &clamped; 431cdc920a0Smrg#endif /* DEBUG */ 432cdc920a0Smrg 433cdc920a0Smrg dst->f[0] = powf(2.0f, src->f[0]); 434cdc920a0Smrg dst->f[1] = powf(2.0f, src->f[1]); 435cdc920a0Smrg dst->f[2] = powf(2.0f, src->f[2]); 436cdc920a0Smrg dst->f[3] = powf(2.0f, src->f[3]); 437cdc920a0Smrg} 438cdc920a0Smrg 43901e04c3fSmrgstatic void 44001e04c3fSmrgmicro_f2d(union tgsi_double_channel *dst, 44101e04c3fSmrg const union tgsi_exec_channel *src) 44201e04c3fSmrg{ 44301e04c3fSmrg dst->d[0] = (double)src->f[0]; 44401e04c3fSmrg dst->d[1] = (double)src->f[1]; 44501e04c3fSmrg dst->d[2] = (double)src->f[2]; 44601e04c3fSmrg dst->d[3] = (double)src->f[3]; 44701e04c3fSmrg} 44801e04c3fSmrg 449cdc920a0Smrgstatic void 450cdc920a0Smrgmicro_flr(union tgsi_exec_channel *dst, 451cdc920a0Smrg const union tgsi_exec_channel *src) 452cdc920a0Smrg{ 453cdc920a0Smrg dst->f[0] = floorf(src->f[0]); 454cdc920a0Smrg dst->f[1] = floorf(src->f[1]); 455cdc920a0Smrg dst->f[2] = floorf(src->f[2]); 456cdc920a0Smrg dst->f[3] = floorf(src->f[3]); 457cdc920a0Smrg} 458cdc920a0Smrg 459cdc920a0Smrgstatic void 460cdc920a0Smrgmicro_frc(union tgsi_exec_channel *dst, 461cdc920a0Smrg const union tgsi_exec_channel *src) 462cdc920a0Smrg{ 463cdc920a0Smrg dst->f[0] = src->f[0] - floorf(src->f[0]); 464cdc920a0Smrg dst->f[1] = src->f[1] - floorf(src->f[1]); 465cdc920a0Smrg dst->f[2] = src->f[2] - floorf(src->f[2]); 466cdc920a0Smrg dst->f[3] = src->f[3] - floorf(src->f[3]); 467cdc920a0Smrg} 468cdc920a0Smrg 46901e04c3fSmrgstatic void 47001e04c3fSmrgmicro_i2d(union tgsi_double_channel *dst, 47101e04c3fSmrg const union tgsi_exec_channel *src) 47201e04c3fSmrg{ 47301e04c3fSmrg dst->d[0] = (double)src->i[0]; 47401e04c3fSmrg dst->d[1] = (double)src->i[1]; 47501e04c3fSmrg dst->d[2] = (double)src->i[2]; 47601e04c3fSmrg dst->d[3] = (double)src->i[3]; 47701e04c3fSmrg} 47801e04c3fSmrg 479cdc920a0Smrgstatic void 480cdc920a0Smrgmicro_iabs(union tgsi_exec_channel *dst, 481cdc920a0Smrg const union tgsi_exec_channel *src) 482cdc920a0Smrg{ 483cdc920a0Smrg dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 484cdc920a0Smrg dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 485cdc920a0Smrg dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 486cdc920a0Smrg dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 487cdc920a0Smrg} 488cdc920a0Smrg 489cdc920a0Smrgstatic void 490cdc920a0Smrgmicro_ineg(union tgsi_exec_channel *dst, 491cdc920a0Smrg const union tgsi_exec_channel *src) 492cdc920a0Smrg{ 493cdc920a0Smrg dst->i[0] = -src->i[0]; 494cdc920a0Smrg dst->i[1] = -src->i[1]; 495cdc920a0Smrg dst->i[2] = -src->i[2]; 496cdc920a0Smrg dst->i[3] = -src->i[3]; 497cdc920a0Smrg} 498cdc920a0Smrg 499cdc920a0Smrgstatic void 500cdc920a0Smrgmicro_lg2(union tgsi_exec_channel *dst, 501cdc920a0Smrg const union tgsi_exec_channel *src) 502cdc920a0Smrg{ 503cdc920a0Smrg dst->f[0] = logf(src->f[0]) * 1.442695f; 504cdc920a0Smrg dst->f[1] = logf(src->f[1]) * 1.442695f; 505cdc920a0Smrg dst->f[2] = logf(src->f[2]) * 1.442695f; 506cdc920a0Smrg dst->f[3] = logf(src->f[3]) * 1.442695f; 507cdc920a0Smrg} 508cdc920a0Smrg 509cdc920a0Smrgstatic void 510cdc920a0Smrgmicro_lrp(union tgsi_exec_channel *dst, 511cdc920a0Smrg const union tgsi_exec_channel *src0, 512cdc920a0Smrg const union tgsi_exec_channel *src1, 513cdc920a0Smrg const union tgsi_exec_channel *src2) 514cdc920a0Smrg{ 515cdc920a0Smrg dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 516cdc920a0Smrg dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 517cdc920a0Smrg dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 518cdc920a0Smrg dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 519cdc920a0Smrg} 520cdc920a0Smrg 521cdc920a0Smrgstatic void 522cdc920a0Smrgmicro_mad(union tgsi_exec_channel *dst, 523cdc920a0Smrg const union tgsi_exec_channel *src0, 524cdc920a0Smrg const union tgsi_exec_channel *src1, 525cdc920a0Smrg const union tgsi_exec_channel *src2) 526cdc920a0Smrg{ 527cdc920a0Smrg dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 528cdc920a0Smrg dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 529cdc920a0Smrg dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 530cdc920a0Smrg dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 531cdc920a0Smrg} 532cdc920a0Smrg 533cdc920a0Smrgstatic void 534cdc920a0Smrgmicro_mov(union tgsi_exec_channel *dst, 535cdc920a0Smrg const union tgsi_exec_channel *src) 536cdc920a0Smrg{ 537cdc920a0Smrg dst->u[0] = src->u[0]; 538cdc920a0Smrg dst->u[1] = src->u[1]; 539cdc920a0Smrg dst->u[2] = src->u[2]; 540cdc920a0Smrg dst->u[3] = src->u[3]; 541cdc920a0Smrg} 542cdc920a0Smrg 543cdc920a0Smrgstatic void 544cdc920a0Smrgmicro_rcp(union tgsi_exec_channel *dst, 545cdc920a0Smrg const union tgsi_exec_channel *src) 546cdc920a0Smrg{ 547cdc920a0Smrg#if 0 /* for debugging */ 548cdc920a0Smrg assert(src->f[0] != 0.0f); 549cdc920a0Smrg assert(src->f[1] != 0.0f); 550cdc920a0Smrg assert(src->f[2] != 0.0f); 551cdc920a0Smrg assert(src->f[3] != 0.0f); 552cdc920a0Smrg#endif 553cdc920a0Smrg dst->f[0] = 1.0f / src->f[0]; 554cdc920a0Smrg dst->f[1] = 1.0f / src->f[1]; 555cdc920a0Smrg dst->f[2] = 1.0f / src->f[2]; 556cdc920a0Smrg dst->f[3] = 1.0f / src->f[3]; 557cdc920a0Smrg} 558cdc920a0Smrg 559cdc920a0Smrgstatic void 560cdc920a0Smrgmicro_rnd(union tgsi_exec_channel *dst, 561cdc920a0Smrg const union tgsi_exec_channel *src) 562cdc920a0Smrg{ 56301e04c3fSmrg dst->f[0] = _mesa_roundevenf(src->f[0]); 56401e04c3fSmrg dst->f[1] = _mesa_roundevenf(src->f[1]); 56501e04c3fSmrg dst->f[2] = _mesa_roundevenf(src->f[2]); 56601e04c3fSmrg dst->f[3] = _mesa_roundevenf(src->f[3]); 567cdc920a0Smrg} 568cdc920a0Smrg 569cdc920a0Smrgstatic void 570cdc920a0Smrgmicro_rsq(union tgsi_exec_channel *dst, 571cdc920a0Smrg const union tgsi_exec_channel *src) 572cdc920a0Smrg{ 573cdc920a0Smrg#if 0 /* for debugging */ 574cdc920a0Smrg assert(src->f[0] != 0.0f); 575cdc920a0Smrg assert(src->f[1] != 0.0f); 576cdc920a0Smrg assert(src->f[2] != 0.0f); 577cdc920a0Smrg assert(src->f[3] != 0.0f); 578cdc920a0Smrg#endif 579af69d88dSmrg dst->f[0] = 1.0f / sqrtf(src->f[0]); 580af69d88dSmrg dst->f[1] = 1.0f / sqrtf(src->f[1]); 581af69d88dSmrg dst->f[2] = 1.0f / sqrtf(src->f[2]); 582af69d88dSmrg dst->f[3] = 1.0f / sqrtf(src->f[3]); 583af69d88dSmrg} 584af69d88dSmrg 585af69d88dSmrgstatic void 586af69d88dSmrgmicro_sqrt(union tgsi_exec_channel *dst, 587af69d88dSmrg const union tgsi_exec_channel *src) 588af69d88dSmrg{ 589af69d88dSmrg dst->f[0] = sqrtf(src->f[0]); 590af69d88dSmrg dst->f[1] = sqrtf(src->f[1]); 591af69d88dSmrg dst->f[2] = sqrtf(src->f[2]); 592af69d88dSmrg dst->f[3] = sqrtf(src->f[3]); 593cdc920a0Smrg} 594cdc920a0Smrg 595cdc920a0Smrgstatic void 596cdc920a0Smrgmicro_seq(union tgsi_exec_channel *dst, 597cdc920a0Smrg const union tgsi_exec_channel *src0, 598cdc920a0Smrg const union tgsi_exec_channel *src1) 599cdc920a0Smrg{ 600cdc920a0Smrg dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 601cdc920a0Smrg dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 602cdc920a0Smrg dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 603cdc920a0Smrg dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 604cdc920a0Smrg} 605cdc920a0Smrg 606cdc920a0Smrgstatic void 607cdc920a0Smrgmicro_sge(union tgsi_exec_channel *dst, 608cdc920a0Smrg const union tgsi_exec_channel *src0, 609cdc920a0Smrg const union tgsi_exec_channel *src1) 610cdc920a0Smrg{ 611cdc920a0Smrg dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 612cdc920a0Smrg dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 613cdc920a0Smrg dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 614cdc920a0Smrg dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 615cdc920a0Smrg} 616cdc920a0Smrg 617cdc920a0Smrgstatic void 618cdc920a0Smrgmicro_sgn(union tgsi_exec_channel *dst, 619cdc920a0Smrg const union tgsi_exec_channel *src) 620cdc920a0Smrg{ 621cdc920a0Smrg dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 622cdc920a0Smrg dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 623cdc920a0Smrg dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 624cdc920a0Smrg dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 625cdc920a0Smrg} 626cdc920a0Smrg 627af69d88dSmrgstatic void 628af69d88dSmrgmicro_isgn(union tgsi_exec_channel *dst, 629af69d88dSmrg const union tgsi_exec_channel *src) 630af69d88dSmrg{ 631af69d88dSmrg dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 632af69d88dSmrg dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 633af69d88dSmrg dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 634af69d88dSmrg dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 635af69d88dSmrg} 636af69d88dSmrg 637cdc920a0Smrgstatic void 638cdc920a0Smrgmicro_sgt(union tgsi_exec_channel *dst, 639cdc920a0Smrg const union tgsi_exec_channel *src0, 640cdc920a0Smrg const union tgsi_exec_channel *src1) 641cdc920a0Smrg{ 642cdc920a0Smrg dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 643cdc920a0Smrg dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 644cdc920a0Smrg dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 645cdc920a0Smrg dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 646cdc920a0Smrg} 647cdc920a0Smrg 648cdc920a0Smrgstatic void 649cdc920a0Smrgmicro_sin(union tgsi_exec_channel *dst, 650cdc920a0Smrg const union tgsi_exec_channel *src) 651cdc920a0Smrg{ 652cdc920a0Smrg dst->f[0] = sinf(src->f[0]); 653cdc920a0Smrg dst->f[1] = sinf(src->f[1]); 654cdc920a0Smrg dst->f[2] = sinf(src->f[2]); 655cdc920a0Smrg dst->f[3] = sinf(src->f[3]); 656cdc920a0Smrg} 657cdc920a0Smrg 658cdc920a0Smrgstatic void 659cdc920a0Smrgmicro_sle(union tgsi_exec_channel *dst, 660cdc920a0Smrg const union tgsi_exec_channel *src0, 661cdc920a0Smrg const union tgsi_exec_channel *src1) 662cdc920a0Smrg{ 663cdc920a0Smrg dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 664cdc920a0Smrg dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 665cdc920a0Smrg dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 666cdc920a0Smrg dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 667cdc920a0Smrg} 668cdc920a0Smrg 669cdc920a0Smrgstatic void 670cdc920a0Smrgmicro_slt(union tgsi_exec_channel *dst, 671cdc920a0Smrg const union tgsi_exec_channel *src0, 672cdc920a0Smrg const union tgsi_exec_channel *src1) 673cdc920a0Smrg{ 674cdc920a0Smrg dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 675cdc920a0Smrg dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 676cdc920a0Smrg dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 677cdc920a0Smrg dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 678cdc920a0Smrg} 679cdc920a0Smrg 680cdc920a0Smrgstatic void 681cdc920a0Smrgmicro_sne(union tgsi_exec_channel *dst, 682cdc920a0Smrg const union tgsi_exec_channel *src0, 683cdc920a0Smrg const union tgsi_exec_channel *src1) 684cdc920a0Smrg{ 685cdc920a0Smrg dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 686cdc920a0Smrg dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 687cdc920a0Smrg dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 688cdc920a0Smrg dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 689cdc920a0Smrg} 690cdc920a0Smrg 6913464ebd5Sriastradhstatic void 69201e04c3fSmrgmicro_trunc(union tgsi_exec_channel *dst, 69301e04c3fSmrg const union tgsi_exec_channel *src) 69401e04c3fSmrg{ 69501e04c3fSmrg dst->f[0] = truncf(src->f[0]); 69601e04c3fSmrg dst->f[1] = truncf(src->f[1]); 69701e04c3fSmrg dst->f[2] = truncf(src->f[2]); 69801e04c3fSmrg dst->f[3] = truncf(src->f[3]); 69901e04c3fSmrg} 70001e04c3fSmrg 70101e04c3fSmrgstatic void 70201e04c3fSmrgmicro_u2d(union tgsi_double_channel *dst, 70301e04c3fSmrg const union tgsi_exec_channel *src) 7043464ebd5Sriastradh{ 70501e04c3fSmrg dst->d[0] = (double)src->u[0]; 70601e04c3fSmrg dst->d[1] = (double)src->u[1]; 70701e04c3fSmrg dst->d[2] = (double)src->u[2]; 70801e04c3fSmrg dst->d[3] = (double)src->u[3]; 7093464ebd5Sriastradh} 7103464ebd5Sriastradh 7113464ebd5Sriastradhstatic void 71201e04c3fSmrgmicro_i64abs(union tgsi_double_channel *dst, 71301e04c3fSmrg const union tgsi_double_channel *src) 7143464ebd5Sriastradh{ 71501e04c3fSmrg dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 71601e04c3fSmrg dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 71701e04c3fSmrg dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 71801e04c3fSmrg dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 7193464ebd5Sriastradh} 7203464ebd5Sriastradh 721cdc920a0Smrgstatic void 72201e04c3fSmrgmicro_i64sgn(union tgsi_double_channel *dst, 72301e04c3fSmrg const union tgsi_double_channel *src) 72401e04c3fSmrg{ 72501e04c3fSmrg dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 72601e04c3fSmrg dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 72701e04c3fSmrg dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 72801e04c3fSmrg dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 72901e04c3fSmrg} 73001e04c3fSmrg 73101e04c3fSmrgstatic void 73201e04c3fSmrgmicro_i64neg(union tgsi_double_channel *dst, 73301e04c3fSmrg const union tgsi_double_channel *src) 73401e04c3fSmrg{ 73501e04c3fSmrg dst->i64[0] = -src->i64[0]; 73601e04c3fSmrg dst->i64[1] = -src->i64[1]; 73701e04c3fSmrg dst->i64[2] = -src->i64[2]; 73801e04c3fSmrg dst->i64[3] = -src->i64[3]; 73901e04c3fSmrg} 74001e04c3fSmrg 74101e04c3fSmrgstatic void 74201e04c3fSmrgmicro_u64seq(union tgsi_double_channel *dst, 74301e04c3fSmrg const union tgsi_double_channel *src) 74401e04c3fSmrg{ 74501e04c3fSmrg dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 74601e04c3fSmrg dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 74701e04c3fSmrg dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 74801e04c3fSmrg dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; 74901e04c3fSmrg} 75001e04c3fSmrg 75101e04c3fSmrgstatic void 75201e04c3fSmrgmicro_u64sne(union tgsi_double_channel *dst, 75301e04c3fSmrg const union tgsi_double_channel *src) 75401e04c3fSmrg{ 75501e04c3fSmrg dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 75601e04c3fSmrg dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 75701e04c3fSmrg dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 75801e04c3fSmrg dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 75901e04c3fSmrg} 76001e04c3fSmrg 76101e04c3fSmrgstatic void 76201e04c3fSmrgmicro_i64slt(union tgsi_double_channel *dst, 76301e04c3fSmrg const union tgsi_double_channel *src) 76401e04c3fSmrg{ 76501e04c3fSmrg dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 76601e04c3fSmrg dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 76701e04c3fSmrg dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 76801e04c3fSmrg dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 76901e04c3fSmrg} 77001e04c3fSmrg 77101e04c3fSmrgstatic void 77201e04c3fSmrgmicro_u64slt(union tgsi_double_channel *dst, 77301e04c3fSmrg const union tgsi_double_channel *src) 77401e04c3fSmrg{ 77501e04c3fSmrg dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 77601e04c3fSmrg dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 77701e04c3fSmrg dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 77801e04c3fSmrg dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 77901e04c3fSmrg} 78001e04c3fSmrg 78101e04c3fSmrgstatic void 78201e04c3fSmrgmicro_i64sge(union tgsi_double_channel *dst, 78301e04c3fSmrg const union tgsi_double_channel *src) 78401e04c3fSmrg{ 78501e04c3fSmrg dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 78601e04c3fSmrg dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 78701e04c3fSmrg dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 78801e04c3fSmrg dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 78901e04c3fSmrg} 79001e04c3fSmrg 79101e04c3fSmrgstatic void 79201e04c3fSmrgmicro_u64sge(union tgsi_double_channel *dst, 79301e04c3fSmrg const union tgsi_double_channel *src) 79401e04c3fSmrg{ 79501e04c3fSmrg dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 79601e04c3fSmrg dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 79701e04c3fSmrg dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 79801e04c3fSmrg dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; 79901e04c3fSmrg} 80001e04c3fSmrg 80101e04c3fSmrgstatic void 80201e04c3fSmrgmicro_u64max(union tgsi_double_channel *dst, 80301e04c3fSmrg const union tgsi_double_channel *src) 80401e04c3fSmrg{ 80501e04c3fSmrg dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 80601e04c3fSmrg dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 80701e04c3fSmrg dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 80801e04c3fSmrg dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 80901e04c3fSmrg} 81001e04c3fSmrg 81101e04c3fSmrgstatic void 81201e04c3fSmrgmicro_i64max(union tgsi_double_channel *dst, 81301e04c3fSmrg const union tgsi_double_channel *src) 81401e04c3fSmrg{ 81501e04c3fSmrg dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 81601e04c3fSmrg dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 81701e04c3fSmrg dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 81801e04c3fSmrg dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 81901e04c3fSmrg} 82001e04c3fSmrg 82101e04c3fSmrgstatic void 82201e04c3fSmrgmicro_u64min(union tgsi_double_channel *dst, 82301e04c3fSmrg const union tgsi_double_channel *src) 82401e04c3fSmrg{ 82501e04c3fSmrg dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 82601e04c3fSmrg dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 82701e04c3fSmrg dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 82801e04c3fSmrg dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 82901e04c3fSmrg} 83001e04c3fSmrg 83101e04c3fSmrgstatic void 83201e04c3fSmrgmicro_i64min(union tgsi_double_channel *dst, 83301e04c3fSmrg const union tgsi_double_channel *src) 83401e04c3fSmrg{ 83501e04c3fSmrg dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 83601e04c3fSmrg dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 83701e04c3fSmrg dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 83801e04c3fSmrg dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 83901e04c3fSmrg} 84001e04c3fSmrg 84101e04c3fSmrgstatic void 84201e04c3fSmrgmicro_u64add(union tgsi_double_channel *dst, 84301e04c3fSmrg const union tgsi_double_channel *src) 84401e04c3fSmrg{ 84501e04c3fSmrg dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 84601e04c3fSmrg dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 84701e04c3fSmrg dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 84801e04c3fSmrg dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 84901e04c3fSmrg} 85001e04c3fSmrg 85101e04c3fSmrgstatic void 85201e04c3fSmrgmicro_u64mul(union tgsi_double_channel *dst, 85301e04c3fSmrg const union tgsi_double_channel *src) 85401e04c3fSmrg{ 85501e04c3fSmrg dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 85601e04c3fSmrg dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 85701e04c3fSmrg dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 85801e04c3fSmrg dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 85901e04c3fSmrg} 86001e04c3fSmrg 86101e04c3fSmrgstatic void 86201e04c3fSmrgmicro_u64div(union tgsi_double_channel *dst, 86301e04c3fSmrg const union tgsi_double_channel *src) 86401e04c3fSmrg{ 86501e04c3fSmrg dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 86601e04c3fSmrg dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 86701e04c3fSmrg dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 86801e04c3fSmrg dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 86901e04c3fSmrg} 87001e04c3fSmrg 87101e04c3fSmrgstatic void 87201e04c3fSmrgmicro_i64div(union tgsi_double_channel *dst, 87301e04c3fSmrg const union tgsi_double_channel *src) 87401e04c3fSmrg{ 87501e04c3fSmrg dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 87601e04c3fSmrg dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 87701e04c3fSmrg dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 87801e04c3fSmrg dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 87901e04c3fSmrg} 88001e04c3fSmrg 88101e04c3fSmrgstatic void 88201e04c3fSmrgmicro_u64mod(union tgsi_double_channel *dst, 88301e04c3fSmrg const union tgsi_double_channel *src) 88401e04c3fSmrg{ 88501e04c3fSmrg dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 88601e04c3fSmrg dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 88701e04c3fSmrg dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 88801e04c3fSmrg dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; 88901e04c3fSmrg} 89001e04c3fSmrg 89101e04c3fSmrgstatic void 89201e04c3fSmrgmicro_i64mod(union tgsi_double_channel *dst, 89301e04c3fSmrg const union tgsi_double_channel *src) 89401e04c3fSmrg{ 89501e04c3fSmrg dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 89601e04c3fSmrg dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 89701e04c3fSmrg dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 89801e04c3fSmrg dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 89901e04c3fSmrg} 90001e04c3fSmrg 90101e04c3fSmrgstatic void 90201e04c3fSmrgmicro_u64shl(union tgsi_double_channel *dst, 90301e04c3fSmrg const union tgsi_double_channel *src0, 90401e04c3fSmrg union tgsi_exec_channel *src1) 90501e04c3fSmrg{ 90601e04c3fSmrg unsigned masked_count; 90701e04c3fSmrg masked_count = src1->u[0] & 0x3f; 90801e04c3fSmrg dst->u64[0] = src0->u64[0] << masked_count; 90901e04c3fSmrg masked_count = src1->u[1] & 0x3f; 91001e04c3fSmrg dst->u64[1] = src0->u64[1] << masked_count; 91101e04c3fSmrg masked_count = src1->u[2] & 0x3f; 91201e04c3fSmrg dst->u64[2] = src0->u64[2] << masked_count; 91301e04c3fSmrg masked_count = src1->u[3] & 0x3f; 91401e04c3fSmrg dst->u64[3] = src0->u64[3] << masked_count; 91501e04c3fSmrg} 91601e04c3fSmrg 91701e04c3fSmrgstatic void 91801e04c3fSmrgmicro_i64shr(union tgsi_double_channel *dst, 91901e04c3fSmrg const union tgsi_double_channel *src0, 92001e04c3fSmrg union tgsi_exec_channel *src1) 921cdc920a0Smrg{ 92201e04c3fSmrg unsigned masked_count; 92301e04c3fSmrg masked_count = src1->u[0] & 0x3f; 92401e04c3fSmrg dst->i64[0] = src0->i64[0] >> masked_count; 92501e04c3fSmrg masked_count = src1->u[1] & 0x3f; 92601e04c3fSmrg dst->i64[1] = src0->i64[1] >> masked_count; 92701e04c3fSmrg masked_count = src1->u[2] & 0x3f; 92801e04c3fSmrg dst->i64[2] = src0->i64[2] >> masked_count; 92901e04c3fSmrg masked_count = src1->u[3] & 0x3f; 93001e04c3fSmrg dst->i64[3] = src0->i64[3] >> masked_count; 931cdc920a0Smrg} 932cdc920a0Smrg 93301e04c3fSmrgstatic void 93401e04c3fSmrgmicro_u64shr(union tgsi_double_channel *dst, 93501e04c3fSmrg const union tgsi_double_channel *src0, 93601e04c3fSmrg union tgsi_exec_channel *src1) 93701e04c3fSmrg{ 93801e04c3fSmrg unsigned masked_count; 93901e04c3fSmrg masked_count = src1->u[0] & 0x3f; 94001e04c3fSmrg dst->u64[0] = src0->u64[0] >> masked_count; 94101e04c3fSmrg masked_count = src1->u[1] & 0x3f; 94201e04c3fSmrg dst->u64[1] = src0->u64[1] >> masked_count; 94301e04c3fSmrg masked_count = src1->u[2] & 0x3f; 94401e04c3fSmrg dst->u64[2] = src0->u64[2] >> masked_count; 94501e04c3fSmrg masked_count = src1->u[3] & 0x3f; 94601e04c3fSmrg dst->u64[3] = src0->u64[3] >> masked_count; 94701e04c3fSmrg} 948cdc920a0Smrg 949cdc920a0Smrgenum tgsi_exec_datatype { 950cdc920a0Smrg TGSI_EXEC_DATA_FLOAT, 951cdc920a0Smrg TGSI_EXEC_DATA_INT, 95201e04c3fSmrg TGSI_EXEC_DATA_UINT, 95301e04c3fSmrg TGSI_EXEC_DATA_DOUBLE, 95401e04c3fSmrg TGSI_EXEC_DATA_INT64, 95501e04c3fSmrg TGSI_EXEC_DATA_UINT64, 956cdc920a0Smrg}; 957cdc920a0Smrg 9584a49301eSmrg/** The execution mask depends on the conditional mask and the loop mask */ 9594a49301eSmrg#define UPDATE_EXEC_MASK(MACH) \ 960cdc920a0Smrg MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 9614a49301eSmrg 9624a49301eSmrg 9634a49301eSmrgstatic const union tgsi_exec_channel ZeroVec = 9644a49301eSmrg { { 0.0, 0.0, 0.0, 0.0 } }; 9654a49301eSmrg 966cdc920a0Smrgstatic const union tgsi_exec_channel OneVec = { 967cdc920a0Smrg {1.0f, 1.0f, 1.0f, 1.0f} 968cdc920a0Smrg}; 969cdc920a0Smrg 9703464ebd5Sriastradhstatic const union tgsi_exec_channel P128Vec = { 9713464ebd5Sriastradh {128.0f, 128.0f, 128.0f, 128.0f} 9723464ebd5Sriastradh}; 9733464ebd5Sriastradh 9743464ebd5Sriastradhstatic const union tgsi_exec_channel M128Vec = { 9753464ebd5Sriastradh {-128.0f, -128.0f, -128.0f, -128.0f} 9763464ebd5Sriastradh}; 9773464ebd5Sriastradh 9784a49301eSmrg 979cdc920a0Smrg/** 980cdc920a0Smrg * Assert that none of the float values in 'chan' are infinite or NaN. 981cdc920a0Smrg * NaN and Inf may occur normally during program execution and should 982cdc920a0Smrg * not lead to crashes, etc. But when debugging, it's helpful to catch 983cdc920a0Smrg * them. 984cdc920a0Smrg */ 98501e04c3fSmrgstatic inline void 9864a49301eSmrgcheck_inf_or_nan(const union tgsi_exec_channel *chan) 9874a49301eSmrg{ 988cdc920a0Smrg assert(!util_is_inf_or_nan((chan)->f[0])); 989cdc920a0Smrg assert(!util_is_inf_or_nan((chan)->f[1])); 990cdc920a0Smrg assert(!util_is_inf_or_nan((chan)->f[2])); 991cdc920a0Smrg assert(!util_is_inf_or_nan((chan)->f[3])); 9924a49301eSmrg} 9934a49301eSmrg 9944a49301eSmrg 9954a49301eSmrg#ifdef DEBUG 9964a49301eSmrgstatic void 9974a49301eSmrgprint_chan(const char *msg, const union tgsi_exec_channel *chan) 9984a49301eSmrg{ 9994a49301eSmrg debug_printf("%s = {%f, %f, %f, %f}\n", 10004a49301eSmrg msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 10014a49301eSmrg} 10024a49301eSmrg#endif 10034a49301eSmrg 10044a49301eSmrg 10054a49301eSmrg#ifdef DEBUG 10064a49301eSmrgstatic void 10074a49301eSmrgprint_temp(const struct tgsi_exec_machine *mach, uint index) 10084a49301eSmrg{ 10094a49301eSmrg const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 10104a49301eSmrg int i; 10114a49301eSmrg debug_printf("Temp[%u] =\n", index); 10124a49301eSmrg for (i = 0; i < 4; i++) { 10134a49301eSmrg debug_printf(" %c: { %f, %f, %f, %f }\n", 10144a49301eSmrg "XYZW"[i], 10154a49301eSmrg tmp->xyzw[i].f[0], 10164a49301eSmrg tmp->xyzw[i].f[1], 10174a49301eSmrg tmp->xyzw[i].f[2], 10184a49301eSmrg tmp->xyzw[i].f[3]); 10194a49301eSmrg } 10204a49301eSmrg} 10214a49301eSmrg#endif 10224a49301eSmrg 10234a49301eSmrg 10243464ebd5Sriastradhvoid 10253464ebd5Sriastradhtgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 10263464ebd5Sriastradh unsigned num_bufs, 10273464ebd5Sriastradh const void **bufs, 10283464ebd5Sriastradh const unsigned *buf_sizes) 10293464ebd5Sriastradh{ 10303464ebd5Sriastradh unsigned i; 10313464ebd5Sriastradh 10323464ebd5Sriastradh for (i = 0; i < num_bufs; i++) { 10333464ebd5Sriastradh mach->Consts[i] = bufs[i]; 10343464ebd5Sriastradh mach->ConstsSize[i] = buf_sizes[i]; 10353464ebd5Sriastradh } 10363464ebd5Sriastradh} 10373464ebd5Sriastradh 10384a49301eSmrg/** 10394a49301eSmrg * Initialize machine state by expanding tokens to full instructions, 10404a49301eSmrg * allocating temporary storage, setting up constants, etc. 10414a49301eSmrg * After this, we can call tgsi_exec_machine_run() many times. 10424a49301eSmrg */ 10434a49301eSmrgvoid 10444a49301eSmrgtgsi_exec_machine_bind_shader( 10454a49301eSmrg struct tgsi_exec_machine *mach, 10464a49301eSmrg const struct tgsi_token *tokens, 104701e04c3fSmrg struct tgsi_sampler *sampler, 104801e04c3fSmrg struct tgsi_image *image, 104901e04c3fSmrg struct tgsi_buffer *buffer) 10504a49301eSmrg{ 10514a49301eSmrg uint k; 10524a49301eSmrg struct tgsi_parse_context parse; 10534a49301eSmrg struct tgsi_full_instruction *instructions; 10544a49301eSmrg struct tgsi_full_declaration *declarations; 10554a49301eSmrg uint maxInstructions = 10, numInstructions = 0; 10564a49301eSmrg uint maxDeclarations = 10, numDeclarations = 0; 10574a49301eSmrg 10584a49301eSmrg#if 0 10594a49301eSmrg tgsi_dump(tokens, 0); 10604a49301eSmrg#endif 10614a49301eSmrg 10624a49301eSmrg mach->Tokens = tokens; 1063af69d88dSmrg mach->Sampler = sampler; 106401e04c3fSmrg mach->Image = image; 106501e04c3fSmrg mach->Buffer = buffer; 10664a49301eSmrg 10673464ebd5Sriastradh if (!tokens) { 10683464ebd5Sriastradh /* unbind and free all */ 1069af69d88dSmrg FREE(mach->Declarations); 10703464ebd5Sriastradh mach->Declarations = NULL; 10713464ebd5Sriastradh mach->NumDeclarations = 0; 10723464ebd5Sriastradh 1073af69d88dSmrg FREE(mach->Instructions); 10743464ebd5Sriastradh mach->Instructions = NULL; 10753464ebd5Sriastradh mach->NumInstructions = 0; 10763464ebd5Sriastradh 10773464ebd5Sriastradh return; 10783464ebd5Sriastradh } 10793464ebd5Sriastradh 10804a49301eSmrg k = tgsi_parse_init (&parse, mach->Tokens); 10814a49301eSmrg if (k != TGSI_PARSE_OK) { 10824a49301eSmrg debug_printf( "Problem parsing!\n" ); 10834a49301eSmrg return; 10844a49301eSmrg } 10854a49301eSmrg 10864a49301eSmrg mach->ImmLimit = 0; 1087af69d88dSmrg mach->NumOutputs = 0; 10883464ebd5Sriastradh 108901e04c3fSmrg for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 109001e04c3fSmrg mach->SysSemanticToIndex[k] = -1; 109101e04c3fSmrg 109201e04c3fSmrg if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 10933464ebd5Sriastradh !mach->UsedGeometryShader) { 1094af69d88dSmrg struct tgsi_exec_vector *inputs; 1095af69d88dSmrg struct tgsi_exec_vector *outputs; 1096af69d88dSmrg 1097af69d88dSmrg inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1098af69d88dSmrg TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1099af69d88dSmrg 16); 11003464ebd5Sriastradh 11013464ebd5Sriastradh if (!inputs) 11023464ebd5Sriastradh return; 1103af69d88dSmrg 1104af69d88dSmrg outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1105af69d88dSmrg TGSI_MAX_TOTAL_VERTICES, 16); 1106af69d88dSmrg 11073464ebd5Sriastradh if (!outputs) { 11083464ebd5Sriastradh align_free(inputs); 11093464ebd5Sriastradh return; 11103464ebd5Sriastradh } 11113464ebd5Sriastradh 11123464ebd5Sriastradh align_free(mach->Inputs); 11133464ebd5Sriastradh align_free(mach->Outputs); 11143464ebd5Sriastradh 11153464ebd5Sriastradh mach->Inputs = inputs; 11163464ebd5Sriastradh mach->Outputs = outputs; 11173464ebd5Sriastradh mach->UsedGeometryShader = TRUE; 11183464ebd5Sriastradh } 11194a49301eSmrg 11204a49301eSmrg declarations = (struct tgsi_full_declaration *) 11214a49301eSmrg MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 11224a49301eSmrg 11234a49301eSmrg if (!declarations) { 11244a49301eSmrg return; 11254a49301eSmrg } 11264a49301eSmrg 11274a49301eSmrg instructions = (struct tgsi_full_instruction *) 11284a49301eSmrg MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 11294a49301eSmrg 11304a49301eSmrg if (!instructions) { 11314a49301eSmrg FREE( declarations ); 11324a49301eSmrg return; 11334a49301eSmrg } 11344a49301eSmrg 11354a49301eSmrg while( !tgsi_parse_end_of_tokens( &parse ) ) { 11364a49301eSmrg uint i; 11374a49301eSmrg 11384a49301eSmrg tgsi_parse_token( &parse ); 11394a49301eSmrg switch( parse.FullToken.Token.Type ) { 11404a49301eSmrg case TGSI_TOKEN_TYPE_DECLARATION: 11414a49301eSmrg /* save expanded declaration */ 11424a49301eSmrg if (numDeclarations == maxDeclarations) { 11434a49301eSmrg declarations = REALLOC(declarations, 11444a49301eSmrg maxDeclarations 11454a49301eSmrg * sizeof(struct tgsi_full_declaration), 11464a49301eSmrg (maxDeclarations + 10) 11474a49301eSmrg * sizeof(struct tgsi_full_declaration)); 11484a49301eSmrg maxDeclarations += 10; 11494a49301eSmrg } 11507ec681f3Smrg if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) 11517ec681f3Smrg mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1); 115201e04c3fSmrg else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 115301e04c3fSmrg const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 115401e04c3fSmrg mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 115501e04c3fSmrg } 115601e04c3fSmrg 11574a49301eSmrg memcpy(declarations + numDeclarations, 11584a49301eSmrg &parse.FullToken.FullDeclaration, 11594a49301eSmrg sizeof(declarations[0])); 11604a49301eSmrg numDeclarations++; 11614a49301eSmrg break; 11624a49301eSmrg 11634a49301eSmrg case TGSI_TOKEN_TYPE_IMMEDIATE: 11644a49301eSmrg { 11654a49301eSmrg uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 11664a49301eSmrg assert( size <= 4 ); 116701e04c3fSmrg if (mach->ImmLimit >= mach->ImmsReserved) { 116801e04c3fSmrg unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128; 116901e04c3fSmrg float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4)); 117001e04c3fSmrg if (imms) { 117101e04c3fSmrg mach->ImmsReserved = newReserved; 117201e04c3fSmrg mach->Imms = imms; 117301e04c3fSmrg } else { 117401e04c3fSmrg debug_printf("Unable to (re)allocate space for immidiate constants\n"); 117501e04c3fSmrg break; 117601e04c3fSmrg } 117701e04c3fSmrg } 11784a49301eSmrg 11794a49301eSmrg for( i = 0; i < size; i++ ) { 11804a49301eSmrg mach->Imms[mach->ImmLimit][i] = 11814a49301eSmrg parse.FullToken.FullImmediate.u[i].Float; 11824a49301eSmrg } 11834a49301eSmrg mach->ImmLimit += 1; 11844a49301eSmrg } 11854a49301eSmrg break; 11864a49301eSmrg 11874a49301eSmrg case TGSI_TOKEN_TYPE_INSTRUCTION: 11884a49301eSmrg 11894a49301eSmrg /* save expanded instruction */ 11904a49301eSmrg if (numInstructions == maxInstructions) { 11914a49301eSmrg instructions = REALLOC(instructions, 11924a49301eSmrg maxInstructions 11934a49301eSmrg * sizeof(struct tgsi_full_instruction), 11944a49301eSmrg (maxInstructions + 10) 11954a49301eSmrg * sizeof(struct tgsi_full_instruction)); 11964a49301eSmrg maxInstructions += 10; 11974a49301eSmrg } 11984a49301eSmrg 11994a49301eSmrg memcpy(instructions + numInstructions, 12004a49301eSmrg &parse.FullToken.FullInstruction, 12014a49301eSmrg sizeof(instructions[0])); 12024a49301eSmrg 12034a49301eSmrg numInstructions++; 12044a49301eSmrg break; 12054a49301eSmrg 1206cdc920a0Smrg case TGSI_TOKEN_TYPE_PROPERTY: 120701e04c3fSmrg if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1208af69d88dSmrg if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1209af69d88dSmrg mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1210af69d88dSmrg } 1211af69d88dSmrg } 1212cdc920a0Smrg break; 1213cdc920a0Smrg 12144a49301eSmrg default: 12154a49301eSmrg assert( 0 ); 12164a49301eSmrg } 12174a49301eSmrg } 12184a49301eSmrg tgsi_parse_free (&parse); 12194a49301eSmrg 1220af69d88dSmrg FREE(mach->Declarations); 12214a49301eSmrg mach->Declarations = declarations; 12224a49301eSmrg mach->NumDeclarations = numDeclarations; 12234a49301eSmrg 1224af69d88dSmrg FREE(mach->Instructions); 12254a49301eSmrg mach->Instructions = instructions; 12264a49301eSmrg mach->NumInstructions = numInstructions; 12274a49301eSmrg} 12284a49301eSmrg 12294a49301eSmrg 12304a49301eSmrgstruct tgsi_exec_machine * 123101e04c3fSmrgtgsi_exec_machine_create(enum pipe_shader_type shader_type) 12324a49301eSmrg{ 12334a49301eSmrg struct tgsi_exec_machine *mach; 12344a49301eSmrg 12354a49301eSmrg mach = align_malloc( sizeof *mach, 16 ); 12364a49301eSmrg if (!mach) 12374a49301eSmrg goto fail; 12384a49301eSmrg 12394a49301eSmrg memset(mach, 0, sizeof(*mach)); 12404a49301eSmrg 124101e04c3fSmrg mach->ShaderType = shader_type; 12424a49301eSmrg 124301e04c3fSmrg if (shader_type != PIPE_SHADER_COMPUTE) { 124401e04c3fSmrg mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 124501e04c3fSmrg mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 124601e04c3fSmrg if (!mach->Inputs || !mach->Outputs) 124701e04c3fSmrg goto fail; 124801e04c3fSmrg } 12493464ebd5Sriastradh 1250361fc4cbSmaya if (shader_type == PIPE_SHADER_FRAGMENT) { 1251361fc4cbSmaya mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16); 1252361fc4cbSmaya if (!mach->InputSampleOffsetApply) 1253361fc4cbSmaya goto fail; 1254361fc4cbSmaya } 1255361fc4cbSmaya 12564a49301eSmrg#ifdef DEBUG 12574a49301eSmrg /* silence warnings */ 12584a49301eSmrg (void) print_chan; 12594a49301eSmrg (void) print_temp; 12604a49301eSmrg#endif 12614a49301eSmrg 12624a49301eSmrg return mach; 12634a49301eSmrg 12644a49301eSmrgfail: 12653464ebd5Sriastradh if (mach) { 1266361fc4cbSmaya align_free(mach->InputSampleOffsetApply); 12673464ebd5Sriastradh align_free(mach->Inputs); 12683464ebd5Sriastradh align_free(mach->Outputs); 12693464ebd5Sriastradh align_free(mach); 12703464ebd5Sriastradh } 12714a49301eSmrg return NULL; 12724a49301eSmrg} 12734a49301eSmrg 12744a49301eSmrg 12754a49301eSmrgvoid 12764a49301eSmrgtgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 12774a49301eSmrg{ 12784a49301eSmrg if (mach) { 1279af69d88dSmrg FREE(mach->Instructions); 1280af69d88dSmrg FREE(mach->Declarations); 128101e04c3fSmrg FREE(mach->Imms); 12824a49301eSmrg 1283361fc4cbSmaya align_free(mach->InputSampleOffsetApply); 12843464ebd5Sriastradh align_free(mach->Inputs); 12853464ebd5Sriastradh align_free(mach->Outputs); 12863464ebd5Sriastradh 12873464ebd5Sriastradh align_free(mach); 12883464ebd5Sriastradh } 12894a49301eSmrg} 12904a49301eSmrg 12914a49301eSmrgstatic void 1292cdc920a0Smrgmicro_add(union tgsi_exec_channel *dst, 1293cdc920a0Smrg const union tgsi_exec_channel *src0, 1294cdc920a0Smrg const union tgsi_exec_channel *src1) 12954a49301eSmrg{ 12964a49301eSmrg dst->f[0] = src0->f[0] + src1->f[0]; 12974a49301eSmrg dst->f[1] = src0->f[1] + src1->f[1]; 12984a49301eSmrg dst->f[2] = src0->f[2] + src1->f[2]; 12994a49301eSmrg dst->f[3] = src0->f[3] + src1->f[3]; 13004a49301eSmrg} 13014a49301eSmrg 13024a49301eSmrgstatic void 1303cdc920a0Smrgmicro_div( 13044a49301eSmrg union tgsi_exec_channel *dst, 13054a49301eSmrg const union tgsi_exec_channel *src0, 13064a49301eSmrg const union tgsi_exec_channel *src1 ) 13074a49301eSmrg{ 1308cdc920a0Smrg if (src1->f[0] != 0) { 1309cdc920a0Smrg dst->f[0] = src0->f[0] / src1->f[0]; 1310cdc920a0Smrg } 1311cdc920a0Smrg if (src1->f[1] != 0) { 1312cdc920a0Smrg dst->f[1] = src0->f[1] / src1->f[1]; 1313cdc920a0Smrg } 1314cdc920a0Smrg if (src1->f[2] != 0) { 1315cdc920a0Smrg dst->f[2] = src0->f[2] / src1->f[2]; 1316cdc920a0Smrg } 1317cdc920a0Smrg if (src1->f[3] != 0) { 1318cdc920a0Smrg dst->f[3] = src0->f[3] / src1->f[3]; 1319cdc920a0Smrg } 13204a49301eSmrg} 13214a49301eSmrg 13224a49301eSmrgstatic void 1323cdc920a0Smrgmicro_lt( 13244a49301eSmrg union tgsi_exec_channel *dst, 1325cdc920a0Smrg const union tgsi_exec_channel *src0, 1326cdc920a0Smrg const union tgsi_exec_channel *src1, 1327cdc920a0Smrg const union tgsi_exec_channel *src2, 1328cdc920a0Smrg const union tgsi_exec_channel *src3 ) 13294a49301eSmrg{ 1330cdc920a0Smrg dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1331cdc920a0Smrg dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1332cdc920a0Smrg dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1333cdc920a0Smrg dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 13344a49301eSmrg} 13354a49301eSmrg 13364a49301eSmrgstatic void 1337cdc920a0Smrgmicro_max(union tgsi_exec_channel *dst, 1338cdc920a0Smrg const union tgsi_exec_channel *src0, 1339cdc920a0Smrg const union tgsi_exec_channel *src1) 13404a49301eSmrg{ 13417ec681f3Smrg dst->f[0] = fmaxf(src0->f[0], src1->f[0]); 13427ec681f3Smrg dst->f[1] = fmaxf(src0->f[1], src1->f[1]); 13437ec681f3Smrg dst->f[2] = fmaxf(src0->f[2], src1->f[2]); 13447ec681f3Smrg dst->f[3] = fmaxf(src0->f[3], src1->f[3]); 13454a49301eSmrg} 13464a49301eSmrg 13474a49301eSmrgstatic void 1348cdc920a0Smrgmicro_min(union tgsi_exec_channel *dst, 1349cdc920a0Smrg const union tgsi_exec_channel *src0, 1350cdc920a0Smrg const union tgsi_exec_channel *src1) 13514a49301eSmrg{ 13527ec681f3Smrg dst->f[0] = fminf(src0->f[0], src1->f[0]); 13537ec681f3Smrg dst->f[1] = fminf(src0->f[1], src1->f[1]); 13547ec681f3Smrg dst->f[2] = fminf(src0->f[2], src1->f[2]); 13557ec681f3Smrg dst->f[3] = fminf(src0->f[3], src1->f[3]); 13564a49301eSmrg} 13574a49301eSmrg 13584a49301eSmrgstatic void 1359cdc920a0Smrgmicro_mul(union tgsi_exec_channel *dst, 1360cdc920a0Smrg const union tgsi_exec_channel *src0, 1361cdc920a0Smrg const union tgsi_exec_channel *src1) 13624a49301eSmrg{ 1363cdc920a0Smrg dst->f[0] = src0->f[0] * src1->f[0]; 1364cdc920a0Smrg dst->f[1] = src0->f[1] * src1->f[1]; 1365cdc920a0Smrg dst->f[2] = src0->f[2] * src1->f[2]; 1366cdc920a0Smrg dst->f[3] = src0->f[3] * src1->f[3]; 13674a49301eSmrg} 13684a49301eSmrg 13694a49301eSmrgstatic void 1370cdc920a0Smrgmicro_neg( 13714a49301eSmrg union tgsi_exec_channel *dst, 1372cdc920a0Smrg const union tgsi_exec_channel *src ) 13734a49301eSmrg{ 1374cdc920a0Smrg dst->f[0] = -src->f[0]; 1375cdc920a0Smrg dst->f[1] = -src->f[1]; 1376cdc920a0Smrg dst->f[2] = -src->f[2]; 1377cdc920a0Smrg dst->f[3] = -src->f[3]; 13784a49301eSmrg} 13794a49301eSmrg 13804a49301eSmrgstatic void 1381cdc920a0Smrgmicro_pow( 13824a49301eSmrg union tgsi_exec_channel *dst, 1383cdc920a0Smrg const union tgsi_exec_channel *src0, 1384cdc920a0Smrg const union tgsi_exec_channel *src1 ) 13854a49301eSmrg{ 1386cdc920a0Smrg dst->f[0] = powf( src0->f[0], src1->f[0] ); 1387cdc920a0Smrg dst->f[1] = powf( src0->f[1], src1->f[1] ); 1388cdc920a0Smrg dst->f[2] = powf( src0->f[2], src1->f[2] ); 1389cdc920a0Smrg dst->f[3] = powf( src0->f[3], src1->f[3] ); 13904a49301eSmrg} 13914a49301eSmrg 139201e04c3fSmrgstatic void 139301e04c3fSmrgmicro_ldexp(union tgsi_exec_channel *dst, 139401e04c3fSmrg const union tgsi_exec_channel *src0, 139501e04c3fSmrg const union tgsi_exec_channel *src1) 139601e04c3fSmrg{ 139701e04c3fSmrg dst->f[0] = ldexpf(src0->f[0], src1->i[0]); 139801e04c3fSmrg dst->f[1] = ldexpf(src0->f[1], src1->i[1]); 139901e04c3fSmrg dst->f[2] = ldexpf(src0->f[2], src1->i[2]); 140001e04c3fSmrg dst->f[3] = ldexpf(src0->f[3], src1->i[3]); 140101e04c3fSmrg} 140201e04c3fSmrg 14034a49301eSmrgstatic void 1404cdc920a0Smrgmicro_sub(union tgsi_exec_channel *dst, 1405cdc920a0Smrg const union tgsi_exec_channel *src0, 1406cdc920a0Smrg const union tgsi_exec_channel *src1) 14074a49301eSmrg{ 1408cdc920a0Smrg dst->f[0] = src0->f[0] - src1->f[0]; 1409cdc920a0Smrg dst->f[1] = src0->f[1] - src1->f[1]; 1410cdc920a0Smrg dst->f[2] = src0->f[2] - src1->f[2]; 1411cdc920a0Smrg dst->f[3] = src0->f[3] - src1->f[3]; 14124a49301eSmrg} 14134a49301eSmrg 14144a49301eSmrgstatic void 1415cdc920a0Smrgfetch_src_file_channel(const struct tgsi_exec_machine *mach, 1416cdc920a0Smrg const uint file, 1417cdc920a0Smrg const uint swizzle, 1418cdc920a0Smrg const union tgsi_exec_channel *index, 1419cdc920a0Smrg const union tgsi_exec_channel *index2D, 1420cdc920a0Smrg union tgsi_exec_channel *chan) 14214a49301eSmrg{ 14224a49301eSmrg uint i; 14234a49301eSmrg 14243464ebd5Sriastradh assert(swizzle < 4); 14253464ebd5Sriastradh 1426cdc920a0Smrg switch (file) { 1427cdc920a0Smrg case TGSI_FILE_CONSTANT: 1428af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 14297ec681f3Smrg /* NOTE: copying the const value as a uint instead of float */ 14307ec681f3Smrg const uint constbuf = index2D->i[i]; 14317ec681f3Smrg const unsigned pos = index->i[i] * 4 + swizzle; 14327ec681f3Smrg /* const buffer bounds check */ 14337ec681f3Smrg if (pos >= mach->ConstsSize[constbuf] / 4) { 14347ec681f3Smrg if (0) { 14357ec681f3Smrg /* Debug: print warning */ 14367ec681f3Smrg static int count = 0; 14377ec681f3Smrg if (count++ < 100) 14387ec681f3Smrg debug_printf("TGSI Exec: const buffer index %d" 14397ec681f3Smrg " out of bounds\n", pos); 14407ec681f3Smrg } 1441cdc920a0Smrg chan->u[i] = 0; 1442cdc920a0Smrg } else { 14433464ebd5Sriastradh const uint *buf = (const uint *)mach->Consts[constbuf]; 14447ec681f3Smrg chan->u[i] = buf[pos]; 1445cdc920a0Smrg } 14464a49301eSmrg } 1447cdc920a0Smrg break; 14484a49301eSmrg 1449cdc920a0Smrg case TGSI_FILE_INPUT: 1450af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 14513464ebd5Sriastradh /* 145201e04c3fSmrg if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 14533464ebd5Sriastradh debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 14543464ebd5Sriastradh index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 14553464ebd5Sriastradh index2D->i[i], index->i[i]); 14563464ebd5Sriastradh }*/ 14573464ebd5Sriastradh int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 14583464ebd5Sriastradh assert(pos >= 0); 14593464ebd5Sriastradh assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 14603464ebd5Sriastradh chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 14613464ebd5Sriastradh } 14623464ebd5Sriastradh break; 14633464ebd5Sriastradh 1464cdc920a0Smrg case TGSI_FILE_SYSTEM_VALUE: 1465af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 146601e04c3fSmrg chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1467cdc920a0Smrg } 1468cdc920a0Smrg break; 14694a49301eSmrg 1470cdc920a0Smrg case TGSI_FILE_TEMPORARY: 1471af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1472cdc920a0Smrg assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1473cdc920a0Smrg assert(index2D->i[i] == 0); 14744a49301eSmrg 1475cdc920a0Smrg chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1476cdc920a0Smrg } 1477cdc920a0Smrg break; 14784a49301eSmrg 1479cdc920a0Smrg case TGSI_FILE_IMMEDIATE: 1480af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1481cdc920a0Smrg assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1482cdc920a0Smrg assert(index2D->i[i] == 0); 14834a49301eSmrg 1484cdc920a0Smrg chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1485cdc920a0Smrg } 1486cdc920a0Smrg break; 14874a49301eSmrg 1488cdc920a0Smrg case TGSI_FILE_ADDRESS: 1489af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 14907ec681f3Smrg assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs)); 1491cdc920a0Smrg assert(index2D->i[i] == 0); 14924a49301eSmrg 1493cdc920a0Smrg chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1494cdc920a0Smrg } 1495cdc920a0Smrg break; 14964a49301eSmrg 1497cdc920a0Smrg case TGSI_FILE_OUTPUT: 1498cdc920a0Smrg /* vertex/fragment output vars can be read too */ 1499af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1500cdc920a0Smrg assert(index->i[i] >= 0); 1501cdc920a0Smrg assert(index2D->i[i] == 0); 15024a49301eSmrg 1503cdc920a0Smrg chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1504cdc920a0Smrg } 1505cdc920a0Smrg break; 15064a49301eSmrg 1507cdc920a0Smrg default: 1508cdc920a0Smrg assert(0); 1509af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1510cdc920a0Smrg chan->u[i] = 0; 1511cdc920a0Smrg } 1512cdc920a0Smrg } 15134a49301eSmrg} 15144a49301eSmrg 15154a49301eSmrgstatic void 1516361fc4cbSmayaget_index_registers(const struct tgsi_exec_machine *mach, 1517361fc4cbSmaya const struct tgsi_full_src_register *reg, 1518361fc4cbSmaya union tgsi_exec_channel *index, 1519361fc4cbSmaya union tgsi_exec_channel *index2D) 15204a49301eSmrg{ 1521cdc920a0Smrg uint swizzle; 15224a49301eSmrg 1523cdc920a0Smrg /* We start with a direct index into a register file. 1524cdc920a0Smrg * 1525cdc920a0Smrg * file[1], 1526cdc920a0Smrg * where: 1527cdc920a0Smrg * file = Register.File 1528cdc920a0Smrg * [1] = Register.Index 1529cdc920a0Smrg */ 1530361fc4cbSmaya index->i[0] = 1531361fc4cbSmaya index->i[1] = 1532361fc4cbSmaya index->i[2] = 1533361fc4cbSmaya index->i[3] = reg->Register.Index; 15344a49301eSmrg 1535cdc920a0Smrg /* There is an extra source register that indirectly subscripts 1536cdc920a0Smrg * a register file. The direct index now becomes an offset 1537cdc920a0Smrg * that is being added to the indirect register. 1538cdc920a0Smrg * 1539cdc920a0Smrg * file[ind[2].x+1], 1540cdc920a0Smrg * where: 1541cdc920a0Smrg * ind = Indirect.File 1542cdc920a0Smrg * [2] = Indirect.Index 1543cdc920a0Smrg * .x = Indirect.SwizzleX 1544cdc920a0Smrg */ 1545cdc920a0Smrg if (reg->Register.Indirect) { 1546cdc920a0Smrg union tgsi_exec_channel index2; 1547cdc920a0Smrg union tgsi_exec_channel indir_index; 1548cdc920a0Smrg const uint execmask = mach->ExecMask; 1549cdc920a0Smrg uint i; 15504a49301eSmrg 1551cdc920a0Smrg /* which address register (always zero now) */ 1552cdc920a0Smrg index2.i[0] = 1553cdc920a0Smrg index2.i[1] = 1554cdc920a0Smrg index2.i[2] = 1555cdc920a0Smrg index2.i[3] = reg->Indirect.Index; 1556cdc920a0Smrg /* get current value of address register[swizzle] */ 1557af69d88dSmrg swizzle = reg->Indirect.Swizzle; 1558cdc920a0Smrg fetch_src_file_channel(mach, 1559cdc920a0Smrg reg->Indirect.File, 1560cdc920a0Smrg swizzle, 1561cdc920a0Smrg &index2, 1562cdc920a0Smrg &ZeroVec, 1563cdc920a0Smrg &indir_index); 1564cdc920a0Smrg 1565cdc920a0Smrg /* add value of address register to the offset */ 1566361fc4cbSmaya index->i[0] += indir_index.i[0]; 1567361fc4cbSmaya index->i[1] += indir_index.i[1]; 1568361fc4cbSmaya index->i[2] += indir_index.i[2]; 1569361fc4cbSmaya index->i[3] += indir_index.i[3]; 1570cdc920a0Smrg 1571cdc920a0Smrg /* for disabled execution channels, zero-out the index to 1572cdc920a0Smrg * avoid using a potential garbage value. 1573cdc920a0Smrg */ 1574af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1575cdc920a0Smrg if ((execmask & (1 << i)) == 0) 1576361fc4cbSmaya index->i[i] = 0; 1577cdc920a0Smrg } 1578cdc920a0Smrg } 1579cdc920a0Smrg 1580cdc920a0Smrg /* There is an extra source register that is a second 1581cdc920a0Smrg * subscript to a register file. Effectively it means that 1582cdc920a0Smrg * the register file is actually a 2D array of registers. 1583cdc920a0Smrg * 1584cdc920a0Smrg * file[3][1], 1585cdc920a0Smrg * where: 1586cdc920a0Smrg * [3] = Dimension.Index 1587cdc920a0Smrg */ 1588cdc920a0Smrg if (reg->Register.Dimension) { 1589361fc4cbSmaya index2D->i[0] = 1590361fc4cbSmaya index2D->i[1] = 1591361fc4cbSmaya index2D->i[2] = 1592361fc4cbSmaya index2D->i[3] = reg->Dimension.Index; 1593cdc920a0Smrg 1594cdc920a0Smrg /* Again, the second subscript index can be addressed indirectly 1595cdc920a0Smrg * identically to the first one. 1596cdc920a0Smrg * Nothing stops us from indirectly addressing the indirect register, 1597cdc920a0Smrg * but there is no need for that, so we won't exercise it. 1598cdc920a0Smrg * 1599cdc920a0Smrg * file[ind[4].y+3][1], 1600cdc920a0Smrg * where: 1601cdc920a0Smrg * ind = DimIndirect.File 1602cdc920a0Smrg * [4] = DimIndirect.Index 1603cdc920a0Smrg * .y = DimIndirect.SwizzleX 1604cdc920a0Smrg */ 1605cdc920a0Smrg if (reg->Dimension.Indirect) { 1606cdc920a0Smrg union tgsi_exec_channel index2; 1607cdc920a0Smrg union tgsi_exec_channel indir_index; 1608cdc920a0Smrg const uint execmask = mach->ExecMask; 1609cdc920a0Smrg uint i; 1610cdc920a0Smrg 1611cdc920a0Smrg index2.i[0] = 1612cdc920a0Smrg index2.i[1] = 1613cdc920a0Smrg index2.i[2] = 1614cdc920a0Smrg index2.i[3] = reg->DimIndirect.Index; 1615cdc920a0Smrg 1616af69d88dSmrg swizzle = reg->DimIndirect.Swizzle; 1617cdc920a0Smrg fetch_src_file_channel(mach, 1618cdc920a0Smrg reg->DimIndirect.File, 1619cdc920a0Smrg swizzle, 1620cdc920a0Smrg &index2, 1621cdc920a0Smrg &ZeroVec, 1622cdc920a0Smrg &indir_index); 1623cdc920a0Smrg 1624361fc4cbSmaya index2D->i[0] += indir_index.i[0]; 1625361fc4cbSmaya index2D->i[1] += indir_index.i[1]; 1626361fc4cbSmaya index2D->i[2] += indir_index.i[2]; 1627361fc4cbSmaya index2D->i[3] += indir_index.i[3]; 1628cdc920a0Smrg 1629cdc920a0Smrg /* for disabled execution channels, zero-out the index to 1630cdc920a0Smrg * avoid using a potential garbage value. 1631cdc920a0Smrg */ 1632af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1633cdc920a0Smrg if ((execmask & (1 << i)) == 0) { 1634361fc4cbSmaya index2D->i[i] = 0; 1635cdc920a0Smrg } 1636cdc920a0Smrg } 1637cdc920a0Smrg } 1638cdc920a0Smrg 1639cdc920a0Smrg /* If by any chance there was a need for a 3D array of register 1640cdc920a0Smrg * files, we would have to check whether Dimension is followed 1641cdc920a0Smrg * by a dimension register and continue the saga. 1642cdc920a0Smrg */ 1643cdc920a0Smrg } else { 1644361fc4cbSmaya index2D->i[0] = 1645361fc4cbSmaya index2D->i[1] = 1646361fc4cbSmaya index2D->i[2] = 1647361fc4cbSmaya index2D->i[3] = 0; 1648cdc920a0Smrg } 1649361fc4cbSmaya} 1650361fc4cbSmaya 1651361fc4cbSmaya 1652361fc4cbSmayastatic void 1653361fc4cbSmayafetch_source_d(const struct tgsi_exec_machine *mach, 1654361fc4cbSmaya union tgsi_exec_channel *chan, 1655361fc4cbSmaya const struct tgsi_full_src_register *reg, 1656361fc4cbSmaya const uint chan_index) 1657361fc4cbSmaya{ 1658361fc4cbSmaya union tgsi_exec_channel index; 1659361fc4cbSmaya union tgsi_exec_channel index2D; 1660361fc4cbSmaya uint swizzle; 1661361fc4cbSmaya 1662361fc4cbSmaya get_index_registers(mach, reg, &index, &index2D); 1663361fc4cbSmaya 1664cdc920a0Smrg 1665cdc920a0Smrg swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1666cdc920a0Smrg fetch_src_file_channel(mach, 1667cdc920a0Smrg reg->Register.File, 1668cdc920a0Smrg swizzle, 1669cdc920a0Smrg &index, 1670cdc920a0Smrg &index2D, 1671cdc920a0Smrg chan); 167201e04c3fSmrg} 167301e04c3fSmrg 167401e04c3fSmrgstatic void 167501e04c3fSmrgfetch_source(const struct tgsi_exec_machine *mach, 167601e04c3fSmrg union tgsi_exec_channel *chan, 167701e04c3fSmrg const struct tgsi_full_src_register *reg, 167801e04c3fSmrg const uint chan_index, 167901e04c3fSmrg enum tgsi_exec_datatype src_datatype) 168001e04c3fSmrg{ 168101e04c3fSmrg fetch_source_d(mach, chan, reg, chan_index); 1682cdc920a0Smrg 1683cdc920a0Smrg if (reg->Register.Absolute) { 16847ec681f3Smrg assert(src_datatype == TGSI_EXEC_DATA_FLOAT); 16857ec681f3Smrg micro_abs(chan, chan); 1686cdc920a0Smrg } 1687cdc920a0Smrg 1688cdc920a0Smrg if (reg->Register.Negate) { 1689cdc920a0Smrg if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1690cdc920a0Smrg micro_neg(chan, chan); 1691cdc920a0Smrg } else { 1692cdc920a0Smrg micro_ineg(chan, chan); 1693cdc920a0Smrg } 1694cdc920a0Smrg } 16954a49301eSmrg} 16964a49301eSmrg 169701e04c3fSmrgstatic union tgsi_exec_channel * 169801e04c3fSmrgstore_dest_dstret(struct tgsi_exec_machine *mach, 169901e04c3fSmrg const union tgsi_exec_channel *chan, 170001e04c3fSmrg const struct tgsi_full_dst_register *reg, 17017ec681f3Smrg uint chan_index) 17024a49301eSmrg{ 170301e04c3fSmrg static union tgsi_exec_channel null; 1704cdc920a0Smrg union tgsi_exec_channel *dst; 1705cdc920a0Smrg int offset = 0; /* indirection offset */ 1706cdc920a0Smrg int index; 1707cdc920a0Smrg 1708cdc920a0Smrg 1709cdc920a0Smrg /* There is an extra source register that indirectly subscripts 1710cdc920a0Smrg * a register file. The direct index now becomes an offset 1711cdc920a0Smrg * that is being added to the indirect register. 1712cdc920a0Smrg * 1713cdc920a0Smrg * file[ind[2].x+1], 1714cdc920a0Smrg * where: 1715cdc920a0Smrg * ind = Indirect.File 1716cdc920a0Smrg * [2] = Indirect.Index 1717cdc920a0Smrg * .x = Indirect.SwizzleX 1718cdc920a0Smrg */ 1719cdc920a0Smrg if (reg->Register.Indirect) { 1720cdc920a0Smrg union tgsi_exec_channel index; 1721cdc920a0Smrg union tgsi_exec_channel indir_index; 1722cdc920a0Smrg uint swizzle; 1723cdc920a0Smrg 1724cdc920a0Smrg /* which address register (always zero for now) */ 1725cdc920a0Smrg index.i[0] = 1726cdc920a0Smrg index.i[1] = 1727cdc920a0Smrg index.i[2] = 1728cdc920a0Smrg index.i[3] = reg->Indirect.Index; 1729cdc920a0Smrg 1730cdc920a0Smrg /* get current value of address register[swizzle] */ 1731af69d88dSmrg swizzle = reg->Indirect.Swizzle; 1732cdc920a0Smrg 1733cdc920a0Smrg /* fetch values from the address/indirection register */ 1734cdc920a0Smrg fetch_src_file_channel(mach, 1735cdc920a0Smrg reg->Indirect.File, 1736cdc920a0Smrg swizzle, 1737cdc920a0Smrg &index, 1738cdc920a0Smrg &ZeroVec, 1739cdc920a0Smrg &indir_index); 1740cdc920a0Smrg 1741cdc920a0Smrg /* save indirection offset */ 1742cdc920a0Smrg offset = indir_index.i[0]; 1743cdc920a0Smrg } 1744cdc920a0Smrg 1745cdc920a0Smrg switch (reg->Register.File) { 1746cdc920a0Smrg case TGSI_FILE_NULL: 1747cdc920a0Smrg dst = &null; 1748cdc920a0Smrg break; 17494a49301eSmrg 1750cdc920a0Smrg case TGSI_FILE_OUTPUT: 17517ec681f3Smrg index = mach->OutputVertexOffset + reg->Register.Index; 1752cdc920a0Smrg dst = &mach->Outputs[offset + index].xyzw[chan_index]; 17534a49301eSmrg#if 0 1754af69d88dSmrg debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1755af69d88dSmrg mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1756af69d88dSmrg reg->Register.Index); 175701e04c3fSmrg if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1758af69d88dSmrg debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1759af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 1760cdc920a0Smrg if (execmask & (1 << i)) 1761af69d88dSmrg debug_printf("%f, ", chan->f[i]); 1762af69d88dSmrg debug_printf(")\n"); 1763cdc920a0Smrg } 17644a49301eSmrg#endif 1765cdc920a0Smrg break; 17664a49301eSmrg 1767cdc920a0Smrg case TGSI_FILE_TEMPORARY: 1768cdc920a0Smrg index = reg->Register.Index; 1769cdc920a0Smrg assert( index < TGSI_EXEC_NUM_TEMPS ); 1770cdc920a0Smrg dst = &mach->Temps[offset + index].xyzw[chan_index]; 1771cdc920a0Smrg break; 1772cdc920a0Smrg 17733464ebd5Sriastradh case TGSI_FILE_ADDRESS: 17743464ebd5Sriastradh index = reg->Register.Index; 17757ec681f3Smrg assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs)); 17763464ebd5Sriastradh dst = &mach->Addrs[index].xyzw[chan_index]; 1777cdc920a0Smrg break; 1778cdc920a0Smrg 1779cdc920a0Smrg default: 17807ec681f3Smrg unreachable("Bad destination file"); 1781cdc920a0Smrg } 1782cdc920a0Smrg 178301e04c3fSmrg return dst; 178401e04c3fSmrg} 1785cdc920a0Smrg 178601e04c3fSmrgstatic void 178701e04c3fSmrgstore_dest_double(struct tgsi_exec_machine *mach, 178801e04c3fSmrg const union tgsi_exec_channel *chan, 178901e04c3fSmrg const struct tgsi_full_dst_register *reg, 17907ec681f3Smrg uint chan_index) 179101e04c3fSmrg{ 179201e04c3fSmrg union tgsi_exec_channel *dst; 179301e04c3fSmrg const uint execmask = mach->ExecMask; 179401e04c3fSmrg int i; 1795cdc920a0Smrg 17967ec681f3Smrg dst = store_dest_dstret(mach, chan, reg, chan_index); 179701e04c3fSmrg if (!dst) 179801e04c3fSmrg return; 1799cdc920a0Smrg 180001e04c3fSmrg /* doubles path */ 180101e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 180201e04c3fSmrg if (execmask & (1 << i)) 180301e04c3fSmrg dst->i[i] = chan->i[i]; 180401e04c3fSmrg} 1805cdc920a0Smrg 180601e04c3fSmrgstatic void 180701e04c3fSmrgstore_dest(struct tgsi_exec_machine *mach, 180801e04c3fSmrg const union tgsi_exec_channel *chan, 180901e04c3fSmrg const struct tgsi_full_dst_register *reg, 181001e04c3fSmrg const struct tgsi_full_instruction *inst, 18117ec681f3Smrg uint chan_index) 181201e04c3fSmrg{ 181301e04c3fSmrg union tgsi_exec_channel *dst; 181401e04c3fSmrg const uint execmask = mach->ExecMask; 181501e04c3fSmrg int i; 181601e04c3fSmrg 18177ec681f3Smrg dst = store_dest_dstret(mach, chan, reg, chan_index); 181801e04c3fSmrg if (!dst) 181901e04c3fSmrg return; 1820cdc920a0Smrg 182101e04c3fSmrg if (!inst->Instruction.Saturate) { 1822af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 1823cdc920a0Smrg if (execmask & (1 << i)) 1824cdc920a0Smrg dst->i[i] = chan->i[i]; 182501e04c3fSmrg } 182601e04c3fSmrg else { 1827af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 18287ec681f3Smrg if (execmask & (1 << i)) 18297ec681f3Smrg dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f); 1830cdc920a0Smrg } 18314a49301eSmrg} 18324a49301eSmrg 1833cdc920a0Smrg#define FETCH(VAL,INDEX,CHAN)\ 1834cdc920a0Smrg fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 18354a49301eSmrg 1836af69d88dSmrg#define IFETCH(VAL,INDEX,CHAN)\ 1837af69d88dSmrg fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1838af69d88dSmrg 1839cdc920a0Smrg 1840cdc920a0Smrg/** 1841cdc920a0Smrg * Execute ARB-style KIL which is predicated by a src register. 1842cdc920a0Smrg * Kill fragment if any of the four values is less than zero. 1843cdc920a0Smrg */ 18444a49301eSmrgstatic void 1845af69d88dSmrgexec_kill_if(struct tgsi_exec_machine *mach, 1846af69d88dSmrg const struct tgsi_full_instruction *inst) 18474a49301eSmrg{ 1848cdc920a0Smrg uint uniquemask; 1849cdc920a0Smrg uint chan_index; 1850cdc920a0Smrg uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1851cdc920a0Smrg union tgsi_exec_channel r[1]; 1852cdc920a0Smrg 1853cdc920a0Smrg /* This mask stores component bits that were already tested. */ 1854cdc920a0Smrg uniquemask = 0; 1855cdc920a0Smrg 1856cdc920a0Smrg for (chan_index = 0; chan_index < 4; chan_index++) 1857cdc920a0Smrg { 1858cdc920a0Smrg uint swizzle; 1859cdc920a0Smrg uint i; 1860cdc920a0Smrg 1861cdc920a0Smrg /* unswizzle channel */ 1862cdc920a0Smrg swizzle = tgsi_util_get_full_src_register_swizzle ( 1863cdc920a0Smrg &inst->Src[0], 1864cdc920a0Smrg chan_index); 1865cdc920a0Smrg 1866cdc920a0Smrg /* check if the component has not been already tested */ 1867cdc920a0Smrg if (uniquemask & (1 << swizzle)) 1868cdc920a0Smrg continue; 1869cdc920a0Smrg uniquemask |= 1 << swizzle; 18704a49301eSmrg 1871cdc920a0Smrg FETCH(&r[0], 0, chan_index); 1872cdc920a0Smrg for (i = 0; i < 4; i++) 1873cdc920a0Smrg if (r[0].f[i] < 0.0f) 1874cdc920a0Smrg kilmask |= 1 << i; 1875cdc920a0Smrg } 18764a49301eSmrg 1877af69d88dSmrg /* restrict to fragments currently executing */ 1878af69d88dSmrg kilmask &= mach->ExecMask; 1879af69d88dSmrg 18807ec681f3Smrg mach->KillMask |= kilmask; 18814a49301eSmrg} 18824a49301eSmrg 1883cdc920a0Smrg/** 1884af69d88dSmrg * Unconditional fragment kill/discard. 1885cdc920a0Smrg */ 18864a49301eSmrgstatic void 188701e04c3fSmrgexec_kill(struct tgsi_exec_machine *mach) 18884a49301eSmrg{ 18897ec681f3Smrg /* kill fragment for all fragments currently executing. 18907ec681f3Smrg * bit 0 = pixel 0, bit 1 = pixel 1, etc. 18917ec681f3Smrg */ 18927ec681f3Smrg mach->KillMask |= mach->ExecMask; 18934a49301eSmrg} 18944a49301eSmrg 18954a49301eSmrgstatic void 1896361fc4cbSmayaemit_vertex(struct tgsi_exec_machine *mach, 1897361fc4cbSmaya const struct tgsi_full_instruction *inst) 18984a49301eSmrg{ 1899361fc4cbSmaya union tgsi_exec_channel r[1]; 1900361fc4cbSmaya unsigned stream_id; 19017ec681f3Smrg unsigned prim_count; 1902cdc920a0Smrg /* FIXME: check for exec mask correctly 1903cdc920a0Smrg unsigned i; 1904af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1905cdc920a0Smrg if ((mach->ExecMask & (1 << i))) 1906cdc920a0Smrg */ 1907361fc4cbSmaya IFETCH(&r[0], 0, TGSI_CHAN_X); 1908361fc4cbSmaya stream_id = r[0].u[0]; 19097ec681f3Smrg prim_count = mach->OutputPrimCount[stream_id]; 1910cdc920a0Smrg if (mach->ExecMask) { 19117ec681f3Smrg if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices) 1912af69d88dSmrg return; 1913af69d88dSmrg 19147ec681f3Smrg if (mach->Primitives[stream_id][prim_count] == 0) 19157ec681f3Smrg mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset; 19167ec681f3Smrg mach->OutputVertexOffset += mach->NumOutputs; 19177ec681f3Smrg mach->Primitives[stream_id][prim_count]++; 1918cdc920a0Smrg } 19194a49301eSmrg} 19204a49301eSmrg 19214a49301eSmrgstatic void 1922361fc4cbSmayaemit_primitive(struct tgsi_exec_machine *mach, 1923361fc4cbSmaya const struct tgsi_full_instruction *inst) 19244a49301eSmrg{ 1925361fc4cbSmaya unsigned *prim_count; 1926361fc4cbSmaya union tgsi_exec_channel r[1]; 1927361fc4cbSmaya unsigned stream_id = 0; 1928cdc920a0Smrg /* FIXME: check for exec mask correctly 1929cdc920a0Smrg unsigned i; 1930af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1931cdc920a0Smrg if ((mach->ExecMask & (1 << i))) 1932cdc920a0Smrg */ 1933361fc4cbSmaya if (inst) { 1934361fc4cbSmaya IFETCH(&r[0], 0, TGSI_CHAN_X); 1935361fc4cbSmaya stream_id = r[0].u[0]; 1936361fc4cbSmaya } 19377ec681f3Smrg prim_count = &mach->OutputPrimCount[stream_id]; 1938cdc920a0Smrg if (mach->ExecMask) { 1939cdc920a0Smrg ++(*prim_count); 19407ec681f3Smrg debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES); 1941361fc4cbSmaya mach->Primitives[stream_id][*prim_count] = 0; 1942cdc920a0Smrg } 19434a49301eSmrg} 19444a49301eSmrg 19453464ebd5Sriastradhstatic void 19463464ebd5Sriastradhconditional_emit_primitive(struct tgsi_exec_machine *mach) 19473464ebd5Sriastradh{ 194801e04c3fSmrg if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 19497ec681f3Smrg int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]]; 19503464ebd5Sriastradh if (emitted_verts) { 1951361fc4cbSmaya emit_primitive(mach, NULL); 19523464ebd5Sriastradh } 19533464ebd5Sriastradh } 19543464ebd5Sriastradh} 19553464ebd5Sriastradh 19563464ebd5Sriastradh 1957cdc920a0Smrg/* 1958cdc920a0Smrg * Fetch four texture samples using STR texture coordinates. 1959cdc920a0Smrg */ 19604a49301eSmrgstatic void 1961cdc920a0Smrgfetch_texel( struct tgsi_sampler *sampler, 1962af69d88dSmrg const unsigned sview_idx, 1963af69d88dSmrg const unsigned sampler_idx, 1964cdc920a0Smrg const union tgsi_exec_channel *s, 1965cdc920a0Smrg const union tgsi_exec_channel *t, 1966cdc920a0Smrg const union tgsi_exec_channel *p, 1967cdc920a0Smrg const union tgsi_exec_channel *c0, 1968af69d88dSmrg const union tgsi_exec_channel *c1, 1969af69d88dSmrg float derivs[3][2][TGSI_QUAD_SIZE], 1970af69d88dSmrg const int8_t offset[3], 1971cdc920a0Smrg enum tgsi_sampler_control control, 1972cdc920a0Smrg union tgsi_exec_channel *r, 1973cdc920a0Smrg union tgsi_exec_channel *g, 1974cdc920a0Smrg union tgsi_exec_channel *b, 1975cdc920a0Smrg union tgsi_exec_channel *a ) 19764a49301eSmrg{ 1977cdc920a0Smrg uint j; 1978af69d88dSmrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 19794a49301eSmrg 1980af69d88dSmrg /* FIXME: handle explicit derivs, offsets */ 1981af69d88dSmrg sampler->get_samples(sampler, sview_idx, sampler_idx, 1982af69d88dSmrg s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 19834a49301eSmrg 1984cdc920a0Smrg for (j = 0; j < 4; j++) { 1985cdc920a0Smrg r->f[j] = rgba[0][j]; 1986cdc920a0Smrg g->f[j] = rgba[1][j]; 1987cdc920a0Smrg b->f[j] = rgba[2][j]; 1988cdc920a0Smrg a->f[j] = rgba[3][j]; 1989cdc920a0Smrg } 19904a49301eSmrg} 19914a49301eSmrg 19924a49301eSmrg 1993cdc920a0Smrg#define TEX_MODIFIER_NONE 0 1994cdc920a0Smrg#define TEX_MODIFIER_PROJECTED 1 1995cdc920a0Smrg#define TEX_MODIFIER_LOD_BIAS 2 1996cdc920a0Smrg#define TEX_MODIFIER_EXPLICIT_LOD 3 1997af69d88dSmrg#define TEX_MODIFIER_LEVEL_ZERO 4 199801e04c3fSmrg#define TEX_MODIFIER_GATHER 5 1999af69d88dSmrg 2000af69d88dSmrg/* 2001af69d88dSmrg * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 2002af69d88dSmrg */ 2003af69d88dSmrgstatic void 2004af69d88dSmrgfetch_texel_offsets(struct tgsi_exec_machine *mach, 2005af69d88dSmrg const struct tgsi_full_instruction *inst, 2006af69d88dSmrg int8_t offsets[3]) 2007af69d88dSmrg{ 2008af69d88dSmrg if (inst->Texture.NumOffsets == 1) { 2009af69d88dSmrg union tgsi_exec_channel index; 2010af69d88dSmrg union tgsi_exec_channel offset[3]; 2011af69d88dSmrg index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 201201e04c3fSmrg fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2013af69d88dSmrg inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 201401e04c3fSmrg fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2015af69d88dSmrg inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 201601e04c3fSmrg fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2017af69d88dSmrg inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2018af69d88dSmrg offsets[0] = offset[0].i[0]; 2019af69d88dSmrg offsets[1] = offset[1].i[0]; 2020af69d88dSmrg offsets[2] = offset[2].i[0]; 2021af69d88dSmrg } else { 2022af69d88dSmrg assert(inst->Texture.NumOffsets == 0); 2023af69d88dSmrg offsets[0] = offsets[1] = offsets[2] = 0; 2024af69d88dSmrg } 2025af69d88dSmrg} 2026af69d88dSmrg 2027af69d88dSmrg 2028af69d88dSmrg/* 2029af69d88dSmrg * Fetch dx and dy values for one channel (s, t or r). 2030af69d88dSmrg * Put dx values into one float array, dy values into another. 2031af69d88dSmrg */ 2032af69d88dSmrgstatic void 2033af69d88dSmrgfetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2034af69d88dSmrg const struct tgsi_full_instruction *inst, 2035af69d88dSmrg unsigned regdsrcx, 2036af69d88dSmrg unsigned chan, 2037af69d88dSmrg float derivs[2][TGSI_QUAD_SIZE]) 2038af69d88dSmrg{ 2039af69d88dSmrg union tgsi_exec_channel d; 2040af69d88dSmrg FETCH(&d, regdsrcx, chan); 2041af69d88dSmrg derivs[0][0] = d.f[0]; 2042af69d88dSmrg derivs[0][1] = d.f[1]; 2043af69d88dSmrg derivs[0][2] = d.f[2]; 2044af69d88dSmrg derivs[0][3] = d.f[3]; 2045af69d88dSmrg FETCH(&d, regdsrcx + 1, chan); 2046af69d88dSmrg derivs[1][0] = d.f[0]; 2047af69d88dSmrg derivs[1][1] = d.f[1]; 2048af69d88dSmrg derivs[1][2] = d.f[2]; 2049af69d88dSmrg derivs[1][3] = d.f[3]; 2050af69d88dSmrg} 2051cdc920a0Smrg 205201e04c3fSmrgstatic uint 205301e04c3fSmrgfetch_sampler_unit(struct tgsi_exec_machine *mach, 205401e04c3fSmrg const struct tgsi_full_instruction *inst, 205501e04c3fSmrg uint sampler) 205601e04c3fSmrg{ 205701e04c3fSmrg uint unit = 0; 205801e04c3fSmrg int i; 205901e04c3fSmrg if (inst->Src[sampler].Register.Indirect) { 206001e04c3fSmrg const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 206101e04c3fSmrg union tgsi_exec_channel indir_index, index2; 206201e04c3fSmrg const uint execmask = mach->ExecMask; 206301e04c3fSmrg index2.i[0] = 206401e04c3fSmrg index2.i[1] = 206501e04c3fSmrg index2.i[2] = 206601e04c3fSmrg index2.i[3] = reg->Indirect.Index; 206701e04c3fSmrg 206801e04c3fSmrg fetch_src_file_channel(mach, 206901e04c3fSmrg reg->Indirect.File, 207001e04c3fSmrg reg->Indirect.Swizzle, 207101e04c3fSmrg &index2, 207201e04c3fSmrg &ZeroVec, 207301e04c3fSmrg &indir_index); 207401e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 207501e04c3fSmrg if (execmask & (1 << i)) { 207601e04c3fSmrg unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 207701e04c3fSmrg break; 207801e04c3fSmrg } 207901e04c3fSmrg } 208001e04c3fSmrg 208101e04c3fSmrg } else { 208201e04c3fSmrg unit = inst->Src[sampler].Register.Index; 208301e04c3fSmrg } 208401e04c3fSmrg return unit; 208501e04c3fSmrg} 2086cdc920a0Smrg 2087af69d88dSmrg/* 2088af69d88dSmrg * execute a texture instruction. 2089af69d88dSmrg * 209001e04c3fSmrg * modifier is used to control the channel routing for the 2091af69d88dSmrg * instruction variants like proj, lod, and texture with lod bias. 2092af69d88dSmrg * sampler indicates which src register the sampler is contained in. 2093af69d88dSmrg */ 20944a49301eSmrgstatic void 2095cdc920a0Smrgexec_tex(struct tgsi_exec_machine *mach, 2096cdc920a0Smrg const struct tgsi_full_instruction *inst, 2097af69d88dSmrg uint modifier, uint sampler) 20984a49301eSmrg{ 2099af69d88dSmrg const union tgsi_exec_channel *args[5], *proj = NULL; 2100af69d88dSmrg union tgsi_exec_channel r[5]; 210101e04c3fSmrg enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 21023464ebd5Sriastradh uint chan; 210301e04c3fSmrg uint unit; 2104af69d88dSmrg int8_t offsets[3]; 2105af69d88dSmrg int dim, shadow_ref, i; 2106cdc920a0Smrg 210701e04c3fSmrg unit = fetch_sampler_unit(mach, inst, sampler); 2108af69d88dSmrg /* always fetch all 3 offsets, overkill but keeps code simple */ 2109af69d88dSmrg fetch_texel_offsets(mach, inst, offsets); 2110cdc920a0Smrg 2111af69d88dSmrg assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2112af69d88dSmrg assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2113cdc920a0Smrg 211401e04c3fSmrg dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 211501e04c3fSmrg shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2116cdc920a0Smrg 2117af69d88dSmrg assert(dim <= 4); 2118af69d88dSmrg if (shadow_ref >= 0) 211901e04c3fSmrg assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args)); 2120cdc920a0Smrg 2121af69d88dSmrg /* fetch modifier to the last argument */ 2122af69d88dSmrg if (modifier != TEX_MODIFIER_NONE) { 212301e04c3fSmrg const int last = ARRAY_SIZE(args) - 1; 2124cdc920a0Smrg 2125af69d88dSmrg /* fetch modifier from src0.w or src1.x */ 2126af69d88dSmrg if (sampler == 1) { 2127af69d88dSmrg assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2128af69d88dSmrg FETCH(&r[last], 0, TGSI_CHAN_W); 2129af69d88dSmrg } 2130af69d88dSmrg else { 2131af69d88dSmrg FETCH(&r[last], 1, TGSI_CHAN_X); 2132cdc920a0Smrg } 2133cdc920a0Smrg 2134af69d88dSmrg if (modifier != TEX_MODIFIER_PROJECTED) { 2135af69d88dSmrg args[last] = &r[last]; 2136af69d88dSmrg } 2137af69d88dSmrg else { 2138af69d88dSmrg proj = &r[last]; 2139af69d88dSmrg args[last] = &ZeroVec; 21403464ebd5Sriastradh } 21413464ebd5Sriastradh 2142af69d88dSmrg /* point unused arguments to zero vector */ 2143af69d88dSmrg for (i = dim; i < last; i++) 2144af69d88dSmrg args[i] = &ZeroVec; 21453464ebd5Sriastradh 2146af69d88dSmrg if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 214701e04c3fSmrg control = TGSI_SAMPLER_LOD_EXPLICIT; 2148af69d88dSmrg else if (modifier == TEX_MODIFIER_LOD_BIAS) 214901e04c3fSmrg control = TGSI_SAMPLER_LOD_BIAS; 215001e04c3fSmrg else if (modifier == TEX_MODIFIER_GATHER) 215101e04c3fSmrg control = TGSI_SAMPLER_GATHER; 2152af69d88dSmrg } 2153af69d88dSmrg else { 215401e04c3fSmrg for (i = dim; i < (int)ARRAY_SIZE(args); i++) 2155af69d88dSmrg args[i] = &ZeroVec; 2156af69d88dSmrg } 21573464ebd5Sriastradh 2158af69d88dSmrg /* fetch coordinates */ 2159af69d88dSmrg for (i = 0; i < dim; i++) { 2160af69d88dSmrg FETCH(&r[i], 0, TGSI_CHAN_X + i); 21613464ebd5Sriastradh 2162af69d88dSmrg if (proj) 2163af69d88dSmrg micro_div(&r[i], &r[i], proj); 21643464ebd5Sriastradh 2165af69d88dSmrg args[i] = &r[i]; 2166af69d88dSmrg } 2167cdc920a0Smrg 2168af69d88dSmrg /* fetch reference value */ 2169af69d88dSmrg if (shadow_ref >= 0) { 2170af69d88dSmrg FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2171cdc920a0Smrg 2172af69d88dSmrg if (proj) 2173af69d88dSmrg micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2174cdc920a0Smrg 2175af69d88dSmrg args[shadow_ref] = &r[shadow_ref]; 2176cdc920a0Smrg } 2177cdc920a0Smrg 2178af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2179af69d88dSmrg args[0], args[1], args[2], args[3], args[4], 2180af69d88dSmrg NULL, offsets, control, 2181af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2182af69d88dSmrg 2183af69d88dSmrg#if 0 2184af69d88dSmrg debug_printf("fetch r: %g %g %g %g\n", 2185af69d88dSmrg r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2186af69d88dSmrg debug_printf("fetch g: %g %g %g %g\n", 2187af69d88dSmrg r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2188af69d88dSmrg debug_printf("fetch b: %g %g %g %g\n", 2189af69d88dSmrg r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2190af69d88dSmrg debug_printf("fetch a: %g %g %g %g\n", 2191af69d88dSmrg r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2192af69d88dSmrg#endif 2193af69d88dSmrg 2194af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 21953464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 21967ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 21973464ebd5Sriastradh } 2198cdc920a0Smrg } 21994a49301eSmrg} 22004a49301eSmrg 220101e04c3fSmrgstatic void 220201e04c3fSmrgexec_lodq(struct tgsi_exec_machine *mach, 220301e04c3fSmrg const struct tgsi_full_instruction *inst) 220401e04c3fSmrg{ 220501e04c3fSmrg uint resource_unit, sampler_unit; 220601e04c3fSmrg unsigned dim; 220701e04c3fSmrg unsigned i; 220801e04c3fSmrg union tgsi_exec_channel coords[4]; 220901e04c3fSmrg const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 221001e04c3fSmrg union tgsi_exec_channel r[2]; 221101e04c3fSmrg 221201e04c3fSmrg resource_unit = fetch_sampler_unit(mach, inst, 1); 221301e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 221401e04c3fSmrg uint target = mach->SamplerViews[resource_unit].Resource; 221501e04c3fSmrg dim = tgsi_util_get_texture_coord_dim(target); 221601e04c3fSmrg sampler_unit = fetch_sampler_unit(mach, inst, 2); 221701e04c3fSmrg } else { 221801e04c3fSmrg dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 221901e04c3fSmrg sampler_unit = resource_unit; 222001e04c3fSmrg } 222101e04c3fSmrg assert(dim <= ARRAY_SIZE(coords)); 222201e04c3fSmrg /* fetch coordinates */ 222301e04c3fSmrg for (i = 0; i < dim; i++) { 222401e04c3fSmrg FETCH(&coords[i], 0, TGSI_CHAN_X + i); 222501e04c3fSmrg args[i] = &coords[i]; 222601e04c3fSmrg } 222701e04c3fSmrg for (i = dim; i < ARRAY_SIZE(coords); i++) { 222801e04c3fSmrg args[i] = &ZeroVec; 222901e04c3fSmrg } 223001e04c3fSmrg mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 223101e04c3fSmrg args[0]->f, 223201e04c3fSmrg args[1]->f, 223301e04c3fSmrg args[2]->f, 223401e04c3fSmrg args[3]->f, 223501e04c3fSmrg TGSI_SAMPLER_LOD_NONE, 223601e04c3fSmrg r[0].f, 223701e04c3fSmrg r[1].f); 223801e04c3fSmrg 223901e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 22407ec681f3Smrg store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 224101e04c3fSmrg } 224201e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 22437ec681f3Smrg store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 224401e04c3fSmrg } 224501e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 224601e04c3fSmrg unsigned char swizzles[4]; 224701e04c3fSmrg unsigned chan; 224801e04c3fSmrg swizzles[0] = inst->Src[1].Register.SwizzleX; 224901e04c3fSmrg swizzles[1] = inst->Src[1].Register.SwizzleY; 225001e04c3fSmrg swizzles[2] = inst->Src[1].Register.SwizzleZ; 225101e04c3fSmrg swizzles[3] = inst->Src[1].Register.SwizzleW; 225201e04c3fSmrg 225301e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 225401e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 225501e04c3fSmrg if (swizzles[chan] >= 2) { 225601e04c3fSmrg store_dest(mach, &ZeroVec, 22577ec681f3Smrg &inst->Dst[0], inst, chan); 225801e04c3fSmrg } else { 225901e04c3fSmrg store_dest(mach, &r[swizzles[chan]], 22607ec681f3Smrg &inst->Dst[0], inst, chan); 226101e04c3fSmrg } 226201e04c3fSmrg } 226301e04c3fSmrg } 226401e04c3fSmrg } else { 226501e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 22667ec681f3Smrg store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 226701e04c3fSmrg } 226801e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 22697ec681f3Smrg store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 227001e04c3fSmrg } 227101e04c3fSmrg } 227201e04c3fSmrg} 2273af69d88dSmrg 22744a49301eSmrgstatic void 2275cdc920a0Smrgexec_txd(struct tgsi_exec_machine *mach, 2276cdc920a0Smrg const struct tgsi_full_instruction *inst) 22774a49301eSmrg{ 2278cdc920a0Smrg union tgsi_exec_channel r[4]; 2279af69d88dSmrg float derivs[3][2][TGSI_QUAD_SIZE]; 22803464ebd5Sriastradh uint chan; 228101e04c3fSmrg uint unit; 2282af69d88dSmrg int8_t offsets[3]; 2283cdc920a0Smrg 228401e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 3); 2285af69d88dSmrg /* always fetch all 3 offsets, overkill but keeps code simple */ 2286af69d88dSmrg fetch_texel_offsets(mach, inst, offsets); 2287cdc920a0Smrg 2288cdc920a0Smrg switch (inst->Texture.Texture) { 2289cdc920a0Smrg case TGSI_TEXTURE_1D: 2290af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 2291af69d88dSmrg 2292af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2293af69d88dSmrg 2294af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2295af69d88dSmrg &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 229601e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2297af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2298af69d88dSmrg break; 2299af69d88dSmrg 2300cdc920a0Smrg case TGSI_TEXTURE_SHADOW1D: 2301af69d88dSmrg case TGSI_TEXTURE_1D_ARRAY: 2302af69d88dSmrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 2303af69d88dSmrg /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2304af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 2305af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2306af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2307cdc920a0Smrg 2308af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2309cdc920a0Smrg 2310af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2311af69d88dSmrg &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 231201e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2313cdc920a0Smrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2314cdc920a0Smrg break; 2315cdc920a0Smrg 2316cdc920a0Smrg case TGSI_TEXTURE_2D: 2317cdc920a0Smrg case TGSI_TEXTURE_RECT: 2318af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 2319af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2320af69d88dSmrg 2321af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2322af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2323af69d88dSmrg 2324af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2325af69d88dSmrg &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 232601e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2327af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2328af69d88dSmrg break; 2329cdc920a0Smrg 2330cdc920a0Smrg 2331af69d88dSmrg case TGSI_TEXTURE_SHADOW2D: 2332af69d88dSmrg case TGSI_TEXTURE_SHADOWRECT: 2333af69d88dSmrg case TGSI_TEXTURE_2D_ARRAY: 2334af69d88dSmrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 2335af69d88dSmrg /* only SHADOW2D_ARRAY actually needs W */ 2336af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 2337af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2338af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2339af69d88dSmrg FETCH(&r[3], 0, TGSI_CHAN_W); 2340af69d88dSmrg 2341af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2342af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2343af69d88dSmrg 2344af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2345af69d88dSmrg &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 234601e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2347cdc920a0Smrg &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2348cdc920a0Smrg break; 2349cdc920a0Smrg 2350cdc920a0Smrg case TGSI_TEXTURE_3D: 2351cdc920a0Smrg case TGSI_TEXTURE_CUBE: 2352af69d88dSmrg case TGSI_TEXTURE_CUBE_ARRAY: 235301e04c3fSmrg case TGSI_TEXTURE_SHADOWCUBE: 235401e04c3fSmrg /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2355af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 2356af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2357af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2358af69d88dSmrg FETCH(&r[3], 0, TGSI_CHAN_W); 2359af69d88dSmrg 2360af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2361af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2362af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2363af69d88dSmrg 2364af69d88dSmrg fetch_texel(mach->Sampler, unit, unit, 2365af69d88dSmrg &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 236601e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2367af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2368cdc920a0Smrg break; 2369cdc920a0Smrg 2370cdc920a0Smrg default: 2371cdc920a0Smrg assert(0); 2372cdc920a0Smrg } 2373cdc920a0Smrg 2374af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 23753464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 23767ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 23773464ebd5Sriastradh } 23783464ebd5Sriastradh } 23793464ebd5Sriastradh} 23803464ebd5Sriastradh 23813464ebd5Sriastradh 2382af69d88dSmrgstatic void 2383af69d88dSmrgexec_txf(struct tgsi_exec_machine *mach, 2384af69d88dSmrg const struct tgsi_full_instruction *inst) 2385af69d88dSmrg{ 2386af69d88dSmrg union tgsi_exec_channel r[4]; 2387af69d88dSmrg uint chan; 238801e04c3fSmrg uint unit; 2389af69d88dSmrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2390af69d88dSmrg int j; 2391af69d88dSmrg int8_t offsets[3]; 2392af69d88dSmrg unsigned target; 2393af69d88dSmrg 239401e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 1); 2395af69d88dSmrg /* always fetch all 3 offsets, overkill but keeps code simple */ 2396af69d88dSmrg fetch_texel_offsets(mach, inst, offsets); 2397af69d88dSmrg 2398af69d88dSmrg IFETCH(&r[3], 0, TGSI_CHAN_W); 2399af69d88dSmrg 240001e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 240101e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2402af69d88dSmrg target = mach->SamplerViews[unit].Resource; 2403af69d88dSmrg } 2404af69d88dSmrg else { 2405af69d88dSmrg target = inst->Texture.Texture; 2406af69d88dSmrg } 2407af69d88dSmrg switch(target) { 2408af69d88dSmrg case TGSI_TEXTURE_3D: 2409af69d88dSmrg case TGSI_TEXTURE_2D_ARRAY: 2410af69d88dSmrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 241101e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY_MSAA: 2412af69d88dSmrg IFETCH(&r[2], 0, TGSI_CHAN_Z); 24137ec681f3Smrg FALLTHROUGH; 2414af69d88dSmrg case TGSI_TEXTURE_2D: 2415af69d88dSmrg case TGSI_TEXTURE_RECT: 2416af69d88dSmrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 2417af69d88dSmrg case TGSI_TEXTURE_SHADOW2D: 2418af69d88dSmrg case TGSI_TEXTURE_SHADOWRECT: 2419af69d88dSmrg case TGSI_TEXTURE_1D_ARRAY: 242001e04c3fSmrg case TGSI_TEXTURE_2D_MSAA: 2421af69d88dSmrg IFETCH(&r[1], 0, TGSI_CHAN_Y); 24227ec681f3Smrg FALLTHROUGH; 2423af69d88dSmrg case TGSI_TEXTURE_BUFFER: 2424af69d88dSmrg case TGSI_TEXTURE_1D: 2425af69d88dSmrg case TGSI_TEXTURE_SHADOW1D: 2426af69d88dSmrg IFETCH(&r[0], 0, TGSI_CHAN_X); 2427af69d88dSmrg break; 2428af69d88dSmrg default: 2429af69d88dSmrg assert(0); 2430af69d88dSmrg break; 2431af69d88dSmrg } 2432af69d88dSmrg 2433af69d88dSmrg mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2434af69d88dSmrg offsets, rgba); 2435af69d88dSmrg 2436af69d88dSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2437af69d88dSmrg r[0].f[j] = rgba[0][j]; 2438af69d88dSmrg r[1].f[j] = rgba[1][j]; 2439af69d88dSmrg r[2].f[j] = rgba[2][j]; 2440af69d88dSmrg r[3].f[j] = rgba[3][j]; 2441af69d88dSmrg } 2442af69d88dSmrg 244301e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 244401e04c3fSmrg inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2445af69d88dSmrg unsigned char swizzles[4]; 2446af69d88dSmrg swizzles[0] = inst->Src[1].Register.SwizzleX; 2447af69d88dSmrg swizzles[1] = inst->Src[1].Register.SwizzleY; 2448af69d88dSmrg swizzles[2] = inst->Src[1].Register.SwizzleZ; 2449af69d88dSmrg swizzles[3] = inst->Src[1].Register.SwizzleW; 2450af69d88dSmrg 2451af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2452af69d88dSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2453af69d88dSmrg store_dest(mach, &r[swizzles[chan]], 24547ec681f3Smrg &inst->Dst[0], inst, chan); 2455af69d88dSmrg } 2456af69d88dSmrg } 2457af69d88dSmrg } 2458af69d88dSmrg else { 2459af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2460af69d88dSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 24617ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2462af69d88dSmrg } 2463af69d88dSmrg } 2464af69d88dSmrg } 2465af69d88dSmrg} 2466af69d88dSmrg 2467af69d88dSmrgstatic void 2468af69d88dSmrgexec_txq(struct tgsi_exec_machine *mach, 2469af69d88dSmrg const struct tgsi_full_instruction *inst) 2470af69d88dSmrg{ 2471af69d88dSmrg int result[4]; 2472af69d88dSmrg union tgsi_exec_channel r[4], src; 2473af69d88dSmrg uint chan; 247401e04c3fSmrg uint unit; 2475af69d88dSmrg int i,j; 2476af69d88dSmrg 247701e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 1); 247801e04c3fSmrg 2479af69d88dSmrg fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2480af69d88dSmrg 2481af69d88dSmrg /* XXX: This interface can't return per-pixel values */ 2482af69d88dSmrg mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2483af69d88dSmrg 2484af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2485af69d88dSmrg for (j = 0; j < 4; j++) { 2486af69d88dSmrg r[j].i[i] = result[j]; 2487af69d88dSmrg } 2488af69d88dSmrg } 2489af69d88dSmrg 2490af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2491af69d88dSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 24927ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2493af69d88dSmrg } 2494af69d88dSmrg } 2495af69d88dSmrg} 24963464ebd5Sriastradh 24973464ebd5Sriastradhstatic void 24983464ebd5Sriastradhexec_sample(struct tgsi_exec_machine *mach, 24993464ebd5Sriastradh const struct tgsi_full_instruction *inst, 2500af69d88dSmrg uint modifier, boolean compare) 25013464ebd5Sriastradh{ 25023464ebd5Sriastradh const uint resource_unit = inst->Src[1].Register.Index; 25033464ebd5Sriastradh const uint sampler_unit = inst->Src[2].Register.Index; 250401e04c3fSmrg union tgsi_exec_channel r[5], c1; 25053464ebd5Sriastradh const union tgsi_exec_channel *lod = &ZeroVec; 250601e04c3fSmrg enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 25073464ebd5Sriastradh uint chan; 2508af69d88dSmrg unsigned char swizzles[4]; 2509af69d88dSmrg int8_t offsets[3]; 25103464ebd5Sriastradh 2511af69d88dSmrg /* always fetch all 3 offsets, overkill but keeps code simple */ 2512af69d88dSmrg fetch_texel_offsets(mach, inst, offsets); 25133464ebd5Sriastradh 2514af69d88dSmrg assert(modifier != TEX_MODIFIER_PROJECTED); 2515af69d88dSmrg 2516af69d88dSmrg if (modifier != TEX_MODIFIER_NONE) { 2517af69d88dSmrg if (modifier == TEX_MODIFIER_LOD_BIAS) { 2518af69d88dSmrg FETCH(&c1, 3, TGSI_CHAN_X); 2519af69d88dSmrg lod = &c1; 252001e04c3fSmrg control = TGSI_SAMPLER_LOD_BIAS; 2521af69d88dSmrg } 2522af69d88dSmrg else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2523af69d88dSmrg FETCH(&c1, 3, TGSI_CHAN_X); 2524af69d88dSmrg lod = &c1; 252501e04c3fSmrg control = TGSI_SAMPLER_LOD_EXPLICIT; 252601e04c3fSmrg } 252701e04c3fSmrg else if (modifier == TEX_MODIFIER_GATHER) { 252801e04c3fSmrg control = TGSI_SAMPLER_GATHER; 2529af69d88dSmrg } 2530af69d88dSmrg else { 2531af69d88dSmrg assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 253201e04c3fSmrg control = TGSI_SAMPLER_LOD_ZERO; 25333464ebd5Sriastradh } 25343464ebd5Sriastradh } 25353464ebd5Sriastradh 2536af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 25373464ebd5Sriastradh 2538af69d88dSmrg switch (mach->SamplerViews[resource_unit].Resource) { 25393464ebd5Sriastradh case TGSI_TEXTURE_1D: 2540af69d88dSmrg if (compare) { 2541af69d88dSmrg FETCH(&r[2], 3, TGSI_CHAN_X); 2542af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2543af69d88dSmrg &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2544af69d88dSmrg NULL, offsets, control, 2545af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2546af69d88dSmrg } 2547af69d88dSmrg else { 2548af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2549af69d88dSmrg &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2550af69d88dSmrg NULL, offsets, control, 2551af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 25523464ebd5Sriastradh } 25533464ebd5Sriastradh break; 25543464ebd5Sriastradh 2555af69d88dSmrg case TGSI_TEXTURE_1D_ARRAY: 25563464ebd5Sriastradh case TGSI_TEXTURE_2D: 25573464ebd5Sriastradh case TGSI_TEXTURE_RECT: 2558af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2559af69d88dSmrg if (compare) { 2560af69d88dSmrg FETCH(&r[2], 3, TGSI_CHAN_X); 2561af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2562af69d88dSmrg &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2563af69d88dSmrg NULL, offsets, control, 2564af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2565af69d88dSmrg } 2566af69d88dSmrg else { 2567af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2568af69d88dSmrg &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2569af69d88dSmrg NULL, offsets, control, 2570af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); /* outputs */ 25713464ebd5Sriastradh } 25723464ebd5Sriastradh break; 25733464ebd5Sriastradh 2574af69d88dSmrg case TGSI_TEXTURE_2D_ARRAY: 25753464ebd5Sriastradh case TGSI_TEXTURE_3D: 25763464ebd5Sriastradh case TGSI_TEXTURE_CUBE: 2577af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2578af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2579af69d88dSmrg if(compare) { 2580af69d88dSmrg FETCH(&r[3], 3, TGSI_CHAN_X); 2581af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2582af69d88dSmrg &r[0], &r[1], &r[2], &r[3], lod, 2583af69d88dSmrg NULL, offsets, control, 2584af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); 2585af69d88dSmrg } 2586af69d88dSmrg else { 2587af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2588af69d88dSmrg &r[0], &r[1], &r[2], &ZeroVec, lod, 2589af69d88dSmrg NULL, offsets, control, 2590af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); 25913464ebd5Sriastradh } 2592af69d88dSmrg break; 25933464ebd5Sriastradh 2594af69d88dSmrg case TGSI_TEXTURE_CUBE_ARRAY: 2595af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2596af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2597af69d88dSmrg FETCH(&r[3], 0, TGSI_CHAN_W); 2598af69d88dSmrg if(compare) { 2599af69d88dSmrg FETCH(&r[4], 3, TGSI_CHAN_X); 2600af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2601af69d88dSmrg &r[0], &r[1], &r[2], &r[3], &r[4], 2602af69d88dSmrg NULL, offsets, control, 2603af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); 2604af69d88dSmrg } 2605af69d88dSmrg else { 2606af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2607af69d88dSmrg &r[0], &r[1], &r[2], &r[3], lod, 2608af69d88dSmrg NULL, offsets, control, 2609af69d88dSmrg &r[0], &r[1], &r[2], &r[3]); 2610af69d88dSmrg } 26113464ebd5Sriastradh break; 26123464ebd5Sriastradh 2613af69d88dSmrg 26143464ebd5Sriastradh default: 26153464ebd5Sriastradh assert(0); 26163464ebd5Sriastradh } 26173464ebd5Sriastradh 2618af69d88dSmrg swizzles[0] = inst->Src[1].Register.SwizzleX; 2619af69d88dSmrg swizzles[1] = inst->Src[1].Register.SwizzleY; 2620af69d88dSmrg swizzles[2] = inst->Src[1].Register.SwizzleZ; 2621af69d88dSmrg swizzles[3] = inst->Src[1].Register.SwizzleW; 2622af69d88dSmrg 2623af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 26243464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2625af69d88dSmrg store_dest(mach, &r[swizzles[chan]], 26267ec681f3Smrg &inst->Dst[0], inst, chan); 26273464ebd5Sriastradh } 26283464ebd5Sriastradh } 26293464ebd5Sriastradh} 26303464ebd5Sriastradh 26313464ebd5Sriastradhstatic void 26323464ebd5Sriastradhexec_sample_d(struct tgsi_exec_machine *mach, 26333464ebd5Sriastradh const struct tgsi_full_instruction *inst) 26343464ebd5Sriastradh{ 26353464ebd5Sriastradh const uint resource_unit = inst->Src[1].Register.Index; 26363464ebd5Sriastradh const uint sampler_unit = inst->Src[2].Register.Index; 26373464ebd5Sriastradh union tgsi_exec_channel r[4]; 2638af69d88dSmrg float derivs[3][2][TGSI_QUAD_SIZE]; 26393464ebd5Sriastradh uint chan; 2640af69d88dSmrg unsigned char swizzles[4]; 2641af69d88dSmrg int8_t offsets[3]; 2642af69d88dSmrg 2643af69d88dSmrg /* always fetch all 3 offsets, overkill but keeps code simple */ 2644af69d88dSmrg fetch_texel_offsets(mach, inst, offsets); 2645af69d88dSmrg 2646af69d88dSmrg FETCH(&r[0], 0, TGSI_CHAN_X); 26473464ebd5Sriastradh 2648af69d88dSmrg switch (mach->SamplerViews[resource_unit].Resource) { 26493464ebd5Sriastradh case TGSI_TEXTURE_1D: 2650af69d88dSmrg case TGSI_TEXTURE_1D_ARRAY: 2651af69d88dSmrg /* only 1D array actually needs Y */ 2652af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 26533464ebd5Sriastradh 2654af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 26553464ebd5Sriastradh 2656af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2657af69d88dSmrg &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 265801e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 26593464ebd5Sriastradh &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 26603464ebd5Sriastradh break; 26613464ebd5Sriastradh 26623464ebd5Sriastradh case TGSI_TEXTURE_2D: 26633464ebd5Sriastradh case TGSI_TEXTURE_RECT: 2664af69d88dSmrg case TGSI_TEXTURE_2D_ARRAY: 2665af69d88dSmrg /* only 2D array actually needs Z */ 2666af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2667af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 26683464ebd5Sriastradh 2669af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2670af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 26713464ebd5Sriastradh 2672af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2673af69d88dSmrg &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 267401e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 26753464ebd5Sriastradh &r[0], &r[1], &r[2], &r[3]); /* outputs */ 26763464ebd5Sriastradh break; 26773464ebd5Sriastradh 26783464ebd5Sriastradh case TGSI_TEXTURE_3D: 26793464ebd5Sriastradh case TGSI_TEXTURE_CUBE: 2680af69d88dSmrg case TGSI_TEXTURE_CUBE_ARRAY: 2681af69d88dSmrg /* only cube array actually needs W */ 2682af69d88dSmrg FETCH(&r[1], 0, TGSI_CHAN_Y); 2683af69d88dSmrg FETCH(&r[2], 0, TGSI_CHAN_Z); 2684af69d88dSmrg FETCH(&r[3], 0, TGSI_CHAN_W); 2685af69d88dSmrg 2686af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2687af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2688af69d88dSmrg fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2689af69d88dSmrg 2690af69d88dSmrg fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2691af69d88dSmrg &r[0], &r[1], &r[2], &r[3], &ZeroVec, 269201e04c3fSmrg derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 26933464ebd5Sriastradh &r[0], &r[1], &r[2], &r[3]); 26943464ebd5Sriastradh break; 26953464ebd5Sriastradh 26963464ebd5Sriastradh default: 26973464ebd5Sriastradh assert(0); 26983464ebd5Sriastradh } 26993464ebd5Sriastradh 2700af69d88dSmrg swizzles[0] = inst->Src[1].Register.SwizzleX; 2701af69d88dSmrg swizzles[1] = inst->Src[1].Register.SwizzleY; 2702af69d88dSmrg swizzles[2] = inst->Src[1].Register.SwizzleZ; 2703af69d88dSmrg swizzles[3] = inst->Src[1].Register.SwizzleW; 2704af69d88dSmrg 2705af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 27063464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2707af69d88dSmrg store_dest(mach, &r[swizzles[chan]], 27087ec681f3Smrg &inst->Dst[0], inst, chan); 27093464ebd5Sriastradh } 2710cdc920a0Smrg } 27114a49301eSmrg} 27124a49301eSmrg 2713cdc920a0Smrg 2714cdc920a0Smrg/** 2715cdc920a0Smrg * Evaluate a constant-valued coefficient at the position of the 2716cdc920a0Smrg * current quad. 2717cdc920a0Smrg */ 27184a49301eSmrgstatic void 2719cdc920a0Smrgeval_constant_coef( 2720cdc920a0Smrg struct tgsi_exec_machine *mach, 2721cdc920a0Smrg unsigned attrib, 2722cdc920a0Smrg unsigned chan ) 27234a49301eSmrg{ 2724cdc920a0Smrg unsigned i; 2725cdc920a0Smrg 2726af69d88dSmrg for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2727cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2728cdc920a0Smrg } 27294a49301eSmrg} 27304a49301eSmrg 2731361fc4cbSmayastatic void 2732361fc4cbSmayainterp_constant_offset( 2733361fc4cbSmaya UNUSED const struct tgsi_exec_machine *mach, 2734361fc4cbSmaya UNUSED unsigned attrib, 2735361fc4cbSmaya UNUSED unsigned chan, 2736361fc4cbSmaya UNUSED float ofs_x, 2737361fc4cbSmaya UNUSED float ofs_y, 2738361fc4cbSmaya UNUSED union tgsi_exec_channel *out_chan) 2739361fc4cbSmaya{ 2740361fc4cbSmaya} 2741361fc4cbSmaya 2742cdc920a0Smrg/** 2743cdc920a0Smrg * Evaluate a linear-valued coefficient at the position of the 2744cdc920a0Smrg * current quad. 2745cdc920a0Smrg */ 27464a49301eSmrgstatic void 2747361fc4cbSmayainterp_linear_offset( 2748361fc4cbSmaya const struct tgsi_exec_machine *mach, 2749361fc4cbSmaya unsigned attrib, 2750361fc4cbSmaya unsigned chan, 2751361fc4cbSmaya float ofs_x, 2752361fc4cbSmaya float ofs_y, 2753361fc4cbSmaya union tgsi_exec_channel *out_chan) 2754361fc4cbSmaya{ 2755361fc4cbSmaya const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2756361fc4cbSmaya const float dady = mach->InterpCoefs[attrib].dady[chan]; 2757361fc4cbSmaya const float delta = ofs_x * dadx + ofs_y * dady; 2758361fc4cbSmaya out_chan->f[0] += delta; 2759361fc4cbSmaya out_chan->f[1] += delta; 2760361fc4cbSmaya out_chan->f[2] += delta; 2761361fc4cbSmaya out_chan->f[3] += delta; 2762361fc4cbSmaya} 2763361fc4cbSmaya 2764361fc4cbSmayastatic void 2765361fc4cbSmayaeval_linear_coef(struct tgsi_exec_machine *mach, 2766361fc4cbSmaya unsigned attrib, 2767361fc4cbSmaya unsigned chan) 27684a49301eSmrg{ 2769cdc920a0Smrg const float x = mach->QuadPos.xyzw[0].f[0]; 2770cdc920a0Smrg const float y = mach->QuadPos.xyzw[1].f[0]; 2771cdc920a0Smrg const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2772cdc920a0Smrg const float dady = mach->InterpCoefs[attrib].dady[chan]; 2773cdc920a0Smrg const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2774361fc4cbSmaya 2775cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2776cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2777cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2778cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 27794a49301eSmrg} 27804a49301eSmrg 2781cdc920a0Smrg/** 2782cdc920a0Smrg * Evaluate a perspective-valued coefficient at the position of the 2783cdc920a0Smrg * current quad. 2784cdc920a0Smrg */ 2785361fc4cbSmaya 2786361fc4cbSmayastatic void 2787361fc4cbSmayainterp_perspective_offset( 2788361fc4cbSmaya const struct tgsi_exec_machine *mach, 2789361fc4cbSmaya unsigned attrib, 2790361fc4cbSmaya unsigned chan, 2791361fc4cbSmaya float ofs_x, 2792361fc4cbSmaya float ofs_y, 2793361fc4cbSmaya union tgsi_exec_channel *out_chan) 2794361fc4cbSmaya{ 2795361fc4cbSmaya const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2796361fc4cbSmaya const float dady = mach->InterpCoefs[attrib].dady[chan]; 2797361fc4cbSmaya const float *w = mach->QuadPos.xyzw[3].f; 2798361fc4cbSmaya const float delta = ofs_x * dadx + ofs_y * dady; 2799361fc4cbSmaya out_chan->f[0] += delta / w[0]; 2800361fc4cbSmaya out_chan->f[1] += delta / w[1]; 2801361fc4cbSmaya out_chan->f[2] += delta / w[2]; 2802361fc4cbSmaya out_chan->f[3] += delta / w[3]; 2803361fc4cbSmaya} 2804361fc4cbSmaya 28054a49301eSmrgstatic void 2806cdc920a0Smrgeval_perspective_coef( 2807cdc920a0Smrg struct tgsi_exec_machine *mach, 2808cdc920a0Smrg unsigned attrib, 2809cdc920a0Smrg unsigned chan ) 28104a49301eSmrg{ 2811cdc920a0Smrg const float x = mach->QuadPos.xyzw[0].f[0]; 2812cdc920a0Smrg const float y = mach->QuadPos.xyzw[1].f[0]; 2813cdc920a0Smrg const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2814cdc920a0Smrg const float dady = mach->InterpCoefs[attrib].dady[chan]; 2815cdc920a0Smrg const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2816cdc920a0Smrg const float *w = mach->QuadPos.xyzw[3].f; 2817cdc920a0Smrg /* divide by W here */ 2818cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2819cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2820cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2821cdc920a0Smrg mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 28224a49301eSmrg} 2823cdc920a0Smrg 2824cdc920a0Smrg 2825cdc920a0Smrgtypedef void (* eval_coef_func)( 2826cdc920a0Smrg struct tgsi_exec_machine *mach, 2827cdc920a0Smrg unsigned attrib, 2828cdc920a0Smrg unsigned chan ); 28294a49301eSmrg 28304a49301eSmrgstatic void 2831cdc920a0Smrgexec_declaration(struct tgsi_exec_machine *mach, 2832cdc920a0Smrg const struct tgsi_full_declaration *decl) 28334a49301eSmrg{ 2834af69d88dSmrg if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2835af69d88dSmrg mach->SamplerViews[decl->Range.First] = decl->SamplerView; 28363464ebd5Sriastradh return; 28373464ebd5Sriastradh } 28383464ebd5Sriastradh 283901e04c3fSmrg if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 28403464ebd5Sriastradh if (decl->Declaration.File == TGSI_FILE_INPUT) { 2841cdc920a0Smrg uint first, last, mask; 28424a49301eSmrg 2843cdc920a0Smrg first = decl->Range.First; 2844cdc920a0Smrg last = decl->Range.Last; 2845cdc920a0Smrg mask = decl->Declaration.UsageMask; 28464a49301eSmrg 28473464ebd5Sriastradh /* XXX we could remove this special-case code since 28483464ebd5Sriastradh * mach->InterpCoefs[first].a0 should already have the 28493464ebd5Sriastradh * front/back-face value. But we should first update the 28503464ebd5Sriastradh * ureg code to emit the right UsageMask value (WRITEMASK_X). 28513464ebd5Sriastradh * Then, we could remove the tgsi_exec_machine::Face field. 28523464ebd5Sriastradh */ 28533464ebd5Sriastradh /* XXX make FACE a system value */ 2854cdc920a0Smrg if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2855cdc920a0Smrg uint i; 28564a49301eSmrg 2857cdc920a0Smrg assert(decl->Semantic.Index == 0); 2858cdc920a0Smrg assert(first == last); 28594a49301eSmrg 2860af69d88dSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2861cdc920a0Smrg mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2862cdc920a0Smrg } 2863cdc920a0Smrg } else { 2864cdc920a0Smrg eval_coef_func eval; 2865361fc4cbSmaya apply_sample_offset_func interp; 2866cdc920a0Smrg uint i, j; 2867cdc920a0Smrg 2868af69d88dSmrg switch (decl->Interp.Interpolate) { 2869cdc920a0Smrg case TGSI_INTERPOLATE_CONSTANT: 2870cdc920a0Smrg eval = eval_constant_coef; 2871361fc4cbSmaya interp = interp_constant_offset; 2872cdc920a0Smrg break; 2873cdc920a0Smrg 2874cdc920a0Smrg case TGSI_INTERPOLATE_LINEAR: 2875cdc920a0Smrg eval = eval_linear_coef; 2876361fc4cbSmaya interp = interp_linear_offset; 2877cdc920a0Smrg break; 2878cdc920a0Smrg 2879cdc920a0Smrg case TGSI_INTERPOLATE_PERSPECTIVE: 2880cdc920a0Smrg eval = eval_perspective_coef; 2881361fc4cbSmaya interp = interp_perspective_offset; 2882cdc920a0Smrg break; 2883cdc920a0Smrg 2884af69d88dSmrg case TGSI_INTERPOLATE_COLOR: 2885af69d88dSmrg eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 28867ec681f3Smrg interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset; 2887af69d88dSmrg break; 2888af69d88dSmrg 2889cdc920a0Smrg default: 2890cdc920a0Smrg assert(0); 2891cdc920a0Smrg return; 2892cdc920a0Smrg } 28934a49301eSmrg 2894361fc4cbSmaya for (i = first; i <= last; i++) 2895361fc4cbSmaya mach->InputSampleOffsetApply[i] = interp; 2896361fc4cbSmaya 2897af69d88dSmrg for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2898cdc920a0Smrg if (mask & (1 << j)) { 2899cdc920a0Smrg for (i = first; i <= last; i++) { 2900cdc920a0Smrg eval(mach, i, j); 2901cdc920a0Smrg } 2902cdc920a0Smrg } 2903cdc920a0Smrg } 2904cdc920a0Smrg } 2905af69d88dSmrg 2906af69d88dSmrg if (DEBUG_EXECUTION) { 2907af69d88dSmrg uint i, j; 2908af69d88dSmrg for (i = first; i <= last; ++i) { 2909af69d88dSmrg debug_printf("IN[%2u] = ", i); 2910af69d88dSmrg for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2911af69d88dSmrg if (j > 0) { 2912af69d88dSmrg debug_printf(" "); 2913af69d88dSmrg } 2914af69d88dSmrg debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 2915af69d88dSmrg mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 2916af69d88dSmrg mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 2917af69d88dSmrg mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 2918af69d88dSmrg mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 2919af69d88dSmrg } 2920af69d88dSmrg } 2921af69d88dSmrg } 2922cdc920a0Smrg } 2923cdc920a0Smrg } 29243464ebd5Sriastradh 2925cdc920a0Smrg} 29264a49301eSmrg 2927cdc920a0Smrgtypedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 2928cdc920a0Smrg const union tgsi_exec_channel *src); 29294a49301eSmrg 2930cdc920a0Smrgstatic void 2931cdc920a0Smrgexec_scalar_unary(struct tgsi_exec_machine *mach, 2932cdc920a0Smrg const struct tgsi_full_instruction *inst, 2933cdc920a0Smrg micro_unary_op op, 2934cdc920a0Smrg enum tgsi_exec_datatype src_datatype) 2935cdc920a0Smrg{ 2936cdc920a0Smrg unsigned int chan; 2937cdc920a0Smrg union tgsi_exec_channel src; 2938cdc920a0Smrg union tgsi_exec_channel dst; 29394a49301eSmrg 2940af69d88dSmrg fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 2941cdc920a0Smrg op(&dst, &src); 2942af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2943cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 29447ec681f3Smrg store_dest(mach, &dst, &inst->Dst[0], inst, chan); 29454a49301eSmrg } 29464a49301eSmrg } 29474a49301eSmrg} 29484a49301eSmrg 29494a49301eSmrgstatic void 2950cdc920a0Smrgexec_vector_unary(struct tgsi_exec_machine *mach, 2951cdc920a0Smrg const struct tgsi_full_instruction *inst, 2952cdc920a0Smrg micro_unary_op op, 2953cdc920a0Smrg enum tgsi_exec_datatype src_datatype) 29544a49301eSmrg{ 2955cdc920a0Smrg unsigned int chan; 2956cdc920a0Smrg struct tgsi_exec_vector dst; 29574a49301eSmrg 2958af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2959cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2960cdc920a0Smrg union tgsi_exec_channel src; 29614a49301eSmrg 2962cdc920a0Smrg fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2963cdc920a0Smrg op(&dst.xyzw[chan], &src); 29644a49301eSmrg } 29654a49301eSmrg } 2966af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2967cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 29687ec681f3Smrg store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 29694a49301eSmrg } 2970cdc920a0Smrg } 2971cdc920a0Smrg} 29724a49301eSmrg 2973cdc920a0Smrgtypedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2974cdc920a0Smrg const union tgsi_exec_channel *src0, 2975cdc920a0Smrg const union tgsi_exec_channel *src1); 29764a49301eSmrg 29773464ebd5Sriastradhstatic void 29783464ebd5Sriastradhexec_scalar_binary(struct tgsi_exec_machine *mach, 29793464ebd5Sriastradh const struct tgsi_full_instruction *inst, 29803464ebd5Sriastradh micro_binary_op op, 29813464ebd5Sriastradh enum tgsi_exec_datatype src_datatype) 29823464ebd5Sriastradh{ 29833464ebd5Sriastradh unsigned int chan; 29843464ebd5Sriastradh union tgsi_exec_channel src[2]; 29853464ebd5Sriastradh union tgsi_exec_channel dst; 29863464ebd5Sriastradh 2987af69d88dSmrg fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 2988af69d88dSmrg fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 29893464ebd5Sriastradh op(&dst, &src[0], &src[1]); 2990af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 29913464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 29927ec681f3Smrg store_dest(mach, &dst, &inst->Dst[0], inst, chan); 29933464ebd5Sriastradh } 29943464ebd5Sriastradh } 29953464ebd5Sriastradh} 29963464ebd5Sriastradh 2997cdc920a0Smrgstatic void 2998cdc920a0Smrgexec_vector_binary(struct tgsi_exec_machine *mach, 2999cdc920a0Smrg const struct tgsi_full_instruction *inst, 3000cdc920a0Smrg micro_binary_op op, 3001cdc920a0Smrg enum tgsi_exec_datatype src_datatype) 3002cdc920a0Smrg{ 3003cdc920a0Smrg unsigned int chan; 3004cdc920a0Smrg struct tgsi_exec_vector dst; 30054a49301eSmrg 3006af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3007cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3008cdc920a0Smrg union tgsi_exec_channel src[2]; 30094a49301eSmrg 3010cdc920a0Smrg fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3011cdc920a0Smrg fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3012cdc920a0Smrg op(&dst.xyzw[chan], &src[0], &src[1]); 30134a49301eSmrg } 30144a49301eSmrg } 3015af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3016cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 30177ec681f3Smrg store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3018cdc920a0Smrg } 3019cdc920a0Smrg } 3020cdc920a0Smrg} 30214a49301eSmrg 3022cdc920a0Smrgtypedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3023cdc920a0Smrg const union tgsi_exec_channel *src0, 3024cdc920a0Smrg const union tgsi_exec_channel *src1, 3025cdc920a0Smrg const union tgsi_exec_channel *src2); 30264a49301eSmrg 3027cdc920a0Smrgstatic void 3028cdc920a0Smrgexec_vector_trinary(struct tgsi_exec_machine *mach, 3029cdc920a0Smrg const struct tgsi_full_instruction *inst, 3030cdc920a0Smrg micro_trinary_op op, 3031cdc920a0Smrg enum tgsi_exec_datatype src_datatype) 3032cdc920a0Smrg{ 3033cdc920a0Smrg unsigned int chan; 3034cdc920a0Smrg struct tgsi_exec_vector dst; 30354a49301eSmrg 3036af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3037cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3038cdc920a0Smrg union tgsi_exec_channel src[3]; 30394a49301eSmrg 3040cdc920a0Smrg fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3041cdc920a0Smrg fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3042cdc920a0Smrg fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3043cdc920a0Smrg op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3044cdc920a0Smrg } 30454a49301eSmrg } 3046af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3047af69d88dSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 30487ec681f3Smrg store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3049af69d88dSmrg } 3050af69d88dSmrg } 3051af69d88dSmrg} 3052af69d88dSmrg 3053af69d88dSmrgtypedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3054af69d88dSmrg const union tgsi_exec_channel *src0, 3055af69d88dSmrg const union tgsi_exec_channel *src1, 3056af69d88dSmrg const union tgsi_exec_channel *src2, 3057af69d88dSmrg const union tgsi_exec_channel *src3); 3058af69d88dSmrg 3059af69d88dSmrgstatic void 3060af69d88dSmrgexec_vector_quaternary(struct tgsi_exec_machine *mach, 3061af69d88dSmrg const struct tgsi_full_instruction *inst, 3062af69d88dSmrg micro_quaternary_op op, 3063af69d88dSmrg enum tgsi_exec_datatype src_datatype) 3064af69d88dSmrg{ 3065af69d88dSmrg unsigned int chan; 3066af69d88dSmrg struct tgsi_exec_vector dst; 3067af69d88dSmrg 3068af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3069af69d88dSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3070af69d88dSmrg union tgsi_exec_channel src[4]; 3071af69d88dSmrg 3072af69d88dSmrg fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3073af69d88dSmrg fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3074af69d88dSmrg fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3075af69d88dSmrg fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3076af69d88dSmrg op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3077af69d88dSmrg } 3078af69d88dSmrg } 3079af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3080cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 30817ec681f3Smrg store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3082cdc920a0Smrg } 30834a49301eSmrg } 30844a49301eSmrg} 30854a49301eSmrg 30864a49301eSmrgstatic void 3087cdc920a0Smrgexec_dp3(struct tgsi_exec_machine *mach, 3088cdc920a0Smrg const struct tgsi_full_instruction *inst) 30894a49301eSmrg{ 3090cdc920a0Smrg unsigned int chan; 3091cdc920a0Smrg union tgsi_exec_channel arg[3]; 30924a49301eSmrg 3093af69d88dSmrg fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3094af69d88dSmrg fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3095cdc920a0Smrg micro_mul(&arg[2], &arg[0], &arg[1]); 30964a49301eSmrg 3097af69d88dSmrg for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3098cdc920a0Smrg fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3099cdc920a0Smrg fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3100cdc920a0Smrg micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 31014a49301eSmrg } 31024a49301eSmrg 3103af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3104cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 31057ec681f3Smrg store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3106cdc920a0Smrg } 31074a49301eSmrg } 3108cdc920a0Smrg} 31094a49301eSmrg 3110cdc920a0Smrgstatic void 3111cdc920a0Smrgexec_dp4(struct tgsi_exec_machine *mach, 3112cdc920a0Smrg const struct tgsi_full_instruction *inst) 3113cdc920a0Smrg{ 3114cdc920a0Smrg unsigned int chan; 3115cdc920a0Smrg union tgsi_exec_channel arg[3]; 31164a49301eSmrg 3117af69d88dSmrg fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3118af69d88dSmrg fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3119cdc920a0Smrg micro_mul(&arg[2], &arg[0], &arg[1]); 3120cdc920a0Smrg 3121af69d88dSmrg for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3122cdc920a0Smrg fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3123cdc920a0Smrg fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3124cdc920a0Smrg micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3125cdc920a0Smrg } 3126cdc920a0Smrg 3127af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3128cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 31297ec681f3Smrg store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3130cdc920a0Smrg } 31314a49301eSmrg } 31324a49301eSmrg} 31334a49301eSmrg 3134cdc920a0Smrgstatic void 313501e04c3fSmrgexec_dp2(struct tgsi_exec_machine *mach, 313601e04c3fSmrg const struct tgsi_full_instruction *inst) 3137cdc920a0Smrg{ 3138cdc920a0Smrg unsigned int chan; 3139cdc920a0Smrg union tgsi_exec_channel arg[3]; 31404a49301eSmrg 3141af69d88dSmrg fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3142af69d88dSmrg fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3143cdc920a0Smrg micro_mul(&arg[2], &arg[0], &arg[1]); 31444a49301eSmrg 3145af69d88dSmrg fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3146af69d88dSmrg fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 314701e04c3fSmrg micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3148cdc920a0Smrg 3149af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3150cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 31517ec681f3Smrg store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3152cdc920a0Smrg } 3153cdc920a0Smrg } 3154cdc920a0Smrg} 31554a49301eSmrg 31564a49301eSmrgstatic void 315701e04c3fSmrgexec_pk2h(struct tgsi_exec_machine *mach, 315801e04c3fSmrg const struct tgsi_full_instruction *inst) 31594a49301eSmrg{ 316001e04c3fSmrg unsigned chan; 316101e04c3fSmrg union tgsi_exec_channel arg[2], dst; 31624a49301eSmrg 3163af69d88dSmrg fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 316401e04c3fSmrg fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 316501e04c3fSmrg for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 31667ec681f3Smrg dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) | 31677ec681f3Smrg (_mesa_float_to_half(arg[1].f[chan]) << 16); 316801e04c3fSmrg } 3169af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3170cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 31717ec681f3Smrg store_dest(mach, &dst, &inst->Dst[0], inst, chan); 3172cdc920a0Smrg } 31734a49301eSmrg } 3174cdc920a0Smrg} 31754a49301eSmrg 3176cdc920a0Smrgstatic void 317701e04c3fSmrgexec_up2h(struct tgsi_exec_machine *mach, 317801e04c3fSmrg const struct tgsi_full_instruction *inst) 3179cdc920a0Smrg{ 318001e04c3fSmrg unsigned chan; 318101e04c3fSmrg union tgsi_exec_channel arg, dst[2]; 3182cdc920a0Smrg 318301e04c3fSmrg fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 318401e04c3fSmrg for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 31857ec681f3Smrg dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff); 31867ec681f3Smrg dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16); 318701e04c3fSmrg } 3188af69d88dSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3189cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 31907ec681f3Smrg store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan); 3191cdc920a0Smrg } 3192cdc920a0Smrg } 31934a49301eSmrg} 31944a49301eSmrg 31954a49301eSmrgstatic void 319601e04c3fSmrgmicro_ucmp(union tgsi_exec_channel *dst, 319701e04c3fSmrg const union tgsi_exec_channel *src0, 319801e04c3fSmrg const union tgsi_exec_channel *src1, 319901e04c3fSmrg const union tgsi_exec_channel *src2) 320001e04c3fSmrg{ 320101e04c3fSmrg dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 320201e04c3fSmrg dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 320301e04c3fSmrg dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 320401e04c3fSmrg dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; 320501e04c3fSmrg} 320601e04c3fSmrg 320701e04c3fSmrgstatic void 320801e04c3fSmrgexec_ucmp(struct tgsi_exec_machine *mach, 32094a49301eSmrg const struct tgsi_full_instruction *inst) 32104a49301eSmrg{ 3211cdc920a0Smrg unsigned int chan; 321201e04c3fSmrg struct tgsi_exec_vector dst; 3213cdc920a0Smrg 321401e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 321501e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 321601e04c3fSmrg union tgsi_exec_channel src[3]; 3217cdc920a0Smrg 321801e04c3fSmrg fetch_source(mach, &src[0], &inst->Src[0], chan, 321901e04c3fSmrg TGSI_EXEC_DATA_UINT); 322001e04c3fSmrg fetch_source(mach, &src[1], &inst->Src[1], chan, 322101e04c3fSmrg TGSI_EXEC_DATA_FLOAT); 322201e04c3fSmrg fetch_source(mach, &src[2], &inst->Src[2], chan, 322301e04c3fSmrg TGSI_EXEC_DATA_FLOAT); 322401e04c3fSmrg micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 322501e04c3fSmrg } 3226cdc920a0Smrg } 322701e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3228cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 32297ec681f3Smrg store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3230cdc920a0Smrg } 3231cdc920a0Smrg } 32324a49301eSmrg} 32334a49301eSmrg 3234cdc920a0Smrgstatic void 323501e04c3fSmrgexec_dst(struct tgsi_exec_machine *mach, 323601e04c3fSmrg const struct tgsi_full_instruction *inst) 3237cdc920a0Smrg{ 323801e04c3fSmrg union tgsi_exec_channel r[2]; 323901e04c3fSmrg union tgsi_exec_channel d[4]; 3240cdc920a0Smrg 324101e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 324201e04c3fSmrg fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 324301e04c3fSmrg fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 324401e04c3fSmrg micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 324501e04c3fSmrg } 324601e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 324701e04c3fSmrg fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 324801e04c3fSmrg } 324901e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 325001e04c3fSmrg fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3251cdc920a0Smrg } 3252cdc920a0Smrg 325301e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 32547ec681f3Smrg store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 325501e04c3fSmrg } 325601e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 32577ec681f3Smrg store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 325801e04c3fSmrg } 325901e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 32607ec681f3Smrg store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 326101e04c3fSmrg } 3262cdc920a0Smrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 32637ec681f3Smrg store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W); 3264cdc920a0Smrg } 3265cdc920a0Smrg} 32664a49301eSmrg 32673464ebd5Sriastradhstatic void 326801e04c3fSmrgexec_log(struct tgsi_exec_machine *mach, 32693464ebd5Sriastradh const struct tgsi_full_instruction *inst) 32703464ebd5Sriastradh{ 327101e04c3fSmrg union tgsi_exec_channel r[3]; 32723464ebd5Sriastradh 327301e04c3fSmrg fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 327401e04c3fSmrg micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 327501e04c3fSmrg micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 327601e04c3fSmrg micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 327701e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 32787ec681f3Smrg store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 327901e04c3fSmrg } 328001e04c3fSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 328101e04c3fSmrg micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 328201e04c3fSmrg micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 32837ec681f3Smrg store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y); 32843464ebd5Sriastradh } 32853464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 32867ec681f3Smrg store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z); 32873464ebd5Sriastradh } 32883464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 32897ec681f3Smrg store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 32903464ebd5Sriastradh } 32913464ebd5Sriastradh} 32923464ebd5Sriastradh 32933464ebd5Sriastradhstatic void 329401e04c3fSmrgexec_exp(struct tgsi_exec_machine *mach, 32953464ebd5Sriastradh const struct tgsi_full_instruction *inst) 32963464ebd5Sriastradh{ 329701e04c3fSmrg union tgsi_exec_channel r[3]; 329801e04c3fSmrg 329901e04c3fSmrg fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 330001e04c3fSmrg micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 33013464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 330201e04c3fSmrg micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 33037ec681f3Smrg store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X); 33043464ebd5Sriastradh } 33053464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 330601e04c3fSmrg micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 33077ec681f3Smrg store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y); 33083464ebd5Sriastradh } 33093464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 331001e04c3fSmrg micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 33117ec681f3Smrg store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z); 33123464ebd5Sriastradh } 33133464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 33147ec681f3Smrg store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 33153464ebd5Sriastradh } 33163464ebd5Sriastradh} 33173464ebd5Sriastradh 33183464ebd5Sriastradhstatic void 331901e04c3fSmrgexec_lit(struct tgsi_exec_machine *mach, 33203464ebd5Sriastradh const struct tgsi_full_instruction *inst) 33213464ebd5Sriastradh{ 332201e04c3fSmrg union tgsi_exec_channel r[3]; 332301e04c3fSmrg union tgsi_exec_channel d[3]; 33243464ebd5Sriastradh 33253464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3326af69d88dSmrg fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 33273464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3328af69d88dSmrg fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 33293464ebd5Sriastradh micro_max(&r[1], &r[1], &ZeroVec); 33303464ebd5Sriastradh 3331af69d88dSmrg fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 33323464ebd5Sriastradh micro_min(&r[2], &r[2], &P128Vec); 33333464ebd5Sriastradh micro_max(&r[2], &r[2], &M128Vec); 33343464ebd5Sriastradh micro_pow(&r[1], &r[1], &r[2]); 3335af69d88dSmrg micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 33367ec681f3Smrg store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 3337af69d88dSmrg } 3338af69d88dSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3339af69d88dSmrg micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 33407ec681f3Smrg store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 33413464ebd5Sriastradh } 33423464ebd5Sriastradh } 3343af69d88dSmrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 33447ec681f3Smrg store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 3345af69d88dSmrg } 3346af69d88dSmrg 33473464ebd5Sriastradh if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 33487ec681f3Smrg store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 33493464ebd5Sriastradh } 33503464ebd5Sriastradh} 33513464ebd5Sriastradh 33524a49301eSmrgstatic void 3353cdc920a0Smrgexec_break(struct tgsi_exec_machine *mach) 33544a49301eSmrg{ 3355cdc920a0Smrg if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3356cdc920a0Smrg /* turn off loop channels for each enabled exec channel */ 3357cdc920a0Smrg mach->LoopMask &= ~mach->ExecMask; 3358cdc920a0Smrg /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3359cdc920a0Smrg UPDATE_EXEC_MASK(mach); 3360cdc920a0Smrg } else { 3361cdc920a0Smrg assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 33624a49301eSmrg 3363cdc920a0Smrg mach->Switch.mask = 0x0; 33644a49301eSmrg 3365cdc920a0Smrg UPDATE_EXEC_MASK(mach); 33664a49301eSmrg } 33674a49301eSmrg} 33684a49301eSmrg 3369cdc920a0Smrgstatic void 3370cdc920a0Smrgexec_switch(struct tgsi_exec_machine *mach, 3371cdc920a0Smrg const struct tgsi_full_instruction *inst) 3372cdc920a0Smrg{ 3373cdc920a0Smrg assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3374cdc920a0Smrg assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3375cdc920a0Smrg 3376cdc920a0Smrg mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3377af69d88dSmrg fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3378cdc920a0Smrg mach->Switch.mask = 0x0; 3379cdc920a0Smrg mach->Switch.defaultMask = 0x0; 3380cdc920a0Smrg 3381cdc920a0Smrg mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3382cdc920a0Smrg mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3383cdc920a0Smrg 3384cdc920a0Smrg UPDATE_EXEC_MASK(mach); 3385cdc920a0Smrg} 33864a49301eSmrg 33874a49301eSmrgstatic void 3388cdc920a0Smrgexec_case(struct tgsi_exec_machine *mach, 3389cdc920a0Smrg const struct tgsi_full_instruction *inst) 33904a49301eSmrg{ 3391cdc920a0Smrg uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3392cdc920a0Smrg union tgsi_exec_channel src; 3393cdc920a0Smrg uint mask = 0; 33944a49301eSmrg 3395af69d88dSmrg fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 33964a49301eSmrg 3397cdc920a0Smrg if (mach->Switch.selector.u[0] == src.u[0]) { 3398cdc920a0Smrg mask |= 0x1; 3399cdc920a0Smrg } 3400cdc920a0Smrg if (mach->Switch.selector.u[1] == src.u[1]) { 3401cdc920a0Smrg mask |= 0x2; 3402cdc920a0Smrg } 3403cdc920a0Smrg if (mach->Switch.selector.u[2] == src.u[2]) { 3404cdc920a0Smrg mask |= 0x4; 3405cdc920a0Smrg } 3406cdc920a0Smrg if (mach->Switch.selector.u[3] == src.u[3]) { 3407cdc920a0Smrg mask |= 0x8; 3408cdc920a0Smrg } 34094a49301eSmrg 341001e04c3fSmrg mach->Switch.defaultMask |= mask; 341101e04c3fSmrg 341201e04c3fSmrg mach->Switch.mask |= mask & prevMask; 341301e04c3fSmrg 341401e04c3fSmrg UPDATE_EXEC_MASK(mach); 341501e04c3fSmrg} 341601e04c3fSmrg 341701e04c3fSmrg/* FIXME: this will only work if default is last */ 341801e04c3fSmrgstatic void 341901e04c3fSmrgexec_default(struct tgsi_exec_machine *mach) 342001e04c3fSmrg{ 342101e04c3fSmrg uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 342201e04c3fSmrg 342301e04c3fSmrg mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 342401e04c3fSmrg 342501e04c3fSmrg UPDATE_EXEC_MASK(mach); 342601e04c3fSmrg} 342701e04c3fSmrg 342801e04c3fSmrgstatic void 342901e04c3fSmrgexec_endswitch(struct tgsi_exec_machine *mach) 343001e04c3fSmrg{ 343101e04c3fSmrg mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 343201e04c3fSmrg mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 343301e04c3fSmrg 343401e04c3fSmrg UPDATE_EXEC_MASK(mach); 343501e04c3fSmrg} 343601e04c3fSmrg 343701e04c3fSmrgtypedef void (* micro_dop)(union tgsi_double_channel *dst, 343801e04c3fSmrg const union tgsi_double_channel *src); 343901e04c3fSmrg 344001e04c3fSmrgtypedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 344101e04c3fSmrg const union tgsi_double_channel *src0, 344201e04c3fSmrg union tgsi_exec_channel *src1); 344301e04c3fSmrg 344401e04c3fSmrgtypedef void (* micro_dop_s)(union tgsi_double_channel *dst, 344501e04c3fSmrg const union tgsi_exec_channel *src); 344601e04c3fSmrg 344701e04c3fSmrgtypedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 344801e04c3fSmrg const union tgsi_double_channel *src); 344901e04c3fSmrg 345001e04c3fSmrgstatic void 345101e04c3fSmrgfetch_double_channel(struct tgsi_exec_machine *mach, 345201e04c3fSmrg union tgsi_double_channel *chan, 345301e04c3fSmrg const struct tgsi_full_src_register *reg, 345401e04c3fSmrg uint chan_0, 345501e04c3fSmrg uint chan_1) 345601e04c3fSmrg{ 345701e04c3fSmrg union tgsi_exec_channel src[2]; 345801e04c3fSmrg uint i; 345901e04c3fSmrg 346001e04c3fSmrg fetch_source_d(mach, &src[0], reg, chan_0); 346101e04c3fSmrg fetch_source_d(mach, &src[1], reg, chan_1); 346201e04c3fSmrg 346301e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 346401e04c3fSmrg chan->u[i][0] = src[0].u[i]; 346501e04c3fSmrg chan->u[i][1] = src[1].u[i]; 346601e04c3fSmrg } 34677ec681f3Smrg assert(!reg->Register.Absolute); 34687ec681f3Smrg assert(!reg->Register.Negate); 346901e04c3fSmrg} 347001e04c3fSmrg 347101e04c3fSmrgstatic void 347201e04c3fSmrgstore_double_channel(struct tgsi_exec_machine *mach, 347301e04c3fSmrg const union tgsi_double_channel *chan, 347401e04c3fSmrg const struct tgsi_full_dst_register *reg, 347501e04c3fSmrg const struct tgsi_full_instruction *inst, 347601e04c3fSmrg uint chan_0, 347701e04c3fSmrg uint chan_1) 347801e04c3fSmrg{ 347901e04c3fSmrg union tgsi_exec_channel dst[2]; 348001e04c3fSmrg uint i; 348101e04c3fSmrg union tgsi_double_channel temp; 348201e04c3fSmrg const uint execmask = mach->ExecMask; 348301e04c3fSmrg 348401e04c3fSmrg if (!inst->Instruction.Saturate) { 348501e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 348601e04c3fSmrg if (execmask & (1 << i)) { 348701e04c3fSmrg dst[0].u[i] = chan->u[i][0]; 348801e04c3fSmrg dst[1].u[i] = chan->u[i][1]; 348901e04c3fSmrg } 349001e04c3fSmrg } 349101e04c3fSmrg else { 349201e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) 349301e04c3fSmrg if (execmask & (1 << i)) { 34947ec681f3Smrg if (chan->d[i] < 0.0 || isnan(chan->d[i])) 349501e04c3fSmrg temp.d[i] = 0.0; 349601e04c3fSmrg else if (chan->d[i] > 1.0) 349701e04c3fSmrg temp.d[i] = 1.0; 349801e04c3fSmrg else 349901e04c3fSmrg temp.d[i] = chan->d[i]; 350001e04c3fSmrg 350101e04c3fSmrg dst[0].u[i] = temp.u[i][0]; 350201e04c3fSmrg dst[1].u[i] = temp.u[i][1]; 350301e04c3fSmrg } 350401e04c3fSmrg } 350501e04c3fSmrg 35067ec681f3Smrg store_dest_double(mach, &dst[0], reg, chan_0); 350701e04c3fSmrg if (chan_1 != (unsigned)-1) 35087ec681f3Smrg store_dest_double(mach, &dst[1], reg, chan_1); 350901e04c3fSmrg} 351001e04c3fSmrg 351101e04c3fSmrgstatic void 351201e04c3fSmrgexec_double_unary(struct tgsi_exec_machine *mach, 351301e04c3fSmrg const struct tgsi_full_instruction *inst, 351401e04c3fSmrg micro_dop op) 351501e04c3fSmrg{ 351601e04c3fSmrg union tgsi_double_channel src; 351701e04c3fSmrg union tgsi_double_channel dst; 351801e04c3fSmrg 351901e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 352001e04c3fSmrg fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 352101e04c3fSmrg op(&dst, &src); 352201e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 352301e04c3fSmrg } 352401e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 352501e04c3fSmrg fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 352601e04c3fSmrg op(&dst, &src); 352701e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 352801e04c3fSmrg } 352901e04c3fSmrg} 353001e04c3fSmrg 353101e04c3fSmrgstatic void 353201e04c3fSmrgexec_double_binary(struct tgsi_exec_machine *mach, 353301e04c3fSmrg const struct tgsi_full_instruction *inst, 353401e04c3fSmrg micro_dop op, 353501e04c3fSmrg enum tgsi_exec_datatype dst_datatype) 353601e04c3fSmrg{ 353701e04c3fSmrg union tgsi_double_channel src[2]; 353801e04c3fSmrg union tgsi_double_channel dst; 353901e04c3fSmrg int first_dest_chan, second_dest_chan; 354001e04c3fSmrg int wmask; 354101e04c3fSmrg 354201e04c3fSmrg wmask = inst->Dst[0].Register.WriteMask; 354301e04c3fSmrg /* these are & because of the way DSLT etc store their destinations */ 354401e04c3fSmrg if (wmask & TGSI_WRITEMASK_XY) { 354501e04c3fSmrg first_dest_chan = TGSI_CHAN_X; 354601e04c3fSmrg second_dest_chan = TGSI_CHAN_Y; 354701e04c3fSmrg if (dst_datatype == TGSI_EXEC_DATA_UINT) { 354801e04c3fSmrg first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 354901e04c3fSmrg second_dest_chan = -1; 355001e04c3fSmrg } 355101e04c3fSmrg 355201e04c3fSmrg fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 355301e04c3fSmrg fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 355401e04c3fSmrg op(&dst, src); 355501e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 355601e04c3fSmrg } 355701e04c3fSmrg 355801e04c3fSmrg if (wmask & TGSI_WRITEMASK_ZW) { 355901e04c3fSmrg first_dest_chan = TGSI_CHAN_Z; 356001e04c3fSmrg second_dest_chan = TGSI_CHAN_W; 356101e04c3fSmrg if (dst_datatype == TGSI_EXEC_DATA_UINT) { 356201e04c3fSmrg first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 356301e04c3fSmrg second_dest_chan = -1; 356401e04c3fSmrg } 356501e04c3fSmrg 356601e04c3fSmrg fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 356701e04c3fSmrg fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 356801e04c3fSmrg op(&dst, src); 356901e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 357001e04c3fSmrg } 357101e04c3fSmrg} 357201e04c3fSmrg 357301e04c3fSmrgstatic void 357401e04c3fSmrgexec_double_trinary(struct tgsi_exec_machine *mach, 357501e04c3fSmrg const struct tgsi_full_instruction *inst, 357601e04c3fSmrg micro_dop op) 357701e04c3fSmrg{ 357801e04c3fSmrg union tgsi_double_channel src[3]; 357901e04c3fSmrg union tgsi_double_channel dst; 358001e04c3fSmrg 358101e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 358201e04c3fSmrg fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 358301e04c3fSmrg fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 358401e04c3fSmrg fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 358501e04c3fSmrg op(&dst, src); 358601e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 358701e04c3fSmrg } 358801e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 358901e04c3fSmrg fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 359001e04c3fSmrg fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 359101e04c3fSmrg fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 359201e04c3fSmrg op(&dst, src); 359301e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 359401e04c3fSmrg } 359501e04c3fSmrg} 359601e04c3fSmrg 359701e04c3fSmrgstatic void 359801e04c3fSmrgexec_dldexp(struct tgsi_exec_machine *mach, 359901e04c3fSmrg const struct tgsi_full_instruction *inst) 360001e04c3fSmrg{ 360101e04c3fSmrg union tgsi_double_channel src0; 360201e04c3fSmrg union tgsi_exec_channel src1; 360301e04c3fSmrg union tgsi_double_channel dst; 360401e04c3fSmrg int wmask; 360501e04c3fSmrg 360601e04c3fSmrg wmask = inst->Dst[0].Register.WriteMask; 360701e04c3fSmrg if (wmask & TGSI_WRITEMASK_XY) { 360801e04c3fSmrg fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 360901e04c3fSmrg fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 361001e04c3fSmrg micro_dldexp(&dst, &src0, &src1); 361101e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 361201e04c3fSmrg } 361301e04c3fSmrg 361401e04c3fSmrg if (wmask & TGSI_WRITEMASK_ZW) { 361501e04c3fSmrg fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 361601e04c3fSmrg fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 361701e04c3fSmrg micro_dldexp(&dst, &src0, &src1); 361801e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 361901e04c3fSmrg } 362001e04c3fSmrg} 362101e04c3fSmrg 362201e04c3fSmrgstatic void 362301e04c3fSmrgexec_dfracexp(struct tgsi_exec_machine *mach, 362401e04c3fSmrg const struct tgsi_full_instruction *inst) 362501e04c3fSmrg{ 362601e04c3fSmrg union tgsi_double_channel src; 362701e04c3fSmrg union tgsi_double_channel dst; 362801e04c3fSmrg union tgsi_exec_channel dst_exp; 362901e04c3fSmrg 363001e04c3fSmrg fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 363101e04c3fSmrg micro_dfracexp(&dst, &dst_exp, &src); 363201e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 363301e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 363401e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 363501e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 363601e04c3fSmrg for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 363701e04c3fSmrg if (inst->Dst[1].Register.WriteMask & (1 << chan)) 36387ec681f3Smrg store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan); 363901e04c3fSmrg } 364001e04c3fSmrg} 364101e04c3fSmrg 364201e04c3fSmrgstatic void 364301e04c3fSmrgexec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 364401e04c3fSmrg const struct tgsi_full_instruction *inst, 364501e04c3fSmrg micro_dop_sop op) 364601e04c3fSmrg{ 364701e04c3fSmrg union tgsi_double_channel src0; 364801e04c3fSmrg union tgsi_exec_channel src1; 364901e04c3fSmrg union tgsi_double_channel dst; 365001e04c3fSmrg int wmask; 365101e04c3fSmrg 365201e04c3fSmrg wmask = inst->Dst[0].Register.WriteMask; 365301e04c3fSmrg if (wmask & TGSI_WRITEMASK_XY) { 365401e04c3fSmrg fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 365501e04c3fSmrg fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 365601e04c3fSmrg op(&dst, &src0, &src1); 365701e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 365801e04c3fSmrg } 365901e04c3fSmrg 366001e04c3fSmrg if (wmask & TGSI_WRITEMASK_ZW) { 366101e04c3fSmrg fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 366201e04c3fSmrg fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 366301e04c3fSmrg op(&dst, &src0, &src1); 366401e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 366501e04c3fSmrg } 366601e04c3fSmrg} 366701e04c3fSmrg 366801e04c3fSmrgstatic int 366901e04c3fSmrgget_image_coord_dim(unsigned tgsi_tex) 367001e04c3fSmrg{ 367101e04c3fSmrg int dim; 367201e04c3fSmrg switch (tgsi_tex) { 367301e04c3fSmrg case TGSI_TEXTURE_BUFFER: 367401e04c3fSmrg case TGSI_TEXTURE_1D: 367501e04c3fSmrg dim = 1; 367601e04c3fSmrg break; 367701e04c3fSmrg case TGSI_TEXTURE_2D: 367801e04c3fSmrg case TGSI_TEXTURE_RECT: 367901e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 368001e04c3fSmrg case TGSI_TEXTURE_2D_MSAA: 368101e04c3fSmrg dim = 2; 368201e04c3fSmrg break; 368301e04c3fSmrg case TGSI_TEXTURE_3D: 368401e04c3fSmrg case TGSI_TEXTURE_CUBE: 368501e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY: 368601e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY_MSAA: 368701e04c3fSmrg case TGSI_TEXTURE_CUBE_ARRAY: 368801e04c3fSmrg dim = 3; 368901e04c3fSmrg break; 369001e04c3fSmrg default: 369101e04c3fSmrg assert(!"unknown texture target"); 369201e04c3fSmrg dim = 0; 369301e04c3fSmrg break; 369401e04c3fSmrg } 369501e04c3fSmrg 369601e04c3fSmrg return dim; 369701e04c3fSmrg} 369801e04c3fSmrg 369901e04c3fSmrgstatic int 370001e04c3fSmrgget_image_coord_sample(unsigned tgsi_tex) 370101e04c3fSmrg{ 370201e04c3fSmrg int sample = 0; 370301e04c3fSmrg switch (tgsi_tex) { 370401e04c3fSmrg case TGSI_TEXTURE_2D_MSAA: 370501e04c3fSmrg sample = 3; 370601e04c3fSmrg break; 370701e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY_MSAA: 370801e04c3fSmrg sample = 4; 370901e04c3fSmrg break; 371001e04c3fSmrg default: 371101e04c3fSmrg break; 371201e04c3fSmrg } 371301e04c3fSmrg return sample; 371401e04c3fSmrg} 371501e04c3fSmrg 371601e04c3fSmrgstatic void 371701e04c3fSmrgexec_load_img(struct tgsi_exec_machine *mach, 371801e04c3fSmrg const struct tgsi_full_instruction *inst) 371901e04c3fSmrg{ 372001e04c3fSmrg union tgsi_exec_channel r[4], sample_r; 372101e04c3fSmrg uint unit; 372201e04c3fSmrg int sample; 372301e04c3fSmrg int i, j; 372401e04c3fSmrg int dim; 372501e04c3fSmrg uint chan; 372601e04c3fSmrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 372701e04c3fSmrg struct tgsi_image_params params; 372801e04c3fSmrg 372901e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 0); 373001e04c3fSmrg dim = get_image_coord_dim(inst->Memory.Texture); 373101e04c3fSmrg sample = get_image_coord_sample(inst->Memory.Texture); 373201e04c3fSmrg assert(dim <= 3); 373301e04c3fSmrg 37347ec681f3Smrg params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 373501e04c3fSmrg params.unit = unit; 373601e04c3fSmrg params.tgsi_tex_instr = inst->Memory.Texture; 373701e04c3fSmrg params.format = inst->Memory.Format; 373801e04c3fSmrg 373901e04c3fSmrg for (i = 0; i < dim; i++) { 374001e04c3fSmrg IFETCH(&r[i], 1, TGSI_CHAN_X + i); 374101e04c3fSmrg } 374201e04c3fSmrg 374301e04c3fSmrg if (sample) 374401e04c3fSmrg IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 374501e04c3fSmrg 374601e04c3fSmrg mach->Image->load(mach->Image, ¶ms, 374701e04c3fSmrg r[0].i, r[1].i, r[2].i, sample_r.i, 374801e04c3fSmrg rgba); 374901e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 375001e04c3fSmrg r[0].f[j] = rgba[0][j]; 375101e04c3fSmrg r[1].f[j] = rgba[1][j]; 375201e04c3fSmrg r[2].f[j] = rgba[2][j]; 375301e04c3fSmrg r[3].f[j] = rgba[3][j]; 375401e04c3fSmrg } 375501e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 375601e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 37577ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 375801e04c3fSmrg } 375901e04c3fSmrg } 376001e04c3fSmrg} 376101e04c3fSmrg 376201e04c3fSmrgstatic void 37637ec681f3Smrgexec_load_membuf(struct tgsi_exec_machine *mach, 37647ec681f3Smrg const struct tgsi_full_instruction *inst) 376501e04c3fSmrg{ 37667ec681f3Smrg uint32_t unit = fetch_sampler_unit(mach, inst, 0); 376701e04c3fSmrg 37687ec681f3Smrg uint32_t size; 37697ec681f3Smrg const char *ptr; 37707ec681f3Smrg switch (inst->Src[0].Register.File) { 37717ec681f3Smrg case TGSI_FILE_MEMORY: 37727ec681f3Smrg ptr = mach->LocalMem; 37737ec681f3Smrg size = mach->LocalMemSize; 37747ec681f3Smrg break; 377501e04c3fSmrg 37767ec681f3Smrg case TGSI_FILE_BUFFER: 37777ec681f3Smrg ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 37787ec681f3Smrg break; 377901e04c3fSmrg 37807ec681f3Smrg case TGSI_FILE_CONSTANT: 37817ec681f3Smrg if (unit < ARRAY_SIZE(mach->Consts)) { 37827ec681f3Smrg ptr = mach->Consts[unit]; 37837ec681f3Smrg size = mach->ConstsSize[unit]; 37847ec681f3Smrg } else { 37857ec681f3Smrg ptr = NULL; 37867ec681f3Smrg size = 0; 378701e04c3fSmrg } 37887ec681f3Smrg break; 378901e04c3fSmrg 37907ec681f3Smrg default: 37917ec681f3Smrg unreachable("unsupported TGSI_OPCODE_LOAD file"); 37927ec681f3Smrg } 379301e04c3fSmrg 37947ec681f3Smrg union tgsi_exec_channel offset; 37957ec681f3Smrg IFETCH(&offset, 1, TGSI_CHAN_X); 379601e04c3fSmrg 37977ec681f3Smrg assert(inst->Dst[0].Register.WriteMask); 37987ec681f3Smrg uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4; 379901e04c3fSmrg 38007ec681f3Smrg union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS]; 38017ec681f3Smrg memset(&rgba, 0, sizeof(rgba)); 38027ec681f3Smrg for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 38037ec681f3Smrg if (size >= load_size && offset.u[j] <= (size - load_size)) { 38047ec681f3Smrg for (int chan = 0; chan < load_size / 4; chan++) 38057ec681f3Smrg rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4); 380601e04c3fSmrg } 380701e04c3fSmrg } 380801e04c3fSmrg 38097ec681f3Smrg for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 381001e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 38117ec681f3Smrg store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan); 381201e04c3fSmrg } 381301e04c3fSmrg } 381401e04c3fSmrg} 381501e04c3fSmrg 381601e04c3fSmrgstatic void 381701e04c3fSmrgexec_load(struct tgsi_exec_machine *mach, 381801e04c3fSmrg const struct tgsi_full_instruction *inst) 381901e04c3fSmrg{ 382001e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 382101e04c3fSmrg exec_load_img(mach, inst); 38227ec681f3Smrg else 38237ec681f3Smrg exec_load_membuf(mach, inst); 382401e04c3fSmrg} 382501e04c3fSmrg 3826361fc4cbSmayastatic uint 3827361fc4cbSmayafetch_store_img_unit(struct tgsi_exec_machine *mach, 3828361fc4cbSmaya const struct tgsi_full_dst_register *dst) 3829361fc4cbSmaya{ 3830361fc4cbSmaya uint unit = 0; 3831361fc4cbSmaya int i; 3832361fc4cbSmaya if (dst->Register.Indirect) { 3833361fc4cbSmaya union tgsi_exec_channel indir_index, index2; 3834361fc4cbSmaya const uint execmask = mach->ExecMask; 3835361fc4cbSmaya index2.i[0] = 3836361fc4cbSmaya index2.i[1] = 3837361fc4cbSmaya index2.i[2] = 3838361fc4cbSmaya index2.i[3] = dst->Indirect.Index; 3839361fc4cbSmaya 3840361fc4cbSmaya fetch_src_file_channel(mach, 3841361fc4cbSmaya dst->Indirect.File, 3842361fc4cbSmaya dst->Indirect.Swizzle, 3843361fc4cbSmaya &index2, 3844361fc4cbSmaya &ZeroVec, 3845361fc4cbSmaya &indir_index); 3846361fc4cbSmaya for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3847361fc4cbSmaya if (execmask & (1 << i)) { 3848361fc4cbSmaya unit = dst->Register.Index + indir_index.i[i]; 3849361fc4cbSmaya break; 3850361fc4cbSmaya } 3851361fc4cbSmaya } 3852361fc4cbSmaya } else { 3853361fc4cbSmaya unit = dst->Register.Index; 3854361fc4cbSmaya } 3855361fc4cbSmaya return unit; 3856361fc4cbSmaya} 3857361fc4cbSmaya 385801e04c3fSmrgstatic void 385901e04c3fSmrgexec_store_img(struct tgsi_exec_machine *mach, 386001e04c3fSmrg const struct tgsi_full_instruction *inst) 386101e04c3fSmrg{ 386201e04c3fSmrg union tgsi_exec_channel r[3], sample_r; 386301e04c3fSmrg union tgsi_exec_channel value[4]; 386401e04c3fSmrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 386501e04c3fSmrg struct tgsi_image_params params; 386601e04c3fSmrg int dim; 386701e04c3fSmrg int sample; 386801e04c3fSmrg int i, j; 386901e04c3fSmrg uint unit; 3870361fc4cbSmaya unit = fetch_store_img_unit(mach, &inst->Dst[0]); 387101e04c3fSmrg dim = get_image_coord_dim(inst->Memory.Texture); 387201e04c3fSmrg sample = get_image_coord_sample(inst->Memory.Texture); 387301e04c3fSmrg assert(dim <= 3); 387401e04c3fSmrg 38757ec681f3Smrg params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 387601e04c3fSmrg params.unit = unit; 387701e04c3fSmrg params.tgsi_tex_instr = inst->Memory.Texture; 387801e04c3fSmrg params.format = inst->Memory.Format; 387901e04c3fSmrg 388001e04c3fSmrg for (i = 0; i < dim; i++) { 388101e04c3fSmrg IFETCH(&r[i], 0, TGSI_CHAN_X + i); 388201e04c3fSmrg } 388301e04c3fSmrg 388401e04c3fSmrg for (i = 0; i < 4; i++) { 388501e04c3fSmrg FETCH(&value[i], 1, TGSI_CHAN_X + i); 388601e04c3fSmrg } 388701e04c3fSmrg if (sample) 388801e04c3fSmrg IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 388901e04c3fSmrg 389001e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 389101e04c3fSmrg rgba[0][j] = value[0].f[j]; 389201e04c3fSmrg rgba[1][j] = value[1].f[j]; 389301e04c3fSmrg rgba[2][j] = value[2].f[j]; 389401e04c3fSmrg rgba[3][j] = value[3].f[j]; 389501e04c3fSmrg } 389601e04c3fSmrg 389701e04c3fSmrg mach->Image->store(mach->Image, ¶ms, 389801e04c3fSmrg r[0].i, r[1].i, r[2].i, sample_r.i, 389901e04c3fSmrg rgba); 390001e04c3fSmrg} 390101e04c3fSmrg 390201e04c3fSmrgstatic void 390301e04c3fSmrgexec_store_buf(struct tgsi_exec_machine *mach, 390401e04c3fSmrg const struct tgsi_full_instruction *inst) 390501e04c3fSmrg{ 39067ec681f3Smrg uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]); 39077ec681f3Smrg uint32_t size; 39087ec681f3Smrg char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 390901e04c3fSmrg 39107ec681f3Smrg int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 391101e04c3fSmrg 39127ec681f3Smrg union tgsi_exec_channel offset; 39137ec681f3Smrg IFETCH(&offset, 0, TGSI_CHAN_X); 391401e04c3fSmrg 39157ec681f3Smrg union tgsi_exec_channel value[4]; 39167ec681f3Smrg for (int i = 0; i < 4; i++) 391701e04c3fSmrg FETCH(&value[i], 1, TGSI_CHAN_X + i); 391801e04c3fSmrg 39197ec681f3Smrg for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 39207ec681f3Smrg if (!(execmask & (1 << j))) 39217ec681f3Smrg continue; 39227ec681f3Smrg if (size < offset.u[j]) 39237ec681f3Smrg continue; 392401e04c3fSmrg 39257ec681f3Smrg uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]); 39267ec681f3Smrg uint32_t size_avail = size - offset.u[j]; 39277ec681f3Smrg 39287ec681f3Smrg for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) { 39297ec681f3Smrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) 39307ec681f3Smrg memcpy(&invocation_ptr[chan], &value[chan].u[j], 4); 39317ec681f3Smrg } 39327ec681f3Smrg } 393301e04c3fSmrg} 393401e04c3fSmrg 393501e04c3fSmrgstatic void 393601e04c3fSmrgexec_store_mem(struct tgsi_exec_machine *mach, 393701e04c3fSmrg const struct tgsi_full_instruction *inst) 393801e04c3fSmrg{ 393901e04c3fSmrg union tgsi_exec_channel r[3]; 394001e04c3fSmrg union tgsi_exec_channel value[4]; 394101e04c3fSmrg uint i, chan; 394201e04c3fSmrg char *ptr = mach->LocalMem; 39437ec681f3Smrg int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 394401e04c3fSmrg 394501e04c3fSmrg IFETCH(&r[0], 0, TGSI_CHAN_X); 394601e04c3fSmrg 394701e04c3fSmrg for (i = 0; i < 4; i++) { 394801e04c3fSmrg FETCH(&value[i], 1, TGSI_CHAN_X + i); 394901e04c3fSmrg } 395001e04c3fSmrg 395101e04c3fSmrg if (r[0].u[0] >= mach->LocalMemSize) 395201e04c3fSmrg return; 395301e04c3fSmrg ptr += r[0].u[0]; 395401e04c3fSmrg 395501e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 395601e04c3fSmrg if (execmask & (1 << i)) { 395701e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 395801e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 395901e04c3fSmrg memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 396001e04c3fSmrg } 396101e04c3fSmrg } 396201e04c3fSmrg } 396301e04c3fSmrg } 396401e04c3fSmrg} 396501e04c3fSmrg 396601e04c3fSmrgstatic void 396701e04c3fSmrgexec_store(struct tgsi_exec_machine *mach, 396801e04c3fSmrg const struct tgsi_full_instruction *inst) 396901e04c3fSmrg{ 397001e04c3fSmrg if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 397101e04c3fSmrg exec_store_img(mach, inst); 397201e04c3fSmrg else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 397301e04c3fSmrg exec_store_buf(mach, inst); 397401e04c3fSmrg else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 397501e04c3fSmrg exec_store_mem(mach, inst); 397601e04c3fSmrg} 397701e04c3fSmrg 397801e04c3fSmrgstatic void 397901e04c3fSmrgexec_atomop_img(struct tgsi_exec_machine *mach, 398001e04c3fSmrg const struct tgsi_full_instruction *inst) 398101e04c3fSmrg{ 398201e04c3fSmrg union tgsi_exec_channel r[4], sample_r; 398301e04c3fSmrg union tgsi_exec_channel value[4], value2[4]; 398401e04c3fSmrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 398501e04c3fSmrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 398601e04c3fSmrg struct tgsi_image_params params; 398701e04c3fSmrg int dim; 398801e04c3fSmrg int sample; 398901e04c3fSmrg int i, j; 399001e04c3fSmrg uint unit, chan; 399101e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 0); 399201e04c3fSmrg dim = get_image_coord_dim(inst->Memory.Texture); 399301e04c3fSmrg sample = get_image_coord_sample(inst->Memory.Texture); 399401e04c3fSmrg assert(dim <= 3); 399501e04c3fSmrg 39967ec681f3Smrg params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 399701e04c3fSmrg params.unit = unit; 399801e04c3fSmrg params.tgsi_tex_instr = inst->Memory.Texture; 399901e04c3fSmrg params.format = inst->Memory.Format; 400001e04c3fSmrg 400101e04c3fSmrg for (i = 0; i < dim; i++) { 400201e04c3fSmrg IFETCH(&r[i], 1, TGSI_CHAN_X + i); 400301e04c3fSmrg } 400401e04c3fSmrg 400501e04c3fSmrg for (i = 0; i < 4; i++) { 400601e04c3fSmrg FETCH(&value[i], 2, TGSI_CHAN_X + i); 400701e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 400801e04c3fSmrg FETCH(&value2[i], 3, TGSI_CHAN_X + i); 400901e04c3fSmrg } 401001e04c3fSmrg if (sample) 401101e04c3fSmrg IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 401201e04c3fSmrg 401301e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 401401e04c3fSmrg rgba[0][j] = value[0].f[j]; 401501e04c3fSmrg rgba[1][j] = value[1].f[j]; 401601e04c3fSmrg rgba[2][j] = value[2].f[j]; 401701e04c3fSmrg rgba[3][j] = value[3].f[j]; 401801e04c3fSmrg } 401901e04c3fSmrg if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 402001e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 402101e04c3fSmrg rgba2[0][j] = value2[0].f[j]; 402201e04c3fSmrg rgba2[1][j] = value2[1].f[j]; 402301e04c3fSmrg rgba2[2][j] = value2[2].f[j]; 402401e04c3fSmrg rgba2[3][j] = value2[3].f[j]; 402501e04c3fSmrg } 402601e04c3fSmrg } 402701e04c3fSmrg 402801e04c3fSmrg mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 402901e04c3fSmrg r[0].i, r[1].i, r[2].i, sample_r.i, 403001e04c3fSmrg rgba, rgba2); 403101e04c3fSmrg 403201e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 403301e04c3fSmrg r[0].f[j] = rgba[0][j]; 403401e04c3fSmrg r[1].f[j] = rgba[1][j]; 403501e04c3fSmrg r[2].f[j] = rgba[2][j]; 403601e04c3fSmrg r[3].f[j] = rgba[3][j]; 403701e04c3fSmrg } 403801e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 403901e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 40407ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 404101e04c3fSmrg } 404201e04c3fSmrg } 404301e04c3fSmrg} 404401e04c3fSmrg 404501e04c3fSmrgstatic void 40467ec681f3Smrgexec_atomop_membuf(struct tgsi_exec_machine *mach, 40477ec681f3Smrg const struct tgsi_full_instruction *inst) 404801e04c3fSmrg{ 40497ec681f3Smrg union tgsi_exec_channel offset, r0, r1; 40507ec681f3Smrg uint chan, i; 40517ec681f3Smrg int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 40527ec681f3Smrg IFETCH(&offset, 1, TGSI_CHAN_X); 405301e04c3fSmrg 40547ec681f3Smrg if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X)) 40557ec681f3Smrg return; 405601e04c3fSmrg 40577ec681f3Smrg void *ptr[TGSI_QUAD_SIZE]; 40587ec681f3Smrg if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 40597ec681f3Smrg uint32_t unit = fetch_sampler_unit(mach, inst, 0); 40607ec681f3Smrg uint32_t size; 40617ec681f3Smrg char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size); 40627ec681f3Smrg for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 40637ec681f3Smrg if (likely(size >= 4 && offset.u[i] <= size - 4)) 40647ec681f3Smrg ptr[i] = buffer + offset.u[i]; 40657ec681f3Smrg else 40667ec681f3Smrg ptr[i] = NULL; 406701e04c3fSmrg } 40687ec681f3Smrg } else { 40697ec681f3Smrg assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY); 407001e04c3fSmrg 40717ec681f3Smrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 40727ec681f3Smrg if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4)) 40737ec681f3Smrg ptr[i] = (char *)mach->LocalMem + offset.u[i]; 40747ec681f3Smrg else 40757ec681f3Smrg ptr[i] = NULL; 407601e04c3fSmrg } 407701e04c3fSmrg } 407801e04c3fSmrg 40797ec681f3Smrg FETCH(&r0, 2, TGSI_CHAN_X); 40807ec681f3Smrg if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 40817ec681f3Smrg FETCH(&r1, 3, TGSI_CHAN_X); 408201e04c3fSmrg 40837ec681f3Smrg /* The load/op/store sequence has to happen inside the loop since ptr 40847ec681f3Smrg * may have the same ptr in some of the invocations. 40857ec681f3Smrg */ 40867ec681f3Smrg for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 40877ec681f3Smrg if (!(execmask & (1 << i))) 40887ec681f3Smrg continue; 408901e04c3fSmrg 40907ec681f3Smrg uint32_t val = 0; 40917ec681f3Smrg if (ptr[i]) { 40927ec681f3Smrg memcpy(&val, ptr[i], sizeof(val)); 409301e04c3fSmrg 40947ec681f3Smrg uint32_t result; 40957ec681f3Smrg switch (inst->Instruction.Opcode) { 40967ec681f3Smrg case TGSI_OPCODE_ATOMUADD: 40977ec681f3Smrg result = val + r0.u[i]; 40987ec681f3Smrg break; 40997ec681f3Smrg case TGSI_OPCODE_ATOMXOR: 41007ec681f3Smrg result = val ^ r0.u[i]; 41017ec681f3Smrg break; 41027ec681f3Smrg case TGSI_OPCODE_ATOMOR: 41037ec681f3Smrg result = val | r0.u[i]; 41047ec681f3Smrg break; 41057ec681f3Smrg case TGSI_OPCODE_ATOMAND: 41067ec681f3Smrg result = val & r0.u[i]; 41077ec681f3Smrg break; 41087ec681f3Smrg case TGSI_OPCODE_ATOMUMIN: 41097ec681f3Smrg result = MIN2(val, r0.u[i]); 41107ec681f3Smrg break; 41117ec681f3Smrg case TGSI_OPCODE_ATOMUMAX: 41127ec681f3Smrg result = MAX2(val, r0.u[i]); 41137ec681f3Smrg break; 41147ec681f3Smrg case TGSI_OPCODE_ATOMIMIN: 41157ec681f3Smrg result = MIN2((int32_t)val, r0.i[i]); 41167ec681f3Smrg break; 41177ec681f3Smrg case TGSI_OPCODE_ATOMIMAX: 41187ec681f3Smrg result = MAX2((int32_t)val, r0.i[i]); 41197ec681f3Smrg break; 41207ec681f3Smrg case TGSI_OPCODE_ATOMXCHG: 41217ec681f3Smrg result = r0.u[i]; 41227ec681f3Smrg break; 41237ec681f3Smrg case TGSI_OPCODE_ATOMCAS: 41247ec681f3Smrg if (val == r0.u[i]) 41257ec681f3Smrg result = r1.u[i]; 41267ec681f3Smrg else 41277ec681f3Smrg result = val; 41287ec681f3Smrg break; 41297ec681f3Smrg case TGSI_OPCODE_ATOMFADD: 41307ec681f3Smrg result = fui(uif(val) + r0.f[i]); 41317ec681f3Smrg break; 41327ec681f3Smrg default: 41337ec681f3Smrg unreachable("bad atomic op"); 41347ec681f3Smrg } 41357ec681f3Smrg memcpy(ptr[i], &result, sizeof(result)); 413601e04c3fSmrg } 41377ec681f3Smrg 41387ec681f3Smrg r0.u[i] = val; 413901e04c3fSmrg } 41407ec681f3Smrg 41417ec681f3Smrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) 41427ec681f3Smrg store_dest(mach, &r0, &inst->Dst[0], inst, chan); 414301e04c3fSmrg} 414401e04c3fSmrg 414501e04c3fSmrgstatic void 414601e04c3fSmrgexec_atomop(struct tgsi_exec_machine *mach, 414701e04c3fSmrg const struct tgsi_full_instruction *inst) 414801e04c3fSmrg{ 414901e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 415001e04c3fSmrg exec_atomop_img(mach, inst); 41517ec681f3Smrg else 41527ec681f3Smrg exec_atomop_membuf(mach, inst); 415301e04c3fSmrg} 415401e04c3fSmrg 415501e04c3fSmrgstatic void 415601e04c3fSmrgexec_resq_img(struct tgsi_exec_machine *mach, 415701e04c3fSmrg const struct tgsi_full_instruction *inst) 415801e04c3fSmrg{ 415901e04c3fSmrg int result[4]; 416001e04c3fSmrg union tgsi_exec_channel r[4]; 416101e04c3fSmrg uint unit; 416201e04c3fSmrg int i, chan, j; 416301e04c3fSmrg struct tgsi_image_params params; 416401e04c3fSmrg 416501e04c3fSmrg unit = fetch_sampler_unit(mach, inst, 0); 416601e04c3fSmrg 41677ec681f3Smrg params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 416801e04c3fSmrg params.unit = unit; 416901e04c3fSmrg params.tgsi_tex_instr = inst->Memory.Texture; 417001e04c3fSmrg params.format = inst->Memory.Format; 417101e04c3fSmrg 417201e04c3fSmrg mach->Image->get_dims(mach->Image, ¶ms, result); 417301e04c3fSmrg 417401e04c3fSmrg for (i = 0; i < TGSI_QUAD_SIZE; i++) { 417501e04c3fSmrg for (j = 0; j < 4; j++) { 417601e04c3fSmrg r[j].i[i] = result[j]; 417701e04c3fSmrg } 417801e04c3fSmrg } 417901e04c3fSmrg 418001e04c3fSmrg for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 418101e04c3fSmrg if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 41827ec681f3Smrg store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 418301e04c3fSmrg } 418401e04c3fSmrg } 418501e04c3fSmrg} 418601e04c3fSmrg 418701e04c3fSmrgstatic void 418801e04c3fSmrgexec_resq_buf(struct tgsi_exec_machine *mach, 418901e04c3fSmrg const struct tgsi_full_instruction *inst) 419001e04c3fSmrg{ 41917ec681f3Smrg uint32_t unit = fetch_sampler_unit(mach, inst, 0); 41927ec681f3Smrg uint32_t size; 41937ec681f3Smrg (void)mach->Buffer->lookup(mach->Buffer, unit, &size); 419401e04c3fSmrg 41957ec681f3Smrg union tgsi_exec_channel r; 41967ec681f3Smrg for (int i = 0; i < TGSI_QUAD_SIZE; i++) 41977ec681f3Smrg r.i[i] = size; 419801e04c3fSmrg 41997ec681f3Smrg if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 42007ec681f3Smrg for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 42017ec681f3Smrg store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X); 420201e04c3fSmrg } 420301e04c3fSmrg } 420401e04c3fSmrg} 420501e04c3fSmrg 420601e04c3fSmrgstatic void 420701e04c3fSmrgexec_resq(struct tgsi_exec_machine *mach, 420801e04c3fSmrg const struct tgsi_full_instruction *inst) 420901e04c3fSmrg{ 421001e04c3fSmrg if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 421101e04c3fSmrg exec_resq_img(mach, inst); 421201e04c3fSmrg else 421301e04c3fSmrg exec_resq_buf(mach, inst); 421401e04c3fSmrg} 421501e04c3fSmrg 421601e04c3fSmrgstatic void 421701e04c3fSmrgmicro_f2u64(union tgsi_double_channel *dst, 421801e04c3fSmrg const union tgsi_exec_channel *src) 421901e04c3fSmrg{ 422001e04c3fSmrg dst->u64[0] = (uint64_t)src->f[0]; 422101e04c3fSmrg dst->u64[1] = (uint64_t)src->f[1]; 422201e04c3fSmrg dst->u64[2] = (uint64_t)src->f[2]; 422301e04c3fSmrg dst->u64[3] = (uint64_t)src->f[3]; 422401e04c3fSmrg} 422501e04c3fSmrg 422601e04c3fSmrgstatic void 422701e04c3fSmrgmicro_f2i64(union tgsi_double_channel *dst, 422801e04c3fSmrg const union tgsi_exec_channel *src) 422901e04c3fSmrg{ 423001e04c3fSmrg dst->i64[0] = (int64_t)src->f[0]; 423101e04c3fSmrg dst->i64[1] = (int64_t)src->f[1]; 423201e04c3fSmrg dst->i64[2] = (int64_t)src->f[2]; 423301e04c3fSmrg dst->i64[3] = (int64_t)src->f[3]; 423401e04c3fSmrg} 423501e04c3fSmrg 423601e04c3fSmrgstatic void 423701e04c3fSmrgmicro_u2i64(union tgsi_double_channel *dst, 423801e04c3fSmrg const union tgsi_exec_channel *src) 423901e04c3fSmrg{ 424001e04c3fSmrg dst->u64[0] = (uint64_t)src->u[0]; 424101e04c3fSmrg dst->u64[1] = (uint64_t)src->u[1]; 424201e04c3fSmrg dst->u64[2] = (uint64_t)src->u[2]; 424301e04c3fSmrg dst->u64[3] = (uint64_t)src->u[3]; 424401e04c3fSmrg} 424501e04c3fSmrg 424601e04c3fSmrgstatic void 424701e04c3fSmrgmicro_i2i64(union tgsi_double_channel *dst, 424801e04c3fSmrg const union tgsi_exec_channel *src) 424901e04c3fSmrg{ 425001e04c3fSmrg dst->i64[0] = (int64_t)src->i[0]; 425101e04c3fSmrg dst->i64[1] = (int64_t)src->i[1]; 425201e04c3fSmrg dst->i64[2] = (int64_t)src->i[2]; 425301e04c3fSmrg dst->i64[3] = (int64_t)src->i[3]; 425401e04c3fSmrg} 425501e04c3fSmrg 425601e04c3fSmrgstatic void 425701e04c3fSmrgmicro_d2u64(union tgsi_double_channel *dst, 425801e04c3fSmrg const union tgsi_double_channel *src) 425901e04c3fSmrg{ 426001e04c3fSmrg dst->u64[0] = (uint64_t)src->d[0]; 426101e04c3fSmrg dst->u64[1] = (uint64_t)src->d[1]; 426201e04c3fSmrg dst->u64[2] = (uint64_t)src->d[2]; 426301e04c3fSmrg dst->u64[3] = (uint64_t)src->d[3]; 426401e04c3fSmrg} 42654a49301eSmrg 426601e04c3fSmrgstatic void 426701e04c3fSmrgmicro_d2i64(union tgsi_double_channel *dst, 426801e04c3fSmrg const union tgsi_double_channel *src) 426901e04c3fSmrg{ 427001e04c3fSmrg dst->i64[0] = (int64_t)src->d[0]; 427101e04c3fSmrg dst->i64[1] = (int64_t)src->d[1]; 427201e04c3fSmrg dst->i64[2] = (int64_t)src->d[2]; 427301e04c3fSmrg dst->i64[3] = (int64_t)src->d[3]; 427401e04c3fSmrg} 42754a49301eSmrg 427601e04c3fSmrgstatic void 427701e04c3fSmrgmicro_u642d(union tgsi_double_channel *dst, 427801e04c3fSmrg const union tgsi_double_channel *src) 427901e04c3fSmrg{ 428001e04c3fSmrg dst->d[0] = (double)src->u64[0]; 428101e04c3fSmrg dst->d[1] = (double)src->u64[1]; 428201e04c3fSmrg dst->d[2] = (double)src->u64[2]; 428301e04c3fSmrg dst->d[3] = (double)src->u64[3]; 4284cdc920a0Smrg} 42854a49301eSmrg 4286cdc920a0Smrgstatic void 428701e04c3fSmrgmicro_i642d(union tgsi_double_channel *dst, 428801e04c3fSmrg const union tgsi_double_channel *src) 4289cdc920a0Smrg{ 429001e04c3fSmrg dst->d[0] = (double)src->i64[0]; 429101e04c3fSmrg dst->d[1] = (double)src->i64[1]; 429201e04c3fSmrg dst->d[2] = (double)src->i64[2]; 429301e04c3fSmrg dst->d[3] = (double)src->i64[3]; 429401e04c3fSmrg} 42954a49301eSmrg 429601e04c3fSmrgstatic void 429701e04c3fSmrgmicro_u642f(union tgsi_exec_channel *dst, 429801e04c3fSmrg const union tgsi_double_channel *src) 429901e04c3fSmrg{ 430001e04c3fSmrg dst->f[0] = (float)src->u64[0]; 430101e04c3fSmrg dst->f[1] = (float)src->u64[1]; 430201e04c3fSmrg dst->f[2] = (float)src->u64[2]; 430301e04c3fSmrg dst->f[3] = (float)src->u64[3]; 430401e04c3fSmrg} 43054a49301eSmrg 430601e04c3fSmrgstatic void 430701e04c3fSmrgmicro_i642f(union tgsi_exec_channel *dst, 430801e04c3fSmrg const union tgsi_double_channel *src) 430901e04c3fSmrg{ 431001e04c3fSmrg dst->f[0] = (float)src->i64[0]; 431101e04c3fSmrg dst->f[1] = (float)src->i64[1]; 431201e04c3fSmrg dst->f[2] = (float)src->i64[2]; 431301e04c3fSmrg dst->f[3] = (float)src->i64[3]; 4314cdc920a0Smrg} 43154a49301eSmrg 4316cdc920a0Smrgstatic void 431701e04c3fSmrgexec_t_2_64(struct tgsi_exec_machine *mach, 431801e04c3fSmrg const struct tgsi_full_instruction *inst, 431901e04c3fSmrg micro_dop_s op, 432001e04c3fSmrg enum tgsi_exec_datatype src_datatype) 4321cdc920a0Smrg{ 432201e04c3fSmrg union tgsi_exec_channel src; 432301e04c3fSmrg union tgsi_double_channel dst; 43244a49301eSmrg 432501e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 432601e04c3fSmrg fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 432701e04c3fSmrg op(&dst, &src); 432801e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 432901e04c3fSmrg } 433001e04c3fSmrg if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 433101e04c3fSmrg fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 433201e04c3fSmrg op(&dst, &src); 433301e04c3fSmrg store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 433401e04c3fSmrg } 433501e04c3fSmrg} 433601e04c3fSmrg 433701e04c3fSmrgstatic void 433801e04c3fSmrgexec_64_2_t(struct tgsi_exec_machine *mach, 433901e04c3fSmrg const struct tgsi_full_instruction *inst, 43407ec681f3Smrg micro_sop_d op) 434101e04c3fSmrg{ 434201e04c3fSmrg union tgsi_double_channel src; 434301e04c3fSmrg union tgsi_exec_channel dst; 434401e04c3fSmrg int wm = inst->Dst[0].Register.WriteMask; 434501e04c3fSmrg int i; 434601e04c3fSmrg int bit; 434701e04c3fSmrg for (i = 0; i < 2; i++) { 434801e04c3fSmrg bit = ffs(wm); 434901e04c3fSmrg if (bit) { 435001e04c3fSmrg wm &= ~(1 << (bit - 1)); 435101e04c3fSmrg if (i == 0) 435201e04c3fSmrg fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 435301e04c3fSmrg else 435401e04c3fSmrg fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 435501e04c3fSmrg op(&dst, &src); 43567ec681f3Smrg store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1); 435701e04c3fSmrg } 435801e04c3fSmrg } 4359cdc920a0Smrg} 43604a49301eSmrg 4361cdc920a0Smrgstatic void 4362cdc920a0Smrgmicro_i2f(union tgsi_exec_channel *dst, 4363cdc920a0Smrg const union tgsi_exec_channel *src) 4364cdc920a0Smrg{ 4365cdc920a0Smrg dst->f[0] = (float)src->i[0]; 4366cdc920a0Smrg dst->f[1] = (float)src->i[1]; 4367cdc920a0Smrg dst->f[2] = (float)src->i[2]; 4368cdc920a0Smrg dst->f[3] = (float)src->i[3]; 4369cdc920a0Smrg} 43704a49301eSmrg 4371cdc920a0Smrgstatic void 4372cdc920a0Smrgmicro_not(union tgsi_exec_channel *dst, 4373cdc920a0Smrg const union tgsi_exec_channel *src) 4374cdc920a0Smrg{ 4375cdc920a0Smrg dst->u[0] = ~src->u[0]; 4376cdc920a0Smrg dst->u[1] = ~src->u[1]; 4377cdc920a0Smrg dst->u[2] = ~src->u[2]; 4378cdc920a0Smrg dst->u[3] = ~src->u[3]; 4379cdc920a0Smrg} 43804a49301eSmrg 4381cdc920a0Smrgstatic void 4382cdc920a0Smrgmicro_shl(union tgsi_exec_channel *dst, 4383cdc920a0Smrg const union tgsi_exec_channel *src0, 4384cdc920a0Smrg const union tgsi_exec_channel *src1) 4385cdc920a0Smrg{ 4386af69d88dSmrg unsigned masked_count; 4387af69d88dSmrg masked_count = src1->u[0] & 0x1f; 4388af69d88dSmrg dst->u[0] = src0->u[0] << masked_count; 4389af69d88dSmrg masked_count = src1->u[1] & 0x1f; 4390af69d88dSmrg dst->u[1] = src0->u[1] << masked_count; 4391af69d88dSmrg masked_count = src1->u[2] & 0x1f; 4392af69d88dSmrg dst->u[2] = src0->u[2] << masked_count; 4393af69d88dSmrg masked_count = src1->u[3] & 0x1f; 4394af69d88dSmrg dst->u[3] = src0->u[3] << masked_count; 4395cdc920a0Smrg} 43964a49301eSmrg 4397cdc920a0Smrgstatic void 4398cdc920a0Smrgmicro_and(union tgsi_exec_channel *dst, 4399cdc920a0Smrg const union tgsi_exec_channel *src0, 4400cdc920a0Smrg const union tgsi_exec_channel *src1) 4401cdc920a0Smrg{ 4402cdc920a0Smrg dst->u[0] = src0->u[0] & src1->u[0]; 4403cdc920a0Smrg dst->u[1] = src0->u[1] & src1->u[1]; 4404cdc920a0Smrg dst->u[2] = src0->u[2] & src1->u[2]; 4405cdc920a0Smrg dst->u[3] = src0->u[3] & src1->u[3]; 4406cdc920a0Smrg} 44074a49301eSmrg 4408cdc920a0Smrgstatic void 4409cdc920a0Smrgmicro_or(union tgsi_exec_channel *dst, 4410cdc920a0Smrg const union tgsi_exec_channel *src0, 4411cdc920a0Smrg const union tgsi_exec_channel *src1) 4412cdc920a0Smrg{ 4413cdc920a0Smrg dst->u[0] = src0->u[0] | src1->u[0]; 4414cdc920a0Smrg dst->u[1] = src0->u[1] | src1->u[1]; 4415cdc920a0Smrg dst->u[2] = src0->u[2] | src1->u[2]; 4416cdc920a0Smrg dst->u[3] = src0->u[3] | src1->u[3]; 4417cdc920a0Smrg} 44184a49301eSmrg 4419cdc920a0Smrgstatic void 4420cdc920a0Smrgmicro_xor(union tgsi_exec_channel *dst, 4421cdc920a0Smrg const union tgsi_exec_channel *src0, 4422cdc920a0Smrg const union tgsi_exec_channel *src1) 4423cdc920a0Smrg{ 4424cdc920a0Smrg dst->u[0] = src0->u[0] ^ src1->u[0]; 4425cdc920a0Smrg dst->u[1] = src0->u[1] ^ src1->u[1]; 4426cdc920a0Smrg dst->u[2] = src0->u[2] ^ src1->u[2]; 4427cdc920a0Smrg dst->u[3] = src0->u[3] ^ src1->u[3]; 4428cdc920a0Smrg} 44294a49301eSmrg 4430af69d88dSmrgstatic void 4431af69d88dSmrgmicro_mod(union tgsi_exec_channel *dst, 4432af69d88dSmrg const union tgsi_exec_channel *src0, 4433af69d88dSmrg const union tgsi_exec_channel *src1) 4434af69d88dSmrg{ 443501e04c3fSmrg dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 443601e04c3fSmrg dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 443701e04c3fSmrg dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; 443801e04c3fSmrg dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4439af69d88dSmrg} 4440af69d88dSmrg 4441cdc920a0Smrgstatic void 4442cdc920a0Smrgmicro_f2i(union tgsi_exec_channel *dst, 4443cdc920a0Smrg const union tgsi_exec_channel *src) 4444cdc920a0Smrg{ 4445cdc920a0Smrg dst->i[0] = (int)src->f[0]; 4446cdc920a0Smrg dst->i[1] = (int)src->f[1]; 4447cdc920a0Smrg dst->i[2] = (int)src->f[2]; 4448cdc920a0Smrg dst->i[3] = (int)src->f[3]; 4449cdc920a0Smrg} 44504a49301eSmrg 4451af69d88dSmrgstatic void 4452af69d88dSmrgmicro_fseq(union tgsi_exec_channel *dst, 4453af69d88dSmrg const union tgsi_exec_channel *src0, 4454af69d88dSmrg const union tgsi_exec_channel *src1) 4455af69d88dSmrg{ 4456af69d88dSmrg dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4457af69d88dSmrg dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4458af69d88dSmrg dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4459af69d88dSmrg dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4460af69d88dSmrg} 4461af69d88dSmrg 4462af69d88dSmrgstatic void 4463af69d88dSmrgmicro_fsge(union tgsi_exec_channel *dst, 4464af69d88dSmrg const union tgsi_exec_channel *src0, 4465af69d88dSmrg const union tgsi_exec_channel *src1) 4466af69d88dSmrg{ 4467af69d88dSmrg dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4468af69d88dSmrg dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4469af69d88dSmrg dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4470af69d88dSmrg dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4471af69d88dSmrg} 4472af69d88dSmrg 4473af69d88dSmrgstatic void 4474af69d88dSmrgmicro_fslt(union tgsi_exec_channel *dst, 4475af69d88dSmrg const union tgsi_exec_channel *src0, 4476af69d88dSmrg const union tgsi_exec_channel *src1) 4477af69d88dSmrg{ 4478af69d88dSmrg dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4479af69d88dSmrg dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4480af69d88dSmrg dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4481af69d88dSmrg dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4482af69d88dSmrg} 4483af69d88dSmrg 4484af69d88dSmrgstatic void 4485af69d88dSmrgmicro_fsne(union tgsi_exec_channel *dst, 4486af69d88dSmrg const union tgsi_exec_channel *src0, 4487af69d88dSmrg const union tgsi_exec_channel *src1) 4488af69d88dSmrg{ 4489af69d88dSmrg dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4490af69d88dSmrg dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4491af69d88dSmrg dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4492af69d88dSmrg dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4493af69d88dSmrg} 4494af69d88dSmrg 4495cdc920a0Smrgstatic void 4496cdc920a0Smrgmicro_idiv(union tgsi_exec_channel *dst, 4497cdc920a0Smrg const union tgsi_exec_channel *src0, 4498cdc920a0Smrg const union tgsi_exec_channel *src1) 4499cdc920a0Smrg{ 4500af69d88dSmrg dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4501af69d88dSmrg dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4502af69d88dSmrg dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4503af69d88dSmrg dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 45044a49301eSmrg} 45054a49301eSmrg 4506cdc920a0Smrgstatic void 4507cdc920a0Smrgmicro_imax(union tgsi_exec_channel *dst, 4508cdc920a0Smrg const union tgsi_exec_channel *src0, 4509cdc920a0Smrg const union tgsi_exec_channel *src1) 4510cdc920a0Smrg{ 4511cdc920a0Smrg dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4512cdc920a0Smrg dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4513cdc920a0Smrg dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4514cdc920a0Smrg dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4515cdc920a0Smrg} 45164a49301eSmrg 45174a49301eSmrgstatic void 4518cdc920a0Smrgmicro_imin(union tgsi_exec_channel *dst, 4519cdc920a0Smrg const union tgsi_exec_channel *src0, 4520cdc920a0Smrg const union tgsi_exec_channel *src1) 45214a49301eSmrg{ 4522cdc920a0Smrg dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4523cdc920a0Smrg dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4524cdc920a0Smrg dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4525cdc920a0Smrg dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4526cdc920a0Smrg} 45274a49301eSmrg 4528cdc920a0Smrgstatic void 4529cdc920a0Smrgmicro_isge(union tgsi_exec_channel *dst, 4530cdc920a0Smrg const union tgsi_exec_channel *src0, 4531cdc920a0Smrg const union tgsi_exec_channel *src1) 4532cdc920a0Smrg{ 4533cdc920a0Smrg dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4534cdc920a0Smrg dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4535cdc920a0Smrg dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4536cdc920a0Smrg dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 45374a49301eSmrg} 45384a49301eSmrg 45394a49301eSmrgstatic void 4540cdc920a0Smrgmicro_ishr(union tgsi_exec_channel *dst, 4541cdc920a0Smrg const union tgsi_exec_channel *src0, 4542cdc920a0Smrg const union tgsi_exec_channel *src1) 45434a49301eSmrg{ 4544af69d88dSmrg unsigned masked_count; 4545af69d88dSmrg masked_count = src1->i[0] & 0x1f; 4546af69d88dSmrg dst->i[0] = src0->i[0] >> masked_count; 4547af69d88dSmrg masked_count = src1->i[1] & 0x1f; 4548af69d88dSmrg dst->i[1] = src0->i[1] >> masked_count; 4549af69d88dSmrg masked_count = src1->i[2] & 0x1f; 4550af69d88dSmrg dst->i[2] = src0->i[2] >> masked_count; 4551af69d88dSmrg masked_count = src1->i[3] & 0x1f; 4552af69d88dSmrg dst->i[3] = src0->i[3] >> masked_count; 4553cdc920a0Smrg} 4554cdc920a0Smrg 4555cdc920a0Smrgstatic void 4556cdc920a0Smrgmicro_islt(union tgsi_exec_channel *dst, 4557cdc920a0Smrg const union tgsi_exec_channel *src0, 4558cdc920a0Smrg const union tgsi_exec_channel *src1) 4559cdc920a0Smrg{ 4560cdc920a0Smrg dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4561cdc920a0Smrg dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4562cdc920a0Smrg dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4563cdc920a0Smrg dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4564cdc920a0Smrg} 4565cdc920a0Smrg 4566cdc920a0Smrgstatic void 4567cdc920a0Smrgmicro_f2u(union tgsi_exec_channel *dst, 4568cdc920a0Smrg const union tgsi_exec_channel *src) 4569cdc920a0Smrg{ 4570cdc920a0Smrg dst->u[0] = (uint)src->f[0]; 4571cdc920a0Smrg dst->u[1] = (uint)src->f[1]; 4572cdc920a0Smrg dst->u[2] = (uint)src->f[2]; 4573cdc920a0Smrg dst->u[3] = (uint)src->f[3]; 4574cdc920a0Smrg} 4575cdc920a0Smrg 4576cdc920a0Smrgstatic void 4577cdc920a0Smrgmicro_u2f(union tgsi_exec_channel *dst, 4578cdc920a0Smrg const union tgsi_exec_channel *src) 4579cdc920a0Smrg{ 4580cdc920a0Smrg dst->f[0] = (float)src->u[0]; 4581cdc920a0Smrg dst->f[1] = (float)src->u[1]; 4582cdc920a0Smrg dst->f[2] = (float)src->u[2]; 4583cdc920a0Smrg dst->f[3] = (float)src->u[3]; 45844a49301eSmrg} 45854a49301eSmrg 45864a49301eSmrgstatic void 4587cdc920a0Smrgmicro_uadd(union tgsi_exec_channel *dst, 4588cdc920a0Smrg const union tgsi_exec_channel *src0, 4589cdc920a0Smrg const union tgsi_exec_channel *src1) 45904a49301eSmrg{ 4591cdc920a0Smrg dst->u[0] = src0->u[0] + src1->u[0]; 4592cdc920a0Smrg dst->u[1] = src0->u[1] + src1->u[1]; 4593cdc920a0Smrg dst->u[2] = src0->u[2] + src1->u[2]; 4594cdc920a0Smrg dst->u[3] = src0->u[3] + src1->u[3]; 45954a49301eSmrg} 45964a49301eSmrg 4597cdc920a0Smrgstatic void 4598cdc920a0Smrgmicro_udiv(union tgsi_exec_channel *dst, 4599cdc920a0Smrg const union tgsi_exec_channel *src0, 4600cdc920a0Smrg const union tgsi_exec_channel *src1) 4601cdc920a0Smrg{ 4602af69d88dSmrg dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4603af69d88dSmrg dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4604af69d88dSmrg dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4605af69d88dSmrg dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4606cdc920a0Smrg} 46074a49301eSmrg 4608cdc920a0Smrgstatic void 4609cdc920a0Smrgmicro_umad(union tgsi_exec_channel *dst, 4610cdc920a0Smrg const union tgsi_exec_channel *src0, 4611cdc920a0Smrg const union tgsi_exec_channel *src1, 4612cdc920a0Smrg const union tgsi_exec_channel *src2) 4613cdc920a0Smrg{ 4614cdc920a0Smrg dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4615cdc920a0Smrg dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4616cdc920a0Smrg dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4617cdc920a0Smrg dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4618cdc920a0Smrg} 46194a49301eSmrg 46204a49301eSmrgstatic void 4621cdc920a0Smrgmicro_umax(union tgsi_exec_channel *dst, 4622cdc920a0Smrg const union tgsi_exec_channel *src0, 4623cdc920a0Smrg const union tgsi_exec_channel *src1) 46244a49301eSmrg{ 4625cdc920a0Smrg dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4626cdc920a0Smrg dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4627cdc920a0Smrg dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4628cdc920a0Smrg dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4629cdc920a0Smrg} 46304a49301eSmrg 4631cdc920a0Smrgstatic void 4632cdc920a0Smrgmicro_umin(union tgsi_exec_channel *dst, 4633cdc920a0Smrg const union tgsi_exec_channel *src0, 4634cdc920a0Smrg const union tgsi_exec_channel *src1) 4635cdc920a0Smrg{ 4636cdc920a0Smrg dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4637cdc920a0Smrg dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4638cdc920a0Smrg dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4639cdc920a0Smrg dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4640cdc920a0Smrg} 46414a49301eSmrg 4642cdc920a0Smrgstatic void 4643cdc920a0Smrgmicro_umod(union tgsi_exec_channel *dst, 4644cdc920a0Smrg const union tgsi_exec_channel *src0, 4645cdc920a0Smrg const union tgsi_exec_channel *src1) 4646cdc920a0Smrg{ 4647af69d88dSmrg dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4648af69d88dSmrg dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4649af69d88dSmrg dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4650af69d88dSmrg dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4651cdc920a0Smrg} 46524a49301eSmrg 4653cdc920a0Smrgstatic void 4654cdc920a0Smrgmicro_umul(union tgsi_exec_channel *dst, 4655cdc920a0Smrg const union tgsi_exec_channel *src0, 4656cdc920a0Smrg const union tgsi_exec_channel *src1) 4657cdc920a0Smrg{ 4658cdc920a0Smrg dst->u[0] = src0->u[0] * src1->u[0]; 4659cdc920a0Smrg dst->u[1] = src0->u[1] * src1->u[1]; 4660cdc920a0Smrg dst->u[2] = src0->u[2] * src1->u[2]; 4661cdc920a0Smrg dst->u[3] = src0->u[3] * src1->u[3]; 4662cdc920a0Smrg} 46634a49301eSmrg 4664af69d88dSmrgstatic void 4665af69d88dSmrgmicro_imul_hi(union tgsi_exec_channel *dst, 4666af69d88dSmrg const union tgsi_exec_channel *src0, 4667af69d88dSmrg const union tgsi_exec_channel *src1) 4668af69d88dSmrg{ 4669af69d88dSmrg#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4670af69d88dSmrg dst->i[0] = I64M(src0->i[0], src1->i[0]); 4671af69d88dSmrg dst->i[1] = I64M(src0->i[1], src1->i[1]); 4672af69d88dSmrg dst->i[2] = I64M(src0->i[2], src1->i[2]); 4673af69d88dSmrg dst->i[3] = I64M(src0->i[3], src1->i[3]); 4674af69d88dSmrg#undef I64M 4675af69d88dSmrg} 4676af69d88dSmrg 4677af69d88dSmrgstatic void 4678af69d88dSmrgmicro_umul_hi(union tgsi_exec_channel *dst, 4679af69d88dSmrg const union tgsi_exec_channel *src0, 4680af69d88dSmrg const union tgsi_exec_channel *src1) 4681af69d88dSmrg{ 4682af69d88dSmrg#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4683af69d88dSmrg dst->u[0] = U64M(src0->u[0], src1->u[0]); 4684af69d88dSmrg dst->u[1] = U64M(src0->u[1], src1->u[1]); 4685af69d88dSmrg dst->u[2] = U64M(src0->u[2], src1->u[2]); 4686af69d88dSmrg dst->u[3] = U64M(src0->u[3], src1->u[3]); 4687af69d88dSmrg#undef U64M 4688af69d88dSmrg} 4689af69d88dSmrg 4690cdc920a0Smrgstatic void 4691cdc920a0Smrgmicro_useq(union tgsi_exec_channel *dst, 4692cdc920a0Smrg const union tgsi_exec_channel *src0, 4693cdc920a0Smrg const union tgsi_exec_channel *src1) 4694cdc920a0Smrg{ 4695cdc920a0Smrg dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4696cdc920a0Smrg dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4697cdc920a0Smrg dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4698cdc920a0Smrg dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4699cdc920a0Smrg} 47004a49301eSmrg 4701cdc920a0Smrgstatic void 4702cdc920a0Smrgmicro_usge(union tgsi_exec_channel *dst, 4703cdc920a0Smrg const union tgsi_exec_channel *src0, 4704cdc920a0Smrg const union tgsi_exec_channel *src1) 4705cdc920a0Smrg{ 4706cdc920a0Smrg dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4707cdc920a0Smrg dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4708cdc920a0Smrg dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4709cdc920a0Smrg dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4710cdc920a0Smrg} 47114a49301eSmrg 4712cdc920a0Smrgstatic void 4713cdc920a0Smrgmicro_ushr(union tgsi_exec_channel *dst, 4714cdc920a0Smrg const union tgsi_exec_channel *src0, 4715cdc920a0Smrg const union tgsi_exec_channel *src1) 4716cdc920a0Smrg{ 4717af69d88dSmrg unsigned masked_count; 4718af69d88dSmrg masked_count = src1->u[0] & 0x1f; 4719af69d88dSmrg dst->u[0] = src0->u[0] >> masked_count; 4720af69d88dSmrg masked_count = src1->u[1] & 0x1f; 4721af69d88dSmrg dst->u[1] = src0->u[1] >> masked_count; 4722af69d88dSmrg masked_count = src1->u[2] & 0x1f; 4723af69d88dSmrg dst->u[2] = src0->u[2] >> masked_count; 4724af69d88dSmrg masked_count = src1->u[3] & 0x1f; 4725af69d88dSmrg dst->u[3] = src0->u[3] >> masked_count; 4726cdc920a0Smrg} 47274a49301eSmrg 4728cdc920a0Smrgstatic void 4729cdc920a0Smrgmicro_uslt(union tgsi_exec_channel *dst, 4730cdc920a0Smrg const union tgsi_exec_channel *src0, 4731cdc920a0Smrg const union tgsi_exec_channel *src1) 4732cdc920a0Smrg{ 4733cdc920a0Smrg dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4734cdc920a0Smrg dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4735cdc920a0Smrg dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4736cdc920a0Smrg dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 4737cdc920a0Smrg} 47384a49301eSmrg 4739cdc920a0Smrgstatic void 4740cdc920a0Smrgmicro_usne(union tgsi_exec_channel *dst, 4741cdc920a0Smrg const union tgsi_exec_channel *src0, 4742cdc920a0Smrg const union tgsi_exec_channel *src1) 4743cdc920a0Smrg{ 4744cdc920a0Smrg dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4745cdc920a0Smrg dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4746cdc920a0Smrg dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4747cdc920a0Smrg dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 47484a49301eSmrg} 47494a49301eSmrg 4750af69d88dSmrgstatic void 4751af69d88dSmrgmicro_uarl(union tgsi_exec_channel *dst, 4752af69d88dSmrg const union tgsi_exec_channel *src) 4753af69d88dSmrg{ 4754af69d88dSmrg dst->i[0] = src->u[0]; 4755af69d88dSmrg dst->i[1] = src->u[1]; 4756af69d88dSmrg dst->i[2] = src->u[2]; 4757af69d88dSmrg dst->i[3] = src->u[3]; 4758af69d88dSmrg} 4759af69d88dSmrg 4760af69d88dSmrg/** 4761af69d88dSmrg * Signed bitfield extract (i.e. sign-extend the extracted bits) 4762af69d88dSmrg */ 4763af69d88dSmrgstatic void 4764af69d88dSmrgmicro_ibfe(union tgsi_exec_channel *dst, 4765af69d88dSmrg const union tgsi_exec_channel *src0, 4766af69d88dSmrg const union tgsi_exec_channel *src1, 4767af69d88dSmrg const union tgsi_exec_channel *src2) 4768af69d88dSmrg{ 4769af69d88dSmrg int i; 4770af69d88dSmrg for (i = 0; i < 4; i++) { 4771361fc4cbSmaya int width = src2->i[i]; 4772af69d88dSmrg int offset = src1->i[i] & 0x1f; 4773361fc4cbSmaya if (width == 32 && offset == 0) { 4774361fc4cbSmaya dst->i[i] = src0->i[i]; 4775361fc4cbSmaya continue; 4776361fc4cbSmaya } 4777361fc4cbSmaya width &= 0x1f; 4778af69d88dSmrg if (width == 0) 4779af69d88dSmrg dst->i[i] = 0; 4780af69d88dSmrg else if (width + offset < 32) 4781af69d88dSmrg dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 4782af69d88dSmrg else 4783af69d88dSmrg dst->i[i] = src0->i[i] >> offset; 4784af69d88dSmrg } 4785af69d88dSmrg} 4786af69d88dSmrg 4787af69d88dSmrg/** 4788af69d88dSmrg * Unsigned bitfield extract 4789af69d88dSmrg */ 4790af69d88dSmrgstatic void 4791af69d88dSmrgmicro_ubfe(union tgsi_exec_channel *dst, 4792af69d88dSmrg const union tgsi_exec_channel *src0, 4793af69d88dSmrg const union tgsi_exec_channel *src1, 4794af69d88dSmrg const union tgsi_exec_channel *src2) 4795af69d88dSmrg{ 4796af69d88dSmrg int i; 4797af69d88dSmrg for (i = 0; i < 4; i++) { 4798361fc4cbSmaya int width = src2->u[i]; 4799af69d88dSmrg int offset = src1->u[i] & 0x1f; 4800361fc4cbSmaya if (width == 32 && offset == 0) { 4801361fc4cbSmaya dst->u[i] = src0->u[i]; 4802361fc4cbSmaya continue; 4803361fc4cbSmaya } 4804361fc4cbSmaya width &= 0x1f; 4805af69d88dSmrg if (width == 0) 4806af69d88dSmrg dst->u[i] = 0; 4807af69d88dSmrg else if (width + offset < 32) 4808af69d88dSmrg dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 4809af69d88dSmrg else 4810af69d88dSmrg dst->u[i] = src0->u[i] >> offset; 4811af69d88dSmrg } 4812af69d88dSmrg} 4813af69d88dSmrg 4814af69d88dSmrg/** 4815af69d88dSmrg * Bitfield insert: copy low bits from src1 into a region of src0. 4816af69d88dSmrg */ 4817af69d88dSmrgstatic void 4818af69d88dSmrgmicro_bfi(union tgsi_exec_channel *dst, 4819af69d88dSmrg const union tgsi_exec_channel *src0, 4820af69d88dSmrg const union tgsi_exec_channel *src1, 4821af69d88dSmrg const union tgsi_exec_channel *src2, 4822af69d88dSmrg const union tgsi_exec_channel *src3) 4823af69d88dSmrg{ 4824af69d88dSmrg int i; 4825af69d88dSmrg for (i = 0; i < 4; i++) { 4826361fc4cbSmaya int width = src3->u[i]; 4827af69d88dSmrg int offset = src2->u[i] & 0x1f; 4828361fc4cbSmaya if (width == 32) { 4829361fc4cbSmaya dst->u[i] = src1->u[i]; 4830361fc4cbSmaya } else { 4831361fc4cbSmaya int bitmask = ((1 << width) - 1) << offset; 4832361fc4cbSmaya dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 4833361fc4cbSmaya } 4834af69d88dSmrg } 4835af69d88dSmrg} 4836af69d88dSmrg 4837af69d88dSmrgstatic void 4838af69d88dSmrgmicro_brev(union tgsi_exec_channel *dst, 4839af69d88dSmrg const union tgsi_exec_channel *src) 4840af69d88dSmrg{ 4841af69d88dSmrg dst->u[0] = util_bitreverse(src->u[0]); 4842af69d88dSmrg dst->u[1] = util_bitreverse(src->u[1]); 4843af69d88dSmrg dst->u[2] = util_bitreverse(src->u[2]); 4844af69d88dSmrg dst->u[3] = util_bitreverse(src->u[3]); 4845af69d88dSmrg} 4846af69d88dSmrg 4847af69d88dSmrgstatic void 4848af69d88dSmrgmicro_popc(union tgsi_exec_channel *dst, 4849af69d88dSmrg const union tgsi_exec_channel *src) 4850af69d88dSmrg{ 4851af69d88dSmrg dst->u[0] = util_bitcount(src->u[0]); 4852af69d88dSmrg dst->u[1] = util_bitcount(src->u[1]); 4853af69d88dSmrg dst->u[2] = util_bitcount(src->u[2]); 4854af69d88dSmrg dst->u[3] = util_bitcount(src->u[3]); 4855af69d88dSmrg} 4856af69d88dSmrg 4857af69d88dSmrgstatic void 4858af69d88dSmrgmicro_lsb(union tgsi_exec_channel *dst, 4859af69d88dSmrg const union tgsi_exec_channel *src) 4860af69d88dSmrg{ 4861af69d88dSmrg dst->i[0] = ffs(src->u[0]) - 1; 4862af69d88dSmrg dst->i[1] = ffs(src->u[1]) - 1; 4863af69d88dSmrg dst->i[2] = ffs(src->u[2]) - 1; 4864af69d88dSmrg dst->i[3] = ffs(src->u[3]) - 1; 4865af69d88dSmrg} 4866af69d88dSmrg 4867af69d88dSmrgstatic void 4868af69d88dSmrgmicro_imsb(union tgsi_exec_channel *dst, 4869af69d88dSmrg const union tgsi_exec_channel *src) 4870af69d88dSmrg{ 4871af69d88dSmrg dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 4872af69d88dSmrg dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 4873af69d88dSmrg dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 4874af69d88dSmrg dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 4875af69d88dSmrg} 4876af69d88dSmrg 4877af69d88dSmrgstatic void 4878af69d88dSmrgmicro_umsb(union tgsi_exec_channel *dst, 4879af69d88dSmrg const union tgsi_exec_channel *src) 4880af69d88dSmrg{ 4881af69d88dSmrg dst->i[0] = util_last_bit(src->u[0]) - 1; 4882af69d88dSmrg dst->i[1] = util_last_bit(src->u[1]) - 1; 4883af69d88dSmrg dst->i[2] = util_last_bit(src->u[2]) - 1; 4884af69d88dSmrg dst->i[3] = util_last_bit(src->u[3]) - 1; 4885af69d88dSmrg} 4886af69d88dSmrg 4887361fc4cbSmaya 4888361fc4cbSmayastatic void 4889361fc4cbSmayaexec_interp_at_sample(struct tgsi_exec_machine *mach, 4890361fc4cbSmaya const struct tgsi_full_instruction *inst) 4891361fc4cbSmaya{ 4892361fc4cbSmaya union tgsi_exec_channel index; 4893361fc4cbSmaya union tgsi_exec_channel index2D; 4894361fc4cbSmaya union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4895361fc4cbSmaya const struct tgsi_full_src_register *reg = &inst->Src[0]; 4896361fc4cbSmaya 4897361fc4cbSmaya assert(reg->Register.File == TGSI_FILE_INPUT); 4898361fc4cbSmaya assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE); 4899361fc4cbSmaya 4900361fc4cbSmaya get_index_registers(mach, reg, &index, &index2D); 4901361fc4cbSmaya float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX]; 4902361fc4cbSmaya 4903361fc4cbSmaya /* Short cut: sample 0 is like a normal fetch */ 4904361fc4cbSmaya for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4905361fc4cbSmaya if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4906361fc4cbSmaya continue; 4907361fc4cbSmaya 4908361fc4cbSmaya fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4909361fc4cbSmaya &result[chan]); 4910361fc4cbSmaya if (sample != 0.0f) { 4911361fc4cbSmaya 4912361fc4cbSmaya /* TODO: define the samples > 0, but so far we only do fake MSAA */ 4913361fc4cbSmaya float x = 0; 4914361fc4cbSmaya float y = 0; 4915361fc4cbSmaya 4916361fc4cbSmaya unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan]; 4917361fc4cbSmaya assert(pos >= 0); 4918361fc4cbSmaya assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 4919361fc4cbSmaya mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]); 4920361fc4cbSmaya } 49217ec681f3Smrg store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4922361fc4cbSmaya } 4923361fc4cbSmaya} 4924361fc4cbSmaya 4925361fc4cbSmaya 4926361fc4cbSmayastatic void 4927361fc4cbSmayaexec_interp_at_offset(struct tgsi_exec_machine *mach, 4928361fc4cbSmaya const struct tgsi_full_instruction *inst) 4929361fc4cbSmaya{ 4930361fc4cbSmaya union tgsi_exec_channel index; 4931361fc4cbSmaya union tgsi_exec_channel index2D; 4932361fc4cbSmaya union tgsi_exec_channel ofsx; 4933361fc4cbSmaya union tgsi_exec_channel ofsy; 4934361fc4cbSmaya const struct tgsi_full_src_register *reg = &inst->Src[0]; 4935361fc4cbSmaya 4936361fc4cbSmaya assert(reg->Register.File == TGSI_FILE_INPUT); 4937361fc4cbSmaya 4938361fc4cbSmaya get_index_registers(mach, reg, &index, &index2D); 4939361fc4cbSmaya unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0]; 4940361fc4cbSmaya 4941361fc4cbSmaya fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 4942361fc4cbSmaya fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 4943361fc4cbSmaya 4944361fc4cbSmaya for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4945361fc4cbSmaya if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4946361fc4cbSmaya continue; 4947361fc4cbSmaya union tgsi_exec_channel result; 4948361fc4cbSmaya fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result); 4949361fc4cbSmaya mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result); 49507ec681f3Smrg store_dest(mach, &result, &inst->Dst[0], inst, chan); 4951361fc4cbSmaya } 4952361fc4cbSmaya} 4953361fc4cbSmaya 4954361fc4cbSmaya 4955361fc4cbSmayastatic void 4956361fc4cbSmayaexec_interp_at_centroid(struct tgsi_exec_machine *mach, 4957361fc4cbSmaya const struct tgsi_full_instruction *inst) 4958361fc4cbSmaya{ 4959361fc4cbSmaya union tgsi_exec_channel index; 4960361fc4cbSmaya union tgsi_exec_channel index2D; 4961361fc4cbSmaya union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4962361fc4cbSmaya const struct tgsi_full_src_register *reg = &inst->Src[0]; 4963361fc4cbSmaya 4964361fc4cbSmaya assert(reg->Register.File == TGSI_FILE_INPUT); 4965361fc4cbSmaya get_index_registers(mach, reg, &index, &index2D); 4966361fc4cbSmaya 4967361fc4cbSmaya for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4968361fc4cbSmaya if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4969361fc4cbSmaya continue; 4970361fc4cbSmaya 4971361fc4cbSmaya /* Here we should add the change to use a sample that lies within the 4972361fc4cbSmaya * primitive (Section 15.2): 4973361fc4cbSmaya * 4974361fc4cbSmaya * "When interpolating variables declared using centroid in , 4975361fc4cbSmaya * the variable is sampled at a location within the pixel covered 4976361fc4cbSmaya * by the primitive generating the fragment. 4977361fc4cbSmaya * ... 4978361fc4cbSmaya * The built-in functions interpolateAtCentroid ... will sample 4979361fc4cbSmaya * variables as though they were declared with the centroid ... 4980361fc4cbSmaya * qualifier[s]." 4981361fc4cbSmaya * 4982361fc4cbSmaya * Since we only support 1 sample currently, this is just a pass-through. 4983361fc4cbSmaya */ 4984361fc4cbSmaya fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4985361fc4cbSmaya &result[chan]); 49867ec681f3Smrg store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4987361fc4cbSmaya } 4988361fc4cbSmaya 4989361fc4cbSmaya} 4990361fc4cbSmaya 4991361fc4cbSmaya 499201e04c3fSmrg/** 499301e04c3fSmrg * Execute a TGSI instruction. 499401e04c3fSmrg * Returns TRUE if a barrier instruction is hit, 499501e04c3fSmrg * otherwise FALSE. 499601e04c3fSmrg */ 499701e04c3fSmrgstatic boolean 49984a49301eSmrgexec_instruction( 49994a49301eSmrg struct tgsi_exec_machine *mach, 50004a49301eSmrg const struct tgsi_full_instruction *inst, 50014a49301eSmrg int *pc ) 50024a49301eSmrg{ 5003cdc920a0Smrg union tgsi_exec_channel r[10]; 50044a49301eSmrg 50054a49301eSmrg (*pc)++; 50064a49301eSmrg 50074a49301eSmrg switch (inst->Instruction.Opcode) { 50084a49301eSmrg case TGSI_OPCODE_ARL: 50097ec681f3Smrg exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT); 50104a49301eSmrg break; 50114a49301eSmrg 50124a49301eSmrg case TGSI_OPCODE_MOV: 50137ec681f3Smrg exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT); 50144a49301eSmrg break; 50154a49301eSmrg 50164a49301eSmrg case TGSI_OPCODE_LIT: 50173464ebd5Sriastradh exec_lit(mach, inst); 50184a49301eSmrg break; 50194a49301eSmrg 50204a49301eSmrg case TGSI_OPCODE_RCP: 50217ec681f3Smrg exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT); 50224a49301eSmrg break; 50234a49301eSmrg 50244a49301eSmrg case TGSI_OPCODE_RSQ: 50257ec681f3Smrg exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT); 50264a49301eSmrg break; 50274a49301eSmrg 50284a49301eSmrg case TGSI_OPCODE_EXP: 50293464ebd5Sriastradh exec_exp(mach, inst); 50304a49301eSmrg break; 50314a49301eSmrg 50324a49301eSmrg case TGSI_OPCODE_LOG: 50333464ebd5Sriastradh exec_log(mach, inst); 50344a49301eSmrg break; 50354a49301eSmrg 50364a49301eSmrg case TGSI_OPCODE_MUL: 50377ec681f3Smrg exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT); 50384a49301eSmrg break; 50394a49301eSmrg 50404a49301eSmrg case TGSI_OPCODE_ADD: 50417ec681f3Smrg exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT); 50424a49301eSmrg break; 50434a49301eSmrg 50444a49301eSmrg case TGSI_OPCODE_DP3: 5045cdc920a0Smrg exec_dp3(mach, inst); 50464a49301eSmrg break; 50474a49301eSmrg 5048cdc920a0Smrg case TGSI_OPCODE_DP4: 5049cdc920a0Smrg exec_dp4(mach, inst); 50504a49301eSmrg break; 50514a49301eSmrg 50524a49301eSmrg case TGSI_OPCODE_DST: 50533464ebd5Sriastradh exec_dst(mach, inst); 50544a49301eSmrg break; 50554a49301eSmrg 50564a49301eSmrg case TGSI_OPCODE_MIN: 50577ec681f3Smrg exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT); 50584a49301eSmrg break; 50594a49301eSmrg 50604a49301eSmrg case TGSI_OPCODE_MAX: 50617ec681f3Smrg exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT); 50624a49301eSmrg break; 50634a49301eSmrg 50644a49301eSmrg case TGSI_OPCODE_SLT: 50657ec681f3Smrg exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT); 50664a49301eSmrg break; 50674a49301eSmrg 50684a49301eSmrg case TGSI_OPCODE_SGE: 50697ec681f3Smrg exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT); 50704a49301eSmrg break; 50714a49301eSmrg 50724a49301eSmrg case TGSI_OPCODE_MAD: 50737ec681f3Smrg exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT); 50744a49301eSmrg break; 50754a49301eSmrg 50764a49301eSmrg case TGSI_OPCODE_LRP: 50777ec681f3Smrg exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT); 50784a49301eSmrg break; 50794a49301eSmrg 5080af69d88dSmrg case TGSI_OPCODE_SQRT: 50817ec681f3Smrg exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT); 5082af69d88dSmrg break; 5083af69d88dSmrg 50844a49301eSmrg case TGSI_OPCODE_FRC: 50857ec681f3Smrg exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT); 50864a49301eSmrg break; 50874a49301eSmrg 5088cdc920a0Smrg case TGSI_OPCODE_FLR: 50897ec681f3Smrg exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT); 50904a49301eSmrg break; 50914a49301eSmrg 50924a49301eSmrg case TGSI_OPCODE_ROUND: 50937ec681f3Smrg exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT); 50944a49301eSmrg break; 50954a49301eSmrg 50964a49301eSmrg case TGSI_OPCODE_EX2: 50977ec681f3Smrg exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT); 50984a49301eSmrg break; 50994a49301eSmrg 51004a49301eSmrg case TGSI_OPCODE_LG2: 51017ec681f3Smrg exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT); 51024a49301eSmrg break; 51034a49301eSmrg 51044a49301eSmrg case TGSI_OPCODE_POW: 51057ec681f3Smrg exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT); 51064a49301eSmrg break; 51074a49301eSmrg 510801e04c3fSmrg case TGSI_OPCODE_LDEXP: 51097ec681f3Smrg exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT); 51104a49301eSmrg break; 51114a49301eSmrg 5112cdc920a0Smrg case TGSI_OPCODE_COS: 51137ec681f3Smrg exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT); 51147ec681f3Smrg break; 51157ec681f3Smrg 51167ec681f3Smrg case TGSI_OPCODE_DDX_FINE: 51177ec681f3Smrg exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT); 51184a49301eSmrg break; 51194a49301eSmrg 51204a49301eSmrg case TGSI_OPCODE_DDX: 51217ec681f3Smrg exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT); 51227ec681f3Smrg break; 51237ec681f3Smrg 51247ec681f3Smrg case TGSI_OPCODE_DDY_FINE: 51257ec681f3Smrg exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT); 51264a49301eSmrg break; 51274a49301eSmrg 51284a49301eSmrg case TGSI_OPCODE_DDY: 51297ec681f3Smrg exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT); 51304a49301eSmrg break; 51314a49301eSmrg 5132af69d88dSmrg case TGSI_OPCODE_KILL: 513301e04c3fSmrg exec_kill (mach); 51344a49301eSmrg break; 51354a49301eSmrg 5136af69d88dSmrg case TGSI_OPCODE_KILL_IF: 5137af69d88dSmrg exec_kill_if (mach, inst); 51384a49301eSmrg break; 51394a49301eSmrg 51404a49301eSmrg case TGSI_OPCODE_PK2H: 514101e04c3fSmrg exec_pk2h(mach, inst); 51424a49301eSmrg break; 51434a49301eSmrg 51444a49301eSmrg case TGSI_OPCODE_PK2US: 51454a49301eSmrg assert (0); 51464a49301eSmrg break; 51474a49301eSmrg 51484a49301eSmrg case TGSI_OPCODE_PK4B: 51494a49301eSmrg assert (0); 51504a49301eSmrg break; 51514a49301eSmrg 51524a49301eSmrg case TGSI_OPCODE_PK4UB: 51534a49301eSmrg assert (0); 51544a49301eSmrg break; 51554a49301eSmrg 51564a49301eSmrg case TGSI_OPCODE_SEQ: 51577ec681f3Smrg exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT); 51584a49301eSmrg break; 51594a49301eSmrg 51604a49301eSmrg case TGSI_OPCODE_SGT: 51617ec681f3Smrg exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT); 51624a49301eSmrg break; 51634a49301eSmrg 51644a49301eSmrg case TGSI_OPCODE_SIN: 51657ec681f3Smrg exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT); 51664a49301eSmrg break; 51674a49301eSmrg 51684a49301eSmrg case TGSI_OPCODE_SLE: 51697ec681f3Smrg exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT); 51704a49301eSmrg break; 51714a49301eSmrg 51724a49301eSmrg case TGSI_OPCODE_SNE: 51737ec681f3Smrg exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT); 51744a49301eSmrg break; 51754a49301eSmrg 51764a49301eSmrg case TGSI_OPCODE_TEX: 51774a49301eSmrg /* simple texture lookup */ 51784a49301eSmrg /* src[0] = texcoord */ 51794a49301eSmrg /* src[1] = sampler unit */ 5180af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 51814a49301eSmrg break; 51824a49301eSmrg 51834a49301eSmrg case TGSI_OPCODE_TXB: 51844a49301eSmrg /* Texture lookup with lod bias */ 51854a49301eSmrg /* src[0] = texcoord (src[0].w = LOD bias) */ 51864a49301eSmrg /* src[1] = sampler unit */ 5187af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 51884a49301eSmrg break; 51894a49301eSmrg 51904a49301eSmrg case TGSI_OPCODE_TXD: 51914a49301eSmrg /* Texture lookup with explict partial derivatives */ 51924a49301eSmrg /* src[0] = texcoord */ 51934a49301eSmrg /* src[1] = d[strq]/dx */ 51944a49301eSmrg /* src[2] = d[strq]/dy */ 51954a49301eSmrg /* src[3] = sampler unit */ 5196cdc920a0Smrg exec_txd(mach, inst); 51974a49301eSmrg break; 51984a49301eSmrg 51994a49301eSmrg case TGSI_OPCODE_TXL: 52004a49301eSmrg /* Texture lookup with explit LOD */ 52014a49301eSmrg /* src[0] = texcoord (src[0].w = LOD) */ 52024a49301eSmrg /* src[1] = sampler unit */ 5203af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 52044a49301eSmrg break; 52054a49301eSmrg 52064a49301eSmrg case TGSI_OPCODE_TXP: 52074a49301eSmrg /* Texture lookup with projection */ 52084a49301eSmrg /* src[0] = texcoord (src[0].w = projection) */ 52094a49301eSmrg /* src[1] = sampler unit */ 5210af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 52114a49301eSmrg break; 52124a49301eSmrg 521301e04c3fSmrg case TGSI_OPCODE_TG4: 521401e04c3fSmrg /* src[0] = texcoord */ 521501e04c3fSmrg /* src[1] = component */ 521601e04c3fSmrg /* src[2] = sampler unit */ 521701e04c3fSmrg exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 521801e04c3fSmrg break; 521901e04c3fSmrg 522001e04c3fSmrg case TGSI_OPCODE_LODQ: 522101e04c3fSmrg /* src[0] = texcoord */ 522201e04c3fSmrg /* src[1] = sampler unit */ 522301e04c3fSmrg exec_lodq(mach, inst); 522401e04c3fSmrg break; 522501e04c3fSmrg 52264a49301eSmrg case TGSI_OPCODE_UP2H: 522701e04c3fSmrg exec_up2h(mach, inst); 52284a49301eSmrg break; 52294a49301eSmrg 52304a49301eSmrg case TGSI_OPCODE_UP2US: 52314a49301eSmrg assert (0); 52324a49301eSmrg break; 52334a49301eSmrg 52344a49301eSmrg case TGSI_OPCODE_UP4B: 52354a49301eSmrg assert (0); 52364a49301eSmrg break; 52374a49301eSmrg 52384a49301eSmrg case TGSI_OPCODE_UP4UB: 52394a49301eSmrg assert (0); 52404a49301eSmrg break; 52414a49301eSmrg 5242cdc920a0Smrg case TGSI_OPCODE_ARR: 52437ec681f3Smrg exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT); 5244cdc920a0Smrg break; 5245cdc920a0Smrg 52464a49301eSmrg case TGSI_OPCODE_CAL: 52474a49301eSmrg /* skip the call if no execution channels are enabled */ 52484a49301eSmrg if (mach->ExecMask) { 52494a49301eSmrg /* do the call */ 52504a49301eSmrg 52514a49301eSmrg /* First, record the depths of the execution stacks. 52524a49301eSmrg * This is important for deeply nested/looped return statements. 52534a49301eSmrg * We have to unwind the stacks by the correct amount. For a 52544a49301eSmrg * real code generator, we could determine the number of entries 52554a49301eSmrg * to pop off each stack with simple static analysis and avoid 52564a49301eSmrg * implementing this data structure at run time. 52574a49301eSmrg */ 52584a49301eSmrg mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 52594a49301eSmrg mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 52604a49301eSmrg mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5261cdc920a0Smrg mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5262cdc920a0Smrg mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 52634a49301eSmrg /* note that PC was already incremented above */ 52644a49301eSmrg mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 52654a49301eSmrg 52664a49301eSmrg mach->CallStackTop++; 52674a49301eSmrg 52684a49301eSmrg /* Second, push the Cond, Loop, Cont, Func stacks */ 52694a49301eSmrg assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 52704a49301eSmrg assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 52714a49301eSmrg assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5272cdc920a0Smrg assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5273cdc920a0Smrg assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 52744a49301eSmrg assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5275cdc920a0Smrg 5276cdc920a0Smrg mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5277cdc920a0Smrg mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5278cdc920a0Smrg mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5279cdc920a0Smrg mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5280cdc920a0Smrg mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 52814a49301eSmrg mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 52824a49301eSmrg 528301e04c3fSmrg /* Finally, jump to the subroutine. The label is a pointer 528401e04c3fSmrg * (an instruction number) to the BGNSUB instruction. 528501e04c3fSmrg */ 5286cdc920a0Smrg *pc = inst->Label.Label; 528701e04c3fSmrg assert(mach->Instructions[*pc].Instruction.Opcode 528801e04c3fSmrg == TGSI_OPCODE_BGNSUB); 52894a49301eSmrg } 52904a49301eSmrg break; 52914a49301eSmrg 52924a49301eSmrg case TGSI_OPCODE_RET: 52934a49301eSmrg mach->FuncMask &= ~mach->ExecMask; 52944a49301eSmrg UPDATE_EXEC_MASK(mach); 52954a49301eSmrg 52964a49301eSmrg if (mach->FuncMask == 0x0) { 52974a49301eSmrg /* really return now (otherwise, keep executing */ 52984a49301eSmrg 52994a49301eSmrg if (mach->CallStackTop == 0) { 53004a49301eSmrg /* returning from main() */ 53013464ebd5Sriastradh mach->CondStackTop = 0; 53023464ebd5Sriastradh mach->LoopStackTop = 0; 530301e04c3fSmrg mach->ContStackTop = 0; 530401e04c3fSmrg mach->LoopLabelStackTop = 0; 530501e04c3fSmrg mach->SwitchStackTop = 0; 530601e04c3fSmrg mach->BreakStackTop = 0; 53074a49301eSmrg *pc = -1; 530801e04c3fSmrg return FALSE; 53094a49301eSmrg } 53104a49301eSmrg 53114a49301eSmrg assert(mach->CallStackTop > 0); 53124a49301eSmrg mach->CallStackTop--; 53134a49301eSmrg 53144a49301eSmrg mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 53154a49301eSmrg mach->CondMask = mach->CondStack[mach->CondStackTop]; 53164a49301eSmrg 53174a49301eSmrg mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 53184a49301eSmrg mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 53194a49301eSmrg 53204a49301eSmrg mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 53214a49301eSmrg mach->ContMask = mach->ContStack[mach->ContStackTop]; 53224a49301eSmrg 5323cdc920a0Smrg mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5324cdc920a0Smrg mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5325cdc920a0Smrg 5326cdc920a0Smrg mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5327cdc920a0Smrg mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5328cdc920a0Smrg 53294a49301eSmrg assert(mach->FuncStackTop > 0); 53304a49301eSmrg mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 53314a49301eSmrg 53324a49301eSmrg *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 53334a49301eSmrg 53344a49301eSmrg UPDATE_EXEC_MASK(mach); 53354a49301eSmrg } 53364a49301eSmrg break; 53374a49301eSmrg 53384a49301eSmrg case TGSI_OPCODE_SSG: 53397ec681f3Smrg exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT); 53404a49301eSmrg break; 53414a49301eSmrg 53424a49301eSmrg case TGSI_OPCODE_CMP: 53437ec681f3Smrg exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT); 53444a49301eSmrg break; 53454a49301eSmrg 53464a49301eSmrg case TGSI_OPCODE_DIV: 53477ec681f3Smrg exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT); 53484a49301eSmrg break; 53494a49301eSmrg 53504a49301eSmrg case TGSI_OPCODE_DP2: 5351cdc920a0Smrg exec_dp2(mach, inst); 53524a49301eSmrg break; 53534a49301eSmrg 53544a49301eSmrg case TGSI_OPCODE_IF: 53554a49301eSmrg /* push CondMask */ 53564a49301eSmrg assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 53574a49301eSmrg mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5358af69d88dSmrg FETCH( &r[0], 0, TGSI_CHAN_X ); 53597ec681f3Smrg for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 53607ec681f3Smrg if (!r[0].f[i]) 53617ec681f3Smrg mach->CondMask &= ~(1 << i); 5362af69d88dSmrg } 5363af69d88dSmrg UPDATE_EXEC_MASK(mach); 53647ec681f3Smrg /* If no channels are taking the then branch, jump to ELSE. */ 53657ec681f3Smrg if (!mach->CondMask) 53667ec681f3Smrg *pc = inst->Label.Label; 5367af69d88dSmrg break; 5368af69d88dSmrg 5369af69d88dSmrg case TGSI_OPCODE_UIF: 5370af69d88dSmrg /* push CondMask */ 5371af69d88dSmrg assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5372af69d88dSmrg mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5373af69d88dSmrg IFETCH( &r[0], 0, TGSI_CHAN_X ); 53747ec681f3Smrg for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 53757ec681f3Smrg if (!r[0].u[i]) 53767ec681f3Smrg mach->CondMask &= ~(1 << i); 53774a49301eSmrg } 53784a49301eSmrg UPDATE_EXEC_MASK(mach); 53797ec681f3Smrg /* If no channels are taking the then branch, jump to ELSE. */ 53807ec681f3Smrg if (!mach->CondMask) 53817ec681f3Smrg *pc = inst->Label.Label; 53824a49301eSmrg break; 53834a49301eSmrg 53844a49301eSmrg case TGSI_OPCODE_ELSE: 53854a49301eSmrg /* invert CondMask wrt previous mask */ 53864a49301eSmrg { 53874a49301eSmrg uint prevMask; 53884a49301eSmrg assert(mach->CondStackTop > 0); 53894a49301eSmrg prevMask = mach->CondStack[mach->CondStackTop - 1]; 53904a49301eSmrg mach->CondMask = ~mach->CondMask & prevMask; 53914a49301eSmrg UPDATE_EXEC_MASK(mach); 53927ec681f3Smrg 53937ec681f3Smrg /* If no channels are taking ELSE, jump to ENDIF */ 53947ec681f3Smrg if (!mach->CondMask) 53957ec681f3Smrg *pc = inst->Label.Label; 53964a49301eSmrg } 53974a49301eSmrg break; 53984a49301eSmrg 53994a49301eSmrg case TGSI_OPCODE_ENDIF: 54004a49301eSmrg /* pop CondMask */ 54014a49301eSmrg assert(mach->CondStackTop > 0); 54024a49301eSmrg mach->CondMask = mach->CondStack[--mach->CondStackTop]; 54034a49301eSmrg UPDATE_EXEC_MASK(mach); 54044a49301eSmrg break; 54054a49301eSmrg 54064a49301eSmrg case TGSI_OPCODE_END: 54073464ebd5Sriastradh /* make sure we end primitives which haven't 54083464ebd5Sriastradh * been explicitly emitted */ 54093464ebd5Sriastradh conditional_emit_primitive(mach); 54104a49301eSmrg /* halt execution */ 54114a49301eSmrg *pc = -1; 54124a49301eSmrg break; 54134a49301eSmrg 54144a49301eSmrg case TGSI_OPCODE_CEIL: 54157ec681f3Smrg exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT); 54164a49301eSmrg break; 54174a49301eSmrg 54184a49301eSmrg case TGSI_OPCODE_I2F: 54197ec681f3Smrg exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT); 54204a49301eSmrg break; 54214a49301eSmrg 54224a49301eSmrg case TGSI_OPCODE_NOT: 54237ec681f3Smrg exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT); 54244a49301eSmrg break; 54254a49301eSmrg 54264a49301eSmrg case TGSI_OPCODE_TRUNC: 54277ec681f3Smrg exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT); 54284a49301eSmrg break; 54294a49301eSmrg 54304a49301eSmrg case TGSI_OPCODE_SHL: 54317ec681f3Smrg exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT); 54324a49301eSmrg break; 54334a49301eSmrg 54344a49301eSmrg case TGSI_OPCODE_AND: 54357ec681f3Smrg exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT); 54364a49301eSmrg break; 54374a49301eSmrg 54384a49301eSmrg case TGSI_OPCODE_OR: 54397ec681f3Smrg exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT); 54404a49301eSmrg break; 54414a49301eSmrg 54424a49301eSmrg case TGSI_OPCODE_MOD: 54437ec681f3Smrg exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT); 54444a49301eSmrg break; 54454a49301eSmrg 54464a49301eSmrg case TGSI_OPCODE_XOR: 54477ec681f3Smrg exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT); 54484a49301eSmrg break; 54494a49301eSmrg 54504a49301eSmrg case TGSI_OPCODE_TXF: 5451af69d88dSmrg exec_txf(mach, inst); 54524a49301eSmrg break; 54534a49301eSmrg 54544a49301eSmrg case TGSI_OPCODE_TXQ: 5455af69d88dSmrg exec_txq(mach, inst); 54564a49301eSmrg break; 54574a49301eSmrg 54584a49301eSmrg case TGSI_OPCODE_EMIT: 5459361fc4cbSmaya emit_vertex(mach, inst); 54604a49301eSmrg break; 54614a49301eSmrg 54624a49301eSmrg case TGSI_OPCODE_ENDPRIM: 5463361fc4cbSmaya emit_primitive(mach, inst); 54644a49301eSmrg break; 54654a49301eSmrg 54664a49301eSmrg case TGSI_OPCODE_BGNLOOP: 54674a49301eSmrg /* push LoopMask and ContMasks */ 54684a49301eSmrg assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 54694a49301eSmrg assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 54704a49301eSmrg assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5471cdc920a0Smrg assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5472cdc920a0Smrg 5473cdc920a0Smrg mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5474cdc920a0Smrg mach->ContStack[mach->ContStackTop++] = mach->ContMask; 54754a49301eSmrg mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5476cdc920a0Smrg mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5477cdc920a0Smrg mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 54784a49301eSmrg break; 54794a49301eSmrg 54804a49301eSmrg case TGSI_OPCODE_ENDLOOP: 54814a49301eSmrg /* Restore ContMask, but don't pop */ 54824a49301eSmrg assert(mach->ContStackTop > 0); 54834a49301eSmrg mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 54844a49301eSmrg UPDATE_EXEC_MASK(mach); 54854a49301eSmrg if (mach->ExecMask) { 54864a49301eSmrg /* repeat loop: jump to instruction just past BGNLOOP */ 54874a49301eSmrg assert(mach->LoopLabelStackTop > 0); 54884a49301eSmrg *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 54894a49301eSmrg } 54904a49301eSmrg else { 54914a49301eSmrg /* exit loop: pop LoopMask */ 54924a49301eSmrg assert(mach->LoopStackTop > 0); 54934a49301eSmrg mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 54944a49301eSmrg /* pop ContMask */ 54954a49301eSmrg assert(mach->ContStackTop > 0); 54964a49301eSmrg mach->ContMask = mach->ContStack[--mach->ContStackTop]; 54974a49301eSmrg assert(mach->LoopLabelStackTop > 0); 54984a49301eSmrg --mach->LoopLabelStackTop; 5499cdc920a0Smrg 5500cdc920a0Smrg mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 55014a49301eSmrg } 55024a49301eSmrg UPDATE_EXEC_MASK(mach); 55034a49301eSmrg break; 55044a49301eSmrg 55054a49301eSmrg case TGSI_OPCODE_BRK: 5506cdc920a0Smrg exec_break(mach); 55074a49301eSmrg break; 55084a49301eSmrg 55094a49301eSmrg case TGSI_OPCODE_CONT: 55104a49301eSmrg /* turn off cont channels for each enabled exec channel */ 55114a49301eSmrg mach->ContMask &= ~mach->ExecMask; 55124a49301eSmrg /* Todo: if mach->LoopMask == 0, jump to end of loop */ 55134a49301eSmrg UPDATE_EXEC_MASK(mach); 55144a49301eSmrg break; 55154a49301eSmrg 55164a49301eSmrg case TGSI_OPCODE_BGNSUB: 55174a49301eSmrg /* no-op */ 55184a49301eSmrg break; 55194a49301eSmrg 55204a49301eSmrg case TGSI_OPCODE_ENDSUB: 5521cdc920a0Smrg /* 5522cdc920a0Smrg * XXX: This really should be a no-op. We should never reach this opcode. 5523cdc920a0Smrg */ 5524cdc920a0Smrg 5525cdc920a0Smrg assert(mach->CallStackTop > 0); 5526cdc920a0Smrg mach->CallStackTop--; 5527cdc920a0Smrg 5528cdc920a0Smrg mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5529cdc920a0Smrg mach->CondMask = mach->CondStack[mach->CondStackTop]; 5530cdc920a0Smrg 5531cdc920a0Smrg mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5532cdc920a0Smrg mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5533cdc920a0Smrg 5534cdc920a0Smrg mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5535cdc920a0Smrg mach->ContMask = mach->ContStack[mach->ContStackTop]; 5536cdc920a0Smrg 5537cdc920a0Smrg mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5538cdc920a0Smrg mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5539cdc920a0Smrg 5540cdc920a0Smrg mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5541cdc920a0Smrg mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5542cdc920a0Smrg 5543cdc920a0Smrg assert(mach->FuncStackTop > 0); 5544cdc920a0Smrg mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5545cdc920a0Smrg 5546cdc920a0Smrg *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5547cdc920a0Smrg 5548cdc920a0Smrg UPDATE_EXEC_MASK(mach); 55494a49301eSmrg break; 55504a49301eSmrg 55514a49301eSmrg case TGSI_OPCODE_NOP: 55524a49301eSmrg break; 55534a49301eSmrg 5554cdc920a0Smrg case TGSI_OPCODE_F2I: 55557ec681f3Smrg exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT); 5556cdc920a0Smrg break; 5557cdc920a0Smrg 5558af69d88dSmrg case TGSI_OPCODE_FSEQ: 55597ec681f3Smrg exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT); 5560af69d88dSmrg break; 5561af69d88dSmrg 5562af69d88dSmrg case TGSI_OPCODE_FSGE: 55637ec681f3Smrg exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT); 5564af69d88dSmrg break; 5565af69d88dSmrg 5566af69d88dSmrg case TGSI_OPCODE_FSLT: 55677ec681f3Smrg exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT); 5568af69d88dSmrg break; 5569af69d88dSmrg 5570af69d88dSmrg case TGSI_OPCODE_FSNE: 55717ec681f3Smrg exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT); 5572af69d88dSmrg break; 5573af69d88dSmrg 5574cdc920a0Smrg case TGSI_OPCODE_IDIV: 55757ec681f3Smrg exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT); 5576cdc920a0Smrg break; 5577cdc920a0Smrg 5578cdc920a0Smrg case TGSI_OPCODE_IMAX: 55797ec681f3Smrg exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT); 5580cdc920a0Smrg break; 5581cdc920a0Smrg 5582cdc920a0Smrg case TGSI_OPCODE_IMIN: 55837ec681f3Smrg exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT); 5584cdc920a0Smrg break; 5585cdc920a0Smrg 5586cdc920a0Smrg case TGSI_OPCODE_INEG: 55877ec681f3Smrg exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT); 5588cdc920a0Smrg break; 5589cdc920a0Smrg 5590cdc920a0Smrg case TGSI_OPCODE_ISGE: 55917ec681f3Smrg exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT); 5592cdc920a0Smrg break; 5593cdc920a0Smrg 5594cdc920a0Smrg case TGSI_OPCODE_ISHR: 55957ec681f3Smrg exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT); 5596cdc920a0Smrg break; 5597cdc920a0Smrg 5598cdc920a0Smrg case TGSI_OPCODE_ISLT: 55997ec681f3Smrg exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT); 5600cdc920a0Smrg break; 5601cdc920a0Smrg 5602cdc920a0Smrg case TGSI_OPCODE_F2U: 56037ec681f3Smrg exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT); 5604cdc920a0Smrg break; 5605cdc920a0Smrg 5606cdc920a0Smrg case TGSI_OPCODE_U2F: 56077ec681f3Smrg exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT); 5608cdc920a0Smrg break; 5609cdc920a0Smrg 5610cdc920a0Smrg case TGSI_OPCODE_UADD: 56117ec681f3Smrg exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT); 5612cdc920a0Smrg break; 5613cdc920a0Smrg 5614cdc920a0Smrg case TGSI_OPCODE_UDIV: 56157ec681f3Smrg exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT); 5616cdc920a0Smrg break; 5617cdc920a0Smrg 5618cdc920a0Smrg case TGSI_OPCODE_UMAD: 56197ec681f3Smrg exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT); 5620cdc920a0Smrg break; 5621cdc920a0Smrg 5622cdc920a0Smrg case TGSI_OPCODE_UMAX: 56237ec681f3Smrg exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT); 5624cdc920a0Smrg break; 5625cdc920a0Smrg 5626cdc920a0Smrg case TGSI_OPCODE_UMIN: 56277ec681f3Smrg exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT); 5628cdc920a0Smrg break; 5629cdc920a0Smrg 5630cdc920a0Smrg case TGSI_OPCODE_UMOD: 56317ec681f3Smrg exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT); 5632cdc920a0Smrg break; 5633cdc920a0Smrg 5634cdc920a0Smrg case TGSI_OPCODE_UMUL: 56357ec681f3Smrg exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT); 5636cdc920a0Smrg break; 5637cdc920a0Smrg 5638af69d88dSmrg case TGSI_OPCODE_IMUL_HI: 56397ec681f3Smrg exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT); 5640af69d88dSmrg break; 5641af69d88dSmrg 5642af69d88dSmrg case TGSI_OPCODE_UMUL_HI: 56437ec681f3Smrg exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT); 5644af69d88dSmrg break; 5645af69d88dSmrg 5646cdc920a0Smrg case TGSI_OPCODE_USEQ: 56477ec681f3Smrg exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT); 5648cdc920a0Smrg break; 5649cdc920a0Smrg 5650cdc920a0Smrg case TGSI_OPCODE_USGE: 56517ec681f3Smrg exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT); 5652cdc920a0Smrg break; 5653cdc920a0Smrg 5654cdc920a0Smrg case TGSI_OPCODE_USHR: 56557ec681f3Smrg exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT); 5656cdc920a0Smrg break; 5657cdc920a0Smrg 5658cdc920a0Smrg case TGSI_OPCODE_USLT: 56597ec681f3Smrg exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT); 5660cdc920a0Smrg break; 5661cdc920a0Smrg 5662cdc920a0Smrg case TGSI_OPCODE_USNE: 56637ec681f3Smrg exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT); 5664cdc920a0Smrg break; 5665cdc920a0Smrg 5666cdc920a0Smrg case TGSI_OPCODE_SWITCH: 5667cdc920a0Smrg exec_switch(mach, inst); 5668cdc920a0Smrg break; 5669cdc920a0Smrg 5670cdc920a0Smrg case TGSI_OPCODE_CASE: 5671cdc920a0Smrg exec_case(mach, inst); 5672cdc920a0Smrg break; 5673cdc920a0Smrg 5674cdc920a0Smrg case TGSI_OPCODE_DEFAULT: 5675cdc920a0Smrg exec_default(mach); 5676cdc920a0Smrg break; 5677cdc920a0Smrg 5678cdc920a0Smrg case TGSI_OPCODE_ENDSWITCH: 5679cdc920a0Smrg exec_endswitch(mach); 5680cdc920a0Smrg break; 5681cdc920a0Smrg 5682af69d88dSmrg case TGSI_OPCODE_SAMPLE_I: 5683af69d88dSmrg exec_txf(mach, inst); 56843464ebd5Sriastradh break; 56853464ebd5Sriastradh 5686af69d88dSmrg case TGSI_OPCODE_SAMPLE_I_MS: 568701e04c3fSmrg exec_txf(mach, inst); 56883464ebd5Sriastradh break; 56893464ebd5Sriastradh 56903464ebd5Sriastradh case TGSI_OPCODE_SAMPLE: 5691af69d88dSmrg exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 56923464ebd5Sriastradh break; 56933464ebd5Sriastradh 56943464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_B: 5695af69d88dSmrg exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 56963464ebd5Sriastradh break; 56973464ebd5Sriastradh 56983464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_C: 5699af69d88dSmrg exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 57003464ebd5Sriastradh break; 57013464ebd5Sriastradh 57023464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_C_LZ: 5703af69d88dSmrg exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 57043464ebd5Sriastradh break; 57053464ebd5Sriastradh 57063464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_D: 57073464ebd5Sriastradh exec_sample_d(mach, inst); 57083464ebd5Sriastradh break; 57093464ebd5Sriastradh 57103464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_L: 5711af69d88dSmrg exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 57123464ebd5Sriastradh break; 57133464ebd5Sriastradh 57143464ebd5Sriastradh case TGSI_OPCODE_GATHER4: 571501e04c3fSmrg exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 57163464ebd5Sriastradh break; 57173464ebd5Sriastradh 5718af69d88dSmrg case TGSI_OPCODE_SVIEWINFO: 5719af69d88dSmrg exec_txq(mach, inst); 57203464ebd5Sriastradh break; 57213464ebd5Sriastradh 57223464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_POS: 57233464ebd5Sriastradh assert(0); 57243464ebd5Sriastradh break; 57253464ebd5Sriastradh 57263464ebd5Sriastradh case TGSI_OPCODE_SAMPLE_INFO: 57273464ebd5Sriastradh assert(0); 57283464ebd5Sriastradh break; 57293464ebd5Sriastradh 573001e04c3fSmrg case TGSI_OPCODE_LOD: 573101e04c3fSmrg exec_lodq(mach, inst); 573201e04c3fSmrg break; 573301e04c3fSmrg 5734af69d88dSmrg case TGSI_OPCODE_UARL: 57357ec681f3Smrg exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT); 5736af69d88dSmrg break; 5737af69d88dSmrg 5738af69d88dSmrg case TGSI_OPCODE_UCMP: 573901e04c3fSmrg exec_ucmp(mach, inst); 5740af69d88dSmrg break; 5741af69d88dSmrg 5742af69d88dSmrg case TGSI_OPCODE_IABS: 57437ec681f3Smrg exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT); 5744af69d88dSmrg break; 5745af69d88dSmrg 5746af69d88dSmrg case TGSI_OPCODE_ISSG: 57477ec681f3Smrg exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT); 5748af69d88dSmrg break; 5749af69d88dSmrg 5750af69d88dSmrg case TGSI_OPCODE_TEX2: 5751af69d88dSmrg /* simple texture lookup */ 5752af69d88dSmrg /* src[0] = texcoord */ 5753af69d88dSmrg /* src[1] = compare */ 5754af69d88dSmrg /* src[2] = sampler unit */ 5755af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5756af69d88dSmrg break; 5757af69d88dSmrg case TGSI_OPCODE_TXB2: 5758af69d88dSmrg /* simple texture lookup */ 5759af69d88dSmrg /* src[0] = texcoord */ 5760af69d88dSmrg /* src[1] = bias */ 5761af69d88dSmrg /* src[2] = sampler unit */ 5762af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5763af69d88dSmrg break; 5764af69d88dSmrg case TGSI_OPCODE_TXL2: 5765af69d88dSmrg /* simple texture lookup */ 5766af69d88dSmrg /* src[0] = texcoord */ 5767af69d88dSmrg /* src[1] = lod */ 5768af69d88dSmrg /* src[2] = sampler unit */ 5769af69d88dSmrg exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5770af69d88dSmrg break; 5771af69d88dSmrg 5772af69d88dSmrg case TGSI_OPCODE_IBFE: 57737ec681f3Smrg exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT); 5774af69d88dSmrg break; 5775af69d88dSmrg case TGSI_OPCODE_UBFE: 57767ec681f3Smrg exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT); 5777af69d88dSmrg break; 5778af69d88dSmrg case TGSI_OPCODE_BFI: 57797ec681f3Smrg exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT); 5780af69d88dSmrg break; 5781af69d88dSmrg case TGSI_OPCODE_BREV: 57827ec681f3Smrg exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT); 5783af69d88dSmrg break; 5784af69d88dSmrg case TGSI_OPCODE_POPC: 57857ec681f3Smrg exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT); 5786af69d88dSmrg break; 5787af69d88dSmrg case TGSI_OPCODE_LSB: 57887ec681f3Smrg exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT); 5789af69d88dSmrg break; 5790af69d88dSmrg case TGSI_OPCODE_IMSB: 57917ec681f3Smrg exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT); 5792af69d88dSmrg break; 5793af69d88dSmrg case TGSI_OPCODE_UMSB: 57947ec681f3Smrg exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT); 5795af69d88dSmrg break; 579601e04c3fSmrg 579701e04c3fSmrg case TGSI_OPCODE_F2D: 579801e04c3fSmrg exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 579901e04c3fSmrg break; 580001e04c3fSmrg 580101e04c3fSmrg case TGSI_OPCODE_D2F: 58027ec681f3Smrg exec_64_2_t(mach, inst, micro_d2f); 580301e04c3fSmrg break; 580401e04c3fSmrg 580501e04c3fSmrg case TGSI_OPCODE_DABS: 580601e04c3fSmrg exec_double_unary(mach, inst, micro_dabs); 580701e04c3fSmrg break; 580801e04c3fSmrg 580901e04c3fSmrg case TGSI_OPCODE_DNEG: 581001e04c3fSmrg exec_double_unary(mach, inst, micro_dneg); 581101e04c3fSmrg break; 581201e04c3fSmrg 581301e04c3fSmrg case TGSI_OPCODE_DADD: 581401e04c3fSmrg exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 581501e04c3fSmrg break; 581601e04c3fSmrg 581701e04c3fSmrg case TGSI_OPCODE_DDIV: 581801e04c3fSmrg exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 581901e04c3fSmrg break; 582001e04c3fSmrg 582101e04c3fSmrg case TGSI_OPCODE_DMUL: 582201e04c3fSmrg exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 582301e04c3fSmrg break; 582401e04c3fSmrg 582501e04c3fSmrg case TGSI_OPCODE_DMAX: 582601e04c3fSmrg exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 582701e04c3fSmrg break; 582801e04c3fSmrg 582901e04c3fSmrg case TGSI_OPCODE_DMIN: 583001e04c3fSmrg exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 583101e04c3fSmrg break; 583201e04c3fSmrg 583301e04c3fSmrg case TGSI_OPCODE_DSLT: 583401e04c3fSmrg exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 583501e04c3fSmrg break; 583601e04c3fSmrg 583701e04c3fSmrg case TGSI_OPCODE_DSGE: 583801e04c3fSmrg exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 583901e04c3fSmrg break; 584001e04c3fSmrg 584101e04c3fSmrg case TGSI_OPCODE_DSEQ: 584201e04c3fSmrg exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 584301e04c3fSmrg break; 584401e04c3fSmrg 584501e04c3fSmrg case TGSI_OPCODE_DSNE: 584601e04c3fSmrg exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 584701e04c3fSmrg break; 584801e04c3fSmrg 584901e04c3fSmrg case TGSI_OPCODE_DRCP: 585001e04c3fSmrg exec_double_unary(mach, inst, micro_drcp); 585101e04c3fSmrg break; 585201e04c3fSmrg 585301e04c3fSmrg case TGSI_OPCODE_DSQRT: 585401e04c3fSmrg exec_double_unary(mach, inst, micro_dsqrt); 585501e04c3fSmrg break; 585601e04c3fSmrg 585701e04c3fSmrg case TGSI_OPCODE_DRSQ: 585801e04c3fSmrg exec_double_unary(mach, inst, micro_drsq); 585901e04c3fSmrg break; 586001e04c3fSmrg 586101e04c3fSmrg case TGSI_OPCODE_DMAD: 586201e04c3fSmrg exec_double_trinary(mach, inst, micro_dmad); 586301e04c3fSmrg break; 586401e04c3fSmrg 586501e04c3fSmrg case TGSI_OPCODE_DFRAC: 586601e04c3fSmrg exec_double_unary(mach, inst, micro_dfrac); 586701e04c3fSmrg break; 586801e04c3fSmrg 58697ec681f3Smrg case TGSI_OPCODE_DFLR: 58707ec681f3Smrg exec_double_unary(mach, inst, micro_dflr); 58717ec681f3Smrg break; 58727ec681f3Smrg 587301e04c3fSmrg case TGSI_OPCODE_DLDEXP: 587401e04c3fSmrg exec_dldexp(mach, inst); 587501e04c3fSmrg break; 587601e04c3fSmrg 587701e04c3fSmrg case TGSI_OPCODE_DFRACEXP: 587801e04c3fSmrg exec_dfracexp(mach, inst); 587901e04c3fSmrg break; 588001e04c3fSmrg 588101e04c3fSmrg case TGSI_OPCODE_I2D: 58827ec681f3Smrg exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT); 588301e04c3fSmrg break; 588401e04c3fSmrg 588501e04c3fSmrg case TGSI_OPCODE_D2I: 58867ec681f3Smrg exec_64_2_t(mach, inst, micro_d2i); 588701e04c3fSmrg break; 588801e04c3fSmrg 588901e04c3fSmrg case TGSI_OPCODE_U2D: 58907ec681f3Smrg exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT); 589101e04c3fSmrg break; 589201e04c3fSmrg 589301e04c3fSmrg case TGSI_OPCODE_D2U: 58947ec681f3Smrg exec_64_2_t(mach, inst, micro_d2u); 589501e04c3fSmrg break; 589601e04c3fSmrg 589701e04c3fSmrg case TGSI_OPCODE_LOAD: 589801e04c3fSmrg exec_load(mach, inst); 589901e04c3fSmrg break; 590001e04c3fSmrg 590101e04c3fSmrg case TGSI_OPCODE_STORE: 590201e04c3fSmrg exec_store(mach, inst); 590301e04c3fSmrg break; 590401e04c3fSmrg 590501e04c3fSmrg case TGSI_OPCODE_ATOMUADD: 590601e04c3fSmrg case TGSI_OPCODE_ATOMXCHG: 590701e04c3fSmrg case TGSI_OPCODE_ATOMCAS: 590801e04c3fSmrg case TGSI_OPCODE_ATOMAND: 590901e04c3fSmrg case TGSI_OPCODE_ATOMOR: 591001e04c3fSmrg case TGSI_OPCODE_ATOMXOR: 591101e04c3fSmrg case TGSI_OPCODE_ATOMUMIN: 591201e04c3fSmrg case TGSI_OPCODE_ATOMUMAX: 591301e04c3fSmrg case TGSI_OPCODE_ATOMIMIN: 591401e04c3fSmrg case TGSI_OPCODE_ATOMIMAX: 5915361fc4cbSmaya case TGSI_OPCODE_ATOMFADD: 591601e04c3fSmrg exec_atomop(mach, inst); 591701e04c3fSmrg break; 591801e04c3fSmrg 591901e04c3fSmrg case TGSI_OPCODE_RESQ: 592001e04c3fSmrg exec_resq(mach, inst); 592101e04c3fSmrg break; 592201e04c3fSmrg case TGSI_OPCODE_BARRIER: 592301e04c3fSmrg case TGSI_OPCODE_MEMBAR: 592401e04c3fSmrg return TRUE; 592501e04c3fSmrg break; 592601e04c3fSmrg 592701e04c3fSmrg case TGSI_OPCODE_I64ABS: 592801e04c3fSmrg exec_double_unary(mach, inst, micro_i64abs); 592901e04c3fSmrg break; 593001e04c3fSmrg 593101e04c3fSmrg case TGSI_OPCODE_I64SSG: 593201e04c3fSmrg exec_double_unary(mach, inst, micro_i64sgn); 593301e04c3fSmrg break; 593401e04c3fSmrg 593501e04c3fSmrg case TGSI_OPCODE_I64NEG: 593601e04c3fSmrg exec_double_unary(mach, inst, micro_i64neg); 593701e04c3fSmrg break; 593801e04c3fSmrg 593901e04c3fSmrg case TGSI_OPCODE_U64SEQ: 594001e04c3fSmrg exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 594101e04c3fSmrg break; 594201e04c3fSmrg 594301e04c3fSmrg case TGSI_OPCODE_U64SNE: 594401e04c3fSmrg exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 594501e04c3fSmrg break; 594601e04c3fSmrg 594701e04c3fSmrg case TGSI_OPCODE_I64SLT: 594801e04c3fSmrg exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 594901e04c3fSmrg break; 595001e04c3fSmrg case TGSI_OPCODE_U64SLT: 595101e04c3fSmrg exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 595201e04c3fSmrg break; 595301e04c3fSmrg 595401e04c3fSmrg case TGSI_OPCODE_I64SGE: 595501e04c3fSmrg exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 595601e04c3fSmrg break; 595701e04c3fSmrg case TGSI_OPCODE_U64SGE: 595801e04c3fSmrg exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 595901e04c3fSmrg break; 596001e04c3fSmrg 596101e04c3fSmrg case TGSI_OPCODE_I64MIN: 596201e04c3fSmrg exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 596301e04c3fSmrg break; 596401e04c3fSmrg case TGSI_OPCODE_U64MIN: 596501e04c3fSmrg exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 596601e04c3fSmrg break; 596701e04c3fSmrg case TGSI_OPCODE_I64MAX: 596801e04c3fSmrg exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 596901e04c3fSmrg break; 597001e04c3fSmrg case TGSI_OPCODE_U64MAX: 597101e04c3fSmrg exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 597201e04c3fSmrg break; 597301e04c3fSmrg case TGSI_OPCODE_U64ADD: 597401e04c3fSmrg exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 597501e04c3fSmrg break; 597601e04c3fSmrg case TGSI_OPCODE_U64MUL: 597701e04c3fSmrg exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 597801e04c3fSmrg break; 597901e04c3fSmrg case TGSI_OPCODE_U64SHL: 598001e04c3fSmrg exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 598101e04c3fSmrg break; 598201e04c3fSmrg case TGSI_OPCODE_I64SHR: 598301e04c3fSmrg exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 598401e04c3fSmrg break; 598501e04c3fSmrg case TGSI_OPCODE_U64SHR: 598601e04c3fSmrg exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 598701e04c3fSmrg break; 598801e04c3fSmrg case TGSI_OPCODE_U64DIV: 598901e04c3fSmrg exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 599001e04c3fSmrg break; 599101e04c3fSmrg case TGSI_OPCODE_I64DIV: 599201e04c3fSmrg exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 599301e04c3fSmrg break; 599401e04c3fSmrg case TGSI_OPCODE_U64MOD: 599501e04c3fSmrg exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 599601e04c3fSmrg break; 599701e04c3fSmrg case TGSI_OPCODE_I64MOD: 599801e04c3fSmrg exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 599901e04c3fSmrg break; 600001e04c3fSmrg 600101e04c3fSmrg case TGSI_OPCODE_F2U64: 600201e04c3fSmrg exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 600301e04c3fSmrg break; 600401e04c3fSmrg 600501e04c3fSmrg case TGSI_OPCODE_F2I64: 600601e04c3fSmrg exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 600701e04c3fSmrg break; 600801e04c3fSmrg 600901e04c3fSmrg case TGSI_OPCODE_U2I64: 601001e04c3fSmrg exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 601101e04c3fSmrg break; 601201e04c3fSmrg case TGSI_OPCODE_I2I64: 601301e04c3fSmrg exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 601401e04c3fSmrg break; 601501e04c3fSmrg 601601e04c3fSmrg case TGSI_OPCODE_D2U64: 601701e04c3fSmrg exec_double_unary(mach, inst, micro_d2u64); 601801e04c3fSmrg break; 601901e04c3fSmrg 602001e04c3fSmrg case TGSI_OPCODE_D2I64: 602101e04c3fSmrg exec_double_unary(mach, inst, micro_d2i64); 602201e04c3fSmrg break; 602301e04c3fSmrg 602401e04c3fSmrg case TGSI_OPCODE_U642F: 60257ec681f3Smrg exec_64_2_t(mach, inst, micro_u642f); 602601e04c3fSmrg break; 602701e04c3fSmrg case TGSI_OPCODE_I642F: 60287ec681f3Smrg exec_64_2_t(mach, inst, micro_i642f); 602901e04c3fSmrg break; 603001e04c3fSmrg 603101e04c3fSmrg case TGSI_OPCODE_U642D: 603201e04c3fSmrg exec_double_unary(mach, inst, micro_u642d); 603301e04c3fSmrg break; 603401e04c3fSmrg case TGSI_OPCODE_I642D: 603501e04c3fSmrg exec_double_unary(mach, inst, micro_i642d); 603601e04c3fSmrg break; 6037361fc4cbSmaya case TGSI_OPCODE_INTERP_SAMPLE: 6038361fc4cbSmaya exec_interp_at_sample(mach, inst); 6039361fc4cbSmaya break; 6040361fc4cbSmaya case TGSI_OPCODE_INTERP_OFFSET: 6041361fc4cbSmaya exec_interp_at_offset(mach, inst); 6042361fc4cbSmaya break; 6043361fc4cbSmaya case TGSI_OPCODE_INTERP_CENTROID: 6044361fc4cbSmaya exec_interp_at_centroid(mach, inst); 6045361fc4cbSmaya break; 60464a49301eSmrg default: 60474a49301eSmrg assert( 0 ); 60484a49301eSmrg } 604901e04c3fSmrg return FALSE; 60504a49301eSmrg} 60514a49301eSmrg 605201e04c3fSmrgstatic void 605301e04c3fSmrgtgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 60544a49301eSmrg{ 6055af69d88dSmrg uint default_mask = 0xf; 60564a49301eSmrg 60577ec681f3Smrg mach->KillMask = 0; 60587ec681f3Smrg mach->OutputVertexOffset = 0; 6059af69d88dSmrg 606001e04c3fSmrg if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6061361fc4cbSmaya for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 60627ec681f3Smrg mach->OutputPrimCount[i] = 0; 6063361fc4cbSmaya mach->Primitives[i][0] = 0; 6064361fc4cbSmaya } 6065af69d88dSmrg /* GS runs on a single primitive for now */ 6066af69d88dSmrg default_mask = 0x1; 6067af69d88dSmrg } 6068af69d88dSmrg 606901e04c3fSmrg if (mach->NonHelperMask == 0) 607001e04c3fSmrg mach->NonHelperMask = default_mask; 6071af69d88dSmrg mach->CondMask = default_mask; 6072af69d88dSmrg mach->LoopMask = default_mask; 6073af69d88dSmrg mach->ContMask = default_mask; 6074af69d88dSmrg mach->FuncMask = default_mask; 6075af69d88dSmrg mach->ExecMask = default_mask; 60764a49301eSmrg 6077af69d88dSmrg mach->Switch.mask = default_mask; 6078cdc920a0Smrg 60794a49301eSmrg assert(mach->CondStackTop == 0); 60804a49301eSmrg assert(mach->LoopStackTop == 0); 60814a49301eSmrg assert(mach->ContStackTop == 0); 6082cdc920a0Smrg assert(mach->SwitchStackTop == 0); 6083cdc920a0Smrg assert(mach->BreakStackTop == 0); 60844a49301eSmrg assert(mach->CallStackTop == 0); 608501e04c3fSmrg} 608601e04c3fSmrg 608701e04c3fSmrg/** 608801e04c3fSmrg * Run TGSI interpreter. 608901e04c3fSmrg * \return bitmask of "alive" quad components 609001e04c3fSmrg */ 609101e04c3fSmrguint 609201e04c3fSmrgtgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 609301e04c3fSmrg{ 609401e04c3fSmrg uint i; 609501e04c3fSmrg 609601e04c3fSmrg mach->pc = start_pc; 60974a49301eSmrg 609801e04c3fSmrg if (!start_pc) { 609901e04c3fSmrg tgsi_exec_machine_setup_masks(mach); 61004a49301eSmrg 610101e04c3fSmrg /* execute declarations (interpolants) */ 610201e04c3fSmrg for (i = 0; i < mach->NumDeclarations; i++) { 610301e04c3fSmrg exec_declaration( mach, mach->Declarations+i ); 610401e04c3fSmrg } 61054a49301eSmrg } 61064a49301eSmrg 6107cdc920a0Smrg { 6108cdc920a0Smrg#if DEBUG_EXECUTION 61097ec681f3Smrg struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS]; 6110cdc920a0Smrg struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6111cdc920a0Smrg uint inst = 1; 6112cdc920a0Smrg 611301e04c3fSmrg if (!start_pc) { 611401e04c3fSmrg memset(mach->Temps, 0, sizeof(temps)); 611501e04c3fSmrg if (mach->Outputs) 611601e04c3fSmrg memset(mach->Outputs, 0, sizeof(outputs)); 611701e04c3fSmrg memset(temps, 0, sizeof(temps)); 611801e04c3fSmrg memset(outputs, 0, sizeof(outputs)); 611901e04c3fSmrg } 6120cdc920a0Smrg#endif 6121cdc920a0Smrg 6122cdc920a0Smrg /* execute instructions, until pc is set to -1 */ 612301e04c3fSmrg while (mach->pc != -1) { 612401e04c3fSmrg boolean barrier_hit; 6125cdc920a0Smrg#if DEBUG_EXECUTION 6126cdc920a0Smrg uint i; 6127cdc920a0Smrg 612801e04c3fSmrg tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6129cdc920a0Smrg#endif 6130cdc920a0Smrg 613101e04c3fSmrg assert(mach->pc < (int) mach->NumInstructions); 613201e04c3fSmrg barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 613301e04c3fSmrg 613401e04c3fSmrg /* for compute shaders if we hit a barrier return now for later rescheduling */ 613501e04c3fSmrg if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 613601e04c3fSmrg return 0; 6137cdc920a0Smrg 6138cdc920a0Smrg#if DEBUG_EXECUTION 61397ec681f3Smrg for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) { 6140cdc920a0Smrg if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6141cdc920a0Smrg uint j; 6142cdc920a0Smrg 6143cdc920a0Smrg memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6144cdc920a0Smrg debug_printf("TEMP[%2u] = ", i); 6145cdc920a0Smrg for (j = 0; j < 4; j++) { 6146cdc920a0Smrg if (j > 0) { 6147cdc920a0Smrg debug_printf(" "); 6148cdc920a0Smrg } 6149cdc920a0Smrg debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6150cdc920a0Smrg temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6151cdc920a0Smrg temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6152cdc920a0Smrg temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6153cdc920a0Smrg temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6154cdc920a0Smrg } 6155cdc920a0Smrg } 6156cdc920a0Smrg } 615701e04c3fSmrg if (mach->Outputs) { 615801e04c3fSmrg for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 615901e04c3fSmrg if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 616001e04c3fSmrg uint j; 616101e04c3fSmrg 616201e04c3fSmrg memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 616301e04c3fSmrg debug_printf("OUT[%2u] = ", i); 616401e04c3fSmrg for (j = 0; j < 4; j++) { 616501e04c3fSmrg if (j > 0) { 616601e04c3fSmrg debug_printf(" "); 616701e04c3fSmrg } 616801e04c3fSmrg debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 616901e04c3fSmrg outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 617001e04c3fSmrg outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 617101e04c3fSmrg outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 617201e04c3fSmrg outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6173cdc920a0Smrg } 6174cdc920a0Smrg } 6175cdc920a0Smrg } 6176cdc920a0Smrg } 6177cdc920a0Smrg#endif 6178cdc920a0Smrg } 61794a49301eSmrg } 61804a49301eSmrg 61814a49301eSmrg#if 0 61824a49301eSmrg /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 618301e04c3fSmrg if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 61844a49301eSmrg /* 61854a49301eSmrg * Scale back depth component. 61864a49301eSmrg */ 61874a49301eSmrg for (i = 0; i < 4; i++) 61884a49301eSmrg mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 61894a49301eSmrg } 61904a49301eSmrg#endif 61914a49301eSmrg 61923464ebd5Sriastradh /* Strictly speaking, these assertions aren't really needed but they 61933464ebd5Sriastradh * can potentially catch some bugs in the control flow code. 61943464ebd5Sriastradh */ 6195cdc920a0Smrg assert(mach->CondStackTop == 0); 6196cdc920a0Smrg assert(mach->LoopStackTop == 0); 6197cdc920a0Smrg assert(mach->ContStackTop == 0); 6198cdc920a0Smrg assert(mach->SwitchStackTop == 0); 6199cdc920a0Smrg assert(mach->BreakStackTop == 0); 6200cdc920a0Smrg assert(mach->CallStackTop == 0); 6201cdc920a0Smrg 62027ec681f3Smrg return ~mach->KillMask; 62034a49301eSmrg} 6204