tgsi_util.c revision b8e80941
11b5d61b8Smrg/************************************************************************** 21b5d61b8Smrg * 305b261ecSmrg * Copyright 2007 VMware, Inc. 405b261ecSmrg * All Rights Reserved. 51b5d61b8Smrg * 61b5d61b8Smrg * Permission is hereby granted, free of charge, to any person obtaining a 705b261ecSmrg * copy of this software and associated documentation files (the 81b5d61b8Smrg * "Software"), to deal in the Software without restriction, including 91b5d61b8Smrg * without limitation the rights to use, copy, modify, merge, publish, 101b5d61b8Smrg * distribute, sub license, and/or sell copies of the Software, and to 111b5d61b8Smrg * permit persons to whom the Software is furnished to do so, subject to 121b5d61b8Smrg * the following conditions: 131b5d61b8Smrg * 141b5d61b8Smrg * The above copyright notice and this permission notice (including the 151b5d61b8Smrg * next paragraph) shall be included in all copies or substantial portions 161b5d61b8Smrg * of the Software. 171b5d61b8Smrg * 181b5d61b8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191b5d61b8Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201b5d61b8Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211b5d61b8Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 2205b261ecSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 2305b261ecSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241b5d61b8Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251b5d61b8Smrg * 261b5d61b8Smrg **************************************************************************/ 271b5d61b8Smrg 281b5d61b8Smrg#include "util/u_debug.h" 291b5d61b8Smrg#include "pipe/p_shader_tokens.h" 301b5d61b8Smrg#include "tgsi_info.h" 311b5d61b8Smrg#include "tgsi_parse.h" 321b5d61b8Smrg#include "tgsi_util.h" 331b5d61b8Smrg#include "tgsi_exec.h" 341b5d61b8Smrg#include "util/bitscan.h" 351b5d61b8Smrg 361b5d61b8Smrgunion pointer_hack 371b5d61b8Smrg{ 381b5d61b8Smrg void *pointer; 391b5d61b8Smrg uint64_t uint64; 401b5d61b8Smrg}; 411b5d61b8Smrg 421b5d61b8Smrgvoid * 431b5d61b8Smrgtgsi_align_128bit(void *unaligned) 441b5d61b8Smrg{ 451b5d61b8Smrg union pointer_hack ph; 461b5d61b8Smrg 471b5d61b8Smrg ph.uint64 = 0; 481b5d61b8Smrg ph.pointer = unaligned; 491b5d61b8Smrg ph.uint64 = (ph.uint64 + 15) & ~15; 501b5d61b8Smrg return ph.pointer; 511b5d61b8Smrg} 521b5d61b8Smrg 531b5d61b8Smrgunsigned 541b5d61b8Smrgtgsi_util_get_src_register_swizzle(const struct tgsi_src_register *reg, 551b5d61b8Smrg unsigned component) 561b5d61b8Smrg{ 571b5d61b8Smrg switch (component) { 581b5d61b8Smrg case TGSI_CHAN_X: 591b5d61b8Smrg return reg->SwizzleX; 601b5d61b8Smrg case TGSI_CHAN_Y: 611b5d61b8Smrg return reg->SwizzleY; 621b5d61b8Smrg case TGSI_CHAN_Z: 631b5d61b8Smrg return reg->SwizzleZ; 641b5d61b8Smrg case TGSI_CHAN_W: 651b5d61b8Smrg return reg->SwizzleW; 661b5d61b8Smrg default: 671b5d61b8Smrg assert(0); 681b5d61b8Smrg } 691b5d61b8Smrg return 0; 701b5d61b8Smrg} 711b5d61b8Smrg 721b5d61b8Smrg 731b5d61b8Smrgunsigned 741b5d61b8Smrgtgsi_util_get_full_src_register_swizzle( 751b5d61b8Smrg const struct tgsi_full_src_register *reg, 761b5d61b8Smrg unsigned component) 771b5d61b8Smrg{ 781b5d61b8Smrg return tgsi_util_get_src_register_swizzle(®->Register, component); 791b5d61b8Smrg} 801b5d61b8Smrg 811b5d61b8Smrg 821b5d61b8Smrgvoid 831b5d61b8Smrgtgsi_util_set_src_register_swizzle(struct tgsi_src_register *reg, 841b5d61b8Smrg unsigned swizzle, 851b5d61b8Smrg unsigned component) 861b5d61b8Smrg{ 871b5d61b8Smrg switch (component) { 881b5d61b8Smrg case 0: 891b5d61b8Smrg reg->SwizzleX = swizzle; 901b5d61b8Smrg break; 911b5d61b8Smrg case 1: 92 reg->SwizzleY = swizzle; 93 break; 94 case 2: 95 reg->SwizzleZ = swizzle; 96 break; 97 case 3: 98 reg->SwizzleW = swizzle; 99 break; 100 default: 101 assert(0); 102 } 103} 104 105 106unsigned 107tgsi_util_get_full_src_register_sign_mode( 108 const struct tgsi_full_src_register *reg, 109 UNUSED unsigned component) 110{ 111 unsigned sign_mode; 112 113 if (reg->Register.Absolute) { 114 /* Consider only the post-abs negation. */ 115 116 if (reg->Register.Negate) { 117 sign_mode = TGSI_UTIL_SIGN_SET; 118 } 119 else { 120 sign_mode = TGSI_UTIL_SIGN_CLEAR; 121 } 122 } 123 else { 124 if (reg->Register.Negate) { 125 sign_mode = TGSI_UTIL_SIGN_TOGGLE; 126 } 127 else { 128 sign_mode = TGSI_UTIL_SIGN_KEEP; 129 } 130 } 131 132 return sign_mode; 133} 134 135 136void 137tgsi_util_set_full_src_register_sign_mode(struct tgsi_full_src_register *reg, 138 unsigned sign_mode) 139{ 140 switch (sign_mode) { 141 case TGSI_UTIL_SIGN_CLEAR: 142 reg->Register.Negate = 0; 143 reg->Register.Absolute = 1; 144 break; 145 146 case TGSI_UTIL_SIGN_SET: 147 reg->Register.Absolute = 1; 148 reg->Register.Negate = 1; 149 break; 150 151 case TGSI_UTIL_SIGN_TOGGLE: 152 reg->Register.Negate = 1; 153 reg->Register.Absolute = 0; 154 break; 155 156 case TGSI_UTIL_SIGN_KEEP: 157 reg->Register.Negate = 0; 158 reg->Register.Absolute = 0; 159 break; 160 161 default: 162 assert(0); 163 } 164} 165 166 167/** 168 * Determine which channels of the specificed src register are effectively 169 * used by this instruction. 170 */ 171unsigned 172tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, 173 unsigned src_idx) 174{ 175 const struct tgsi_full_src_register *src = &inst->Src[src_idx]; 176 unsigned write_mask = inst->Dst[0].Register.WriteMask; 177 unsigned read_mask; 178 unsigned usage_mask; 179 unsigned chan; 180 181 switch (inst->Instruction.Opcode) { 182 case TGSI_OPCODE_IF: 183 case TGSI_OPCODE_UIF: 184 case TGSI_OPCODE_EMIT: 185 case TGSI_OPCODE_ENDPRIM: 186 case TGSI_OPCODE_RCP: 187 case TGSI_OPCODE_RSQ: 188 case TGSI_OPCODE_SQRT: 189 case TGSI_OPCODE_EX2: 190 case TGSI_OPCODE_LG2: 191 case TGSI_OPCODE_SIN: 192 case TGSI_OPCODE_COS: 193 case TGSI_OPCODE_POW: /* reads src0.x and src1.x */ 194 case TGSI_OPCODE_UP2H: 195 case TGSI_OPCODE_UP2US: 196 case TGSI_OPCODE_UP4B: 197 case TGSI_OPCODE_UP4UB: 198 case TGSI_OPCODE_MEMBAR: 199 case TGSI_OPCODE_BALLOT: 200 read_mask = TGSI_WRITEMASK_X; 201 break; 202 203 case TGSI_OPCODE_DP2: 204 case TGSI_OPCODE_PK2H: 205 case TGSI_OPCODE_PK2US: 206 case TGSI_OPCODE_DFRACEXP: 207 case TGSI_OPCODE_F2D: 208 case TGSI_OPCODE_I2D: 209 case TGSI_OPCODE_U2D: 210 case TGSI_OPCODE_F2U64: 211 case TGSI_OPCODE_F2I64: 212 case TGSI_OPCODE_U2I64: 213 case TGSI_OPCODE_I2I64: 214 case TGSI_OPCODE_TXQS: /* bindless handle possible */ 215 case TGSI_OPCODE_RESQ: /* bindless handle possible */ 216 read_mask = TGSI_WRITEMASK_XY; 217 break; 218 219 case TGSI_OPCODE_TXQ: 220 if (src_idx == 0) 221 read_mask = TGSI_WRITEMASK_X; 222 else 223 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 224 break; 225 226 case TGSI_OPCODE_DP3: 227 read_mask = TGSI_WRITEMASK_XYZ; 228 break; 229 230 case TGSI_OPCODE_DSEQ: 231 case TGSI_OPCODE_DSNE: 232 case TGSI_OPCODE_DSLT: 233 case TGSI_OPCODE_DSGE: 234 case TGSI_OPCODE_DP4: 235 case TGSI_OPCODE_PK4B: 236 case TGSI_OPCODE_PK4UB: 237 case TGSI_OPCODE_D2F: 238 case TGSI_OPCODE_D2I: 239 case TGSI_OPCODE_D2U: 240 case TGSI_OPCODE_I2F: 241 case TGSI_OPCODE_U2F: 242 case TGSI_OPCODE_U64SEQ: 243 case TGSI_OPCODE_U64SNE: 244 case TGSI_OPCODE_U64SLT: 245 case TGSI_OPCODE_U64SGE: 246 case TGSI_OPCODE_U642F: 247 case TGSI_OPCODE_I64SLT: 248 case TGSI_OPCODE_I64SGE: 249 case TGSI_OPCODE_I642F: 250 read_mask = TGSI_WRITEMASK_XYZW; 251 break; 252 253 case TGSI_OPCODE_LIT: 254 read_mask = write_mask & TGSI_WRITEMASK_YZ ? 255 TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0; 256 break; 257 258 case TGSI_OPCODE_EXP: 259 case TGSI_OPCODE_LOG: 260 read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; 261 break; 262 263 case TGSI_OPCODE_DST: 264 if (src_idx == 0) 265 read_mask = TGSI_WRITEMASK_YZ; 266 else 267 read_mask = TGSI_WRITEMASK_YW; 268 break; 269 270 case TGSI_OPCODE_DLDEXP: 271 if (src_idx == 0) { 272 read_mask = write_mask; 273 } else { 274 read_mask = 275 (write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) | 276 (write_mask & TGSI_WRITEMASK_ZW ? TGSI_WRITEMASK_Z : 0); 277 } 278 break; 279 280 case TGSI_OPCODE_READ_INVOC: 281 if (src_idx == 0) 282 read_mask = write_mask; 283 else 284 read_mask = TGSI_WRITEMASK_X; 285 break; 286 287 case TGSI_OPCODE_FBFETCH: 288 read_mask = 0; /* not a real register read */ 289 break; 290 291 case TGSI_OPCODE_TEX: 292 case TGSI_OPCODE_TEX_LZ: 293 case TGSI_OPCODE_TXF_LZ: 294 case TGSI_OPCODE_TXF: 295 case TGSI_OPCODE_TXB: 296 case TGSI_OPCODE_TXL: 297 case TGSI_OPCODE_TXP: 298 case TGSI_OPCODE_TXD: 299 case TGSI_OPCODE_TEX2: 300 case TGSI_OPCODE_TXB2: 301 case TGSI_OPCODE_TXL2: 302 case TGSI_OPCODE_LODQ: 303 case TGSI_OPCODE_TG4: { 304 unsigned dim_layer = 305 tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 306 unsigned dim_layer_shadow, dim; 307 308 /* Add shadow. */ 309 if (tgsi_is_shadow_target(inst->Texture.Texture)) { 310 dim_layer_shadow = dim_layer + 1; 311 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) 312 dim_layer_shadow = 3; 313 } else { 314 dim_layer_shadow = dim_layer; 315 } 316 317 /* Remove layer. */ 318 if (tgsi_is_array_sampler(inst->Texture.Texture)) 319 dim = dim_layer - 1; 320 else 321 dim = dim_layer; 322 323 read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */ 324 325 switch (src_idx) { 326 case 0: 327 if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) 328 read_mask = u_bit_consecutive(0, dim); 329 else 330 read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf; 331 332 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) 333 read_mask &= ~TGSI_WRITEMASK_Y; 334 335 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF || 336 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 337 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 338 inst->Instruction.Opcode == TGSI_OPCODE_TXP) 339 read_mask |= TGSI_WRITEMASK_W; 340 break; 341 342 case 1: 343 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 344 read_mask = u_bit_consecutive(0, dim); 345 else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 346 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 347 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 348 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 349 read_mask = TGSI_WRITEMASK_X; 350 break; 351 352 case 2: 353 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 354 read_mask = u_bit_consecutive(0, dim); 355 break; 356 } 357 break; 358 } 359 360 case TGSI_OPCODE_LOAD: 361 if (src_idx == 0) { 362 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 363 } else { 364 unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); 365 read_mask = u_bit_consecutive(0, dim); 366 } 367 break; 368 369 case TGSI_OPCODE_STORE: 370 if (src_idx == 0) { 371 unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); 372 read_mask = u_bit_consecutive(0, dim); 373 } else { 374 read_mask = TGSI_WRITEMASK_XYZW; 375 } 376 break; 377 378 case TGSI_OPCODE_ATOMUADD: 379 case TGSI_OPCODE_ATOMXCHG: 380 case TGSI_OPCODE_ATOMCAS: 381 case TGSI_OPCODE_ATOMAND: 382 case TGSI_OPCODE_ATOMOR: 383 case TGSI_OPCODE_ATOMXOR: 384 case TGSI_OPCODE_ATOMUMIN: 385 case TGSI_OPCODE_ATOMUMAX: 386 case TGSI_OPCODE_ATOMIMIN: 387 case TGSI_OPCODE_ATOMIMAX: 388 case TGSI_OPCODE_ATOMFADD: 389 if (src_idx == 0) { 390 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 391 } else if (src_idx == 1) { 392 unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); 393 read_mask = u_bit_consecutive(0, dim); 394 } else { 395 read_mask = TGSI_WRITEMASK_XYZW; 396 } 397 break; 398 399 case TGSI_OPCODE_INTERP_CENTROID: 400 case TGSI_OPCODE_INTERP_SAMPLE: 401 case TGSI_OPCODE_INTERP_OFFSET: 402 if (src_idx == 0) 403 read_mask = write_mask; 404 else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) 405 read_mask = TGSI_WRITEMASK_XY; /* offset */ 406 else 407 read_mask = TGSI_WRITEMASK_X; /* sample */ 408 break; 409 410 default: 411 if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode == 412 TGSI_OUTPUT_COMPONENTWISE) 413 read_mask = write_mask; 414 else 415 read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */ 416 break; 417 } 418 419 usage_mask = 0; 420 for (chan = 0; chan < 4; ++chan) { 421 if (read_mask & (1 << chan)) { 422 usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan); 423 } 424 } 425 426 return usage_mask; 427} 428 429/** 430 * Convert a tgsi_ind_register into a tgsi_src_register 431 */ 432struct tgsi_src_register 433tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg) 434{ 435 struct tgsi_src_register src = { 0 }; 436 437 src.File = reg->File; 438 src.Index = reg->Index; 439 src.SwizzleX = reg->Swizzle; 440 src.SwizzleY = reg->Swizzle; 441 src.SwizzleZ = reg->Swizzle; 442 src.SwizzleW = reg->Swizzle; 443 444 return src; 445} 446 447/** 448 * Return the dimension of the texture coordinates (layer included for array 449 * textures), as well as the location of the shadow reference value or the 450 * sample index. 451 */ 452int 453tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex) 454{ 455 /* 456 * Depending on the texture target, (src0.xyzw, src1.x) is interpreted 457 * differently: 458 * 459 * (s, X, X, X, X), for BUFFER 460 * (s, X, X, X, X), for 1D 461 * (s, t, X, X, X), for 2D, RECT 462 * (s, t, r, X, X), for 3D, CUBE 463 * 464 * (s, layer, X, X, X), for 1D_ARRAY 465 * (s, t, layer, X, X), for 2D_ARRAY 466 * (s, t, r, layer, X), for CUBE_ARRAY 467 * 468 * (s, X, shadow, X, X), for SHADOW1D 469 * (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT 470 * (s, t, r, shadow, X), for SHADOWCUBE 471 * 472 * (s, layer, shadow, X, X), for SHADOW1D_ARRAY 473 * (s, t, layer, shadow, X), for SHADOW2D_ARRAY 474 * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY 475 * 476 * (s, t, sample, X, X), for 2D_MSAA 477 * (s, t, layer, sample, X), for 2D_ARRAY_MSAA 478 */ 479 switch (tgsi_tex) { 480 case TGSI_TEXTURE_BUFFER: 481 case TGSI_TEXTURE_1D: 482 case TGSI_TEXTURE_SHADOW1D: 483 return 1; 484 case TGSI_TEXTURE_2D: 485 case TGSI_TEXTURE_RECT: 486 case TGSI_TEXTURE_1D_ARRAY: 487 case TGSI_TEXTURE_SHADOW2D: 488 case TGSI_TEXTURE_SHADOWRECT: 489 case TGSI_TEXTURE_SHADOW1D_ARRAY: 490 case TGSI_TEXTURE_2D_MSAA: 491 return 2; 492 case TGSI_TEXTURE_3D: 493 case TGSI_TEXTURE_CUBE: 494 case TGSI_TEXTURE_2D_ARRAY: 495 case TGSI_TEXTURE_SHADOWCUBE: 496 case TGSI_TEXTURE_SHADOW2D_ARRAY: 497 case TGSI_TEXTURE_2D_ARRAY_MSAA: 498 return 3; 499 case TGSI_TEXTURE_CUBE_ARRAY: 500 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 501 return 4; 502 default: 503 assert(!"unknown texture target"); 504 return 0; 505 } 506} 507 508 509/** 510 * Given a TGSI_TEXTURE_x target, return register component where the 511 * shadow reference/distance coordinate is found. Typically, components 512 * 0 and 1 are the (s,t) texcoords and component 2 or 3 hold the shadow 513 * reference value. But if we return 4, it means the reference value is 514 * found in the 0th component of the second coordinate argument to the 515 * TEX2 instruction. 516 */ 517int 518tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex) 519{ 520 switch (tgsi_tex) { 521 case TGSI_TEXTURE_SHADOW1D: 522 case TGSI_TEXTURE_SHADOW2D: 523 case TGSI_TEXTURE_SHADOWRECT: 524 case TGSI_TEXTURE_SHADOW1D_ARRAY: 525 return 2; 526 case TGSI_TEXTURE_SHADOWCUBE: 527 case TGSI_TEXTURE_SHADOW2D_ARRAY: 528 case TGSI_TEXTURE_2D_MSAA: 529 case TGSI_TEXTURE_2D_ARRAY_MSAA: 530 return 3; 531 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 532 return 4; 533 default: 534 /* no shadow nor sample */ 535 return -1; 536 } 537} 538 539 540boolean 541tgsi_is_shadow_target(enum tgsi_texture_type target) 542{ 543 switch (target) { 544 case TGSI_TEXTURE_SHADOW1D: 545 case TGSI_TEXTURE_SHADOW2D: 546 case TGSI_TEXTURE_SHADOWRECT: 547 case TGSI_TEXTURE_SHADOW1D_ARRAY: 548 case TGSI_TEXTURE_SHADOW2D_ARRAY: 549 case TGSI_TEXTURE_SHADOWCUBE: 550 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 551 return TRUE; 552 default: 553 return FALSE; 554 } 555} 556