tgsi_exec.h revision 7ec681f3
1/************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29#ifndef TGSI_EXEC_H 30#define TGSI_EXEC_H 31 32#include "pipe/p_compiler.h" 33#include "pipe/p_state.h" 34#include "pipe/p_shader_tokens.h" 35 36#if defined __cplusplus 37extern "C" { 38#endif 39 40#define TGSI_CHAN_X 0 41#define TGSI_CHAN_Y 1 42#define TGSI_CHAN_Z 2 43#define TGSI_CHAN_W 3 44 45#define TGSI_NUM_CHANNELS 4 /* R,G,B,A */ 46#define TGSI_QUAD_SIZE 4 /* 4 pixel/quad */ 47 48#define TGSI_FOR_EACH_CHANNEL( CHAN )\ 49 for (CHAN = 0; CHAN < TGSI_NUM_CHANNELS; CHAN++) 50 51#define TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 52 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 53 54#define TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 55 if (TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 56 57#define TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 58 TGSI_FOR_EACH_CHANNEL( CHAN )\ 59 TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 60 61#define TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\ 62 ((INST)->Dst[1].Register.WriteMask & (1 << (CHAN))) 63 64#define TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\ 65 if (TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )) 66 67#define TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( INST, CHAN )\ 68 TGSI_FOR_EACH_CHANNEL( CHAN )\ 69 TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN ) 70 71/** 72 * Registers may be treated as float, signed int or unsigned int. 73 */ 74union tgsi_exec_channel 75{ 76 float f[TGSI_QUAD_SIZE]; 77 int i[TGSI_QUAD_SIZE]; 78 unsigned u[TGSI_QUAD_SIZE]; 79} ALIGN16; 80 81/** 82 * A vector[RGBA] of channels[4 pixels] 83 */ 84struct ALIGN16 tgsi_exec_vector 85{ 86 union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS]; 87}; 88 89/** 90 * For fragment programs, information for computing fragment input 91 * values from plane equation of the triangle/line. 92 */ 93struct tgsi_interp_coef 94{ 95 float a0[TGSI_NUM_CHANNELS]; /* in an xyzw layout */ 96 float dadx[TGSI_NUM_CHANNELS]; 97 float dady[TGSI_NUM_CHANNELS]; 98}; 99 100enum tgsi_sampler_control 101{ 102 TGSI_SAMPLER_LOD_NONE, 103 TGSI_SAMPLER_LOD_BIAS, 104 TGSI_SAMPLER_LOD_EXPLICIT, 105 TGSI_SAMPLER_LOD_ZERO, 106 TGSI_SAMPLER_DERIVS_EXPLICIT, 107 TGSI_SAMPLER_GATHER, 108}; 109 110struct tgsi_image_params { 111 unsigned unit; 112 unsigned tgsi_tex_instr; 113 enum pipe_format format; 114 unsigned execmask; 115}; 116 117struct tgsi_image { 118 /* image interfaces */ 119 void (*load)(const struct tgsi_image *image, 120 const struct tgsi_image_params *params, 121 const int s[TGSI_QUAD_SIZE], 122 const int t[TGSI_QUAD_SIZE], 123 const int r[TGSI_QUAD_SIZE], 124 const int sample[TGSI_QUAD_SIZE], 125 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); 126 127 void (*store)(const struct tgsi_image *image, 128 const struct tgsi_image_params *params, 129 const int s[TGSI_QUAD_SIZE], 130 const int t[TGSI_QUAD_SIZE], 131 const int r[TGSI_QUAD_SIZE], 132 const int sample[TGSI_QUAD_SIZE], 133 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); 134 135 void (*op)(const struct tgsi_image *image, 136 const struct tgsi_image_params *params, 137 enum tgsi_opcode opcode, 138 const int s[TGSI_QUAD_SIZE], 139 const int t[TGSI_QUAD_SIZE], 140 const int r[TGSI_QUAD_SIZE], 141 const int sample[TGSI_QUAD_SIZE], 142 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 143 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); 144 145 void (*get_dims)(const struct tgsi_image *image, 146 const struct tgsi_image_params *params, 147 int dims[4]); 148}; 149 150struct tgsi_buffer_params { 151 unsigned unit; 152 unsigned execmask; 153 unsigned writemask; 154}; 155 156/* SSBO interfaces */ 157struct tgsi_buffer { 158 void *(*lookup)(const struct tgsi_buffer *buffer, 159 uint32_t unit, uint32_t *size); 160}; 161 162/** 163 * Information for sampling textures, which must be implemented 164 * by code outside the TGSI executor. 165 */ 166struct tgsi_sampler 167{ 168 /** Get samples for four fragments in a quad */ 169 /* this interface contains 5 sets of channels that vary 170 * depending on the sampler. 171 * s - the first texture coordinate for sampling. 172 * t - the second texture coordinate for sampling - unused for 1D, 173 layer for 1D arrays. 174 * r - the third coordinate for sampling for 3D, cube, cube arrays, 175 * layer for 2D arrays. Compare value for 1D/2D shadows. 176 * c0 - Compare value for shadow cube and shadow 2d arrays, 177 * layer for cube arrays. 178 * derivs - explicit derivatives. 179 * offset - texel offsets 180 * lod - lod value, except for shadow cube arrays (compare value there). 181 */ 182 void (*get_samples)(struct tgsi_sampler *sampler, 183 const unsigned sview_index, 184 const unsigned sampler_index, 185 const float s[TGSI_QUAD_SIZE], 186 const float t[TGSI_QUAD_SIZE], 187 const float r[TGSI_QUAD_SIZE], 188 const float c0[TGSI_QUAD_SIZE], 189 const float c1[TGSI_QUAD_SIZE], 190 float derivs[3][2][TGSI_QUAD_SIZE], 191 const int8_t offset[3], 192 enum tgsi_sampler_control control, 193 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); 194 void (*get_dims)(struct tgsi_sampler *sampler, 195 const unsigned sview_index, 196 int level, int dims[4]); 197 void (*get_texel)(struct tgsi_sampler *sampler, 198 const unsigned sview_index, 199 const int i[TGSI_QUAD_SIZE], 200 const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE], 201 const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], 202 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); 203 void (*query_lod)(const struct tgsi_sampler *tgsi_sampler, 204 const unsigned sview_index, 205 const unsigned sampler_index, 206 const float s[TGSI_QUAD_SIZE], 207 const float t[TGSI_QUAD_SIZE], 208 const float p[TGSI_QUAD_SIZE], 209 const float c0[TGSI_QUAD_SIZE], 210 const enum tgsi_sampler_control control, 211 float mipmap[TGSI_QUAD_SIZE], 212 float lod[TGSI_QUAD_SIZE]); 213}; 214 215#define TGSI_EXEC_NUM_TEMPS 4096 216 217#define TGSI_EXEC_MAX_NESTING 32 218#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING 219#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING 220#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING 221#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING 222 223/* The maximum number of input attributes per vertex. For 2D 224 * input register files, this is the stride between two 1D 225 * arrays. 226 */ 227#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32 228 229/* The maximum number of bytes per constant buffer. 230 */ 231#define TGSI_EXEC_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4])) 232 233/* The maximum number of vertices per primitive */ 234#define TGSI_MAX_PRIM_VERTICES 6 235 236/* The maximum number of primitives to be generated */ 237#define TGSI_MAX_PRIMITIVES 64 238 239/* The maximum total number of vertices */ 240#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) 241 242#define TGSI_MAX_MISC_INPUTS 8 243 244#define TGSI_MAX_VERTEX_STREAMS 4 245 246/** function call/activation record */ 247struct tgsi_call_record 248{ 249 uint CondStackTop; 250 uint LoopStackTop; 251 uint ContStackTop; 252 int SwitchStackTop; 253 int BreakStackTop; 254 uint ReturnAddr; 255}; 256 257 258/* Switch-case block state. */ 259struct tgsi_switch_record { 260 uint mask; /**< execution mask */ 261 union tgsi_exec_channel selector; /**< a value case statements are compared to */ 262 uint defaultMask; /**< non-execute mask for default case */ 263}; 264 265 266enum tgsi_break_type { 267 TGSI_EXEC_BREAK_INSIDE_LOOP, 268 TGSI_EXEC_BREAK_INSIDE_SWITCH 269}; 270 271 272#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) 273 274typedef float float4[4]; 275 276struct tgsi_exec_machine; 277 278typedef void (* apply_sample_offset_func)( 279 const struct tgsi_exec_machine *mach, 280 unsigned attrib, 281 unsigned chan, 282 float ofs_x, 283 float ofs_y, 284 union tgsi_exec_channel *out_chan); 285 286/** 287 * Run-time virtual machine state for executing TGSI shader. 288 */ 289struct ALIGN16 tgsi_exec_machine 290{ 291 /* Total = program temporaries + internal temporaries 292 */ 293 struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS]; 294 295 unsigned ImmsReserved; 296 float4 *Imms; 297 298 struct tgsi_exec_vector *Inputs; 299 struct tgsi_exec_vector *Outputs; 300 apply_sample_offset_func *InputSampleOffsetApply; 301 302 /* System values */ 303 unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; 304 struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS]; 305 306 struct tgsi_exec_vector Addrs[3]; 307 308 struct tgsi_sampler *Sampler; 309 310 struct tgsi_image *Image; 311 struct tgsi_buffer *Buffer; 312 unsigned ImmLimit; 313 314 const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; 315 unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; 316 317 const struct tgsi_token *Tokens; /**< Declarations, instructions */ 318 enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */ 319 320 /* GEOMETRY processor only. */ 321 /* Number of vertices emitted per emitted primitive. */ 322 unsigned *Primitives[TGSI_MAX_VERTEX_STREAMS]; 323 /* Offsets in ->Outputs of the primitives' vertex output data */ 324 unsigned *PrimitiveOffsets[TGSI_MAX_VERTEX_STREAMS]; 325 unsigned NumOutputs; 326 unsigned MaxOutputVertices; 327 /* Offset in ->Outputs for the current vertex to be emitted. */ 328 unsigned OutputVertexOffset; 329 /* Number of primitives emitted. */ 330 unsigned OutputPrimCount[TGSI_MAX_VERTEX_STREAMS]; 331 332 /* FRAGMENT processor only. */ 333 const struct tgsi_interp_coef *InterpCoefs; 334 struct tgsi_exec_vector QuadPos; 335 float Face; /**< +1 if front facing, -1 if back facing */ 336 bool flatshade_color; 337 338 /* Compute Only */ 339 void *LocalMem; 340 unsigned LocalMemSize; 341 342 /* See GLSL 4.50 specification for definition of helper invocations */ 343 uint NonHelperMask; /**< non-helpers */ 344 /* Conditional execution masks */ 345 uint CondMask; /**< For IF/ELSE/ENDIF */ 346 uint LoopMask; /**< For BGNLOOP/ENDLOOP */ 347 uint ContMask; /**< For loop CONT statements */ 348 uint FuncMask; /**< For function calls */ 349 uint ExecMask; /**< = CondMask & LoopMask */ 350 uint KillMask; /**< Mask of channels killed in the current shader execution */ 351 352 /* Current switch-case state. */ 353 struct tgsi_switch_record Switch; 354 355 /* Current break type. */ 356 enum tgsi_break_type BreakType; 357 358 /** Condition mask stack (for nested conditionals) */ 359 uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; 360 int CondStackTop; 361 362 /** Loop mask stack (for nested loops) */ 363 uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; 364 int LoopStackTop; 365 366 /** Loop label stack */ 367 uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; 368 int LoopLabelStackTop; 369 370 /** Loop continue mask stack (see comments in tgsi_exec.c) */ 371 uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; 372 int ContStackTop; 373 374 /** Switch case stack */ 375 struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; 376 int SwitchStackTop; 377 378 enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; 379 int BreakStackTop; 380 381 /** Function execution mask stack (for executing subroutine code) */ 382 uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; 383 int FuncStackTop; 384 385 /** Function call stack for saving/restoring the program counter */ 386 struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING]; 387 int CallStackTop; 388 389 struct tgsi_full_instruction *Instructions; 390 uint NumInstructions; 391 392 struct tgsi_full_declaration *Declarations; 393 uint NumDeclarations; 394 395 struct tgsi_declaration_sampler_view 396 SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 397 398 boolean UsedGeometryShader; 399 400 int pc; 401}; 402 403struct tgsi_exec_machine * 404tgsi_exec_machine_create(enum pipe_shader_type shader_type); 405 406void 407tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach); 408 409 410void 411tgsi_exec_machine_bind_shader( 412 struct tgsi_exec_machine *mach, 413 const struct tgsi_token *tokens, 414 struct tgsi_sampler *sampler, 415 struct tgsi_image *image, 416 struct tgsi_buffer *buffer); 417 418uint 419tgsi_exec_machine_run( 420 struct tgsi_exec_machine *mach, int start_pc ); 421 422 423void 424tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); 425 426 427extern void 428tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 429 unsigned num_bufs, 430 const void **bufs, 431 const unsigned *buf_sizes); 432 433 434static inline int 435tgsi_exec_get_shader_param(enum pipe_shader_cap param) 436{ 437 switch(param) { 438 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: 439 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: 440 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: 441 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: 442 return INT_MAX; 443 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 444 return TGSI_EXEC_MAX_NESTING; 445 case PIPE_SHADER_CAP_MAX_INPUTS: 446 return TGSI_EXEC_MAX_INPUT_ATTRIBS; 447 case PIPE_SHADER_CAP_MAX_OUTPUTS: 448 return 32; 449 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: 450 return TGSI_EXEC_MAX_CONST_BUFFER_SIZE; 451 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: 452 return PIPE_MAX_CONSTANT_BUFFERS; 453 case PIPE_SHADER_CAP_MAX_TEMPS: 454 return TGSI_EXEC_NUM_TEMPS; 455 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: 456 return 1; 457 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: 458 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: 459 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: 460 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 461 return 1; 462 case PIPE_SHADER_CAP_SUBROUTINES: 463 return 1; 464 case PIPE_SHADER_CAP_INTEGERS: 465 return 1; 466 case PIPE_SHADER_CAP_INT64_ATOMICS: 467 case PIPE_SHADER_CAP_FP16: 468 case PIPE_SHADER_CAP_FP16_DERIVATIVES: 469 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: 470 case PIPE_SHADER_CAP_INT16: 471 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: 472 return 0; 473 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: 474 return PIPE_MAX_SAMPLERS; 475 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: 476 return PIPE_MAX_SHADER_SAMPLER_VIEWS; 477 case PIPE_SHADER_CAP_PREFERRED_IR: 478 return PIPE_SHADER_IR_TGSI; 479 case PIPE_SHADER_CAP_SUPPORTED_IRS: 480 return 1 << PIPE_SHADER_IR_TGSI; 481 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: 482 return 1; 483 case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: 484 case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: 485 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: 486 return 1; 487 case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: 488 case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: 489 case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: 490 case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: 491 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: 492 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: 493 return 0; 494 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 495 return PIPE_MAX_SHADER_BUFFERS; 496 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: 497 return PIPE_MAX_SHADER_IMAGES; 498 499 case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: 500 return 32; 501 } 502 /* if we get here, we missed a shader cap above (and should have seen 503 * a compiler warning.) 504 */ 505 return 0; 506} 507 508#if defined __cplusplus 509} /* extern "C" */ 510#endif 511 512#endif /* TGSI_EXEC_H */ 513