/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_ureg.h"

/* Return the vertex shader used by u_blitter for the given attribute type and
 * layer count. The shader is built lazily on first use and cached in one of
 * the sctx->vs_blit_* slots, so the returned pointer is owned by the context.
 *
 * The shader is a pass-through VS: position (and optionally one generic
 * attribute) are loaded from SGPRs (TGSI_PROPERTY_VS_BLIT_SGPRS_AMD), and for
 * layered blits the layer output is taken from the instance ID.
 *
 * Returns NULL on ureg allocation failure or an unknown attribute type.
 */
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type, unsigned num_layers)
{
   unsigned vs_blit_property;
   void **vs;

   /* Pick the per-context cache slot and the SGPR layout matching the
    * requested attribute type. */
   switch (type) {
   case UTIL_BLITTER_ATTRIB_NONE:
      vs = num_layers > 1 ? &sctx->vs_blit_pos_layered : &sctx->vs_blit_pos;
      vs_blit_property = SI_VS_BLIT_SGPRS_POS;
      break;
   case UTIL_BLITTER_ATTRIB_COLOR:
      vs = num_layers > 1 ? &sctx->vs_blit_color_layered : &sctx->vs_blit_color;
      vs_blit_property = SI_VS_BLIT_SGPRS_POS_COLOR;
      break;
   case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
   case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
      /* Texcoord blits share one cache slot and are never layered here. */
      assert(num_layers == 1);
      vs = &sctx->vs_blit_texcoord;
      vs_blit_property = SI_VS_BLIT_SGPRS_POS_TEXCOORD;
      break;
   default:
      assert(0);
      return NULL;
   }

   /* Return the cached shader if it was already built. */
   if (*vs)
      return *vs;

   struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
   if (!ureg)
      return NULL;

   /* Tell the shader to load VS inputs from SGPRs: */
   ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS_AMD, vs_blit_property);
   ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);

   /* This is just a pass-through shader with 1-3 MOV instructions. */
   ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0));

   if (type != UTIL_BLITTER_ATTRIB_NONE) {
      /* Forward the color/texcoord attribute unchanged. */
      ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0), ureg_DECL_vs_input(ureg, 1));
   }

   if (num_layers > 1) {
      /* Layered blits draw one instance per layer; route the instance ID
       * to the layer output. */
      struct ureg_src instance_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
      struct ureg_dst layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);

      ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X),
               ureg_scalar(instance_id, TGSI_SWIZZLE_X));
   }
   ureg_END(ureg);

   *vs = ureg_create_shader_and_destroy(ureg, &sctx->b);
   return *vs;
}

/**
 * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
 * VS passes its outputs to TES directly, so the fixed-function shader only
 * has to write TESSOUTER and TESSINNER.
 */
void *si_create_fixed_func_tcs(struct si_context *sctx)
{
   struct ureg_src outer, inner;
   struct ureg_dst tessouter, tessinner;
   struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);

   if (!ureg)
      return NULL;

   /* Copy the default tess levels from system values straight to the
    * TESSOUTER/TESSINNER outputs; everything else passes through. */
   outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0);
   inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0);

   tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
   tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);

   ureg_MOV(ureg, tessouter, outer);
   ureg_MOV(ureg, tessinner, inner);
   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, &sctx->b);
}

/* Create a compute shader implementing clear_buffer or copy_buffer.
 *
 * Each thread handles num_dwords_per_thread dwords, issued as up to
 * num_dwords_per_thread/4 vec4-wide memory ops. For clears (is_copy == false)
 * the clear value is passed in user-data SGPRs; for copies, loads are issued
 * ahead of stores (see load_store_distance) to hide latency.
 *
 * Returns the compute state object, or NULL on allocation/ureg failure.
 */
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
                                   bool dst_stream_cache_policy, bool is_copy)
{
   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
   assert(util_is_power_of_two_nonzero(num_dwords_per_thread));

   unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT;
   if (dst_stream_cache_policy)
      store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY;

   /* Don't cache loads, because there is no reuse. */
   unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY;

   /* One memory op per 4 dwords; at least one op even for sub-vec4 sizes. */
   unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4);
   unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned));

   /* inst_dwords[i] = dword width of the i-th memory op (4, or the remainder
    * for the last op when num_dwords_per_thread isn't a multiple of 4). */
   for (unsigned i = 0; i < num_mem_ops; i++) {
      if (i * 4 < num_dwords_per_thread)
         inst_dwords[i] = MIN2(4, num_dwords_per_thread - i * 4);
   }

   struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
   if (!ureg)
      return NULL;

   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, sscreen->compute_wave_size);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

   struct ureg_src value;
   if (!is_copy) {
      /* For clears, the clear value arrives in user-data SGPRs; its width
       * matches the first memory op. */
      ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD, inst_dwords[0]);
      value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA_AMD, 0);
   }

   struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
   struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
   struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
   struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
   struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false));
   struct ureg_src srcbuf;
   struct ureg_src *values = NULL;

   if (is_copy) {
      srcbuf = ureg_DECL_buffer(ureg, 1, false);
      /* Temporaries holding in-flight loaded data, one per memory op. */
      values = malloc(num_mem_ops * sizeof(struct ureg_src));
   }

   /* If there are multiple stores, the first store writes into 0*wavesize+tid,
    * the 2nd store writes into 1*wavesize+tid, the 3rd store writes into 2*wavesize+tid, etc.
    */
   ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, sscreen->compute_wave_size * num_mem_ops),
             tid);
   /* Convert from a "store size unit" into bytes. */
   ureg_UMUL(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[0]));
   ureg_MOV(ureg, load_addr, ureg_src(store_addr));

   /* Distance between a load and a store for latency hiding. */
   unsigned load_store_distance = is_copy ? 8 : 0;

   /* Software-pipelined loop: iteration i issues load i (if copying) and
    * store i - load_store_distance (once d >= 0). */
   for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) {
      int d = i - load_store_distance;

      if (is_copy && i < num_mem_ops) {
         if (i) {
            /* Advance the load address by one wave-worth of this op's bytes. */
            ureg_UADD(ureg, load_addr, ureg_src(load_addr),
                      ureg_imm1u(ureg, 4 * inst_dwords[i] * sscreen->compute_wave_size));
         }

         values[i] = ureg_src(ureg_DECL_temporary(ureg));
         struct ureg_dst dst =
            ureg_writemask(ureg_dst(values[i]), u_bit_consecutive(0, inst_dwords[i]));
         struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)};
         ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2, load_qualifier,
                          TGSI_TEXTURE_BUFFER, 0);
      }

      if (d >= 0) {
         if (d) {
            /* Advance the store address likewise. */
            ureg_UADD(ureg, store_addr, ureg_src(store_addr),
                      ureg_imm1u(ureg, 4 * inst_dwords[d] * sscreen->compute_wave_size));
         }

         struct ureg_dst dst = ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d]));
         struct ureg_src srcs[] = {ureg_src(store_addr), is_copy ? values[d] : value};
         ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2, store_qualifier,
                          TGSI_TEXTURE_BUFFER, 0);
      }
   }
   ureg_END(ureg);

   struct pipe_compute_state state = {};
   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = ureg_get_tokens(ureg, NULL);

   void *cs = ctx->create_compute_state(ctx, &state);
   ureg_destroy(ureg);
   ureg_free_tokens(state.prog);

   /* values is NULL for clears; free(NULL) is a no-op. */
   free(values);
   return cs;
}

/* Create a compute shader that clears a buffer with read-modify-write:
 * each thread loads a vec4, ANDs it with an inverted writemask and ORs in a
 * masked clear value, both passed in user-data SGPRs.
 */
void *si_create_clear_buffer_rmw_cs(struct pipe_context *ctx)
{
   /* 64 threads per block; each thread handles one dword-aligned vec4
    * (16 bytes). SV[2].x = clear value masked to the write bits,
    * SV[2].y = inverted writemask (both broadcast to all components). */
   const char *text = "COMP\n"
                      "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
                      "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
                      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
                      "PROPERTY CS_USER_DATA_COMPONENTS_AMD 2\n"
                      "DCL SV[0], THREAD_ID\n"
                      "DCL SV[1], BLOCK_ID\n"
                      "DCL SV[2], CS_USER_DATA_AMD\n"
                      "DCL BUFFER[0]\n"
                      "DCL TEMP[0..1]\n"
                      "IMM[0] UINT32 {64, 16, 0, 0}\n"
                      /* ADDRESS = BLOCK_ID * 64 + THREAD_ID; */
                      "UMAD TEMP[0].x, SV[1].xxxx, IMM[0].xxxx, SV[0].xxxx\n"
                      /* ADDRESS = ADDRESS * 16; (byte offset, loading one vec4 per thread) */
                      "UMUL TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy\n"
                      "LOAD TEMP[1], BUFFER[0], TEMP[0].xxxx\n"
                      /* DATA &= inverted_writemask; */
                      "AND TEMP[1], TEMP[1], SV[2].yyyy\n"
                      /* DATA |= clear_value_masked; */
                      "OR TEMP[1], TEMP[1], SV[2].xxxx\n"
                      "STORE BUFFER[0].xyzw, TEMP[0], TEMP[1]%s\n"
                      "END\n";
   char final_text[2048];
   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   /* Append STREAM_CACHE_POLICY to the store when the destination should
    * bypass the L2 LRU cache. */
   snprintf(final_text, sizeof(final_text), text,
            SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ", STREAM_CACHE_POLICY" : "");

   if (!tgsi_text_translate(final_text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Create the compute shader that is used to collect the results.
 *
 * One compute grid with a single thread is launched for every query result
 * buffer. The thread (optionally) reads a previous summary buffer, then
 * accumulates data from the query result buffer, and writes the result either
 * to a summary buffer to be consumed by the next grid invocation or to the
 * user-supplied buffer.
 *
 * Data layout:
 *
 * CONST
 *  0.x = end_offset
 *  0.y = result_stride
 *  0.z = result_count
 *  0.w = bit field:
 *          1: read previously accumulated values
 *          2: write accumulated values for chaining
 *          4: write result available
 *          8: convert result to boolean (0/1)
 *         16: only read one dword and use that as result
 *         32: apply timestamp conversion
 *         64: store full 64 bits result
 *        128: store signed 32 bits result
 *        256: SO_OVERFLOW mode: take the difference of two successive half-pairs
 *  1.x = fence_offset
 *  1.y = pair_stride
 *  1.z = pair_count
 *
 * BUFFER[0] = query result buffer
 * BUFFER[1] = previous summary buffer
 * BUFFER[2] = next summary buffer or user-supplied buffer
 */
void *si_create_query_result_cs(struct si_context *sctx)
{
   /* TEMP[0].xy = accumulated result so far
    * TEMP[0].z = result not available
    *
    * TEMP[1].x = current result index
    * TEMP[1].y = current pair index
    */
   static const char text_tmpl[] =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
      "DCL BUFFER[0]\n"
      "DCL BUFFER[1]\n"
      "DCL BUFFER[2]\n"
      "DCL CONST[0][0..1]\n"
      "DCL TEMP[0..5]\n"
      "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
      "IMM[1] UINT32 {1, 2, 4, 8}\n"
      "IMM[2] UINT32 {16, 32, 64, 128}\n"
      "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
      "IMM[4] UINT32 {256, 0, 0, 0}\n"

      "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
      "UIF TEMP[5]\n"
      /* Check result availability. */
      "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
      "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
      "MOV TEMP[1], TEMP[0].zzzz\n"
      "NOT TEMP[0].z, TEMP[0].zzzz\n"

      /* Load result if available. */
      "UIF TEMP[1]\n"
      "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
      "ENDIF\n"
      "ELSE\n"
      /* Load previously accumulated result if requested. */
      "MOV TEMP[0], IMM[0].xxxx\n"
      "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
      "UIF TEMP[4]\n"
      "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
      "ENDIF\n"

      "MOV TEMP[1].x, IMM[0].xxxx\n"
      "BGNLOOP\n"
      /* Break if accumulated result so far is not available. */
      "UIF TEMP[0].zzzz\n"
      "BRK\n"
      "ENDIF\n"

      /* Break if result_index >= result_count. */
      "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
      "UIF TEMP[5]\n"
      "BRK\n"
      "ENDIF\n"

      /* Load fence and check result availability */
      "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
      "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
      "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
      "NOT TEMP[0].z, TEMP[0].zzzz\n"
      "UIF TEMP[0].zzzz\n"
      "BRK\n"
      "ENDIF\n"

      "MOV TEMP[1].y, IMM[0].xxxx\n"
      "BGNLOOP\n"
      /* Load start and end. */
      "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
      "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
      "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"

      "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
      "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"

      "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"

      "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
      "UIF TEMP[5].zzzz\n"
      /* Load second start/end half-pair and
       * take the difference
       */
      "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
      "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
      "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"

      "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
      "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
      "ENDIF\n"

      "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"

      /* Increment pair index */
      "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
      "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
      "UIF TEMP[5]\n"
      "BRK\n"
      "ENDIF\n"
      "ENDLOOP\n"

      /* Increment result index */
      "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
      "ENDLOOP\n"
      "ENDIF\n"

      "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
      "UIF TEMP[4]\n"
      /* Store accumulated data for chaining. */
      "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
      "ELSE\n"
      "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
      "UIF TEMP[4]\n"
      /* Store result availability. */
      "NOT TEMP[0].z, TEMP[0]\n"
      "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
      "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"

      "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
      "UIF TEMP[4]\n"
      "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
      "ENDIF\n"
      "ELSE\n"
      /* Store result if it is available. */
      "NOT TEMP[4], TEMP[0].zzzz\n"
      "UIF TEMP[4]\n"
      /* Apply timestamp conversion */
      "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
      "UIF TEMP[4]\n"
      "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
      "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
      "ENDIF\n"

      /* Convert to boolean */
      "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
      "UIF TEMP[4]\n"
      "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
      "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
      "MOV TEMP[0].y, IMM[0].xxxx\n"
      "ENDIF\n"

      "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
      "UIF TEMP[4]\n"
      "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
      "ELSE\n"
      /* Clamping */
      "UIF TEMP[0].yyyy\n"
      "MOV TEMP[0].x, IMM[0].wwww\n"
      "ENDIF\n"

      "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
      "UIF TEMP[4]\n"
      "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
      "ENDIF\n"

      "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
      "ENDIF\n"
      "ENDIF\n"
      "ENDIF\n"
      "ENDIF\n"

      "END\n";

   char text[sizeof(text_tmpl) + 32];
   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {};

   /* Hard code the frequency into the shader so that the backend can
    * use the full range of optimizations for divide-by-constant.
    */
   snprintf(text, sizeof(text), text_tmpl, sctx->screen->info.clock_crystal_freq);

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return sctx->b.create_compute_state(&sctx->b, &state);
}

/* Create a compute shader implementing copy_image.
 * Luckily, this works with all texture targets except 1D_ARRAY.
 */
void *si_create_copy_image_compute_shader(struct pipe_context *ctx)
{
   /* User data packs src/dst coordinates: low 16 bits = src.xyz,
    * high 16 bits = dst.xyz; the thread ID is added to both. */
   static const char text[] =
      "COMP\n"
      "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"
      "DCL SV[2], BLOCK_SIZE\n"
      "DCL SV[3], CS_USER_DATA_AMD\n"
      "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL TEMP[0..3], LOCAL\n"
      "IMM[0] UINT32 {65535, 16, 0, 0}\n"

      "UMAD TEMP[0].xyz, SV[1], SV[2], SV[0]\n" /* threadID.xyz */
      "AND TEMP[1].xyz, SV[3], IMM[0].xxxx\n" /* src.xyz */
      "UADD TEMP[1].xyz, TEMP[1], TEMP[0]\n" /* src.xyz + threadID.xyz */
      "LOAD TEMP[3], IMAGE[0], TEMP[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "USHR TEMP[2].xyz, SV[3], IMM[0].yyyy\n" /* dst.xyz */
      "UADD TEMP[2].xyz, TEMP[2], TEMP[0]\n" /* dst.xyz + threadID.xyz */
      "STORE IMAGE[1], TEMP[2], TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Same as si_create_copy_image_compute_shader, but for the 1D_ARRAY target,
 * which uses .xz coordinates (x = texel, z = layer). */
void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx)
{
   static const char text[] =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
      "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"
      "DCL SV[2], CS_USER_DATA_AMD\n"
      "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL IMAGE[1], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL TEMP[0..4], LOCAL\n"
      "IMM[0] UINT32 {64, 1, 65535, 16}\n"

      "UMAD TEMP[0].xz, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
                                                              /* threadID.xz */
      "AND TEMP[1].xz, SV[2], IMM[0].zzzz\n" /* src.xz */
      "UADD TEMP[1].xz, TEMP[1], TEMP[0]\n" /* src.xz + threadID.xz */
      "LOAD TEMP[3], IMAGE[0], TEMP[1].xzzz, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "USHR TEMP[2].xz, SV[2], IMM[0].wwww\n" /* dst.xz */
      "UADD TEMP[2].xz, TEMP[2], TEMP[0]\n" /* dst.xz + threadID.xz */
      "STORE IMAGE[1], TEMP[2].xzzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Create a compute shader implementing DCC decompression via a blit.
 * This is a trivial copy_image shader except that it has a variable block
 * size and a barrier.
 */
void *si_create_dcc_decompress_cs(struct pipe_context *ctx)
{
   static const char text[] =
      "COMP\n"
      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"
      "DCL SV[2], BLOCK_SIZE\n"
      "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL TEMP[0..1]\n"

      "UMAD TEMP[0].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
      "LOAD TEMP[1], IMAGE[0], TEMP[0].xyzz, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      /* Wait for the whole threadgroup (= DCC block) to load texels before
       * overwriting them, because overwriting any pixel within a DCC block
       * can break compression for the whole block.
       */
      "BARRIER\n"
      "STORE IMAGE[1], TEMP[0].xyzz, TEMP[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Create a compute shader that clears a 2D_ARRAY render target:
 * CONST[0][0].xyz = destination origin, CONST[0][1] = clear color.
 * Each 8x8 block writes one texel per thread at origin + global thread ID. */
void *si_clear_render_target_shader(struct pipe_context *ctx)
{
   static const char text[] =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"
      "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
      "DCL TEMP[0..3], LOCAL\n"
      "IMM[0] UINT32 {8, 1, 0, 0}\n"
      "MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
      "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"
      "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
      "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"
      "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* TODO: Didn't really test 1D_ARRAY */
void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx)
{
   /* 1D_ARRAY variant of si_clear_render_target_shader: x = texel,
    * y = layer; 64x1x1 blocks. */
   static const char text[] =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"
      "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
      "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
      "DCL TEMP[0..3], LOCAL\n"
      "IMM[0] UINT32 {64, 1, 0, 0}\n"
      "MOV TEMP[0].xy, CONST[0][0].xzzw\n"
      "UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"
      "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n"
      "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"
      "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
      "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Create a compute shader that writes a 12-byte (3-dword) pattern per thread
 * to a buffer; the pattern comes from 3 user-data SGPRs (SV[2].xyz). */
void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx)
{
   static const char text[] = "COMP\n"
                              "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
                              "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
                              "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
                              "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
                              "DCL SV[0], THREAD_ID\n"
                              "DCL SV[1], BLOCK_ID\n"
                              "DCL SV[2], CS_USER_DATA_AMD\n"
                              "DCL BUFFER[0]\n"
                              "DCL TEMP[0..0]\n"
                              "IMM[0] UINT32 {64, 1, 12, 0}\n"
                              "UMAD TEMP[0].x, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"
                              "UMUL TEMP[0].x, TEMP[0].xyzz, IMM[0].zzzz\n" // 12 bytes
                              "STORE BUFFER[0].xyz, TEMP[0].xxxx, SV[2].xyzz%s\n"
                              "END\n";
   char final_text[2048];
   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {0};

   /* Bypass the L2 LRU cache for the store when the compute destination
    * cache policy says so. */
   snprintf(final_text, sizeof(final_text), text,
            SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ", STREAM_CACHE_POLICY" : "");

   if (!tgsi_text_translate(final_text, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return ctx->create_compute_state(ctx, &state);
}

/* Load samples from the image, and copy them to the same image. This looks like
 * a no-op, but it's not.
Loads use FMASK, while stores don't, so samples are
 * reordered to match expanded FMASK.
 *
 * After the shader finishes, FMASK should be cleared to identity.
 */
void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array)
{
   enum tgsi_texture_type target = is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : TGSI_TEXTURE_2D_MSAA;
   struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
   if (!ureg)
      return NULL;

   /* 8x8 threads per block; one pixel per thread. */
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 8);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 8);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

   /* Compute the image coordinates. */
   struct ureg_src image = ureg_DECL_image(ureg, 0, target, 0, true, false);
   struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
   struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
   struct ureg_dst coord = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZW);
   /* coord.xy = blk.xy * 8 + tid.xy */
   ureg_UMAD(ureg, ureg_writemask(coord, TGSI_WRITEMASK_XY), ureg_swizzle(blk, 0, 1, 1, 1),
             ureg_imm2u(ureg, 8, 8), ureg_swizzle(tid, 0, 1, 1, 1));
   if (is_array) {
      /* coord.z = layer, taken from the block ID's z component. */
      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_Z), ureg_scalar(blk, TGSI_SWIZZLE_Z));
   }

   /* Load samples, resolving FMASK. */
   struct ureg_dst sample[8];
   assert(num_samples <= ARRAY_SIZE(sample));

   /* Load ALL samples first (coord.w selects the sample index), so every
    * sample is read before any store rewrites the surface. */
   for (unsigned i = 0; i < num_samples; i++) {
      sample[i] = ureg_DECL_temporary(ureg);

      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, i));

      struct ureg_src srcs[] = {image, ureg_src(coord)};
      ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &sample[i], 1, srcs, 2, TGSI_MEMORY_RESTRICT, target,
                       0);
   }

   /* Store samples, ignoring FMASK. */
   for (unsigned i = 0; i < num_samples; i++) {
      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, i));

      struct ureg_dst dst_image = ureg_dst(image);
      struct ureg_src srcs[] = {ureg_src(coord), ureg_src(sample[i])};
      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst_image, 1, srcs, 2, TGSI_MEMORY_RESTRICT,
                       target, 0);
   }
   ureg_END(ureg);

   struct pipe_compute_state state = {};
   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = ureg_get_tokens(ureg, NULL);

   void *cs = ctx->create_compute_state(ctx, &state);
   ureg_destroy(ureg);
   return cs;
}

/* Create the compute shader that is used to collect the results of gfx10+
 * shader queries.
 *
 * One compute grid with a single thread is launched for every query result
 * buffer. The thread (optionally) reads a previous summary buffer, then
 * accumulates data from the query result buffer, and writes the result either
 * to a summary buffer to be consumed by the next grid invocation or to the
 * user-supplied buffer.
 *
 * Data layout:
 *
 * BUFFER[0] = query result buffer (layout is defined by gfx10_sh_query_buffer_mem)
 * BUFFER[1] = previous summary buffer
 * BUFFER[2] = next summary buffer or user-supplied buffer
 *
 * CONST
 *  0.x = config; the low 3 bits indicate the mode:
 *          0: sum up counts
 *          1: determine result availability and write it as a boolean
 *          2: SO_OVERFLOW
 *          3: SO_ANY_OVERFLOW
 *        the remaining bits form a bitfield:
 *          8: write result as a 64-bit value
 *  0.y = offset in bytes to counts or stream for SO_OVERFLOW mode
 *  0.z = chain bit field:
 *          1: have previous summary buffer
 *          2: write next summary buffer
 *  0.w = result_count
 */
void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
{
   /* TEMP[0].x = accumulated result so far
    * TEMP[0].y = result missing
    * TEMP[0].z = whether we're in overflow mode
    */
   static const char text_tmpl[] = "COMP\n"
                                   "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
                                   "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
                                   "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
                                   "DCL BUFFER[0]\n"
                                   "DCL BUFFER[1]\n"
                                   "DCL BUFFER[2]\n"
                                   "DCL CONST[0][0..0]\n"
                                   "DCL TEMP[0..5]\n"
                                   "IMM[0] UINT32 {0, 7, 256, 4294967295}\n"
                                   "IMM[1] UINT32 {1, 2, 4, 8}\n"
                                   "IMM[2] UINT32 {16, 32, 64, 128}\n"

                                   /*
                                   acc_result = 0;
                                   acc_missing = 0;
                                   if (chain & 1) {
                                      acc_result = buffer[1][0];
                                      acc_missing = buffer[1][1];
                                   }
                                   */
                                   "MOV TEMP[0].xy, IMM[0].xxxx\n"
                                   "AND TEMP[5], CONST[0][0].zzzz, IMM[1].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "LOAD TEMP[0].xy, BUFFER[1], IMM[0].xxxx\n"
                                   "ENDIF\n"

                                   /*
                                   is_overflow (TEMP[0].z) = (config & 7) >= 2;
                                   result_remaining (TEMP[1].x) = (is_overflow && acc_result) ? 0 :
                                   result_count; base_offset (TEMP[1].y) = 0; for (;;) { if
                                   (!result_remaining) break; result_remaining--;
                                   */
                                   "AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
                                   "USGE TEMP[0].z, TEMP[5].xxxx, IMM[1].yyyy\n"

                                   "AND TEMP[5].x, TEMP[0].zzzz, TEMP[0].xxxx\n"
                                   "UCMP TEMP[1].x, TEMP[5].xxxx, IMM[0].xxxx, CONST[0][0].wwww\n"
                                   "MOV TEMP[1].y, IMM[0].xxxx\n"

                                   "BGNLOOP\n"
                                   "USEQ TEMP[5], TEMP[1].xxxx, IMM[0].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "BRK\n"
                                   "ENDIF\n"
                                   "UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"

                                   /*
                                   fence = buffer[0]@(base_offset + sizeof(gfx10_sh_query_buffer_mem.stream));
                                   if (!fence) {
                                      acc_missing = ~0u;
                                      break;
                                   }
                                   */
                                   "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n"
                                   "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
                                   "USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "MOV TEMP[0].y, TEMP[5].xxxx\n"
                                   "BRK\n"
                                   "ENDIF\n"

                                   /*
                                   stream_offset (TEMP[2].x) = base_offset + offset;

                                   if (!(config & 7)) {
                                      acc_result += buffer[0]@stream_offset;
                                   }
                                   */
                                   "UADD TEMP[2].x, TEMP[1].yyyy, CONST[0][0].yyyy\n"

                                   "AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
                                   "USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "LOAD TEMP[5].x, BUFFER[0], TEMP[2].xxxx\n"
                                   "UADD TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx\n"
                                   "ENDIF\n"

                                   /*
                                   if ((config & 7) >= 2) {
                                      count (TEMP[2].y) = (config & 1) ? 4 : 1;
                                   */
                                   "AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
                                   "USGE TEMP[5], TEMP[5].xxxx, IMM[1].yyyy\n"
                                   "UIF TEMP[5]\n"
                                   "AND TEMP[5].x, CONST[0][0].xxxx, IMM[1].xxxx\n"
                                   "UCMP TEMP[2].y, TEMP[5].xxxx, IMM[1].zzzz, IMM[1].xxxx\n"

                                   /*
                                   do {
                                      generated = buffer[0]@(stream_offset + 2 * sizeof(uint64_t));
                                      emitted = buffer[0]@(stream_offset + 3 * sizeof(uint64_t));
                                      if (generated != emitted) {
                                         acc_result = 1;
                                         result_remaining = 0;
                                         break;
                                      }

                                      stream_offset += sizeof(gfx10_sh_query_buffer_mem.stream[0]);
                                   } while (--count);
                                   */
                                   "BGNLOOP\n"
                                   "UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
                                   "LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n"
                                   "USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n"
                                   "UIF TEMP[5]\n"
                                   "MOV TEMP[0].x, IMM[1].xxxx\n"
                                   "MOV TEMP[1].y, IMM[0].xxxx\n"
                                   "BRK\n"
                                   "ENDIF\n"

                                   "UADD TEMP[2].y, TEMP[2].yyyy, IMM[0].wwww\n"
                                   "USEQ TEMP[5], TEMP[2].yyyy, IMM[0].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "BRK\n"
                                   "ENDIF\n"
                                   "UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n"
                                   "ENDLOOP\n"
                                   "ENDIF\n"

                                   /*
                                   base_offset += sizeof(gfx10_sh_query_buffer_mem);
                                   } // end outer loop
                                   */
                                   "UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n"
                                   "ENDLOOP\n"

                                   /*
                                   if (chain & 2) {
                                      buffer[2][0] = acc_result;
                                      buffer[2][1] = acc_missing;
                                   } else {
                                   */
                                   "AND TEMP[5], CONST[0][0].zzzz, IMM[1].yyyy\n"
                                   "UIF TEMP[5]\n"
                                   "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0]\n"
                                   "ELSE\n"

                                   /*
                                   if ((config & 7) == 1) {
                                      acc_result = acc_missing ? 0 : 1;
                                      acc_missing = 0;
                                   }
                                   */
                                   "AND TEMP[5], CONST[0][0].xxxx, IMM[0].yyyy\n"
                                   "USEQ TEMP[5], TEMP[5].xxxx, IMM[1].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "UCMP TEMP[0].x, TEMP[0].yyyy, IMM[0].xxxx, IMM[1].xxxx\n"
                                   "MOV TEMP[0].y, IMM[0].xxxx\n"
                                   "ENDIF\n"

                                   /*
                                   if (!acc_missing) {
                                      buffer[2][0] = acc_result;
                                      if (config & 8)
                                         buffer[2][1] = 0;
                                   }
                                   */
                                   "USEQ TEMP[5], TEMP[0].yyyy, IMM[0].xxxx\n"
                                   "UIF TEMP[5]\n"
                                   "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"

                                   "AND TEMP[5], CONST[0][0].xxxx, IMM[1].wwww\n"
                                   "UIF TEMP[5]\n"
                                   "STORE BUFFER[2].x, IMM[1].zzzz, TEMP[0].yyyy\n"
                                   "ENDIF\n"
                                   "ENDIF\n"
                                   "ENDIF\n"

                                   "END\n";

   struct tgsi_token tokens[1024];
   struct pipe_compute_state state = {};

   if (!tgsi_text_translate(text_tmpl, tokens, ARRAY_SIZE(tokens))) {
      assert(false);
      return NULL;
   }

   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = tokens;

   return sctx->b.create_compute_state(&sctx->b, &state);
}