1/* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#include "si_shader_internal.h" 26#include "si_pipe.h" 27#include "ac_llvm_util.h" 28#include "util/u_memory.h" 29 30enum si_llvm_calling_convention { 31 RADEON_LLVM_AMDGPU_VS = 87, 32 RADEON_LLVM_AMDGPU_GS = 88, 33 RADEON_LLVM_AMDGPU_PS = 89, 34 RADEON_LLVM_AMDGPU_CS = 90, 35 RADEON_LLVM_AMDGPU_HS = 93, 36}; 37 38struct si_llvm_diagnostics { 39 struct pipe_debug_callback *debug; 40 unsigned retval; 41}; 42 43static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) 44{ 45 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context; 46 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); 47 char *description = LLVMGetDiagInfoDescription(di); 48 const char *severity_str = NULL; 49 50 switch (severity) { 51 case LLVMDSError: 52 severity_str = "error"; 53 break; 54 case LLVMDSWarning: 55 severity_str = "warning"; 56 break; 57 case LLVMDSRemark: 58 severity_str = "remark"; 59 break; 60 case LLVMDSNote: 61 severity_str = "note"; 62 break; 63 default: 64 severity_str = "unknown"; 65 } 66 67 pipe_debug_message(diag->debug, SHADER_INFO, 68 "LLVM diagnostic (%s): %s", severity_str, description); 69 70 if (severity == LLVMDSError) { 71 diag->retval = 1; 72 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); 73 } 74 75 LLVMDisposeMessage(description); 76} 77 78/** 79 * Compile an LLVM module to machine code. 80 * 81 * @returns 0 for success, 1 for failure 82 */ 83unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, 84 struct ac_llvm_compiler *compiler, 85 struct pipe_debug_callback *debug, 86 bool less_optimized) 87{ 88 struct ac_compiler_passes *passes = 89 less_optimized && compiler->low_opt_passes ? 90 compiler->low_opt_passes : compiler->passes; 91 struct si_llvm_diagnostics diag; 92 LLVMContextRef llvm_ctx; 93 94 diag.debug = debug; 95 diag.retval = 0; 96 97 /* Setup Diagnostic Handler*/ 98 llvm_ctx = LLVMGetModuleContext(M); 99 100 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); 101 102 /* Compile IR. */ 103 if (!ac_compile_module_to_binary(passes, M, binary)) 104 diag.retval = 1; 105 106 if (diag.retval != 0) 107 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); 108 return diag.retval; 109} 110 111LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, 112 enum tgsi_opcode_type type) 113{ 114 struct si_shader_context *ctx = si_shader_context(bld_base); 115 116 switch (type) { 117 case TGSI_TYPE_UNSIGNED: 118 case TGSI_TYPE_SIGNED: 119 return ctx->ac.i32; 120 case TGSI_TYPE_UNSIGNED64: 121 case TGSI_TYPE_SIGNED64: 122 return ctx->ac.i64; 123 case TGSI_TYPE_DOUBLE: 124 return ctx->ac.f64; 125 case TGSI_TYPE_UNTYPED: 126 case TGSI_TYPE_FLOAT: 127 return ctx->ac.f32; 128 default: break; 129 } 130 return 0; 131} 132 133LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, 134 enum tgsi_opcode_type type, LLVMValueRef value) 135{ 136 struct si_shader_context *ctx = si_shader_context(bld_base); 137 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); 138 139 if (dst_type) 140 return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, ""); 141 else 142 return value; 143} 144 145/** 146 * Return a value that is equal to the given i32 \p index if it lies in [0,num) 147 * or an undefined value in the same interval otherwise. 148 */ 149LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, 150 LLVMValueRef index, 151 unsigned num) 152{ 153 LLVMBuilderRef builder = ctx->ac.builder; 154 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); 155 LLVMValueRef cc; 156 157 if (util_is_power_of_two_or_zero(num)) { 158 index = LLVMBuildAnd(builder, index, c_max, ""); 159 } else { 160 /* In theory, this MAX pattern should result in code that is 161 * as good as the bit-wise AND above. 162 * 163 * In practice, LLVM generates worse code (at the time of 164 * writing), because its value tracking is not strong enough. 165 */ 166 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); 167 index = LLVMBuildSelect(builder, cc, index, c_max, ""); 168 } 169 170 return index; 171} 172 173static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, 174 LLVMValueRef value, 175 unsigned swizzle_x, 176 unsigned swizzle_y, 177 unsigned swizzle_z, 178 unsigned swizzle_w) 179{ 180 struct si_shader_context *ctx = si_shader_context(bld_base); 181 LLVMValueRef swizzles[4]; 182 183 swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0); 184 swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0); 185 swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0); 186 swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0); 187 188 return LLVMBuildShuffleVector(ctx->ac.builder, 189 value, 190 LLVMGetUndef(LLVMTypeOf(value)), 191 LLVMConstVector(swizzles, 4), ""); 192} 193 194/** 195 * Return the description of the array covering the given temporary register 196 * index. 197 */ 198static unsigned 199get_temp_array_id(struct lp_build_tgsi_context *bld_base, 200 unsigned reg_index, 201 const struct tgsi_ind_register *reg) 202{ 203 struct si_shader_context *ctx = si_shader_context(bld_base); 204 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY]; 205 unsigned i; 206 207 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays) 208 return reg->ArrayID; 209 210 for (i = 0; i < num_arrays; i++) { 211 const struct tgsi_array_info *array = &ctx->temp_arrays[i]; 212 213 if (reg_index >= array->range.First && reg_index <= array->range.Last) 214 return i + 1; 215 } 216 217 return 0; 218} 219 220static struct tgsi_declaration_range 221get_array_range(struct lp_build_tgsi_context *bld_base, 222 unsigned File, unsigned reg_index, 223 const struct tgsi_ind_register *reg) 224{ 225 struct si_shader_context *ctx = si_shader_context(bld_base); 226 struct tgsi_declaration_range range; 227 228 if (File == TGSI_FILE_TEMPORARY) { 229 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg); 230 if (array_id) 231 return ctx->temp_arrays[array_id - 1].range; 232 } 233 234 range.First = 0; 235 range.Last = bld_base->info->file_max[File]; 236 return range; 237} 238 239/** 240 * For indirect registers, construct a pointer directly to the requested 241 * element using getelementptr if possible. 242 * 243 * Returns NULL if the insertelement/extractelement fallback for array access 244 * must be used. 245 */ 246static LLVMValueRef 247get_pointer_into_array(struct si_shader_context *ctx, 248 unsigned file, 249 unsigned swizzle, 250 unsigned reg_index, 251 const struct tgsi_ind_register *reg_indirect) 252{ 253 unsigned array_id; 254 struct tgsi_array_info *array; 255 LLVMValueRef idxs[2]; 256 LLVMValueRef index; 257 LLVMValueRef alloca; 258 259 if (file != TGSI_FILE_TEMPORARY) 260 return NULL; 261 262 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect); 263 if (!array_id) 264 return NULL; 265 266 alloca = ctx->temp_array_allocas[array_id - 1]; 267 if (!alloca) 268 return NULL; 269 270 array = &ctx->temp_arrays[array_id - 1]; 271 272 if (!(array->writemask & (1 << swizzle))) 273 return ctx->undef_alloca; 274 275 index = si_get_indirect_index(ctx, reg_indirect, 1, 276 reg_index - ctx->temp_arrays[array_id - 1].range.First); 277 278 /* Ensure that the index is within a valid range, to guard against 279 * VM faults and overwriting critical data (e.g. spilled resource 280 * descriptors). 281 * 282 * TODO It should be possible to avoid the additional instructions 283 * if LLVM is changed so that it guarantuees: 284 * 1. the scratch space descriptor isolates the current wave (this 285 * could even save the scratch offset SGPR at the cost of an 286 * additional SALU instruction) 287 * 2. the memory for allocas must be allocated at the _end_ of the 288 * scratch space (after spilled registers) 289 */ 290 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1); 291 292 index = ac_build_imad(&ctx->ac, index, 293 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0), 294 LLVMConstInt(ctx->i32, 295 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0)); 296 idxs[0] = ctx->i32_0; 297 idxs[1] = index; 298 return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, ""); 299} 300 301LLVMValueRef 302si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, 303 LLVMTypeRef type, 304 LLVMValueRef ptr, 305 LLVMValueRef ptr2) 306{ 307 struct si_shader_context *ctx = si_shader_context(bld_base); 308 LLVMValueRef values[2] = { 309 ac_to_integer(&ctx->ac, ptr), 310 ac_to_integer(&ctx->ac, ptr2), 311 }; 312 LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2); 313 return LLVMBuildBitCast(ctx->ac.builder, result, type, ""); 314} 315 316static LLVMValueRef 317emit_array_fetch(struct lp_build_tgsi_context *bld_base, 318 unsigned File, enum tgsi_opcode_type type, 319 struct tgsi_declaration_range range, 320 unsigned swizzle_in) 321{ 322 struct si_shader_context *ctx = si_shader_context(bld_base); 323 unsigned i, size = range.Last - range.First + 1; 324 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); 325 LLVMValueRef result = LLVMGetUndef(vec); 326 unsigned swizzle = swizzle_in; 327 struct tgsi_full_src_register tmp_reg = {}; 328 tmp_reg.Register.File = File; 329 if (tgsi_type_is_64bit(type)) 330 swizzle |= (swizzle_in + 1) << 16; 331 332 for (i = 0; i < size; ++i) { 333 tmp_reg.Register.Index = i + range.First; 334 335 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle); 336 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp, 337 LLVMConstInt(ctx->i32, i, 0), "array_vector"); 338 } 339 return result; 340} 341 342static LLVMValueRef 343load_value_from_array(struct lp_build_tgsi_context *bld_base, 344 unsigned file, 345 enum tgsi_opcode_type type, 346 unsigned swizzle, 347 unsigned reg_index, 348 const struct tgsi_ind_register *reg_indirect) 349{ 350 struct si_shader_context *ctx = si_shader_context(bld_base); 351 LLVMBuilderRef builder = ctx->ac.builder; 352 LLVMValueRef ptr; 353 354 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect); 355 if (ptr) { 356 LLVMValueRef val = LLVMBuildLoad(builder, ptr, ""); 357 if (tgsi_type_is_64bit(type)) { 358 LLVMValueRef ptr_hi, val_hi; 359 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, ""); 360 val_hi = LLVMBuildLoad(builder, ptr_hi, ""); 361 val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), 362 val, val_hi); 363 } 364 365 return val; 366 } else { 367 struct tgsi_declaration_range range = 368 get_array_range(bld_base, file, reg_index, reg_indirect); 369 LLVMValueRef index = 370 si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First); 371 LLVMValueRef array = 372 emit_array_fetch(bld_base, file, type, range, swizzle); 373 return LLVMBuildExtractElement(builder, array, index, ""); 374 } 375} 376 377static void 378store_value_to_array(struct lp_build_tgsi_context *bld_base, 379 LLVMValueRef value, 380 unsigned file, 381 unsigned chan_index, 382 unsigned reg_index, 383 const struct tgsi_ind_register *reg_indirect) 384{ 385 struct si_shader_context *ctx = si_shader_context(bld_base); 386 LLVMBuilderRef builder = ctx->ac.builder; 387 LLVMValueRef ptr; 388 389 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect); 390 if (ptr) { 391 LLVMBuildStore(builder, value, ptr); 392 } else { 393 unsigned i, size; 394 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect); 395 LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First); 396 LLVMValueRef array = 397 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index); 398 LLVMValueRef temp_ptr; 399 400 array = LLVMBuildInsertElement(builder, array, value, index, ""); 401 402 size = range.Last - range.First + 1; 403 for (i = 0; i < size; ++i) { 404 switch(file) { 405 case TGSI_FILE_OUTPUT: 406 temp_ptr = ctx->outputs[i + range.First][chan_index]; 407 break; 408 409 case TGSI_FILE_TEMPORARY: 410 if (range.First + i >= ctx->temps_count) 411 continue; 412 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index]; 413 break; 414 415 default: 416 continue; 417 } 418 value = LLVMBuildExtractElement(builder, array, 419 LLVMConstInt(ctx->i32, i, 0), ""); 420 LLVMBuildStore(builder, value, temp_ptr); 421 } 422 } 423} 424 425/* If this is true, preload FS inputs at the beginning of shaders. Otherwise, 426 * reload them at each use. This must be true if the shader is using 427 * derivatives and KILL, because KILL can leave the WQM and then a lazy 428 * input load isn't in the WQM anymore. 429 */ 430static bool si_preload_fs_inputs(struct si_shader_context *ctx) 431{ 432 struct si_shader_selector *sel = ctx->shader->selector; 433 434 return sel->info.uses_derivatives && 435 sel->info.uses_kill; 436} 437 438static LLVMValueRef 439get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index, 440 unsigned chan) 441{ 442 struct si_shader_context *ctx = si_shader_context(bld_base); 443 444 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]); 445 return ctx->outputs[index][chan]; 446} 447 448LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, 449 const struct tgsi_full_src_register *reg, 450 enum tgsi_opcode_type type, 451 unsigned swizzle_in) 452{ 453 struct si_shader_context *ctx = si_shader_context(bld_base); 454 LLVMBuilderRef builder = ctx->ac.builder; 455 LLVMValueRef result = NULL, ptr, ptr2; 456 unsigned swizzle = swizzle_in & 0xffff; 457 458 if (swizzle_in == ~0) { 459 LLVMValueRef values[TGSI_NUM_CHANNELS]; 460 unsigned chan; 461 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 462 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan); 463 } 464 return ac_build_gather_values(&ctx->ac, values, 465 TGSI_NUM_CHANNELS); 466 } 467 468 if (reg->Register.Indirect) { 469 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type, 470 swizzle, reg->Register.Index, ®->Indirect); 471 return bitcast(bld_base, type, load); 472 } 473 474 switch(reg->Register.File) { 475 case TGSI_FILE_IMMEDIATE: { 476 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); 477 if (tgsi_type_is_64bit(type)) { 478 result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2)); 479 result = LLVMConstInsertElement(result, 480 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], 481 ctx->i32_0); 482 result = LLVMConstInsertElement(result, 483 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)], 484 ctx->i32_1); 485 return LLVMConstBitCast(result, ctype); 486 } else { 487 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype); 488 } 489 } 490 491 case TGSI_FILE_INPUT: { 492 unsigned index = reg->Register.Index; 493 LLVMValueRef input[4]; 494 495 /* I don't think doing this for vertex shaders is beneficial. 496 * For those, we want to make sure the VMEM loads are executed 497 * only once. Fragment shaders don't care much, because 498 * v_interp instructions are much cheaper than VMEM loads. 499 */ 500 if (!si_preload_fs_inputs(ctx) && 501 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT) 502 ctx->load_input(ctx, index, &ctx->input_decls[index], input); 503 else 504 memcpy(input, &ctx->inputs[index * 4], sizeof(input)); 505 506 result = input[swizzle]; 507 508 if (tgsi_type_is_64bit(type)) { 509 ptr = result; 510 ptr2 = input[swizzle_in >> 16]; 511 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), 512 ptr, ptr2); 513 } 514 break; 515 } 516 517 case TGSI_FILE_TEMPORARY: 518 if (reg->Register.Index >= ctx->temps_count) 519 return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); 520 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; 521 if (tgsi_type_is_64bit(type)) { 522 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)]; 523 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), 524 LLVMBuildLoad(builder, ptr, ""), 525 LLVMBuildLoad(builder, ptr2, "")); 526 } 527 result = LLVMBuildLoad(builder, ptr, ""); 528 break; 529 530 case TGSI_FILE_OUTPUT: 531 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle); 532 if (tgsi_type_is_64bit(type)) { 533 ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16)); 534 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), 535 LLVMBuildLoad(builder, ptr, ""), 536 LLVMBuildLoad(builder, ptr2, "")); 537 } 538 result = LLVMBuildLoad(builder, ptr, ""); 539 break; 540 541 default: 542 return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); 543 } 544 545 return bitcast(bld_base, type, result); 546} 547 548static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, 549 const struct tgsi_full_src_register *reg, 550 enum tgsi_opcode_type type, 551 unsigned swizzle_in) 552{ 553 struct si_shader_context *ctx = si_shader_context(bld_base); 554 LLVMBuilderRef builder = ctx->ac.builder; 555 LLVMValueRef cval = ctx->system_values[reg->Register.Index]; 556 unsigned swizzle = swizzle_in & 0xffff; 557 558 if (tgsi_type_is_64bit(type)) { 559 LLVMValueRef lo, hi; 560 561 assert(swizzle == 0 || swizzle == 2); 562 563 lo = LLVMBuildExtractElement( 564 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), ""); 565 hi = LLVMBuildExtractElement( 566 builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), ""); 567 568 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), 569 lo, hi); 570 } 571 572 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { 573 cval = LLVMBuildExtractElement( 574 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), ""); 575 } else { 576 assert(swizzle == 0); 577 } 578 579 return bitcast(bld_base, type, cval); 580} 581 582static void emit_declaration(struct lp_build_tgsi_context *bld_base, 583 const struct tgsi_full_declaration *decl) 584{ 585 struct si_shader_context *ctx = si_shader_context(bld_base); 586 LLVMBuilderRef builder = ctx->ac.builder; 587 unsigned first, last, i; 588 switch(decl->Declaration.File) { 589 case TGSI_FILE_ADDRESS: 590 { 591 unsigned idx; 592 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 593 unsigned chan; 594 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 595 ctx->addrs[idx][chan] = ac_build_alloca_undef( 596 &ctx->ac, ctx->i32, ""); 597 } 598 } 599 break; 600 } 601 602 case TGSI_FILE_TEMPORARY: 603 { 604 char name[18] = ""; 605 LLVMValueRef array_alloca = NULL; 606 unsigned decl_size; 607 unsigned writemask = decl->Declaration.UsageMask; 608 first = decl->Range.First; 609 last = decl->Range.Last; 610 decl_size = 4 * ((last - first) + 1); 611 612 if (decl->Declaration.Array) { 613 unsigned id = decl->Array.ArrayID - 1; 614 unsigned array_size; 615 616 writemask &= ctx->temp_arrays[id].writemask; 617 ctx->temp_arrays[id].writemask = writemask; 618 array_size = ((last - first) + 1) * util_bitcount(writemask); 619 620 /* If the array has more than 16 elements, store it 621 * in memory using an alloca that spans the entire 622 * array. 623 * 624 * Otherwise, store each array element individually. 625 * We will then generate vectors (per-channel, up to 626 * <16 x float> if the usagemask is a single bit) for 627 * indirect addressing. 628 * 629 * Note that 16 is the number of vector elements that 630 * LLVM will store in a register, so theoretically an 631 * array with up to 4 * 16 = 64 elements could be 632 * handled this way, but whether that's a good idea 633 * depends on VGPR register pressure elsewhere. 634 * 635 * FIXME: We shouldn't need to have the non-alloca 636 * code path for arrays. LLVM should be smart enough to 637 * promote allocas into registers when profitable. 638 */ 639 if (array_size > 16 || 640 !ctx->screen->llvm_has_working_vgpr_indexing) { 641 array_alloca = ac_build_alloca_undef(&ctx->ac, 642 LLVMArrayType(ctx->f32, 643 array_size), "array"); 644 ctx->temp_array_allocas[id] = array_alloca; 645 } 646 } 647 648 if (!ctx->temps_count) { 649 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; 650 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); 651 } 652 if (!array_alloca) { 653 for (i = 0; i < decl_size; ++i) { 654#ifdef DEBUG 655 snprintf(name, sizeof(name), "TEMP%d.%c", 656 first + i / 4, "xyzw"[i % 4]); 657#endif 658 ctx->temps[first * TGSI_NUM_CHANNELS + i] = 659 ac_build_alloca_undef(&ctx->ac, 660 ctx->f32, 661 name); 662 } 663 } else { 664 LLVMValueRef idxs[2] = { 665 ctx->i32_0, 666 NULL 667 }; 668 unsigned j = 0; 669 670 if (writemask != TGSI_WRITEMASK_XYZW && 671 !ctx->undef_alloca) { 672 /* Create a dummy alloca. We use it so that we 673 * have a pointer that is safe to load from if 674 * a shader ever reads from a channel that 675 * it never writes to. 676 */ 677 ctx->undef_alloca = ac_build_alloca_undef( 678 &ctx->ac, ctx->f32, "undef"); 679 } 680 681 for (i = 0; i < decl_size; ++i) { 682 LLVMValueRef ptr; 683 if (writemask & (1 << (i % 4))) { 684#ifdef DEBUG 685 snprintf(name, sizeof(name), "TEMP%d.%c", 686 first + i / 4, "xyzw"[i % 4]); 687#endif 688 idxs[1] = LLVMConstInt(ctx->i32, j, 0); 689 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); 690 j++; 691 } else { 692 ptr = ctx->undef_alloca; 693 } 694 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; 695 } 696 } 697 break; 698 } 699 case TGSI_FILE_INPUT: 700 { 701 unsigned idx; 702 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 703 if (ctx->load_input && 704 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) { 705 ctx->input_decls[idx] = *decl; 706 ctx->input_decls[idx].Range.First = idx; 707 ctx->input_decls[idx].Range.Last = idx; 708 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First; 709 710 if (si_preload_fs_inputs(ctx) || 711 bld_base->info->processor != PIPE_SHADER_FRAGMENT) 712 ctx->load_input(ctx, idx, &ctx->input_decls[idx], 713 &ctx->inputs[idx * 4]); 714 } 715 } 716 } 717 break; 718 719 case TGSI_FILE_SYSTEM_VALUE: 720 { 721 unsigned idx; 722 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 723 si_load_system_value(ctx, idx, decl); 724 } 725 } 726 break; 727 728 case TGSI_FILE_OUTPUT: 729 { 730 char name[16] = ""; 731 unsigned idx; 732 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 733 unsigned chan; 734 assert(idx < RADEON_LLVM_MAX_OUTPUTS); 735 if (ctx->outputs[idx][0]) 736 continue; 737 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 738#ifdef DEBUG 739 snprintf(name, sizeof(name), "OUT%d.%c", 740 idx, "xyzw"[chan % 4]); 741#endif 742 ctx->outputs[idx][chan] = ac_build_alloca_undef( 743 &ctx->ac, ctx->f32, name); 744 } 745 } 746 break; 747 } 748 749 case TGSI_FILE_MEMORY: 750 si_tgsi_declare_compute_memory(ctx, decl); 751 break; 752 753 default: 754 break; 755 } 756} 757 758void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, 759 const struct tgsi_full_instruction *inst, 760 const struct tgsi_opcode_info *info, 761 unsigned index, 762 LLVMValueRef dst[4]) 763{ 764 struct si_shader_context *ctx = si_shader_context(bld_base); 765 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 766 LLVMBuilderRef builder = ctx->ac.builder; 767 LLVMValueRef temp_ptr, temp_ptr2 = NULL; 768 bool is_vec_store = false; 769 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); 770 771 if (dst[0]) { 772 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); 773 is_vec_store = (k == LLVMVectorTypeKind); 774 } 775 776 if (is_vec_store) { 777 LLVMValueRef values[4] = {}; 778 uint32_t writemask = reg->Register.WriteMask; 779 while (writemask) { 780 unsigned chan = u_bit_scan(&writemask); 781 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0); 782 values[chan] = LLVMBuildExtractElement(ctx->ac.builder, 783 dst[0], index, ""); 784 } 785 bld_base->emit_store(bld_base, inst, info, index, values); 786 return; 787 } 788 789 uint32_t writemask = reg->Register.WriteMask; 790 while (writemask) { 791 unsigned chan_index = u_bit_scan(&writemask); 792 LLVMValueRef value = dst[chan_index]; 793 794 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) 795 continue; 796 if (inst->Instruction.Saturate) 797 value = ac_build_clamp(&ctx->ac, value); 798 799 if (reg->Register.File == TGSI_FILE_ADDRESS) { 800 temp_ptr = ctx->addrs[reg->Register.Index][chan_index]; 801 LLVMBuildStore(builder, value, temp_ptr); 802 continue; 803 } 804 805 if (!tgsi_type_is_64bit(dtype)) 806 value = ac_to_float(&ctx->ac, value); 807 808 if (reg->Register.Indirect) { 809 unsigned file = reg->Register.File; 810 unsigned reg_index = reg->Register.Index; 811 store_value_to_array(bld_base, value, file, chan_index, 812 reg_index, ®->Indirect); 813 } else { 814 switch(reg->Register.File) { 815 case TGSI_FILE_OUTPUT: 816 temp_ptr = ctx->outputs[reg->Register.Index][chan_index]; 817 if (tgsi_type_is_64bit(dtype)) 818 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1]; 819 break; 820 821 case TGSI_FILE_TEMPORARY: 822 { 823 if (reg->Register.Index >= ctx->temps_count) 824 continue; 825 826 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; 827 if (tgsi_type_is_64bit(dtype)) 828 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; 829 830 break; 831 } 832 default: 833 return; 834 } 835 if (!tgsi_type_is_64bit(dtype)) 836 LLVMBuildStore(builder, value, temp_ptr); 837 else { 838 LLVMValueRef ptr = LLVMBuildBitCast(builder, value, 839 LLVMVectorType(ctx->i32, 2), ""); 840 LLVMValueRef val2; 841 value = LLVMBuildExtractElement(builder, ptr, 842 ctx->i32_0, ""); 843 val2 = LLVMBuildExtractElement(builder, ptr, 844 ctx->i32_1, ""); 845 846 LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr); 847 LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2); 848 } 849 } 850 } 851} 852 853static int get_line(int pc) 854{ 855 /* Subtract 1 so that the number shown is that of the corresponding 856 * opcode in the TGSI dump, e.g. an if block has the same suffix as 857 * the instruction number of the corresponding TGSI IF. 858 */ 859 return pc - 1; 860} 861 862static void bgnloop_emit(const struct lp_build_tgsi_action *action, 863 struct lp_build_tgsi_context *bld_base, 864 struct lp_build_emit_data *emit_data) 865{ 866 struct si_shader_context *ctx = si_shader_context(bld_base); 867 ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc)); 868} 869 870static void brk_emit(const struct lp_build_tgsi_action *action, 871 struct lp_build_tgsi_context *bld_base, 872 struct lp_build_emit_data *emit_data) 873{ 874 struct si_shader_context *ctx = si_shader_context(bld_base); 875 ac_build_break(&ctx->ac); 876} 877 878static void cont_emit(const struct lp_build_tgsi_action *action, 879 struct lp_build_tgsi_context *bld_base, 880 struct lp_build_emit_data *emit_data) 881{ 882 struct si_shader_context *ctx = si_shader_context(bld_base); 883 ac_build_continue(&ctx->ac); 884} 885 886static void else_emit(const struct lp_build_tgsi_action *action, 887 struct lp_build_tgsi_context *bld_base, 888 struct lp_build_emit_data *emit_data) 889{ 890 struct si_shader_context *ctx = si_shader_context(bld_base); 891 ac_build_else(&ctx->ac, get_line(bld_base->pc)); 892} 893 894static void endif_emit(const struct lp_build_tgsi_action *action, 895 struct lp_build_tgsi_context *bld_base, 896 struct lp_build_emit_data *emit_data) 897{ 898 struct si_shader_context *ctx = si_shader_context(bld_base); 899 ac_build_endif(&ctx->ac, get_line(bld_base->pc)); 900} 901 902static void endloop_emit(const struct lp_build_tgsi_action *action, 903 struct lp_build_tgsi_context *bld_base, 904 struct lp_build_emit_data *emit_data) 905{ 906 struct si_shader_context *ctx = si_shader_context(bld_base); 907 ac_build_endloop(&ctx->ac, get_line(bld_base->pc)); 908} 909 910static void if_emit(const struct lp_build_tgsi_action *action, 911 struct lp_build_tgsi_context *bld_base, 912 struct lp_build_emit_data *emit_data) 913{ 914 struct si_shader_context *ctx = si_shader_context(bld_base); 915 ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc)); 916} 917 918static void uif_emit(const struct lp_build_tgsi_action *action, 919 struct lp_build_tgsi_context *bld_base, 920 struct lp_build_emit_data *emit_data) 921{ 922 struct si_shader_context *ctx = si_shader_context(bld_base); 923 ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc)); 924} 925 926static void emit_immediate(struct lp_build_tgsi_context *bld_base, 927 const struct tgsi_full_immediate *imm) 928{ 929 unsigned i; 930 struct si_shader_context *ctx = si_shader_context(bld_base); 931 932 for (i = 0; i < 4; ++i) { 933 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] = 934 LLVMConstInt(ctx->i32, imm->u[i].Uint, false ); 935 } 936 937 ctx->imms_num++; 938} 939 940void si_llvm_context_init(struct si_shader_context *ctx, 941 struct si_screen *sscreen, 942 struct ac_llvm_compiler *compiler) 943{ 944 struct lp_type type; 945 946 /* Initialize the gallivm object: 947 * We are only using the module, context, and builder fields of this struct. 948 * This should be enough for us to be able to pass our gallivm struct to the 949 * helper functions in the gallivm module. 950 */ 951 memset(ctx, 0, sizeof(*ctx)); 952 ctx->screen = sscreen; 953 ctx->compiler = compiler; 954 955 ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family); 956 ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context); 957 958 enum ac_float_mode float_mode = 959 sscreen->debug_flags & DBG(UNSAFE_MATH) ? 960 AC_FLOAT_MODE_UNSAFE_FP_MATH : 961 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH; 962 ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode); 963 964 ctx->gallivm.context = ctx->ac.context; 965 ctx->gallivm.module = ctx->ac.module; 966 ctx->gallivm.builder = ctx->ac.builder; 967 968 struct lp_build_tgsi_context *bld_base = &ctx->bld_base; 969 970 type.floating = true; 971 type.fixed = false; 972 type.sign = true; 973 type.norm = false; 974 type.width = 32; 975 type.length = 1; 976 977 lp_build_context_init(&bld_base->base, &ctx->gallivm, type); 978 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); 979 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); 980 type.width *= 2; 981 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type); 982 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type)); 983 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type)); 984 985 bld_base->soa = 1; 986 bld_base->emit_swizzle = emit_swizzle; 987 bld_base->emit_declaration = emit_declaration; 988 bld_base->emit_immediate = emit_immediate; 989 990 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; 991 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; 992 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; 993 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; 994 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit; 995 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; 996 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; 997 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; 998 999 si_shader_context_init_alu(&ctx->bld_base); 1000 si_shader_context_init_mem(ctx); 1001 1002 ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context); 1003 ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context); 1004 ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context); 1005 ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context); 1006 ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context); 1007 ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128); 1008 ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context); 1009 ctx->v2i32 = LLVMVectorType(ctx->i32, 2); 1010 ctx->v4i32 = LLVMVectorType(ctx->i32, 4); 1011 ctx->v4f32 = LLVMVectorType(ctx->f32, 4); 1012 ctx->v8i32 = LLVMVectorType(ctx->i32, 8); 1013 1014 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0); 1015 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0); 1016 ctx->i1false = LLVMConstInt(ctx->i1, 0, 0); 1017 ctx->i1true = LLVMConstInt(ctx->i1, 1, 0); 1018} 1019 1020/* Set the context to a certain TGSI shader. Can be called repeatedly 1021 * to change the shader. */ 1022void si_llvm_context_set_tgsi(struct si_shader_context *ctx, 1023 struct si_shader *shader) 1024{ 1025 const struct tgsi_shader_info *info = NULL; 1026 const struct tgsi_token *tokens = NULL; 1027 1028 if (shader && shader->selector) { 1029 info = &shader->selector->info; 1030 tokens = shader->selector->tokens; 1031 } 1032 1033 ctx->shader = shader; 1034 ctx->type = info ? info->processor : -1; 1035 ctx->bld_base.info = info; 1036 1037 /* Clean up the old contents. */ 1038 FREE(ctx->temp_arrays); 1039 ctx->temp_arrays = NULL; 1040 FREE(ctx->temp_array_allocas); 1041 ctx->temp_array_allocas = NULL; 1042 1043 FREE(ctx->imms); 1044 ctx->imms = NULL; 1045 ctx->imms_num = 0; 1046 1047 FREE(ctx->temps); 1048 ctx->temps = NULL; 1049 ctx->temps_count = 0; 1050 1051 if (!info) 1052 return; 1053 1054 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared); 1055 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared); 1056 1057 ctx->num_samplers = util_last_bit(info->samplers_declared); 1058 ctx->num_images = util_last_bit(info->images_declared); 1059 1060 if (!tokens) 1061 return; 1062 1063 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) { 1064 int size = info->array_max[TGSI_FILE_TEMPORARY]; 1065 1066 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); 1067 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0])); 1068 1069 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size, 1070 ctx->temp_arrays); 1071 } 1072 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) { 1073 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1; 1074 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef)); 1075 } 1076 1077 /* Re-set these to start with a clean slate. */ 1078 ctx->bld_base.num_instructions = 0; 1079 ctx->bld_base.pc = 0; 1080 memset(ctx->outputs, 0, sizeof(ctx->outputs)); 1081 1082 ctx->bld_base.emit_store = si_llvm_emit_store; 1083 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch; 1084 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch; 1085 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch; 1086 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch; 1087 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value; 1088} 1089 1090void si_llvm_create_func(struct si_shader_context *ctx, 1091 const char *name, 1092 LLVMTypeRef *return_types, unsigned num_return_elems, 1093 LLVMTypeRef *ParamTypes, unsigned ParamCount) 1094{ 1095 LLVMTypeRef main_fn_type, ret_type; 1096 LLVMBasicBlockRef main_fn_body; 1097 enum si_llvm_calling_convention call_conv; 1098 unsigned real_shader_type; 1099 1100 if (num_return_elems) 1101 ret_type = LLVMStructTypeInContext(ctx->ac.context, 1102 return_types, 1103 num_return_elems, true); 1104 else 1105 ret_type = ctx->voidt; 1106 1107 /* Setup the function */ 1108 ctx->return_type = ret_type; 1109 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0); 1110 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type); 1111 main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context, 1112 ctx->main_fn, "main_body"); 1113 LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body); 1114 1115 real_shader_type = ctx->type; 1116 1117 /* LS is merged into HS (TCS), and ES is merged into GS. */ 1118 if (ctx->screen->info.chip_class >= GFX9) { 1119 if (ctx->shader->key.as_ls) 1120 real_shader_type = PIPE_SHADER_TESS_CTRL; 1121 else if (ctx->shader->key.as_es) 1122 real_shader_type = PIPE_SHADER_GEOMETRY; 1123 } 1124 1125 switch (real_shader_type) { 1126 case PIPE_SHADER_VERTEX: 1127 case PIPE_SHADER_TESS_EVAL: 1128 call_conv = RADEON_LLVM_AMDGPU_VS; 1129 break; 1130 case PIPE_SHADER_TESS_CTRL: 1131 call_conv = RADEON_LLVM_AMDGPU_HS; 1132 break; 1133 case PIPE_SHADER_GEOMETRY: 1134 call_conv = RADEON_LLVM_AMDGPU_GS; 1135 break; 1136 case PIPE_SHADER_FRAGMENT: 1137 call_conv = RADEON_LLVM_AMDGPU_PS; 1138 break; 1139 case PIPE_SHADER_COMPUTE: 1140 call_conv = RADEON_LLVM_AMDGPU_CS; 1141 break; 1142 default: 1143 unreachable("Unhandle shader type"); 1144 } 1145 1146 LLVMSetFunctionCallConv(ctx->main_fn, call_conv); 1147} 1148 1149void si_llvm_optimize_module(struct si_shader_context *ctx) 1150{ 1151 /* Dump LLVM IR before any optimization passes */ 1152 if (ctx->screen->debug_flags & DBG(PREOPT_IR) && 1153 si_can_dump_shader(ctx->screen, ctx->type)) 1154 LLVMDumpModule(ctx->gallivm.module); 1155 1156 /* Run the pass */ 1157 LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module); 1158 LLVMDisposeBuilder(ctx->ac.builder); 1159} 1160 1161void si_llvm_dispose(struct si_shader_context *ctx) 1162{ 1163 LLVMDisposeModule(ctx->gallivm.module); 1164 LLVMContextDispose(ctx->gallivm.context); 1165 FREE(ctx->temp_arrays); 1166 ctx->temp_arrays = NULL; 1167 FREE(ctx->temp_array_allocas); 1168 ctx->temp_array_allocas = NULL; 1169 FREE(ctx->temps); 1170 ctx->temps = NULL; 1171 ctx->temps_count = 0; 1172 FREE(ctx->imms); 1173 ctx->imms = NULL; 1174 ctx->imms_num = 0; 1175 ac_llvm_context_dispose(&ctx->ac); 1176} 1177