1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26#include "ac_llvm_util.h" 27 28#include "ac_llvm_build.h" 29#include "c11/threads.h" 30#include "gallivm/lp_bld_misc.h" 31#include "util/bitscan.h" 32#include "util/u_math.h" 33#include <llvm-c/Core.h> 34#include <llvm-c/Support.h> 35#include <llvm-c/Transforms/IPO.h> 36#include <llvm-c/Transforms/Scalar.h> 37#include <llvm-c/Transforms/Utils.h> 38 39#include <assert.h> 40#include <stdio.h> 41#include <string.h> 42 43static void ac_init_llvm_target(void) 44{ 45 LLVMInitializeAMDGPUTargetInfo(); 46 LLVMInitializeAMDGPUTarget(); 47 LLVMInitializeAMDGPUTargetMC(); 48 LLVMInitializeAMDGPUAsmPrinter(); 49 50 /* For inline assembly. */ 51 LLVMInitializeAMDGPUAsmParser(); 52 53 /* For ACO disassembly. */ 54 LLVMInitializeAMDGPUDisassembler(); 55 56 /* Workaround for bug in llvm 4.0 that causes image intrinsics 57 * to disappear. 58 * https://reviews.llvm.org/D26348 59 * 60 * "mesa" is the prefix for error messages. 61 * 62 * -global-isel-abort=2 is a no-op unless global isel has been enabled. 63 * This option tells the backend to fall-back to SelectionDAG and print 64 * a diagnostic message if global isel fails. 65 */ 66 const char *argv[] = { 67 "mesa", 68 "-simplifycfg-sink-common=false", 69 "-global-isel-abort=2", 70 "-amdgpu-atomic-optimizations=true", 71#if LLVM_VERSION_MAJOR == 11 72 /* This fixes variable indexing on LLVM 11. It also breaks atomic.cmpswap on LLVM >= 12. */ 73 "-structurizecfg-skip-uniform-regions", 74#endif 75 }; 76 LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL); 77} 78 79PUBLIC void ac_init_shared_llvm_once(void) 80{ 81 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; 82 call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); 83} 84 85#if !LLVM_IS_SHARED 86static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT; 87static void ac_init_static_llvm_once(void) 88{ 89 call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target); 90} 91#endif 92 93void ac_init_llvm_once(void) 94{ 95#if LLVM_IS_SHARED 96 ac_init_shared_llvm_once(); 97#else 98 ac_init_static_llvm_once(); 99#endif 100} 101 102static LLVMTargetRef ac_get_llvm_target(const char *triple) 103{ 104 LLVMTargetRef target = NULL; 105 char *err_message = NULL; 106 107 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { 108 fprintf(stderr, "Cannot find target for triple %s ", triple); 109 if (err_message) { 110 fprintf(stderr, "%s\n", err_message); 111 } 112 LLVMDisposeMessage(err_message); 113 return NULL; 114 } 115 return target; 116} 117 118const char *ac_get_llvm_processor_name(enum radeon_family family) 119{ 120 switch (family) { 121 case CHIP_TAHITI: 122 return "tahiti"; 123 case CHIP_PITCAIRN: 124 return "pitcairn"; 125 case CHIP_VERDE: 126 return "verde"; 127 case CHIP_OLAND: 128 return "oland"; 129 case CHIP_HAINAN: 130 return "hainan"; 131 case CHIP_BONAIRE: 132 return "bonaire"; 133 case CHIP_KABINI: 134 return "kabini"; 135 case CHIP_KAVERI: 136 return "kaveri"; 137 case CHIP_HAWAII: 138 return "hawaii"; 139 case CHIP_TONGA: 140 return "tonga"; 141 case CHIP_ICELAND: 142 return "iceland"; 143 case CHIP_CARRIZO: 144 return "carrizo"; 145 case CHIP_FIJI: 146 return "fiji"; 147 case CHIP_STONEY: 148 return "stoney"; 149 case CHIP_POLARIS10: 150 return "polaris10"; 151 case CHIP_POLARIS11: 152 case CHIP_POLARIS12: 153 case CHIP_VEGAM: 154 return "polaris11"; 155 case CHIP_VEGA10: 156 return "gfx900"; 157 case CHIP_RAVEN: 158 return "gfx902"; 159 case CHIP_VEGA12: 160 return "gfx904"; 161 case CHIP_VEGA20: 162 return "gfx906"; 163 case CHIP_RAVEN2: 164 case CHIP_RENOIR: 165 return "gfx909"; 166 case CHIP_ARCTURUS: 167 return "gfx908"; 168 case CHIP_ALDEBARAN: 169 return "gfx90a"; 170 case CHIP_NAVI10: 171 return "gfx1010"; 172 case CHIP_NAVI12: 173 return "gfx1011"; 174 case CHIP_NAVI14: 175 return "gfx1012"; 176 case CHIP_SIENNA_CICHLID: 177 case CHIP_NAVY_FLOUNDER: 178 case CHIP_DIMGREY_CAVEFISH: 179 case CHIP_BEIGE_GOBY: 180 case CHIP_VANGOGH: 181 case CHIP_YELLOW_CARP: 182 return "gfx1030"; 183 default: 184 return ""; 185 } 186} 187 188static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, 189 enum ac_target_machine_options tm_options, 190 LLVMCodeGenOptLevel level, 191 const char **out_triple) 192{ 193 assert(family >= CHIP_TAHITI); 194 const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; 195 LLVMTargetRef target = ac_get_llvm_target(triple); 196 197 LLVMTargetMachineRef tm = 198 LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level, 199 LLVMRelocDefault, LLVMCodeModelDefault); 200 201 if (out_triple) 202 *out_triple = triple; 203 if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL) 204 ac_enable_global_isel(tm); 205 return tm; 206} 207 208static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, 209 bool check_ir) 210{ 211 LLVMPassManagerRef passmgr = LLVMCreatePassManager(); 212 if (!passmgr) 213 return NULL; 214 215 if (target_library_info) 216 LLVMAddTargetLibraryInfo(target_library_info, passmgr); 217 218 if (check_ir) 219 LLVMAddVerifierPass(passmgr); 220 LLVMAddAlwaysInlinerPass(passmgr); 221 /* Normally, the pass manager runs all passes on one function before 222 * moving onto another. Adding a barrier no-op pass forces the pass 223 * manager to run the inliner on all functions first, which makes sure 224 * that the following passes are only run on the remaining non-inline 225 * function, so it removes useless work done on dead inline functions. 226 */ 227 ac_llvm_add_barrier_noop_pass(passmgr); 228 /* This pass should eliminate all the load and store instructions. */ 229 LLVMAddPromoteMemoryToRegisterPass(passmgr); 230 LLVMAddScalarReplAggregatesPass(passmgr); 231 LLVMAddLICMPass(passmgr); 232 LLVMAddAggressiveDCEPass(passmgr); 233 LLVMAddCFGSimplificationPass(passmgr); 234 /* This is recommended by the instruction combining pass. */ 235 LLVMAddEarlyCSEMemSSAPass(passmgr); 236 LLVMAddInstructionCombiningPass(passmgr); 237 return passmgr; 238} 239 240static const char *attr_to_str(enum ac_func_attr attr) 241{ 242 switch (attr) { 243 case AC_FUNC_ATTR_ALWAYSINLINE: 244 return "alwaysinline"; 245 case AC_FUNC_ATTR_INREG: 246 return "inreg"; 247 case AC_FUNC_ATTR_NOALIAS: 248 return "noalias"; 249 case AC_FUNC_ATTR_NOUNWIND: 250 return "nounwind"; 251 case AC_FUNC_ATTR_READNONE: 252 return "readnone"; 253 case AC_FUNC_ATTR_READONLY: 254 return "readonly"; 255 case AC_FUNC_ATTR_WRITEONLY: 256 return "writeonly"; 257 case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: 258 return "inaccessiblememonly"; 259 case AC_FUNC_ATTR_CONVERGENT: 260 return "convergent"; 261 default: 262 fprintf(stderr, "Unhandled function attribute: %x\n", attr); 263 return 0; 264 } 265} 266 267void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, 268 enum ac_func_attr attr) 269{ 270 const char *attr_name = attr_to_str(attr); 271 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); 272 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); 273 274 if (LLVMIsAFunction(function)) 275 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); 276 else 277 LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); 278} 279 280void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask) 281{ 282 attrib_mask |= AC_FUNC_ATTR_NOUNWIND; 283 attrib_mask &= ~AC_FUNC_ATTR_LEGACY; 284 285 while (attrib_mask) { 286 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); 287 ac_add_function_attr(ctx, function, -1, attr); 288 } 289} 290 291void ac_dump_module(LLVMModuleRef module) 292{ 293 char *str = LLVMPrintModuleToString(module); 294 fprintf(stderr, "%s", str); 295 LLVMDisposeMessage(str); 296} 297 298void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value) 299{ 300 char str[16]; 301 302 snprintf(str, sizeof(str), "0x%x", value); 303 LLVMAddTargetDependentFunctionAttr(F, name, str); 304} 305 306void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size) 307{ 308 if (!size) 309 return; 310 311 char str[32]; 312 snprintf(str, sizeof(str), "%u,%u", size, size); 313 LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str); 314} 315 316void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx) 317{ 318 char features[2048]; 319 320 snprintf(features, sizeof(features), "+DumpCode%s%s", 321 /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */ 322 ctx->chip_class == GFX9 ? ",-promote-alloca" : "", 323 /* Wave32 is the default. */ 324 ctx->chip_class >= GFX10 && ctx->wave_size == 64 ? 325 ",+wavefrontsize64,-wavefrontsize32" : ""); 326 327 LLVMAddTargetDependentFunctionAttr(F, "target-features", features); 328} 329 330unsigned ac_count_scratch_private_memory(LLVMValueRef function) 331{ 332 unsigned private_mem_vgprs = 0; 333 334 /* Process all LLVM instructions. */ 335 LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); 336 while (bb) { 337 LLVMValueRef next = LLVMGetFirstInstruction(bb); 338 339 while (next) { 340 LLVMValueRef inst = next; 341 next = LLVMGetNextInstruction(next); 342 343 if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) 344 continue; 345 346 LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); 347 /* No idea why LLVM aligns allocas to 4 elements. */ 348 unsigned alignment = LLVMGetAlignment(inst); 349 unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); 350 private_mem_vgprs += dw_size; 351 } 352 bb = LLVMGetNextBasicBlock(bb); 353 } 354 355 return private_mem_vgprs; 356} 357 358bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family, 359 enum ac_target_machine_options tm_options) 360{ 361 const char *triple; 362 memset(compiler, 0, sizeof(*compiler)); 363 364 compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple); 365 if (!compiler->tm) 366 return false; 367 368 if (tm_options & AC_TM_CREATE_LOW_OPT) { 369 compiler->low_opt_tm = 370 ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL); 371 if (!compiler->low_opt_tm) 372 goto fail; 373 } 374 375 compiler->target_library_info = ac_create_target_library_info(triple); 376 if (!compiler->target_library_info) 377 goto fail; 378 379 compiler->passmgr = 380 ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); 381 if (!compiler->passmgr) 382 goto fail; 383 384 return true; 385fail: 386 ac_destroy_llvm_compiler(compiler); 387 return false; 388} 389 390void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) 391{ 392 ac_destroy_llvm_passes(compiler->passes); 393 ac_destroy_llvm_passes(compiler->low_opt_passes); 394 395 if (compiler->passmgr) 396 LLVMDisposePassManager(compiler->passmgr); 397 if (compiler->target_library_info) 398 ac_dispose_target_library_info(compiler->target_library_info); 399 if (compiler->low_opt_tm) 400 LLVMDisposeTargetMachine(compiler->low_opt_tm); 401 if (compiler->tm) 402 LLVMDisposeTargetMachine(compiler->tm); 403} 404