1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the 6b8e80941Smrg * "Software"), to deal in the Software without restriction, including 7b8e80941Smrg * without limitation the rights to use, copy, modify, merge, publish, 8b8e80941Smrg * distribute, sub license, and/or sell copies of the Software, and to 9b8e80941Smrg * permit persons to whom the Software is furnished to do so, subject to 10b8e80941Smrg * the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15b8e80941Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 19b8e80941Smrg * 20b8e80941Smrg * The above copyright notice and this permission notice (including the 21b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions 22b8e80941Smrg * of the Software. 23b8e80941Smrg * 24b8e80941Smrg */ 25b8e80941Smrg/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26b8e80941Smrg#include "ac_llvm_util.h" 27b8e80941Smrg#include "ac_llvm_build.h" 28b8e80941Smrg#include "util/bitscan.h" 29b8e80941Smrg#include <llvm-c/Core.h> 30b8e80941Smrg#include <llvm-c/Support.h> 31b8e80941Smrg#include <llvm-c/Transforms/IPO.h> 32b8e80941Smrg#include <llvm-c/Transforms/Scalar.h> 33b8e80941Smrg#include <llvm-c/Transforms/Utils.h> 34b8e80941Smrg#include "c11/threads.h" 35b8e80941Smrg#include "gallivm/lp_bld_misc.h" 36b8e80941Smrg#include "util/u_math.h" 37b8e80941Smrg 38b8e80941Smrg#include <assert.h> 39b8e80941Smrg#include <stdio.h> 40b8e80941Smrg#include <string.h> 41b8e80941Smrg 42b8e80941Smrgstatic void ac_init_llvm_target() 43b8e80941Smrg{ 44b8e80941Smrg LLVMInitializeAMDGPUTargetInfo(); 45b8e80941Smrg LLVMInitializeAMDGPUTarget(); 46b8e80941Smrg LLVMInitializeAMDGPUTargetMC(); 47b8e80941Smrg LLVMInitializeAMDGPUAsmPrinter(); 48b8e80941Smrg 49b8e80941Smrg /* For inline assembly. */ 50b8e80941Smrg LLVMInitializeAMDGPUAsmParser(); 51b8e80941Smrg 52b8e80941Smrg /* Workaround for bug in llvm 4.0 that causes image intrinsics 53b8e80941Smrg * to disappear. 54b8e80941Smrg * https://reviews.llvm.org/D26348 55b8e80941Smrg * 56b8e80941Smrg * "mesa" is the prefix for error messages. 57b8e80941Smrg * 58b8e80941Smrg * -global-isel-abort=2 is a no-op unless global isel has been enabled. 59b8e80941Smrg * This option tells the backend to fall-back to SelectionDAG and print 60b8e80941Smrg * a diagnostic message if global isel fails. 61b8e80941Smrg */ 62b8e80941Smrg const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" }; 63b8e80941Smrg LLVMParseCommandLineOptions(3, argv, NULL); 64b8e80941Smrg} 65b8e80941Smrg 66b8e80941Smrgstatic once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; 67b8e80941Smrg 68b8e80941Smrgvoid ac_init_llvm_once(void) 69b8e80941Smrg{ 70b8e80941Smrg call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); 71b8e80941Smrg} 72b8e80941Smrg 73b8e80941Smrgstatic LLVMTargetRef ac_get_llvm_target(const char *triple) 74b8e80941Smrg{ 75b8e80941Smrg LLVMTargetRef target = NULL; 76b8e80941Smrg char *err_message = NULL; 77b8e80941Smrg 78b8e80941Smrg if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { 79b8e80941Smrg fprintf(stderr, "Cannot find target for triple %s ", triple); 80b8e80941Smrg if (err_message) { 81b8e80941Smrg fprintf(stderr, "%s\n", err_message); 82b8e80941Smrg } 83b8e80941Smrg LLVMDisposeMessage(err_message); 84b8e80941Smrg return NULL; 85b8e80941Smrg } 86b8e80941Smrg return target; 87b8e80941Smrg} 88b8e80941Smrg 89b8e80941Smrgconst char *ac_get_llvm_processor_name(enum radeon_family family) 90b8e80941Smrg{ 91b8e80941Smrg switch (family) { 92b8e80941Smrg case CHIP_TAHITI: 93b8e80941Smrg return "tahiti"; 94b8e80941Smrg case CHIP_PITCAIRN: 95b8e80941Smrg return "pitcairn"; 96b8e80941Smrg case CHIP_VERDE: 97b8e80941Smrg return "verde"; 98b8e80941Smrg case CHIP_OLAND: 99b8e80941Smrg return "oland"; 100b8e80941Smrg case CHIP_HAINAN: 101b8e80941Smrg return "hainan"; 102b8e80941Smrg case CHIP_BONAIRE: 103b8e80941Smrg return "bonaire"; 104b8e80941Smrg case CHIP_KABINI: 105b8e80941Smrg return "kabini"; 106b8e80941Smrg case CHIP_KAVERI: 107b8e80941Smrg return "kaveri"; 108b8e80941Smrg case CHIP_HAWAII: 109b8e80941Smrg return "hawaii"; 110b8e80941Smrg case CHIP_MULLINS: 111b8e80941Smrg return "mullins"; 112b8e80941Smrg case CHIP_TONGA: 113b8e80941Smrg return "tonga"; 114b8e80941Smrg case CHIP_ICELAND: 115b8e80941Smrg return "iceland"; 116b8e80941Smrg case CHIP_CARRIZO: 117b8e80941Smrg return "carrizo"; 118b8e80941Smrg case CHIP_FIJI: 119b8e80941Smrg return "fiji"; 120b8e80941Smrg case CHIP_STONEY: 121b8e80941Smrg return "stoney"; 122b8e80941Smrg case CHIP_POLARIS10: 123b8e80941Smrg return "polaris10"; 124b8e80941Smrg case CHIP_POLARIS11: 125b8e80941Smrg case CHIP_POLARIS12: 126b8e80941Smrg case CHIP_VEGAM: 127b8e80941Smrg return "polaris11"; 128b8e80941Smrg case CHIP_VEGA10: 129b8e80941Smrg return "gfx900"; 130b8e80941Smrg case CHIP_RAVEN: 131b8e80941Smrg return "gfx902"; 132b8e80941Smrg case CHIP_VEGA12: 133b8e80941Smrg return "gfx904"; 134b8e80941Smrg case CHIP_VEGA20: 135b8e80941Smrg return "gfx906"; 136b8e80941Smrg case CHIP_RAVEN2: 137b8e80941Smrg return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902"; 138b8e80941Smrg default: 139b8e80941Smrg return ""; 140b8e80941Smrg } 141b8e80941Smrg} 142b8e80941Smrg 143b8e80941Smrgstatic LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, 144b8e80941Smrg enum ac_target_machine_options tm_options, 145b8e80941Smrg LLVMCodeGenOptLevel level, 146b8e80941Smrg const char **out_triple) 147b8e80941Smrg{ 148b8e80941Smrg assert(family >= CHIP_TAHITI); 149b8e80941Smrg char features[256]; 150b8e80941Smrg const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; 151b8e80941Smrg LLVMTargetRef target = ac_get_llvm_target(triple); 152b8e80941Smrg 153b8e80941Smrg snprintf(features, sizeof(features), 154ac028361Smrg "+DumpCode%s%s%s%s%s%s%s", 155ac028361Smrg HAVE_LLVM >= 0x0B00 ? "" : ",-fp32-denormals,+fp64-denormals", 156b8e80941Smrg HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", 157b8e80941Smrg tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", 158ac028361Smrg (tm_options & AC_TM_FORCE_ENABLE_XNACK) && HAVE_LLVM <= 0x0800 ? ",+xnack" : "", 159ac028361Smrg (tm_options & AC_TM_FORCE_DISABLE_XNACK) && HAVE_LLVM <= 0x0800 ? ",-xnack" : "", 160b8e80941Smrg tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", 161b8e80941Smrg tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : ""); 162b8e80941Smrg 163b8e80941Smrg LLVMTargetMachineRef tm = LLVMCreateTargetMachine( 164b8e80941Smrg target, 165b8e80941Smrg triple, 166b8e80941Smrg ac_get_llvm_processor_name(family), 167b8e80941Smrg features, 168b8e80941Smrg level, 169b8e80941Smrg LLVMRelocDefault, 170b8e80941Smrg LLVMCodeModelDefault); 171b8e80941Smrg 172b8e80941Smrg if (out_triple) 173b8e80941Smrg *out_triple = triple; 174b8e80941Smrg if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL) 175b8e80941Smrg ac_enable_global_isel(tm); 176b8e80941Smrg return tm; 177b8e80941Smrg} 178b8e80941Smrg 179b8e80941Smrgstatic LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, 180b8e80941Smrg bool check_ir) 181b8e80941Smrg{ 182b8e80941Smrg LLVMPassManagerRef passmgr = LLVMCreatePassManager(); 183b8e80941Smrg if (!passmgr) 184b8e80941Smrg return NULL; 185b8e80941Smrg 186b8e80941Smrg if (target_library_info) 187b8e80941Smrg LLVMAddTargetLibraryInfo(target_library_info, 188b8e80941Smrg passmgr); 189b8e80941Smrg 190b8e80941Smrg if (check_ir) 191b8e80941Smrg LLVMAddVerifierPass(passmgr); 192b8e80941Smrg LLVMAddAlwaysInlinerPass(passmgr); 193b8e80941Smrg /* Normally, the pass manager runs all passes on one function before 194b8e80941Smrg * moving onto another. Adding a barrier no-op pass forces the pass 195b8e80941Smrg * manager to run the inliner on all functions first, which makes sure 196b8e80941Smrg * that the following passes are only run on the remaining non-inline 197b8e80941Smrg * function, so it removes useless work done on dead inline functions. 198b8e80941Smrg */ 199b8e80941Smrg ac_llvm_add_barrier_noop_pass(passmgr); 200b8e80941Smrg /* This pass should eliminate all the load and store instructions. */ 201b8e80941Smrg LLVMAddPromoteMemoryToRegisterPass(passmgr); 202b8e80941Smrg LLVMAddScalarReplAggregatesPass(passmgr); 203b8e80941Smrg LLVMAddLICMPass(passmgr); 204b8e80941Smrg LLVMAddAggressiveDCEPass(passmgr); 205b8e80941Smrg LLVMAddCFGSimplificationPass(passmgr); 206b8e80941Smrg /* This is recommended by the instruction combining pass. */ 207b8e80941Smrg LLVMAddEarlyCSEMemSSAPass(passmgr); 208b8e80941Smrg LLVMAddInstructionCombiningPass(passmgr); 209b8e80941Smrg return passmgr; 210b8e80941Smrg} 211b8e80941Smrg 212b8e80941Smrgstatic const char *attr_to_str(enum ac_func_attr attr) 213b8e80941Smrg{ 214b8e80941Smrg switch (attr) { 215b8e80941Smrg case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; 216b8e80941Smrg case AC_FUNC_ATTR_INREG: return "inreg"; 217b8e80941Smrg case AC_FUNC_ATTR_NOALIAS: return "noalias"; 218b8e80941Smrg case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; 219b8e80941Smrg case AC_FUNC_ATTR_READNONE: return "readnone"; 220b8e80941Smrg case AC_FUNC_ATTR_READONLY: return "readonly"; 221b8e80941Smrg case AC_FUNC_ATTR_WRITEONLY: return "writeonly"; 222b8e80941Smrg case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; 223b8e80941Smrg case AC_FUNC_ATTR_CONVERGENT: return "convergent"; 224b8e80941Smrg default: 225b8e80941Smrg fprintf(stderr, "Unhandled function attribute: %x\n", attr); 226b8e80941Smrg return 0; 227b8e80941Smrg } 228b8e80941Smrg} 229b8e80941Smrg 230b8e80941Smrgvoid 231b8e80941Smrgac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, 232b8e80941Smrg int attr_idx, enum ac_func_attr attr) 233b8e80941Smrg{ 234b8e80941Smrg const char *attr_name = attr_to_str(attr); 235b8e80941Smrg unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, 236b8e80941Smrg strlen(attr_name)); 237b8e80941Smrg LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); 238b8e80941Smrg 239b8e80941Smrg if (LLVMIsAFunction(function)) 240b8e80941Smrg LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); 241b8e80941Smrg else 242b8e80941Smrg LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); 243b8e80941Smrg} 244b8e80941Smrg 245b8e80941Smrgvoid ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, 246b8e80941Smrg unsigned attrib_mask) 247b8e80941Smrg{ 248b8e80941Smrg attrib_mask |= AC_FUNC_ATTR_NOUNWIND; 249b8e80941Smrg attrib_mask &= ~AC_FUNC_ATTR_LEGACY; 250b8e80941Smrg 251b8e80941Smrg while (attrib_mask) { 252b8e80941Smrg enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); 253b8e80941Smrg ac_add_function_attr(ctx, function, -1, attr); 254b8e80941Smrg } 255b8e80941Smrg} 256b8e80941Smrg 257b8e80941Smrgvoid 258b8e80941Smrgac_dump_module(LLVMModuleRef module) 259b8e80941Smrg{ 260b8e80941Smrg char *str = LLVMPrintModuleToString(module); 261b8e80941Smrg fprintf(stderr, "%s", str); 262b8e80941Smrg LLVMDisposeMessage(str); 263b8e80941Smrg} 264b8e80941Smrg 265b8e80941Smrgvoid 266b8e80941Smrgac_llvm_add_target_dep_function_attr(LLVMValueRef F, 267b8e80941Smrg const char *name, unsigned value) 268b8e80941Smrg{ 269b8e80941Smrg char str[16]; 270b8e80941Smrg 271b8e80941Smrg snprintf(str, sizeof(str), "0x%x", value); 272b8e80941Smrg LLVMAddTargetDependentFunctionAttr(F, name, str); 273b8e80941Smrg} 274b8e80941Smrg 275b8e80941Smrgunsigned 276b8e80941Smrgac_count_scratch_private_memory(LLVMValueRef function) 277b8e80941Smrg{ 278b8e80941Smrg unsigned private_mem_vgprs = 0; 279b8e80941Smrg 280b8e80941Smrg /* Process all LLVM instructions. */ 281b8e80941Smrg LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); 282b8e80941Smrg while (bb) { 283b8e80941Smrg LLVMValueRef next = LLVMGetFirstInstruction(bb); 284b8e80941Smrg 285b8e80941Smrg while (next) { 286b8e80941Smrg LLVMValueRef inst = next; 287b8e80941Smrg next = LLVMGetNextInstruction(next); 288b8e80941Smrg 289b8e80941Smrg if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) 290b8e80941Smrg continue; 291b8e80941Smrg 292b8e80941Smrg LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); 293b8e80941Smrg /* No idea why LLVM aligns allocas to 4 elements. */ 294b8e80941Smrg unsigned alignment = LLVMGetAlignment(inst); 295b8e80941Smrg unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); 296b8e80941Smrg private_mem_vgprs += dw_size; 297b8e80941Smrg } 298b8e80941Smrg bb = LLVMGetNextBasicBlock(bb); 299b8e80941Smrg } 300b8e80941Smrg 301b8e80941Smrg return private_mem_vgprs; 302b8e80941Smrg} 303b8e80941Smrg 304b8e80941Smrgbool 305b8e80941Smrgac_init_llvm_compiler(struct ac_llvm_compiler *compiler, 306b8e80941Smrg enum radeon_family family, 307b8e80941Smrg enum ac_target_machine_options tm_options) 308b8e80941Smrg{ 309b8e80941Smrg const char *triple; 310b8e80941Smrg memset(compiler, 0, sizeof(*compiler)); 311b8e80941Smrg 312b8e80941Smrg compiler->tm = ac_create_target_machine(family, tm_options, 313b8e80941Smrg LLVMCodeGenLevelDefault, 314b8e80941Smrg &triple); 315b8e80941Smrg if (!compiler->tm) 316b8e80941Smrg return false; 317b8e80941Smrg 318b8e80941Smrg if (tm_options & AC_TM_CREATE_LOW_OPT) { 319b8e80941Smrg compiler->low_opt_tm = 320b8e80941Smrg ac_create_target_machine(family, tm_options, 321b8e80941Smrg LLVMCodeGenLevelLess, NULL); 322b8e80941Smrg if (!compiler->low_opt_tm) 323b8e80941Smrg goto fail; 324b8e80941Smrg } 325b8e80941Smrg 326b8e80941Smrg compiler->target_library_info = 327b8e80941Smrg ac_create_target_library_info(triple); 328b8e80941Smrg if (!compiler->target_library_info) 329b8e80941Smrg goto fail; 330b8e80941Smrg 331b8e80941Smrg compiler->passmgr = ac_create_passmgr(compiler->target_library_info, 332b8e80941Smrg tm_options & AC_TM_CHECK_IR); 333b8e80941Smrg if (!compiler->passmgr) 334b8e80941Smrg goto fail; 335b8e80941Smrg 336b8e80941Smrg return true; 337b8e80941Smrgfail: 338b8e80941Smrg ac_destroy_llvm_compiler(compiler); 339b8e80941Smrg return false; 340b8e80941Smrg} 341b8e80941Smrg 342b8e80941Smrgvoid 343b8e80941Smrgac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) 344b8e80941Smrg{ 345b8e80941Smrg if (compiler->passmgr) 346b8e80941Smrg LLVMDisposePassManager(compiler->passmgr); 347b8e80941Smrg if (compiler->target_library_info) 348b8e80941Smrg ac_dispose_target_library_info(compiler->target_library_info); 349b8e80941Smrg if (compiler->low_opt_tm) 350b8e80941Smrg LLVMDisposeTargetMachine(compiler->low_opt_tm); 351b8e80941Smrg if (compiler->tm) 352b8e80941Smrg LLVMDisposeTargetMachine(compiler->tm); 353b8e80941Smrg} 354