Home | History | Annotate | Line # | Download | only in ToolChains
      1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #include "AMDGPU.h"
     10 #include "CommonArgs.h"
     11 #include "InputInfo.h"
     12 #include "clang/Basic/TargetID.h"
     13 #include "clang/Driver/Compilation.h"
     14 #include "clang/Driver/DriverDiagnostic.h"
     15 #include "clang/Driver/Options.h"
     16 #include "llvm/Option/ArgList.h"
     17 #include "llvm/Support/Error.h"
     18 #include "llvm/Support/FileUtilities.h"
     19 #include "llvm/Support/LineIterator.h"
     20 #include "llvm/Support/Path.h"
     21 #include "llvm/Support/VirtualFileSystem.h"
     22 #include <system_error>
     23 
     24 #define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
     25 
     26 using namespace clang::driver;
     27 using namespace clang::driver::tools;
     28 using namespace clang::driver::toolchains;
     29 using namespace clang;
     30 using namespace llvm::opt;
     31 
     32 // Look for sub-directory starts with PackageName under ROCm candidate path.
     33 // If there is one and only one matching sub-directory found, append the
     34 // sub-directory to Path. If there is no matching sub-directory or there are
     35 // more than one matching sub-directories, diagnose them. Returns the full
     36 // path of the package if there is only one matching sub-directory, otherwise
     37 // returns an empty string.
     38 llvm::SmallString<0>
     39 RocmInstallationDetector::findSPACKPackage(const Candidate &Cand,
     40                                            StringRef PackageName) {
     41   if (!Cand.isSPACK())
     42     return {};
     43   std::error_code EC;
     44   std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str();
     45   llvm::SmallVector<llvm::SmallString<0>> SubDirs;
     46   for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC),
     47                                      FileEnd;
     48        File != FileEnd && !EC; File.increment(EC)) {
     49     llvm::StringRef FileName = llvm::sys::path::filename(File->path());
     50     if (FileName.startswith(Prefix)) {
     51       SubDirs.push_back(FileName);
     52       if (SubDirs.size() > 1)
     53         break;
     54     }
     55   }
     56   if (SubDirs.size() == 1) {
     57     auto PackagePath = Cand.Path;
     58     llvm::sys::path::append(PackagePath, SubDirs[0]);
     59     return PackagePath;
     60   }
     61   if (SubDirs.size() == 0 && Verbose) {
     62     llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path
     63                  << '\n';
     64     return {};
     65   }
     66 
     67   if (SubDirs.size() > 1 && Verbose) {
     68     llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path
     69                  << " due to multiple installations for the same version\n";
     70   }
     71   return {};
     72 }
     73 
     74 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
     75   assert(!Path.empty());
     76 
     77   const StringRef Suffix(".bc");
     78   const StringRef Suffix2(".amdgcn.bc");
     79 
     80   std::error_code EC;
     81   for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
     82        !EC && LI != LE; LI = LI.increment(EC)) {
     83     StringRef FilePath = LI->path();
     84     StringRef FileName = llvm::sys::path::filename(FilePath);
     85     if (!FileName.endswith(Suffix))
     86       continue;
     87 
     88     StringRef BaseName;
     89     if (FileName.endswith(Suffix2))
     90       BaseName = FileName.drop_back(Suffix2.size());
     91     else if (FileName.endswith(Suffix))
     92       BaseName = FileName.drop_back(Suffix.size());
     93 
     94     if (BaseName == "ocml") {
     95       OCML = FilePath;
     96     } else if (BaseName == "ockl") {
     97       OCKL = FilePath;
     98     } else if (BaseName == "opencl") {
     99       OpenCL = FilePath;
    100     } else if (BaseName == "hip") {
    101       HIP = FilePath;
    102     } else if (BaseName == "asanrtl") {
    103       AsanRTL = FilePath;
    104     } else if (BaseName == "oclc_finite_only_off") {
    105       FiniteOnly.Off = FilePath;
    106     } else if (BaseName == "oclc_finite_only_on") {
    107       FiniteOnly.On = FilePath;
    108     } else if (BaseName == "oclc_daz_opt_on") {
    109       DenormalsAreZero.On = FilePath;
    110     } else if (BaseName == "oclc_daz_opt_off") {
    111       DenormalsAreZero.Off = FilePath;
    112     } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
    113       CorrectlyRoundedSqrt.On = FilePath;
    114     } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
    115       CorrectlyRoundedSqrt.Off = FilePath;
    116     } else if (BaseName == "oclc_unsafe_math_on") {
    117       UnsafeMath.On = FilePath;
    118     } else if (BaseName == "oclc_unsafe_math_off") {
    119       UnsafeMath.Off = FilePath;
    120     } else if (BaseName == "oclc_wavefrontsize64_on") {
    121       WavefrontSize64.On = FilePath;
    122     } else if (BaseName == "oclc_wavefrontsize64_off") {
    123       WavefrontSize64.Off = FilePath;
    124     } else {
    125       // Process all bitcode filenames that look like
    126       // ocl_isa_version_XXX.amdgcn.bc
    127       const StringRef DeviceLibPrefix = "oclc_isa_version_";
    128       if (!BaseName.startswith(DeviceLibPrefix))
    129         continue;
    130 
    131       StringRef IsaVersionNumber =
    132         BaseName.drop_front(DeviceLibPrefix.size());
    133 
    134       llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
    135       SmallString<8> Tmp;
    136       LibDeviceMap.insert(
    137         std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
    138     }
    139   }
    140 }
    141 
    142 // Parse and extract version numbers from `.hipVersion`. Return `true` if
    143 // the parsing fails.
    144 bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
    145   SmallVector<StringRef, 4> VersionParts;
    146   V.split(VersionParts, '\n');
    147   unsigned Major = ~0U;
    148   unsigned Minor = ~0U;
    149   for (auto Part : VersionParts) {
    150     auto Splits = Part.rtrim().split('=');
    151     if (Splits.first == "HIP_VERSION_MAJOR") {
    152       if (Splits.second.getAsInteger(0, Major))
    153         return true;
    154     } else if (Splits.first == "HIP_VERSION_MINOR") {
    155       if (Splits.second.getAsInteger(0, Minor))
    156         return true;
    157     } else if (Splits.first == "HIP_VERSION_PATCH")
    158       VersionPatch = Splits.second.str();
    159   }
    160   if (Major == ~0U || Minor == ~0U)
    161     return true;
    162   VersionMajorMinor = llvm::VersionTuple(Major, Minor);
    163   DetectedVersion =
    164       (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
    165   return false;
    166 }
    167 
    168 /// \returns a list of candidate directories for ROCm installation, which is
    169 /// cached and populated only once.
    170 const SmallVectorImpl<RocmInstallationDetector::Candidate> &
    171 RocmInstallationDetector::getInstallationPathCandidates() {
    172 
    173   // Return the cached candidate list if it has already been populated.
    174   if (!ROCmSearchDirs.empty())
    175     return ROCmSearchDirs;
    176 
    177   auto DoPrintROCmSearchDirs = [&]() {
    178     if (PrintROCmSearchDirs)
    179       for (auto Cand : ROCmSearchDirs) {
    180         llvm::errs() << "ROCm installation search path";
    181         if (Cand.isSPACK())
    182           llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")";
    183         llvm::errs() << ": " << Cand.Path << '\n';
    184       }
    185   };
    186 
    187   // For candidate specified by --rocm-path we do not do strict check, i.e.,
    188   // checking existence of HIP version file and device library files.
    189   if (!RocmPathArg.empty()) {
    190     ROCmSearchDirs.emplace_back(RocmPathArg.str());
    191     DoPrintROCmSearchDirs();
    192     return ROCmSearchDirs;
    193   } else if (const char *RocmPathEnv = ::getenv("ROCM_PATH")) {
    194     if (!StringRef(RocmPathEnv).empty()) {
    195       ROCmSearchDirs.emplace_back(RocmPathEnv);
    196       DoPrintROCmSearchDirs();
    197       return ROCmSearchDirs;
    198     }
    199   }
    200 
    201   // Try to find relative to the compiler binary.
    202   const char *InstallDir = D.getInstalledDir();
    203 
    204   // Check both a normal Unix prefix position of the clang binary, as well as
    205   // the Windows-esque layout the ROCm packages use with the host architecture
    206   // subdirectory of bin.
    207   auto DeduceROCmPath = [](StringRef ClangPath) {
    208     // Strip off directory (usually bin)
    209     StringRef ParentDir = llvm::sys::path::parent_path(ClangPath);
    210     StringRef ParentName = llvm::sys::path::filename(ParentDir);
    211 
    212     // Some builds use bin/{host arch}, so go up again.
    213     if (ParentName == "bin") {
    214       ParentDir = llvm::sys::path::parent_path(ParentDir);
    215       ParentName = llvm::sys::path::filename(ParentDir);
    216     }
    217 
    218     // Detect ROCm packages built with SPACK.
    219     // clang is installed at
    220     // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
    221     // We only consider the parent directory of llvm-amdgpu package as ROCm
    222     // installation candidate for SPACK.
    223     if (ParentName.startswith("llvm-amdgpu-")) {
    224       auto SPACKPostfix =
    225           ParentName.drop_front(strlen("llvm-amdgpu-")).split('-');
    226       auto SPACKReleaseStr = SPACKPostfix.first;
    227       if (!SPACKReleaseStr.empty()) {
    228         ParentDir = llvm::sys::path::parent_path(ParentDir);
    229         return Candidate(ParentDir.str(), /*StrictChecking=*/true,
    230                          SPACKReleaseStr);
    231       }
    232     }
    233 
    234     // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
    235     // Some versions of the aomp package install to /opt/rocm/aomp/bin
    236     if (ParentName == "llvm" || ParentName.startswith("aomp"))
    237       ParentDir = llvm::sys::path::parent_path(ParentDir);
    238 
    239     return Candidate(ParentDir.str(), /*StrictChecking=*/true);
    240   };
    241 
    242   // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic
    243   // link of clang itself.
    244   ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir));
    245 
    246   // Deduce ROCm path by the real path of the invoked clang, resolving symbolic
    247   // link of clang itself.
    248   llvm::SmallString<256> RealClangPath;
    249   llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath);
    250   auto ParentPath = llvm::sys::path::parent_path(RealClangPath);
    251   if (ParentPath != InstallDir)
    252     ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath));
    253 
    254   // Device library may be installed in clang resource directory.
    255   ROCmSearchDirs.emplace_back(D.ResourceDir,
    256                               /*StrictChecking=*/true);
    257 
    258   ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm",
    259                               /*StrictChecking=*/true);
    260 
    261   // Find the latest /opt/rocm-{release} directory.
    262   std::error_code EC;
    263   std::string LatestROCm;
    264   llvm::VersionTuple LatestVer;
    265   // Get ROCm version from ROCm directory name.
    266   auto GetROCmVersion = [](StringRef DirName) {
    267     llvm::VersionTuple V;
    268     std::string VerStr = DirName.drop_front(strlen("rocm-")).str();
    269     // The ROCm directory name follows the format of
    270     // rocm-{major}.{minor}.{subMinor}[-{build}]
    271     std::replace(VerStr.begin(), VerStr.end(), '-', '.');
    272     V.tryParse(VerStr);
    273     return V;
    274   };
    275   for (llvm::vfs::directory_iterator
    276            File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC),
    277            FileEnd;
    278        File != FileEnd && !EC; File.increment(EC)) {
    279     llvm::StringRef FileName = llvm::sys::path::filename(File->path());
    280     if (!FileName.startswith("rocm-"))
    281       continue;
    282     if (LatestROCm.empty()) {
    283       LatestROCm = FileName.str();
    284       LatestVer = GetROCmVersion(LatestROCm);
    285       continue;
    286     }
    287     auto Ver = GetROCmVersion(FileName);
    288     if (LatestVer < Ver) {
    289       LatestROCm = FileName.str();
    290       LatestVer = Ver;
    291     }
    292   }
    293   if (!LatestROCm.empty())
    294     ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm,
    295                                 /*StrictChecking=*/true);
    296 
    297   DoPrintROCmSearchDirs();
    298   return ROCmSearchDirs;
    299 }
    300 
    301 RocmInstallationDetector::RocmInstallationDetector(
    302     const Driver &D, const llvm::Triple &HostTriple,
    303     const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
    304     : D(D) {
    305   Verbose = Args.hasArg(options::OPT_v);
    306   RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
    307   PrintROCmSearchDirs =
    308       Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs);
    309   RocmDeviceLibPathArg =
    310       Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
    311   HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
    312   if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
    313     HIPVersionArg = A->getValue();
    314     unsigned Major = 0;
    315     unsigned Minor = 0;
    316     SmallVector<StringRef, 3> Parts;
    317     HIPVersionArg.split(Parts, '.');
    318     if (Parts.size())
    319       Parts[0].getAsInteger(0, Major);
    320     if (Parts.size() > 1)
    321       Parts[1].getAsInteger(0, Minor);
    322     if (Parts.size() > 2)
    323       VersionPatch = Parts[2].str();
    324     if (VersionPatch.empty())
    325       VersionPatch = "0";
    326     if (Major == 0 || Minor == 0)
    327       D.Diag(diag::err_drv_invalid_value)
    328           << A->getAsString(Args) << HIPVersionArg;
    329 
    330     VersionMajorMinor = llvm::VersionTuple(Major, Minor);
    331     DetectedVersion =
    332         (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
    333   } else {
    334     VersionPatch = DefaultVersionPatch;
    335     VersionMajorMinor =
    336         llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
    337     DetectedVersion = (Twine(DefaultVersionMajor) + "." +
    338                        Twine(DefaultVersionMinor) + "." + VersionPatch)
    339                           .str();
    340   }
    341 
    342   if (DetectHIPRuntime)
    343     detectHIPRuntime();
    344   if (DetectDeviceLib)
    345     detectDeviceLibrary();
    346 }
    347 
    348 void RocmInstallationDetector::detectDeviceLibrary() {
    349   assert(LibDevicePath.empty());
    350 
    351   if (!RocmDeviceLibPathArg.empty())
    352     LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
    353   else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
    354     LibDevicePath = LibPathEnv;
    355 
    356   auto &FS = D.getVFS();
    357   if (!LibDevicePath.empty()) {
    358     // Maintain compatability with HIP flag/envvar pointing directly at the
    359     // bitcode library directory. This points directly at the library path instead
    360     // of the rocm root installation.
    361     if (!FS.exists(LibDevicePath))
    362       return;
    363 
    364     scanLibDevicePath(LibDevicePath);
    365     HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
    366     return;
    367   }
    368 
    369   // The install path situation in old versions of ROCm is a real mess, and
    370   // use a different install layout. Multiple copies of the device libraries
    371   // exist for each frontend project, and differ depending on which build
    372   // system produced the packages. Standalone OpenCL builds also have a
    373   // different directory structure from the ROCm OpenCL package.
    374   auto &ROCmDirs = getInstallationPathCandidates();
    375   for (const auto &Candidate : ROCmDirs) {
    376     auto CandidatePath = Candidate.Path;
    377 
    378     // Check device library exists at the given path.
    379     auto CheckDeviceLib = [&](StringRef Path) {
    380       bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
    381       if (CheckLibDevice && !FS.exists(Path))
    382         return false;
    383 
    384       scanLibDevicePath(Path);
    385 
    386       if (!NoBuiltinLibs) {
    387         // Check that the required non-target libraries are all available.
    388         if (!allGenericLibsValid())
    389           return false;
    390 
    391         // Check that we have found at least one libdevice that we can link in
    392         // if -nobuiltinlib hasn't been specified.
    393         if (LibDeviceMap.empty())
    394           return false;
    395       }
    396       return true;
    397     };
    398 
    399     // The possible structures are:
    400     // - ${ROCM_ROOT}/amdgcn/bitcode/*
    401     // - ${ROCM_ROOT}/lib/*
    402     // - ${ROCM_ROOT}/lib/bitcode/*
    403     // so try to detect these layouts.
    404     static constexpr std::array<const char *, 2> SubDirsList[] = {
    405         {"amdgcn", "bitcode"},
    406         {"lib", ""},
    407         {"lib", "bitcode"},
    408     };
    409 
    410     // Make a path by appending sub-directories to InstallPath.
    411     auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
    412       // Device library built by SPACK is installed to
    413       // <rocm_root>/rocm-device-libs-<rocm_release_string>-<hash> directory.
    414       auto SPACKPath = findSPACKPackage(Candidate, "rocm-device-libs");
    415       auto Path = SPACKPath.empty() ? CandidatePath : SPACKPath;
    416       for (auto SubDir : SubDirs)
    417         llvm::sys::path::append(Path, SubDir);
    418       return Path;
    419     };
    420 
    421     for (auto SubDirs : SubDirsList) {
    422       LibDevicePath = MakePath(SubDirs);
    423       HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
    424       if (HasDeviceLibrary)
    425         return;
    426     }
    427   }
    428 }
    429 
    430 void RocmInstallationDetector::detectHIPRuntime() {
    431   SmallVector<Candidate, 4> HIPSearchDirs;
    432   if (!HIPPathArg.empty())
    433     HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true);
    434   else
    435     HIPSearchDirs.append(getInstallationPathCandidates());
    436   auto &FS = D.getVFS();
    437 
    438   for (const auto &Candidate : HIPSearchDirs) {
    439     InstallPath = Candidate.Path;
    440     if (InstallPath.empty() || !FS.exists(InstallPath))
    441       continue;
    442     // HIP runtime built by SPACK is installed to
    443     // <rocm_root>/hip-<rocm_release_string>-<hash> directory.
    444     auto SPACKPath = findSPACKPackage(Candidate, "hip");
    445     InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath;
    446 
    447     BinPath = InstallPath;
    448     llvm::sys::path::append(BinPath, "bin");
    449     IncludePath = InstallPath;
    450     llvm::sys::path::append(IncludePath, "include");
    451     LibPath = InstallPath;
    452     llvm::sys::path::append(LibPath, "lib");
    453 
    454     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
    455         FS.getBufferForFile(BinPath + "/.hipVersion");
    456     if (!VersionFile && Candidate.StrictChecking)
    457       continue;
    458 
    459     if (HIPVersionArg.empty() && VersionFile)
    460       if (parseHIPVersionFile((*VersionFile)->getBuffer()))
    461         continue;
    462 
    463     HasHIPRuntime = true;
    464     return;
    465   }
    466   HasHIPRuntime = false;
    467 }
    468 
    469 void RocmInstallationDetector::print(raw_ostream &OS) const {
    470   if (hasHIPRuntime())
    471     OS << "Found HIP installation: " << InstallPath << ", version "
    472        << DetectedVersion << '\n';
    473 }
    474 
    475 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
    476                                                  ArgStringList &CC1Args) const {
    477   bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
    478 
    479   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
    480     // HIP header includes standard library wrapper headers under clang
    481     // cuda_wrappers directory. Since these wrapper headers include_next
    482     // standard C++ headers, whereas libc++ headers include_next other clang
    483     // headers. The include paths have to follow this order:
    484     // - wrapper include path
    485     // - standard C++ include path
    486     // - other clang include path
    487     // Since standard C++ and other clang include paths are added in other
    488     // places after this function, here we only need to make sure wrapper
    489     // include path is added.
    490     //
    491     // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
    492     // a workaround.
    493     SmallString<128> P(D.ResourceDir);
    494     if (UsesRuntimeWrapper)
    495       llvm::sys::path::append(P, "include", "cuda_wrappers");
    496     CC1Args.push_back("-internal-isystem");
    497     CC1Args.push_back(DriverArgs.MakeArgString(P));
    498   }
    499 
    500   if (DriverArgs.hasArg(options::OPT_nogpuinc))
    501     return;
    502 
    503   if (!hasHIPRuntime()) {
    504     D.Diag(diag::err_drv_no_hip_runtime);
    505     return;
    506   }
    507 
    508   CC1Args.push_back("-internal-isystem");
    509   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
    510   if (UsesRuntimeWrapper)
    511     CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
    512 }
    513 
    514 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
    515                                   const InputInfo &Output,
    516                                   const InputInfoList &Inputs,
    517                                   const ArgList &Args,
    518                                   const char *LinkingOutput) const {
    519 
    520   std::string Linker = getToolChain().GetProgramPath(getShortName());
    521   ArgStringList CmdArgs;
    522   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
    523   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
    524   CmdArgs.push_back("-shared");
    525   CmdArgs.push_back("-o");
    526   CmdArgs.push_back(Output.getFilename());
    527   C.addCommand(std::make_unique<Command>(
    528       JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
    529       CmdArgs, Inputs, Output));
    530 }
    531 
    532 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
    533                                      const llvm::Triple &Triple,
    534                                      const llvm::opt::ArgList &Args,
    535                                      std::vector<StringRef> &Features) {
    536   // Add target ID features to -target-feature options. No diagnostics should
    537   // be emitted here since invalid target ID is diagnosed at other places.
    538   StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
    539   if (!TargetID.empty()) {
    540     llvm::StringMap<bool> FeatureMap;
    541     auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
    542     if (OptionalGpuArch) {
    543       StringRef GpuArch = OptionalGpuArch.getValue();
    544       // Iterate through all possible target ID features for the given GPU.
    545       // If it is mapped to true, add +feature.
    546       // If it is mapped to false, add -feature.
    547       // If it is not in the map (default), do not add it
    548       for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
    549         auto Pos = FeatureMap.find(Feature);
    550         if (Pos == FeatureMap.end())
    551           continue;
    552         Features.push_back(Args.MakeArgStringRef(
    553             (Twine(Pos->second ? "+" : "-") + Feature).str()));
    554       }
    555     }
    556   }
    557 
    558   if (Args.hasFlag(options::OPT_mwavefrontsize64,
    559                    options::OPT_mno_wavefrontsize64, false))
    560     Features.push_back("+wavefrontsize64");
    561 
    562   handleTargetFeaturesGroup(
    563     Args, Features, options::OPT_m_amdgpu_Features_Group);
    564 }
    565 
    566 /// AMDGPU Toolchain
    567 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
    568                                  const ArgList &Args)
    569     : Generic_ELF(D, Triple, Args),
    570       OptionsDefault(
    571           {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
    572   // Check code object version options. Emit warnings for legacy options
    573   // and errors for the last invalid code object version options.
    574   // It is done here to avoid repeated warning or error messages for
    575   // each tool invocation.
    576   checkAMDGPUCodeObjectVersion(D, Args);
    577 }
    578 
    579 Tool *AMDGPUToolChain::buildLinker() const {
    580   return new tools::amdgpu::Linker(*this);
    581 }
    582 
    583 DerivedArgList *
    584 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
    585                                Action::OffloadKind DeviceOffloadKind) const {
    586 
    587   DerivedArgList *DAL =
    588       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
    589 
    590   const OptTable &Opts = getDriver().getOpts();
    591 
    592   if (!DAL)
    593     DAL = new DerivedArgList(Args.getBaseArgs());
    594 
    595   for (Arg *A : Args) {
    596     if (!shouldSkipArgument(A))
    597       DAL->append(A);
    598   }
    599 
    600   checkTargetID(*DAL);
    601 
    602   if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
    603     return DAL;
    604 
    605   // Phase 1 (.cl -> .bc)
    606   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
    607     DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
    608                                                 ? options::OPT_m64
    609                                                 : options::OPT_m32));
    610 
    611     // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
    612     // as they defined that way in Options.td
    613     if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
    614                      options::OPT_Ofast))
    615       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
    616                         getOptionDefault(options::OPT_O));
    617   }
    618 
    619   return DAL;
    620 }
    621 
    622 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
    623     llvm::AMDGPU::GPUKind Kind) {
    624 
    625   // Assume nothing without a specific target.
    626   if (Kind == llvm::AMDGPU::GK_NONE)
    627     return false;
    628 
    629   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
    630 
    631   // Default to enabling f32 denormals by default on subtargets where fma is
    632   // fast with denormals
    633   const bool BothDenormAndFMAFast =
    634       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
    635       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
    636   return !BothDenormAndFMAFast;
    637 }
    638 
    639 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
    640     const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    641     const llvm::fltSemantics *FPType) const {
    642   // Denormals should always be enabled for f16 and f64.
    643   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
    644     return llvm::DenormalMode::getIEEE();
    645 
    646   if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
    647       JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
    648     auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
    649     auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
    650     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
    651         DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
    652                            options::OPT_fno_gpu_flush_denormals_to_zero,
    653                            getDefaultDenormsAreZeroForTarget(Kind)))
    654       return llvm::DenormalMode::getPreserveSign();
    655 
    656     return llvm::DenormalMode::getIEEE();
    657   }
    658 
    659   const StringRef GpuArch = getGPUArch(DriverArgs);
    660   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
    661 
    662   // TODO: There are way too many flags that change this. Do we need to check
    663   // them all?
    664   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
    665              getDefaultDenormsAreZeroForTarget(Kind);
    666 
    667   // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
    668   // also implicit treated as zero (DAZ).
    669   return DAZ ? llvm::DenormalMode::getPreserveSign() :
    670                llvm::DenormalMode::getIEEE();
    671 }
    672 
    673 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
    674                                llvm::AMDGPU::GPUKind Kind) {
    675   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
    676   bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
    677 
    678   return !HasWave32 || DriverArgs.hasFlag(
    679     options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
    680 }
    681 
    682 
    683 /// ROCM Toolchain
    684 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
    685                              const ArgList &Args)
    686     : AMDGPUToolChain(D, Triple, Args) {
    687   RocmInstallation.detectDeviceLibrary();
    688 }
    689 
    690 void AMDGPUToolChain::addClangTargetOptions(
    691     const llvm::opt::ArgList &DriverArgs,
    692     llvm::opt::ArgStringList &CC1Args,
    693     Action::OffloadKind DeviceOffloadingKind) const {
    694   // Default to "hidden" visibility, as object level linking will not be
    695   // supported for the foreseeable future.
    696   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
    697                          options::OPT_fvisibility_ms_compat)) {
    698     CC1Args.push_back("-fvisibility");
    699     CC1Args.push_back("hidden");
    700     CC1Args.push_back("-fapply-global-visibility-to-externs");
    701   }
    702 }
    703 
    704 StringRef
    705 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
    706   return getProcessorFromTargetID(
    707       getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
    708 }
    709 
    710 void AMDGPUToolChain::checkTargetID(
    711     const llvm::opt::ArgList &DriverArgs) const {
    712   StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
    713   if (TargetID.empty())
    714     return;
    715 
    716   llvm::StringMap<bool> FeatureMap;
    717   auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
    718   if (!OptionalGpuArch) {
    719     getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
    720   }
    721 }
    722 
    723 llvm::Error
    724 AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
    725                                   SmallVector<std::string, 1> &GPUArchs) const {
    726   std::string Program;
    727   if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
    728     Program = A->getValue();
    729   else
    730     Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
    731   llvm::SmallString<64> OutputFile;
    732   llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
    733                                      OutputFile);
    734   llvm::FileRemover OutputRemover(OutputFile.c_str());
    735   llvm::Optional<llvm::StringRef> Redirects[] = {
    736       {""},
    737       StringRef(OutputFile),
    738       {""},
    739   };
    740 
    741   std::string ErrorMessage;
    742   if (int Result = llvm::sys::ExecuteAndWait(
    743           Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0,
    744           /*MemoryLimit*/ 0, &ErrorMessage)) {
    745     if (Result > 0) {
    746       ErrorMessage = "Exited with error code " + std::to_string(Result);
    747     } else if (Result == -1) {
    748       ErrorMessage = "Execute failed: " + ErrorMessage;
    749     } else {
    750       ErrorMessage = "Crashed: " + ErrorMessage;
    751     }
    752 
    753     return llvm::createStringError(std::error_code(),
    754                                    Program + ": " + ErrorMessage);
    755   }
    756 
    757   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
    758       llvm::MemoryBuffer::getFile(OutputFile.c_str());
    759   if (!OutputBuf) {
    760     return llvm::createStringError(OutputBuf.getError(),
    761                                    "Failed to read stdout of " + Program +
    762                                        ": " + OutputBuf.getError().message());
    763   }
    764 
    765   for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
    766     GPUArchs.push_back(LineIt->str());
    767   }
    768   return llvm::Error::success();
    769 }
    770 
    771 llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
    772                                               std::string &GPUArch) const {
    773   // detect the AMDGPU installed in system
    774   SmallVector<std::string, 1> GPUArchs;
    775   auto Err = detectSystemGPUs(Args, GPUArchs);
    776   if (Err) {
    777     return Err;
    778   }
    779   if (GPUArchs.empty()) {
    780     return llvm::createStringError(std::error_code(),
    781                                    "No AMD GPU detected in the system");
    782   }
    783   GPUArch = GPUArchs[0];
    784   if (GPUArchs.size() > 1) {
    785     bool AllSame = std::all_of(
    786         GPUArchs.begin(), GPUArchs.end(),
    787         [&](const StringRef &GPUArch) { return GPUArch == GPUArchs.front(); });
    788     if (!AllSame)
    789       return llvm::createStringError(
    790           std::error_code(), "Multiple AMD GPUs found with different archs");
    791   }
    792   return llvm::Error::success();
    793 }
    794 
    795 void ROCMToolChain::addClangTargetOptions(
    796     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    797     Action::OffloadKind DeviceOffloadingKind) const {
    798   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
    799                                          DeviceOffloadingKind);
    800 
    801   // For the OpenCL case where there is no offload target, accept -nostdlib to
    802   // disable bitcode linking.
    803   if (DeviceOffloadingKind == Action::OFK_None &&
    804       DriverArgs.hasArg(options::OPT_nostdlib))
    805     return;
    806 
    807   if (DriverArgs.hasArg(options::OPT_nogpulib))
    808     return;
    809 
    810   if (!RocmInstallation.hasDeviceLibrary()) {
    811     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
    812     return;
    813   }
    814 
    815   // Get the device name and canonicalize it
    816   const StringRef GpuArch = getGPUArch(DriverArgs);
    817   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
    818   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
    819   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
    820   if (LibDeviceFile.empty()) {
    821     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
    822     return;
    823   }
    824 
    825   bool Wave64 = isWave64(DriverArgs, Kind);
    826 
    827   // TODO: There are way too many flags that change this. Do we need to check
    828   // them all?
    829   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
    830              getDefaultDenormsAreZeroForTarget(Kind);
    831   bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
    832 
    833   bool UnsafeMathOpt =
    834       DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
    835   bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
    836   bool CorrectSqrt =
    837       DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
    838 
    839   // Add the OpenCL specific bitcode library.
    840   llvm::SmallVector<std::string, 12> BCLibs;
    841   BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
    842 
    843   // Add the generic set of libraries.
    844   BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
    845       DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
    846       FastRelaxedMath, CorrectSqrt));
    847 
    848   llvm::for_each(BCLibs, [&](StringRef BCFile) {
    849     CC1Args.push_back("-mlink-builtin-bitcode");
    850     CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
    851   });
    852 }
    853 
    854 llvm::SmallVector<std::string, 12>
    855 RocmInstallationDetector::getCommonBitcodeLibs(
    856     const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
    857     bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
    858     bool CorrectSqrt) const {
    859 
    860   llvm::SmallVector<std::string, 12> BCLibs;
    861 
    862   auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); };
    863 
    864   AddBCLib(getOCMLPath());
    865   AddBCLib(getOCKLPath());
    866   AddBCLib(getDenormalsAreZeroPath(DAZ));
    867   AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
    868   AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
    869   AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
    870   AddBCLib(getWavefrontSize64Path(Wave64));
    871   AddBCLib(LibDeviceFile);
    872 
    873   return BCLibs;
    874 }
    875 
    876 bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
    877   Option O = A->getOption();
    878   if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
    879     return true;
    880   return false;
    881 }
    882