Home | History | Annotate | Line # | Download | only in Targets
      1      1.1  joerg //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
      2      1.1  joerg //
      3      1.1  joerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4      1.1  joerg // See https://llvm.org/LICENSE.txt for license information.
      5      1.1  joerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6      1.1  joerg //
      7      1.1  joerg //===----------------------------------------------------------------------===//
      8      1.1  joerg //
      9      1.1  joerg // This file implements AMDGPU TargetInfo objects.
     10      1.1  joerg //
     11      1.1  joerg //===----------------------------------------------------------------------===//
     12      1.1  joerg 
     13      1.1  joerg #include "AMDGPU.h"
     14      1.1  joerg #include "clang/Basic/Builtins.h"
     15      1.1  joerg #include "clang/Basic/CodeGenOptions.h"
     16      1.1  joerg #include "clang/Basic/LangOptions.h"
     17      1.1  joerg #include "clang/Basic/MacroBuilder.h"
     18      1.1  joerg #include "clang/Basic/TargetBuiltins.h"
     19      1.1  joerg #include "llvm/ADT/StringSwitch.h"
     20  1.1.1.2  joerg #include "llvm/Frontend/OpenMP/OMPGridValues.h"
     21      1.1  joerg 
     22      1.1  joerg using namespace clang;
     23      1.1  joerg using namespace clang::targets;
     24      1.1  joerg 
     25      1.1  joerg namespace clang {
     26      1.1  joerg namespace targets {
     27      1.1  joerg 
// NOTE: getPointerWidthV() must be kept in sync with the pointer entries of
// the two description strings below; update it whenever they are edited.

// Data layout description used for the R600 (non-amdgcn) triples.
static const char *const DataLayoutStringR600 =
    // Endianness and the default pointer spec.
    "e-p:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    // Remaining n/S/A/G specs.
    "-n32:64-S32-A5-G1";

// Data layout description used for the amdgcn triples.
static const char *const DataLayoutStringAMDGCN =
    // Endianness and the default pointer spec.
    "e-p:64:64"
    // Pointer specs for the numbered address spaces p1..p6.
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    // Remaining n/S/A/G specs.
    "-n32:64-S32-A5-G1"
    // The ni spec, present only in the amdgcn layout.
    "-ni:7";
     40      1.1  joerg 
     41      1.1  joerg const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
     42      1.1  joerg     Generic,  // Default
     43      1.1  joerg     Global,   // opencl_global
     44      1.1  joerg     Local,    // opencl_local
     45      1.1  joerg     Constant, // opencl_constant
     46      1.1  joerg     Private,  // opencl_private
     47      1.1  joerg     Generic,  // opencl_generic
     48  1.1.1.2  joerg     Global,   // opencl_global_device
     49  1.1.1.2  joerg     Global,   // opencl_global_host
     50      1.1  joerg     Global,   // cuda_device
     51      1.1  joerg     Constant, // cuda_constant
     52  1.1.1.2  joerg     Local,    // cuda_shared
     53  1.1.1.2  joerg     Global,   // sycl_global
     54  1.1.1.2  joerg     Global,   // sycl_global_device
     55  1.1.1.2  joerg     Global,   // sycl_global_host
     56  1.1.1.2  joerg     Local,    // sycl_local
     57  1.1.1.2  joerg     Private,  // sycl_private
     58  1.1.1.2  joerg     Generic,  // ptr32_sptr
     59  1.1.1.2  joerg     Generic,  // ptr32_uptr
     60  1.1.1.2  joerg     Generic   // ptr64
     61      1.1  joerg };
     62      1.1  joerg 
     63      1.1  joerg const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
     64      1.1  joerg     Private,  // Default
     65      1.1  joerg     Global,   // opencl_global
     66      1.1  joerg     Local,    // opencl_local
     67      1.1  joerg     Constant, // opencl_constant
     68      1.1  joerg     Private,  // opencl_private
     69      1.1  joerg     Generic,  // opencl_generic
     70  1.1.1.2  joerg     Global,   // opencl_global_device
     71  1.1.1.2  joerg     Global,   // opencl_global_host
     72      1.1  joerg     Global,   // cuda_device
     73      1.1  joerg     Constant, // cuda_constant
     74  1.1.1.2  joerg     Local,    // cuda_shared
     75  1.1.1.2  joerg     // SYCL address space values for this map are dummy
     76  1.1.1.2  joerg     Generic,  // sycl_global
     77  1.1.1.2  joerg     Generic,  // sycl_global_device
     78  1.1.1.2  joerg     Generic,  // sycl_global_host
     79  1.1.1.2  joerg     Generic,  // sycl_local
     80  1.1.1.2  joerg     Generic,  // sycl_private
     81  1.1.1.2  joerg     Generic,  // ptr32_sptr
     82  1.1.1.2  joerg     Generic,  // ptr32_uptr
     83  1.1.1.2  joerg     Generic   // ptr64
     84  1.1.1.2  joerg 
     85      1.1  joerg };
     86      1.1  joerg } // namespace targets
     87      1.1  joerg } // namespace clang
     88      1.1  joerg 
     89      1.1  joerg const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
     90      1.1  joerg #define BUILTIN(ID, TYPE, ATTRS)                                               \
     91      1.1  joerg   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
     92      1.1  joerg #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
     93      1.1  joerg   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
     94      1.1  joerg #include "clang/Basic/BuiltinsAMDGPU.def"
     95      1.1  joerg };
     96      1.1  joerg 
     97      1.1  joerg const char *const AMDGPUTargetInfo::GCCRegNames[] = {
     98      1.1  joerg   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
     99      1.1  joerg   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    100      1.1  joerg   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    101      1.1  joerg   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    102      1.1  joerg   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    103      1.1  joerg   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    104      1.1  joerg   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    105      1.1  joerg   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    106      1.1  joerg   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    107      1.1  joerg   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    108      1.1  joerg   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    109      1.1  joerg   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    110      1.1  joerg   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    111      1.1  joerg   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    112      1.1  joerg   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    113      1.1  joerg   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    114      1.1  joerg   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
    115      1.1  joerg   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
    116      1.1  joerg   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
    117      1.1  joerg   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
    118      1.1  joerg   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
    119      1.1  joerg   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
    120      1.1  joerg   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
    121      1.1  joerg   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
    122      1.1  joerg   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
    123      1.1  joerg   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
    124      1.1  joerg   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
    125      1.1  joerg   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
    126      1.1  joerg   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
    127      1.1  joerg   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
    128      1.1  joerg   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
    129      1.1  joerg   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    130      1.1  joerg   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
    131      1.1  joerg   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
    132      1.1  joerg   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
    133      1.1  joerg   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
    134      1.1  joerg   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
    135      1.1  joerg   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
    136      1.1  joerg   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
    137      1.1  joerg   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
    138      1.1  joerg   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
    139      1.1  joerg   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
    140      1.1  joerg   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
    141      1.1  joerg   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
    142  1.1.1.2  joerg   "flat_scratch_lo", "flat_scratch_hi",
    143  1.1.1.2  joerg   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
    144  1.1.1.2  joerg   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
    145  1.1.1.2  joerg   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
    146  1.1.1.2  joerg   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
    147  1.1.1.2  joerg   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
    148  1.1.1.2  joerg   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
    149  1.1.1.2  joerg   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
    150  1.1.1.2  joerg   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
    151  1.1.1.2  joerg   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
    152  1.1.1.2  joerg   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
    153  1.1.1.2  joerg   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
    154  1.1.1.2  joerg   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
    155  1.1.1.2  joerg   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
    156  1.1.1.2  joerg   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
    157  1.1.1.2  joerg   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
    158  1.1.1.2  joerg   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
    159  1.1.1.2  joerg   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
    160  1.1.1.2  joerg   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
    161  1.1.1.2  joerg   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
    162  1.1.1.2  joerg   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
    163  1.1.1.2  joerg   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
    164  1.1.1.2  joerg   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
    165  1.1.1.2  joerg   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
    166  1.1.1.2  joerg   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
    167  1.1.1.2  joerg   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
    168  1.1.1.2  joerg   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
    169  1.1.1.2  joerg   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
    170  1.1.1.2  joerg   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
    171  1.1.1.2  joerg   "a252", "a253", "a254", "a255"
    172      1.1  joerg };
    173      1.1  joerg 
    174      1.1  joerg ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
    175      1.1  joerg   return llvm::makeArrayRef(GCCRegNames);
    176      1.1  joerg }
    177      1.1  joerg 
    178      1.1  joerg bool AMDGPUTargetInfo::initFeatureMap(
    179      1.1  joerg     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    180      1.1  joerg     const std::vector<std::string> &FeatureVec) const {
    181      1.1  joerg 
    182      1.1  joerg   using namespace llvm::AMDGPU;
    183      1.1  joerg 
    184      1.1  joerg   // XXX - What does the member GPU mean if device name string passed here?
    185      1.1  joerg   if (isAMDGCN(getTriple())) {
    186      1.1  joerg     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    187  1.1.1.2  joerg     case GK_GFX1034:
    188  1.1.1.2  joerg     case GK_GFX1033:
    189  1.1.1.2  joerg     case GK_GFX1032:
    190  1.1.1.2  joerg     case GK_GFX1031:
    191  1.1.1.2  joerg     case GK_GFX1030:
    192  1.1.1.2  joerg       Features["ci-insts"] = true;
    193  1.1.1.2  joerg       Features["dot1-insts"] = true;
    194  1.1.1.2  joerg       Features["dot2-insts"] = true;
    195  1.1.1.2  joerg       Features["dot5-insts"] = true;
    196  1.1.1.2  joerg       Features["dot6-insts"] = true;
    197  1.1.1.2  joerg       Features["dot7-insts"] = true;
    198  1.1.1.2  joerg       Features["dl-insts"] = true;
    199  1.1.1.2  joerg       Features["flat-address-space"] = true;
    200  1.1.1.2  joerg       Features["16-bit-insts"] = true;
    201  1.1.1.2  joerg       Features["dpp"] = true;
    202  1.1.1.2  joerg       Features["gfx8-insts"] = true;
    203  1.1.1.2  joerg       Features["gfx9-insts"] = true;
    204  1.1.1.2  joerg       Features["gfx10-insts"] = true;
    205  1.1.1.2  joerg       Features["gfx10-3-insts"] = true;
    206  1.1.1.2  joerg       Features["s-memrealtime"] = true;
    207  1.1.1.2  joerg       Features["s-memtime-inst"] = true;
    208  1.1.1.2  joerg       break;
    209      1.1  joerg     case GK_GFX1012:
    210      1.1  joerg     case GK_GFX1011:
    211      1.1  joerg       Features["dot1-insts"] = true;
    212      1.1  joerg       Features["dot2-insts"] = true;
    213      1.1  joerg       Features["dot5-insts"] = true;
    214      1.1  joerg       Features["dot6-insts"] = true;
    215  1.1.1.2  joerg       Features["dot7-insts"] = true;
    216      1.1  joerg       LLVM_FALLTHROUGH;
    217      1.1  joerg     case GK_GFX1010:
    218      1.1  joerg       Features["dl-insts"] = true;
    219      1.1  joerg       Features["ci-insts"] = true;
    220      1.1  joerg       Features["flat-address-space"] = true;
    221      1.1  joerg       Features["16-bit-insts"] = true;
    222      1.1  joerg       Features["dpp"] = true;
    223      1.1  joerg       Features["gfx8-insts"] = true;
    224      1.1  joerg       Features["gfx9-insts"] = true;
    225      1.1  joerg       Features["gfx10-insts"] = true;
    226      1.1  joerg       Features["s-memrealtime"] = true;
    227  1.1.1.2  joerg       Features["s-memtime-inst"] = true;
    228      1.1  joerg       break;
    229  1.1.1.2  joerg     case GK_GFX90A:
    230  1.1.1.2  joerg       Features["gfx90a-insts"] = true;
    231  1.1.1.2  joerg       LLVM_FALLTHROUGH;
    232      1.1  joerg     case GK_GFX908:
    233      1.1  joerg       Features["dot3-insts"] = true;
    234      1.1  joerg       Features["dot4-insts"] = true;
    235      1.1  joerg       Features["dot5-insts"] = true;
    236      1.1  joerg       Features["dot6-insts"] = true;
    237  1.1.1.2  joerg       Features["mai-insts"] = true;
    238      1.1  joerg       LLVM_FALLTHROUGH;
    239      1.1  joerg     case GK_GFX906:
    240      1.1  joerg       Features["dl-insts"] = true;
    241      1.1  joerg       Features["dot1-insts"] = true;
    242      1.1  joerg       Features["dot2-insts"] = true;
    243  1.1.1.2  joerg       Features["dot7-insts"] = true;
    244      1.1  joerg       LLVM_FALLTHROUGH;
    245  1.1.1.2  joerg     case GK_GFX90C:
    246      1.1  joerg     case GK_GFX909:
    247      1.1  joerg     case GK_GFX904:
    248      1.1  joerg     case GK_GFX902:
    249      1.1  joerg     case GK_GFX900:
    250      1.1  joerg       Features["gfx9-insts"] = true;
    251      1.1  joerg       LLVM_FALLTHROUGH;
    252      1.1  joerg     case GK_GFX810:
    253  1.1.1.2  joerg     case GK_GFX805:
    254      1.1  joerg     case GK_GFX803:
    255      1.1  joerg     case GK_GFX802:
    256      1.1  joerg     case GK_GFX801:
    257      1.1  joerg       Features["gfx8-insts"] = true;
    258      1.1  joerg       Features["16-bit-insts"] = true;
    259      1.1  joerg       Features["dpp"] = true;
    260      1.1  joerg       Features["s-memrealtime"] = true;
    261      1.1  joerg       LLVM_FALLTHROUGH;
    262  1.1.1.2  joerg     case GK_GFX705:
    263      1.1  joerg     case GK_GFX704:
    264      1.1  joerg     case GK_GFX703:
    265      1.1  joerg     case GK_GFX702:
    266      1.1  joerg     case GK_GFX701:
    267      1.1  joerg     case GK_GFX700:
    268      1.1  joerg       Features["ci-insts"] = true;
    269      1.1  joerg       Features["flat-address-space"] = true;
    270      1.1  joerg       LLVM_FALLTHROUGH;
    271  1.1.1.2  joerg     case GK_GFX602:
    272      1.1  joerg     case GK_GFX601:
    273      1.1  joerg     case GK_GFX600:
    274  1.1.1.2  joerg       Features["s-memtime-inst"] = true;
    275      1.1  joerg       break;
    276      1.1  joerg     case GK_NONE:
    277      1.1  joerg       break;
    278      1.1  joerg     default:
    279      1.1  joerg       llvm_unreachable("Unhandled GPU!");
    280      1.1  joerg     }
    281      1.1  joerg   } else {
    282      1.1  joerg     if (CPU.empty())
    283      1.1  joerg       CPU = "r600";
    284      1.1  joerg 
    285      1.1  joerg     switch (llvm::AMDGPU::parseArchR600(CPU)) {
    286      1.1  joerg     case GK_CAYMAN:
    287      1.1  joerg     case GK_CYPRESS:
    288      1.1  joerg     case GK_RV770:
    289      1.1  joerg     case GK_RV670:
    290      1.1  joerg       // TODO: Add fp64 when implemented.
    291      1.1  joerg       break;
    292      1.1  joerg     case GK_TURKS:
    293      1.1  joerg     case GK_CAICOS:
    294      1.1  joerg     case GK_BARTS:
    295      1.1  joerg     case GK_SUMO:
    296      1.1  joerg     case GK_REDWOOD:
    297      1.1  joerg     case GK_JUNIPER:
    298      1.1  joerg     case GK_CEDAR:
    299      1.1  joerg     case GK_RV730:
    300      1.1  joerg     case GK_RV710:
    301      1.1  joerg     case GK_RS880:
    302      1.1  joerg     case GK_R630:
    303      1.1  joerg     case GK_R600:
    304      1.1  joerg       break;
    305      1.1  joerg     default:
    306      1.1  joerg       llvm_unreachable("Unhandled GPU!");
    307      1.1  joerg     }
    308      1.1  joerg   }
    309      1.1  joerg 
    310      1.1  joerg   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
    311      1.1  joerg }
    312      1.1  joerg 
    313      1.1  joerg void AMDGPUTargetInfo::fillValidCPUList(
    314      1.1  joerg     SmallVectorImpl<StringRef> &Values) const {
    315      1.1  joerg   if (isAMDGCN(getTriple()))
    316      1.1  joerg     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
    317      1.1  joerg   else
    318      1.1  joerg     llvm::AMDGPU::fillValidArchListR600(Values);
    319      1.1  joerg }
    320      1.1  joerg 
    321      1.1  joerg void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
    322      1.1  joerg   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
    323      1.1  joerg }
    324      1.1  joerg 
    325      1.1  joerg AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
    326      1.1  joerg                                    const TargetOptions &Opts)
    327      1.1  joerg     : TargetInfo(Triple),
    328      1.1  joerg       GPUKind(isAMDGCN(Triple) ?
    329      1.1  joerg               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
    330      1.1  joerg               llvm::AMDGPU::parseArchR600(Opts.CPU)),
    331      1.1  joerg       GPUFeatures(isAMDGCN(Triple) ?
    332      1.1  joerg                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
    333      1.1  joerg                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
    334      1.1  joerg   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
    335      1.1  joerg                                         : DataLayoutStringR600);
    336  1.1.1.2  joerg   GridValues = llvm::omp::AMDGPUGpuGridValues;
    337      1.1  joerg 
    338      1.1  joerg   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
    339      1.1  joerg                      !isAMDGCN(Triple));
    340      1.1  joerg   UseAddrSpaceMapMangling = true;
    341      1.1  joerg 
    342      1.1  joerg   HasLegalHalfType = true;
    343      1.1  joerg   HasFloat16 = true;
    344  1.1.1.2  joerg   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
    345  1.1.1.2  joerg   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
    346      1.1  joerg 
    347      1.1  joerg   // Set pointer width and alignment for target address space 0.
    348  1.1.1.2  joerg   PointerWidth = PointerAlign = getPointerWidthV(Generic);
    349      1.1  joerg   if (getMaxPointerWidth() == 64) {
    350      1.1  joerg     LongWidth = LongAlign = 64;
    351      1.1  joerg     SizeType = UnsignedLong;
    352      1.1  joerg     PtrDiffType = SignedLong;
    353      1.1  joerg     IntPtrType = SignedLong;
    354      1.1  joerg   }
    355      1.1  joerg 
    356      1.1  joerg   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
    357      1.1  joerg }
    358      1.1  joerg 
    359      1.1  joerg void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
    360      1.1  joerg   TargetInfo::adjust(Opts);
    361      1.1  joerg   // ToDo: There are still a few places using default address space as private
    362      1.1  joerg   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
    363      1.1  joerg   // can be removed from the following line.
    364      1.1  joerg   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
    365      1.1  joerg                      !isAMDGCN(getTriple()));
    366      1.1  joerg }
    367      1.1  joerg 
    368      1.1  joerg ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
    369      1.1  joerg   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
    370      1.1  joerg                                              Builtin::FirstTSBuiltin);
    371      1.1  joerg }
    372      1.1  joerg 
    373      1.1  joerg void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
    374      1.1  joerg                                         MacroBuilder &Builder) const {
    375      1.1  joerg   Builder.defineMacro("__AMD__");
    376      1.1  joerg   Builder.defineMacro("__AMDGPU__");
    377      1.1  joerg 
    378      1.1  joerg   if (isAMDGCN(getTriple()))
    379      1.1  joerg     Builder.defineMacro("__AMDGCN__");
    380      1.1  joerg   else
    381      1.1  joerg     Builder.defineMacro("__R600__");
    382      1.1  joerg 
    383      1.1  joerg   if (GPUKind != llvm::AMDGPU::GK_NONE) {
    384      1.1  joerg     StringRef CanonName = isAMDGCN(getTriple()) ?
    385      1.1  joerg       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    386      1.1  joerg     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
    387  1.1.1.2  joerg     if (isAMDGCN(getTriple())) {
    388  1.1.1.2  joerg       Builder.defineMacro("__amdgcn_processor__",
    389  1.1.1.2  joerg                           Twine("\"") + Twine(CanonName) + Twine("\""));
    390  1.1.1.2  joerg       Builder.defineMacro("__amdgcn_target_id__",
    391  1.1.1.2  joerg                           Twine("\"") + Twine(getTargetID().getValue()) +
    392  1.1.1.2  joerg                               Twine("\""));
    393  1.1.1.2  joerg       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
    394  1.1.1.2  joerg         auto Loc = OffloadArchFeatures.find(F);
    395  1.1.1.2  joerg         if (Loc != OffloadArchFeatures.end()) {
    396  1.1.1.2  joerg           std::string NewF = F.str();
    397  1.1.1.2  joerg           std::replace(NewF.begin(), NewF.end(), '-', '_');
    398  1.1.1.2  joerg           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
    399  1.1.1.2  joerg                                   Twine("__"),
    400  1.1.1.2  joerg                               Loc->second ? "1" : "0");
    401  1.1.1.2  joerg         }
    402  1.1.1.2  joerg       }
    403  1.1.1.2  joerg     }
    404      1.1  joerg   }
    405      1.1  joerg 
    406      1.1  joerg   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
    407      1.1  joerg   // removed in the near future.
    408      1.1  joerg   if (hasFMAF())
    409      1.1  joerg     Builder.defineMacro("__HAS_FMAF__");
    410      1.1  joerg   if (hasFastFMAF())
    411      1.1  joerg     Builder.defineMacro("FP_FAST_FMAF");
    412      1.1  joerg   if (hasLDEXPF())
    413      1.1  joerg     Builder.defineMacro("__HAS_LDEXPF__");
    414      1.1  joerg   if (hasFP64())
    415      1.1  joerg     Builder.defineMacro("__HAS_FP64__");
    416      1.1  joerg   if (hasFastFMA())
    417      1.1  joerg     Builder.defineMacro("FP_FAST_FMA");
    418  1.1.1.2  joerg 
    419  1.1.1.2  joerg   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
    420      1.1  joerg }
    421      1.1  joerg 
    422      1.1  joerg void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
    423      1.1  joerg   assert(HalfFormat == Aux->HalfFormat);
    424      1.1  joerg   assert(FloatFormat == Aux->FloatFormat);
    425      1.1  joerg   assert(DoubleFormat == Aux->DoubleFormat);
    426      1.1  joerg 
    427      1.1  joerg   // On x86_64 long double is 80-bit extended precision format, which is
    428      1.1  joerg   // not supported by AMDGPU. 128-bit floating point format is also not
    429      1.1  joerg   // supported by AMDGPU. Therefore keep its own format for these two types.
    430      1.1  joerg   auto SaveLongDoubleFormat = LongDoubleFormat;
    431      1.1  joerg   auto SaveFloat128Format = Float128Format;
    432      1.1  joerg   copyAuxTarget(Aux);
    433      1.1  joerg   LongDoubleFormat = SaveLongDoubleFormat;
    434      1.1  joerg   Float128Format = SaveFloat128Format;
    435  1.1.1.2  joerg   // For certain builtin types support on the host target, claim they are
    436  1.1.1.2  joerg   // support to pass the compilation of the host code during the device-side
    437  1.1.1.2  joerg   // compilation.
    438  1.1.1.2  joerg   // FIXME: As the side effect, we also accept `__float128` uses in the device
    439  1.1.1.2  joerg   // code. To rejct these builtin types supported in the host target but not in
    440  1.1.1.2  joerg   // the device target, one approach would support `device_builtin` attribute
    441  1.1.1.2  joerg   // so that we could tell the device builtin types from the host ones. The
    442  1.1.1.2  joerg   // also solves the different representations of the same builtin type, such
    443  1.1.1.2  joerg   // as `size_t` in the MSVC environment.
    444  1.1.1.2  joerg   if (Aux->hasFloat128Type()) {
    445  1.1.1.2  joerg     HasFloat128 = true;
    446  1.1.1.2  joerg     Float128Format = DoubleFormat;
    447  1.1.1.2  joerg   }
    448      1.1  joerg }
    449