Home | History | Annotate | Line # | Download | only in Sema
      1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 /// \file
      9 /// This file implements semantic analysis for CUDA constructs.
     10 ///
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "clang/AST/ASTContext.h"
     14 #include "clang/AST/Decl.h"
     15 #include "clang/AST/ExprCXX.h"
     16 #include "clang/Basic/Cuda.h"
     17 #include "clang/Basic/TargetInfo.h"
     18 #include "clang/Lex/Preprocessor.h"
     19 #include "clang/Sema/Lookup.h"
     20 #include "clang/Sema/ScopeInfo.h"
     21 #include "clang/Sema/Sema.h"
     22 #include "clang/Sema/SemaDiagnostic.h"
     23 #include "clang/Sema/SemaInternal.h"
     24 #include "clang/Sema/Template.h"
     25 #include "llvm/ADT/Optional.h"
     26 #include "llvm/ADT/SmallVector.h"
     27 using namespace clang;
     28 
     29 template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
     30   if (!D)
     31     return false;
     32   if (auto *A = D->getAttr<AttrT>())
     33     return !A->isImplicit();
     34   return false;
     35 }
     36 
     37 void Sema::PushForceCUDAHostDevice() {
     38   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
     39   ForceCUDAHostDeviceDepth++;
     40 }
     41 
     42 bool Sema::PopForceCUDAHostDevice() {
     43   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
     44   if (ForceCUDAHostDeviceDepth == 0)
     45     return false;
     46   ForceCUDAHostDeviceDepth--;
     47   return true;
     48 }
     49 
     50 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
     51                                          MultiExprArg ExecConfig,
     52                                          SourceLocation GGGLoc) {
     53   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
     54   if (!ConfigDecl)
     55     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
     56                      << getCudaConfigureFuncName());
     57   QualType ConfigQTy = ConfigDecl->getType();
     58 
     59   DeclRefExpr *ConfigDR = new (Context)
     60       DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
     61   MarkFunctionReferenced(LLLLoc, ConfigDecl);
     62 
     63   return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
     64                        /*IsExecConfig=*/true);
     65 }
     66 
     67 Sema::CUDAFunctionTarget
     68 Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
     69   bool HasHostAttr = false;
     70   bool HasDeviceAttr = false;
     71   bool HasGlobalAttr = false;
     72   bool HasInvalidTargetAttr = false;
     73   for (const ParsedAttr &AL : Attrs) {
     74     switch (AL.getKind()) {
     75     case ParsedAttr::AT_CUDAGlobal:
     76       HasGlobalAttr = true;
     77       break;
     78     case ParsedAttr::AT_CUDAHost:
     79       HasHostAttr = true;
     80       break;
     81     case ParsedAttr::AT_CUDADevice:
     82       HasDeviceAttr = true;
     83       break;
     84     case ParsedAttr::AT_CUDAInvalidTarget:
     85       HasInvalidTargetAttr = true;
     86       break;
     87     default:
     88       break;
     89     }
     90   }
     91 
     92   if (HasInvalidTargetAttr)
     93     return CFT_InvalidTarget;
     94 
     95   if (HasGlobalAttr)
     96     return CFT_Global;
     97 
     98   if (HasHostAttr && HasDeviceAttr)
     99     return CFT_HostDevice;
    100 
    101   if (HasDeviceAttr)
    102     return CFT_Device;
    103 
    104   return CFT_Host;
    105 }
    106 
    107 template <typename A>
    108 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
    109   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
    110            return isa<A>(Attribute) &&
    111                   !(IgnoreImplicitAttr && Attribute->isImplicit());
    112          });
    113 }
    114 
    115 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
    116 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
    117                                                   bool IgnoreImplicitHDAttr) {
    118   // Code that lives outside a function is run on the host.
    119   if (D == nullptr)
    120     return CFT_Host;
    121 
    122   if (D->hasAttr<CUDAInvalidTargetAttr>())
    123     return CFT_InvalidTarget;
    124 
    125   if (D->hasAttr<CUDAGlobalAttr>())
    126     return CFT_Global;
    127 
    128   if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    129     if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
    130       return CFT_HostDevice;
    131     return CFT_Device;
    132   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    133     return CFT_Host;
    134   } else if ((D->isImplicit() || !D->isUserProvided()) &&
    135              !IgnoreImplicitHDAttr) {
    136     // Some implicit declarations (like intrinsic functions) are not marked.
    137     // Set the most lenient target on them for maximal flexibility.
    138     return CFT_HostDevice;
    139   }
    140 
    141   return CFT_Host;
    142 }
    143 
    144 /// IdentifyTarget - Determine the CUDA compilation target for this variable.
    145 Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
    146   if (Var->hasAttr<HIPManagedAttr>())
    147     return CVT_Unified;
    148   if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
    149     return CVT_Both;
    150   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
    151       Var->hasAttr<CUDASharedAttr>() ||
    152       Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
    153       Var->getType()->isCUDADeviceBuiltinTextureType())
    154     return CVT_Device;
    155   // Function-scope static variable without explicit device or constant
    156   // attribute are emitted
    157   //  - on both sides in host device functions
    158   //  - on device side in device or global functions
    159   if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    160     switch (IdentifyCUDATarget(FD)) {
    161     case CFT_HostDevice:
    162       return CVT_Both;
    163     case CFT_Device:
    164     case CFT_Global:
    165       return CVT_Device;
    166     default:
    167       return CVT_Host;
    168     }
    169   }
    170   return CVT_Host;
    171 }
    172 
    173 // * CUDA Call preference table
    174 //
    175 // F - from,
    176 // T - to
    177 // Ph - preference in host mode
    178 // Pd - preference in device mode
    179 // H  - handled in (x)
    180 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
    181 //
    182 // | F  | T  | Ph  | Pd  |  H  |
    183 // |----+----+-----+-----+-----+
    184 // | d  | d  | N   | N   | (c) |
    185 // | d  | g  | --  | --  | (a) |
    186 // | d  | h  | --  | --  | (e) |
    187 // | d  | hd | HD  | HD  | (b) |
    188 // | g  | d  | N   | N   | (c) |
    189 // | g  | g  | --  | --  | (a) |
    190 // | g  | h  | --  | --  | (e) |
    191 // | g  | hd | HD  | HD  | (b) |
    192 // | h  | d  | --  | --  | (e) |
    193 // | h  | g  | N   | N   | (c) |
    194 // | h  | h  | N   | N   | (c) |
    195 // | h  | hd | HD  | HD  | (b) |
    196 // | hd | d  | WS  | SS  | (d) |
    197 // | hd | g  | SS  | --  |(d/a)|
    198 // | hd | h  | SS  | WS  | (d) |
    199 // | hd | hd | HD  | HD  | (b) |
    200 
    201 Sema::CUDAFunctionPreference
    202 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
    203                              const FunctionDecl *Callee) {
    204   assert(Callee && "Callee must be valid.");
    205   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
    206   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
    207 
    208   // If one of the targets is invalid, the check always fails, no matter what
    209   // the other target is.
    210   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    211     return CFP_Never;
    212 
    213   // (a) Can't call global from some contexts until we support CUDA's
    214   // dynamic parallelism.
    215   if (CalleeTarget == CFT_Global &&
    216       (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    217     return CFP_Never;
    218 
    219   // (b) Calling HostDevice is OK for everyone.
    220   if (CalleeTarget == CFT_HostDevice)
    221     return CFP_HostDevice;
    222 
    223   // (c) Best case scenarios
    224   if (CalleeTarget == CallerTarget ||
    225       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
    226       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    227     return CFP_Native;
    228 
    229   // (d) HostDevice behavior depends on compilation mode.
    230   if (CallerTarget == CFT_HostDevice) {
    231     // It's OK to call a compilation-mode matching function from an HD one.
    232     if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
    233         (!getLangOpts().CUDAIsDevice &&
    234          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
    235       return CFP_SameSide;
    236 
    237     // Calls from HD to non-mode-matching functions (i.e., to host functions
    238     // when compiling in device mode or to device functions when compiling in
    239     // host mode) are allowed at the sema level, but eventually rejected if
    240     // they're ever codegened.  TODO: Reject said calls earlier.
    241     return CFP_WrongSide;
    242   }
    243 
    244   // (e) Calling across device/host boundary is not something you should do.
    245   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
    246       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
    247       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    248     return CFP_Never;
    249 
    250   llvm_unreachable("All cases should've been handled by now.");
    251 }
    252 
    253 template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
    254   if (!D)
    255     return false;
    256   if (auto *A = D->getAttr<AttrT>())
    257     return A->isImplicit();
    258   return D->isImplicit();
    259 }
    260 
    261 bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
    262   bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
    263   bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
    264   return IsImplicitDevAttr && IsImplicitHostAttr;
    265 }
    266 
    267 void Sema::EraseUnwantedCUDAMatches(
    268     const FunctionDecl *Caller,
    269     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
    270   if (Matches.size() <= 1)
    271     return;
    272 
    273   using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
    274 
    275   // Gets the CUDA function preference for a call from Caller to Match.
    276   auto GetCFP = [&](const Pair &Match) {
    277     return IdentifyCUDAPreference(Caller, Match.second);
    278   };
    279 
    280   // Find the best call preference among the functions in Matches.
    281   CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
    282       Matches.begin(), Matches.end(),
    283       [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
    284 
    285   // Erase all functions with lower priority.
    286   llvm::erase_if(Matches,
    287                  [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
    288 }
    289 
    290 /// When an implicitly-declared special member has to invoke more than one
    291 /// base/field special member, conflicts may occur in the targets of these
    292 /// members. For example, if one base's member __host__ and another's is
    293 /// __device__, it's a conflict.
    294 /// This function figures out if the given targets \param Target1 and
    295 /// \param Target2 conflict, and if they do not it fills in
    296 /// \param ResolvedTarget with a target that resolves for both calls.
    297 /// \return true if there's a conflict, false otherwise.
    298 static bool
    299 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
    300                                 Sema::CUDAFunctionTarget Target2,
    301                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
    302   // Only free functions and static member functions may be global.
    303   assert(Target1 != Sema::CFT_Global);
    304   assert(Target2 != Sema::CFT_Global);
    305 
    306   if (Target1 == Sema::CFT_HostDevice) {
    307     *ResolvedTarget = Target2;
    308   } else if (Target2 == Sema::CFT_HostDevice) {
    309     *ResolvedTarget = Target1;
    310   } else if (Target1 != Target2) {
    311     return true;
    312   } else {
    313     *ResolvedTarget = Target1;
    314   }
    315 
    316   return false;
    317 }
    318 
// Infer the CUDA target (__host__/__device__) of the implicitly-defaulted
// special member \p MemberDecl (of kind \p CSM) of \p ClassDecl from the
// targets of the base- and field- special members it must invoke.  On an
// unresolvable conflict, marks the member CUDAInvalidTargetAttr and returns
// true; otherwise attaches the inferred implicit attributes and returns
// false.
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  // Accumulates the merged target of all callees; empty until the first
  // callee is seen.
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from
  // here.  Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes (their
  // special members would never construct/destroy those bases).
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      // First callee seen: adopt its target wholesale.
      InferredTarget = BaseMethodTarget;
    } else {
      // Merge with the target inferred so far; a hard conflict poisons the
      // member with CUDAInvalidTargetAttr so later calls to it are rejected.
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    // Arrays of class type invoke the element type's special member.
    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }


  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}
    460 
    461 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
    462   if (!CD->isDefined() && CD->isTemplateInstantiation())
    463     InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
    464 
    465   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
    466   // empty at a point in the translation unit, if it is either a
    467   // trivial constructor
    468   if (CD->isTrivial())
    469     return true;
    470 
    471   // ... or it satisfies all of the following conditions:
    472   // The constructor function has been defined.
    473   // The constructor function has no parameters,
    474   // and the function body is an empty compound statement.
    475   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    476     return false;
    477 
    478   // Its class has no virtual functions and no virtual base classes.
    479   if (CD->getParent()->isDynamicClass())
    480     return false;
    481 
    482   // Union ctor does not call ctors of its data members.
    483   if (CD->getParent()->isUnion())
    484     return true;
    485 
    486   // The only form of initializer allowed is an empty constructor.
    487   // This will recursively check all base classes and member initializers
    488   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
    489         if (const CXXConstructExpr *CE =
    490                 dyn_cast<CXXConstructExpr>(CI->getInit()))
    491           return isEmptyCudaConstructor(Loc, CE->getConstructor());
    492         return false;
    493       }))
    494     return false;
    495 
    496   return true;
    497 }
    498 
    499 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
    500   // No destructor -> no problem.
    501   if (!DD)
    502     return true;
    503 
    504   if (!DD->isDefined() && DD->isTemplateInstantiation())
    505     InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
    506 
    507   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
    508   // empty at a point in the translation unit, if it is either a
    509   // trivial constructor
    510   if (DD->isTrivial())
    511     return true;
    512 
    513   // ... or it satisfies all of the following conditions:
    514   // The destructor function has been defined.
    515   // and the function body is an empty compound statement.
    516   if (!DD->hasTrivialBody())
    517     return false;
    518 
    519   const CXXRecordDecl *ClassDecl = DD->getParent();
    520 
    521   // Its class has no virtual functions and no virtual base classes.
    522   if (ClassDecl->isDynamicClass())
    523     return false;
    524 
    525   // Union does not have base class and union dtor does not call dtors of its
    526   // data members.
    527   if (DD->getParent()->isUnion())
    528     return true;
    529 
    530   // Only empty destructors are allowed. This will recursively check
    531   // destructors for all base classes...
    532   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
    533         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
    534           return isEmptyCudaDestructor(Loc, RD->getDestructor());
    535         return true;
    536       }))
    537     return false;
    538 
    539   // ... and member fields.
    540   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
    541         if (CXXRecordDecl *RD = Field->getType()
    542                                     ->getBaseElementTypeUnsafe()
    543                                     ->getAsCXXRecordDecl())
    544           return isEmptyCudaDestructor(Loc, RD->getDestructor());
    545         return true;
    546       }))
    547     return false;
    548 
    549   return true;
    550 }
    551 
    552 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
    553   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    554     return;
    555   const Expr *Init = VD->getInit();
    556   if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
    557       VD->hasAttr<CUDASharedAttr>()) {
    558     if (LangOpts.GPUAllowDeviceInit)
    559       return;
    560     bool AllowedInit = false;
    561     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
    562       AllowedInit =
    563           isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    564     // We'll allow constant initializers even if it's a non-empty
    565     // constructor according to CUDA rules. This deviates from NVCC,
    566     // but allows us to handle things like constexpr constructors.
    567     if (!AllowedInit &&
    568         (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
    569       auto *Init = VD->getInit();
    570       // isConstantInitializer cannot be called with dependent value, therefore
    571       // we skip checking dependent value here. This is OK since
    572       // checkAllowedCUDAInitializer is called again when the template is
    573       // instantiated.
    574       AllowedInit =
    575           VD->getType()->isDependentType() || Init->isValueDependent() ||
    576           Init->isConstantInitializer(Context,
    577                                       VD->getType()->isReferenceType());
    578     }
    579 
    580     // Also make sure that destructor, if there is one, is empty.
    581     if (AllowedInit)
    582       if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
    583         AllowedInit =
    584             isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
    585 
    586     if (!AllowedInit) {
    587       Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
    588                                   ? diag::err_shared_var_init
    589                                   : diag::err_dynamic_var_init)
    590           << Init->getSourceRange();
    591       VD->setInvalidDecl();
    592     }
    593   } else {
    594     // This is a host-side global variable.  Check that the initializer is
    595     // callable from the host side.
    596     const FunctionDecl *InitFn = nullptr;
    597     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
    598       InitFn = CE->getConstructor();
    599     } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
    600       InitFn = CE->getDirectCallee();
    601     }
    602     if (InitFn) {
    603       CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
    604       if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
    605         Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
    606             << InitFnTarget << InitFn;
    607         Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
    608         VD->setInvalidDecl();
    609       }
    610     }
    611   }
    612 }
    613 
    614 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
    615 // treated as implicitly __host__ __device__, unless:
    616 //  * it is a variadic function (device-side variadic functions are not
    617 //    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
    621 //
    622 // In addition, all function decls are treated as __host__ __device__ when
    623 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
    624 //   #pragma clang force_cuda_host_device_begin/end
    625 // pair).
    626 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
    627                                        const LookupResult &Previous) {
    628   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    629 
    630   if (ForceCUDAHostDeviceDepth > 0) {
    631     if (!NewD->hasAttr<CUDAHostAttr>())
    632       NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    633     if (!NewD->hasAttr<CUDADeviceAttr>())
    634       NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    635     return;
    636   }
    637 
    638   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
    639       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
    640       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    641     return;
    642 
    643   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
    644   // attributes?
    645   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    646     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
    647       D = Using->getTargetDecl();
    648     FunctionDecl *OldD = D->getAsFunction();
    649     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
    650            !OldD->hasAttr<CUDAHostAttr>() &&
    651            !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
    652                        /* ConsiderCudaAttrs = */ false);
    653   };
    654   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
    655   if (It != Previous.end()) {
    656     // We found a __device__ function with the same name and signature as NewD
    657     // (ignoring CUDA attrs).  This is an error unless that function is defined
    658     // in a system header, in which case we simply return without making NewD
    659     // host+device.
    660     NamedDecl *Match = *It;
    661     if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
    662       Diag(NewD->getLocation(),
    663            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
    664           << NewD;
    665       Diag(Match->getLocation(),
    666            diag::note_cuda_conflicting_device_function_declared_here);
    667     }
    668     return;
    669   }
    670 
    671   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    672   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    673 }
    674 
    675 void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
    676   if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
    677       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
    678       !VD->hasAttr<CUDAConstantAttr>()) {
    679     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
    680   }
    681 }
    682 
    683 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
    684                                                        unsigned DiagID) {
    685   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    686   SemaDiagnosticBuilder::Kind DiagKind = [&] {
    687     if (!isa<FunctionDecl>(CurContext))
    688       return SemaDiagnosticBuilder::K_Nop;
    689     switch (CurrentCUDATarget()) {
    690     case CFT_Global:
    691     case CFT_Device:
    692       return SemaDiagnosticBuilder::K_Immediate;
    693     case CFT_HostDevice:
    694       // An HD function counts as host code if we're compiling for host, and
    695       // device code if we're compiling for device.  Defer any errors in device
    696       // mode until the function is known-emitted.
    697       if (!getLangOpts().CUDAIsDevice)
    698         return SemaDiagnosticBuilder::K_Nop;
    699       if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
    700         return SemaDiagnosticBuilder::K_Immediate;
    701       return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
    702               FunctionEmissionStatus::Emitted)
    703                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
    704                  : SemaDiagnosticBuilder::K_Deferred;
    705     default:
    706       return SemaDiagnosticBuilder::K_Nop;
    707     }
    708   }();
    709   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
    710                                dyn_cast<FunctionDecl>(CurContext), *this);
    711 }
    712 
    713 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
    714                                                      unsigned DiagID) {
    715   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    716   SemaDiagnosticBuilder::Kind DiagKind = [&] {
    717     if (!isa<FunctionDecl>(CurContext))
    718       return SemaDiagnosticBuilder::K_Nop;
    719     switch (CurrentCUDATarget()) {
    720     case CFT_Host:
    721       return SemaDiagnosticBuilder::K_Immediate;
    722     case CFT_HostDevice:
    723       // An HD function counts as host code if we're compiling for host, and
    724       // device code if we're compiling for device.  Defer any errors in device
    725       // mode until the function is known-emitted.
    726       if (getLangOpts().CUDAIsDevice)
    727         return SemaDiagnosticBuilder::K_Nop;
    728       if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
    729         return SemaDiagnosticBuilder::K_Immediate;
    730       return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
    731               FunctionEmissionStatus::Emitted)
    732                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
    733                  : SemaDiagnosticBuilder::K_Deferred;
    734     default:
    735       return SemaDiagnosticBuilder::K_Nop;
    736     }
    737   }();
    738   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
    739                                dyn_cast<FunctionDecl>(CurContext), *this);
    740 }
    741 
    742 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
    743   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    744   assert(Callee && "Callee may not be null.");
    745 
    746   auto &ExprEvalCtx = ExprEvalContexts.back();
    747   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    748     return true;
    749 
    750   // FIXME: Is bailing out early correct here?  Should we instead assume that
    751   // the caller is a global initializer?
    752   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
    753   if (!Caller)
    754     return true;
    755 
    756   // If the caller is known-emitted, mark the callee as known-emitted.
    757   // Otherwise, mark the call in our call graph so we can traverse it later.
    758   bool CallerKnownEmitted =
    759       getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
    760   SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
    761                                           CallerKnownEmitted] {
    762     switch (IdentifyCUDAPreference(Caller, Callee)) {
    763     case CFP_Never:
    764     case CFP_WrongSide:
    765       assert(Caller && "Never/wrongSide calls require a non-null caller");
    766       // If we know the caller will be emitted, we know this wrong-side call
    767       // will be emitted, so it's an immediate error.  Otherwise, defer the
    768       // error until we know the caller is emitted.
    769       return CallerKnownEmitted
    770                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
    771                  : SemaDiagnosticBuilder::K_Deferred;
    772     default:
    773       return SemaDiagnosticBuilder::K_Nop;
    774     }
    775   }();
    776 
    777   if (DiagKind == SemaDiagnosticBuilder::K_Nop)
    778     return true;
    779 
    780   // Avoid emitting this error twice for the same location.  Using a hashtable
    781   // like this is unfortunate, but because we must continue parsing as normal
    782   // after encountering a deferred error, it's otherwise very tricky for us to
    783   // ensure that we only emit this deferred error once.
    784   if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    785     return true;
    786 
    787   SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
    788       << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
    789       << IdentifyCUDATarget(Caller);
    790   if (!Callee->getBuiltinID())
    791     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
    792                           diag::note_previous_decl, Caller, *this)
    793         << Callee;
    794   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
    795          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
    796 }
    797 
    798 // Check the wrong-sided reference capture of lambda for CUDA/HIP.
    799 // A lambda function may capture a stack variable by reference when it is
    800 // defined and uses the capture by reference when the lambda is called. When
    801 // the capture and use happen on different sides, the capture is invalid and
    802 // should be diagnosed.
    803 void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
    804                                   const sema::Capture &Capture) {
    805   // In host compilation we only need to check lambda functions emitted on host
    806   // side. In such lambda functions, a reference capture is invalid only
    807   // if the lambda structure is populated by a device function or kernel then
    808   // is passed to and called by a host function. However that is impossible,
    809   // since a device function or kernel can only call a device function, also a
    810   // kernel cannot pass a lambda back to a host function since we cannot
    811   // define a kernel argument type which can hold the lambda before the lambda
    812   // itself is defined.
    813   if (!LangOpts.CUDAIsDevice)
    814     return;
    815 
    816   // File-scope lambda can only do init captures for global variables, which
    817   // results in passing by value for these global variables.
    818   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
    819   if (!Caller)
    820     return;
    821 
    822   // In device compilation, we only need to check lambda functions which are
    823   // emitted on device side. For such lambdas, a reference capture is invalid
    824   // only if the lambda structure is populated by a host function then passed
    825   // to and called in a device function or kernel.
    826   bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
    827   bool CallerIsHost =
    828       !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
    829   bool ShouldCheck = CalleeIsDevice && CallerIsHost;
    830   if (!ShouldCheck || !Capture.isReferenceCapture())
    831     return;
    832   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
    833   if (Capture.isVariableCapture()) {
    834     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
    835                           diag::err_capture_bad_target, Callee, *this)
    836         << Capture.getVariable();
    837   } else if (Capture.isThisCapture()) {
    838     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
    839                           diag::err_capture_bad_target_this_ptr, Callee, *this);
    840   }
    841   return;
    842 }
    843 
    844 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
    845   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    846   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    847     return;
    848   Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    849   Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
    850 }
    851 
    852 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
    853                                    const LookupResult &Previous) {
    854   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
    855   CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
    856   for (NamedDecl *OldND : Previous) {
    857     FunctionDecl *OldFD = OldND->getAsFunction();
    858     if (!OldFD)
    859       continue;
    860 
    861     CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    862     // Don't allow HD and global functions to overload other functions with the
    863     // same signature.  We allow overloading based on CUDA attributes so that
    864     // functions can have different implementations on the host and device, but
    865     // HD/global functions "exist" in some sense on both the host and device, so
    866     // should have the same implementation on both sides.
    867     if (NewTarget != OldTarget &&
    868         ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
    869          (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
    870         !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
    871                     /* ConsiderCudaAttrs = */ false)) {
    872       Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
    873           << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
    874       Diag(OldFD->getLocation(), diag::note_previous_declaration);
    875       NewFD->setInvalidDecl();
    876       break;
    877     }
    878   }
    879 }
    880 
    881 template <typename AttrTy>
    882 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
    883                               const FunctionDecl &TemplateFD) {
    884   if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    885     AttrTy *Clone = Attribute->clone(S.Context);
    886     Clone->setInherited(true);
    887     FD->addAttr(Clone);
    888   }
    889 }
    890 
    891 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
    892                                   const FunctionTemplateDecl &TD) {
    893   const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
    894   copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
    895   copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
    896   copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
    897 }
    898 
    899 std::string Sema::getCudaConfigureFuncName() const {
    900   if (getLangOpts().HIP)
    901     return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
    902                                             : "hipConfigureCall";
    903 
    904   // New CUDA kernel launch sequence.
    905   if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
    906                          CudaFeature::CUDA_USES_NEW_LAUNCH))
    907     return "__cudaPushCallConfiguration";
    908 
    909   // Legacy CUDA kernel configuration call
    910   return "cudaConfigureCall";
    911 }
    912