Home | History | Annotate | Line # | Download | only in TableGen
      1 //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      6 // See https://llvm.org/LICENSE.txt for license information.
      7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      8 //
      9 //===----------------------------------------------------------------------===//
     10 //
     11 // These backends consume the definitions of Syntax Tree nodes.
     12 // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
     13 //
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, Base)
//   ABSTRACT_NODE(Kind, Base, FirstKind, LastKind)
// similar to those for AST nodes such as AST/DeclNodes.inc.
// Both macros default to NODE(Kind, Base) if the includer does not define them.
     18 //
     19 // The -gen-clang-syntax-node-classes backend produces definitions for the
     20 // syntax::Node subclasses (except those marked as External).
     21 //
     22 // In future, another backend will encode the structure of the various node
     23 // types in tables so their invariants can be checked and enforced.
     24 //
     25 //===----------------------------------------------------------------------===//
     26 #include "TableGenBackends.h"
     27 
     28 #include <deque>
     29 
     30 #include "llvm/ADT/StringExtras.h"
     31 #include "llvm/Support/FormatVariadic.h"
     32 #include "llvm/Support/raw_ostream.h"
     33 #include "llvm/TableGen/Record.h"
     34 #include "llvm/TableGen/TableGenBackend.h"
     35 
     36 namespace {
     37 using llvm::formatv;
     38 
     39 // The class hierarchy of Node types.
     40 // We assemble this in order to be able to define the NodeKind enum in a
     41 // stable and useful way, where abstract Node subclasses correspond to ranges.
     42 class Hierarchy {
     43 public:
     44   Hierarchy(const llvm::RecordKeeper &Records) {
     45     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
     46       add(T);
     47     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
     48       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
     49         link(Derived, Base);
     50     for (NodeType &N : AllTypes) {
     51       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
     52         return L->Record->getName() < R->Record->getName();
     53       });
     54       // Alternatives nodes must have subclasses, External nodes may do.
     55       assert(N.Record->isSubClassOf("Alternatives") ||
     56              N.Record->isSubClassOf("External") || N.Derived.empty());
     57       assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
     58     }
     59   }
     60 
     61   struct NodeType {
     62     const llvm::Record *Record = nullptr;
     63     const NodeType *Base = nullptr;
     64     std::vector<const NodeType *> Derived;
     65     llvm::StringRef name() const { return Record->getName(); }
     66   };
     67 
     68   NodeType &get(llvm::StringRef Name = "Node") {
     69     auto NI = ByName.find(Name);
     70     assert(NI != ByName.end() && "no such node");
     71     return *NI->second;
     72   }
     73 
     74   // Traverse the hierarchy in pre-order (base classes before derived).
     75   void visit(llvm::function_ref<void(const NodeType &)> CB,
     76              const NodeType *Start = nullptr) {
     77     if (Start == nullptr)
     78       Start = &get();
     79     CB(*Start);
     80     for (const NodeType *D : Start->Derived)
     81       visit(CB, D);
     82   }
     83 
     84 private:
     85   void add(const llvm::Record *R) {
     86     AllTypes.emplace_back();
     87     AllTypes.back().Record = R;
     88     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
     89     assert(Inserted && "Duplicate node name");
     90     (void)Inserted;
     91   }
     92 
     93   void link(const llvm::Record *Derived, const llvm::Record *Base) {
     94     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
     95     assert(CN.Base == nullptr && "setting base twice");
     96     PN.Derived.push_back(&CN);
     97     CN.Base = &PN;
     98   }
     99 
    100   std::deque<NodeType> AllTypes;
    101   llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
    102 };
    103 
    104 const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
    105   return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
    106 }
    107 const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
    108   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
    109 }
    110 
    111 struct SyntaxConstraint {
    112   SyntaxConstraint(const llvm::Record &R) {
    113     if (R.isSubClassOf("Optional")) {
    114       *this = SyntaxConstraint(*R.getValueAsDef("inner"));
    115     } else if (R.isSubClassOf("AnyToken")) {
    116       NodeType = "Leaf";
    117     } else if (R.isSubClassOf("NodeType")) {
    118       NodeType = R.getName().str();
    119     } else {
    120       assert(false && "Unhandled Syntax kind");
    121     }
    122   }
    123 
    124   std::string NodeType;
    125   // optional and leaf types also go here, once we want to use them.
    126 };
    127 
    128 } // namespace
    129 
    130 void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
    131                                     llvm::raw_ostream &OS) {
    132   llvm::emitSourceFileHeader("Syntax tree node list", OS);
    133   Hierarchy H(Records);
    134   OS << R"cpp(
    135 #ifndef NODE
    136 #define NODE(Kind, Base)
    137 #endif
    138 
    139 #ifndef CONCRETE_NODE
    140 #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
    141 #endif
    142 
    143 #ifndef ABSTRACT_NODE
    144 #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
    145 #endif
    146 
    147 )cpp";
    148   H.visit([&](const Hierarchy::NodeType &N) {
    149     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
    150     if (N.Base == nullptr)
    151       return;
    152     if (N.Derived.empty())
    153       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
    154     else
    155       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
    156                     N.Base->name(), firstConcrete(N).name(),
    157                     lastConcrete(N).name());
    158   });
    159   OS << R"cpp(
    160 #undef NODE
    161 #undef CONCRETE_NODE
    162 #undef ABSTRACT_NODE
    163 )cpp";
    164 }
    165 
    166 // Format a documentation string as a C++ comment.
    167 // Trims leading whitespace handling since comments come from a TableGen file:
    168 //    documentation = [{
    169 //      This is a widget. Example:
    170 //        widget.explode()
    171 //    }];
    172 // and should be formatted as:
    173 //    /// This is a widget. Example:
    174 //    ///   widget.explode()
    175 // Leading and trailing whitespace lines are stripped.
    176 // The indentation of the first line is stripped from all lines.
    177 static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
    178   Doc = Doc.rtrim();
    179   llvm::StringRef Line;
    180   while (Line.trim().empty() && !Doc.empty())
    181     std::tie(Line, Doc) = Doc.split('\n');
    182   llvm::StringRef Indent = Line.take_while(llvm::isSpace);
    183   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
    184     Line.consume_front(Indent);
    185     OS << "/// " << Line << "\n";
    186   }
    187 }
    188 
    189 void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
    190                                        llvm::raw_ostream &OS) {
    191   llvm::emitSourceFileHeader("Syntax tree node list", OS);
    192   Hierarchy H(Records);
    193 
    194   OS << "\n// Forward-declare node types so we don't have to carefully "
    195         "sequence definitions.\n";
    196   H.visit([&](const Hierarchy::NodeType &N) {
    197     OS << "class " << N.name() << ";\n";
    198   });
    199 
    200   OS << "\n// Node definitions\n\n";
    201   H.visit([&](const Hierarchy::NodeType &N) {
    202     if (N.Record->isSubClassOf("External"))
    203       return;
    204     printDoc(N.Record->getValueAsString("documentation"), OS);
    205     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
    206                   N.Derived.empty() ? " final" : "", N.Base->name());
    207 
    208     // Constructor.
    209     if (N.Derived.empty())
    210       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
    211                     N.Base->name());
    212     else
    213       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
    214                     N.name(), N.Base->name());
    215 
    216     if (N.Record->isSubClassOf("Sequence")) {
    217       // Getters for sequence elements.
    218       for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
    219         assert(C->isSubClassOf("Role"));
    220         llvm::StringRef Role = C->getValueAsString("role");
    221         SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
    222         for (const char *Const : {"", "const "})
    223           OS << formatv(
    224               "  {2}{1} *get{0}() {2} {{\n"
    225               "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
    226               "  }\n",
    227               Role, Constraint.NodeType, Const);
    228       }
    229     }
    230 
    231     // classof. FIXME: move definition inline once ~all nodes are generated.
    232     OS << "  static bool classof(const Node *N);\n";
    233 
    234     OS << "};\n\n";
    235   });
    236 }
    237