JSONCompilationDatabase.cpp revision 1.1 1 1.1 joerg //===- JSONCompilationDatabase.cpp ----------------------------------------===//
2 1.1 joerg //
3 1.1 joerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 1.1 joerg // See https://llvm.org/LICENSE.txt for license information.
5 1.1 joerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 1.1 joerg //
7 1.1 joerg //===----------------------------------------------------------------------===//
8 1.1 joerg //
9 1.1 joerg // This file contains the implementation of the JSONCompilationDatabase.
10 1.1 joerg //
11 1.1 joerg //===----------------------------------------------------------------------===//
12 1.1 joerg
13 1.1 joerg #include "clang/Tooling/JSONCompilationDatabase.h"
14 1.1 joerg #include "clang/Basic/LLVM.h"
15 1.1 joerg #include "clang/Tooling/CompilationDatabase.h"
16 1.1 joerg #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
17 1.1 joerg #include "clang/Tooling/Tooling.h"
18 1.1 joerg #include "llvm/ADT/Optional.h"
19 1.1 joerg #include "llvm/ADT/STLExtras.h"
20 1.1 joerg #include "llvm/ADT/SmallString.h"
21 1.1 joerg #include "llvm/ADT/SmallVector.h"
22 1.1 joerg #include "llvm/ADT/StringRef.h"
23 1.1 joerg #include "llvm/ADT/Triple.h"
24 1.1 joerg #include "llvm/Support/Allocator.h"
25 1.1 joerg #include "llvm/Support/Casting.h"
26 1.1 joerg #include "llvm/Support/CommandLine.h"
27 1.1 joerg #include "llvm/Support/ErrorOr.h"
28 1.1 joerg #include "llvm/Support/Host.h"
29 1.1 joerg #include "llvm/Support/MemoryBuffer.h"
30 1.1 joerg #include "llvm/Support/Path.h"
31 1.1 joerg #include "llvm/Support/StringSaver.h"
32 1.1 joerg #include "llvm/Support/YAMLParser.h"
33 1.1 joerg #include "llvm/Support/raw_ostream.h"
34 1.1 joerg #include <cassert>
35 1.1 joerg #include <memory>
36 1.1 joerg #include <string>
37 1.1 joerg #include <system_error>
38 1.1 joerg #include <tuple>
39 1.1 joerg #include <utility>
40 1.1 joerg #include <vector>
41 1.1 joerg
42 1.1 joerg using namespace clang;
43 1.1 joerg using namespace tooling;
44 1.1 joerg
45 1.1 joerg namespace {
46 1.1 joerg
47 1.1 joerg /// A parser for escaped strings of command line arguments.
48 1.1 joerg ///
49 1.1 joerg /// Assumes \-escaping for quoted arguments (see the documentation of
50 1.1 joerg /// unescapeCommandLine(...)).
51 1.1 joerg class CommandLineArgumentParser {
52 1.1 joerg public:
53 1.1 joerg CommandLineArgumentParser(StringRef CommandLine)
54 1.1 joerg : Input(CommandLine), Position(Input.begin()-1) {}
55 1.1 joerg
56 1.1 joerg std::vector<std::string> parse() {
57 1.1 joerg bool HasMoreInput = true;
58 1.1 joerg while (HasMoreInput && nextNonWhitespace()) {
59 1.1 joerg std::string Argument;
60 1.1 joerg HasMoreInput = parseStringInto(Argument);
61 1.1 joerg CommandLine.push_back(Argument);
62 1.1 joerg }
63 1.1 joerg return CommandLine;
64 1.1 joerg }
65 1.1 joerg
66 1.1 joerg private:
67 1.1 joerg // All private methods return true if there is more input available.
68 1.1 joerg
69 1.1 joerg bool parseStringInto(std::string &String) {
70 1.1 joerg do {
71 1.1 joerg if (*Position == '"') {
72 1.1 joerg if (!parseDoubleQuotedStringInto(String)) return false;
73 1.1 joerg } else if (*Position == '\'') {
74 1.1 joerg if (!parseSingleQuotedStringInto(String)) return false;
75 1.1 joerg } else {
76 1.1 joerg if (!parseFreeStringInto(String)) return false;
77 1.1 joerg }
78 1.1 joerg } while (*Position != ' ');
79 1.1 joerg return true;
80 1.1 joerg }
81 1.1 joerg
82 1.1 joerg bool parseDoubleQuotedStringInto(std::string &String) {
83 1.1 joerg if (!next()) return false;
84 1.1 joerg while (*Position != '"') {
85 1.1 joerg if (!skipEscapeCharacter()) return false;
86 1.1 joerg String.push_back(*Position);
87 1.1 joerg if (!next()) return false;
88 1.1 joerg }
89 1.1 joerg return next();
90 1.1 joerg }
91 1.1 joerg
92 1.1 joerg bool parseSingleQuotedStringInto(std::string &String) {
93 1.1 joerg if (!next()) return false;
94 1.1 joerg while (*Position != '\'') {
95 1.1 joerg String.push_back(*Position);
96 1.1 joerg if (!next()) return false;
97 1.1 joerg }
98 1.1 joerg return next();
99 1.1 joerg }
100 1.1 joerg
101 1.1 joerg bool parseFreeStringInto(std::string &String) {
102 1.1 joerg do {
103 1.1 joerg if (!skipEscapeCharacter()) return false;
104 1.1 joerg String.push_back(*Position);
105 1.1 joerg if (!next()) return false;
106 1.1 joerg } while (*Position != ' ' && *Position != '"' && *Position != '\'');
107 1.1 joerg return true;
108 1.1 joerg }
109 1.1 joerg
110 1.1 joerg bool skipEscapeCharacter() {
111 1.1 joerg if (*Position == '\\') {
112 1.1 joerg return next();
113 1.1 joerg }
114 1.1 joerg return true;
115 1.1 joerg }
116 1.1 joerg
117 1.1 joerg bool nextNonWhitespace() {
118 1.1 joerg do {
119 1.1 joerg if (!next()) return false;
120 1.1 joerg } while (*Position == ' ');
121 1.1 joerg return true;
122 1.1 joerg }
123 1.1 joerg
124 1.1 joerg bool next() {
125 1.1 joerg ++Position;
126 1.1 joerg return Position != Input.end();
127 1.1 joerg }
128 1.1 joerg
129 1.1 joerg const StringRef Input;
130 1.1 joerg StringRef::iterator Position;
131 1.1 joerg std::vector<std::string> CommandLine;
132 1.1 joerg };
133 1.1 joerg
134 1.1 joerg std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax,
135 1.1 joerg StringRef EscapedCommandLine) {
136 1.1 joerg if (Syntax == JSONCommandLineSyntax::AutoDetect) {
137 1.1 joerg Syntax = JSONCommandLineSyntax::Gnu;
138 1.1 joerg llvm::Triple Triple(llvm::sys::getProcessTriple());
139 1.1 joerg if (Triple.getOS() == llvm::Triple::OSType::Win32) {
140 1.1 joerg // Assume Windows command line parsing on Win32 unless the triple
141 1.1 joerg // explicitly tells us otherwise.
142 1.1 joerg if (!Triple.hasEnvironment() ||
143 1.1 joerg Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC)
144 1.1 joerg Syntax = JSONCommandLineSyntax::Windows;
145 1.1 joerg }
146 1.1 joerg }
147 1.1 joerg
148 1.1 joerg if (Syntax == JSONCommandLineSyntax::Windows) {
149 1.1 joerg llvm::BumpPtrAllocator Alloc;
150 1.1 joerg llvm::StringSaver Saver(Alloc);
151 1.1 joerg llvm::SmallVector<const char *, 64> T;
152 1.1 joerg llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T);
153 1.1 joerg std::vector<std::string> Result(T.begin(), T.end());
154 1.1 joerg return Result;
155 1.1 joerg }
156 1.1 joerg assert(Syntax == JSONCommandLineSyntax::Gnu);
157 1.1 joerg CommandLineArgumentParser parser(EscapedCommandLine);
158 1.1 joerg return parser.parse();
159 1.1 joerg }
160 1.1 joerg
161 1.1 joerg // This plugin locates a nearby compile_command.json file, and also infers
162 1.1 joerg // compile commands for files not present in the database.
163 1.1 joerg class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin {
164 1.1 joerg std::unique_ptr<CompilationDatabase>
165 1.1 joerg loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
166 1.1 joerg SmallString<1024> JSONDatabasePath(Directory);
167 1.1 joerg llvm::sys::path::append(JSONDatabasePath, "compile_commands.json");
168 1.1 joerg auto Base = JSONCompilationDatabase::loadFromFile(
169 1.1 joerg JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect);
170 1.1 joerg return Base ? inferTargetAndDriverMode(
171 1.1 joerg inferMissingCompileCommands(std::move(Base)))
172 1.1 joerg : nullptr;
173 1.1 joerg }
174 1.1 joerg };
175 1.1 joerg
176 1.1 joerg } // namespace
177 1.1 joerg
178 1.1 joerg // Register the JSONCompilationDatabasePlugin with the
179 1.1 joerg // CompilationDatabasePluginRegistry using this statically initialized variable.
180 1.1 joerg static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin>
181 1.1 joerg X("json-compilation-database", "Reads JSON formatted compilation databases");
182 1.1 joerg
namespace clang {
namespace tooling {

// Anchor symbol: it exists so that the linker is forced to link in the object
// file generated from this source, which in turn registers the
// JSONCompilationDatabasePlugin.
volatile int JSONAnchorSource = 0;

} // namespace tooling
} // namespace clang
192 1.1 joerg
193 1.1 joerg std::unique_ptr<JSONCompilationDatabase>
194 1.1 joerg JSONCompilationDatabase::loadFromFile(StringRef FilePath,
195 1.1 joerg std::string &ErrorMessage,
196 1.1 joerg JSONCommandLineSyntax Syntax) {
197 1.1 joerg // Don't mmap: if we're a long-lived process, the build system may overwrite.
198 1.1 joerg llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer =
199 1.1 joerg llvm::MemoryBuffer::getFile(FilePath, /*FileSize=*/-1,
200 1.1 joerg /*RequiresNullTerminator=*/true,
201 1.1 joerg /*IsVolatile=*/true);
202 1.1 joerg if (std::error_code Result = DatabaseBuffer.getError()) {
203 1.1 joerg ErrorMessage = "Error while opening JSON database: " + Result.message();
204 1.1 joerg return nullptr;
205 1.1 joerg }
206 1.1 joerg std::unique_ptr<JSONCompilationDatabase> Database(
207 1.1 joerg new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax));
208 1.1 joerg if (!Database->parse(ErrorMessage))
209 1.1 joerg return nullptr;
210 1.1 joerg return Database;
211 1.1 joerg }
212 1.1 joerg
213 1.1 joerg std::unique_ptr<JSONCompilationDatabase>
214 1.1 joerg JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString,
215 1.1 joerg std::string &ErrorMessage,
216 1.1 joerg JSONCommandLineSyntax Syntax) {
217 1.1 joerg std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer(
218 1.1 joerg llvm::MemoryBuffer::getMemBuffer(DatabaseString));
219 1.1 joerg std::unique_ptr<JSONCompilationDatabase> Database(
220 1.1 joerg new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax));
221 1.1 joerg if (!Database->parse(ErrorMessage))
222 1.1 joerg return nullptr;
223 1.1 joerg return Database;
224 1.1 joerg }
225 1.1 joerg
226 1.1 joerg std::vector<CompileCommand>
227 1.1 joerg JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
228 1.1 joerg SmallString<128> NativeFilePath;
229 1.1 joerg llvm::sys::path::native(FilePath, NativeFilePath);
230 1.1 joerg
231 1.1 joerg std::string Error;
232 1.1 joerg llvm::raw_string_ostream ES(Error);
233 1.1 joerg StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
234 1.1 joerg if (Match.empty())
235 1.1 joerg return {};
236 1.1 joerg const auto CommandsRefI = IndexByFile.find(Match);
237 1.1 joerg if (CommandsRefI == IndexByFile.end())
238 1.1 joerg return {};
239 1.1 joerg std::vector<CompileCommand> Commands;
240 1.1 joerg getCommands(CommandsRefI->getValue(), Commands);
241 1.1 joerg return Commands;
242 1.1 joerg }
243 1.1 joerg
244 1.1 joerg std::vector<std::string>
245 1.1 joerg JSONCompilationDatabase::getAllFiles() const {
246 1.1 joerg std::vector<std::string> Result;
247 1.1 joerg for (const auto &CommandRef : IndexByFile)
248 1.1 joerg Result.push_back(CommandRef.first().str());
249 1.1 joerg return Result;
250 1.1 joerg }
251 1.1 joerg
252 1.1 joerg std::vector<CompileCommand>
253 1.1 joerg JSONCompilationDatabase::getAllCompileCommands() const {
254 1.1 joerg std::vector<CompileCommand> Commands;
255 1.1 joerg getCommands(AllCommands, Commands);
256 1.1 joerg return Commands;
257 1.1 joerg }
258 1.1 joerg
259 1.1 joerg static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) {
260 1.1 joerg Name.consume_back(".exe");
261 1.1 joerg return Name;
262 1.1 joerg }
263 1.1 joerg
264 1.1 joerg // There are compiler-wrappers (ccache, distcc, gomacc) that take the "real"
265 1.1 joerg // compiler as an argument, e.g. distcc gcc -O3 foo.c.
266 1.1 joerg // These end up in compile_commands.json when people set CC="distcc gcc".
267 1.1 joerg // Clang's driver doesn't understand this, so we need to unwrap.
268 1.1 joerg static bool unwrapCommand(std::vector<std::string> &Args) {
269 1.1 joerg if (Args.size() < 2)
270 1.1 joerg return false;
271 1.1 joerg StringRef Wrapper =
272 1.1 joerg stripExecutableExtension(llvm::sys::path::filename(Args.front()));
273 1.1 joerg if (Wrapper == "distcc" || Wrapper == "gomacc" || Wrapper == "ccache") {
274 1.1 joerg // Most of these wrappers support being invoked 3 ways:
275 1.1 joerg // `distcc g++ file.c` This is the mode we're trying to match.
276 1.1 joerg // We need to drop `distcc`.
277 1.1 joerg // `distcc file.c` This acts like compiler is cc or similar.
278 1.1 joerg // Clang's driver can handle this, no change needed.
279 1.1 joerg // `g++ file.c` g++ is a symlink to distcc.
280 1.1 joerg // We don't even notice this case, and all is well.
281 1.1 joerg //
282 1.1 joerg // We need to distinguish between the first and second case.
283 1.1 joerg // The wrappers themselves don't take flags, so Args[1] is a compiler flag,
284 1.1 joerg // an input file, or a compiler. Inputs have extensions, compilers don't.
285 1.1 joerg bool HasCompiler =
286 1.1 joerg (Args[1][0] != '-') &&
287 1.1 joerg !llvm::sys::path::has_extension(stripExecutableExtension(Args[1]));
288 1.1 joerg if (HasCompiler) {
289 1.1 joerg Args.erase(Args.begin());
290 1.1 joerg return true;
291 1.1 joerg }
292 1.1 joerg // If !HasCompiler, wrappers act like GCC. Fine: so do we.
293 1.1 joerg }
294 1.1 joerg return false;
295 1.1 joerg }
296 1.1 joerg
297 1.1 joerg static std::vector<std::string>
298 1.1 joerg nodeToCommandLine(JSONCommandLineSyntax Syntax,
299 1.1 joerg const std::vector<llvm::yaml::ScalarNode *> &Nodes) {
300 1.1 joerg SmallString<1024> Storage;
301 1.1 joerg std::vector<std::string> Arguments;
302 1.1 joerg if (Nodes.size() == 1)
303 1.1 joerg Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage));
304 1.1 joerg else
305 1.1 joerg for (const auto *Node : Nodes)
306 1.1 joerg Arguments.push_back(Node->getValue(Storage));
307 1.1 joerg // There may be multiple wrappers: using distcc and ccache together is common.
308 1.1 joerg while (unwrapCommand(Arguments))
309 1.1 joerg ;
310 1.1 joerg return Arguments;
311 1.1 joerg }
312 1.1 joerg
313 1.1 joerg void JSONCompilationDatabase::getCommands(
314 1.1 joerg ArrayRef<CompileCommandRef> CommandsRef,
315 1.1 joerg std::vector<CompileCommand> &Commands) const {
316 1.1 joerg for (const auto &CommandRef : CommandsRef) {
317 1.1 joerg SmallString<8> DirectoryStorage;
318 1.1 joerg SmallString<32> FilenameStorage;
319 1.1 joerg SmallString<32> OutputStorage;
320 1.1 joerg auto Output = std::get<3>(CommandRef);
321 1.1 joerg Commands.emplace_back(
322 1.1 joerg std::get<0>(CommandRef)->getValue(DirectoryStorage),
323 1.1 joerg std::get<1>(CommandRef)->getValue(FilenameStorage),
324 1.1 joerg nodeToCommandLine(Syntax, std::get<2>(CommandRef)),
325 1.1 joerg Output ? Output->getValue(OutputStorage) : "");
326 1.1 joerg }
327 1.1 joerg }
328 1.1 joerg
329 1.1 joerg bool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
330 1.1 joerg llvm::yaml::document_iterator I = YAMLStream.begin();
331 1.1 joerg if (I == YAMLStream.end()) {
332 1.1 joerg ErrorMessage = "Error while parsing YAML.";
333 1.1 joerg return false;
334 1.1 joerg }
335 1.1 joerg llvm::yaml::Node *Root = I->getRoot();
336 1.1 joerg if (!Root) {
337 1.1 joerg ErrorMessage = "Error while parsing YAML.";
338 1.1 joerg return false;
339 1.1 joerg }
340 1.1 joerg auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Root);
341 1.1 joerg if (!Array) {
342 1.1 joerg ErrorMessage = "Expected array.";
343 1.1 joerg return false;
344 1.1 joerg }
345 1.1 joerg for (auto &NextObject : *Array) {
346 1.1 joerg auto *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject);
347 1.1 joerg if (!Object) {
348 1.1 joerg ErrorMessage = "Expected object.";
349 1.1 joerg return false;
350 1.1 joerg }
351 1.1 joerg llvm::yaml::ScalarNode *Directory = nullptr;
352 1.1 joerg llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command;
353 1.1 joerg llvm::yaml::ScalarNode *File = nullptr;
354 1.1 joerg llvm::yaml::ScalarNode *Output = nullptr;
355 1.1 joerg for (auto& NextKeyValue : *Object) {
356 1.1 joerg auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
357 1.1 joerg if (!KeyString) {
358 1.1 joerg ErrorMessage = "Expected strings as key.";
359 1.1 joerg return false;
360 1.1 joerg }
361 1.1 joerg SmallString<10> KeyStorage;
362 1.1 joerg StringRef KeyValue = KeyString->getValue(KeyStorage);
363 1.1 joerg llvm::yaml::Node *Value = NextKeyValue.getValue();
364 1.1 joerg if (!Value) {
365 1.1 joerg ErrorMessage = "Expected value.";
366 1.1 joerg return false;
367 1.1 joerg }
368 1.1 joerg auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Value);
369 1.1 joerg auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Value);
370 1.1 joerg if (KeyValue == "arguments" && !SequenceString) {
371 1.1 joerg ErrorMessage = "Expected sequence as value.";
372 1.1 joerg return false;
373 1.1 joerg } else if (KeyValue != "arguments" && !ValueString) {
374 1.1 joerg ErrorMessage = "Expected string as value.";
375 1.1 joerg return false;
376 1.1 joerg }
377 1.1 joerg if (KeyValue == "directory") {
378 1.1 joerg Directory = ValueString;
379 1.1 joerg } else if (KeyValue == "arguments") {
380 1.1 joerg Command = std::vector<llvm::yaml::ScalarNode *>();
381 1.1 joerg for (auto &Argument : *SequenceString) {
382 1.1 joerg auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument);
383 1.1 joerg if (!Scalar) {
384 1.1 joerg ErrorMessage = "Only strings are allowed in 'arguments'.";
385 1.1 joerg return false;
386 1.1 joerg }
387 1.1 joerg Command->push_back(Scalar);
388 1.1 joerg }
389 1.1 joerg } else if (KeyValue == "command") {
390 1.1 joerg if (!Command)
391 1.1 joerg Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString);
392 1.1 joerg } else if (KeyValue == "file") {
393 1.1 joerg File = ValueString;
394 1.1 joerg } else if (KeyValue == "output") {
395 1.1 joerg Output = ValueString;
396 1.1 joerg } else {
397 1.1 joerg ErrorMessage = ("Unknown key: \"" +
398 1.1 joerg KeyString->getRawValue() + "\"").str();
399 1.1 joerg return false;
400 1.1 joerg }
401 1.1 joerg }
402 1.1 joerg if (!File) {
403 1.1 joerg ErrorMessage = "Missing key: \"file\".";
404 1.1 joerg return false;
405 1.1 joerg }
406 1.1 joerg if (!Command) {
407 1.1 joerg ErrorMessage = "Missing key: \"command\" or \"arguments\".";
408 1.1 joerg return false;
409 1.1 joerg }
410 1.1 joerg if (!Directory) {
411 1.1 joerg ErrorMessage = "Missing key: \"directory\".";
412 1.1 joerg return false;
413 1.1 joerg }
414 1.1 joerg SmallString<8> FileStorage;
415 1.1 joerg StringRef FileName = File->getValue(FileStorage);
416 1.1 joerg SmallString<128> NativeFilePath;
417 1.1 joerg if (llvm::sys::path::is_relative(FileName)) {
418 1.1 joerg SmallString<8> DirectoryStorage;
419 1.1 joerg SmallString<128> AbsolutePath(
420 1.1 joerg Directory->getValue(DirectoryStorage));
421 1.1 joerg llvm::sys::path::append(AbsolutePath, FileName);
422 1.1 joerg llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true);
423 1.1 joerg llvm::sys::path::native(AbsolutePath, NativeFilePath);
424 1.1 joerg } else {
425 1.1 joerg llvm::sys::path::native(FileName, NativeFilePath);
426 1.1 joerg }
427 1.1 joerg auto Cmd = CompileCommandRef(Directory, File, *Command, Output);
428 1.1 joerg IndexByFile[NativeFilePath].push_back(Cmd);
429 1.1 joerg AllCommands.push_back(Cmd);
430 1.1 joerg MatchTrie.insert(NativeFilePath);
431 1.1 joerg }
432 1.1 joerg return true;
433 1.1 joerg }
434