Home | History | Annotate | Line # | Download | only in PerfJITEvents
      1 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines a JITEventListener object that tells perf about JITted
     10 // functions, including source line information.
     11 //
     12 // Documentation for perf jit integration is available at:
     13 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
     14 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
     15 //
     16 //===----------------------------------------------------------------------===//
     17 
     18 #include "llvm/ADT/Twine.h"
     19 #include "llvm/Config/config.h"
     20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
     21 #include "llvm/ExecutionEngine/JITEventListener.h"
     22 #include "llvm/Object/ObjectFile.h"
     23 #include "llvm/Object/SymbolSize.h"
     24 #include "llvm/Support/Debug.h"
     25 #include "llvm/Support/Errno.h"
     26 #include "llvm/Support/FileSystem.h"
     27 #include "llvm/Support/ManagedStatic.h"
     28 #include "llvm/Support/MemoryBuffer.h"
     29 #include "llvm/Support/Mutex.h"
     30 #include "llvm/Support/Path.h"
     31 #include "llvm/Support/Process.h"
     32 #include "llvm/Support/Threading.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include <mutex>
     35 
     36 #include <sys/mman.h>  // mmap()
     37 #include <time.h>      // clock_gettime(), time(), localtime_r() */
     38 #include <unistd.h>    // for read(), close()
     39 
     40 using namespace llvm;
     41 using namespace llvm::object;
     42 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
     43 
     44 namespace {
     45 
// Language identifier; InitDebuggingDir() uses it as the prefix of the
// per-process dump directory name.
// (XXX: should we generate something better from debug info?)
#define JIT_LANG "llvm-IR"
// Magic value stored in the jitdump file header: the characters 'J', 'i',
// 'T', 'D' packed into one 32-bit word, 'J' in the most significant byte.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
   (uint32_t)'D')
// Format version stored in the jitdump file header.
#define LLVM_PERF_JIT_VERSION 1

// bit 0: set if the jitdump file is using an architecture-specific timestamp
// clock source
#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
     57 
     58 struct LLVMPerfJitHeader;
     59 
     60 class PerfJITEventListener : public JITEventListener {
     61 public:
     62   PerfJITEventListener();
     63   ~PerfJITEventListener() {
     64     if (MarkerAddr)
     65       CloseMarker();
     66   }
     67 
     68   void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
     69                           const RuntimeDyld::LoadedObjectInfo &L) override;
     70   void notifyFreeingObject(ObjectKey K) override;
     71 
     72 private:
     73   bool InitDebuggingDir();
     74   bool OpenMarker();
     75   void CloseMarker();
     76   static bool FillMachine(LLVMPerfJitHeader &hdr);
     77 
     78   void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
     79                   uint64_t CodeSize);
     80   void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
     81 
     82   // cache lookups
     83   sys::Process::Pid Pid;
     84 
     85   // base directory for output data
     86   std::string JitPath;
     87 
     88   // output data stream, closed via Dumpstream
     89   int DumpFd = -1;
     90 
     91   // output data stream
     92   std::unique_ptr<raw_fd_ostream> Dumpstream;
     93 
     94   // prevent concurrent dumps from messing up the output file
     95   sys::Mutex Mutex;
     96 
     97   // perf mmap marker
     98   void *MarkerAddr = NULL;
     99 
    100   // perf support ready
    101   bool SuccessfullyInitialized = false;
    102 
    103   // identifier for functions, primarily to identify when moving them around
    104   uint64_t CodeGeneration = 1;
    105 };
    106 
    107 // The following are POD struct definitions from the perf jit specification
    108 
// Record type identifiers, stored in LLVMPerfJitRecordPrefix::Id. Only
// JIT_CODE_LOAD and JIT_CODE_DEBUG_INFO are written by this file.
enum LLVMPerfJitRecordType {
  JIT_CODE_LOAD = 0,           // written by NotifyCode()
  JIT_CODE_MOVE = 1, // not emitted, code isn't moved
  JIT_CODE_DEBUG_INFO = 2,     // written by NotifyDebug()
  JIT_CODE_CLOSE = 3,          // not emitted, unnecessary
  JIT_CODE_UNWINDING_INFO = 4, // not emitted

  JIT_CODE_MAX // one past the last record type above
};
    118 
// Header written once at the start of the dump file. The struct is written to
// the file byte-for-byte, so its layout is part of the file format.
struct LLVMPerfJitHeader {
  uint32_t Magic;     // characters "JiTD"
  uint32_t Version;   // header version
  uint32_t TotalSize; // total size of header
  uint32_t ElfMach;   // elf mach target
  uint32_t Pad1;      // reserved
  uint32_t Pid;       // id of the process emitting the dump
  uint64_t Timestamp; // timestamp
  uint64_t Flags;     // flags
};

// Catch accidental layout changes (added fields, padding) at compile time
// rather than by producing dump files that perf cannot parse.
static_assert(sizeof(LLVMPerfJitHeader) == 40,
              "jitdump file header must be exactly 40 bytes");
    129 
// record prefix (mandatory in each record). Written to the file byte-for-byte
// as the first member of every record struct.
struct LLVMPerfJitRecordPrefix {
  uint32_t Id;        // record type identifier
  uint32_t TotalSize; // size of the whole record, including this prefix
  uint64_t Timestamp; // time the record was created
};

// Catch accidental layout changes at compile time.
static_assert(sizeof(LLVMPerfJitRecordPrefix) == 16,
              "jitdump record prefix must be exactly 16 bytes");
    136 
    137 struct LLVMPerfJitRecordCodeLoad {
    138   LLVMPerfJitRecordPrefix Prefix;
    139 
    140   uint32_t Pid;
    141   uint32_t Tid;
    142   uint64_t Vma;
    143   uint64_t CodeAddr;
    144   uint64_t CodeSize;
    145   uint64_t CodeIndex;
    146 };
    147 
// One line-table entry inside a JIT_CODE_DEBUG_INFO record. Written to the
// file byte-for-byte; the line and discriminator fields are 32-bit unsigned
// values in the file format, so fixed-width types are used instead of `int`.
struct LLVMPerfJitDebugEntry {
  uint64_t Addr;
  uint32_t Lineno;  // source line number starting at 1
  uint32_t Discrim; // column discriminator, 0 is default
  // followed by null terminated filename, \xff\0 if same as previous entry
};

// Catch accidental layout changes at compile time.
static_assert(sizeof(LLVMPerfJitDebugEntry) == 16,
              "jitdump debug entry must be exactly 16 bytes");
    154 
    155 struct LLVMPerfJitRecordDebugInfo {
    156   LLVMPerfJitRecordPrefix Prefix;
    157 
    158   uint64_t CodeAddr;
    159   uint64_t NrEntry;
    160   // followed by NrEntry LLVMPerfJitDebugEntry records
    161 };
    162 
// Collapse a timespec (seconds + nanoseconds) into one nanosecond count.
static inline uint64_t timespec_to_ns(const struct timespec *ts) {
  constexpr uint64_t NanosPerSecond = 1000ULL * 1000ULL * 1000ULL;
  const uint64_t WholeSeconds = static_cast<uint64_t>(ts->tv_sec);
  const uint64_t ExtraNanos = static_cast<uint64_t>(ts->tv_nsec);
  return WholeSeconds * NanosPerSecond + ExtraNanos;
}
    167 
    168 static inline uint64_t perf_get_timestamp(void) {
    169   struct timespec ts;
    170   int ret;
    171 
    172   ret = clock_gettime(CLOCK_MONOTONIC, &ts);
    173   if (ret)
    174     return 0;
    175 
    176   return timespec_to_ns(&ts);
    177 }
    178 
    179 PerfJITEventListener::PerfJITEventListener()
    180     : Pid(sys::Process::getProcessId()) {
    181   // check if clock-source is supported
    182   if (!perf_get_timestamp()) {
    183     errs() << "kernel does not support CLOCK_MONOTONIC\n";
    184     return;
    185   }
    186 
    187   if (!InitDebuggingDir()) {
    188     errs() << "could not initialize debugging directory\n";
    189     return;
    190   }
    191 
    192   std::string Filename;
    193   raw_string_ostream FilenameBuf(Filename);
    194   FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
    195 
    196   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
    197   // raw_fd_ostream doesn't expose the FD.
    198   using sys::fs::openFileForWrite;
    199   if (auto EC =
    200           openFileForReadWrite(FilenameBuf.str(), DumpFd,
    201 			       sys::fs::CD_CreateNew, sys::fs::OF_None)) {
    202     errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
    203            << EC.message() << "\n";
    204     return;
    205   }
    206 
    207   Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
    208 
    209   LLVMPerfJitHeader Header = {0};
    210   if (!FillMachine(Header))
    211     return;
    212 
    213   // signal this process emits JIT information
    214   if (!OpenMarker())
    215     return;
    216 
    217   // emit dumpstream header
    218   Header.Magic = LLVM_PERF_JIT_MAGIC;
    219   Header.Version = LLVM_PERF_JIT_VERSION;
    220   Header.TotalSize = sizeof(Header);
    221   Header.Pid = Pid;
    222   Header.Timestamp = perf_get_timestamp();
    223   Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
    224 
    225   // Everything initialized, can do profiling now.
    226   if (!Dumpstream->has_error())
    227     SuccessfullyInitialized = true;
    228 }
    229 
    230 void PerfJITEventListener::notifyObjectLoaded(
    231     ObjectKey K, const ObjectFile &Obj,
    232     const RuntimeDyld::LoadedObjectInfo &L) {
    233 
    234   if (!SuccessfullyInitialized)
    235     return;
    236 
    237   OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
    238   const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
    239 
    240   // Get the address of the object image for use as a unique identifier
    241   std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
    242 
    243   // Use symbol info to iterate over functions in the object.
    244   for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
    245     SymbolRef Sym = P.first;
    246     std::string SourceFileName;
    247 
    248     Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
    249     if (!SymTypeOrErr) {
    250       // There's not much we can with errors here
    251       consumeError(SymTypeOrErr.takeError());
    252       continue;
    253     }
    254     SymbolRef::Type SymType = *SymTypeOrErr;
    255     if (SymType != SymbolRef::ST_Function)
    256       continue;
    257 
    258     Expected<StringRef> Name = Sym.getName();
    259     if (!Name) {
    260       consumeError(Name.takeError());
    261       continue;
    262     }
    263 
    264     Expected<uint64_t> AddrOrErr = Sym.getAddress();
    265     if (!AddrOrErr) {
    266       consumeError(AddrOrErr.takeError());
    267       continue;
    268     }
    269     uint64_t Size = P.second;
    270     object::SectionedAddress Address;
    271     Address.Address = *AddrOrErr;
    272 
    273     uint64_t SectionIndex = object::SectionedAddress::UndefSection;
    274     if (auto SectOrErr = Sym.getSection())
    275         if (*SectOrErr != Obj.section_end())
    276             SectionIndex = SectOrErr.get()->getIndex();
    277 
    278     // According to spec debugging info has to come before loading the
    279     // corresonding code load.
    280     DILineInfoTable Lines = Context->getLineInfoForAddressRange(
    281         {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
    282 
    283     NotifyDebug(*AddrOrErr, Lines);
    284     NotifyCode(Name, *AddrOrErr, Size);
    285   }
    286 
    287   Dumpstream->flush();
    288 }
    289 
// Intentionally a no-op: nothing is written to the dump file when an object
// is freed.
void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
  // perf currently doesn't have an interface for unloading. But munmap()ing the
  // code section does, so that's ok.
}
    294 
    295 bool PerfJITEventListener::InitDebuggingDir() {
    296   time_t Time;
    297   struct tm LocalTime;
    298   char TimeBuffer[sizeof("YYYYMMDD")];
    299   SmallString<64> Path;
    300 
    301   // search for location to dump data to
    302   if (const char *BaseDir = getenv("JITDUMPDIR"))
    303     Path.append(BaseDir);
    304   else if (!sys::path::home_directory(Path))
    305     Path = ".";
    306 
    307   // create debug directory
    308   Path += "/.debug/jit/";
    309   if (auto EC = sys::fs::create_directories(Path)) {
    310     errs() << "could not create jit cache directory " << Path << ": "
    311            << EC.message() << "\n";
    312     return false;
    313   }
    314 
    315   // create unique directory for dump data related to this process
    316   time(&Time);
    317   localtime_r(&Time, &LocalTime);
    318   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
    319   Path += JIT_LANG "-jit-";
    320   Path += TimeBuffer;
    321 
    322   SmallString<128> UniqueDebugDir;
    323 
    324   using sys::fs::createUniqueDirectory;
    325   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
    326     errs() << "could not create unique jit cache directory " << UniqueDebugDir
    327            << ": " << EC.message() << "\n";
    328     return false;
    329   }
    330 
    331   JitPath = std::string(UniqueDebugDir.str());
    332 
    333   return true;
    334 }
    335 
    336 bool PerfJITEventListener::OpenMarker() {
    337   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
    338   // is captured either live (perf record running when we mmap) or in deferred
    339   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
    340   // file for more meta data info about the jitted code. Perf report/annotate
    341   // detect this special filename and process the jitdump file.
    342   //
    343   // Mapping must be PROT_EXEC to ensure it is captured by perf record
    344   // even when not using -d option.
    345   MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
    346                       PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
    347 
    348   if (MarkerAddr == MAP_FAILED) {
    349     errs() << "could not mmap JIT marker\n";
    350     return false;
    351   }
    352   return true;
    353 }
    354 
    355 void PerfJITEventListener::CloseMarker() {
    356   if (!MarkerAddr)
    357     return;
    358 
    359   munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
    360   MarkerAddr = nullptr;
    361 }
    362 
    363 bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
    364   char id[16];
    365   struct {
    366     uint16_t e_type;
    367     uint16_t e_machine;
    368   } info;
    369 
    370   size_t RequiredMemory = sizeof(id) + sizeof(info);
    371 
    372   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
    373     MemoryBuffer::getFileSlice("/proc/self/exe",
    374 			       RequiredMemory,
    375 			       0);
    376 
    377   // This'll not guarantee that enough data was actually read from the
    378   // underlying file. Instead the trailing part of the buffer would be
    379   // zeroed. Given the ELF signature check below that seems ok though,
    380   // it's unlikely that the file ends just after that, and the
    381   // consequence would just be that perf wouldn't recognize the
    382   // signature.
    383   if (auto EC = MB.getError()) {
    384     errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
    385     return false;
    386   }
    387 
    388   memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
    389   memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
    390 
    391   // check ELF signature
    392   if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
    393     errs() << "invalid elf signature\n";
    394     return false;
    395   }
    396 
    397   hdr.ElfMach = info.e_machine;
    398 
    399   return true;
    400 }
    401 
    402 void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
    403                                       uint64_t CodeAddr, uint64_t CodeSize) {
    404   assert(SuccessfullyInitialized);
    405 
    406   // 0 length functions can't have samples.
    407   if (CodeSize == 0)
    408     return;
    409 
    410   LLVMPerfJitRecordCodeLoad rec;
    411   rec.Prefix.Id = JIT_CODE_LOAD;
    412   rec.Prefix.TotalSize = sizeof(rec) +        // debug record itself
    413                          Symbol->size() + 1 + // symbol name
    414                          CodeSize;            // and code
    415   rec.Prefix.Timestamp = perf_get_timestamp();
    416 
    417   rec.CodeSize = CodeSize;
    418   rec.Vma = 0;
    419   rec.CodeAddr = CodeAddr;
    420   rec.Pid = Pid;
    421   rec.Tid = get_threadid();
    422 
    423   // avoid interspersing output
    424   std::lock_guard<sys::Mutex> Guard(Mutex);
    425 
    426   rec.CodeIndex = CodeGeneration++; // under lock!
    427 
    428   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
    429   Dumpstream->write(Symbol->data(), Symbol->size() + 1);
    430   Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
    431 }
    432 
    433 void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
    434                                        DILineInfoTable Lines) {
    435   assert(SuccessfullyInitialized);
    436 
    437   // Didn't get useful debug info.
    438   if (Lines.empty())
    439     return;
    440 
    441   LLVMPerfJitRecordDebugInfo rec;
    442   rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
    443   rec.Prefix.TotalSize = sizeof(rec); // will be increased further
    444   rec.Prefix.Timestamp = perf_get_timestamp();
    445   rec.CodeAddr = CodeAddr;
    446   rec.NrEntry = Lines.size();
    447 
    448   // compute total size size of record (variable due to filenames)
    449   DILineInfoTable::iterator Begin = Lines.begin();
    450   DILineInfoTable::iterator End = Lines.end();
    451   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
    452     DILineInfo &line = It->second;
    453     rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
    454     rec.Prefix.TotalSize += line.FileName.size() + 1;
    455   }
    456 
    457   // The debug_entry describes the source line information. It is defined as
    458   // follows in order:
    459   // * uint64_t code_addr: address of function for which the debug information
    460   // is generated
    461   // * uint32_t line     : source file line number (starting at 1)
    462   // * uint32_t discrim  : column discriminator, 0 is default
    463   // * char name[n]      : source file name in ASCII, including null termination
    464 
    465   // avoid interspersing output
    466   std::lock_guard<sys::Mutex> Guard(Mutex);
    467 
    468   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
    469 
    470   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
    471     LLVMPerfJitDebugEntry LineInfo;
    472     DILineInfo &Line = It->second;
    473 
    474     LineInfo.Addr = It->first;
    475     // The function re-created by perf is preceded by a elf
    476     // header. Need to adjust for that, otherwise the results are
    477     // wrong.
    478     LineInfo.Addr += 0x40;
    479     LineInfo.Lineno = Line.Line;
    480     LineInfo.Discrim = Line.Discriminator;
    481 
    482     Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
    483                       sizeof(LineInfo));
    484     Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
    485   }
    486 }
    487 
// There should be only a single event listener per process, otherwise perf gets
// confused. This is that single instance; createPerfJITEventListener() hands
// out a pointer to it.
llvm::ManagedStatic<PerfJITEventListener> PerfListener;
    491 
    492 } // end anonymous namespace
    493 
    494 namespace llvm {
    495 JITEventListener *JITEventListener::createPerfJITEventListener() {
    496   return &*PerfListener;
    497 }
    498 
    499 } // namespace llvm
    500 
    501 LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void)
    502 {
    503   return wrap(JITEventListener::createPerfJITEventListener());
    504 }
    505