Home | History | Annotate | Line # | Download | only in xray
      1 #include "cpuid.h"
      2 #include "sanitizer_common/sanitizer_common.h"
      3 #if !SANITIZER_FUCHSIA
      4 #include "sanitizer_common/sanitizer_posix.h"
      5 #endif
      6 #include "xray_defs.h"
      7 #include "xray_interface_internal.h"
      8 
      9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
     10 #include <sys/types.h>
     11 #if SANITIZER_OPENBSD
     12 #include <sys/time.h>
     13 #include <machine/cpu.h>
     14 #endif
     15 #include <sys/sysctl.h>
     16 #elif SANITIZER_FUCHSIA
     17 #include <zircon/syscalls.h>
     18 #endif
     19 
     20 #include <atomic>
     21 #include <cstdint>
     22 #include <errno.h>
     23 #include <fcntl.h>
     24 #include <iterator>
     25 #include <limits>
     26 #include <tuple>
     27 #include <unistd.h>
     28 
     29 namespace __xray {
     30 
     31 #if SANITIZER_LINUX
     32 static std::pair<ssize_t, bool>
     33 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
     34   auto BytesToRead = std::distance(Begin, End);
     35   ssize_t BytesRead;
     36   ssize_t TotalBytesRead = 0;
     37   while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
     38     if (BytesRead == -1) {
     39       if (errno == EINTR)
     40         continue;
     41       Report("Read error; errno = %d\n", errno);
     42       return std::make_pair(TotalBytesRead, false);
     43     }
     44 
     45     TotalBytesRead += BytesRead;
     46     BytesToRead -= BytesRead;
     47     Begin += BytesRead;
     48   }
     49   return std::make_pair(TotalBytesRead, true);
     50 }
     51 
     52 static bool readValueFromFile(const char *Filename,
     53                               long long *Value) XRAY_NEVER_INSTRUMENT {
     54   int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
     55   if (Fd == -1)
     56     return false;
     57   static constexpr size_t BufSize = 256;
     58   char Line[BufSize] = {};
     59   ssize_t BytesRead;
     60   bool Success;
     61   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
     62   close(Fd);
     63   if (!Success)
     64     return false;
     65   const char *End = nullptr;
     66   long long Tmp = internal_simple_strtoll(Line, &End, 10);
     67   bool Result = false;
     68   if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
     69     *Value = Tmp;
     70     Result = true;
     71   }
     72   return Result;
     73 }
     74 
     75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
     76   long long TSCFrequency = -1;
     77   if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
     78                         &TSCFrequency)) {
     79     TSCFrequency *= 1000;
     80   } else if (readValueFromFile(
     81                  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
     82                  &TSCFrequency)) {
     83     TSCFrequency *= 1000;
     84   } else {
     85     Report("Unable to determine CPU frequency for TSC accounting.\n");
     86   }
     87   return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
     88 }
     89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
     90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
     91     long long TSCFrequency = -1;
     92     size_t tscfreqsz = sizeof(TSCFrequency);
     93 #if SANITIZER_OPENBSD
     94     int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
     95     if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
     96 #elif SANITIZER_MAC
     97     if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
     98                               &tscfreqsz, NULL, 0) != -1) {
     99 
    100 #else
    101     if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
    102                               NULL, 0) != -1) {
    103 #endif
    104         return static_cast<uint64_t>(TSCFrequency);
    105     } else {
    106       Report("Unable to determine CPU frequency for TSC accounting.\n");
    107     }
    108 
    109     return 0;
    110 }
    111 #elif !SANITIZER_FUCHSIA
    112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
    113     /* Not supported */
    114     return 0;
    115 }
    116 #endif
    117 
// Single-byte x86 opcodes written into sleds.
static constexpr uint8_t CallOpCode = 0xe8; // call rel32
static constexpr uint8_t JmpOpCode = 0xe9;  // jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;  // ret

// Two-byte sequences stored as one 16-bit value over the start of a sled.
// x86 is little-endian, so the low byte of each constant is emitted first.
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: mov r10d, imm32
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: two-byte nop
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20

// Range of displacements that fit in a signed 32-bit relative offset.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
    129 
    130 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
    131                         const XRaySledEntry &Sled,
    132                         void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
    133   // Here we do the dance of replacing the following sled:
    134   //
    135   // xray_sled_n:
    136   //   jmp +9
    137   //   <9 byte nop>
    138   //
    139   // With the following:
    140   //
    141   //   mov r10d, <function id>
    142   //   call <relative 32bit offset to entry trampoline>
    143   //
    144   // We need to do this in the following order:
    145   //
    146   // 1. Put the function id first, 2 bytes from the start of the sled (just
    147   // after the 2-byte jmp instruction).
    148   // 2. Put the call opcode 6 bytes from the start of the sled.
    149   // 3. Put the relative offset 7 bytes from the start of the sled.
    150   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
    151   // opcode and first operand.
    152   //
    153   // Prerequisite is to compute the relative offset to the trampoline's address.
    154   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
    155                              (static_cast<int64_t>(Sled.Address) + 11);
    156   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    157     Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
    158            Trampoline, reinterpret_cast<void *>(Sled.Address));
    159     return false;
    160   }
    161   if (Enable) {
    162     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    163     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
    164     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    165     std::atomic_store_explicit(
    166         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
    167         std::memory_order_release);
    168   } else {
    169     std::atomic_store_explicit(
    170         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
    171         std::memory_order_release);
    172     // FIXME: Write out the nops still?
    173   }
    174   return true;
    175 }
    176 
    177 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
    178                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
    179   // Here we do the dance of replacing the following sled:
    180   //
    181   // xray_sled_n:
    182   //   ret
    183   //   <10 byte nop>
    184   //
    185   // With the following:
    186   //
    187   //   mov r10d, <function id>
    188   //   jmp <relative 32bit offset to exit trampoline>
    189   //
    190   // 1. Put the function id first, 2 bytes from the start of the sled (just
    191   // after the 1-byte ret instruction).
    192   // 2. Put the jmp opcode 6 bytes from the start of the sled.
    193   // 3. Put the relative offset 7 bytes from the start of the sled.
    194   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
    195   // opcode and first operand.
    196   //
    197   // Prerequisite is to compute the relative offset fo the
    198   // __xray_FunctionExit function's address.
    199   int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
    200                              (static_cast<int64_t>(Sled.Address) + 11);
    201   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    202     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
    203            __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
    204     return false;
    205   }
    206   if (Enable) {
    207     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    208     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
    209     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    210     std::atomic_store_explicit(
    211         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
    212         std::memory_order_release);
    213   } else {
    214     std::atomic_store_explicit(
    215         reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
    216         std::memory_order_release);
    217     // FIXME: Write out the nops still?
    218   }
    219   return true;
    220 }
    221 
    222 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
    223                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
    224   // Here we do the dance of replacing the tail call sled with a similar
    225   // sequence as the entry sled, but calls the tail exit sled instead.
    226   int64_t TrampolineOffset =
    227       reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
    228       (static_cast<int64_t>(Sled.Address) + 11);
    229   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    230     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
    231            __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
    232     return false;
    233   }
    234   if (Enable) {
    235     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    236     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
    237     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    238     std::atomic_store_explicit(
    239         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
    240         std::memory_order_release);
    241   } else {
    242     std::atomic_store_explicit(
    243         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
    244         std::memory_order_release);
    245     // FIXME: Write out the nops still?
    246   }
    247   return true;
    248 }
    249 
    250 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
    251                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
    252   // Here we do the dance of replacing the following sled:
    253   //
    254   // In Version 0:
    255   //
    256   // xray_sled_n:
    257   //   jmp +20          // 2 bytes
    258   //   ...
    259   //
    260   // With the following:
    261   //
    262   //   nopw             // 2 bytes*
    263   //   ...
    264   //
    265   //
    266   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
    267   //
    268   // ---
    269   //
    270   // In Version 1:
    271   //
    272   //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
    273   //   to a jmp, use 15 bytes instead.
    274   //
    275   if (Enable) {
    276     std::atomic_store_explicit(
    277         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
    278         std::memory_order_release);
    279   } else {
    280     switch (Sled.Version) {
    281     case 1:
    282       std::atomic_store_explicit(
    283           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
    284           std::memory_order_release);
    285       break;
    286     case 0:
    287     default:
    288       std::atomic_store_explicit(
    289           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
    290           std::memory_order_release);
    291       break;
    292     }
    293     }
    294   return false;
    295 }
    296 
    297 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
    298                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
    299   // Here we do the dance of replacing the following sled:
    300   //
    301   // xray_sled_n:
    302   //   jmp +20          // 2 byte instruction
    303   //   ...
    304   //
    305   // With the following:
    306   //
    307   //   nopw             // 2 bytes
    308   //   ...
    309   //
    310   //
    311   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
    312   // The 20 byte sled stashes three argument registers, calls the trampoline,
    313   // unstashes the registers and returns. If the arguments are already in
    314   // the correct registers, the stashing and unstashing become equivalently
    315   // sized nops.
    316   if (Enable) {
    317     std::atomic_store_explicit(
    318         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
    319         std::memory_order_release);
    320   } else {
    321       std::atomic_store_explicit(
    322           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
    323           std::memory_order_release);
    324   }
    325   return false;
    326 }
    327 
    328 #if !SANITIZER_FUCHSIA
    329 // We determine whether the CPU we're running on has the correct features we
    330 // need. In x86_64 this will be rdtscp support.
    331 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
    332   unsigned int EAX, EBX, ECX, EDX;
    333 
    334   // We check whether rdtscp support is enabled. According to the x86_64 manual,
    335   // level should be set at 0x80000001, and we should have a look at bit 27 in
    336   // EDX. That's 0x8000000 (or 1u << 27).
    337   __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
    338     : "0"(0x80000001));
    339   if (!(EDX & (1u << 27))) {
    340     Report("Missing rdtscp support.\n");
    341     return false;
    342   }
    343   // Also check whether we can determine the CPU frequency, since if we cannot,
    344   // we should use the emulated TSC instead.
    345   if (!getTSCFrequency()) {
    346     Report("Unable to determine CPU frequency.\n");
    347     return false;
    348   }
    349   return true;
    350 }
    351 #endif
    352 
    353 } // namespace __xray
    354