Home | History | Annotate | Line # | Download | only in nat
linux-btrace.c revision 1.1.1.9
      1 /* Linux-dependent part of branch trace support for GDB, and GDBserver.
      2 
      3    Copyright (C) 2013-2024 Free Software Foundation, Inc.
      4 
      5    Contributed by Intel Corp. <markus.t.metzger (at) intel.com>
      6 
      7    This file is part of GDB.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 3 of the License, or
     12    (at your option) any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     21 
     22 #include "linux-btrace.h"
     23 #include "gdbsupport/common-regcache.h"
     24 #include "gdbsupport/gdb_wait.h"
     25 #include "x86-cpuid.h"
     26 #include "gdbsupport/filestuff.h"
     27 #include "gdbsupport/scoped_fd.h"
     28 #include "gdbsupport/scoped_mmap.h"
     29 
     30 #include <inttypes.h>
     31 
     32 #include <sys/syscall.h>
     33 
     34 #if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)
     35 #include <unistd.h>
     36 #include <sys/mman.h>
     37 #include <sys/user.h>
     38 #include "nat/gdb_ptrace.h"
     39 #include <sys/types.h>
     40 #include <signal.h>
     41 
/* A branch trace record in perf_event.

   The field layout mirrors the perf sample payload produced for
   PERF_SAMPLE_IP | PERF_SAMPLE_ADDR (see linux_enable_bts); do not
   reorder or resize the members.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};
     51 
/* A perf_event branch trace sample.

   This is the exact wire format of one record in the perf ring buffer;
   perf_event_read_bts casts raw buffer bytes to this type and
   perf_event_sample_ok validates records against sizeof (this).  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};
     61 
/* Identify the cpu we're running on.

   Returns a zeroed btrace_cpu (vendor CV_UNKNOWN) if CPUID is not
   available or the vendor is not recognized.  */
static struct btrace_cpu
btrace_this_cpu (void)
{
  struct btrace_cpu cpu;
  unsigned int eax, ebx, ecx, edx;
  int ok;

  memset (&cpu, 0, sizeof (cpu));

  /* CPUID leaf 0 returns the vendor identification string in
     EBX/EDX/ECX.  */
  ok = x86_cpuid (0, &eax, &ebx, &ecx, &edx);
  if (ok != 0)
    {
      if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
	  && edx == signature_INTEL_edx)
	{
	  unsigned int cpuid, ignore;

	  /* CPUID leaf 1 returns family/model/stepping in EAX.  */
	  ok = x86_cpuid (1, &cpuid, &ignore, &ignore, &ignore);
	  if (ok != 0)
	    {
	      cpu.vendor = CV_INTEL;

	      /* Base family is in bits 8-11; family 0xf adds the
		 extended family from bits 20-27.  */
	      cpu.family = (cpuid >> 8) & 0xf;
	      if (cpu.family == 0xf)
		cpu.family += (cpuid >> 20) & 0xff;

	      /* Base model is in bits 4-7; for families 0x6 and 0xf the
		 extended model (bits 16-19) is shifted into the high
		 nibble.  */
	      cpu.model = (cpuid >> 4) & 0xf;
	      if ((cpu.family == 0x6) || ((cpu.family & 0xf) == 0xf))
		cpu.model += (cpuid >> 12) & 0xf0;
	    }
	}
      else if (ebx == signature_AMD_ebx && ecx == signature_AMD_ecx
	       && edx == signature_AMD_edx)
	cpu.vendor = CV_AMD;
    }

  return cpu;
}
    101 
    102 /* Return non-zero if there is new data in PEVENT; zero otherwise.  */
    103 
    104 static int
    105 perf_event_new_data (const struct perf_event_buffer *pev)
    106 {
    107   return *pev->data_head != pev->last_head;
    108 }
    109 
/* Copy the last SIZE bytes from PEV ending at DATA_HEAD and return a pointer
   to the memory holding the copy.  Returns NULL if SIZE is zero.
   The caller is responsible for freeing the memory.  */

static gdb_byte *
perf_event_read (const struct perf_event_buffer *pev, __u64 data_head,
		 size_t size)
{
  const gdb_byte *begin, *end, *start, *stop;
  gdb_byte *buffer;
  size_t buffer_size;
  __u64 data_tail;

  if (size == 0)
    return NULL;

  /* We should never ask for more data than the buffer can hold.  */
  buffer_size = pev->size;
  gdb_assert (size <= buffer_size);

  /* If we ask for more data than we seem to have, we wrap around and read
     data from the end of the buffer.  This is already handled by the %
     BUFFER_SIZE operation, below.  Here, we just need to make sure that we
     don't underflow.

     Note that this is perfectly OK for perf event buffers where data_head
     doesn't grow indefinitely and instead wraps around to remain within the
     buffer's boundaries.  */
  if (data_head < size)
    data_head += buffer_size;

  gdb_assert (size <= data_head);
  data_tail = data_head - size;

  /* Map the logical [DATA_TAIL, DATA_HEAD) range onto the circular
     buffer.  */
  begin = pev->mem;
  start = begin + data_tail % buffer_size;
  stop = begin + data_head % buffer_size;

  buffer = (gdb_byte *) xmalloc (size);

  if (start < stop)
    memcpy (buffer, start, stop - start);
  else
    {
      /* The range wraps around the end of the buffer: copy the tail part
	 first, then the part at the beginning.  */
      end = begin + buffer_size;

      memcpy (buffer, start, end - start);
      memcpy (buffer + (end - start), begin, stop - begin);
    }

  return buffer;
}
    162 
    163 /* Copy the perf event buffer data from PEV.
    164    Store a pointer to the copy into DATA and its size in SIZE.  */
    165 
    166 static void
    167 perf_event_read_all (struct perf_event_buffer *pev, gdb_byte **data,
    168 		     size_t *psize)
    169 {
    170   size_t size;
    171   __u64 data_head;
    172 
    173   data_head = *pev->data_head;
    174   size = pev->size;
    175 
    176   *data = perf_event_read (pev, data_head, size);
    177   *psize = size;
    178 
    179   pev->last_head = data_head;
    180 }
    181 
    182 /* Try to determine the start address of the Linux kernel.  */
    183 
    184 static uint64_t
    185 linux_determine_kernel_start (void)
    186 {
    187   static uint64_t kernel_start;
    188   static int cached;
    189 
    190   if (cached != 0)
    191     return kernel_start;
    192 
    193   cached = 1;
    194 
    195   gdb_file_up file = gdb_fopen_cloexec ("/proc/kallsyms", "r");
    196   if (file == NULL)
    197     return kernel_start;
    198 
    199   while (!feof (file.get ()))
    200     {
    201       char buffer[1024], symbol[8], *line;
    202       uint64_t addr;
    203       int match;
    204 
    205       line = fgets (buffer, sizeof (buffer), file.get ());
    206       if (line == NULL)
    207 	break;
    208 
    209       match = sscanf (line, "%" SCNx64 " %*[tT] %7s", &addr, symbol);
    210       if (match != 2)
    211 	continue;
    212 
    213       if (strcmp (symbol, "_text") == 0)
    214 	{
    215 	  kernel_start = addr;
    216 	  break;
    217 	}
    218     }
    219 
    220   return kernel_start;
    221 }
    222 
    223 /* Check whether an address is in the kernel.  */
    224 
    225 static inline int
    226 perf_event_is_kernel_addr (uint64_t addr)
    227 {
    228   uint64_t kernel_start;
    229 
    230   kernel_start = linux_determine_kernel_start ();
    231   if (kernel_start != 0ull)
    232     return (addr >= kernel_start);
    233 
    234   /* If we don't know the kernel's start address, let's check the most
    235      significant bit.  This will work at least for 64-bit kernels.  */
    236   return ((addr & (1ull << 63)) != 0);
    237 }
    238 
    239 /* Check whether a perf event record should be skipped.  */
    240 
    241 static inline int
    242 perf_event_skip_bts_record (const struct perf_event_bts *bts)
    243 {
    244   /* The hardware may report branches from kernel into user space.  Branches
    245      from user into kernel space will be suppressed.  We filter the former to
    246      provide a consistent branch trace excluding kernel.  */
    247   return perf_event_is_kernel_addr (bts->from);
    248 }
    249 
    250 /* Perform a few consistency checks on a perf event sample record.  This is
    251    meant to catch cases when we get out of sync with the perf event stream.  */
    252 
    253 static inline int
    254 perf_event_sample_ok (const struct perf_event_sample *sample)
    255 {
    256   if (sample->header.type != PERF_RECORD_SAMPLE)
    257     return 0;
    258 
    259   if (sample->header.size != sizeof (*sample))
    260     return 0;
    261 
    262   return 1;
    263 }
    264 
/* Branch trace is collected in a circular buffer [begin; end) as pairs of from
   and to addresses (plus a header).

   Start points into that buffer at the next sample position.
   We read the collected samples backwards from start.

   While reading the samples, we convert the information into a list of blocks.
   For two adjacent samples s1 and s2, we form a block b such that b.begin =
   s1.to and b.end = s2.from.

   In case the buffer overflows during sampling, one sample may have its lower
   part at the end and its upper part at the beginning of the buffer.

   Returns a heap-allocated vector of blocks; the caller owns it.  */

static std::vector<btrace_block> *
perf_event_read_bts (btrace_target_info *tinfo, const uint8_t *begin,
		     const uint8_t *end, const uint8_t *start, size_t size)
{
  std::vector<btrace_block> *btrace = new std::vector<btrace_block>;
  struct perf_event_sample sample;
  size_t read = 0;
  struct btrace_block block = { 0, 0 };

  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
  reg_buffer_common *regcache = get_thread_regcache_for_ptid (tinfo->ptid);
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  */
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, we're done.  */
      if (begin <= start)
	psample = (const struct perf_event_sample *) start;
      else
	{
	  int missing;

	  /* We're to the left of the ring buffer, we will wrap around and
	     reappear at the very right of the ring buffer.  */

	  missing = (begin - start);
	  start = (end - missing);

	  /* If the entire sample is missing, we're done.  */
	  if (missing == sizeof (sample))
	    psample = (const struct perf_event_sample *) start;
	  else
	    {
	      uint8_t *stack;

	      /* The sample wrapped around.  The lower part is at the end and
		 the upper part is at the beginning of the buffer.  */
	      stack = (uint8_t *) &sample;

	      /* Copy the two parts so we have a contiguous sample.  */
	      memcpy (stack, start, missing);
	      memcpy (stack + missing, begin, sizeof (sample) - missing);

	      psample = &sample;
	    }
	}

      /* A malformed record means we are out of sync with the stream; the
	 rest of the buffer cannot be trusted.  */
      if (!perf_event_sample_ok (psample))
	{
	  warning (_("Branch trace may be incomplete."));
	  break;
	}

      /* Ignore branches originating in the kernel.  */
      if (perf_event_skip_bts_record (&psample->bts))
	continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      btrace->push_back (block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  /* Push the last block (i.e. the first one of inferior execution), as well.
     We don't know where it ends, but we know where it starts.  If we're
     reading delta trace, we can fill in the start address later on.
     Otherwise we will prune it.  */
  block.begin = 0;
  btrace->push_back (block);

  return btrace;
}
    364 
    365 /* Check whether an Intel cpu supports BTS.  */
    366 
    367 static int
    368 intel_supports_bts (const struct btrace_cpu *cpu)
    369 {
    370   switch (cpu->family)
    371     {
    372     case 0x6:
    373       switch (cpu->model)
    374 	{
    375 	case 0x1a: /* Nehalem */
    376 	case 0x1f:
    377 	case 0x1e:
    378 	case 0x2e:
    379 	case 0x25: /* Westmere */
    380 	case 0x2c:
    381 	case 0x2f:
    382 	case 0x2a: /* Sandy Bridge */
    383 	case 0x2d:
    384 	case 0x3a: /* Ivy Bridge */
    385 
    386 	  /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
    387 	     "from" information afer an EIST transition, T-states, C1E, or
    388 	     Adaptive Thermal Throttling.  */
    389 	  return 0;
    390 	}
    391     }
    392 
    393   return 1;
    394 }
    395 
    396 /* Check whether the cpu supports BTS.  */
    397 
    398 static int
    399 cpu_supports_bts (void)
    400 {
    401   struct btrace_cpu cpu;
    402 
    403   cpu = btrace_this_cpu ();
    404   switch (cpu.vendor)
    405     {
    406     default:
    407       /* Don't know about others.  Let's assume they do.  */
    408       return 1;
    409 
    410     case CV_INTEL:
    411       return intel_supports_bts (&cpu);
    412 
    413     case CV_AMD:
    414       return 0;
    415     }
    416 }
    417 
    418 /* Return the Intel PT config bitmask from the linux sysfs for a FEATURE.
    419    The bits can be used in the perf_event configuration when enabling PT.
    420    Callers of this function are expected to check the availability of the
    421    feature first via linux_supports_pt_feature.  */
    422 
    423 static uint64_t
    424 linux_read_pt_config_bitmask (const char *feature)
    425 {
    426   uint64_t config_bitmask = 0;
    427   std::string filename
    428       = std::string ("/sys/bus/event_source/devices/intel_pt/format/")
    429       + feature;
    430 
    431   gdb_file_up file = gdb_fopen_cloexec (filename.c_str (), "r");
    432   if (file.get () == nullptr)
    433     error (_("Failed to determine config from %s."),  filename.c_str ());
    434 
    435   uint8_t start, end;
    436   int found = fscanf (file.get (), "config:%hhu-%hhu", &start, &end);
    437   if (found == 1)
    438     end = start;
    439   else if (found != 2)
    440     error (_("Failed to determine config from %s."), filename.c_str ());
    441 
    442   for (uint8_t i = start; i <= end; ++i)
    443     config_bitmask |= (1ULL << i);
    444 
    445   return config_bitmask;
    446 }
    447 
    448 /* Check whether the linux target supports the Intel PT FEATURE.  */
    449 
    450 static bool
    451 linux_supports_pt_feature (const char *feature)
    452 {
    453   std::string filename
    454     = std::string ("/sys/bus/event_source/devices/intel_pt/caps/") + feature;
    455 
    456   gdb_file_up file = gdb_fopen_cloexec (filename.c_str (), "r");
    457   if (file.get () == nullptr)
    458     return false;
    459 
    460   int status, found = fscanf (file.get (), "%d", &status);
    461   if (found != 1)
    462     {
    463       warning (_("Failed to determine %s support from %s."), feature,
    464 	       filename.c_str ());
    465       return false;
    466     }
    467 
    468   return (status == 1);
    469 }
    470 
    471 /* The perf_event_open syscall failed.  Try to print a helpful error
    472    message.  */
    473 
    474 static void
    475 diagnose_perf_event_open_fail ()
    476 {
    477   int orig_errno = errno;
    478   switch (orig_errno)
    479     {
    480     case EPERM:
    481     case EACCES:
    482       {
    483 	static const char filename[] = "/proc/sys/kernel/perf_event_paranoid";
    484 	errno = 0;
    485 	gdb_file_up file = gdb_fopen_cloexec (filename, "r");
    486 	if (file.get () == nullptr)
    487 	  error (_("Failed to open %s (%s).  Your system does not support "
    488 		   "process recording."), filename, safe_strerror (errno));
    489 
    490 	int level, found = fscanf (file.get (), "%d", &level);
    491 	if (found == 1 && level > 2)
    492 	  error (_("You do not have permission to record the process.  "
    493 		   "Try setting %s to 2 or less."), filename);
    494       }
    495 
    496       break;
    497     }
    498 
    499   error (_("Failed to start recording: %s"), safe_strerror (orig_errno));
    500 }
    501 
/* Get the linux version of a btrace_target_info.

   All btrace_target_info objects handed out by this file are
   linux_btrace_target_info; checked_static_cast verifies the downcast in
   debug builds.  */

static linux_btrace_target_info *
get_linux_btrace_target_info (btrace_target_info *gtinfo)
{
  return gdb::checked_static_cast<linux_btrace_target_info *> (gtinfo);
}
    509 
/* Enable branch tracing in BTS format.

   PTID identifies the thread to trace; CONF holds the requested trace
   buffer size.  Returns a heap-allocated target info owned by the caller;
   throws on failure.  */

static struct btrace_target_info *
linux_enable_bts (ptid_t ptid, const struct btrace_config_bts *conf)
{
  size_t size, pages;
  __u64 data_offset;
  int pid, pg;

  if (!cpu_supports_bts ())
    error (_("BTS support has been disabled for the target cpu."));

  std::unique_ptr<linux_btrace_target_info> tinfo
    { std::make_unique<linux_btrace_target_info> (ptid) };

  tinfo->conf.format = BTRACE_FORMAT_BTS;

  /* Request one perf event sample per retired branch instruction.  */
  tinfo->attr.size = sizeof (tinfo->attr);
  tinfo->attr.type = PERF_TYPE_HARDWARE;
  tinfo->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  tinfo->attr.sample_period = 1;

  /* We sample from and to address.  */
  tinfo->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  tinfo->attr.exclude_kernel = 1;
  tinfo->attr.exclude_hv = 1;
  tinfo->attr.exclude_idle = 1;

  /* Trace the lwp if given; otherwise fall back to the pid.  */
  pid = ptid.lwp ();
  if (pid == 0)
    pid = ptid.pid ();

  errno = 0;
  scoped_fd fd (syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0));
  if (fd.get () < 0)
    diagnose_perf_event_open_fail ();

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  scoped_mmap data;
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      size = (size_t) data_size;

      /* The mapping consists of the kernel header page followed by the
	 data pages.  */
      length = size + PAGE_SIZE;

      /* Check for overflows.  */
      if ((__u64) length != data_size + PAGE_SIZE)
	continue;

      errno = 0;
      /* The number of pages we request needs to be a power of two.  */
      data.reset (nullptr, length, PROT_READ, MAP_SHARED, fd.get (), 0);
      if (data.get () != MAP_FAILED)
	break;
    }

  if (pages == 0)
    error (_("Failed to map trace buffer: %s."), safe_strerror (errno));

  struct perf_event_mmap_page *header = (struct perf_event_mmap_page *)
    data.get ();
  data_offset = PAGE_SIZE;

#if defined (PERF_ATTR_SIZE_VER5)
  /* Newer kernels report the actual data offset and size in the header;
     prefer those when the mmap'ed header is recent enough to have them.  */
  if (offsetof (struct perf_event_mmap_page, data_size) <= header->size)
    {
      __u64 data_size;

      data_offset = header->data_offset;
      data_size = header->data_size;

      size = (unsigned int) data_size;

      /* Check for overflows.  */
      if ((__u64) size != data_size)
	error (_("Failed to determine trace buffer size."));
    }
#endif /* defined (PERF_ATTR_SIZE_VER5) */

  /* Transfer ownership of the mapping and file descriptor to TINFO; they
     are released again in linux_disable_bts.  */
  tinfo->pev.size = size;
  tinfo->pev.data_head = &header->data_head;
  tinfo->pev.mem = (const uint8_t *) data.release () + data_offset;
  tinfo->pev.last_head = 0ull;
  tinfo->header = header;
  tinfo->file = fd.release ();

  tinfo->conf.bts.size = (unsigned int) size;
  return tinfo.release ();
}
    622 
    623 #if defined (PERF_ATTR_SIZE_VER5)
    624 
    625 /* Determine the event type.  */
    626 
    627 static int
    628 perf_event_pt_event_type ()
    629 {
    630   static const char filename[] = "/sys/bus/event_source/devices/intel_pt/type";
    631 
    632   errno = 0;
    633   gdb_file_up file = gdb_fopen_cloexec (filename, "r");
    634   if (file.get () == nullptr)
    635     switch (errno)
    636       {
    637       case EACCES:
    638       case EFAULT:
    639       case EPERM:
    640 	error (_("Failed to open %s (%s).  You do not have permission "
    641 		 "to use Intel PT."), filename, safe_strerror (errno));
    642 
    643       case ENOTDIR:
    644       case ENOENT:
    645 	error (_("Failed to open %s (%s).  Your system does not support "
    646 		 "Intel PT."), filename, safe_strerror (errno));
    647 
    648       default:
    649 	error (_("Failed to open %s: %s."), filename, safe_strerror (errno));
    650       }
    651 
    652   int type, found = fscanf (file.get (), "%d", &type);
    653   if (found != 1)
    654     error (_("Failed to read the PT event type from %s."), filename);
    655 
    656   return type;
    657 }
    658 
/* Enable branch tracing in Intel Processor Trace format.

   PTID identifies the thread to trace; CONF holds the requested buffer
   size and optional PT features.  Returns a heap-allocated target info
   owned by the caller; throws on failure.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  size_t pages;
  int pid, pg;

  /* Trace the lwp if given; otherwise fall back to the pid.  */
  pid = ptid.lwp ();
  if (pid == 0)
    pid = ptid.pid ();

  std::unique_ptr<linux_btrace_target_info> tinfo
    { std::make_unique<linux_btrace_target_info> (ptid) };

  tinfo->conf.format = BTRACE_FORMAT_PT;

  tinfo->attr.size = sizeof (tinfo->attr);
  tinfo->attr.type = perf_event_pt_event_type ();

  tinfo->attr.exclude_kernel = 1;
  tinfo->attr.exclude_hv = 1;
  tinfo->attr.exclude_idle = 1;

  /* Enable PTWRITE packets if requested and supported; silently drop the
     request otherwise.  */
  if (conf->ptwrite && linux_supports_pt_feature ("ptwrite"))
    {
      tinfo->attr.config |= linux_read_pt_config_bitmask ("ptw");
      tinfo->conf.pt.ptwrite = true;
    }

  /* Event tracing is an error if unsupported, unlike ptwrite above.  */
  if (conf->event_tracing)
    {
      if (linux_supports_pt_feature ("event_trace"))
	{
	  tinfo->attr.config |= linux_read_pt_config_bitmask ("event");
	  tinfo->conf.pt.event_tracing = true;
	}
      else
	error (_("Event tracing for record btrace pt is not supported."));
    }

  errno = 0;
  scoped_fd fd (syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0));
  if (fd.get () < 0)
    diagnose_perf_event_open_fail ();

  /* Allocate the configuration page. */
  scoped_mmap data (nullptr, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd.get (), 0);
  if (data.get () == MAP_FAILED)
    error (_("Failed to map trace user page: %s."), safe_strerror (errno));

  struct perf_event_mmap_page *header = (struct perf_event_mmap_page *)
    data.get ();

  /* Place the aux buffer directly behind the data buffer.  */
  header->aux_offset = header->data_offset + header->data_size;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  scoped_mmap aux;
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      length = (size_t) data_size;

      /* Check for overflows.  */
      if ((__u64) length != data_size)
	continue;

      /* The kernel reads the requested aux buffer size from the header.  */
      header->aux_size = data_size;

      errno = 0;
      aux.reset (nullptr, length, PROT_READ, MAP_SHARED, fd.get (),
		 header->aux_offset);
      if (aux.get () != MAP_FAILED)
	break;
    }

  if (pages == 0)
    error (_("Failed to map trace buffer: %s."), safe_strerror (errno));

  /* Transfer ownership of both mappings and the file descriptor to TINFO;
     they are released again in linux_disable_pt.  */
  tinfo->pev.size = aux.size ();
  tinfo->pev.mem = (const uint8_t *) aux.release ();
  tinfo->pev.data_head = &header->aux_head;
  tinfo->header = (struct perf_event_mmap_page *) data.release ();
  gdb_assert (tinfo->header == header);
  tinfo->file = fd.release ();

  tinfo->conf.pt.size = (unsigned int) tinfo->pev.size;
  return tinfo.release ();
}
    771 
    772 #else /* !defined (PERF_ATTR_SIZE_VER5) */
    773 
/* Stub used when <linux/perf_event.h> predates PERF_ATTR_SIZE_VER5 and
   thus lacks the aux buffer support that Intel PT requires.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  error (_("Intel Processor Trace support was disabled at compile time."));
}
    779 
    780 #endif /* !defined (PERF_ATTR_SIZE_VER5) */
    781 
    782 /* See linux-btrace.h.  */
    783 
    784 struct btrace_target_info *
    785 linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
    786 {
    787   switch (conf->format)
    788     {
    789     case BTRACE_FORMAT_NONE:
    790       error (_("Bad branch trace format."));
    791 
    792     default:
    793       error (_("Unknown branch trace format."));
    794 
    795     case BTRACE_FORMAT_BTS:
    796       return linux_enable_bts (ptid, &conf->bts);
    797 
    798     case BTRACE_FORMAT_PT:
    799       return linux_enable_pt (ptid, &conf->pt);
    800     }
    801 }
    802 
/* Disable BTS tracing.  */

static void
linux_disable_bts (struct linux_btrace_target_info *tinfo)
{
  /* Unmap the header page together with the data buffer mapped behind it
     (see linux_enable_bts), then close the perf event descriptor.  */
  munmap ((void *) tinfo->header, tinfo->pev.size + PAGE_SIZE);
  close (tinfo->file);
}
    811 
/* Disable Intel Processor Trace tracing.  */

static void
linux_disable_pt (struct linux_btrace_target_info *tinfo)
{
  /* The aux buffer and the configuration page were mapped separately
     (see linux_enable_pt), so unmap them separately, too.  */
  munmap ((void *) tinfo->pev.mem, tinfo->pev.size);
  munmap ((void *) tinfo->header, PAGE_SIZE);
  close (tinfo->file);
}
    821 
    822 /* See linux-btrace.h.  */
    823 
    824 enum btrace_error
    825 linux_disable_btrace (struct btrace_target_info *gtinfo)
    826 {
    827   linux_btrace_target_info *tinfo
    828     = get_linux_btrace_target_info (gtinfo);
    829 
    830   switch (tinfo->conf.format)
    831     {
    832     case BTRACE_FORMAT_NONE:
    833       return BTRACE_ERR_NOT_SUPPORTED;
    834 
    835     case BTRACE_FORMAT_BTS:
    836       linux_disable_bts (tinfo);
    837       delete tinfo;
    838       return BTRACE_ERR_NONE;
    839 
    840     case BTRACE_FORMAT_PT:
    841       linux_disable_pt (tinfo);
    842       delete tinfo;
    843       return BTRACE_ERR_NONE;
    844     }
    845 
    846   return BTRACE_ERR_NOT_SUPPORTED;
    847 }
    848 
/* Read branch trace data in BTS format for the thread given by TINFO into
   BTRACE using the TYPE reading method.

   Returns BTRACE_ERR_NONE on success; BTRACE_ERR_OVERFLOW when a delta
   read cannot be satisfied because the buffer wrapped.  */

static enum btrace_error
linux_read_bts (btrace_data_bts *btrace, linux_btrace_target_info *tinfo,
		enum btrace_read_type type)
{
  const uint8_t *begin, *end, *start;
  size_t buffer_size, size;
  __u64 data_head = 0, data_tail;
  unsigned int retries = 5;

  /* For delta reads, we return at least the partial last block containing
     the current PC.  */
  if (type == BTRACE_READ_NEW && !perf_event_new_data (&tinfo->pev))
    return BTRACE_ERR_NONE;

  buffer_size = tinfo->pev.size;
  data_tail = tinfo->pev.last_head;

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      /* Re-sample the data head on each iteration; the kernel may still be
	 appending records while we read.  */
      data_head = *tinfo->pev.data_head;

      /* Delete any leftover trace from the previous iteration.  */
      delete btrace->blocks;
      btrace->blocks = nullptr;

      if (type == BTRACE_READ_DELTA)
	{
	  __u64 data_size;

	  /* Determine the number of bytes to read and check for buffer
	     overflows.  */

	  /* Check for data head overflows.  We might be able to recover from
	     those but they are very unlikely and it's not really worth the
	     effort, I think.  */
	  if (data_head < data_tail)
	    return BTRACE_ERR_OVERFLOW;

	  /* If the buffer is smaller than the trace delta, we overflowed.  */
	  data_size = data_head - data_tail;
	  if (buffer_size < data_size)
	    return BTRACE_ERR_OVERFLOW;

	  /* DATA_SIZE <= BUFFER_SIZE and therefore fits into a size_t.  */
	  size = (size_t) data_size;
	}
      else
	{
	  /* Read the entire buffer.  */
	  size = buffer_size;

	  /* Adjust the size if the buffer has not overflowed, yet.  */
	  if (data_head < size)
	    size = (size_t) data_head;
	}

      /* Data_head keeps growing; the buffer itself is circular.  */
      begin = tinfo->pev.mem;
      start = begin + data_head % buffer_size;

      if (data_head <= buffer_size)
	end = start;
      else
	end = begin + tinfo->pev.size;

      btrace->blocks = perf_event_read_bts (tinfo, begin, end, start, size);

      /* The stopping thread notifies its ptracer before it is scheduled out.
	 On multi-core systems, the debugger might therefore run while the
	 kernel might be writing the last branch trace records.

	 Let's check whether the data head moved while we read the trace.  */
      if (data_head == *tinfo->pev.data_head)
	break;
    }

  tinfo->pev.last_head = data_head;

  /* Prune the incomplete last block (i.e. the first one of inferior execution)
     if we're not doing a delta read.  There is no way of filling in its zeroed
     BEGIN element.  */
  if (!btrace->blocks->empty () && type != BTRACE_READ_DELTA)
    btrace->blocks->pop_back ();

  return BTRACE_ERR_NONE;
}
    939 
    940 /* Fill in the Intel Processor Trace configuration information.  */
    941 
    942 static void
    943 linux_fill_btrace_pt_config (struct btrace_data_pt_config *conf)
    944 {
    945   conf->cpu = btrace_this_cpu ();
    946 }
    947 
    948 /* Read branch trace data in Intel Processor Trace format for the thread
    949    given by TINFO into BTRACE using the TYPE reading method.  */
    950 
    951 static enum btrace_error
    952 linux_read_pt (btrace_data_pt *btrace, linux_btrace_target_info *tinfo,
    953 	       enum btrace_read_type type)
    954 {
    955   linux_fill_btrace_pt_config (&btrace->config);
    956 
    957   switch (type)
    958     {
    959     case BTRACE_READ_DELTA:
    960       /* We don't support delta reads.  The data head (i.e. aux_head) wraps
    961 	 around to stay inside the aux buffer.  */
    962       return BTRACE_ERR_NOT_SUPPORTED;
    963 
    964     case BTRACE_READ_NEW:
    965       if (!perf_event_new_data (&tinfo->pev))
    966 	return BTRACE_ERR_NONE;
    967       [[fallthrough]];
    968     case BTRACE_READ_ALL:
    969       perf_event_read_all (&tinfo->pev, &btrace->data, &btrace->size);
    970       return BTRACE_ERR_NONE;
    971     }
    972 
    973   internal_error (_("Unknown btrace read type."));
    974 }
    975 
    976 /* See linux-btrace.h.  */
    977 
    978 enum btrace_error
    979 linux_read_btrace (struct btrace_data *btrace,
    980 		   struct btrace_target_info *gtinfo,
    981 		   enum btrace_read_type type)
    982 {
    983   linux_btrace_target_info *tinfo
    984     = get_linux_btrace_target_info (gtinfo);
    985 
    986   switch (tinfo->conf.format)
    987     {
    988     case BTRACE_FORMAT_NONE:
    989       return BTRACE_ERR_NOT_SUPPORTED;
    990 
    991     case BTRACE_FORMAT_BTS:
    992       /* We read btrace in BTS format.  */
    993       btrace->format = BTRACE_FORMAT_BTS;
    994       btrace->variant.bts.blocks = NULL;
    995 
    996       return linux_read_bts (&btrace->variant.bts, tinfo, type);
    997 
    998     case BTRACE_FORMAT_PT:
    999       /* We read btrace in Intel Processor Trace format.  */
   1000       btrace->format = BTRACE_FORMAT_PT;
   1001       btrace->variant.pt.data = NULL;
   1002       btrace->variant.pt.size = 0;
   1003 
   1004       return linux_read_pt (&btrace->variant.pt, tinfo, type);
   1005     }
   1006 
   1007   internal_error (_("Unknown branch trace format."));
   1008 }
   1009 
   1010 /* See linux-btrace.h.  */
   1011 
   1012 const struct btrace_config *
   1013 linux_btrace_conf (const struct btrace_target_info *tinfo)
   1014 {
   1015   return &tinfo->conf;
   1016 }
   1017 
   1018 #else /* !HAVE_LINUX_PERF_EVENT_H */
   1019 
/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  /* Stub for hosts without perf_event support (see the #if at the top of
     this file): branch tracing cannot be enabled, so indicate failure by
     returning no target info.  */
  return NULL;
}
   1027 
/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  /* Stub for hosts without perf_event support: tracing can never have been
     enabled, so there is nothing to disable.  */
  return BTRACE_ERR_NOT_SUPPORTED;
}
   1035 
/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  /* Stub for hosts without perf_event support: no trace can have been
     collected, so there is nothing to read.  */
  return BTRACE_ERR_NOT_SUPPORTED;
}
   1045 
/* See linux-btrace.h.  */

const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  /* Stub for hosts without perf_event support: there is no branch trace
     configuration to report.  */
  return NULL;
}
   1053 
   1054 #endif /* !HAVE_LINUX_PERF_EVENT_H */
   1055