Home | History | Annotate | Line # | Download | only in nat
linux-btrace.c revision 1.1.1.2
      1 /* Linux-dependent part of branch trace support for GDB, and GDBserver.
      2 
      3    Copyright (C) 2013-2015 Free Software Foundation, Inc.
      4 
      5    Contributed by Intel Corp. <markus.t.metzger (at) intel.com>
      6 
      7    This file is part of GDB.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 3 of the License, or
     12    (at your option) any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     21 
     22 #include "common-defs.h"
     23 #include "linux-btrace.h"
     24 #include "common-regcache.h"
     25 #include "gdb_wait.h"
     26 #include "x86-cpuid.h"
     27 
     28 #ifdef HAVE_SYS_SYSCALL_H
     29 #include <sys/syscall.h>
     30 #endif
     31 
     32 #if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)
     33 #include <unistd.h>
     34 #include <sys/mman.h>
     35 #include <sys/user.h>
     36 #include <sys/ptrace.h>
     37 #include <sys/types.h>
     38 #include <signal.h>
     39 #include <sys/utsname.h>
     40 
/* A branch trace record in perf_event.

   Layout matches the sample payload selected in linux_enable_bts via
   PERF_SAMPLE_IP | PERF_SAMPLE_ADDR.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};
     50 
/* A perf_event branch trace sample.

   The header's size field is validated against sizeof (this struct) in
   perf_event_sample_ok.  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};
     60 
     61 /* Identify the cpu we're running on.  */
     62 static struct btrace_cpu
     63 btrace_this_cpu (void)
     64 {
     65   struct btrace_cpu cpu;
     66   unsigned int eax, ebx, ecx, edx;
     67   int ok;
     68 
     69   memset (&cpu, 0, sizeof (cpu));
     70 
     71   ok = x86_cpuid (0, &eax, &ebx, &ecx, &edx);
     72   if (ok != 0)
     73     {
     74       if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
     75 	  && edx == signature_INTEL_edx)
     76 	{
     77 	  unsigned int cpuid, ignore;
     78 
     79 	  ok = x86_cpuid (1, &cpuid, &ignore, &ignore, &ignore);
     80 	  if (ok != 0)
     81 	    {
     82 	      cpu.vendor = CV_INTEL;
     83 
     84 	      cpu.family = (cpuid >> 8) & 0xf;
     85 	      cpu.model = (cpuid >> 4) & 0xf;
     86 
     87 	      if (cpu.family == 0x6)
     88 		cpu.model += (cpuid >> 12) & 0xf0;
     89 	    }
     90 	}
     91     }
     92 
     93   return cpu;
     94 }
     95 
     96 /* Return non-zero if there is new data in PEVENT; zero otherwise.  */
     97 
     98 static int
     99 perf_event_new_data (const struct perf_event_buffer *pev)
    100 {
    101   return *pev->data_head != pev->last_head;
    102 }
    103 
/* Try to determine the size of a pointer in bits for the OS.

   This is the same as the size of a pointer for the inferior process
   except when a 32-bit inferior is running on a 64-bit OS.  This comment
   describes linux_determine_kernel_ptr_bits, defined further below.  */
    108 
    109 /* Copy the last SIZE bytes from PEV ending at DATA_HEAD and return a pointer
    110    to the memory holding the copy.
    111    The caller is responsible for freeing the memory.  */
    112 
    113 static gdb_byte *
    114 perf_event_read (const struct perf_event_buffer *pev, unsigned long data_head,
    115 		 unsigned long size)
    116 {
    117   const gdb_byte *begin, *end, *start, *stop;
    118   gdb_byte *buffer;
    119   unsigned long data_tail, buffer_size;
    120 
    121   if (size == 0)
    122     return NULL;
    123 
    124   gdb_assert (size <= data_head);
    125   data_tail = data_head - size;
    126 
    127   buffer_size = pev->size;
    128   begin = pev->mem;
    129   start = begin + data_tail % buffer_size;
    130   stop = begin + data_head % buffer_size;
    131 
    132   buffer = xmalloc (size);
    133 
    134   if (start < stop)
    135     memcpy (buffer, start, stop - start);
    136   else
    137     {
    138       end = begin + buffer_size;
    139 
    140       memcpy (buffer, start, end - start);
    141       memcpy (buffer + (end - start), begin, stop - begin);
    142     }
    143 
    144   return buffer;
    145 }
    146 
    147 /* Copy the perf event buffer data from PEV.
    148    Store a pointer to the copy into DATA and its size in SIZE.  */
    149 
    150 static void
    151 perf_event_read_all (struct perf_event_buffer *pev, gdb_byte **data,
    152 		     unsigned long *psize)
    153 {
    154   unsigned long data_head, size;
    155 
    156   data_head = *pev->data_head;
    157 
    158   size = pev->size;
    159   if (data_head < size)
    160     size = data_head;
    161 
    162   *data = perf_event_read (pev, data_head, size);
    163   *psize = size;
    164 
    165   pev->last_head = data_head;
    166 }
    167 
/* Determine the event type.
   Returns zero on success and fills in TYPE; returns -1 otherwise.  */

static int
perf_event_pt_event_type (int *type)
{
  static const char filename[]
    = "/sys/bus/event_source/devices/intel_pt/type";
  FILE *file;
  int errcode;

  file = fopen (filename, "r");
  if (file == NULL)
    return -1;

  /* The sysfs file contains a single decimal number.  */
  errcode = (fscanf (file, "%d", type) == 1) ? 0 : -1;

  fclose (file);
  return errcode;
}
    189 
/* Try to determine the size of a kernel pointer in bits.

   This is the same as the size of a pointer for the inferior process
   except when a 32-bit inferior is running on a 64-bit OS.
   Returns the number of bits, or zero if it cannot be determined.  */

static int
linux_determine_kernel_ptr_bits (void)
{
  struct utsname utsn;

  memset (&utsn, 0, sizeof (utsn));

  if (uname (&utsn) < 0)
    return 0;

  /* We only need to handle the 64-bit host case, here.  For 32-bit host,
     the pointer size can be filled in later based on the inferior.  */
  if (strcmp (utsn.machine, "x86_64") == 0)
    return 64;

  return 0;
}
    209 
    210 /* Check whether an address is in the kernel.  */
    211 
    212 static inline int
    213 perf_event_is_kernel_addr (const struct btrace_target_info *tinfo,
    214 			   uint64_t addr)
    215 {
    216   uint64_t mask;
    217 
    218   /* If we don't know the size of a pointer, we can't check.  Let's assume it's
    219      not a kernel address in this case.  */
    220   if (tinfo->ptr_bits == 0)
    221     return 0;
    222 
    223   /* A bit mask for the most significant bit in an address.  */
    224   mask = (uint64_t) 1 << (tinfo->ptr_bits - 1);
    225 
    226   /* Check whether the most significant bit in the address is set.  */
    227   return (addr & mask) != 0;
    228 }
    229 
    230 /* Check whether a perf event record should be skipped.  */
    231 
    232 static inline int
    233 perf_event_skip_bts_record (const struct btrace_target_info *tinfo,
    234 			    const struct perf_event_bts *bts)
    235 {
    236   /* The hardware may report branches from kernel into user space.  Branches
    237      from user into kernel space will be suppressed.  We filter the former to
    238      provide a consistent branch trace excluding kernel.  */
    239   return perf_event_is_kernel_addr (tinfo, bts->from);
    240 }
    241 
    242 /* Perform a few consistency checks on a perf event sample record.  This is
    243    meant to catch cases when we get out of sync with the perf event stream.  */
    244 
    245 static inline int
    246 perf_event_sample_ok (const struct perf_event_sample *sample)
    247 {
    248   if (sample->header.type != PERF_RECORD_SAMPLE)
    249     return 0;
    250 
    251   if (sample->header.size != sizeof (*sample))
    252     return 0;
    253 
    254   return 1;
    255 }
    256 
    257 /* Branch trace is collected in a circular buffer [begin; end) as pairs of from
    258    and to addresses (plus a header).
    259 
    260    Start points into that buffer at the next sample position.
    261    We read the collected samples backwards from start.
    262 
    263    While reading the samples, we convert the information into a list of blocks.
    264    For two adjacent samples s1 and s2, we form a block b such that b.begin =
    265    s1.to and b.end = s2.from.
    266 
    267    In case the buffer overflows during sampling, one sample may have its lower
    268    part at the end and its upper part at the beginning of the buffer.  */
    269 
static VEC (btrace_block_s) *
perf_event_read_bts (struct btrace_target_info* tinfo, const uint8_t *begin,
		     const uint8_t *end, const uint8_t *start,
		     unsigned long long size)
{
  VEC (btrace_block_s) *btrace = NULL;
  struct perf_event_sample sample;
  unsigned long long read = 0;
  struct btrace_block block = { 0, 0 };
  struct regcache *regcache;

  /* START must lie within [BEGIN; END).  */
  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
  regcache = get_thread_regcache_for_ptid (tinfo->ptid);
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  Starting READ just
     below one full sample accounts for that partial record.  */
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, we're done.  */
      if (begin <= start)
	psample = (const struct perf_event_sample *) start;
      else
	{
	  int missing;

	  /* We're to the left of the ring buffer, we will wrap around and
	     reappear at the very right of the ring buffer.  */

	  missing = (begin - start);
	  start = (end - missing);

	  /* If the entire sample is missing, we're done.  */
	  if (missing == sizeof (sample))
	    psample = (const struct perf_event_sample *) start;
	  else
	    {
	      uint8_t *stack;

	      /* The sample wrapped around.  The lower part is at the end and
		 the upper part is at the beginning of the buffer.  */
	      stack = (uint8_t *) &sample;

	      /* Copy the two parts so we have a contiguous sample.  */
	      memcpy (stack, start, missing);
	      memcpy (stack + missing, begin, sizeof (sample) - missing);

	      psample = &sample;
	    }
	}

      /* Stop at the first record that fails the consistency checks; the
	 remaining (older) trace cannot be trusted either.  */
      if (!perf_event_sample_ok (psample))
	{
	  warning (_("Branch trace may be incomplete."));
	  break;
	}

      /* Skip records reporting branches out of the kernel.  */
      if (perf_event_skip_bts_record (tinfo, &psample->bts))
	continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      VEC_safe_push (btrace_block_s, btrace, &block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  /* Push the last block (i.e. the first one of inferior execution), as well.
     We don't know where it ends, but we know where it starts.  If we're
     reading delta trace, we can fill in the start address later on.
     Otherwise we will prune it.  */
  block.begin = 0;
  VEC_safe_push (btrace_block_s, btrace, &block);

  return btrace;
}
    358 
/* Check whether the kernel supports BTS.

   There is no direct query API, so we fork a child and check whether a
   branch-sampling perf event can be opened for it while it is stopped.
   Returns non-zero if the kernel supports BTS.  */

static int
kernel_supports_bts (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test bts: cannot fork: %s."), strerror (errno));
      return 0;

    case 0:
      /* Child: arrange to be traced and stop ourselves with SIGTRAP so the
	 parent can probe perf while we are suspended.  */
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test bts: cannot PTRACE_TRACEME: %s."),
		   strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test bts: cannot raise SIGTRAP: %s."),
		   strerror (errno));
	  _exit (1);
	}

      /* Only reached if raise returned; the parent kills us otherwise.  */
      _exit (1);

    default:
      /* Parent: wait for the child to stop at its SIGTRAP.  */
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test bts: expected stop. status: %d."),
		   status);
	  return 0;
	}

      /* Probe: one sample per branch instruction, sampling the branch
	 source and destination addresses — the BTS configuration.  */
      memset (&attr, 0, sizeof (attr));

      attr.type = PERF_TYPE_HARDWARE;
      attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.exclude_idle = 1;

      file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
      if (file >= 0)
	close (file);

      /* Tear down the child; it is still stopped in SIGTRAP.  */
      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  /* NOTE(review): this WIFSIGNALED check is only reached when
	     waitpid returned the wrong pid, in which case STATUS may not be
	     valid — possibly intended for the success path; confirm.  */
	  if (!WIFSIGNALED (status))
	    warning (_("test bts: expected killed. status: %d."),
		     status);
	}

      /* Support is inferred from whether the event could be opened.  */
      return (file >= 0);
    }
}
    441 
/* Check whether the kernel supports Intel(R) Processor Trace.

   Same probing strategy as kernel_supports_bts: fork a stopped child and
   try to open an intel_pt perf event for it.  Returns non-zero if the
   kernel supports PT.  */

static int
kernel_supports_pt (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file, type;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test pt: cannot fork: %s."), strerror (errno));
      return 0;

    case 0:
      /* Child: arrange to be traced and stop ourselves with SIGTRAP so the
	 parent can probe perf while we are suspended.  */
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test pt: cannot PTRACE_TRACEME: %s."),
		   strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test pt: cannot raise SIGTRAP: %s."),
		   strerror (errno));
	  _exit (1);
	}

      /* Only reached if raise returned; the parent kills us otherwise.  */
      _exit (1);

    default:
      /* Parent: wait for the child to stop at its SIGTRAP.  */
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test pt: expected stop. status: %d."),
		   status);
	  return 0;
	}

      /* The intel_pt event type is dynamic; read it from sysfs.  If that
	 fails, PT is unsupported.  */
      status = perf_event_pt_event_type (&type);
      if (status != 0)
	file = -1;
      else
	{
	  memset (&attr, 0, sizeof (attr));

	  attr.size = sizeof (attr);
	  attr.type = type;
	  attr.exclude_kernel = 1;
	  attr.exclude_hv = 1;
	  attr.exclude_idle = 1;

	  file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
	  if (file >= 0)
	    close (file);
	}

      /* Tear down the child; it is still stopped in SIGTRAP.  */
      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  /* NOTE(review): this WIFSIGNALED check is only reached when
	     waitpid returned the wrong pid, in which case STATUS may not be
	     valid — possibly intended for the success path; confirm.  */
	  if (!WIFSIGNALED (status))
	    warning (_("test pt: expected killed. status: %d."),
		     status);
	}

      /* Support is inferred from whether the event could be opened.  */
      return (file >= 0);
    }
}
    528 
    529 /* Check whether an Intel cpu supports BTS.  */
    530 
    531 static int
    532 intel_supports_bts (const struct btrace_cpu *cpu)
    533 {
    534   switch (cpu->family)
    535     {
    536     case 0x6:
    537       switch (cpu->model)
    538 	{
    539 	case 0x1a: /* Nehalem */
    540 	case 0x1f:
    541 	case 0x1e:
    542 	case 0x2e:
    543 	case 0x25: /* Westmere */
    544 	case 0x2c:
    545 	case 0x2f:
    546 	case 0x2a: /* Sandy Bridge */
    547 	case 0x2d:
    548 	case 0x3a: /* Ivy Bridge */
    549 
    550 	  /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
    551 	     "from" information afer an EIST transition, T-states, C1E, or
    552 	     Adaptive Thermal Throttling.  */
    553 	  return 0;
    554 	}
    555     }
    556 
    557   return 1;
    558 }
    559 
    560 /* Check whether the cpu supports BTS.  */
    561 
    562 static int
    563 cpu_supports_bts (void)
    564 {
    565   struct btrace_cpu cpu;
    566 
    567   cpu = btrace_this_cpu ();
    568   switch (cpu.vendor)
    569     {
    570     default:
    571       /* Don't know about others.  Let's assume they do.  */
    572       return 1;
    573 
    574     case CV_INTEL:
    575       return intel_supports_bts (&cpu);
    576     }
    577 }
    578 
/* Check whether the linux target supports BTS.  */

static int
linux_supports_bts (void)
{
  /* Cache the probe result: 0 = not yet checked, 1 = supported,
     -1 = unsupported.  */
  static int cached;

  if (cached == 0)
    cached = (kernel_supports_bts () && cpu_supports_bts ()) ? 1 : -1;

  return cached > 0;
}
    598 
/* Check whether the linux target supports Intel(R) Processor Trace.  */

static int
linux_supports_pt (void)
{
  /* Cache the probe result: 0 = not yet checked, 1 = supported,
     -1 = unsupported.  */
  static int cached;

  if (cached == 0)
    cached = kernel_supports_pt () ? 1 : -1;

  return cached > 0;
}
    616 
    617 /* See linux-btrace.h.  */
    618 
    619 int
    620 linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
    621 {
    622   switch (format)
    623     {
    624     case BTRACE_FORMAT_NONE:
    625       return 0;
    626 
    627     case BTRACE_FORMAT_BTS:
    628       return linux_supports_bts ();
    629 
    630     case BTRACE_FORMAT_PT:
    631       return linux_supports_pt ();
    632     }
    633 
    634   internal_error (__FILE__, __LINE__, _("Unknown branch trace format"));
    635 }
    636 
/* Enable branch tracing in BTS format for the thread given by PTID with the
   requested configuration CONF.  Returns the new tracing context, or NULL
   on failure.  The caller owns the returned context.  */

static struct btrace_target_info *
linux_enable_bts (ptid_t ptid, const struct btrace_config_bts *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_bts *bts;
  unsigned long long size, pages, data_offset, data_size;
  int pid, pg;

  tinfo = xzalloc (sizeof (*tinfo));
  tinfo->ptid = ptid;
  tinfo->ptr_bits = linux_determine_kernel_ptr_bits ();

  tinfo->conf.format = BTRACE_FORMAT_BTS;
  bts = &tinfo->variant.bts;

  /* One sample per branch instruction yields the full branch trace.  */
  bts->attr.size = sizeof (bts->attr);
  bts->attr.type = PERF_TYPE_HARDWARE;
  bts->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  bts->attr.sample_period = 1;

  /* We sample from and to address.  */
  bts->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  bts->attr.exclude_kernel = 1;
  bts->attr.exclude_hv = 1;
  bts->attr.exclude_idle = 1;

  /* Prefer the lwp; fall back to the pid.  */
  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  errno = 0;
  bts->file = syscall (SYS_perf_event_open, &bts->attr, pid, -1, -1, 0);
  if (bts->file < 0)
    goto err_out;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = (((unsigned long long) conf->size) + PAGE_SIZE - 1) / PAGE_SIZE;
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.
     NOTE(review): assumes the page count fits in 32 bits; (1u << pg) with
     pg >= 32 would be undefined behavior — confirm bounds on conf->size.  */
  for (pg = 0; pages != (1u << pg); ++pg)
    if ((pages & (1u << pg)) != 0)
      pages += (1u << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;

      size = pages * PAGE_SIZE;
      /* The mapping covers the data pages plus one configuration page.  */
      length = size + PAGE_SIZE;

      /* Check for overflows.  */
      if ((unsigned long long) length < size)
	continue;

      /* The number of pages we request needs to be a power of two.  */
      header = mmap (NULL, length, PROT_READ, MAP_SHARED, bts->file, 0);
      if (header != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_file;

  /* Default layout: the data area starts one page into the mapping.  */
  data_offset = PAGE_SIZE;
  data_size = size;

#if defined (PERF_ATTR_SIZE_VER5)
  /* Newer kernels publish the actual data offset and size in the header;
     the header->size check guards against older headers lacking them.  */
  if (offsetof (struct perf_event_mmap_page, data_size) <= header->size)
    {
      data_offset = header->data_offset;
      data_size = header->data_size;
    }
#endif /* defined (PERF_ATTR_SIZE_VER5) */

  bts->header = header;
  bts->bts.mem = ((const uint8_t *) header) + data_offset;
  bts->bts.size = data_size;
  bts->bts.data_head = &header->data_head;
  bts->bts.last_head = 0;

  tinfo->conf.bts.size = data_size;
  return tinfo;

 err_file:
  /* We were not able to allocate any buffer.  */
  close (bts->file);

 err_out:
  xfree (tinfo);
  return NULL;
}
    738 
    739 #if defined (PERF_ATTR_SIZE_VER5)
    740 
/* Enable branch tracing in Intel(R) Processor Trace format for the thread
   given by PTID with the requested configuration CONF.  Returns the new
   tracing context, or NULL on failure.  The caller owns the context.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_pt *pt;
  unsigned long long pages, size;
  int pid, pg, errcode, type;

  if (conf->size == 0)
    return NULL;

  /* The intel_pt event type is dynamic; read it from sysfs.  */
  errcode = perf_event_pt_event_type (&type);
  if (errcode != 0)
    return NULL;

  /* Prefer the lwp; fall back to the pid.  */
  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  tinfo = xzalloc (sizeof (*tinfo));
  tinfo->ptid = ptid;
  tinfo->ptr_bits = 0;

  tinfo->conf.format = BTRACE_FORMAT_PT;
  pt = &tinfo->variant.pt;

  pt->attr.size = sizeof (pt->attr);
  pt->attr.type = type;

  pt->attr.exclude_kernel = 1;
  pt->attr.exclude_hv = 1;
  pt->attr.exclude_idle = 1;

  errno = 0;
  pt->file = syscall (SYS_perf_event_open, &pt->attr, pid, -1, -1, 0);
  if (pt->file < 0)
    goto err;

  /* Allocate the configuration page. */
  header = mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		 pt->file, 0);
  if (header == MAP_FAILED)
    goto err_file;

  /* Place the aux area directly behind the data area.  */
  header->aux_offset = header->data_offset + header->data_size;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = (((unsigned long long) conf->size) + PAGE_SIZE - 1) / PAGE_SIZE;
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.
     NOTE(review): assumes the page count fits in 32 bits; (1u << pg) with
     pg >= 32 would be undefined behavior — confirm bounds on conf->size.  */
  for (pg = 0; pages != (1u << pg); ++pg)
    if ((pages & (1u << pg)) != 0)
      pages += (1u << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;

      size = pages * PAGE_SIZE;
      length = size;

      /* Check for overflows.
	 NOTE(review): unlike the BTS variant, LENGTH equals SIZE here, so
	 this can only trigger where size_t is narrower than unsigned long
	 long (32-bit hosts) — confirm that is the intent.  */
      if ((unsigned long long) length < size)
	continue;

      /* Tell the kernel the aux buffer size before mapping it.  */
      header->aux_size = size;

      pt->pt.mem = mmap (NULL, length, PROT_READ, MAP_SHARED, pt->file,
			 header->aux_offset);
      if (pt->pt.mem != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_conf;

  pt->header = header;
  pt->pt.size = size;
  pt->pt.data_head = &header->aux_head;

  tinfo->conf.pt.size = size;
  return tinfo;

 err_conf:
  munmap((void *) header, PAGE_SIZE);

 err_file:
  close (pt->file);

 err:
  xfree (tinfo);
  return NULL;
}
    843 
    844 #else /* !defined (PERF_ATTR_SIZE_VER5) */
    845 
/* Fall-back stub used when the perf_event ABI is too old for Intel(R)
   Processor Trace (PERF_ATTR_SIZE_VER5 unavailable at compile time).
   Always fails with EOPNOTSUPP.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  errno = EOPNOTSUPP;
  return NULL;
}
    852 
    853 #endif /* !defined (PERF_ATTR_SIZE_VER5) */
    854 
    855 /* See linux-btrace.h.  */
    856 
    857 struct btrace_target_info *
    858 linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
    859 {
    860   struct btrace_target_info *tinfo;
    861 
    862   tinfo = NULL;
    863   switch (conf->format)
    864     {
    865     case BTRACE_FORMAT_NONE:
    866       break;
    867 
    868     case BTRACE_FORMAT_BTS:
    869       tinfo = linux_enable_bts (ptid, &conf->bts);
    870       break;
    871 
    872     case BTRACE_FORMAT_PT:
    873       tinfo = linux_enable_pt (ptid, &conf->pt);
    874       break;
    875     }
    876 
    877   return tinfo;
    878 }
    879 
/* Disable BTS tracing described by TINFO.  Releases the perf event buffer
   (data pages plus the configuration page) and the event descriptor.  */

static enum btrace_error
linux_disable_bts (struct btrace_tinfo_bts *tinfo)
{
  munmap((void *) tinfo->header, tinfo->bts.size + PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}
    890 
/* Disable Intel(R) Processor Trace tracing described by TINFO.  Releases
   the aux buffer, the configuration page, and the event descriptor.  */

static enum btrace_error
linux_disable_pt (struct btrace_tinfo_pt *tinfo)
{
  munmap((void *) tinfo->pt.mem, tinfo->pt.size);
  munmap((void *) tinfo->header, PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}
    902 
    903 /* See linux-btrace.h.  */
    904 
    905 enum btrace_error
    906 linux_disable_btrace (struct btrace_target_info *tinfo)
    907 {
    908   enum btrace_error errcode;
    909 
    910   errcode = BTRACE_ERR_NOT_SUPPORTED;
    911   switch (tinfo->conf.format)
    912     {
    913     case BTRACE_FORMAT_NONE:
    914       break;
    915 
    916     case BTRACE_FORMAT_BTS:
    917       errcode = linux_disable_bts (&tinfo->variant.bts);
    918       break;
    919 
    920     case BTRACE_FORMAT_PT:
    921       errcode = linux_disable_pt (&tinfo->variant.pt);
    922       break;
    923     }
    924 
    925   if (errcode == BTRACE_ERR_NONE)
    926     xfree (tinfo);
    927 
    928   return errcode;
    929 }
    930 
/* Read branch trace data in BTS format for the thread given by TINFO into
   BTRACE using the TYPE reading method.  Returns BTRACE_ERR_NONE on
   success, BTRACE_ERR_OVERFLOW if a delta read cannot be satisfied.  */

static enum btrace_error
linux_read_bts (struct btrace_data_bts *btrace,
		struct btrace_target_info *tinfo,
		enum btrace_read_type type)
{
  struct perf_event_buffer *pevent;
  const uint8_t *begin, *end, *start;
  unsigned long long data_head, data_tail, buffer_size, size;
  /* Bounded number of attempts to get a consistent snapshot while the
     kernel may still be appending records (see below).  */
  unsigned int retries = 5;

  pevent = &tinfo->variant.bts.bts;

  /* For delta reads, we return at least the partial last block containing
     the current PC.  */
  if (type == BTRACE_READ_NEW && !perf_event_new_data (pevent))
    return BTRACE_ERR_NONE;

  buffer_size = pevent->size;
  data_tail = pevent->last_head;

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      data_head = *pevent->data_head;

      /* Delete any leftover trace from the previous iteration.  */
      VEC_free (btrace_block_s, btrace->blocks);

      if (type == BTRACE_READ_DELTA)
	{
	  /* Determine the number of bytes to read and check for buffer
	     overflows.  */

	  /* Check for data head overflows.  We might be able to recover from
	     those but they are very unlikely and it's not really worth the
	     effort, I think.  */
	  if (data_head < data_tail)
	    return BTRACE_ERR_OVERFLOW;

	  /* If the buffer is smaller than the trace delta, we overflowed.  */
	  size = data_head - data_tail;
	  if (buffer_size < size)
	    return BTRACE_ERR_OVERFLOW;
	}
      else
	{
	  /* Read the entire buffer.  */
	  size = buffer_size;

	  /* Adjust the size if the buffer has not overflowed, yet.  */
	  if (data_head < size)
	    size = data_head;
	}

      /* Data_head keeps growing; the buffer itself is circular.  */
      begin = pevent->mem;
      start = begin + data_head % buffer_size;

      /* Before the first wrap-around the valid data ends at START; after
	 that the whole buffer is valid.  */
      if (data_head <= buffer_size)
	end = start;
      else
	end = begin + pevent->size;

      btrace->blocks = perf_event_read_bts (tinfo, begin, end, start, size);

      /* The stopping thread notifies its ptracer before it is scheduled out.
	 On multi-core systems, the debugger might therefore run while the
	 kernel might be writing the last branch trace records.

	 Let's check whether the data head moved while we read the trace.  */
      if (data_head == *pevent->data_head)
	break;
    }

  pevent->last_head = data_head;

  /* Prune the incomplete last block (i.e. the first one of inferior execution)
     if we're not doing a delta read.  There is no way of filling in its zeroed
     BEGIN element.  */
  if (!VEC_empty (btrace_block_s, btrace->blocks)
      && type != BTRACE_READ_DELTA)
    VEC_pop (btrace_block_s, btrace->blocks);

  return BTRACE_ERR_NONE;
}
   1019 
   1020 /* Fill in the Intel(R) Processor Trace configuration information.  */
   1021 
static void
linux_fill_btrace_pt_config (struct btrace_data_pt_config *conf)
{
  /* Record the CPU we're running on; the Intel PT decoder needs it to
     interpret the trace packets correctly.  NOTE(review): exact
     semantics of btrace_this_cpu live in common-regcache — confirm
     there.  */
  conf->cpu = btrace_this_cpu ();
}
   1027 
   1028 /* Read branch trace data in Intel(R) Processor Trace format for the thread
   1029    given by TINFO into BTRACE using the TYPE reading method.  */
   1030 
   1031 static enum btrace_error
   1032 linux_read_pt (struct btrace_data_pt *btrace,
   1033 	       struct btrace_target_info *tinfo,
   1034 	       enum btrace_read_type type)
   1035 {
   1036   struct perf_event_buffer *pt;
   1037 
   1038   pt = &tinfo->variant.pt.pt;
   1039 
   1040   linux_fill_btrace_pt_config (&btrace->config);
   1041 
   1042   switch (type)
   1043     {
   1044     case BTRACE_READ_DELTA:
   1045       /* We don't support delta reads.  The data head (i.e. aux_head) wraps
   1046 	 around to stay inside the aux buffer.  */
   1047       return BTRACE_ERR_NOT_SUPPORTED;
   1048 
   1049     case BTRACE_READ_NEW:
   1050       if (!perf_event_new_data (pt))
   1051 	return BTRACE_ERR_NONE;
   1052 
   1053       /* Fall through.  */
   1054     case BTRACE_READ_ALL:
   1055       perf_event_read_all (pt, &btrace->data, &btrace->size);
   1056       return BTRACE_ERR_NONE;
   1057     }
   1058 
   1059   internal_error (__FILE__, __LINE__, _("Unkown btrace read type."));
   1060 }
   1061 
   1062 /* See linux-btrace.h.  */
   1063 
   1064 enum btrace_error
   1065 linux_read_btrace (struct btrace_data *btrace,
   1066 		   struct btrace_target_info *tinfo,
   1067 		   enum btrace_read_type type)
   1068 {
   1069   switch (tinfo->conf.format)
   1070     {
   1071     case BTRACE_FORMAT_NONE:
   1072       return BTRACE_ERR_NOT_SUPPORTED;
   1073 
   1074     case BTRACE_FORMAT_BTS:
   1075       /* We read btrace in BTS format.  */
   1076       btrace->format = BTRACE_FORMAT_BTS;
   1077       btrace->variant.bts.blocks = NULL;
   1078 
   1079       return linux_read_bts (&btrace->variant.bts, tinfo, type);
   1080 
   1081     case BTRACE_FORMAT_PT:
   1082       /* We read btrace in Intel(R) Processor Trace format.  */
   1083       btrace->format = BTRACE_FORMAT_PT;
   1084       btrace->variant.pt.data = NULL;
   1085       btrace->variant.pt.size = 0;
   1086 
   1087       return linux_read_pt (&btrace->variant.pt, tinfo, type);
   1088     }
   1089 
   1090   internal_error (__FILE__, __LINE__, _("Unkown branch trace format."));
   1091 }
   1092 
   1093 /* See linux-btrace.h.  */
   1094 
   1095 const struct btrace_config *
   1096 linux_btrace_conf (const struct btrace_target_info *tinfo)
   1097 {
   1098   return &tinfo->conf;
   1099 }
   1100 
   1101 #else /* !HAVE_LINUX_PERF_EVENT_H */
   1102 
   1103 /* See linux-btrace.h.  */
   1104 
int
linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
{
  /* Fallback when <linux/perf_event.h> or SYS_perf_event_open is
     unavailable: no branch trace format is supported.  */
  return 0;
}
   1110 
   1111 /* See linux-btrace.h.  */
   1112 
struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  /* Branch tracing is unavailable without perf_event support; return
     NULL to indicate that tracing could not be enabled.  */
  return NULL;
}
   1118 
   1119 /* See linux-btrace.h.  */
   1120 
enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  /* Branch tracing is unavailable without perf_event support.  */
  return BTRACE_ERR_NOT_SUPPORTED;
}
   1126 
   1127 /* See linux-btrace.h.  */
   1128 
enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  /* Branch tracing is unavailable without perf_event support.  */
  return BTRACE_ERR_NOT_SUPPORTED;
}
   1136 
   1137 /* See linux-btrace.h.  */
   1138 
const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  /* No configuration exists without perf_event support.  */
  return NULL;
}
   1144 
   1145 #endif /* !HAVE_LINUX_PERF_EVENT_H */
   1146