mirror of
https://sourceware.org/git/binutils-gdb.git
synced 2024-12-27 04:52:05 +08:00
76bdc7266a
This adds 'Innovative Computing Labs' as an external author to update-copyright.py, to cover the copyright notice in gprofng/common/opteron_pcbe.c, and uses that plus another external author 'Oracle and' to update gprofng copyright dates. I'm not going to commit 'Oracle and' as an accepted author, but that covers the string "Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved." found in gprofng/testsuite/gprofng.display/jsynprog files.
1455 lines
46 KiB
C
1455 lines
46 KiB
C
/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */
|
|
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/syscall.h>
|
|
#include <linux/perf_event.h>
|
|
|
|
#include "hwcdrv.h"
|
|
|
|
/*---------------------------------------------------------------------------*/
/* macros */

/* IS_GLOBAL expands to nothing; it is purely a marker placed on
   definitions in this file that are meant to be visible globally.  */
#define IS_GLOBAL

#include "cpuid.c" /* ftns for identifying a chip */

/* Tentative definitions of the CPU-specific backend descriptors.
   NOTE(review): presumably the initialized definitions come from the
   core_pcbe.c / opteron_pcbe.c files included right after this table --
   confirm.  */
static hdrv_pcbe_api_t hdrv_pcbe_core_api;
static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;

/* NULL-terminated list of CPU-specific backends.  hdrv_pcl_internal_open ()
   walks it in order and keeps the first backend whose init succeeds.  */
static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
  &hdrv_pcbe_core_api,
  &hdrv_pcbe_opteron_api,
  NULL
};
|
|
#include "opteron_pcbe.c" /* CPU-specific code */
|
|
#include "core_pcbe.c" /* CPU-specific code */
|
|
|
|
extern hwcdrv_api_t hwcdrv_pcl_api;
|
|
IS_GLOBAL hwcdrv_api_t *hwcdrv_drivers[] = {
|
|
&hwcdrv_pcl_api,
|
|
NULL
|
|
};
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
/* utils for drivers */
|
|
IS_GLOBAL int
|
|
hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs)
|
|
{
|
|
unsigned int pmc_assigned[MAX_PICS];
|
|
unsigned idx;
|
|
for (int ii = 0; ii < MAX_PICS; ii++)
|
|
pmc_assigned[ii] = 0;
|
|
|
|
/* assign the HWCs that we already know about */
|
|
for (idx = 0; idx < numctrs; idx++)
|
|
{
|
|
regno_t regno = entries[idx]->reg_num;
|
|
if (regno == REGNO_ANY)
|
|
{
|
|
/* check to see if list of possible registers only contains one entry */
|
|
regno = REG_LIST_SINGLE_VALID_ENTRY (entries[idx]->reg_list);
|
|
}
|
|
if (regno != REGNO_ANY)
|
|
{
|
|
if (regno < 0 || regno >= MAX_PICS || !regno_is_valid (entries[idx], regno))
|
|
{
|
|
logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n", idx, regno);
|
|
entries[idx]->reg_num = regno; /* assigning back to entries */
|
|
pmc_assigned[regno] = 1;
|
|
}
|
|
}
|
|
|
|
/* assign HWCs that are currently REGNO_ANY */
|
|
for (idx = 0; idx < numctrs; idx++)
|
|
{
|
|
if (entries[idx]->reg_num == REGNO_ANY)
|
|
{
|
|
int assigned = 0;
|
|
regno_t *reg_list = entries[idx]->reg_list;
|
|
for (; reg_list && *reg_list != REGNO_ANY; reg_list++)
|
|
{
|
|
regno_t regno = *reg_list;
|
|
if (regno < 0 || regno >= MAX_PICS)
|
|
{
|
|
logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
if (pmc_assigned[regno] == 0)
|
|
{
|
|
TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): assigned: idx=%d, regno=%d\n", idx, regno);
|
|
entries[idx]->reg_num = regno; /* assigning back to entries */
|
|
pmc_assigned[regno] = 1;
|
|
assigned = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!assigned)
|
|
{
|
|
logerr (GTXT ("Counter '%s' could not be bound to a register\n"),
|
|
entries[idx]->name ? entries[idx]->name : "<NULL>");
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
IS_GLOBAL int
|
|
hwcdrv_lookup_cpuver (const char * cpcN_cciname)
|
|
{
|
|
libcpc2_cpu_lookup_t *plookup;
|
|
static libcpc2_cpu_lookup_t cpu_table[] = {
|
|
LIBCPC2_CPU_LOOKUP_LIST
|
|
};
|
|
if (cpcN_cciname == NULL)
|
|
return CPUVER_UNDEFINED;
|
|
|
|
/* search table for name */
|
|
for (plookup = cpu_table; plookup->cpc2_cciname; plookup++)
|
|
{
|
|
int n = strlen (plookup->cpc2_cciname);
|
|
if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n))
|
|
return plookup->cpc2_cpuver;
|
|
}
|
|
/* unknown, but does have a descriptive string */
|
|
TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' "
|
|
"could not be determined\n",
|
|
cpcN_cciname);
|
|
return CPUVER_GENERIC;
|
|
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/* utils to generate x86 register definitions on Linux */
|
|
|
|
/*
 * This code is structured as though we're going to initialize the
 * HWC by writing the Intel MSR register directly.  That is, we
 * assume the lowest 16 bits of the event number will have the event
 * and that higher bits will set attributes.
 *
 * While SPARC is different, we can nonetheless use basically the
 * same "x86"-named functions:
 *
 * - The event code will still be 16 bits.  It will still
 *   be in the lowest 16 bits of the event number.  Though
 *   perf_event_code() on SPARC will expect those bits to
 *   be shifted, hwcdrv_pcl.c can easily perform that shift.
 *
 * - On SPARC we support only two attributes, "user" and "system",
 *   which hwcdrv_pcl.c already converts to the "exclude_user"
 *   and "exclude_kernel" fields expected by perf_event_open().
 *   "user" and "system" are stored in event bits 16 and 17.
 *   For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
 */
|
|
|
|
/* Optional CPU-specific event-name lookup.  It starts out unset;
   hdrv_pcl_internal_open () points it at the selected backend's
   hdrv_pcbe_get_eventnum.  While unset, only numerically specified
   events are accepted by myperfctr_get_x86_eventnum ().  */
IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = NULL;
|
|
|
|
static const attr_info_t perfctr_sparc_attrs[] = {
|
|
{NTXT ("user"), 0, 0x01, 16}, //usr
|
|
{NTXT ("system"), 0, 0x01, 17}, //os
|
|
{NULL, 0, 0x00, 0},
|
|
};
|
|
static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */
|
|
{NTXT ("umask"), 0, 0xff, 8},
|
|
{NTXT ("user"), 0, 0x01, 16}, //usr
|
|
//{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted)
|
|
{NTXT ("system"), 0, 0x01, 17}, //os
|
|
{NTXT ("edge"), 0, 0x01, 18},
|
|
{NTXT ("pc"), 0, 0x01, 19},
|
|
{NTXT ("inv"), 0, 0x01, 23},
|
|
{NTXT ("cmask"), 0, 0xff, 24},
|
|
{NULL, 0, 0x00, 0},
|
|
};
|
|
/* The attribute table currently in effect; consulted by
   set_x86_attr_bits () and hwcdrv_get_descriptions ().  Initially the
   x86 table.  */
const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs;
|
|
|
|
/* Event-select bits that are set for every counter before the user's
   attributes are applied (see hwcfuncs_get_x86_eventsel ()).

   Bits deliberately NOT part of the default:
     (0xff << 0)   event
     (0xff << 8)   umask
     (0x01 << 17)  os
     (0x01 << 18)  edge
     (0x01 << 19)  pc
     (0x01 << 21)  reserved
     (0x01 << 23)  inv
     (0xff << 24)  cmask  */
static const eventsel_t perfctr_evntsel_enable_bits =
    (0x01 << 16)        /* usr */
  | (0x01 << 20)        /* int */
  | (0x01 << 22);       /* enable */
|
|
|
|
static int
|
|
myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc,
|
|
eventsel_t *eventsel, eventsel_t *valid_umask,
|
|
uint_t *pmc_sel)
|
|
{
|
|
if (hwcdrv_get_x86_eventnum &&
|
|
!hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel))
|
|
return 0;
|
|
|
|
/* check for numerically-specified counters */
|
|
char * endptr;
|
|
uint64_t num = strtoull (eventname, &endptr, 0);
|
|
if (*eventname && !*endptr)
|
|
{
|
|
*eventsel = EXTENDED_EVNUM_2_EVSEL (num);
|
|
*valid_umask = 0xff; /* allow any umask (unused for SPARC?) */
|
|
*pmc_sel = pmc;
|
|
return 0;
|
|
}
|
|
|
|
/* name does not specify a numeric value */
|
|
*eventsel = (eventsel_t) - 1;
|
|
*valid_umask = 0x0;
|
|
*pmc_sel = pmc;
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
mask_shift_set (eventsel_t *presult, eventsel_t invalue,
|
|
eventsel_t mask, eventsel_t shift)
|
|
{
|
|
if (invalue & ~mask)
|
|
return -1; /* invalue attempts to set bits outside of mask */
|
|
*presult &= ~(mask << shift); /* clear all the mask bits */
|
|
*presult |= (invalue << shift); /* set bits according to invalue */
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask,
|
|
hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly)
|
|
{
|
|
eventsel_t evntsel = *result_mask;
|
|
for (int ii = 0; ii < (int) nattrs; ii++)
|
|
{
|
|
const char *attrname = attrs[ii].ca_name;
|
|
eventsel_t attrval = (eventsel_t) attrs[ii].ca_val;
|
|
const char *tmpname;
|
|
int attr_found = 0;
|
|
for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++)
|
|
{
|
|
if (strcmp (attrname, tmpname) == 0)
|
|
{
|
|
if (strcmp (attrname, "umask") == 0)
|
|
{
|
|
if (attrval & ~evnt_valid_umask)
|
|
{
|
|
logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"),
|
|
nameOnly, (long long) evnt_valid_umask);
|
|
return -1;
|
|
}
|
|
}
|
|
if (mask_shift_set (&evntsel,
|
|
perfctr_attrs_table[jj].is_inverted ? (attrval^1) : attrval,
|
|
perfctr_attrs_table[jj].mask,
|
|
perfctr_attrs_table[jj].shift))
|
|
{
|
|
logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"),
|
|
nameOnly, attrname, (long long) attrval);
|
|
return -1;
|
|
}
|
|
TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n",
|
|
nameOnly, attrname, (long long) attrval);
|
|
attr_found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!attr_found)
|
|
{
|
|
logerr (GTXT ("attribute `%s' is invalid\n"), attrname);
|
|
return -1;
|
|
}
|
|
}
|
|
*result_mask = evntsel;
|
|
return 0;
|
|
}
|
|
|
|
IS_GLOBAL int
|
|
hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
|
|
eventsel_t *return_event, uint_t *return_pmc_sel)
|
|
{
|
|
hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1];
|
|
unsigned nattrs = 0;
|
|
char *nameOnly = NULL;
|
|
eventsel_t evntsel = 0; // event number
|
|
eventsel_t evnt_valid_umask = 0;
|
|
uint_t pmc_sel = 0;
|
|
int rc = -1;
|
|
*return_event = 0;
|
|
*return_pmc_sel = 0;
|
|
void *attr_mem = hwcfuncs_parse_attrs (int_name, attrs, HWCFUNCS_MAX_ATTRS,
|
|
&nattrs, NULL);
|
|
if (!attr_mem)
|
|
{
|
|
logerr (GTXT ("out of memory, could not parse attributes\n"));
|
|
return -1;
|
|
}
|
|
hwcfuncs_parse_ctr (int_name, NULL, &nameOnly, NULL, NULL, NULL);
|
|
if (regno == REGNO_ANY)
|
|
{
|
|
logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly);
|
|
goto attr_wrapup;
|
|
}
|
|
|
|
/* look up evntsel */
|
|
if (myperfctr_get_x86_eventnum (nameOnly, regno,
|
|
&evntsel, &evnt_valid_umask, &pmc_sel))
|
|
{
|
|
logerr (GTXT ("counter `%s' is not valid\n"), nameOnly);
|
|
goto attr_wrapup;
|
|
}
|
|
TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n",
|
|
(long long) evntsel, pmc_sel, nameOnly, nattrs);
|
|
|
|
/* determine event attributes */
|
|
eventsel_t evnt_attrs = perfctr_evntsel_enable_bits;
|
|
if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly))
|
|
goto attr_wrapup;
|
|
if (evntsel & evnt_attrs)
|
|
TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n",
|
|
(long long) evntsel, (long long) evnt_attrs,
|
|
(long long) (evntsel & evnt_attrs));
|
|
*return_event = evntsel | evnt_attrs;
|
|
*return_pmc_sel = pmc_sel;
|
|
rc = 0;
|
|
|
|
attr_wrapup:
|
|
free (attr_mem);
|
|
free (nameOnly);
|
|
return rc;
|
|
}
|
|
|
|
/* Instruction and clobber list used when a system call is issued through
   inline assembly (see hwcdrv_gettid ()).  Nothing is defined for other
   architectures.  */
#if defined (__x86_64__)
# define syscall_instr "syscall"
# define syscall_clobber "rcx", "r11", "memory"
#endif
#if defined (__i386__)
# define syscall_instr "int $0x80"
# define syscall_clobber "memory"
#endif
|
|
|
|
/* Thin wrapper around the perf_event_open system call.

   The call has been seen to fail spuriously while an immediate retry
   succeeds, so it is attempted up to five times.  Returns the first
   result other than -1, or -1 if all five attempts fail (whatever the
   cause).  */
static inline int
perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
                 int cpu, int group_fd, unsigned long flags)
{
  enum { MAX_ATTEMPTS = 5 };
  int fd;
  int attempts_left = MAX_ATTEMPTS;

  do
    fd = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags);
  while (fd == -1 && --attempts_left > 0);
  return fd;
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/* macros & fwd prototypes */
|
|
|
|
/* Tag for the functions that implement the hwcdrv API.  They are file
   local (static).  */
#define HWCDRV_API static

/* Defined later in this file; needed by the per-thread init/fini hooks.  */
HWCDRV_API int hwcdrv_start (void);
HWCDRV_API int hwcdrv_free_counters ();
|
|
|
|
/* Return the kernel thread id of the calling thread (gettid system call).

   When built as part of libcollector (LIBCOLLECTOR_SRC) on "intel", the
   system call is issued directly with inline assembly; in every other
   configuration syscall () is used.  */
static pid_t
hwcdrv_gettid (void)
{
#if defined(LIBCOLLECTOR_SRC) && defined(intel)
  pid_t tid;
  __asm__ __volatile__(syscall_instr
                       : "=a" (tid) : "0" (__NR_gettid)
                       : syscall_clobber);
  return tid;
#else
  /* For LIBCOLLECTOR_SRC on non-intel targets this is a placeholder:
     FIXUP_XXX_SPARC_LINUX -- write gettid in asm.  */
  return syscall (__NR_gettid);
#endif
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/* types */
|
|
|
|
/* Number of pages used for the perf_event sample data area (the metadata
   page is extra).  Must be a power of 2: read_buf () computes offsets
   with a bit mask derived from it.  */
#define NPAGES_PER_BUF 1
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
/* typedefs */
|
|
|
|
typedef struct
|
|
{ // event (hwc) definition
|
|
unsigned int reg_num; // PMC assignment, potentially for detecting conflicts
|
|
eventsel_t eventsel; // raw event bits (Intel/AMD)
|
|
uint64_t counter_preload; // number of HWC events before signal
|
|
struct perf_event_attr hw; // perf_event definition
|
|
hrtime_t min_time; // minimum time we're targeting between events
|
|
char *name;
|
|
} perf_event_def_t;
|
|
|
|
/* Runtime state of one perf_event buffer.  */
typedef struct
{
  /* Start of the mmapped area (its first page is the metadata page), or
     NULL when nothing is mapped.  */
  void *buf;
  /* Page size used to lay out the mapping.  */
  size_t pagesz;
} buffer_state_t;
|
|
|
|
/* Runtime state of the counter values; holds the readings of the previous
   sample so that read_sample () can compute deltas.  */
typedef struct
{
  /* Previous perf_event "enabled" time.  */
  uint64_t prev_ena_ts;
  /* Previous perf_event "running" time.  */
  uint64_t prev_run_ts;
  /* Previous HWC value.  */
  uint64_t prev_value;
} counter_value_state_t;
|
|
|
|
typedef struct
|
|
{ // per-counter information
|
|
perf_event_def_t *ev_def; // global HWC definition for one counter
|
|
int fd; // perf_event fd
|
|
buffer_state_t buf_state; // perf_event buffer's state
|
|
counter_value_state_t value_state; // counter state
|
|
int needs_restart; // workaround for dbx failure to preserve si_fd
|
|
uint64_t last_overflow_period;
|
|
hrtime_t last_overflow_time;
|
|
} counter_state_t;
|
|
|
|
typedef struct
|
|
{ // per-thread context
|
|
counter_state_t *ctr_list;
|
|
int signal_fd; // fd that caused the most recent signal
|
|
pid_t tid; // for debugging signal delivery problems
|
|
} hdrv_pcl_ctx_t;
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
/* static variables */
|
|
static struct
|
|
{
|
|
int library_ok;
|
|
int internal_open_called;
|
|
hwcfuncs_tsd_get_fn_t find_vpc_ctx;
|
|
unsigned hwcdef_cnt; /* number of *active* hardware counters */
|
|
hwcdrv_get_events_fn_t *get_events;
|
|
} hdrv_pcl_state;
|
|
|
|
static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED};
|
|
static perf_event_def_t global_perf_event_def[MAX_PICS];
|
|
|
|
#define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt)
|
|
|
|
|
|
/* perf_event buffer formatting and handling */
|
|
static void
|
|
reset_buf (buffer_state_t *bufstate)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n");
|
|
struct perf_event_mmap_page *metadata = bufstate->buf;
|
|
if (metadata)
|
|
metadata->data_tail = metadata->data_head;
|
|
}
|
|
|
|
static int
|
|
skip_buf (buffer_state_t *bufstate, size_t sz)
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n");
|
|
struct perf_event_mmap_page *metadata = bufstate->buf;
|
|
if (metadata == NULL)
|
|
return -1;
|
|
size_t pgsz = bufstate->pagesz;
|
|
size_t bufsz = NPAGES_PER_BUF*pgsz;
|
|
uint64_t d_tail = metadata->data_tail;
|
|
uint64_t d_head = metadata->data_head;
|
|
|
|
// validate request size
|
|
if (sz > d_head - d_tail || sz >= bufsz)
|
|
{
|
|
reset_buf (bufstate);
|
|
return -1;
|
|
}
|
|
metadata->data_tail = d_tail + sz; // advance tail
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
read_buf (buffer_state_t *bufstate, void *buf, size_t sz)
|
|
{
|
|
struct perf_event_mmap_page *metadata = bufstate->buf;
|
|
if (metadata == NULL)
|
|
return -1;
|
|
size_t pgsz = bufstate->pagesz;
|
|
size_t bufsz = NPAGES_PER_BUF*pgsz;
|
|
uint64_t d_tail = metadata->data_tail;
|
|
uint64_t d_head = metadata->data_head;
|
|
|
|
// validate request size
|
|
if (sz > d_head - d_tail || sz >= bufsz)
|
|
{
|
|
reset_buf (bufstate);
|
|
return -1;
|
|
}
|
|
char *buf_base = ((char *) metadata) + pgsz; // start of data buffer
|
|
uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer
|
|
size_t nbytes = sz;
|
|
if (start_pos + sz > bufsz)
|
|
{
|
|
// will wrap past end of buffer
|
|
nbytes = bufsz - start_pos;
|
|
memcpy (buf, buf_base + start_pos, nbytes);
|
|
start_pos = 0; // wrap to start
|
|
buf = (void *) (((char *) buf) + nbytes);
|
|
nbytes = sz - nbytes;
|
|
}
|
|
memcpy (buf, buf_base + start_pos, nbytes);
|
|
metadata->data_tail += sz;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
read_u64 (buffer_state_t *bufstate, uint64_t *value)
|
|
{
|
|
return read_buf (bufstate, value, sizeof (uint64_t));
|
|
}
|
|
|
|
static int
|
|
read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue,
|
|
uint64_t *rlost)
|
|
{
|
|
// returns count of bytes read
|
|
buffer_state_t *bufstate = &ctr_state->buf_state;
|
|
counter_value_state_t *cntstate = &ctr_state->value_state;
|
|
int readsz = 0;
|
|
|
|
// PERF_SAMPLE_IP
|
|
uint64_t ipc = 0;
|
|
int rc = read_u64 (bufstate, &ipc);
|
|
if (rc)
|
|
return -1;
|
|
readsz += sizeof (uint64_t);
|
|
|
|
// PERF_SAMPLE_READ: value
|
|
uint64_t value = 0;
|
|
rc = read_u64 (bufstate, &value);
|
|
if (rc)
|
|
return -2;
|
|
readsz += sizeof (uint64_t);
|
|
|
|
/* Bug 20806896
|
|
* Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and
|
|
* running times in the sample data that correspond to the metadata times
|
|
* metadata->time_enabled
|
|
* metadata->time_running
|
|
* from the PREVIOUS (not current) sample. Probably just ignore this bug
|
|
* since it's on old kernels and we only use the enabled and running times
|
|
* to construct loss_estimate.
|
|
*/
|
|
// PERF_SAMPLE_READ: PERF_FORMAT_ENABLED
|
|
uint64_t enabled_time = 0;
|
|
rc = read_u64 (bufstate, &enabled_time);
|
|
if (rc)
|
|
return -3;
|
|
readsz += sizeof (uint64_t);
|
|
|
|
// PERF_SAMPLE_READ: PERF_FORMAT_RUNNING
|
|
uint64_t running_time = 0;
|
|
rc = read_u64 (bufstate, &running_time);
|
|
if (rc)
|
|
return -4;
|
|
readsz += sizeof (uint64_t);
|
|
|
|
uint64_t value_delta = value - cntstate->prev_value;
|
|
uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts;
|
|
uint64_t running_delta = running_time - cntstate->prev_run_ts;
|
|
cntstate->prev_value = value;
|
|
cntstate->prev_ena_ts = enabled_time;
|
|
cntstate->prev_run_ts = running_time;
|
|
|
|
// 24830461 need workaround for Linux anomalous HWC skid overrun
|
|
int set_error_flag = 0;
|
|
if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */)
|
|
set_error_flag = 1;
|
|
|
|
uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing
|
|
if (running_delta == enabled_delta)
|
|
{
|
|
// counter was running 100% of time, no multiplexing
|
|
}
|
|
else if (running_delta == 0)
|
|
loss_estimate = 1; // token amount to aid in debugging perfctr oddities
|
|
else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll))
|
|
{
|
|
// running should be smaller than enabled, can't estimate
|
|
/*
|
|
* 21418391 HWC can have a negative count
|
|
*
|
|
* We've also seen enabled not only be smaller than running
|
|
* but in fact go negative. Guard against this.
|
|
*/
|
|
loss_estimate = 2; // token amount to aid in debugging perfctr oddities
|
|
}
|
|
else
|
|
{
|
|
// counter was running less than 100% of time
|
|
// Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479
|
|
uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta;
|
|
value_delta = scaled_delta;
|
|
#if 0
|
|
// We should perhaps warn the user that multiplexing is going on,
|
|
// but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values.
|
|
// For now we simply don't report.
|
|
// Perhaps we should address the issue not here but in the caller collector_sigemt_handler(),
|
|
// but at that level "lost" has a meaning that's considerably broader than just multiplexing.
|
|
collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
|
|
SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name,
|
|
ctr_list[idx].last_overflow_period, new_period);
|
|
#endif
|
|
}
|
|
TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3,
|
|
"hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu "
|
|
"value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n",
|
|
ctr_state->ev_def->name, (long long) ipc,
|
|
(long long) enabled_delta, (long long) running_delta,
|
|
(long long) value_delta, (long long) value_delta,
|
|
(unsigned long long) loss_estimate,
|
|
loss_estimate ? ", WARNING - SCALED" : "",
|
|
set_error_flag ? ", ERRORFLAG" : "");
|
|
if (set_error_flag == 1)
|
|
value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */;
|
|
*rvalue = value_delta;
|
|
*rlost = loss_estimate;
|
|
if (readsz != msgsz)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n");
|
|
return -5;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
dump_perf_event_attr (struct perf_event_attr *at)
|
|
{
|
|
TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n"
|
|
" config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n",
|
|
(int) at->size, (int) at->type, (unsigned long long) at->sample_period,
|
|
(unsigned long long) at->config, (unsigned long long) at->config1,
|
|
(unsigned long long) at->config2, (unsigned long long) at->wakeup_events,
|
|
(unsigned long long) at->__reserved_1);
|
|
#define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld)
|
|
DUMP_F (disabled);
|
|
DUMP_F (inherit);
|
|
DUMP_F (pinned);
|
|
DUMP_F (exclusive);
|
|
DUMP_F (exclude_user);
|
|
DUMP_F (exclude_kernel);
|
|
DUMP_F (exclude_hv);
|
|
DUMP_F (exclude_idle);
|
|
// DUMP_F(xmmap);
|
|
DUMP_F (comm);
|
|
DUMP_F (freq);
|
|
DUMP_F (inherit_stat);
|
|
DUMP_F (enable_on_exec);
|
|
DUMP_F (task);
|
|
DUMP_F (watermark);
|
|
}
|
|
|
|
/* Fill in *HW for a sampling counter.

   EVENT is the event-select word built by this driver: how it maps onto
   hw->type / hw->config depends on the architecture (see below); bits 16
   and 17 ("user" and "system") always decide exclude_user and
   exclude_kernel.  PERIOD becomes the sample period.  The counter is
   created disabled, excludes the hypervisor, wakes up on every event,
   and each sample carries the IP plus the counter value with
   enabled/running times.  The result is traced with
   dump_perf_event_attr ().  */
static void
init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period)
{
  memset (hw, 0, sizeof (struct perf_event_attr));
  hw->size = sizeof (struct perf_event_attr);   /* fwd/bwd compat */

#if defined(__i386__) || defined(__x86_64)
  /* note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits */
  hw->config = event;
  hw->type = PERF_TYPE_RAW;     /* hw/sw/trace/raw... */
#elif defined(__aarch64__)
  hw->type = (event >> 24) & 7;
  hw->config = event & 0xff;
#elif defined(sparc)
  /* SPARC needs to be shifted up 16 bits */
  hw->config = (event & 0xFFFF) << 16;  /* uint64_t event */
  uint64_t regs = (event >> 20) & 0xf;  /* see sparc_pcbe.c */
  hw->config |= regs << 4;      /* for M8, supported PICs need to be placed at bits [7:4] */
  hw->type = PERF_TYPE_RAW;     /* hw/sw/trace/raw... */
#endif

  hw->sample_period = period;

  /* Sample contents.  Not requested: PERF_SAMPLE_TID, PERF_SAMPLE_TIME
     (possibly interesting), PERF_SAMPLE_ADDR, PERF_SAMPLE_CALLCHAIN
     (interesting), PERF_SAMPLE_ID, PERF_SAMPLE_CPU (possibly
     interesting), PERF_SAMPLE_PERIOD, PERF_SAMPLE_STREAM_ID,
     PERF_SAMPLE_RAW.  */
  hw->sample_type = PERF_SAMPLE_IP
                    | PERF_SAMPLE_READ;         /* HWC value */

  /* Both times are used to detect when the hwc was not scheduled.
     Not requested: PERF_FORMAT_ID, PERF_FORMAT_GROUP.  */
  hw->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED
                    | PERF_FORMAT_TOTAL_TIME_RUNNING;

  hw->disabled = 1;     /* off by default */

  /* Note: the following override config.priv bits!  */
  hw->exclude_user = !(event & (1 << 16));      /* don't count user */
  hw->exclude_kernel = !(event & (1 << 17));    /* ditto kernel */
  hw->exclude_hv = 1;                           /* ditto hypervisor */
  hw->wakeup_events = 1;                        /* wakeup every n events */
  dump_perf_event_attr (hw);
}
|
|
|
|
static int
|
|
start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string)
|
|
{
|
|
// pe_attr should have been initialized in hwcdrv_create_counters()
|
|
struct perf_event_attr pe_attr;
|
|
memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr));
|
|
|
|
// but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set
|
|
pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period;
|
|
|
|
int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0);
|
|
if (hwc_fd == -1)
|
|
{
|
|
TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n",
|
|
error_string, ii, errno);
|
|
return 1;
|
|
}
|
|
|
|
size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata
|
|
void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call?
|
|
PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0);
|
|
if (buf == MAP_FAILED)
|
|
{
|
|
TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n",
|
|
(long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno));
|
|
return 1;
|
|
}
|
|
pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? we never seem to use it
|
|
pctx->ctr_list[ii].fd = hwc_fd;
|
|
pctx->ctr_list[ii].buf_state.buf = buf;
|
|
pctx->ctr_list[ii].buf_state.pagesz = pgsz;
|
|
pctx->ctr_list[ii].value_state.prev_ena_ts = 0;
|
|
pctx->ctr_list[ii].value_state.prev_run_ts = 0;
|
|
pctx->ctr_list[ii].value_state.prev_value = 0;
|
|
pctx->ctr_list[ii].last_overflow_time = gethrtime ();
|
|
|
|
/* set async mode */
|
|
long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC;
|
|
int rc = fcntl (hwc_fd, F_SETFL, flags);
|
|
if (rc == -1)
|
|
{
|
|
TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* set lwp ownership of the fd
|
|
* See BUGS section of "man perf_event_open":
|
|
* The F_SETOWN_EX option to fcntl(2) is needed to properly get
|
|
* overflow signals in threads. This was introduced in Linux 2.6.32.
|
|
* Legacy references:
|
|
* see http://lkml.org/lkml/2009/8/4/128
|
|
* google man fcntl F_SETOWN_EX -conflict
|
|
* "From Linux 2.6.32 onward, use F_SETOWN_EX to target
|
|
* SIGIO and SIGURG signals at a particular thread."
|
|
* http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html
|
|
* See 2010 CSCADS presentation by Eranian
|
|
*/
|
|
struct f_owner_ex fowner_ex;
|
|
fowner_ex.type = F_OWNER_TID;
|
|
fowner_ex.pid = pctx->tid;
|
|
rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex);
|
|
if (rc == -1)
|
|
{
|
|
TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii);
|
|
return 1;
|
|
}
|
|
|
|
/* Use sigio so handler can determine FD via siginfo->si_fd. */
|
|
rc = fcntl (hwc_fd, F_SETSIG, SIGIO);
|
|
if (rc == -1)
|
|
{
|
|
TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
stop_one_ctr (int ii, counter_state_t *ctr_list)
|
|
{
|
|
int hwc_rc = 0;
|
|
if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1))
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno);
|
|
hwc_rc = HWCFUNCS_ERROR_GENERIC;
|
|
}
|
|
void *buf = ctr_list[ii].buf_state.buf;
|
|
if (buf)
|
|
{
|
|
size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz;
|
|
ctr_list[ii].buf_state.buf = NULL;
|
|
int tmprc = munmap (buf, bufsz);
|
|
if (tmprc)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno);
|
|
hwc_rc = HWCFUNCS_ERROR_GENERIC;
|
|
}
|
|
}
|
|
if (-1 == close (ctr_list[ii].fd))
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno);
|
|
hwc_rc = HWCFUNCS_ERROR_GENERIC;
|
|
}
|
|
return hwc_rc;
|
|
}
|
|
|
|
/* HWCDRV_API for thread-specific actions */
|
|
/* Per-thread initialization hook: starts the counters for the calling
   thread and returns the result of hwcdrv_start ().  */
HWCDRV_API int
hwcdrv_lwp_init (void)
{
  int status = hwcdrv_start ();
  return status;
}
|
|
|
|
/* Per-thread teardown hook: releases the calling thread's counters.  The
   result of hwcdrv_free_counters () is deliberately ignored.  */
HWCDRV_API void
hwcdrv_lwp_fini (void)
{
  (void) hwcdrv_free_counters ();       /* also sets pctx->ctr_list=NULL; */
}
|
|
|
|
/* open */
|
|
static int
|
|
hdrv_pcl_internal_open ()
|
|
{
|
|
if (hdrv_pcl_state.internal_open_called)
|
|
{
|
|
TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n");
|
|
return HWCFUNCS_ERROR_ALREADY_CALLED;
|
|
}
|
|
|
|
// determine if PCL is available
|
|
perf_event_def_t tmp_event_def;
|
|
memset (&tmp_event_def, 0, sizeof (tmp_event_def));
|
|
struct perf_event_attr *pe_attr = &tmp_event_def.hw;
|
|
init_perf_event (pe_attr, 0, 0);
|
|
pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event
|
|
pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts
|
|
int hwc_fd = perf_event_open (pe_attr,
|
|
0, // pid/tid, 0 is self
|
|
-1, // cpu, -1 is per-thread mode
|
|
-1, // group_fd, -1 is root
|
|
0); // flags
|
|
if (hwc_fd == -1)
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
|
|
" perf_event_open() failed, errno=%d\n", errno);
|
|
goto internal_open_error;
|
|
}
|
|
|
|
/* see if the PCL is new enough to know about F_SETOWN_EX */
|
|
struct f_owner_ex fowner_ex;
|
|
fowner_ex.type = F_OWNER_TID;
|
|
fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID
|
|
if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1)
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: "
|
|
"F_SETOWN failed, errno=%d\n", errno);
|
|
close (hwc_fd);
|
|
goto internal_open_error;
|
|
}
|
|
close (hwc_fd);
|
|
|
|
hdrv_pcl_state.internal_open_called = 1;
|
|
hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted
|
|
hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
|
|
TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n");
|
|
for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++)
|
|
{
|
|
hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii];
|
|
if (!ppcbe->hdrv_pcbe_init ())
|
|
{
|
|
hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name ();
|
|
hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname);
|
|
if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
|
|
goto internal_open_error;
|
|
hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters ();
|
|
hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref ();
|
|
hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events;
|
|
hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum;
|
|
break;
|
|
}
|
|
}
|
|
if (hdrv_pcl_about.cpcN_npics > MAX_PICS)
|
|
{
|
|
TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
|
|
" reducing number of HWCs from %u to %u on processor '%s'\n",
|
|
hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname);
|
|
hdrv_pcl_about.cpcN_npics = MAX_PICS;
|
|
}
|
|
TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:"
|
|
" perf_event cpuver=%d, name='%s'\n",
|
|
hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname);
|
|
return 0;
|
|
|
|
internal_open_error:
|
|
hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
|
|
hdrv_pcl_about.cpcN_npics = 0;
|
|
hdrv_pcl_about.cpcN_docref = NULL;
|
|
hdrv_pcl_about.cpcN_cciname = NULL;
|
|
return HWCFUNCS_ERROR_NOT_SUPPORTED;
|
|
}
|
|
|
|
static void *
|
|
single_thread_tsd_ftn ()
|
|
{
|
|
static hdrv_pcl_ctx_t tsd_context;
|
|
return &tsd_context;
|
|
}
|
|
|
|
/* HWCDRV_API */
|
|
HWCDRV_API int
|
|
hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz)
|
|
{
|
|
hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn;
|
|
if (tsd_sz)
|
|
*tsd_sz = sizeof (hdrv_pcl_ctx_t);
|
|
|
|
if (hdrv_pcl_state.internal_open_called)
|
|
return HWCFUNCS_ERROR_ALREADY_CALLED;
|
|
return hdrv_pcl_internal_open ();
|
|
}
|
|
|
|
HWCDRV_API void
|
|
hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics,
|
|
const char **docref, uint64_t *support)
|
|
{
|
|
if (cpuver)
|
|
*cpuver = hdrv_pcl_about.cpcN_cpuver;
|
|
if (cciname)
|
|
*cciname = hdrv_pcl_about.cpcN_cciname;
|
|
if (npics)
|
|
*npics = hdrv_pcl_about.cpcN_npics;
|
|
if (docref)
|
|
*docref = hdrv_pcl_about.cpcN_docref;
|
|
if (support)
|
|
*support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
|
|
{
|
|
if (tsd_ftn)
|
|
hdrv_pcl_state.find_vpc_ctx = tsd_ftn;
|
|
else
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n");
|
|
return HWCFUNCS_ERROR_UNAVAIL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb)
|
|
{
|
|
int count = 0;
|
|
if (hwc_cb && hdrv_pcl_state.get_events)
|
|
count = hdrv_pcl_state.get_events (hwc_cb);
|
|
if (attr_cb)
|
|
for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++)
|
|
attr_cb (perfctr_attrs_table[ii].attrname);
|
|
if (!count)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs)
|
|
{
|
|
return hwcdrv_assign_all_regnos (entries, numctrs);
|
|
}
|
|
|
|
static int
|
|
internal_hwc_start (int fd)
|
|
{
|
|
int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1);
|
|
if (rc == -1)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:"
|
|
" PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno);
|
|
return HWCFUNCS_ERROR_UNAVAIL;
|
|
}
|
|
TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd);
|
|
return 0;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events)
|
|
{
|
|
/* set expired counters to overflow value and all others to 0 */
|
|
/* return 0: OK, counters should be restarted */
|
|
/* return non-zero: eventp not set, counters should not be restarted */
|
|
/* clear return values */
|
|
int ii;
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
{
|
|
eventp->ce_pic[ii] = 0;
|
|
lost_events->ce_pic[ii] = 0;
|
|
}
|
|
hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event?
|
|
eventp->ce_hrt = sig_ts;
|
|
lost_events->ce_hrt = sig_ts;
|
|
|
|
/* determine source signal */
|
|
int signal_fd = -1;
|
|
switch (si->si_code)
|
|
{
|
|
case POLL_HUP: /* expected value from pcl */
|
|
/* According to Stephane Eranian:
|
|
* "expect POLL_HUP instead of POLL_IN because we are
|
|
* in one-shot mode (IOC_REFRESH)"
|
|
*/
|
|
signal_fd = si->si_fd;
|
|
break;
|
|
case SI_TKILL: /* event forwarded by tkill */
|
|
/* DBX can only forward SI_TKILL when it detects POLL_HUP
|
|
* unfortunately, this means that si->si_fd has been lost...
|
|
* We need to process the buffers, but we don't know the fd!
|
|
*/
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" SI_TKILL detected\n", sig_ts);
|
|
break;
|
|
default:
|
|
// "sometimes we see a POLL_IN (1) with very high event rates,"
|
|
// according to eranian(?)
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
|
|
" unexpected si_code 0x%x\n", sig_ts, si->si_code);
|
|
return HWCFUNCS_ERROR_GENERIC;
|
|
}
|
|
|
|
hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
|
|
if (!pctx)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
|
|
" tsd context is NULL\n", sig_ts);
|
|
return HWCFUNCS_ERROR_UNEXPECTED;
|
|
}
|
|
counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
|
|
if (!ctr_list)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" ctr_list is NULL\n", sig_ts);
|
|
return HWCFUNCS_ERROR_UNEXPECTED;
|
|
}
|
|
|
|
/* clear needs_restart flag */
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
ctr_list[ii].needs_restart = 0;
|
|
|
|
/* attempt to identify the counter to read */
|
|
int signal_idx = -1;
|
|
pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t
|
|
if (signal_fd != -1)
|
|
{
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
{
|
|
if (ctr_list[ii].fd == signal_fd)
|
|
{
|
|
signal_idx = ii;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (signal_idx < 0)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
|
|
" pmc not determined!\n", sig_ts);
|
|
lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */
|
|
// note: bogus value may get overwritten in loop below
|
|
}
|
|
|
|
/* capture sample(s). In addition to signal_idx, check other counters. */
|
|
struct perf_event_header sheader;
|
|
int idx;
|
|
for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
|
|
{
|
|
int num_recs = 0;
|
|
while (1)
|
|
{
|
|
/* check for samples */
|
|
struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf;
|
|
if (metadata == NULL)
|
|
break; // empty
|
|
if (metadata->data_tail == metadata->data_head)
|
|
break; // empty
|
|
|
|
/* read header */
|
|
if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader)))
|
|
break;
|
|
num_recs++;
|
|
|
|
/* check for PERF_RECORD_SAMPLE */
|
|
size_t datasz = sheader.size - sizeof (struct perf_event_header);
|
|
if (sheader.type != PERF_RECORD_SAMPLE)
|
|
{
|
|
TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" unexpected recd type=%d\n",
|
|
sig_ts, sheader.type);
|
|
if (skip_buf (&ctr_list[idx].buf_state, datasz))
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
|
|
" skip recd type=%d failed\n", sig_ts, sheader.type);
|
|
lost_events->ce_pic[idx] = 4; /* record a bogus value */
|
|
break; // failed to skip buffer??
|
|
}
|
|
lost_events->ce_pic[idx] = 2; /* record a bogus value */
|
|
continue; // advance to next record
|
|
}
|
|
|
|
/* type is PERF_RECORD_SAMPLE */
|
|
uint64_t value, lostv;
|
|
if (read_sample (&ctr_list[idx], datasz, &value, &lostv))
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
|
|
" read_sample() failed\n", sig_ts);
|
|
lost_events->ce_pic[idx] = 3; // record a bogus value
|
|
break; // failed to read sample data??
|
|
}
|
|
TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:"
|
|
" idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts,
|
|
idx, (unsigned long long) value, (unsigned long long) lostv);
|
|
if (eventp->ce_pic[idx])
|
|
{
|
|
TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" idx=%d previous sample recorded as lost_event\n", sig_ts, idx);
|
|
lost_events->ce_pic[idx] += eventp->ce_pic[idx];
|
|
}
|
|
eventp->ce_pic[idx] = value;
|
|
lost_events->ce_pic[idx] += lostv;
|
|
}
|
|
|
|
/* debug output for unexpected (but common) cases */
|
|
if (idx == signal_idx)
|
|
{
|
|
if (num_recs != 1)
|
|
TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx);
|
|
}
|
|
else if (num_recs)
|
|
TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
|
|
" %d unexpected record(s) for idx=%d (signal_idx=%d)\n",
|
|
sig_ts, num_recs, idx, signal_idx);
|
|
|
|
/* trigger counter restart whenever records were found */
|
|
if (num_recs)
|
|
{
|
|
/* check whether to adapt the overflow interval */
|
|
/* This is the Linux version.
|
|
* The Solaris version is in hwprofile.c collector_update_overflow_counters().
|
|
*/
|
|
hrtime_t min_time = global_perf_event_def[idx].min_time;
|
|
if (min_time > 0 // overflow interval is adaptive
|
|
&& sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min
|
|
{
|
|
/* pick a new overflow interval */
|
|
/* roughly doubled, but add funny numbers */
|
|
/* hopefully the result is prime or not a multiple of some # of ops/loop */
|
|
uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37;
|
|
#if 0
|
|
// On Solaris, we report the adjustment to the log file.
|
|
// On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ.
|
|
// For now we simply don't report.
|
|
collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
|
|
SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name,
|
|
ctr_list[idx].last_overflow_period, new_period);
|
|
#endif
|
|
/* There are a variety of ways of resetting the period on Linux.
|
|
* The most elegant is
|
|
* ioctl(fd,PERF_EVENT_IOC_PERIOD,&period)
|
|
* but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD:
|
|
* > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel.
|
|
* > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect
|
|
* until after the next overflow.
|
|
* So we're kind of stuck shutting the fd down and restarting it with the new period.
|
|
*/
|
|
if (stop_one_ctr (idx, ctr_list))
|
|
{
|
|
// EUGENE figure out what to do on error
|
|
}
|
|
ctr_list[idx].last_overflow_period = new_period;
|
|
if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):"))
|
|
{
|
|
// EUGENE figure out what to do on error
|
|
}
|
|
}
|
|
ctr_list[idx].last_overflow_time = sig_ts;
|
|
#if 0
|
|
ctr_list[idx].needs_restart = 1;
|
|
#else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart()
|
|
internal_hwc_start (ctr_list[idx].fd);
|
|
#endif
|
|
}
|
|
}
|
|
return 0; // OK to restart counters
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_sighlr_restart (const hwc_event_t *pp)
|
|
{
|
|
#if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow()
|
|
hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
|
|
if (!pctx)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n");
|
|
return -1;
|
|
}
|
|
counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
|
|
if (!ctr_list)
|
|
{
|
|
TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n");
|
|
return -1;
|
|
}
|
|
int errors = 0;
|
|
for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
{
|
|
if (ctr_list[ii].needs_restart)
|
|
errors |= internal_hwc_start (ctr_list[ii].fd);
|
|
ctr_list[ii].needs_restart = 0;
|
|
}
|
|
return errors;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
/* create counters based on hwcdef[] */
|
|
HWCDRV_API int
|
|
hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
|
|
{
|
|
if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics)
|
|
{
|
|
logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
|
|
{
|
|
logerr (GTXT ("Processor not supported\n"));
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
|
|
/* add counters */
|
|
for (unsigned idx = 0; idx < hwcdef_cnt; idx++)
|
|
{
|
|
perf_event_def_t *glb_event_def = &global_perf_event_def[idx];
|
|
memset (glb_event_def, 0, sizeof (perf_event_def_t));
|
|
unsigned int pmc_sel;
|
|
eventsel_t evntsel;
|
|
if (hwcfuncs_get_x86_eventsel (hwcdef[idx].reg_num,
|
|
hwcdef[idx].int_name, &evntsel, &pmc_sel))
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n");
|
|
return HWCFUNCS_ERROR_HWCARGS;
|
|
}
|
|
glb_event_def->reg_num = pmc_sel;
|
|
glb_event_def->eventsel = evntsel;
|
|
glb_event_def->counter_preload = hwcdef[idx].val;
|
|
glb_event_def->min_time = hwcdef[idx].min_time;
|
|
glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor
|
|
init_perf_event (&glb_event_def->hw, glb_event_def->eventsel,
|
|
glb_event_def->counter_preload);
|
|
TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
|
|
"(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
|
|
idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload,
|
|
(long long) glb_event_def->min_time, (int) glb_event_def->reg_num,
|
|
(long long) glb_event_def->eventsel,
|
|
(long long) HW_INTERVAL_PRESET (hwcdef[idx].val),
|
|
(long long) glb_event_def->hw.exclude_user,
|
|
(long long) glb_event_def->hw.exclude_kernel);
|
|
}
|
|
|
|
hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt;
|
|
return 0;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_free_counters () // note: only performs shutdown for this thread
|
|
{
|
|
hdrv_pcl_ctx_t * pctx;
|
|
if (!COUNTERS_ENABLED ())
|
|
return 0;
|
|
pctx = hdrv_pcl_state.find_vpc_ctx ();
|
|
if (!pctx)
|
|
{
|
|
TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n");
|
|
return HWCFUNCS_ERROR_GENERIC;
|
|
}
|
|
counter_state_t *ctr_list = pctx->ctr_list;
|
|
if (!ctr_list)
|
|
{
|
|
// fork child: prolog suspends hwcs, then epilog frees them
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n");
|
|
return 0;
|
|
}
|
|
int hwc_rc = 0;
|
|
for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
if (stop_one_ctr (ii, ctr_list))
|
|
hwc_rc = HWCFUNCS_ERROR_GENERIC;
|
|
TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", (long) pctx->tid);
|
|
pctx->ctr_list = NULL;
|
|
return hwc_rc;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_start (void) /* must be called from each thread ? */
|
|
{
|
|
hdrv_pcl_ctx_t *pctx = NULL;
|
|
if (!COUNTERS_ENABLED ())
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n");
|
|
return 0;
|
|
}
|
|
if (!hdrv_pcl_state.library_ok)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n");
|
|
return HWCFUNCS_ERROR_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
|
|
* set up per-thread context
|
|
*/
|
|
pctx = hdrv_pcl_state.find_vpc_ctx ();
|
|
if (!pctx)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n");
|
|
return HWCFUNCS_ERROR_UNEXPECTED;
|
|
}
|
|
pctx->tid = hwcdrv_gettid ();
|
|
TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", (long) pctx->tid);
|
|
|
|
/*
|
|
* create per-thread counter list
|
|
*/
|
|
counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt,
|
|
sizeof (counter_state_t));
|
|
if (!ctr_list)
|
|
{
|
|
TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n");
|
|
return HWCFUNCS_ERROR_MEMORY;
|
|
}
|
|
int ii;
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely
|
|
pctx->ctr_list = ctr_list;
|
|
|
|
/*
|
|
* bind the counters
|
|
*/
|
|
size_t pgsz = sysconf (_SC_PAGESIZE);
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
{
|
|
ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period;
|
|
if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup;
|
|
}
|
|
|
|
/*
|
|
* start the counters
|
|
*/
|
|
for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
|
|
{
|
|
int rc = internal_hwc_start (ctr_list[ii].fd);
|
|
if (rc < 0)
|
|
goto hwcdrv_start_cleanup;
|
|
}
|
|
return 0;
|
|
|
|
hwcdrv_start_cleanup:
|
|
hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds
|
|
return HWCFUNCS_ERROR_UNAVAIL;
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_lwp_suspend (void) /* must be called from each thread */
|
|
{
|
|
if (!COUNTERS_ENABLED ())
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n");
|
|
return 0;
|
|
}
|
|
TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n");
|
|
return hwcdrv_free_counters ();
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_lwp_resume (void) /* must be called from each thread */
|
|
{
|
|
if (!COUNTERS_ENABLED ())
|
|
{
|
|
TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n");
|
|
return 0;
|
|
}
|
|
TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n");
|
|
return hwcdrv_start ();
|
|
}
|
|
|
|
HWCDRV_API int
|
|
hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data)
|
|
{
|
|
overflow_data->ce_hrt = 0;
|
|
for (int i = 0; i < MAX_PICS; i++)
|
|
{
|
|
overflow_data->ce_pic[i] = 0;
|
|
if (sampled_data)
|
|
HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/* HWCDRV_API */
|
|
|
|
/* Function table through which this driver's entry points are exported.
   The initializer is positional, so the entries must stay in the order in
   which hwcdrv_api_t declares its members; the final -1 initializes the
   member named in its trailing comment.  */
hwcdrv_api_t hwcdrv_pcl_api = {
|
|
hwcdrv_init,
|
|
hwcdrv_get_info,
|
|
hwcdrv_enable_mt,
|
|
hwcdrv_get_descriptions,
|
|
hwcdrv_assign_regnos,
|
|
hwcdrv_create_counters,
|
|
hwcdrv_start,
|
|
hwcdrv_overflow,
|
|
hwcdrv_read_events,
|
|
hwcdrv_sighlr_restart,
|
|
hwcdrv_lwp_suspend,
|
|
hwcdrv_lwp_resume,
|
|
hwcdrv_free_counters,
|
|
hwcdrv_lwp_init,
|
|
hwcdrv_lwp_fini,
|
|
-1 // hwcdrv_init_status
|
|
};
|