hwpmc: Initial support for AMD IBS
This patch adds support for AMD IBS. It adds a new class of performance counter that contains two events: ibs-fetch and ibs-op. Unlike most existing sampled events, IBS events provide a number of values containing extra information regarding the sample. To support this we use the existing callchain event, and introduce a new flag for multipart payloads. The first 8 bytes of the pc_sample contain a header that defines up to four payloads. Sponsored by: Netflix Reviewed by: imp,mhorne Pull Request: https://github.com/freebsd/freebsd-src/pull/2022
This commit is contained in:
committed by
Warner Losh
parent
00c0a1f0bf
commit
e51ef8ae49
+57
-7
@@ -50,8 +50,8 @@
|
|||||||
#if defined(__amd64__) || defined(__i386__)
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
|
static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
|
||||||
struct pmc_op_pmcallocate *_pmc_config);
|
struct pmc_op_pmcallocate *_pmc_config);
|
||||||
#endif
|
static int ibs_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
|
||||||
#if defined(__amd64__) || defined(__i386__)
|
struct pmc_op_pmcallocate *_pmc_config);
|
||||||
static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
|
static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
|
||||||
struct pmc_op_pmcallocate *_pmc_config);
|
struct pmc_op_pmcallocate *_pmc_config);
|
||||||
#endif
|
#endif
|
||||||
@@ -132,6 +132,7 @@ struct pmc_class_descr {
|
|||||||
|
|
||||||
PMC_CLASSDEP_TABLE(iaf, IAF);
|
PMC_CLASSDEP_TABLE(iaf, IAF);
|
||||||
PMC_CLASSDEP_TABLE(k8, K8);
|
PMC_CLASSDEP_TABLE(k8, K8);
|
||||||
|
PMC_CLASSDEP_TABLE(ibs, IBS);
|
||||||
PMC_CLASSDEP_TABLE(armv7, ARMV7);
|
PMC_CLASSDEP_TABLE(armv7, ARMV7);
|
||||||
PMC_CLASSDEP_TABLE(armv8, ARMV8);
|
PMC_CLASSDEP_TABLE(armv8, ARMV8);
|
||||||
PMC_CLASSDEP_TABLE(cmn600_pmu, CMN600_PMU);
|
PMC_CLASSDEP_TABLE(cmn600_pmu, CMN600_PMU);
|
||||||
@@ -201,8 +202,7 @@ static const struct pmc_class_descr NAME##_class_table_descr = \
|
|||||||
|
|
||||||
#if defined(__i386__) || defined(__amd64__)
|
#if defined(__i386__) || defined(__amd64__)
|
||||||
PMC_CLASS_TABLE_DESC(k8, K8, k8, k8);
|
PMC_CLASS_TABLE_DESC(k8, K8, k8, k8);
|
||||||
#endif
|
PMC_CLASS_TABLE_DESC(ibs, IBS, ibs, ibs);
|
||||||
#if defined(__i386__) || defined(__amd64__)
|
|
||||||
PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
|
PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
|
||||||
#endif
|
#endif
|
||||||
#if defined(__arm__)
|
#if defined(__arm__)
|
||||||
@@ -691,9 +691,49 @@ k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
static int
|
||||||
|
ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||||
|
struct pmc_op_pmcallocate *pmc_config)
|
||||||
|
{
|
||||||
|
char *e, *p, *q;
|
||||||
|
uint64_t ctl;
|
||||||
|
|
||||||
|
pmc_config->pm_caps |=
|
||||||
|
(PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
|
||||||
|
pmc_config->pm_md.pm_ibs.ibs_ctl = 0;
|
||||||
|
|
||||||
|
/* setup parsing tables */
|
||||||
|
switch (pe) {
|
||||||
|
case PMC_EV_IBS_FETCH:
|
||||||
|
pmc_config->pm_md.pm_ibs.ibs_type = IBS_PMC_FETCH;
|
||||||
|
break;
|
||||||
|
case PMC_EV_IBS_OP:
|
||||||
|
pmc_config->pm_md.pm_ibs.ibs_type = IBS_PMC_OP;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* parse parameters */
|
||||||
|
while ((p = strsep(&ctrspec, ",")) != NULL) {
|
||||||
|
if (KWPREFIXMATCH(p, "ctl=")) {
|
||||||
|
q = strchr(p, '=');
|
||||||
|
if (*++q == '\0') /* skip '=' */
|
||||||
|
return (-1);
|
||||||
|
|
||||||
|
ctl = strtoull(q, &e, 0);
|
||||||
|
if (e == q || *e != '\0')
|
||||||
|
return (-1);
|
||||||
|
|
||||||
|
pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
|
||||||
|
} else {
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__amd64__)
|
|
||||||
static int
|
static int
|
||||||
tsc_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
tsc_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||||
struct pmc_op_pmcallocate *pmc_config)
|
struct pmc_op_pmcallocate *pmc_config)
|
||||||
@@ -1268,6 +1308,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
|
|||||||
ev = k8_event_table;
|
ev = k8_event_table;
|
||||||
count = PMC_EVENT_TABLE_SIZE(k8);
|
count = PMC_EVENT_TABLE_SIZE(k8);
|
||||||
break;
|
break;
|
||||||
|
case PMC_CLASS_IBS:
|
||||||
|
ev = ibs_event_table;
|
||||||
|
count = PMC_EVENT_TABLE_SIZE(ibs);
|
||||||
|
break;
|
||||||
case PMC_CLASS_ARMV7:
|
case PMC_CLASS_ARMV7:
|
||||||
switch (cpu_info.pm_cputype) {
|
switch (cpu_info.pm_cputype) {
|
||||||
default:
|
default:
|
||||||
@@ -1471,6 +1515,10 @@ pmc_init(void)
|
|||||||
case PMC_CLASS_K8:
|
case PMC_CLASS_K8:
|
||||||
pmc_class_table[n++] = &k8_class_table_descr;
|
pmc_class_table[n++] = &k8_class_table_descr;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PMC_CLASS_IBS:
|
||||||
|
pmc_class_table[n++] = &ibs_class_table_descr;
|
||||||
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
case PMC_CLASS_SOFT:
|
case PMC_CLASS_SOFT:
|
||||||
@@ -1676,7 +1724,9 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
|
|||||||
if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
|
if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
|
||||||
ev = k8_event_table;
|
ev = k8_event_table;
|
||||||
evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8);
|
evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8);
|
||||||
|
} else if (pe >= PMC_EV_IBS_FIRST && pe <= PMC_EV_IBS_LAST) {
|
||||||
|
ev = ibs_event_table;
|
||||||
|
evfence = ibs_event_table + PMC_EVENT_TABLE_SIZE(ibs);
|
||||||
} else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) {
|
} else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) {
|
||||||
switch (cpu) {
|
switch (cpu) {
|
||||||
case PMC_CPU_ARMV7_CORTEX_A8:
|
case PMC_CPU_ARMV7_CORTEX_A8:
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ struct pmc_mdep;
|
|||||||
|
|
||||||
#include <dev/hwpmc/hwpmc_amd.h>
|
#include <dev/hwpmc/hwpmc_amd.h>
|
||||||
#include <dev/hwpmc/hwpmc_core.h>
|
#include <dev/hwpmc/hwpmc_core.h>
|
||||||
|
#include <dev/hwpmc/hwpmc_ibs.h>
|
||||||
#include <dev/hwpmc/hwpmc_tsc.h>
|
#include <dev/hwpmc/hwpmc_tsc.h>
|
||||||
#include <dev/hwpmc/hwpmc_uncore.h>
|
#include <dev/hwpmc/hwpmc_uncore.h>
|
||||||
|
|
||||||
@@ -51,6 +52,7 @@ struct pmc_mdep;
|
|||||||
*/
|
*/
|
||||||
#define PMC_MDEP_CLASS_INDEX_TSC 1
|
#define PMC_MDEP_CLASS_INDEX_TSC 1
|
||||||
#define PMC_MDEP_CLASS_INDEX_K8 2
|
#define PMC_MDEP_CLASS_INDEX_K8 2
|
||||||
|
#define PMC_MDEP_CLASS_INDEX_IBS 3
|
||||||
#define PMC_MDEP_CLASS_INDEX_P4 2
|
#define PMC_MDEP_CLASS_INDEX_P4 2
|
||||||
#define PMC_MDEP_CLASS_INDEX_IAP 2
|
#define PMC_MDEP_CLASS_INDEX_IAP 2
|
||||||
#define PMC_MDEP_CLASS_INDEX_IAF 3
|
#define PMC_MDEP_CLASS_INDEX_IAF 3
|
||||||
@@ -62,6 +64,7 @@ struct pmc_mdep;
|
|||||||
*
|
*
|
||||||
* TSC The timestamp counter
|
* TSC The timestamp counter
|
||||||
* K8 AMD Athlon64 and Opteron PMCs in 64 bit mode.
|
* K8 AMD Athlon64 and Opteron PMCs in 64 bit mode.
|
||||||
|
* IBS AMD IBS
|
||||||
* PIV Intel P4/HTT and P4/EMT64
|
* PIV Intel P4/HTT and P4/EMT64
|
||||||
* IAP Intel Core/Core2/Atom CPUs in 64 bits mode.
|
* IAP Intel Core/Core2/Atom CPUs in 64 bits mode.
|
||||||
* IAF Intel fixed-function PMCs in Core2 and later CPUs.
|
* IAF Intel fixed-function PMCs in Core2 and later CPUs.
|
||||||
@@ -71,6 +74,7 @@ struct pmc_mdep;
|
|||||||
|
|
||||||
union pmc_md_op_pmcallocate {
|
union pmc_md_op_pmcallocate {
|
||||||
struct pmc_md_amd_op_pmcallocate pm_amd;
|
struct pmc_md_amd_op_pmcallocate pm_amd;
|
||||||
|
struct pmc_md_ibs_op_pmcallocate pm_ibs;
|
||||||
struct pmc_md_iap_op_pmcallocate pm_iap;
|
struct pmc_md_iap_op_pmcallocate pm_iap;
|
||||||
struct pmc_md_ucf_op_pmcallocate pm_ucf;
|
struct pmc_md_ucf_op_pmcallocate pm_ucf;
|
||||||
struct pmc_md_ucp_op_pmcallocate pm_ucp;
|
struct pmc_md_ucp_op_pmcallocate pm_ucp;
|
||||||
@@ -85,6 +89,7 @@ union pmc_md_op_pmcallocate {
|
|||||||
|
|
||||||
union pmc_md_pmc {
|
union pmc_md_pmc {
|
||||||
struct pmc_md_amd_pmc pm_amd;
|
struct pmc_md_amd_pmc pm_amd;
|
||||||
|
struct pmc_md_ibs_pmc pm_ibs;
|
||||||
struct pmc_md_iaf_pmc pm_iaf;
|
struct pmc_md_iaf_pmc pm_iaf;
|
||||||
struct pmc_md_iap_pmc pm_iap;
|
struct pmc_md_iap_pmc pm_iap;
|
||||||
struct pmc_md_ucf_pmc pm_ucf;
|
struct pmc_md_ucf_pmc pm_ucf;
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ dev/hptrr/hptrr_osm_bsd.c optional hptrr
|
|||||||
dev/hptrr/hptrr_config.c optional hptrr
|
dev/hptrr/hptrr_config.c optional hptrr
|
||||||
dev/hptrr/$M-elf.hptrr_lib.o optional hptrr
|
dev/hptrr/$M-elf.hptrr_lib.o optional hptrr
|
||||||
dev/hwpmc/hwpmc_amd.c optional hwpmc
|
dev/hwpmc/hwpmc_amd.c optional hwpmc
|
||||||
|
dev/hwpmc/hwpmc_ibs.c optional hwpmc
|
||||||
dev/hwpmc/hwpmc_intel.c optional hwpmc
|
dev/hwpmc/hwpmc_intel.c optional hwpmc
|
||||||
dev/hwpmc/hwpmc_core.c optional hwpmc
|
dev/hwpmc/hwpmc_core.c optional hwpmc
|
||||||
dev/hwpmc/hwpmc_uncore.c optional hwpmc
|
dev/hwpmc/hwpmc_uncore.c optional hwpmc
|
||||||
|
|||||||
@@ -543,6 +543,10 @@ amd_intr(struct trapframe *tf)
|
|||||||
|
|
||||||
pac = amd_pcpu[cpu];
|
pac = amd_pcpu[cpu];
|
||||||
|
|
||||||
|
retval = pmc_ibs_intr(tf);
|
||||||
|
if (retval)
|
||||||
|
goto done;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* look for all PMCs that have interrupted:
|
* look for all PMCs that have interrupted:
|
||||||
* - look for a running, sampling PMC which has overflowed
|
* - look for a running, sampling PMC which has overflowed
|
||||||
@@ -613,6 +617,7 @@ amd_intr(struct trapframe *tf)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
if (retval)
|
if (retval)
|
||||||
counter_u64_add(pmc_stats.pm_intr_processed, 1);
|
counter_u64_add(pmc_stats.pm_intr_processed, 1);
|
||||||
else
|
else
|
||||||
@@ -760,7 +765,7 @@ pmc_amd_initialize(void)
|
|||||||
struct pmc_classdep *pcd;
|
struct pmc_classdep *pcd;
|
||||||
struct pmc_mdep *pmc_mdep;
|
struct pmc_mdep *pmc_mdep;
|
||||||
enum pmc_cputype cputype;
|
enum pmc_cputype cputype;
|
||||||
int error, i, ncpus;
|
int error, i, ncpus, nclasses;
|
||||||
int family, model, stepping;
|
int family, model, stepping;
|
||||||
int amd_core_npmcs, amd_l3_npmcs, amd_df_npmcs;
|
int amd_core_npmcs, amd_l3_npmcs, amd_df_npmcs;
|
||||||
struct amd_descr *d;
|
struct amd_descr *d;
|
||||||
@@ -884,10 +889,16 @@ pmc_amd_initialize(void)
|
|||||||
M_WAITOK | M_ZERO);
|
M_WAITOK | M_ZERO);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These processors have two classes of PMCs: the TSC and
|
* These processors have two or three classes of PMCs: the TSC,
|
||||||
* programmable PMCs.
|
* programmable PMCs, and AMD IBS.
|
||||||
*/
|
*/
|
||||||
pmc_mdep = pmc_mdep_alloc(2);
|
if ((amd_feature2 & AMDID2_IBS) != 0) {
|
||||||
|
nclasses = 3;
|
||||||
|
} else {
|
||||||
|
nclasses = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
pmc_mdep = pmc_mdep_alloc(nclasses);
|
||||||
|
|
||||||
ncpus = pmc_cpu_max();
|
ncpus = pmc_cpu_max();
|
||||||
|
|
||||||
@@ -927,6 +938,12 @@ pmc_amd_initialize(void)
|
|||||||
|
|
||||||
PMCDBG0(MDP, INI, 0, "amd-initialize");
|
PMCDBG0(MDP, INI, 0, "amd-initialize");
|
||||||
|
|
||||||
|
if (nclasses >= 3) {
|
||||||
|
error = pmc_ibs_initialize(pmc_mdep, ncpus);
|
||||||
|
if (error != 0)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
return (pmc_mdep);
|
return (pmc_mdep);
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
|||||||
@@ -0,0 +1,614 @@
|
|||||||
|
/*-
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*
|
||||||
|
* Copyright (c) 2026, Ali Jose Mashtizadeh
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Support for the AMD IBS */
|
||||||
|
|
||||||
|
#include <sys/param.h>
|
||||||
|
#include <sys/lock.h>
|
||||||
|
#include <sys/malloc.h>
|
||||||
|
#include <sys/mutex.h>
|
||||||
|
#include <sys/pcpu.h>
|
||||||
|
#include <sys/pmc.h>
|
||||||
|
#include <sys/pmckern.h>
|
||||||
|
#include <sys/pmclog.h>
|
||||||
|
#include <sys/smp.h>
|
||||||
|
#include <sys/systm.h>
|
||||||
|
|
||||||
|
#include <machine/cpu.h>
|
||||||
|
#include <machine/cpufunc.h>
|
||||||
|
#include <machine/md_var.h>
|
||||||
|
#include <machine/specialreg.h>
|
||||||
|
|
||||||
|
#define IBS_STOP_ITER 50 /* Stopping iterations */
|
||||||
|
|
||||||
|
/* AMD IBS PMCs */
|
||||||
|
struct ibs_descr {
|
||||||
|
struct pmc_descr pm_descr; /* "base class" */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Globals
|
||||||
|
*/
|
||||||
|
static uint64_t ibs_features;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-processor information
|
||||||
|
*/
|
||||||
|
#define IBS_CPU_RUNNING 1
|
||||||
|
#define IBS_CPU_STOPPING 2
|
||||||
|
#define IBS_CPU_STOPPED 3
|
||||||
|
|
||||||
|
struct ibs_cpu {
|
||||||
|
int pc_status;
|
||||||
|
struct pmc_hw pc_ibspmcs[IBS_NPMCS];
|
||||||
|
};
|
||||||
|
static struct ibs_cpu **ibs_pcpu;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read a PMC value from the MSR.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
|
||||||
|
{
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
KASSERT(ibs_pcpu[cpu],
|
||||||
|
("[ibs,%d] null per-cpu, cpu %d", __LINE__, cpu));
|
||||||
|
|
||||||
|
/* read the IBS ctl */
|
||||||
|
switch (ri) {
|
||||||
|
case IBS_PMC_FETCH:
|
||||||
|
*v = rdmsr(IBS_FETCH_CTL);
|
||||||
|
break;
|
||||||
|
case IBS_PMC_OP:
|
||||||
|
*v = rdmsr(IBS_OP_CTL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PMCDBG2(MDP, REA, 2, "ibs-read id=%d -> %jd", ri, *v);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write a PMC MSR.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
|
||||||
|
{
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
PMCDBG3(MDP, WRI, 1, "ibs-write cpu=%d ri=%d v=%jx", cpu, ri, v);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Configure hardware PMC according to the configuration recorded in 'pm'.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_config_pmc(int cpu, int ri, struct pmc *pm)
|
||||||
|
{
|
||||||
|
struct pmc_hw *phw;
|
||||||
|
|
||||||
|
PMCDBG3(MDP, CFG, 1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
|
||||||
|
|
||||||
|
KASSERT(pm == NULL || phw->phw_pmc == NULL,
|
||||||
|
("[ibs,%d] pm=%p phw->pm=%p hwpmc not unconfigured",
|
||||||
|
__LINE__, pm, phw->phw_pmc));
|
||||||
|
|
||||||
|
phw->phw_pmc = pm;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Retrieve a configured PMC pointer from hardware state.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_get_config(int cpu, int ri, struct pmc **ppm)
|
||||||
|
{
|
||||||
|
|
||||||
|
*ppm = ibs_pcpu[cpu]->pc_ibspmcs[ri].phw_pmc;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if a given PMC allocation is feasible.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
|
||||||
|
const struct pmc_op_pmcallocate *a)
|
||||||
|
{
|
||||||
|
uint64_t caps, config;
|
||||||
|
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
/* check class match */
|
||||||
|
if (a->pm_class != PMC_CLASS_IBS)
|
||||||
|
return (EINVAL);
|
||||||
|
if (a->pm_md.pm_ibs.ibs_type != ri)
|
||||||
|
return (EINVAL);
|
||||||
|
|
||||||
|
caps = pm->pm_caps;
|
||||||
|
|
||||||
|
PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps);
|
||||||
|
|
||||||
|
if ((caps & PMC_CAP_SYSTEM) == 0)
|
||||||
|
return (EINVAL);
|
||||||
|
|
||||||
|
config = a->pm_md.pm_ibs.ibs_ctl;
|
||||||
|
pm->pm_md.pm_ibs.ibs_ctl = config;
|
||||||
|
|
||||||
|
PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%x", ri, config);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release machine dependent state associated with a PMC. This is a
|
||||||
|
* no-op on this architecture.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_release_pmc(int cpu, int ri, struct pmc *pmc __unused)
|
||||||
|
{
|
||||||
|
struct pmc_hw *phw __diagused;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
PMCDBG1(MDP, ALL, 1, "ibs-release ri=%d", ri);
|
||||||
|
|
||||||
|
phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
|
||||||
|
|
||||||
|
KASSERT(phw->phw_pmc == NULL,
|
||||||
|
("[ibs,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start a PMC.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||||
|
{
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
PMCDBG2(MDP, STA, 1, "ibs-start cpu=%d ri=%d", cpu, ri);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is used to handle spurious NMIs. All that matters is that it
|
||||||
|
* is not in the stopping state.
|
||||||
|
*/
|
||||||
|
atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_RUNNING);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turn on the ENABLE bit. Zeroing out the control register eliminates
|
||||||
|
* stale valid bits from spurious NMIs and it resets the counter.
|
||||||
|
*/
|
||||||
|
switch (ri) {
|
||||||
|
case IBS_PMC_FETCH:
|
||||||
|
wrmsr(IBS_FETCH_CTL, 0);
|
||||||
|
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE;
|
||||||
|
wrmsr(IBS_FETCH_CTL, config);
|
||||||
|
break;
|
||||||
|
case IBS_PMC_OP:
|
||||||
|
wrmsr(IBS_OP_CTL, 0);
|
||||||
|
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE;
|
||||||
|
wrmsr(IBS_OP_CTL, config);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stop a PMC.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] illegal row-index %d", __LINE__, ri));
|
||||||
|
|
||||||
|
PMCDBG1(MDP, STO, 1, "ibs-stop ri=%d", ri);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turn off the ENABLE bit, but unfortunately there are a few quirks
|
||||||
|
* that generate excess NMIs. Workaround #420 in the Revision Guide
|
||||||
|
* for AMD Family 10h Processors 41322 Rev. 3.92 March 2012. requires
|
||||||
|
* that we clear the count before clearing enable.
|
||||||
|
*
|
||||||
|
* Even after clearing the counter spurious NMIs are still possible so
|
||||||
|
* we use a per-CPU atomic variable to notify the interrupt handler we
|
||||||
|
* are stopping and discard spurious NMIs. We then retry clearing the
|
||||||
|
* control register for 50us. This gives us enough time and ensures
|
||||||
|
* that the valid bit is not accidently stuck after a spurious NMI.
|
||||||
|
*/
|
||||||
|
config = pm->pm_md.pm_ibs.ibs_ctl;
|
||||||
|
|
||||||
|
atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING);
|
||||||
|
|
||||||
|
switch (ri) {
|
||||||
|
case IBS_PMC_FETCH:
|
||||||
|
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
|
||||||
|
DELAY(1);
|
||||||
|
config &= ~IBS_FETCH_CTL_ENABLE;
|
||||||
|
wrmsr(IBS_FETCH_CTL, config);
|
||||||
|
break;
|
||||||
|
case IBS_PMC_OP:
|
||||||
|
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
|
||||||
|
DELAY(1);
|
||||||
|
config &= ~IBS_OP_CTL_ENABLE;
|
||||||
|
wrmsr(IBS_OP_CTL, config);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < IBS_STOP_ITER; i++) {
|
||||||
|
DELAY(1);
|
||||||
|
|
||||||
|
switch (ri) {
|
||||||
|
case IBS_PMC_FETCH:
|
||||||
|
wrmsr(IBS_FETCH_CTL, 0);
|
||||||
|
break;
|
||||||
|
case IBS_PMC_OP:
|
||||||
|
wrmsr(IBS_OP_CTL, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPED);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config)
|
||||||
|
{
|
||||||
|
struct pmc_multipart mpd;
|
||||||
|
|
||||||
|
if (pm == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pm->pm_state != PMC_STATE_RUNNING)
|
||||||
|
return;
|
||||||
|
|
||||||
|
memset(&mpd, 0, sizeof(mpd));
|
||||||
|
|
||||||
|
mpd.pl_type = PMC_CC_MULTIPART_IBS_FETCH;
|
||||||
|
mpd.pl_length = 4;
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL] = config;
|
||||||
|
if (ibs_features) {
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_FETCH_EXTCTL] = rdmsr(IBS_FETCH_EXTCTL);
|
||||||
|
}
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL] = config;
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_FETCH_LINADDR] = rdmsr(IBS_FETCH_LINADDR);
|
||||||
|
if ((config & IBS_FETCH_CTL_PHYSADDRVALID) != 0) {
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] =
|
||||||
|
rdmsr(IBS_FETCH_PHYSADDR);
|
||||||
|
}
|
||||||
|
|
||||||
|
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config)
|
||||||
|
{
|
||||||
|
struct pmc_multipart mpd;
|
||||||
|
|
||||||
|
if (pm == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pm->pm_state != PMC_STATE_RUNNING)
|
||||||
|
return;
|
||||||
|
|
||||||
|
memset(&mpd, 0, sizeof(mpd));
|
||||||
|
|
||||||
|
mpd.pl_type = PMC_CC_MULTIPART_IBS_OP;
|
||||||
|
mpd.pl_length = 8;
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_CTL] = config;
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_RIP] = rdmsr(IBS_OP_RIP);
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_DATA] = rdmsr(IBS_OP_DATA);
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_DATA2] = rdmsr(IBS_OP_DATA2);
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_DATA3] = rdmsr(IBS_OP_DATA3);
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_DC_LINADDR] = rdmsr(IBS_OP_DC_LINADDR);
|
||||||
|
mpd.pl_mpdata[PMC_MPIDX_OP_DC_PHYSADDR] = rdmsr(IBS_OP_DC_PHYSADDR);
|
||||||
|
|
||||||
|
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
|
||||||
|
|
||||||
|
wrmsr(IBS_OP_CTL, pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Interrupt handler. This function needs to return '1' if the
|
||||||
|
* interrupt was this CPU's PMCs or '0' otherwise. It is not allowed
|
||||||
|
* to sleep or do anything a 'fast' interrupt handler is not allowed
|
||||||
|
* to do.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
pmc_ibs_intr(struct trapframe *tf)
|
||||||
|
{
|
||||||
|
struct ibs_cpu *pac;
|
||||||
|
struct pmc *pm;
|
||||||
|
int retval, cpu;
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
cpu = curcpu;
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] out of range CPU %d", __LINE__, cpu));
|
||||||
|
|
||||||
|
PMCDBG3(MDP, INT, 1, "cpu=%d tf=%p um=%d", cpu, tf, TRAPF_USERMODE(tf));
|
||||||
|
|
||||||
|
retval = 0;
|
||||||
|
|
||||||
|
pac = ibs_pcpu[cpu];
|
||||||
|
|
||||||
|
config = rdmsr(IBS_FETCH_CTL);
|
||||||
|
if ((config & IBS_FETCH_CTL_VALID) != 0) {
|
||||||
|
pm = pac->pc_ibspmcs[IBS_PMC_FETCH].phw_pmc;
|
||||||
|
|
||||||
|
retval = 1;
|
||||||
|
|
||||||
|
pmc_ibs_process_fetch(pm, tf, config);
|
||||||
|
}
|
||||||
|
|
||||||
|
config = rdmsr(IBS_OP_CTL);
|
||||||
|
if ((retval == 0) && ((config & IBS_OP_CTL_VALID) != 0)) {
|
||||||
|
pm = pac->pc_ibspmcs[IBS_PMC_OP].phw_pmc;
|
||||||
|
|
||||||
|
retval = 1;
|
||||||
|
|
||||||
|
pmc_ibs_process_op(pm, tf, config);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (retval == 0) {
|
||||||
|
// Lets check for a stray NMI when stopping
|
||||||
|
if (atomic_load_int(&pac->pc_status) == IBS_CPU_STOPPING) {
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (retval)
|
||||||
|
counter_u64_add(pmc_stats.pm_intr_processed, 1);
|
||||||
|
else
|
||||||
|
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
|
||||||
|
|
||||||
|
PMCDBG1(MDP, INT, 2, "retval=%d", retval);
|
||||||
|
|
||||||
|
return (retval);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Describe a PMC.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
|
||||||
|
{
|
||||||
|
struct pmc_hw *phw;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] illegal CPU %d", __LINE__, cpu));
|
||||||
|
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||||
|
("[ibs,%d] row-index %d out of range", __LINE__, ri));
|
||||||
|
|
||||||
|
phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
|
||||||
|
|
||||||
|
if (ri == IBS_PMC_FETCH) {
|
||||||
|
strlcpy(pi->pm_name, "IBS-FETCH", sizeof(pi->pm_name));
|
||||||
|
pi->pm_class = PMC_CLASS_IBS;
|
||||||
|
pi->pm_enabled = true;
|
||||||
|
*ppmc = phw->phw_pmc;
|
||||||
|
} else {
|
||||||
|
strlcpy(pi->pm_name, "IBS-OP", sizeof(pi->pm_name));
|
||||||
|
pi->pm_class = PMC_CLASS_IBS;
|
||||||
|
pi->pm_enabled = true;
|
||||||
|
*ppmc = phw->phw_pmc;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Processor-dependent initialization.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_pcpu_init(struct pmc_mdep *md, int cpu)
|
||||||
|
{
|
||||||
|
struct ibs_cpu *pac;
|
||||||
|
struct pmc_cpu *pc;
|
||||||
|
struct pmc_hw *phw;
|
||||||
|
int first_ri, n;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] insane cpu number %d", __LINE__, cpu));
|
||||||
|
|
||||||
|
PMCDBG1(MDP, INI, 1, "ibs-init cpu=%d", cpu);
|
||||||
|
|
||||||
|
ibs_pcpu[cpu] = pac = malloc(sizeof(struct ibs_cpu), M_PMC,
|
||||||
|
M_WAITOK | M_ZERO);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the content of the hardware descriptors to a known
|
||||||
|
* state and initialize pointers in the MI per-cpu descriptor.
|
||||||
|
*/
|
||||||
|
pc = pmc_pcpu[cpu];
|
||||||
|
first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri;
|
||||||
|
|
||||||
|
KASSERT(pc != NULL, ("[ibs,%d] NULL per-cpu pointer", __LINE__));
|
||||||
|
|
||||||
|
for (n = 0, phw = pac->pc_ibspmcs; n < IBS_NPMCS; n++, phw++) {
|
||||||
|
phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
|
||||||
|
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
|
||||||
|
phw->phw_pmc = NULL;
|
||||||
|
pc->pc_hwpmcs[n + first_ri] = phw;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Processor-dependent cleanup prior to the KLD being unloaded.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
ibs_pcpu_fini(struct pmc_mdep *md, int cpu)
|
||||||
|
{
|
||||||
|
struct ibs_cpu *pac;
|
||||||
|
struct pmc_cpu *pc;
|
||||||
|
int first_ri, i;
|
||||||
|
|
||||||
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||||
|
("[ibs,%d] insane cpu number (%d)", __LINE__, cpu));
|
||||||
|
|
||||||
|
PMCDBG1(MDP, INI, 1, "ibs-cleanup cpu=%d", cpu);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turn off IBS.
|
||||||
|
*/
|
||||||
|
wrmsr(IBS_FETCH_CTL, 0);
|
||||||
|
wrmsr(IBS_OP_CTL, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free up allocated space.
|
||||||
|
*/
|
||||||
|
if ((pac = ibs_pcpu[cpu]) == NULL)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
ibs_pcpu[cpu] = NULL;
|
||||||
|
|
||||||
|
pc = pmc_pcpu[cpu];
|
||||||
|
KASSERT(pc != NULL, ("[ibs,%d] NULL per-cpu state", __LINE__));
|
||||||
|
|
||||||
|
first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset pointers in the MI 'per-cpu' state.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < IBS_NPMCS; i++)
|
||||||
|
pc->pc_hwpmcs[i + first_ri] = NULL;
|
||||||
|
|
||||||
|
free(pac, M_PMC);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize ourselves.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
pmc_ibs_initialize(struct pmc_mdep *pmc_mdep, int ncpus)
|
||||||
|
{
|
||||||
|
u_int regs[4];
|
||||||
|
struct pmc_classdep *pcd;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate space for pointers to PMC HW descriptors and for
|
||||||
|
* the MDEP structure used by MI code.
|
||||||
|
*/
|
||||||
|
ibs_pcpu = malloc(sizeof(struct ibs_cpu *) * pmc_cpu_max(), M_PMC,
|
||||||
|
M_WAITOK | M_ZERO);
|
||||||
|
|
||||||
|
/* Initialize AMD IBS handling. */
|
||||||
|
pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS];
|
||||||
|
|
||||||
|
pcd->pcd_caps = IBS_PMC_CAPS;
|
||||||
|
pcd->pcd_class = PMC_CLASS_IBS;
|
||||||
|
pcd->pcd_num = IBS_NPMCS;
|
||||||
|
pcd->pcd_ri = pmc_mdep->pmd_npmc;
|
||||||
|
pcd->pcd_width = 0;
|
||||||
|
|
||||||
|
pcd->pcd_allocate_pmc = ibs_allocate_pmc;
|
||||||
|
pcd->pcd_config_pmc = ibs_config_pmc;
|
||||||
|
pcd->pcd_describe = ibs_describe;
|
||||||
|
pcd->pcd_get_config = ibs_get_config;
|
||||||
|
pcd->pcd_pcpu_fini = ibs_pcpu_fini;
|
||||||
|
pcd->pcd_pcpu_init = ibs_pcpu_init;
|
||||||
|
pcd->pcd_release_pmc = ibs_release_pmc;
|
||||||
|
pcd->pcd_start_pmc = ibs_start_pmc;
|
||||||
|
pcd->pcd_stop_pmc = ibs_stop_pmc;
|
||||||
|
pcd->pcd_read_pmc = ibs_read_pmc;
|
||||||
|
pcd->pcd_write_pmc = ibs_write_pmc;
|
||||||
|
|
||||||
|
pmc_mdep->pmd_npmc += IBS_NPMCS;
|
||||||
|
|
||||||
|
if (cpu_exthigh >= CPUID_IBSID) {
|
||||||
|
do_cpuid(CPUID_IBSID, regs);
|
||||||
|
ibs_features = regs[0];
|
||||||
|
} else {
|
||||||
|
ibs_features = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PMCDBG0(MDP, INI, 0, "ibs-initialize");
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finalization code for AMD CPUs.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pmc_ibs_finalize(struct pmc_mdep *md)
|
||||||
|
{
|
||||||
|
PMCDBG0(MDP, INI, 1, "ibs-finalize");
|
||||||
|
|
||||||
|
for (int i = 0; i < pmc_cpu_max(); i++)
|
||||||
|
KASSERT(ibs_pcpu[i] == NULL,
|
||||||
|
("[ibs,%d] non-null pcpu cpu %d", __LINE__, i));
|
||||||
|
|
||||||
|
free(ibs_pcpu, M_PMC);
|
||||||
|
ibs_pcpu = NULL;
|
||||||
|
}
|
||||||
@@ -0,0 +1,176 @@
|
|||||||
|
/*-
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*
|
||||||
|
* Copyright (c) 2026, Ali Jose Mashtizadeh
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _DEV_HWPMC_IBS_H_
|
||||||
|
#define _DEV_HWPMC_IBS_H_ 1
|
||||||
|
|
||||||
|
#define IBS_NPMCS 2
|
||||||
|
#define IBS_PMC_FETCH 0
|
||||||
|
#define IBS_PMC_OP 1
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah
|
||||||
|
* Model 02h C1 (57238) 2024-09-29 Revision 0.24.
|
||||||
|
*/
|
||||||
|
#define CPUID_IBSID 0x8000001B
|
||||||
|
#define CPUID_IBSID_IBSFFV 0x00000001 /* IBS Feature Flags Valid */
|
||||||
|
#define CPUID_IBSID_FETCHSAM 0x00000002 /* IBS Fetch Sampling */
|
||||||
|
#define CPUID_IBSID_OPSAM 0x00000004 /* IBS Execution Sampling */
|
||||||
|
#define CPUID_IBSID_RDWROPCNT 0x00000008 /* RdWr Operation Counter */
|
||||||
|
#define CPUID_IBSID_OPCNT 0x00000010 /* Operation Counter */
|
||||||
|
#define CPUID_IBSID_BRNTRGT 0x00000020 /* Branch Target Address */
|
||||||
|
#define CPUID_IBSID_OPCNTEXT 0x00000040 /* Extend Counter */
|
||||||
|
#define CPUID_IBSID_RIPINVALIDCHK 0x00000080 /* Invalid RIP Indication */
|
||||||
|
/*
 * Feature bits 8 through 12 of CPUID Fn8000_001B EAX.  They must not reuse
 * bits 4-7, which already belong to OPCNT, BRNTRGT, OPCNTEXT and
 * RIPINVALIDCHK.
 */
#define CPUID_IBSID_OPFUSE 0x00000100 /* Fused Branch Operation */
#define CPUID_IBSID_IBSFETCHCTLEXTD 0x00000200 /* IBS Fetch Control Ext */
#define CPUID_IBSID_IBSOPDATA4 0x00000400 /* IBS OP DATA4 */
#define CPUID_IBSID_ZEN4IBSEXTENSIONS 0x00000800 /* IBS Zen 4 Extensions */
#define CPUID_IBSID_IBSLOADLATENCYFILT 0x00001000 /* Load Latency Filtering */
|
||||||
|
#define CPUID_IBSID_IBSUPDTDDTLBSTATS 0x00080000 /* Simplified DTLB Stats */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All of these definitions here come from AMD64 Architecture Programmer's
|
||||||
|
* Manual Volume 2: System Programming (24593) 2025-07-02 Version 3.43. with
|
||||||
|
* the following exceptions:
|
||||||
|
*
|
||||||
|
* OpData4 and fields come from the BKDG for AMD Family 15h Model 70-7Fh
|
||||||
|
* (55072) 2018-06-20 Revision 3.09.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* IBS MSRs */
|
||||||
|
#define IBS_CTL 0xC001103A /* IBS Control */
|
||||||
|
#define IBS_CTL_LVTOFFSETVALID (1ULL << 8)
|
||||||
|
#define IBS_CTL_LVTOFFSETMASK 0x0000000F
|
||||||
|
|
||||||
|
/* IBS Fetch Control */
|
||||||
|
#define IBS_FETCH_CTL 0xC0011030 /* IBS Fetch Control */
|
||||||
|
#define IBS_FETCH_CTL_L3MISS (1ULL << 61) /* L3 Cache Miss */
|
||||||
|
#define IBS_FETCH_CTL_OPCACHEMISS (1ULL << 60) /* Op Cache Miss */
|
||||||
|
#define IBS_FETCH_CTL_L3MISSONLY (1ULL << 59) /* L3 Miss Filtering */
|
||||||
|
#define IBS_FETCH_CTL_RANDOMIZE (1ULL << 57) /* Randomized Tagging */
|
||||||
|
#define IBS_FETCH_CTL_L1TLBMISS (1ULL << 55) /* L1 TLB Miss */
|
||||||
|
// Page size 54:53
|
||||||
|
#define IBS_FETCH_CTL_PHYSADDRVALID (1ULL << 52) /* PHYSADDR Valid */
|
||||||
|
#define IBS_FETCH_CTL_ICMISS (1ULL << 51) /* Inst. Cache Miss */
|
||||||
|
#define IBS_FETCH_CTL_COMPLETE (1ULL << 50) /* Complete */
|
||||||
|
#define IBS_FETCH_CTL_VALID (1ULL << 49) /* Valid */
|
||||||
|
#define IBS_FETCH_CTL_ENABLE (1ULL << 48) /* Enable */
|
||||||
|
#define IBS_FETCH_CTL_MAXCNTMASK 0x0000FFFFULL
|
||||||
|
|
||||||
|
/* Extract bits 47:32 of a fetch control value; the argument is parenthesized so any expression is safe. */
#define IBS_FETCH_CTL_TO_LAT(_c) (((_c) >> 32) & 0x0000FFFF)
|
||||||
|
|
||||||
|
#define IBS_FETCH_LINADDR 0xC0011031 /* Fetch Linear Address */
|
||||||
|
#define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */
|
||||||
|
#define IBS_FETCH_EXTCTL 0xC001103C /* Fetch Control Extended */
|
||||||
|
|
||||||
|
#define PMC_MPIDX_FETCH_CTL 0
|
||||||
|
#define PMC_MPIDX_FETCH_EXTCTL 1
|
||||||
|
#define PMC_MPIDX_FETCH_LINADDR 2
|
||||||
|
#define PMC_MPIDX_FETCH_PHYSADDR 3
|
||||||
|
|
||||||
|
/* IBS Execution Control */
|
||||||
|
#define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */
|
||||||
|
#define IBS_OP_CTL_COUNTERCONTROL (1ULL << 19) /* Counter Control */
|
||||||
|
#define IBS_OP_CTL_VALID (1ULL << 18) /* Valid */
|
||||||
|
#define IBS_OP_CTL_ENABLE (1ULL << 17) /* Enable */
|
||||||
|
#define IBS_OP_CTL_L3MISSONLY (1ULL << 16) /* L3 Miss Filtering */
|
||||||
|
#define IBS_OP_CTL_MAXCNTMASK 0x0000FFFFULL
|
||||||
|
|
||||||
|
#define IBS_OP_RIP 0xC0011034 /* IBS Op RIP */
|
||||||
|
#define IBS_OP_DATA 0xC0011035 /* IBS Op Data */
|
||||||
|
#define IBS_OP_DATA_RIPINVALID (1ULL << 38) /* RIP Invalid */
|
||||||
|
#define IBS_OP_DATA_BRANCHRETIRED (1ULL << 37) /* Branch Retired */
|
||||||
|
#define IBS_OP_DATA_BRANCHMISPREDICTED (1ULL << 36) /* Branch Mispredicted */
|
||||||
|
#define IBS_OP_DATA_BRANCHTAKEN (1ULL << 35) /* Branch Taken */
|
||||||
|
#define IBS_OP_DATA_RETURN (1ULL << 34) /* Return */
|
||||||
|
|
||||||
|
#define IBS_OP_DATA2 0xC0011036 /* IBS Op Data 2 */
|
||||||
|
#define IBS_OP_DATA3 0xC0011037 /* IBS Op Data 3 */
|
||||||
|
#define IBS_OP_DATA3_DCPHYADDRVALID (1ULL << 18) /* DC Physical Address */
|
||||||
|
#define IBS_OP_DATA3_DCLINADDRVALID (1ULL << 17) /* DC Linear Address */
|
||||||
|
#define IBS_OP_DATA3_LOCKEDOP (1ULL << 15) /* DC Locked Op */
|
||||||
|
#define IBS_OP_DATA3_UCMEMACCESS (1ULL << 14) /* DC UC Memory Access */
|
||||||
|
#define IBS_OP_DATA3_WCMEMACCESS (1ULL << 13) /* DC WC Memory Access */
|
||||||
|
#define IBS_OP_DATA3_DCMISALIGN (1ULL << 8) /* DC Misaligned Access */
|
||||||
|
#define IBS_OP_DATA3_DCMISS (1ULL << 7) /* DC Miss */
|
||||||
|
#define IBS_OP_DATA3_DCL1TLBHIT1G (1ULL << 5) /* DC L1 TLB Hit 1-GB */
|
||||||
|
#define IBS_OP_DATA3_DCL1TLBHIT2M (1ULL << 4) /* DC L1 TLB Hit 2-MB */
|
||||||
|
#define IBS_OP_DATA3_DCL1TLBMISS (1ULL << 2) /* DC L1 TLB Miss */
|
||||||
|
#define IBS_OP_DATA3_STORE (1ULL << 1) /* Store */
|
||||||
|
#define IBS_OP_DATA3_LOAD (1ULL << 0) /* Load */
|
||||||
|
/* Extract bits 47:32 of an op data 3 value; the argument is parenthesized so any expression is safe. */
#define IBS_OP_DATA3_TO_DCLAT(_c) (((_c) >> 32) & 0x0000FFFF)
|
||||||
|
|
||||||
|
#define IBS_OP_DC_LINADDR 0xC0011038 /* IBS DC Linear Address */
|
||||||
|
#define IBS_OP_DC_PHYSADDR 0xC0011039 /* IBS DC Physical Address */
|
||||||
|
#define IBS_TGT_RIP 0xC001103B /* IBS Branch Target */
|
||||||
|
#define IBS_OP_DATA4 0xC001103D /* IBS Op Data 4 */
|
||||||
|
#define IBS_OP_DATA4_LDRESYNC (1ULL << 0) /* Load Resync */
|
||||||
|
|
||||||
|
#define PMC_MPIDX_OP_CTL 0
|
||||||
|
#define PMC_MPIDX_OP_RIP 1
|
||||||
|
#define PMC_MPIDX_OP_DATA 2
|
||||||
|
#define PMC_MPIDX_OP_DATA2 3
|
||||||
|
#define PMC_MPIDX_OP_DATA3 4
|
||||||
|
#define PMC_MPIDX_OP_DC_LINADDR 5
|
||||||
|
#define PMC_MPIDX_OP_DC_PHYSADDR 6
|
||||||
|
#define PMC_MPIDX_OP_TGT_RIP 7
|
||||||
|
#define PMC_MPIDX_OP_DATA4 8
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IBS data is encoded using the multipart flag in the existing callchain
|
||||||
|
* structure. The PMC ID number tells you if the sample contains a fetch or an
|
||||||
|
* op sample. The available payload will be encoded in the MSR order with a
|
||||||
|
* variable length.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct pmc_md_ibs_op_pmcallocate {
|
||||||
|
uint32_t ibs_flag;
|
||||||
|
uint32_t ibs_type;
|
||||||
|
uint64_t ibs_ctl;
|
||||||
|
uint64_t ibs_ctl2;
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
|
||||||
|
/* MD extension for 'struct pmc' */
|
||||||
|
struct pmc_md_ibs_pmc {
|
||||||
|
uint32_t ibs_flag;
|
||||||
|
uint32_t ibs_type;
|
||||||
|
uint64_t ibs_ctl;
|
||||||
|
uint64_t ibs_ctl2;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \
|
||||||
|
PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
|
||||||
|
|
||||||
|
int pmc_ibs_initialize(struct pmc_mdep *md, int ncpu);
|
||||||
|
void pmc_ibs_finalize(struct pmc_mdep *md);
|
||||||
|
int pmc_ibs_intr(struct trapframe *tf);
|
||||||
|
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
#endif /* _DEV_HWPMC_IBS_H_ */
|
||||||
+84
-12
@@ -198,9 +198,15 @@ static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
|
|||||||
static int pmc_debugflags_parse(char *newstr, char *fence);
|
static int pmc_debugflags_parse(char *newstr, char *fence);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static bool pmc_is_multipart(struct pmc_sample *ps);
|
||||||
|
static void pmc_multipart_add(struct pmc_sample *ps, int type,
|
||||||
|
int length);
|
||||||
|
static void pmc_multipart_copydata(struct pmc_sample *ps,
|
||||||
|
struct pmc_multipart *mp);
|
||||||
|
|
||||||
static int load(struct module *module, int cmd, void *arg);
|
static int load(struct module *module, int cmd, void *arg);
|
||||||
static int pmc_add_sample(ring_type_t ring, struct pmc *pm,
|
static int pmc_add_sample(ring_type_t ring, struct pmc *pm,
|
||||||
struct trapframe *tf);
|
struct trapframe *tf, struct pmc_multipart *mp);
|
||||||
static void pmc_add_thread_descriptors_from_proc(struct proc *p,
|
static void pmc_add_thread_descriptors_from_proc(struct proc *p,
|
||||||
struct pmc_process *pp);
|
struct pmc_process *pp);
|
||||||
static int pmc_attach_process(struct proc *p, struct pmc *pm);
|
static int pmc_attach_process(struct proc *p, struct pmc *pm);
|
||||||
@@ -4587,6 +4593,53 @@ pmc_post_callchain_callback(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
pmc_is_multipart(struct pmc_sample *ps)
|
||||||
|
{
|
||||||
|
return ((ps->ps_flags & PMC_CC_F_MULTIPART) != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmc_multipart_add(struct pmc_sample *ps, int type, int length)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint8_t *hdr;
|
||||||
|
|
||||||
|
MPASS(ps->ps_pc != NULL);
|
||||||
|
MPASS(ps->ps_nsamples_actual != 0);
|
||||||
|
|
||||||
|
hdr = (uint8_t *)ps->ps_pc;
|
||||||
|
|
||||||
|
for (i = 0; i < PMC_MULTIPART_HEADER_ENTRIES; i++) {
|
||||||
|
if (hdr[2 * i] == PMC_CC_MULTIPART_NONE) {
|
||||||
|
hdr[2 * i] = type;
|
||||||
|
hdr[2 * i + 1] = length;
|
||||||
|
ps->ps_nsamples_actual += length;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
KASSERT(false, ("Too many parts in the multipart header!"));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmc_multipart_copydata(struct pmc_sample *ps, struct pmc_multipart *mp)
|
||||||
|
{
|
||||||
|
int i, scale;
|
||||||
|
uint64_t *ps_pc;
|
||||||
|
|
||||||
|
MPASS(ps->ps_pc != NULL);
|
||||||
|
MPASS(ps->ps_nsamples_actual != 0);
|
||||||
|
|
||||||
|
ps_pc = (uint64_t *)ps->ps_pc;
|
||||||
|
|
||||||
|
for (i = 0; i < mp->pl_length; i++)
|
||||||
|
ps_pc[i + 1] = mp->pl_mpdata[i];
|
||||||
|
|
||||||
|
scale = sizeof(uint64_t) / sizeof(uintptr_t);
|
||||||
|
pmc_multipart_add(ps, mp->pl_type, scale * mp->pl_length);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find a free slot in the per-cpu array of samples and capture the
|
* Find a free slot in the per-cpu array of samples and capture the
|
||||||
* current callchain there. If a sample was successfully added, a bit
|
* current callchain there. If a sample was successfully added, a bit
|
||||||
@@ -4597,7 +4650,8 @@ pmc_post_callchain_callback(void)
|
|||||||
* use any of the locking primitives supplied by the OS.
|
* use any of the locking primitives supplied by the OS.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
|
pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf,
|
||||||
|
struct pmc_multipart *mp)
|
||||||
{
|
{
|
||||||
struct pmc_sample *ps;
|
struct pmc_sample *ps;
|
||||||
struct pmc_samplebuffer *psb;
|
struct pmc_samplebuffer *psb;
|
||||||
@@ -4641,21 +4695,33 @@ pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
|
|||||||
ps->ps_ticks = ticks;
|
ps->ps_ticks = ticks;
|
||||||
ps->ps_cpu = cpu;
|
ps->ps_cpu = cpu;
|
||||||
ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
|
ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
|
||||||
|
ps->ps_nsamples_actual = 0;
|
||||||
|
|
||||||
callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
|
callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
|
||||||
pmc_callchaindepth : 1;
|
pmc_callchaindepth : 1;
|
||||||
|
|
||||||
MPASS(ps->ps_pc != NULL);
|
MPASS(ps->ps_pc != NULL);
|
||||||
|
|
||||||
|
if (mp != NULL) {
|
||||||
|
/* Set multipart flag, clear header and copy data */
|
||||||
|
ps->ps_flags |= PMC_CC_F_MULTIPART;
|
||||||
|
ps->ps_pc[0] = 0;
|
||||||
|
ps->ps_nsamples_actual = 1;
|
||||||
|
pmc_multipart_copydata(ps, mp);
|
||||||
|
}
|
||||||
|
|
||||||
if (callchaindepth == 1) {
|
if (callchaindepth == 1) {
|
||||||
ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
|
ps->ps_pc[ps->ps_nsamples_actual] = PMC_TRAPFRAME_TO_PC(tf);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Kernel stack traversals can be done immediately, while we
|
* Kernel stack traversals can be done immediately, while we
|
||||||
* defer to an AST for user space traversals.
|
* defer to an AST for user space traversals.
|
||||||
*/
|
*/
|
||||||
if (!inuserspace) {
|
if (!inuserspace) {
|
||||||
callchaindepth = pmc_save_kernel_callchain(ps->ps_pc,
|
callchaindepth = pmc_save_kernel_callchain(
|
||||||
callchaindepth, tf);
|
ps->ps_pc + ps->ps_nsamples_actual,
|
||||||
|
callchaindepth - ps->ps_nsamples_actual, tf);
|
||||||
|
callchaindepth += ps->ps_nsamples_actual;
|
||||||
} else {
|
} else {
|
||||||
pmc_post_callchain_callback();
|
pmc_post_callchain_callback();
|
||||||
callchaindepth = PMC_USER_CALLCHAIN_PENDING;
|
callchaindepth = PMC_USER_CALLCHAIN_PENDING;
|
||||||
@@ -4664,7 +4730,7 @@ pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
|
|||||||
|
|
||||||
ps->ps_nsamples = callchaindepth; /* mark entry as in-use */
|
ps->ps_nsamples = callchaindepth; /* mark entry as in-use */
|
||||||
if (ring == PMC_UR) {
|
if (ring == PMC_UR) {
|
||||||
ps->ps_nsamples_actual = callchaindepth;
|
ps->ps_nsamples_actual = ps->ps_nsamples;
|
||||||
ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING;
|
ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4690,7 +4756,8 @@ pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
|
|||||||
* locking primitives supplied by the OS.
|
* locking primitives supplied by the OS.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf)
|
pmc_process_interrupt_mp(int ring, struct pmc *pm, struct trapframe *tf,
|
||||||
|
struct pmc_multipart *mp)
|
||||||
{
|
{
|
||||||
struct thread *td;
|
struct thread *td;
|
||||||
|
|
||||||
@@ -4698,9 +4765,15 @@ pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf)
|
|||||||
if ((pm->pm_flags & PMC_F_USERCALLCHAIN) &&
|
if ((pm->pm_flags & PMC_F_USERCALLCHAIN) &&
|
||||||
(td->td_proc->p_flag & P_KPROC) == 0 && !TRAPF_USERMODE(tf)) {
|
(td->td_proc->p_flag & P_KPROC) == 0 && !TRAPF_USERMODE(tf)) {
|
||||||
atomic_add_int(&td->td_pmcpend, 1);
|
atomic_add_int(&td->td_pmcpend, 1);
|
||||||
return (pmc_add_sample(PMC_UR, pm, tf));
|
return (pmc_add_sample(PMC_UR, pm, tf, mp));
|
||||||
}
|
}
|
||||||
return (pmc_add_sample(ring, pm, tf));
|
return (pmc_add_sample(ring, pm, tf, mp));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf)
|
||||||
|
{
|
||||||
|
return (pmc_process_interrupt_mp(ring, pm, tf, NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -4763,10 +4836,9 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
|
|||||||
(uintmax_t)counter_u64_fetch(pm->pm_runcount)));
|
(uintmax_t)counter_u64_fetch(pm->pm_runcount)));
|
||||||
|
|
||||||
if (ring == PMC_UR) {
|
if (ring == PMC_UR) {
|
||||||
nsamples = ps->ps_nsamples_actual;
|
|
||||||
counter_u64_add(pmc_stats.pm_merges, 1);
|
counter_u64_add(pmc_stats.pm_merges, 1);
|
||||||
} else
|
}
|
||||||
nsamples = 0;
|
nsamples = ps->ps_nsamples_actual;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Retrieve the callchain and mark the sample buffer
|
* Retrieve the callchain and mark the sample buffer
|
||||||
|
|||||||
@@ -149,6 +149,15 @@ __PMC_EV(K8, NB_HT_BUS2_BANDWIDTH)
|
|||||||
#define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS
|
#define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS
|
||||||
#define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH
|
#define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH
|
||||||
|
|
||||||
|
/* AMD IBS PMCs */
|
||||||
|
|
||||||
|
#define __PMC_EV_IBS() \
|
||||||
|
__PMC_EV(IBS, FETCH) \
|
||||||
|
__PMC_EV(IBS, OP)
|
||||||
|
|
||||||
|
#define PMC_EV_IBS_FIRST PMC_EV_IBS_FETCH
|
||||||
|
#define PMC_EV_IBS_LAST PMC_EV_IBS_OP
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Events supported by Intel architectural fixed function counters,
|
* Events supported by Intel architectural fixed function counters,
|
||||||
* from the "Intel 64 and IA-32 Architectures Software Developer's
|
* from the "Intel 64 and IA-32 Architectures Software Developer's
|
||||||
@@ -2398,7 +2407,7 @@ __PMC_EV_ALIAS("unhalted-reference-cycles", IAF_CPU_CLK_UNHALTED_REF)
|
|||||||
* START #EVENTS DESCRIPTION
|
* START #EVENTS DESCRIPTION
|
||||||
* 0 0x1000 Reserved
|
* 0 0x1000 Reserved
|
||||||
* 0x1000 0x0001 TSC
|
* 0x1000 0x0001 TSC
|
||||||
* 0x2000 0x0080 free (was AMD K7 events)
|
* 0x2000 0x0080 AMD IBS (was AMD K7 events)
|
||||||
* 0x2080 0x0100 AMD K8 events
|
* 0x2080 0x0100 AMD K8 events
|
||||||
* 0x10000 0x0080 INTEL architectural fixed-function events
|
* 0x10000 0x0080 INTEL architectural fixed-function events
|
||||||
* 0x10080 0x0F80 free (was INTEL architectural programmable events)
|
* 0x10080 0x0F80 free (was INTEL architectural programmable events)
|
||||||
@@ -2424,6 +2433,8 @@ __PMC_EV_ALIAS("unhalted-reference-cycles", IAF_CPU_CLK_UNHALTED_REF)
|
|||||||
#define __PMC_EVENTS() \
|
#define __PMC_EVENTS() \
|
||||||
__PMC_EV_BLOCK(TSC, 0x01000) \
|
__PMC_EV_BLOCK(TSC, 0x01000) \
|
||||||
__PMC_EV_TSC() \
|
__PMC_EV_TSC() \
|
||||||
|
__PMC_EV_BLOCK(IBS, 0x02000) \
|
||||||
|
__PMC_EV_IBS() \
|
||||||
__PMC_EV_BLOCK(K8, 0x02080) \
|
__PMC_EV_BLOCK(K8, 0x02080) \
|
||||||
__PMC_EV_K8() \
|
__PMC_EV_K8() \
|
||||||
__PMC_EV_BLOCK(IAF, 0x10000) \
|
__PMC_EV_BLOCK(IAF, 0x10000) \
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ struct pmc_mdep;
|
|||||||
* TSC The timestamp counter
|
* TSC The timestamp counter
|
||||||
* K7 AMD Athlon XP/MP and other 32 bit processors.
|
* K7 AMD Athlon XP/MP and other 32 bit processors.
|
||||||
* K8 AMD Athlon64 and Opteron PMCs in 32 bit mode.
|
* K8 AMD Athlon64 and Opteron PMCs in 32 bit mode.
|
||||||
|
* IBS AMD IBS
|
||||||
* IAP Intel Core/Core2/Atom programmable PMCs.
|
* IAP Intel Core/Core2/Atom programmable PMCs.
|
||||||
* IAF Intel fixed-function PMCs.
|
* IAF Intel fixed-function PMCs.
|
||||||
* UCP Intel Uncore programmable PMCs.
|
* UCP Intel Uncore programmable PMCs.
|
||||||
@@ -50,6 +51,7 @@ struct pmc_mdep;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <dev/hwpmc/hwpmc_amd.h> /* K7 and K8 */
|
#include <dev/hwpmc/hwpmc_amd.h> /* K7 and K8 */
|
||||||
|
#include <dev/hwpmc/hwpmc_ibs.h>
|
||||||
#include <dev/hwpmc/hwpmc_core.h>
|
#include <dev/hwpmc/hwpmc_core.h>
|
||||||
#include <dev/hwpmc/hwpmc_tsc.h>
|
#include <dev/hwpmc/hwpmc_tsc.h>
|
||||||
#include <dev/hwpmc/hwpmc_uncore.h>
|
#include <dev/hwpmc/hwpmc_uncore.h>
|
||||||
@@ -62,6 +64,7 @@ struct pmc_mdep;
|
|||||||
#define PMC_MDEP_CLASS_INDEX_TSC 1
|
#define PMC_MDEP_CLASS_INDEX_TSC 1
|
||||||
#define PMC_MDEP_CLASS_INDEX_K7 2
|
#define PMC_MDEP_CLASS_INDEX_K7 2
|
||||||
#define PMC_MDEP_CLASS_INDEX_K8 2
|
#define PMC_MDEP_CLASS_INDEX_K8 2
|
||||||
|
#define PMC_MDEP_CLASS_INDEX_IBS 3
|
||||||
#define PMC_MDEP_CLASS_INDEX_IAP 2
|
#define PMC_MDEP_CLASS_INDEX_IAP 2
|
||||||
#define PMC_MDEP_CLASS_INDEX_IAF 3
|
#define PMC_MDEP_CLASS_INDEX_IAF 3
|
||||||
#define PMC_MDEP_CLASS_INDEX_UCP 4
|
#define PMC_MDEP_CLASS_INDEX_UCP 4
|
||||||
@@ -73,6 +76,7 @@ struct pmc_mdep;
|
|||||||
|
|
||||||
union pmc_md_op_pmcallocate {
|
union pmc_md_op_pmcallocate {
|
||||||
struct pmc_md_amd_op_pmcallocate pm_amd;
|
struct pmc_md_amd_op_pmcallocate pm_amd;
|
||||||
|
struct pmc_md_ibs_op_pmcallocate pm_ibs;
|
||||||
struct pmc_md_iap_op_pmcallocate pm_iap;
|
struct pmc_md_iap_op_pmcallocate pm_iap;
|
||||||
struct pmc_md_ucf_op_pmcallocate pm_ucf;
|
struct pmc_md_ucf_op_pmcallocate pm_ucf;
|
||||||
struct pmc_md_ucp_op_pmcallocate pm_ucp;
|
struct pmc_md_ucp_op_pmcallocate pm_ucp;
|
||||||
@@ -88,6 +92,7 @@ union pmc_md_op_pmcallocate {
|
|||||||
/* MD extension for 'struct pmc' */
|
/* MD extension for 'struct pmc' */
|
||||||
union pmc_md_pmc {
|
union pmc_md_pmc {
|
||||||
struct pmc_md_amd_pmc pm_amd;
|
struct pmc_md_amd_pmc pm_amd;
|
||||||
|
struct pmc_md_ibs_pmc pm_ibs;
|
||||||
struct pmc_md_iaf_pmc pm_iaf;
|
struct pmc_md_iaf_pmc pm_iaf;
|
||||||
struct pmc_md_iap_pmc pm_iap;
|
struct pmc_md_iap_pmc pm_iap;
|
||||||
struct pmc_md_ucf_pmc pm_ucf;
|
struct pmc_md_ucf_pmc pm_ucf;
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ SRCS.DEV_ACPI+= hwpmc_dmc620.c pmu_dmc620.c
|
|||||||
.endif
|
.endif
|
||||||
|
|
||||||
.if ${MACHINE_CPUARCH} == "amd64"
|
.if ${MACHINE_CPUARCH} == "amd64"
|
||||||
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_tsc.c
|
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_ibs.c hwpmc_intel.c hwpmc_tsc.c
|
||||||
SRCS+= hwpmc_x86.c hwpmc_uncore.c
|
SRCS+= hwpmc_x86.c hwpmc_uncore.c
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
@@ -29,7 +29,7 @@ SRCS+= hwpmc_armv7.c
|
|||||||
.endif
|
.endif
|
||||||
|
|
||||||
.if ${MACHINE_CPUARCH} == "i386"
|
.if ${MACHINE_CPUARCH} == "i386"
|
||||||
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c
|
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_ibs.c hwpmc_intel.c
|
||||||
SRCS+= hwpmc_tsc.c hwpmc_x86.c hwpmc_uncore.c
|
SRCS+= hwpmc_tsc.c hwpmc_x86.c hwpmc_uncore.c
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
|
|||||||
+18
-1
@@ -141,6 +141,7 @@ enum pmc_cputype {
|
|||||||
#define __PMC_CLASSES() \
|
#define __PMC_CLASSES() \
|
||||||
__PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \
|
__PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \
|
||||||
__PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \
|
__PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \
|
||||||
|
__PMC_CLASS(IBS, 0x03, "AMD IBS performance counters") \
|
||||||
__PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \
|
__PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \
|
||||||
__PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \
|
__PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \
|
||||||
__PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \
|
__PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \
|
||||||
@@ -386,6 +387,7 @@ enum pmc_ops {
|
|||||||
#define PMC_CALLCHAIN_DEPTH_MAX 512
|
#define PMC_CALLCHAIN_DEPTH_MAX 512
|
||||||
|
|
||||||
#define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/
|
#define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/
|
||||||
|
#define PMC_CC_F_MULTIPART 0x02 /*multipart data*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cookies used to denote allocated PMCs, and the values of PMCs.
|
* Cookies used to denote allocated PMCs, and the values of PMCs.
|
||||||
@@ -960,6 +962,18 @@ struct pmc_samplebuffer {
|
|||||||
#define PMC_PROD_SAMPLE(psb) \
|
#define PMC_PROD_SAMPLE(psb) \
|
||||||
(&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask])
|
(&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask])
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* struct pmc_multipart
|
||||||
|
*
|
||||||
|
* Multipart payload
|
||||||
|
*/
|
||||||
|
struct pmc_multipart {
|
||||||
|
char pl_type;
|
||||||
|
char pl_length;
|
||||||
|
uint64_t pl_mpdata[10];
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* struct pmc_cpustate
|
* struct pmc_cpustate
|
||||||
*
|
*
|
||||||
@@ -1226,7 +1240,10 @@ MALLOC_DECLARE(M_PMC);
|
|||||||
struct pmc_mdep *pmc_md_initialize(void); /* MD init function */
|
struct pmc_mdep *pmc_md_initialize(void); /* MD init function */
|
||||||
void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */
|
void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */
|
||||||
int pmc_getrowdisp(int _ri);
|
int pmc_getrowdisp(int _ri);
|
||||||
int pmc_process_interrupt(int _ring, struct pmc *_pm, struct trapframe *_tf);
|
int pmc_process_interrupt_mp(int _ring, struct pmc *_pm,
|
||||||
|
struct trapframe *_tf, struct pmc_multipart *mp);
|
||||||
|
int pmc_process_interrupt(int _ring, struct pmc *_pm,
|
||||||
|
struct trapframe *_tf);
|
||||||
int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
|
int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
|
||||||
struct trapframe *_tf);
|
struct trapframe *_tf);
|
||||||
int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples,
|
int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples,
|
||||||
|
|||||||
@@ -125,6 +125,20 @@ struct pmclog_callchain {
|
|||||||
#define PMC_CALLCHAIN_TO_CPUFLAGS(CPU,FLAGS) \
|
#define PMC_CALLCHAIN_TO_CPUFLAGS(CPU,FLAGS) \
|
||||||
(((CPU) << 16) | ((FLAGS) & 0xFFFF))
|
(((CPU) << 16) | ((FLAGS) & 0xFFFF))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the multipart flag is set, then pl_pc contains multiple data types. The
|
||||||
|
* first 8 bytes are a header of four entries (1 byte type, 1 byte length each) that
|
||||||
|
* describes the use of the remaining pl_pc array.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PMC_MULTIPART_HEADER_LENGTH 8
|
||||||
|
#define PMC_MULTIPART_HEADER_ENTRIES 4
|
||||||
|
|
||||||
|
#define PMC_CC_MULTIPART_NONE 0
|
||||||
|
#define PMC_CC_MULTIPART_CALLCHAIN 1
|
||||||
|
#define PMC_CC_MULTIPART_IBS_FETCH 2
|
||||||
|
#define PMC_CC_MULTIPART_IBS_OP 3
|
||||||
|
|
||||||
struct pmclog_closelog {
|
struct pmclog_closelog {
|
||||||
PMCLOG_ENTRY_HEADER
|
PMCLOG_ENTRY_HEADER
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -230,11 +230,11 @@ static struct lvt elvts[] = {
|
|||||||
.lvt_edgetrigger = 1,
|
.lvt_edgetrigger = 1,
|
||||||
.lvt_activehi = 1,
|
.lvt_activehi = 1,
|
||||||
.lvt_masked = 1,
|
.lvt_masked = 1,
|
||||||
.lvt_active = 0,
|
.lvt_active = 1,
|
||||||
.lvt_mode = APIC_LVT_DM_FIXED,
|
.lvt_mode = APIC_LVT_DM_NMI,
|
||||||
.lvt_vector = 0,
|
.lvt_vector = 0,
|
||||||
.lvt_reg = LAPIC_EXT_LVT0,
|
.lvt_reg = LAPIC_EXT_LVT0,
|
||||||
.lvt_desc = "ELVT0",
|
.lvt_desc = "IBS",
|
||||||
},
|
},
|
||||||
[APIC_ELVT_MCA] = {
|
[APIC_ELVT_MCA] = {
|
||||||
.lvt_edgetrigger = 1,
|
.lvt_edgetrigger = 1,
|
||||||
@@ -528,7 +528,10 @@ elvt_mode(struct lapic *la, u_int idx, uint32_t value)
|
|||||||
KASSERT(idx <= APIC_ELVT_MAX,
|
KASSERT(idx <= APIC_ELVT_MAX,
|
||||||
("%s: idx %u out of range", __func__, idx));
|
("%s: idx %u out of range", __func__, idx));
|
||||||
|
|
||||||
elvt = &la->la_elvts[idx];
|
if (la->la_elvts[idx].lvt_active)
|
||||||
|
elvt = &la->la_elvts[idx];
|
||||||
|
else
|
||||||
|
elvt = &elvts[idx];
|
||||||
KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
|
KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
|
||||||
KASSERT(elvt->lvt_edgetrigger,
|
KASSERT(elvt->lvt_edgetrigger,
|
||||||
("%s: ELVT%u is not edge triggered", __func__, idx));
|
("%s: ELVT%u is not edge triggered", __func__, idx));
|
||||||
@@ -963,9 +966,16 @@ lapic_reenable_pcint(void)
|
|||||||
|
|
||||||
if (refcount_load(&pcint_refcnt) == 0)
|
if (refcount_load(&pcint_refcnt) == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
value = lapic_read32(LAPIC_LVT_PCINT);
|
value = lapic_read32(LAPIC_LVT_PCINT);
|
||||||
value &= ~APIC_LVT_M;
|
value &= ~APIC_LVT_M;
|
||||||
lapic_write32(LAPIC_LVT_PCINT, value);
|
lapic_write32(LAPIC_LVT_PCINT, value);
|
||||||
|
|
||||||
|
if ((amd_feature2 & AMDID2_IBS) != 0) {
|
||||||
|
value = lapic_read32(LAPIC_EXT_LVT0);
|
||||||
|
value &= ~APIC_LVT_M;
|
||||||
|
lapic_write32(LAPIC_EXT_LVT0, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -976,6 +986,11 @@ lapic_update_pcint(void *dummy)
|
|||||||
la = &lapics[lapic_id()];
|
la = &lapics[lapic_id()];
|
||||||
lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
|
lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
|
||||||
lapic_read32(LAPIC_LVT_PCINT)));
|
lapic_read32(LAPIC_LVT_PCINT)));
|
||||||
|
|
||||||
|
if ((amd_feature2 & AMDID2_IBS) != 0) {
|
||||||
|
lapic_write32(LAPIC_EXT_LVT0, elvt_mode(la, APIC_ELVT_IBS,
|
||||||
|
lapic_read32(LAPIC_EXT_LVT0)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -1022,6 +1037,9 @@ lapic_enable_pcint(void)
|
|||||||
return (1);
|
return (1);
|
||||||
lvts[APIC_LVT_PMC].lvt_masked = 0;
|
lvts[APIC_LVT_PMC].lvt_masked = 0;
|
||||||
|
|
||||||
|
if ((amd_feature2 & AMDID2_IBS) != 0)
|
||||||
|
elvts[APIC_ELVT_IBS].lvt_masked = 0;
|
||||||
|
|
||||||
MPASS(mp_ncpus == 1 || smp_started);
|
MPASS(mp_ncpus == 1 || smp_started);
|
||||||
smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL);
|
smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL);
|
||||||
return (1);
|
return (1);
|
||||||
@@ -1045,6 +1063,7 @@ lapic_disable_pcint(void)
|
|||||||
if (!refcount_release(&pcint_refcnt))
|
if (!refcount_release(&pcint_refcnt))
|
||||||
return;
|
return;
|
||||||
lvts[APIC_LVT_PMC].lvt_masked = 1;
|
lvts[APIC_LVT_PMC].lvt_masked = 1;
|
||||||
|
elvts[APIC_ELVT_IBS].lvt_masked = 1;
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
/* The APs should always be started when hwpmc is unloaded. */
|
/* The APs should always be started when hwpmc is unloaded. */
|
||||||
|
|||||||
@@ -56,6 +56,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <gelf.h>
|
#include <gelf.h>
|
||||||
|
#include <inttypes.h>
|
||||||
#include <libgen.h>
|
#include <libgen.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <netdb.h>
|
#include <netdb.h>
|
||||||
@@ -367,6 +368,97 @@ pmcstat_pmcindex_to_pmcr(int pmcin)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
static void
|
||||||
|
pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
|
||||||
|
{
|
||||||
|
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
|
||||||
|
uint64_t ctl;
|
||||||
|
|
||||||
|
ctl = ibsbuf[PMC_MPIDX_FETCH_CTL];
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s",
|
||||||
|
(ctl & IBS_FETCH_CTL_ICMISS) ? "icmiss " : "",
|
||||||
|
(ctl & IBS_FETCH_CTL_L1TLBMISS) ? "l1tlbmiss " : "",
|
||||||
|
(ctl & IBS_FETCH_CTL_OPCACHEMISS) ? "opcachemiss " : "",
|
||||||
|
(ctl & IBS_FETCH_CTL_L3MISS) ? "l3miss" : "");
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-fetch", "Latency %" PRIu64,
|
||||||
|
IBS_FETCH_CTL_TO_LAT(ctl));
|
||||||
|
PMCSTAT_PRINT_ENTRY("IBS", "Address %" PRIx64,
|
||||||
|
ibsbuf[PMC_MPIDX_FETCH_LINADDR]);
|
||||||
|
if ((ctl & IBS_FETCH_CTL_PHYSADDRVALID) != 0) {
|
||||||
|
PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64,
|
||||||
|
ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
|
||||||
|
{
|
||||||
|
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
|
||||||
|
uint64_t data, data3;
|
||||||
|
|
||||||
|
data = ibsbuf[PMC_MPIDX_OP_DATA];
|
||||||
|
data3 = ibsbuf[PMC_MPIDX_OP_DATA3];
|
||||||
|
|
||||||
|
if ((data & IBS_OP_DATA_RIPINVALID) == 0) {
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "RIP %" PRIx64,
|
||||||
|
ibsbuf[PMC_MPIDX_OP_RIP]);
|
||||||
|
}
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s%s%s",
|
||||||
|
(data & IBS_OP_DATA_BRANCHRETIRED) ? "branchretired " : "",
|
||||||
|
(data & IBS_OP_DATA_BRANCHMISPREDICTED) ? "branchmispredicted " : "",
|
||||||
|
(data & IBS_OP_DATA_BRANCHTAKEN) ? "branchtaken " : "",
|
||||||
|
(data & IBS_OP_DATA_RETURN) ? "return" : "");
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s%s%s%s",
|
||||||
|
(data3 & IBS_OP_DATA3_LOAD) ? "load " : "",
|
||||||
|
(data3 & IBS_OP_DATA3_STORE) ? "store " : "",
|
||||||
|
(data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "",
|
||||||
|
(data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "",
|
||||||
|
(data3 & IBS_OP_DATA3_DCMISS) ? "dcmiss " : "");
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64,
|
||||||
|
IBS_OP_DATA3_TO_DCLAT(data3));
|
||||||
|
if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) {
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "Address %" PRIx64,
|
||||||
|
ibsbuf[PMC_MPIDX_OP_DC_LINADDR]);
|
||||||
|
}
|
||||||
|
if ((data3 & IBS_OP_DATA3_DCPHYADDRVALID) != 0) {
|
||||||
|
PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64,
|
||||||
|
ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
pmcstat_print_multipart(struct pmclog_ev_callchain *cc)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint8_t *hdr = (uint8_t *)&cc->pl_pc[0];
|
||||||
|
int offset = PMC_MULTIPART_HEADER_LENGTH / sizeof(uintptr_t);
|
||||||
|
|
||||||
|
for (i = 0; i < PMC_MULTIPART_HEADER_ENTRIES; i++) {
|
||||||
|
uint8_t type = hdr[2 * i];
|
||||||
|
uint8_t len = hdr[2 * i + 1];
|
||||||
|
|
||||||
|
if (type == PMC_CC_MULTIPART_NONE) {
|
||||||
|
break;
|
||||||
|
} else if (type == PMC_CC_MULTIPART_CALLCHAIN) {
|
||||||
|
return (offset);
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
} else if (type == PMC_CC_MULTIPART_IBS_FETCH) {
|
||||||
|
pmcstat_print_ibs_fetch(cc, offset);
|
||||||
|
} else if (type == PMC_CC_MULTIPART_IBS_OP) {
|
||||||
|
pmcstat_print_ibs_op(cc, offset);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
PMCSTAT_PRINT_ENTRY("unsupported multipart type!");
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (offset);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Print log entries as text.
|
* Print log entries as text.
|
||||||
*/
|
*/
|
||||||
@@ -388,7 +480,12 @@ pmcstat_print_log(void)
|
|||||||
pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
|
pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
|
||||||
PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
|
PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
|
||||||
pl_cpuflags) ? 'u' : 's');
|
pl_cpuflags) ? 'u' : 's');
|
||||||
for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
|
if ((ev.pl_u.pl_cc.pl_cpuflags & PMC_CC_F_MULTIPART)
|
||||||
|
!= 0)
|
||||||
|
npc = pmcstat_print_multipart(&ev.pl_u.pl_cc);
|
||||||
|
else
|
||||||
|
npc = 0;
|
||||||
|
for (; npc < ev.pl_u.pl_cc.pl_npc; npc++)
|
||||||
PMCSTAT_PRINT_ENTRY("...", "%p",
|
PMCSTAT_PRINT_ENTRY("...", "%p",
|
||||||
(void *) ev.pl_u.pl_cc.pl_pc[npc]);
|
(void *) ev.pl_u.pl_cc.pl_pc[npc]);
|
||||||
break;
|
break;
|
||||||
|
|||||||
Reference in New Issue
Block a user