@ -10,9 +10,12 @@
|
||||
#include <ihk/perfctr.h>
|
||||
#include <march.h>
|
||||
#include <errno.h>
|
||||
#include <cls.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/cpu.h>
|
||||
#include <registers.h>
|
||||
#include <mc_perf_event.h>
|
||||
#include <config.h>
|
||||
|
||||
extern unsigned int *x86_march_perfmap;
|
||||
extern int running_on_kvm(void);
|
||||
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
#ifndef ENABLE_PERF
|
||||
return;
|
||||
#endif //ENABLE_PERF
|
||||
|
||||
/* Do not do it on KVM */
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
@ -93,7 +100,7 @@ void x86_init_perfctr(void)
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
|
||||
}
|
||||
|
||||
|
||||
/* Enable PMC Control */
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= X86_IA32_PERF_COUNTERS_MASK;
|
||||
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
|
||||
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
|
||||
{
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
// allocate extra_reg
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_1;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_0;
|
||||
}
|
||||
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
// extra_regs are full
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_0;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_1;
|
||||
}
|
||||
|
||||
thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
|
||||
wrmsr(event->extra_reg.reg, event->extra_reg.config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MARCH_PERFCTR_START
|
||||
extern void x86_march_perfctr_start(unsigned long counter_mask);
|
||||
#endif
|
||||
|
||||
@ -9,12 +9,15 @@
|
||||
/* whether memdump feature is enabled */
|
||||
#undef ENABLE_MEMDUMP
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
/* whether qlmpi is enabled */
|
||||
#undef ENABLE_QLMPI
|
||||
|
||||
/* whether rusage is enabled */
|
||||
#undef ENABLE_RUSAGE
|
||||
|
||||
/* whether perf is enabled */
|
||||
#undef ENABLE_PERF
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
|
||||
21
configure
vendored
21
configure
vendored
@ -708,6 +708,7 @@ enable_dcfa
|
||||
enable_memdump
|
||||
enable_mcoverlayfs
|
||||
enable_rusage
|
||||
enable_perf
|
||||
enable_qlmpi
|
||||
with_uname_r
|
||||
'
|
||||
@ -1333,6 +1334,7 @@ Optional Features:
|
||||
--enable-memdump enable dumping memory and analyzing a dump
|
||||
--enable-mcoverlayfs enable mcoverlayfs implementation
|
||||
--enable-rusage enable rusage implementation
|
||||
--enable-perf enable perf_event implementation
|
||||
--enable-qlmpi enable qlmpi implementation
|
||||
|
||||
Optional Packages:
|
||||
@ -3568,6 +3570,14 @@ else
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-perf was given.
|
||||
if test "${enable_perf+set}" = set; then :
|
||||
enableval=$enable_perf; ENABLE_PERF=$enableval
|
||||
else
|
||||
ENABLE_PERF=yes
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-qlmpi was given.
|
||||
if test "${enable_qlmpi+set}" = set; then :
|
||||
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
|
||||
@ -4964,6 +4974,17 @@ else
|
||||
$as_echo "$as_me: rusage is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
|
||||
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
|
||||
$as_echo "$as_me: perf is enabled" >&6;}
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
|
||||
$as_echo "$as_me: perf is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
|
||||
15
configure.ac
15
configure.ac
@ -134,6 +134,12 @@ AC_ARG_ENABLE([rusage],
|
||||
[ENABLE_RUSAGE=$enableval],
|
||||
[ENABLE_RUSAGE=yes])
|
||||
|
||||
AC_ARG_ENABLE([perf],
|
||||
AC_HELP_STRING([--enable-perf],
|
||||
[enable perf_event implementation]),
|
||||
[ENABLE_PERF=$enableval],
|
||||
[ENABLE_PERF=yes])
|
||||
|
||||
AC_ARG_ENABLE([qlmpi],
|
||||
AC_HELP_STRING([--enable-qlmpi],
|
||||
[enable qlmpi implementation]),
|
||||
@ -454,7 +460,7 @@ else
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_QLMPI" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled])
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
|
||||
AC_MSG_NOTICE([qlmpi is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([qlmpi is disabled])
|
||||
@ -478,6 +484,13 @@ else
|
||||
AC_MSG_NOTICE([rusage is disabled])
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
|
||||
AC_MSG_NOTICE([perf is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([perf is disabled])
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
|
||||
fi
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#include <syscall.h>
|
||||
#include <bitops.h>
|
||||
#include <profile.h>
|
||||
#include <config.h>
|
||||
|
||||
#define VR_NONE 0x0
|
||||
#define VR_STACK 0x1
|
||||
@ -691,6 +692,10 @@ struct thread {
|
||||
int mod_clone;
|
||||
struct uti_attr *mod_clone_arg;
|
||||
int parent_cpuid;
|
||||
|
||||
// for performance counter
|
||||
unsigned long pmc_alloc_map;
|
||||
unsigned long extra_reg_alloc_map;
|
||||
};
|
||||
|
||||
#define VM_RANGE_CACHE_SIZE 4
|
||||
|
||||
171
kernel/syscall.c
171
kernel/syscall.c
@ -3436,18 +3436,18 @@ SYSCALL_DECLARE(signalfd4)
|
||||
}
|
||||
|
||||
int
|
||||
perf_counter_alloc(struct mc_perf_event *event)
|
||||
perf_counter_alloc(struct thread *thread)
|
||||
{
|
||||
int ret = 0;
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
struct mc_perf_event *leader = event->group_leader;
|
||||
int ret = -1;
|
||||
int i = 0;
|
||||
|
||||
ret = ihk_mc_perfctr_alloc_counter(&attr->type, &attr->config, leader->pmc_status);
|
||||
|
||||
if(ret >= 0) {
|
||||
leader->pmc_status |= 1UL << ret;
|
||||
// find avail generic counter
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
if(!(thread->pmc_alloc_map & (1 << i))) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
event->counter_id = ret;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -3467,7 +3467,13 @@ perf_counter_start(struct mc_perf_event *event)
|
||||
}
|
||||
|
||||
if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) {
|
||||
ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode);
|
||||
if (event->extra_reg.reg) {
|
||||
if (ihk_mc_perfctr_set_extra(event)) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = ihk_mc_perfctr_init_raw(event->counter_id, event->hw_config, mode);
|
||||
ihk_mc_perfctr_start(1UL << event->counter_id);
|
||||
}
|
||||
else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS &&
|
||||
@ -3478,7 +3484,8 @@ perf_counter_start(struct mc_perf_event *event)
|
||||
else {
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3569,18 +3576,18 @@ perf_event_read_group(struct mc_perf_event *event, unsigned long read_format, ch
|
||||
static int
|
||||
perf_event_read_one(struct mc_perf_event *event, unsigned long read_format, char *buf)
|
||||
{
|
||||
unsigned long values[4];
|
||||
int n = 0;
|
||||
unsigned long values[4];
|
||||
int n = 0;
|
||||
int size = 0;
|
||||
|
||||
values[n++] = perf_event_read_value(event);
|
||||
values[n++] = perf_event_read_value(event);
|
||||
|
||||
size = n * sizeof(unsigned long);
|
||||
|
||||
if (copy_to_user(buf, values, size))
|
||||
return -EFAULT;
|
||||
|
||||
return size;
|
||||
return size;
|
||||
}
|
||||
|
||||
static long
|
||||
@ -3597,7 +3604,6 @@ perf_read(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||
ret = perf_event_read_one(event, read_format, buf);
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
@ -3723,12 +3729,12 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||
process_unlock(proc, &lock);
|
||||
}
|
||||
*/
|
||||
break;
|
||||
break;
|
||||
case PERF_EVENT_IOC_RESET:
|
||||
// TODO: reset other process
|
||||
ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1);
|
||||
event->count = 0L;
|
||||
break;
|
||||
break;
|
||||
case PERF_EVENT_IOC_REFRESH:
|
||||
// TODO: refresh other process
|
||||
|
||||
@ -3753,7 +3759,13 @@ static int
|
||||
perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||
{
|
||||
struct mc_perf_event *event = (struct mc_perf_event*)sfd->data;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
thread->pmc_alloc_map &= ~(1UL << event->counter_id);
|
||||
if (event->extra_reg.reg) {
|
||||
thread->extra_reg_alloc_map &= ~(1UL << event->extra_reg.idx);
|
||||
}
|
||||
|
||||
kfree(event);
|
||||
|
||||
return 0;
|
||||
@ -3805,6 +3817,66 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
|
||||
return rc;
|
||||
}
|
||||
|
||||
struct mc_perf_event*
|
||||
mc_perf_event_alloc(struct perf_event_attr *attr)
|
||||
{
|
||||
unsigned long val = 0, extra_config = 0;
|
||||
struct mc_perf_event *event;
|
||||
int ereg_id;
|
||||
|
||||
if (!attr) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT);
|
||||
if (!event) {
|
||||
return NULL;
|
||||
}
|
||||
memset(event, 0, sizeof(struct mc_perf_event));
|
||||
|
||||
INIT_LIST_HEAD(&event->group_entry);
|
||||
INIT_LIST_HEAD(&event->sibling_list);
|
||||
event->attr = *attr;
|
||||
|
||||
event->sample_freq = attr->sample_freq;
|
||||
event->nr_siblings = 0;
|
||||
event->count = 0L;
|
||||
event->child_count_total = 0;
|
||||
event->parent = NULL;
|
||||
|
||||
switch(attr->type) {
|
||||
case PERF_TYPE_HARDWARE :
|
||||
val = ihk_mc_hw_event_map(attr->config);
|
||||
break;
|
||||
case PERF_TYPE_HW_CACHE :
|
||||
val = ihk_mc_hw_cache_event_map(attr->config);
|
||||
extra_config = ihk_mc_hw_cache_extra_reg_map(attr->config);
|
||||
break;
|
||||
case PERF_TYPE_RAW :
|
||||
val = attr->config;
|
||||
break;
|
||||
|
||||
default:
|
||||
// Unexpected type
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (val == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
event->hw_config = val;
|
||||
event->hw_config_ext = extra_config;
|
||||
|
||||
ereg_id = ihk_mc_get_extra_reg_id(event->hw_config, event->hw_config_ext);
|
||||
if (ereg_id >= 0) {
|
||||
event->extra_reg.config = event->hw_config_ext;
|
||||
event->extra_reg.reg = ihk_mc_get_extra_reg_msr(ereg_id);
|
||||
event->extra_reg.idx = ihk_mc_get_extra_reg_idx(ereg_id);
|
||||
}
|
||||
return event;
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(perf_event_open)
|
||||
{
|
||||
struct syscall_request request IHK_DMA_ALIGN;
|
||||
@ -3812,6 +3884,7 @@ SYSCALL_DECLARE(perf_event_open)
|
||||
struct process *proc = thread->proc;
|
||||
struct mckfd *sfd, *cfd;
|
||||
int fd;
|
||||
int counter_idx;
|
||||
long irqstate;
|
||||
struct perf_event_attr *attr = (void *)ihk_mc_syscall_arg0(ctx);
|
||||
int pid = ihk_mc_syscall_arg1(ctx);
|
||||
@ -3822,47 +3895,54 @@ SYSCALL_DECLARE(perf_event_open)
|
||||
|
||||
int not_supported_flag = 0;
|
||||
|
||||
#ifndef ENABLE_PERF
|
||||
return -ENOSYS;
|
||||
#endif // ENABLE_PERF
|
||||
|
||||
// check Not supported
|
||||
if(cpu > 0) {
|
||||
if (cpu > 0) {
|
||||
not_supported_flag = 1;
|
||||
}
|
||||
if(flags > 0) {
|
||||
if (flags > 0) {
|
||||
not_supported_flag = 1;
|
||||
}
|
||||
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
|
||||
|
||||
if ((attr->type != PERF_TYPE_RAW) &&
|
||||
(attr->type != PERF_TYPE_HARDWARE) &&
|
||||
(attr->type != PERF_TYPE_HW_CACHE)) {
|
||||
not_supported_flag = 1;
|
||||
}
|
||||
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
|
||||
not_supported_flag = 1;
|
||||
}
|
||||
if(attr->read_format & PERF_FORMAT_ID) {
|
||||
if (attr->read_format &
|
||||
(PERF_FORMAT_TOTAL_TIME_ENABLED |
|
||||
PERF_FORMAT_TOTAL_TIME_RUNNING |
|
||||
PERF_FORMAT_ID)) {
|
||||
not_supported_flag = 1;
|
||||
}
|
||||
|
||||
if(not_supported_flag) {
|
||||
if (not_supported_flag) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// process of perf_event_open
|
||||
event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT);
|
||||
if(!event)
|
||||
return -ENOMEM;
|
||||
event->attr = (struct perf_event_attr)*attr;
|
||||
event = mc_perf_event_alloc((struct perf_event_attr*)attr);
|
||||
if (!event) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
event->sample_freq = attr->sample_freq;
|
||||
event->nr_siblings = 0;
|
||||
event->count = 0L;
|
||||
event->child_count_total = 0;
|
||||
event->parent = NULL;
|
||||
event->pid = pid;
|
||||
INIT_LIST_HEAD(&event->group_entry);
|
||||
INIT_LIST_HEAD(&event->sibling_list);
|
||||
if(group_fd == -1) {
|
||||
|
||||
counter_idx = perf_counter_alloc(thread);
|
||||
if (counter_idx < 0) {
|
||||
return -1;
|
||||
}
|
||||
event->counter_id = counter_idx;
|
||||
|
||||
if (group_fd == -1) {
|
||||
event->group_leader = event;
|
||||
event->pmc_status = 0x0UL;
|
||||
} else {
|
||||
for(cfd = proc->mckfd; cfd; cfd = cfd->next) {
|
||||
if(cfd->fd == group_fd) {
|
||||
}
|
||||
else {
|
||||
for (cfd = proc->mckfd; cfd; cfd = cfd->next) {
|
||||
if (cfd->fd == group_fd) {
|
||||
event->group_leader = (struct mc_perf_event*)cfd->data;
|
||||
list_add_tail(&event->group_entry, &event->group_leader->sibling_list);
|
||||
event->group_leader->nr_siblings++;
|
||||
@ -3871,10 +3951,7 @@ SYSCALL_DECLARE(perf_event_open)
|
||||
}
|
||||
}
|
||||
|
||||
if(perf_counter_alloc(event) < 0)
|
||||
return -1;
|
||||
if(event->counter_id < 0)
|
||||
return -1;
|
||||
event->group_leader->pmc_status |= (1UL << counter_idx);
|
||||
|
||||
request.number = __NR_perf_event_open;
|
||||
request.args[0] = 0;
|
||||
@ -3883,6 +3960,8 @@ SYSCALL_DECLARE(perf_event_open)
|
||||
return fd;
|
||||
}
|
||||
|
||||
thread->pmc_alloc_map |= 1UL << counter_idx;
|
||||
|
||||
sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
|
||||
if(!sfd)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -77,6 +77,15 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
void *stack_pointer,
|
||||
void (*next_function)(void));
|
||||
|
||||
int ihk_mc_get_extra_reg_id(unsigned long hw_config, unsigned long hw_config_ext);
|
||||
unsigned int ihk_mc_get_nr_extra_regs();
|
||||
int ihk_mc_get_extra_reg_idx(int id);
|
||||
unsigned int ihk_mc_get_extra_reg_msr(int id);
|
||||
unsigned long ihk_mc_get_extra_reg_event(int id);
|
||||
unsigned long ihk_mc_hw_event_map(unsigned long hw_event);
|
||||
unsigned long ihk_mc_hw_cache_event_map(unsigned long hw_cache_event);
|
||||
unsigned long ihk_mc_hw_cache_extra_reg_map(unsigned long hw_cache_event);
|
||||
|
||||
/* returns the 'prev' argument of the call that caused the switch to the context returned. */
|
||||
void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx,
|
||||
ihk_mc_kernel_context_t *new_ctx,
|
||||
|
||||
@ -18,6 +18,8 @@
|
||||
#include <types.h>
|
||||
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
|
||||
|
||||
#include <mc_perf_event.h>
|
||||
|
||||
#define PERFCTR_USER_MODE 0x01
|
||||
#define PERFCTR_KERNEL_MODE 0x02
|
||||
|
||||
@ -60,6 +62,7 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode);
|
||||
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode);
|
||||
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode);
|
||||
#endif/*POSTK_DEBUG_TEMP_FIX_29*/
|
||||
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event);
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_30
|
||||
int ihk_mc_perfctr_start(int counter);
|
||||
int ihk_mc_perfctr_stop(int counter);
|
||||
|
||||
@ -2,9 +2,9 @@
|
||||
#ifndef MC_PERF_EVENT_H
|
||||
#define MC_PERF_EVENT_H
|
||||
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_32
|
||||
//#ifdef POSTK_DEBUG_TEMP_FIX_32
|
||||
#include <list.h>
|
||||
#endif /*POSTK_DEBUG_TEMP_FIX_32*/
|
||||
//#endif /*POSTK_DEBUG_TEMP_FIX_32*/
|
||||
#include <march.h>
|
||||
|
||||
struct perf_event_attr;
|
||||
@ -105,6 +105,40 @@ enum perf_hw_id {
|
||||
PERF_COUNT_HW_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
/*
|
||||
* Generalized hardware cache events:
|
||||
*
|
||||
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
|
||||
* { read, write, prefetch } x
|
||||
* { accesses, misses }
|
||||
*/
|
||||
/* Cache selector: the first factor of a generalized hardware cache event. */
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D  = 0,	/* L1-D */
	PERF_COUNT_HW_CACHE_L1I  = 1,	/* L1-I */
	PERF_COUNT_HW_CACHE_LL   = 2,	/* LLC */
	PERF_COUNT_HW_CACHE_DTLB = 3,	/* DTLB */
	PERF_COUNT_HW_CACHE_ITLB = 4,	/* ITLB */
	PERF_COUNT_HW_CACHE_BPU  = 5,	/* BPU */
	PERF_COUNT_HW_CACHE_NODE = 6,	/* NODE */

	/* non-ABI: number of selectors above */
	PERF_COUNT_HW_CACHE_MAX
};
|
||||
|
||||
/* Operation selector: the second factor of a generalized hardware cache event. */
enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ     = 0,	/* read */
	PERF_COUNT_HW_CACHE_OP_WRITE    = 1,	/* write */
	PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,	/* prefetch */

	/* non-ABI: number of operations above */
	PERF_COUNT_HW_CACHE_OP_MAX
};
|
||||
|
||||
/* Result selector: the third factor of a generalized hardware cache event. */
enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,	/* accesses */
	PERF_COUNT_HW_CACHE_RESULT_MISS   = 1,	/* misses */

	/* non-ABI: number of results above */
	PERF_COUNT_HW_CACHE_RESULT_MAX
};
|
||||
|
||||
/*
|
||||
* Hardware event_id to monitor via a performance monitoring event:
|
||||
*/
|
||||
@ -218,8 +252,18 @@ struct perf_event_attr {
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
 * Extra register attached to a performance event.
 * A non-zero 'reg' is what callers test to see whether an event uses one.
 */
struct hw_perf_event_extra {
	unsigned long	config;	/* value written to 'reg' with wrmsr() */
	unsigned int	reg;	/* MSR that receives 'config' */
	int		idx;	/* bit position in the thread's extra_reg_alloc_map */
};
|
||||
|
||||
|
||||
struct mc_perf_event {
|
||||
struct perf_event_attr attr;
|
||||
struct hw_perf_event_extra extra_reg;
|
||||
unsigned long hw_config;
|
||||
unsigned long hw_config_ext;
|
||||
int cpu_id;
|
||||
int counter_id; // counter_id
|
||||
unsigned long count; // counter_value
|
||||
|
||||
3
test/perf_event/Makefile
Normal file
3
test/perf_event/Makefile
Normal file
@ -0,0 +1,3 @@
|
||||
# Builds the perf_event test program.  Compilation and the final link are
# left to make's built-in rules; only the dependencies are spelled out.
.PHONY: all clean

all: perf_test

perf_test: perf_test.o perftool.o

# perf_test.c includes perftool.h, so a header change must rebuild it.
perf_test.o: perf_test.c perftool.h

perftool.o: perftool.c perftool.h

clean:
	rm -f perf_test perf_test.o perftool.o
|
||||
66
test/perf_event/go_perf_test.sh
Executable file
66
test/perf_event/go_perf_test.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/sh
#
# Runs ./perf_test through mcexec once per event, for every
# PERF_TYPE_HARDWARE id and every PERF_TYPE_HW_CACHE
# (cache, operation, result) combination, in perf_test modes 0, 1 and 2
# (0: count everything, 1: exclude user space, 2: exclude kernel space).

MCEXEC="mcexec"

# Highest value of each enumeration that is swept (inclusive).
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# sweep_hardware MODE
# One perf_test run, with a single counter, per hardware event id.
sweep_hardware()
{
	for id in $(seq 0 ${PERF_HW_ID_MAX})
	do
		${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HARDWARE} ${id}
	done
}

# sweep_hw_cache MODE
# One perf_test run per cache event; the config packs the cache id into
# bits 0-7, the operation into bits 8-15 and the result into bits 16-23.
sweep_hw_cache()
{
	for i in $(seq 0 ${PERF_COUNT_HW_CACHE_MAX})
	do
		for j in $(seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX})
		do
			for k in $(seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX})
			do
				${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HW_CACHE} $((${k} * 65536 + ${j} * 256 + ${i}))
			done
		done
	done
}

echo "【PERF_TYPE_HARDWARE all spase】, "
sweep_hardware 0

echo "【HW_CACHE no exclude】, "
sweep_hw_cache 0

echo "【HARDWARE exclude user space】, "
sweep_hardware 1

echo "【HW_CACHE exclude user space】, "
sweep_hw_cache 1

echo "【HARDWARE exclude kernel space】, "
sweep_hardware 2

echo "【HW_CACHE exclude kernel space】, "
sweep_hw_cache 2
|
||||
|
||||
66
test/perf_event/no_mc_go.sh
Executable file
66
test/perf_event/no_mc_go.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/sh
#
# Runs ./perf_test directly (MCEXEC is empty, so no launcher is prepended)
# once per event, for every PERF_TYPE_HARDWARE id and every
# PERF_TYPE_HW_CACHE (cache, operation, result) combination, in perf_test
# modes 0, 1 and 2 (0: count everything, 1: exclude user space,
# 2: exclude kernel space).

MCEXEC=""

# Highest value of each enumeration that is swept (inclusive).
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# sweep_hardware MODE
# One perf_test run, with a single counter, per hardware event id.
sweep_hardware()
{
	for id in $(seq 0 ${PERF_HW_ID_MAX})
	do
		${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HARDWARE} ${id}
	done
}

# sweep_hw_cache MODE
# One perf_test run per cache event; the config packs the cache id into
# bits 0-7, the operation into bits 8-15 and the result into bits 16-23.
sweep_hw_cache()
{
	for i in $(seq 0 ${PERF_COUNT_HW_CACHE_MAX})
	do
		for j in $(seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX})
		do
			for k in $(seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX})
			do
				${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HW_CACHE} $((${k} * 65536 + ${j} * 256 + ${i}))
			done
		done
	done
}

echo "【PERF_TYPE_HARDWARE all spase】, "
sweep_hardware 0

echo "【HW_CACHE no exclude】, "
sweep_hw_cache 0

echo "【HARDWARE exclude user space】, "
sweep_hardware 1

echo "【HW_CACHE exclude user space】, "
sweep_hw_cache 1

echo "【HARDWARE exclude kernel space】, "
sweep_hardware 2

echo "【HW_CACHE exclude kernel space】, "
sweep_hw_cache 2
|
||||
|
||||
82
test/perf_event/perf_test.c
Normal file
82
test/perf_event/perf_test.c
Normal file
@ -0,0 +1,82 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <langinfo.h>
|
||||
#include <locale.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "perftool.h"
|
||||
|
||||
/* Write the command-line synopsis to stdout, then exit with status 1. */
void
usage()
{
	fputs("Usage: perf_test mode cntr_num [<type> <config>]...\n", stdout);
	exit(1);
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
long fds[PERF_CNTR_NUM];
|
||||
long long counts[PERF_CNTR_NUM];
|
||||
int types[PERF_CNTR_NUM];
|
||||
int configs[PERF_CNTR_NUM];
|
||||
|
||||
int cntr_num;
|
||||
int mode;
|
||||
int i;
|
||||
|
||||
if (argc < 3) {
|
||||
printf("ERROR: Too few arguments.\n");
|
||||
usage();
|
||||
}
|
||||
|
||||
mode = atoi(argv[1]);
|
||||
cntr_num = atoi(argv[2]);
|
||||
|
||||
/* perse args */
|
||||
if (argc < cntr_num * 2 + 2) {
|
||||
printf("ERROR: Too few arguments.\n");
|
||||
usage();
|
||||
}
|
||||
|
||||
for (i = 0; i < cntr_num; i++) {
|
||||
types[i] = atoi(argv[i * 2 + 3]);
|
||||
configs[i] = atoi(argv[i * 2 + 4]);
|
||||
}
|
||||
|
||||
/* perf_event_open */
|
||||
for (i = 0; i < cntr_num; i++) {
|
||||
fds[i] = pe_opener(-1, mode, types[i], configs[i]);
|
||||
}
|
||||
|
||||
|
||||
/* perf_start */
|
||||
for (i = 0; i < cntr_num; i++) {
|
||||
PERF_BEGIN(fds[i]);
|
||||
}
|
||||
|
||||
memory_task();
|
||||
calc_task();
|
||||
|
||||
/* perf_end and read */
|
||||
for (i = 0; i < cntr_num; i++) {
|
||||
PERF_END(fds[i]);
|
||||
PERF_READ(fds[i], counts[i]);
|
||||
close(fds[i]);
|
||||
}
|
||||
|
||||
/* print result */
|
||||
for (i = 0; i < cntr_num; i++) {
|
||||
if (types[i] == PERF_TYPE_HARDWARE) {
|
||||
printf("%s,%ld\n", hw_event_names[configs[i]], counts[i]);
|
||||
}
|
||||
else if (types[i] == PERF_TYPE_HW_CACHE) {
|
||||
printf("%s_%s_%s,%ld\n",
|
||||
id_names[(configs[i] >> 0) & 0xff],
|
||||
op_id_names[(configs[i] >> 8) & 0xff],
|
||||
result_names[(configs[i] >> 16) & 0xff],
|
||||
counts[i]);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
99
test/perf_event/perftool.c
Normal file
99
test/perf_event/perftool.c
Normal file
@ -0,0 +1,99 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
//#include "perftool.h"
|
||||
|
||||
#define WORKSIZE (1024 * 1024 * 32)
|
||||
#define LOOPSIZE 1000000
|
||||
#define REP 1000
|
||||
|
||||
/*
 * Thin wrapper that issues the perf_event_open system call through
 * syscall(), passing all five arguments unchanged.
 *
 * Returns whatever syscall() returns.  The value is kept as a long all the
 * way instead of being narrowed through an int first.
 */
long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
		int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, hw_event, pid, cpu,
		       group_fd, flags);
}
|
||||
|
||||
/*
 * Open one perf event of the given type/config for the calling process
 * (pid 0) on any CPU (cpu -1), created disabled and with hypervisor events
 * excluded.
 *
 * group_fd - passed through to perf_event_open(); when positive the event
 *            also requests PERF_FORMAT_GROUP reads.
 * mode     - bit 0x01 excludes events that happen in user space,
 *            bit 0x02 excludes events that happen in kernel space.
 * type     - stored in perf_event_attr.type.
 * config   - stored in perf_event_attr.config.
 *
 * Returns the value returned by perf_event_open().
 */
long
pe_opener(long group_fd, int mode, int type, unsigned long config)
{
	struct perf_event_attr pe;

	memset(&pe, 0, sizeof(pe));
	pe.type = type;
	pe.size = sizeof(pe);
	pe.config = config;
	pe.disabled = 1;
	pe.exclude_user = (mode & 0x01) ? 1 : 0;
	pe.exclude_kernel = (mode & 0x02) ? 1 : 0;
	pe.exclude_hv = 1;
	if (group_fd > 0) {
		pe.read_format = PERF_FORMAT_GROUP;
	}

	/* perf_event_open() takes the group fd as an int. */
	return perf_event_open(&pe, 0, -1, (int)group_fd, 0);
}
|
||||
|
||||
/*
 * Pack a hardware cache event config: 'id' in the low bits, 'op_id' shifted
 * left by 8 and 'op_result_id' shifted left by 16, all OR-ed together.
 */
long long
hw_cache_build(long long id, long long op_id, long long op_result_id)
{
	long long packed = id;

	packed |= op_id << 8;
	packed |= op_result_id << 16;

	return packed;
}
|
||||
|
||||
void
|
||||
memory_task()
|
||||
{
|
||||
char* work = malloc(WORKSIZE);
|
||||
char* fromaddr;
|
||||
char* toaddr;
|
||||
double r;
|
||||
int offset;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < LOOPSIZE; i++) {
|
||||
r = drand48();
|
||||
offset = (int)(r * (double)WORKSIZE);
|
||||
fromaddr = work + offset;
|
||||
r = drand48();
|
||||
offset = (int)(r * (double)WORKSIZE);
|
||||
toaddr = work + offset;
|
||||
*toaddr = *fromaddr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
calc_task()
|
||||
{
|
||||
int i, j;
|
||||
double tmp;
|
||||
|
||||
for (i = 0; i < REP; i++) {
|
||||
for (j = 0; j < REP; j++) {
|
||||
tmp = drand48() * drand48();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
99
test/perf_event/perftool.h
Normal file
99
test/perf_event/perftool.h
Normal file
@ -0,0 +1,99 @@
|
||||
#ifndef __PERFTOOL_H__
|
||||
#define __PERFTOOL_H__
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
extern long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags);
|
||||
|
||||
extern long perf_instructions();
|
||||
extern long perf_count_hw_cache_l1d_read_miss();
|
||||
extern long perf_count_hw_cache_dtlb_read_miss();
|
||||
extern long pe_opener(long group_fd, int mode, int type, unsigned long config);
|
||||
extern long long hw_cache_build(long long id, long long op_id, long long op_result_id);
|
||||
extern void memory_task();
|
||||
extern void calc_task();
|
||||
|
||||
#define PERF_CNTR_NUM 4
|
||||
|
||||
/*
 * Counter control helpers.  'fd' is a descriptor as returned by
 * pe_opener(); values <= 0 are treated as "not opened" and skipped.
 *
 * Each macro expands to a single statement (do { } while (0)), so it is
 * safe inside unbraced if/else, and evaluates 'fd' exactly once.  Write the
 * call with a trailing semicolon: PERF_BEGIN(fd);
 */

/* Reset the counter, then enable it. */
#define PERF_BEGIN(fd) \
	do { \
		long perf_fd_ = (fd); \
		if (perf_fd_ > 0) { \
			ioctl(perf_fd_, PERF_EVENT_IOC_RESET, 0); \
			ioctl(perf_fd_, PERF_EVENT_IOC_ENABLE, 0); \
		} \
	} while (0)

/* Disable the counter. */
#define PERF_END(fd) \
	do { \
		long perf_fd_ = (fd); \
		if (perf_fd_ > 0) { \
			ioctl(perf_fd_, PERF_EVENT_IOC_DISABLE, 0); \
		} \
	} while (0)

/*
 * Read one long long from the counter into 'count'.  'count' is set to -1
 * when 'fd' is not a usable descriptor or when read() does not deliver a
 * full value, so it is never left uninitialized.
 */
#define PERF_READ(fd, count) \
	do { \
		long perf_fd_ = (fd); \
		if (perf_fd_ <= 0 || \
		    read(perf_fd_, &(count), sizeof(long long)) != \
		    (ssize_t)sizeof(long long)) { \
			(count) = -1; \
		} \
	} while (0)
|
||||
|
||||
|
||||
/*
 * Display-name tables used when printing results.  They are 'static const'
 * so that every translation unit including this header gets its own
 * read-only copy instead of a second definition of an external object.
 */

/* Indexed by the PERF_TYPE_HARDWARE config value. */
static const char *const hw_event_names[] = {
	"CPU_CYCLES ",
	"INSTRUCTIONS ",
	"CACHE_REFERENCES ",
	"CACHE_MISSES ",
	"BRANCH_INSTRUCTIONS ",
	"BRANCH_MISSES ",
	"BUS_CYCLES ",
	"STALLED_CYCLES_FRONTEND",
	"STALLED_CYCLES_BACKEND ",
	"REF_CPU_CYCLES ",
};

/* Indexed by bits 0-7 of a PERF_TYPE_HW_CACHE config (cache id). */
static const char *const id_names[] = {
	"L1D ",
	"L1I ",
	"LL ",
	"DTLB",
	"ITLB",
	"BPU ",
	"NODE",
};

/* Indexed by bits 8-15 of a PERF_TYPE_HW_CACHE config (operation). */
static const char *const op_id_names[] = {
	"OP_READ ",
	"OP_WRITE ",
	"OP_PREFETCH",
};

/* Indexed by bits 16-23 of a PERF_TYPE_HW_CACHE config (result). */
static const char *const result_names[] = {
	"ACCESS",
	"MISS ",
};
|
||||
|
||||
/*
 * Enumerations of the perf constants, in ascending order.  They are
 * 'static const' so that every translation unit including this header gets
 * its own read-only copy instead of a second definition of an external
 * object.
 */

/* Cache ids accepted in bits 0-7 of a PERF_TYPE_HW_CACHE config. */
static const long long ids[] = {
	PERF_COUNT_HW_CACHE_L1D,
	PERF_COUNT_HW_CACHE_L1I,
	PERF_COUNT_HW_CACHE_LL,
	PERF_COUNT_HW_CACHE_DTLB,
	PERF_COUNT_HW_CACHE_ITLB,
	PERF_COUNT_HW_CACHE_BPU,
	PERF_COUNT_HW_CACHE_NODE,
};

/* Cache operations. */
static const long long op_ids[] = {
	PERF_COUNT_HW_CACHE_OP_READ,
	PERF_COUNT_HW_CACHE_OP_WRITE,
	PERF_COUNT_HW_CACHE_OP_PREFETCH,
};

/* Cache operation results. */
static const long long result_ids[] = {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
	PERF_COUNT_HW_CACHE_RESULT_MISS,
};

/* PERF_TYPE_HARDWARE config values, in the same order as hw_event_names. */
static const int hw_configs[] = {
	PERF_COUNT_HW_CPU_CYCLES,
	PERF_COUNT_HW_INSTRUCTIONS,
	PERF_COUNT_HW_CACHE_REFERENCES,
	PERF_COUNT_HW_CACHE_MISSES,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	PERF_COUNT_HW_BRANCH_MISSES,
	PERF_COUNT_HW_BUS_CYCLES,
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	PERF_COUNT_HW_REF_CPU_CYCLES,
};
|
||||
#endif
|
||||
Reference in New Issue
Block a user