support PERF_TYPE_{HARDWARE|HW_CACHE} in perf_event_open

refs #829
This commit is contained in:
Ken Sato
2017-10-12 16:51:47 +09:00
parent 2ae6883a8b
commit 12840601e1
15 changed files with 685 additions and 51 deletions

View File

@ -10,9 +10,12 @@
#include <ihk/perfctr.h> #include <ihk/perfctr.h>
#include <march.h> #include <march.h>
#include <errno.h> #include <errno.h>
#include <cls.h>
#include <ihk/debug.h> #include <ihk/debug.h>
#include <ihk/cpu.h>
#include <registers.h> #include <registers.h>
#include <mc_perf_event.h> #include <mc_perf_event.h>
#include <config.h>
extern unsigned int *x86_march_perfmap; extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void); extern int running_on_kvm(void);
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
uint64_t ecx; uint64_t ecx;
uint64_t edx; uint64_t edx;
#ifndef ENABLE_PERF
return;
#endif //ENABLE_PERF
/* Do not do it on KVM */ /* Do not do it on KVM */
if (running_on_kvm()) return; if (running_on_kvm()) return;
@ -93,7 +100,7 @@ void x86_init_perfctr(void)
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) { for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0); wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
} }
/* Enable PMC Control */ /* Enable PMC Control */
value = rdmsr(MSR_PERF_GLOBAL_CTRL); value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= X86_IA32_PERF_COUNTERS_MASK; value |= X86_IA32_PERF_COUNTERS_MASK;
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]); return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
} }
/*
 * ihk_mc_perfctr_set_extra(): allocate and program an "extra" MSR
 * (an OFFCORE_RSP register) for a perf event that needs one.
 *
 * Allocation state is tracked per-thread in extra_reg_alloc_map, one bit
 * per extra-register index. The event arrives with a preferred index in
 * event->extra_reg.idx; if that slot is taken, the other OFFCORE_RSP slot
 * is tried instead.
 *
 * Returns 0 on success, -1 when both extra registers are already in use.
 * On success the chosen MSR is written with event->extra_reg.config and
 * the event's hw_config event-select byte is redirected to the matching
 * offcore-response event code.
 */
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
	struct thread *thread = cpu_local_var(current);

	// allocate extra_reg: if the preferred slot is taken, fall back to
	// the other OFFCORE_RSP slot; fail when both are occupied.
	if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
		if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
			event->extra_reg.idx = EXTRA_REG_RSP_1;
		}
		else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
			event->extra_reg.idx = EXTRA_REG_RSP_0;
		}

		if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
			// extra_regs are full
			return -1;
		}
	}

	// Rewrite the low event-select byte of hw_config to the offcore
	// response event matching the slot that was actually allocated,
	// and record which MSR carries the extra config.
	if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
		event->hw_config &= ~0xffUL;
		event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
		event->extra_reg.reg = MSR_OFFCORE_RSP_0;
	}
	else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
		event->hw_config &= ~0xffUL;
		event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
		event->extra_reg.reg = MSR_OFFCORE_RSP_1;
	}

	// Mark the slot used and program the MSR with the extra config.
	thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
	wrmsr(event->extra_reg.reg, event->extra_reg.config);

	return 0;
}
#ifdef HAVE_MARCH_PERFCTR_START #ifdef HAVE_MARCH_PERFCTR_START
extern void x86_march_perfctr_start(unsigned long counter_mask); extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif #endif

View File

@ -9,12 +9,15 @@
/* whether memdump feature is enabled */ /* whether memdump feature is enabled */
#undef ENABLE_MEMDUMP #undef ENABLE_MEMDUMP
/* whether mcoverlayfs is enabled */ /* whether qlmpi is enabled */
#undef ENABLE_QLMPI #undef ENABLE_QLMPI
/* whether rusage is enabled */ /* whether rusage is enabled */
#undef ENABLE_RUSAGE #undef ENABLE_RUSAGE
/* whether perf is enabled */
#undef ENABLE_PERF
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H #undef HAVE_INTTYPES_H

21
configure vendored
View File

@ -708,6 +708,7 @@ enable_dcfa
enable_memdump enable_memdump
enable_mcoverlayfs enable_mcoverlayfs
enable_rusage enable_rusage
enable_perf
enable_qlmpi enable_qlmpi
with_uname_r with_uname_r
' '
@ -1333,6 +1334,7 @@ Optional Features:
--enable-memdump enable dumping memory and analyzing a dump --enable-memdump enable dumping memory and analyzing a dump
--enable-mcoverlayfs enable mcoverlayfs implementation --enable-mcoverlayfs enable mcoverlayfs implementation
--enable-rusage enable rusage implementation --enable-rusage enable rusage implementation
--enable-perf enable perf_event implementation
--enable-qlmpi enable qlmpi implementation --enable-qlmpi enable qlmpi implementation
Optional Packages: Optional Packages:
@ -3568,6 +3570,14 @@ else
fi fi
# Check whether --enable-perf was given.
if test "${enable_perf+set}" = set; then :
enableval=$enable_perf; ENABLE_PERF=$enableval
else
ENABLE_PERF=yes
fi
# Check whether --enable-qlmpi was given. # Check whether --enable-qlmpi was given.
if test "${enable_qlmpi+set}" = set; then : if test "${enable_qlmpi+set}" = set; then :
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
@ -4964,6 +4974,17 @@ else
$as_echo "$as_me: rusage is disabled" >&6;} $as_echo "$as_me: rusage is disabled" >&6;}
fi fi
if test "x$ENABLE_PERF" = "xyes" ; then
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
$as_echo "$as_me: perf is enabled" >&6;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
$as_echo "$as_me: perf is disabled" >&6;}
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then if test "x$MCKERNEL_INCDIR" != "x" ; then
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF

View File

@ -134,6 +134,12 @@ AC_ARG_ENABLE([rusage],
[ENABLE_RUSAGE=$enableval], [ENABLE_RUSAGE=$enableval],
[ENABLE_RUSAGE=yes]) [ENABLE_RUSAGE=yes])
AC_ARG_ENABLE([perf],
AC_HELP_STRING([--enable-perf],
[enable perf_event implementation]),
[ENABLE_PERF=$enableval],
[ENABLE_PERF=yes])
AC_ARG_ENABLE([qlmpi], AC_ARG_ENABLE([qlmpi],
AC_HELP_STRING([--enable-qlmpi], AC_HELP_STRING([--enable-qlmpi],
[enable qlmpi implementation]), [enable qlmpi implementation]),
@ -454,7 +460,7 @@ else
fi fi
if test "x$ENABLE_QLMPI" = "xyes" ; then if test "x$ENABLE_QLMPI" = "xyes" ; then
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled]) AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
AC_MSG_NOTICE([qlmpi is enabled]) AC_MSG_NOTICE([qlmpi is enabled])
else else
AC_MSG_NOTICE([qlmpi is disabled]) AC_MSG_NOTICE([qlmpi is disabled])
@ -478,6 +484,13 @@ else
AC_MSG_NOTICE([rusage is disabled]) AC_MSG_NOTICE([rusage is disabled])
fi fi
if test "x$ENABLE_PERF" = "xyes" ; then
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
AC_MSG_NOTICE([perf is enabled])
else
AC_MSG_NOTICE([perf is disabled])
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then if test "x$MCKERNEL_INCDIR" != "x" ; then
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers]) AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
fi fi

View File

@ -26,6 +26,7 @@
#include <syscall.h> #include <syscall.h>
#include <bitops.h> #include <bitops.h>
#include <profile.h> #include <profile.h>
#include <config.h>
#define VR_NONE 0x0 #define VR_NONE 0x0
#define VR_STACK 0x1 #define VR_STACK 0x1
@ -691,6 +692,10 @@ struct thread {
int mod_clone; int mod_clone;
struct uti_attr *mod_clone_arg; struct uti_attr *mod_clone_arg;
int parent_cpuid; int parent_cpuid;
// for performance counter
unsigned long pmc_alloc_map;
unsigned long extra_reg_alloc_map;
}; };
#define VM_RANGE_CACHE_SIZE 4 #define VM_RANGE_CACHE_SIZE 4

View File

@ -3436,18 +3436,18 @@ SYSCALL_DECLARE(signalfd4)
} }
int int
perf_counter_alloc(struct mc_perf_event *event) perf_counter_alloc(struct thread *thread)
{ {
int ret = 0; int ret = -1;
struct perf_event_attr *attr = &event->attr; int i = 0;
struct mc_perf_event *leader = event->group_leader;
ret = ihk_mc_perfctr_alloc_counter(&attr->type, &attr->config, leader->pmc_status); // find avail generic counter
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
if(ret >= 0) { if(!(thread->pmc_alloc_map & (1 << i))) {
leader->pmc_status |= 1UL << ret; ret = i;
break;
}
} }
event->counter_id = ret;
return ret; return ret;
} }
@ -3467,7 +3467,13 @@ perf_counter_start(struct mc_perf_event *event)
} }
if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) { if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) {
ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode); if (event->extra_reg.reg) {
if (ihk_mc_perfctr_set_extra(event)) {
ret = -1;
goto out;
}
}
ret = ihk_mc_perfctr_init_raw(event->counter_id, event->hw_config, mode);
ihk_mc_perfctr_start(1UL << event->counter_id); ihk_mc_perfctr_start(1UL << event->counter_id);
} }
else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS && else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS &&
@ -3478,7 +3484,8 @@ perf_counter_start(struct mc_perf_event *event)
else { else {
ret = -1; ret = -1;
} }
out:
return ret; return ret;
} }
@ -3569,18 +3576,18 @@ perf_event_read_group(struct mc_perf_event *event, unsigned long read_format, ch
static int static int
perf_event_read_one(struct mc_perf_event *event, unsigned long read_format, char *buf) perf_event_read_one(struct mc_perf_event *event, unsigned long read_format, char *buf)
{ {
unsigned long values[4]; unsigned long values[4];
int n = 0; int n = 0;
int size = 0; int size = 0;
values[n++] = perf_event_read_value(event); values[n++] = perf_event_read_value(event);
size = n * sizeof(unsigned long); size = n * sizeof(unsigned long);
if (copy_to_user(buf, values, size)) if (copy_to_user(buf, values, size))
return -EFAULT; return -EFAULT;
return size; return size;
} }
static long static long
@ -3597,7 +3604,6 @@ perf_read(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
ret = perf_event_read_one(event, read_format, buf); ret = perf_event_read_one(event, read_format, buf);
} }
return ret; return ret;
} }
void void
@ -3723,12 +3729,12 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
process_unlock(proc, &lock); process_unlock(proc, &lock);
} }
*/ */
break; break;
case PERF_EVENT_IOC_RESET: case PERF_EVENT_IOC_RESET:
// TODO: reset other process // TODO: reset other process
ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1); ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1);
event->count = 0L; event->count = 0L;
break; break;
case PERF_EVENT_IOC_REFRESH: case PERF_EVENT_IOC_REFRESH:
// TODO: refresh other process // TODO: refresh other process
@ -3753,7 +3759,13 @@ static int
perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx) perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
{ {
struct mc_perf_event *event = (struct mc_perf_event*)sfd->data; struct mc_perf_event *event = (struct mc_perf_event*)sfd->data;
struct thread *thread = cpu_local_var(current);
thread->pmc_alloc_map &= ~(1UL << event->counter_id);
if (event->extra_reg.reg) {
thread->extra_reg_alloc_map &= ~(1UL << event->extra_reg.idx);
}
kfree(event); kfree(event);
return 0; return 0;
@ -3805,6 +3817,66 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
return rc; return rc;
} }
/*
 * mc_perf_event_alloc(): allocate and initialize a struct mc_perf_event
 * from a userspace perf_event_attr.
 *
 * Maps the generalized PERF_TYPE_HARDWARE / PERF_TYPE_HW_CACHE configs to
 * raw hardware event codes (PERF_TYPE_RAW passes config through), and, for
 * cache events that need one, records the extra (OFFCORE_RSP) register
 * configuration in event->extra_reg.
 *
 * Returns the new event (caller frees with kfree()), or NULL when attr is
 * NULL, allocation fails, the type is unsupported, or the config does not
 * map to a hardware event.
 */
struct mc_perf_event*
mc_perf_event_alloc(struct perf_event_attr *attr)
{
	unsigned long val = 0, extra_config = 0;
	struct mc_perf_event *event;
	int ereg_id;

	if (!attr) {
		return NULL;
	}

	event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT);
	if (!event) {
		return NULL;
	}
	memset(event, 0, sizeof(struct mc_perf_event));

	INIT_LIST_HEAD(&event->group_entry);
	INIT_LIST_HEAD(&event->sibling_list);

	event->attr = *attr;
	event->sample_freq = attr->sample_freq;
	event->nr_siblings = 0;
	event->count = 0L;
	event->child_count_total = 0;
	event->parent = NULL;

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		val = ihk_mc_hw_event_map(attr->config);
		break;
	case PERF_TYPE_HW_CACHE:
		val = ihk_mc_hw_cache_event_map(attr->config);
		extra_config = ihk_mc_hw_cache_extra_reg_map(attr->config);
		break;
	case PERF_TYPE_RAW:
		val = attr->config;
		break;
	default:
		// Unexpected type: free the event before bailing out
		// (the original returned NULL here and leaked it).
		kfree(event);
		return NULL;
	}

	if (val == 0) {
		// Config does not map to a supported hardware event;
		// release the allocation (was leaked here as well).
		kfree(event);
		return NULL;
	}
	event->hw_config = val;
	event->hw_config_ext = extra_config;

	// If this event needs an extra MSR, remember which one and what to
	// write into it; programming happens later in perf_counter_start().
	ereg_id = ihk_mc_get_extra_reg_id(event->hw_config, event->hw_config_ext);
	if (ereg_id >= 0) {
		event->extra_reg.config = event->hw_config_ext;
		event->extra_reg.reg = ihk_mc_get_extra_reg_msr(ereg_id);
		event->extra_reg.idx = ihk_mc_get_extra_reg_idx(ereg_id);
	}

	return event;
}
SYSCALL_DECLARE(perf_event_open) SYSCALL_DECLARE(perf_event_open)
{ {
struct syscall_request request IHK_DMA_ALIGN; struct syscall_request request IHK_DMA_ALIGN;
@ -3812,6 +3884,7 @@ SYSCALL_DECLARE(perf_event_open)
struct process *proc = thread->proc; struct process *proc = thread->proc;
struct mckfd *sfd, *cfd; struct mckfd *sfd, *cfd;
int fd; int fd;
int counter_idx;
long irqstate; long irqstate;
struct perf_event_attr *attr = (void *)ihk_mc_syscall_arg0(ctx); struct perf_event_attr *attr = (void *)ihk_mc_syscall_arg0(ctx);
int pid = ihk_mc_syscall_arg1(ctx); int pid = ihk_mc_syscall_arg1(ctx);
@ -3822,47 +3895,54 @@ SYSCALL_DECLARE(perf_event_open)
int not_supported_flag = 0; int not_supported_flag = 0;
#ifndef ENABLE_PERF
return -ENOSYS;
#endif // ENABLE_PERF
// check Not supported // check Not supported
if(cpu > 0) { if (cpu > 0) {
not_supported_flag = 1; not_supported_flag = 1;
} }
if(flags > 0) { if (flags > 0) {
not_supported_flag = 1; not_supported_flag = 1;
} }
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
if ((attr->type != PERF_TYPE_RAW) &&
(attr->type != PERF_TYPE_HARDWARE) &&
(attr->type != PERF_TYPE_HW_CACHE)) {
not_supported_flag = 1; not_supported_flag = 1;
} }
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { if (attr->read_format &
not_supported_flag = 1; (PERF_FORMAT_TOTAL_TIME_ENABLED |
} PERF_FORMAT_TOTAL_TIME_RUNNING |
if(attr->read_format & PERF_FORMAT_ID) { PERF_FORMAT_ID)) {
not_supported_flag = 1; not_supported_flag = 1;
} }
if(not_supported_flag) { if (not_supported_flag) {
return -1; return -1;
} }
// process of perf_event_open event = mc_perf_event_alloc((struct perf_event_attr*)attr);
event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT); if (!event) {
if(!event) return -1;
return -ENOMEM; }
event->attr = (struct perf_event_attr)*attr;
event->sample_freq = attr->sample_freq;
event->nr_siblings = 0;
event->count = 0L;
event->child_count_total = 0;
event->parent = NULL;
event->pid = pid; event->pid = pid;
INIT_LIST_HEAD(&event->group_entry);
INIT_LIST_HEAD(&event->sibling_list); counter_idx = perf_counter_alloc(thread);
if(group_fd == -1) { if (counter_idx < 0) {
return -1;
}
event->counter_id = counter_idx;
if (group_fd == -1) {
event->group_leader = event; event->group_leader = event;
event->pmc_status = 0x0UL; event->pmc_status = 0x0UL;
} else { }
for(cfd = proc->mckfd; cfd; cfd = cfd->next) { else {
if(cfd->fd == group_fd) { for (cfd = proc->mckfd; cfd; cfd = cfd->next) {
if (cfd->fd == group_fd) {
event->group_leader = (struct mc_perf_event*)cfd->data; event->group_leader = (struct mc_perf_event*)cfd->data;
list_add_tail(&event->group_entry, &event->group_leader->sibling_list); list_add_tail(&event->group_entry, &event->group_leader->sibling_list);
event->group_leader->nr_siblings++; event->group_leader->nr_siblings++;
@ -3871,10 +3951,7 @@ SYSCALL_DECLARE(perf_event_open)
} }
} }
if(perf_counter_alloc(event) < 0) event->group_leader->pmc_status |= (1UL << counter_idx);
return -1;
if(event->counter_id < 0)
return -1;
request.number = __NR_perf_event_open; request.number = __NR_perf_event_open;
request.args[0] = 0; request.args[0] = 0;
@ -3883,6 +3960,8 @@ SYSCALL_DECLARE(perf_event_open)
return fd; return fd;
} }
thread->pmc_alloc_map |= 1UL << counter_idx;
sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT); sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
if(!sfd) if(!sfd)
return -ENOMEM; return -ENOMEM;

View File

@ -77,6 +77,15 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
void *stack_pointer, void *stack_pointer,
void (*next_function)(void)); void (*next_function)(void));
/*
 * Perf-event mapping helpers used by the perf_event_open implementation:
 * generalized hardware / cache event translation and "extra register"
 * (e.g. OFFCORE_RSP) bookkeeping. Provided by the arch-specific perfctr
 * code; a mapping result of 0 means "event not supported".
 */
int ihk_mc_get_extra_reg_id(unsigned long hw_config, unsigned long hw_config_ext);
unsigned int ihk_mc_get_nr_extra_regs(void);	/* was "()": unprototyped in C */
int ihk_mc_get_extra_reg_idx(int id);
unsigned int ihk_mc_get_extra_reg_msr(int id);
unsigned long ihk_mc_get_extra_reg_event(int id);
unsigned long ihk_mc_hw_event_map(unsigned long hw_event);
unsigned long ihk_mc_hw_cache_event_map(unsigned long hw_cache_event);
unsigned long ihk_mc_hw_cache_extra_reg_map(unsigned long hw_cache_event);
/* returns the 'prev' argument of the call that caused the switch to the context returned. */ /* returns the 'prev' argument of the call that caused the switch to the context returned. */
void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx, void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx,
ihk_mc_kernel_context_t *new_ctx, ihk_mc_kernel_context_t *new_ctx,

View File

@ -18,6 +18,8 @@
#include <types.h> #include <types.h>
#endif /*POSTK_DEBUG_TEMP_FIX_29*/ #endif /*POSTK_DEBUG_TEMP_FIX_29*/
#include <mc_perf_event.h>
#define PERFCTR_USER_MODE 0x01 #define PERFCTR_USER_MODE 0x01
#define PERFCTR_KERNEL_MODE 0x02 #define PERFCTR_KERNEL_MODE 0x02
@ -60,6 +62,7 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode);
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode); int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode);
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode); int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode);
#endif/*POSTK_DEBUG_TEMP_FIX_29*/ #endif/*POSTK_DEBUG_TEMP_FIX_29*/
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event);
#ifdef POSTK_DEBUG_TEMP_FIX_30 #ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_start(int counter); int ihk_mc_perfctr_start(int counter);
int ihk_mc_perfctr_stop(int counter); int ihk_mc_perfctr_stop(int counter);

View File

@ -2,9 +2,9 @@
#ifndef MC_PERF_EVENT_H #ifndef MC_PERF_EVENT_H
#define MC_PERF_EVENT_H #define MC_PERF_EVENT_H
#ifdef POSTK_DEBUG_TEMP_FIX_32 //#ifdef POSTK_DEBUG_TEMP_FIX_32
#include <list.h> #include <list.h>
#endif /*POSTK_DEBUG_TEMP_FIX_32*/ //#endif /*POSTK_DEBUG_TEMP_FIX_32*/
#include <march.h> #include <march.h>
struct perf_event_attr; struct perf_event_attr;
@ -105,6 +105,40 @@ enum perf_hw_id {
PERF_COUNT_HW_MAX, /* non-ABI */ PERF_COUNT_HW_MAX, /* non-ABI */
}; };
/*
 * Generalized hardware cache events:
 *
 *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
 *       { read, write, prefetch } x
 *       { accesses, misses }
 *
 * These values come straight from the userspace perf ABI
 * (attr.config = id | op << 8 | result << 16), so they must stay
 * numerically identical to <linux/perf_event.h>.
 */
// Which cache (or TLB/branch-predictor/NUMA node) is being counted.
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D = 0,
	PERF_COUNT_HW_CACHE_L1I = 1,
	PERF_COUNT_HW_CACHE_LL = 2,
	PERF_COUNT_HW_CACHE_DTLB = 3,
	PERF_COUNT_HW_CACHE_ITLB = 4,
	PERF_COUNT_HW_CACHE_BPU = 5,
	PERF_COUNT_HW_CACHE_NODE = 6,

	PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};

// Which operation on that cache is being counted.
enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ = 0,
	PERF_COUNT_HW_CACHE_OP_WRITE = 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,

	PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};

// Whether total accesses or only misses are counted.
enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS = 1,

	PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
/* /*
* Hardware event_id to monitor via a performance monitoring event: * Hardware event_id to monitor via a performance monitoring event:
*/ */
@ -218,8 +252,18 @@ struct perf_event_attr {
#endif #endif
}; };
/*
 * State for an "extra" PMU register attached to an event (e.g. the
 * offcore-response MSRs). reg == 0 means the event needs no extra
 * register (the struct is zeroed at event allocation).
 */
struct hw_perf_event_extra {
	unsigned long config;	/* value to write into the extra MSR */
	unsigned int reg;	/* MSR address; 0 = not used */
	int idx;	/* allocation slot (e.g. EXTRA_REG_RSP_0/1) */
};
struct mc_perf_event { struct mc_perf_event {
struct perf_event_attr attr; struct perf_event_attr attr;
struct hw_perf_event_extra extra_reg;
unsigned long hw_config;
unsigned long hw_config_ext;
int cpu_id; int cpu_id;
int counter_id; // counter_id int counter_id; // counter_id
unsigned long count; // counter_value unsigned long count; // counter_value

3
test/perf_event/Makefile Normal file
View File

@ -0,0 +1,3 @@
all: perf_test
perf_test: perf_test.o perftool.o
perftool.o: perftool.c perftool.h

66
test/perf_event/go_perf_test.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
#
# Sweep every generalized PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE event
# with ./perf_test running under McKernel (mcexec), in three modes:
#   0 = count user + kernel, 1 = exclude user space, 2 = exclude kernel space.
# Each perf_test invocation opens one counter and prints "<event>,<count>".

MCEXEC="mcexec"

# Highest valid index (inclusive) for each perf event enum.
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

# perf_event_attr.type values from <linux/perf_event.h>.
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# Run every PERF_TYPE_HARDWARE event in mode $1.
run_hardware()
{
	for id in `seq 0 ${PERF_HW_ID_MAX}`
	do
		${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HARDWARE} ${id}
	done
}

# Run every {cache-id x op x result} PERF_TYPE_HW_CACHE event in mode $1.
# config = result * 65536 + op * 256 + cache-id, per the perf ABI encoding.
run_hw_cache()
{
	for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
	do
		for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
		do
			for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
			do
				${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
			done
		done
	done
}

# ("spase" typo fixed to "space" in the heading below.)
echo "【PERF_TYPE_HARDWARE all space】, "
run_hardware 0

echo "【HW_CACHE no exclude】, "
run_hw_cache 0

echo "【HARDWARE exclude user space】, "
run_hardware 1

echo "【HW_CACHE exclude user space】, "
run_hw_cache 1

echo "【HARDWARE exclude kernel space】, "
run_hardware 2

echo "【HW_CACHE exclude kernel space】, "
run_hw_cache 2

66
test/perf_event/no_mc_go.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
#
# Same event sweep as go_perf_test.sh, but running ./perf_test natively
# (MCEXEC left empty) instead of under mcexec — used to compare plain
# Linux results against McKernel results.
# Modes: 0 = count user + kernel, 1 = exclude user, 2 = exclude kernel.

MCEXEC=""

# Highest valid index (inclusive) for each perf event enum.
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

# perf_event_attr.type values from <linux/perf_event.h>.
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# --- mode 0: count events in both user and kernel space ---
echo "【PERF_TYPE_HARDWARE all spase】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
	${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HARDWARE} ${id}
done

# HW_CACHE config = result * 65536 + op * 256 + cache-id (perf ABI encoding).
echo "【HW_CACHE no exclude】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
	for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
	do
		for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
		do
			${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
		done
	done
done

# --- mode 1: exclude events that happen in user space ---
echo "【HARDWARE exclude user space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
	${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HARDWARE} ${id}
done

echo "【HW_CACHE exclude user space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
	for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
	do
		for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
		do
			${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
		done
	done
done

# --- mode 2: exclude events that happen in kernel space ---
echo "【HARDWARE exclude kernel space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
	${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HARDWARE} ${id}
done

echo "【HW_CACHE exclude kernel space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
	for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
	do
		for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
		do
			${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
		done
	done
done

View File

@ -0,0 +1,82 @@
#include <langinfo.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>	/* close() — was used without a declaration */
#include <sys/mman.h>

#include "perftool.h"
/* Print the command-line synopsis and terminate with a failure status. */
void
usage()
{
	static const char *synopsis =
		"Usage: perf_test mode cntr_num [<type> <config>]...\n";

	fputs(synopsis, stdout);
	exit(1);
}
int
main(int argc, char** argv)
{
long fds[PERF_CNTR_NUM];
long long counts[PERF_CNTR_NUM];
int types[PERF_CNTR_NUM];
int configs[PERF_CNTR_NUM];
int cntr_num;
int mode;
int i;
if (argc < 3) {
printf("ERROR: Too few arguments.\n");
usage();
}
mode = atoi(argv[1]);
cntr_num = atoi(argv[2]);
/* perse args */
if (argc < cntr_num * 2 + 2) {
printf("ERROR: Too few arguments.\n");
usage();
}
for (i = 0; i < cntr_num; i++) {
types[i] = atoi(argv[i * 2 + 3]);
configs[i] = atoi(argv[i * 2 + 4]);
}
/* perf_event_open */
for (i = 0; i < cntr_num; i++) {
fds[i] = pe_opener(-1, mode, types[i], configs[i]);
}
/* perf_start */
for (i = 0; i < cntr_num; i++) {
PERF_BEGIN(fds[i]);
}
memory_task();
calc_task();
/* perf_end and read */
for (i = 0; i < cntr_num; i++) {
PERF_END(fds[i]);
PERF_READ(fds[i], counts[i]);
close(fds[i]);
}
/* print result */
for (i = 0; i < cntr_num; i++) {
if (types[i] == PERF_TYPE_HARDWARE) {
printf("%s,%ld\n", hw_event_names[configs[i]], counts[i]);
}
else if (types[i] == PERF_TYPE_HW_CACHE) {
printf("%s_%s_%s,%ld\n",
id_names[(configs[i] >> 0) & 0xff],
op_id_names[(configs[i] >> 8) & 0xff],
result_names[(configs[i] >> 16) & 0xff],
counts[i]);
}
}
return 0;
}

View File

@ -0,0 +1,99 @@
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
//#include "perftool.h"
#define WORKSIZE (1024 * 1024 * 32)
#define LOOPSIZE 1000000
#define REP 1000
/*
 * Thin wrapper for the raw perf_event_open(2) system call, which glibc
 * does not expose. Returns the new event file descriptor, or -1 on
 * failure with errno set by the kernel.
 */
long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
		int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open,
		       hw_event, pid, cpu, group_fd, flags);
}
/*
 * Open a single perf event counter.
 *
 * group_fd: fd of an existing group leader, or -1 for a standalone event.
 * mode:     bit 0 set -> exclude user-space events,
 *           bit 1 set -> exclude kernel-space events.
 * type/config: perf_event_attr.type and .config values.
 *
 * The event is created disabled (callers enable it via PERF_BEGIN).
 * Returns the event fd from perf_event_open(), or -1 on failure.
 */
long
pe_opener(long group_fd, int mode, int type, unsigned long config)
{
	struct perf_event_attr pe;
	long fd;	/* was int: avoid narrowing the long return value */

	memset(&pe, 0, sizeof(struct perf_event_attr));
	pe.type = type;
	pe.size = sizeof(struct perf_event_attr);
	pe.config = config;
	pe.disabled = 1;

	/* mode bit 0: exclude events that happen in user space */
	pe.exclude_user = (mode & 0x01) ? 1 : 0;
	/* mode bit 1: exclude events that happen in kernel space */
	pe.exclude_kernel = (mode & 0x02) ? 1 : 0;
	pe.exclude_hv = 1;

	/* members of a group are read together via PERF_FORMAT_GROUP */
	if (group_fd > 0) {
		pe.read_format = PERF_FORMAT_GROUP;
	}

	fd = perf_event_open(&pe, 0, -1, group_fd, 0);
	return fd;
}
/*
 * Pack a generalized cache event config for PERF_TYPE_HW_CACHE:
 * cache id in bits 0-7, operation id in bits 8-15, and operation
 * result id in bits 16-23, per the perf_event_open(2) encoding.
 */
long long
hw_cache_build(long long id, long long op_id, long long op_result_id)
{
	long long config = op_result_id << 16;

	config |= op_id << 8;
	config |= id;
	return config;
}
/*
 * Generate memory traffic for the counters under test: perform LOOPSIZE
 * single-byte copies between pseudo-random offsets inside a WORKSIZE
 * scratch buffer (exercises caches and the DTLB).
 *
 * NOTE(review): the buffer is deliberately left uninitialized — the copied
 * values are irrelevant, only the accesses matter.
 */
void
memory_task()
{
	char* work = malloc(WORKSIZE);
	char* fromaddr;
	char* toaddr;
	double r;
	int offset;
	int i;

	/* original code dereferenced the buffer without checking malloc */
	if (!work) {
		perror("malloc");
		return;
	}

	for (i = 0; i < LOOPSIZE; i++) {
		/* drand48() < 1.0, so offset is always within the buffer */
		r = drand48();
		offset = (int)(r * (double)WORKSIZE);
		fromaddr = work + offset;
		r = drand48();
		offset = (int)(r * (double)WORKSIZE);
		toaddr = work + offset;
		*toaddr = *fromaddr;
	}

	free(work);	/* was leaked on every call */
}
/*
 * Generate CPU/FPU load for the counters under test: REP * REP
 * pseudo-random floating-point multiplications whose results are
 * discarded.
 */
void
calc_task()
{
	int outer, inner;
	double scratch;

	for (outer = 0; outer < REP; outer++) {
		for (inner = 0; inner < REP; inner++) {
			scratch = drand48() * drand48();
		}
	}
}

View File

@ -0,0 +1,99 @@
#ifndef __PERFTOOL_H__
#define __PERFTOOL_H__

#include <unistd.h>	/* read(), pid_t — PERF_READ used it undeclared */
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>

/* raw perf_event_open(2) wrapper (glibc provides none); see perftool.c */
extern long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
		int cpu, int group_fd, unsigned long flags);
extern long perf_instructions();
extern long perf_count_hw_cache_l1d_read_miss();
extern long perf_count_hw_cache_dtlb_read_miss();
extern long pe_opener(long group_fd, int mode, int type, unsigned long config);
extern long long hw_cache_build(long long id, long long op_id, long long op_result_id);
extern void memory_task();
extern void calc_task();

/* maximum number of counters perf_test can drive at once */
#define PERF_CNTR_NUM 4

/* Reset and enable counter fd; no-op for invalid (<= 0) fds.
 * All three macros are wrapped in do { } while (0) so they behave as a
 * single statement inside if/else bodies (the originals were bare
 * multi-statement expansions). */
#define PERF_BEGIN(fd) \
	do { \
		if (fd > 0) ioctl(fd, PERF_EVENT_IOC_RESET, 0); \
		if (fd > 0) ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); \
	} while (0)

/* Disable counter fd; no-op for invalid fds. */
#define PERF_END(fd) \
	do { \
		if (fd > 0) ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); \
	} while (0)

/* Read the 64-bit count from fd into `count`; count = -1 for invalid fds. */
#define PERF_READ(fd, count) \
	do { \
		if (fd > 0) read(fd, &count, sizeof(long long)); \
		else count = -1; \
	} while (0)

/* Printable names, indexed by the corresponding perf enum values; padded
 * for column-aligned output. Declared `static` so including this header
 * from more than one .c file cannot cause multiple-definition link
 * errors (the originals were non-static definitions in a header). */
static char* hw_event_names[] = {
	"CPU_CYCLES             ",
	"INSTRUCTIONS           ",
	"CACHE_REFERENCES       ",
	"CACHE_MISSES           ",
	"BRANCH_INSTRUCTIONS    ",
	"BRANCH_MISSES          ",
	"BUS_CYCLES             ",
	"STALLED_CYCLES_FRONTEND",
	"STALLED_CYCLES_BACKEND ",
	"REF_CPU_CYCLES         ",
};

static char* id_names[] = {
	"L1D ",
	"L1I ",
	"LL  ",
	"DTLB",
	"ITLB",
	"BPU ",
	"NODE",
};

static char* op_id_names[] = {
	"OP_READ    ",
	"OP_WRITE   ",
	"OP_PREFETCH",
};

static char* result_names[] = {
	"ACCESS",
	"MISS  ",
};

/* Enum value tables mirroring the name tables above. */
static long long ids[] = {
	PERF_COUNT_HW_CACHE_L1D,
	PERF_COUNT_HW_CACHE_L1I,
	PERF_COUNT_HW_CACHE_LL,
	PERF_COUNT_HW_CACHE_DTLB,
	PERF_COUNT_HW_CACHE_ITLB,
	PERF_COUNT_HW_CACHE_BPU,
	PERF_COUNT_HW_CACHE_NODE,
};

static long long op_ids[] = {
	PERF_COUNT_HW_CACHE_OP_READ,
	PERF_COUNT_HW_CACHE_OP_WRITE,
	PERF_COUNT_HW_CACHE_OP_PREFETCH,
};

static long long result_ids[] = {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
	PERF_COUNT_HW_CACHE_RESULT_MISS,
};

static int hw_configs[] = {
	PERF_COUNT_HW_CPU_CYCLES,
	PERF_COUNT_HW_INSTRUCTIONS,
	PERF_COUNT_HW_CACHE_REFERENCES,
	PERF_COUNT_HW_CACHE_MISSES,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	PERF_COUNT_HW_BRANCH_MISSES,
	PERF_COUNT_HW_BUS_CYCLES,
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	PERF_COUNT_HW_REF_CPU_CYCLES,
};

#endif