support PERF_TYPE_{HARDWARE|HW_CACHE} in perf_event_open

refs #829
Ken Sato
2017-10-12 16:51:47 +09:00
parent 2ae6883a8b
commit 12840601e1
15 changed files with 685 additions and 51 deletions
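
For context, a minimal user-space sketch of the two event types this commit enables. It mirrors the test programs added under test/perf_event/; the open_counter() helper is illustrative, error handling is omitted, and the standard Linux perf_event_open ABI is assumed:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* open one counter for the calling thread on any CPU */
static int open_counter(unsigned int type, unsigned long long config)
{
	struct perf_event_attr pe;

	memset(&pe, 0, sizeof(pe));
	pe.type = type;            /* PERF_TYPE_HARDWARE or PERF_TYPE_HW_CACHE */
	pe.size = sizeof(pe);
	pe.config = config;
	pe.disabled = 1;
	pe.exclude_hv = 1;
	return syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
}

int main(void)
{
	long long insn = 0, l1d_miss = 0;
	/* PERF_TYPE_HARDWARE: config is a perf_hw_id value */
	int fd_insn = open_counter(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
	/* PERF_TYPE_HW_CACHE: config = cache_id | (op_id << 8) | (result_id << 16) */
	int fd_l1d = open_counter(PERF_TYPE_HW_CACHE,
			PERF_COUNT_HW_CACHE_L1D |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16));

	ioctl(fd_insn, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd_l1d, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd_insn, PERF_EVENT_IOC_ENABLE, 0);
	ioctl(fd_l1d, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd_insn, PERF_EVENT_IOC_DISABLE, 0);
	ioctl(fd_l1d, PERF_EVENT_IOC_DISABLE, 0);
	read(fd_insn, &insn, sizeof(insn));
	read(fd_l1d, &l1d_miss, sizeof(l1d_miss));
	printf("instructions=%lld L1D_read_miss=%lld\n", insn, l1d_miss);
	close(fd_insn);
	close(fd_l1d);
	return 0;
}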

View File

@ -10,9 +10,12 @@
#include <ihk/perfctr.h>
#include <march.h>
#include <errno.h>
#include <cls.h>
#include <ihk/debug.h>
#include <ihk/cpu.h>
#include <registers.h>
#include <mc_perf_event.h>
#include <config.h>
extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void);
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
uint64_t ecx;
uint64_t edx;
#ifndef ENABLE_PERF
return;
#endif //ENABLE_PERF
/* Do not do it on KVM */
if (running_on_kvm()) return;
@ -93,7 +100,7 @@ void x86_init_perfctr(void)
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
}
/* Enable PMC Control */
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= X86_IA32_PERF_COUNTERS_MASK;
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
}
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
struct thread *thread = cpu_local_var(current);
// allocate extra_reg
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
event->extra_reg.idx = EXTRA_REG_RSP_1;
}
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
event->extra_reg.idx = EXTRA_REG_RSP_0;
}
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
// extra_regs are full
return -1;
}
}
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
event->hw_config &= ~0xffUL;
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
event->extra_reg.reg = MSR_OFFCORE_RSP_0;
}
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
event->hw_config &= ~0xffUL;
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
event->extra_reg.reg = MSR_OFFCORE_RSP_1;
}
thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
wrmsr(event->extra_reg.reg, event->extra_reg.config);
return 0;
}
#ifdef HAVE_MARCH_PERFCTR_START
extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif

View File

@ -9,12 +9,15 @@
/* whether memdump feature is enabled */
#undef ENABLE_MEMDUMP
/* whether mcoverlayfs is enabled */
/* whether qlmpi is enabled */
#undef ENABLE_QLMPI
/* whether rusage is enabled */
#undef ENABLE_RUSAGE
/* whether perf is enabled */
#undef ENABLE_PERF
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H

configure (vendored)

@ -708,6 +708,7 @@ enable_dcfa
enable_memdump
enable_mcoverlayfs
enable_rusage
enable_perf
enable_qlmpi
with_uname_r
'
@ -1333,6 +1334,7 @@ Optional Features:
--enable-memdump enable dumping memory and analyzing a dump
--enable-mcoverlayfs enable mcoverlayfs implementation
--enable-rusage enable rusage implementation
--enable-perf enable perf_event implementation
--enable-qlmpi enable qlmpi implementation
Optional Packages:
@ -3568,6 +3570,14 @@ else
fi
# Check whether --enable-perf was given.
if test "${enable_perf+set}" = set; then :
enableval=$enable_perf; ENABLE_PERF=$enableval
else
ENABLE_PERF=yes
fi
# Check whether --enable-qlmpi was given.
if test "${enable_qlmpi+set}" = set; then :
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
@ -4964,6 +4974,17 @@ else
$as_echo "$as_me: rusage is disabled" >&6;}
fi
if test "x$ENABLE_PERF" = "xyes" ; then
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
$as_echo "$as_me: perf is enabled" >&6;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
$as_echo "$as_me: perf is disabled" >&6;}
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
cat >>confdefs.h <<_ACEOF

View File

@ -134,6 +134,12 @@ AC_ARG_ENABLE([rusage],
[ENABLE_RUSAGE=$enableval],
[ENABLE_RUSAGE=yes])
AC_ARG_ENABLE([perf],
AC_HELP_STRING([--enable-perf],
[enable perf_event implementation]),
[ENABLE_PERF=$enableval],
[ENABLE_PERF=yes])
AC_ARG_ENABLE([qlmpi],
AC_HELP_STRING([--enable-qlmpi],
[enable qlmpi implementation]),
@ -454,7 +460,7 @@ else
fi
if test "x$ENABLE_QLMPI" = "xyes" ; then
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled])
AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
AC_MSG_NOTICE([qlmpi is enabled])
else
AC_MSG_NOTICE([qlmpi is disabled])
@ -478,6 +484,13 @@ else
AC_MSG_NOTICE([rusage is disabled])
fi
if test "x$ENABLE_PERF" = "xyes" ; then
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
AC_MSG_NOTICE([perf is enabled])
else
AC_MSG_NOTICE([perf is disabled])
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
fi

View File

@ -26,6 +26,7 @@
#include <syscall.h>
#include <bitops.h>
#include <profile.h>
#include <config.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
@ -691,6 +692,10 @@ struct thread {
int mod_clone;
struct uti_attr *mod_clone_arg;
int parent_cpuid;
// for performance counter
unsigned long pmc_alloc_map;
unsigned long extra_reg_alloc_map;
};
#define VM_RANGE_CACHE_SIZE 4

View File

@ -3436,18 +3436,18 @@ SYSCALL_DECLARE(signalfd4)
}
int
perf_counter_alloc(struct mc_perf_event *event)
perf_counter_alloc(struct thread *thread)
{
int ret = 0;
struct perf_event_attr *attr = &event->attr;
struct mc_perf_event *leader = event->group_leader;
int ret = -1;
int i = 0;
ret = ihk_mc_perfctr_alloc_counter(&attr->type, &attr->config, leader->pmc_status);
if(ret >= 0) {
leader->pmc_status |= 1UL << ret;
// find avail generic counter
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
if(!(thread->pmc_alloc_map & (1 << i))) {
ret = i;
break;
}
}
event->counter_id = ret;
return ret;
}
@ -3467,7 +3467,13 @@ perf_counter_start(struct mc_perf_event *event)
}
if(event->counter_id >= 0 && event->counter_id < X86_IA32_NUM_PERF_COUNTERS) {
ret = ihk_mc_perfctr_init_raw(event->counter_id, attr->config, mode);
if (event->extra_reg.reg) {
if (ihk_mc_perfctr_set_extra(event)) {
ret = -1;
goto out;
}
}
ret = ihk_mc_perfctr_init_raw(event->counter_id, event->hw_config, mode);
ihk_mc_perfctr_start(1UL << event->counter_id);
}
else if(event->counter_id >= X86_IA32_BASE_FIXED_PERF_COUNTERS &&
@ -3478,7 +3484,8 @@ perf_counter_start(struct mc_perf_event *event)
else {
ret = -1;
}
out:
return ret;
}
@ -3569,18 +3576,18 @@ perf_event_read_group(struct mc_perf_event *event, unsigned long read_format, ch
static int
perf_event_read_one(struct mc_perf_event *event, unsigned long read_format, char *buf)
{
unsigned long values[4];
int n = 0;
int size = 0;
values[n++] = perf_event_read_value(event);
size = n * sizeof(unsigned long);
if (copy_to_user(buf, values, size))
return -EFAULT;
return size;
}
static long
@ -3597,7 +3604,6 @@ perf_read(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
ret = perf_event_read_one(event, read_format, buf);
}
return ret;
}
void
@ -3723,12 +3729,12 @@ perf_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
process_unlock(proc, &lock);
}
*/
break;
case PERF_EVENT_IOC_RESET:
// TODO: reset other process
ihk_mc_perfctr_set(counter_id, event->attr.sample_freq * -1);
event->count = 0L;
break;
case PERF_EVENT_IOC_REFRESH:
// TODO: refresh other process
@ -3753,7 +3759,13 @@ static int
perf_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
{
struct mc_perf_event *event = (struct mc_perf_event*)sfd->data;
struct thread *thread = cpu_local_var(current);
thread->pmc_alloc_map &= ~(1UL << event->counter_id);
if (event->extra_reg.reg) {
thread->extra_reg_alloc_map &= ~(1UL << event->extra_reg.idx);
}
kfree(event);
return 0;
@ -3805,6 +3817,66 @@ perf_mmap(struct mckfd *sfd, ihk_mc_user_context_t *ctx)
return rc;
}
struct mc_perf_event*
mc_perf_event_alloc(struct perf_event_attr *attr)
{
unsigned long val = 0, extra_config = 0;
struct mc_perf_event *event;
int ereg_id;
if (!attr) {
return NULL;
}
event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT);
if (!event) {
return NULL;
}
memset(event, 0, sizeof(struct mc_perf_event));
INIT_LIST_HEAD(&event->group_entry);
INIT_LIST_HEAD(&event->sibling_list);
event->attr = *attr;
event->sample_freq = attr->sample_freq;
event->nr_siblings = 0;
event->count = 0L;
event->child_count_total = 0;
event->parent = NULL;
switch(attr->type) {
case PERF_TYPE_HARDWARE :
val = ihk_mc_hw_event_map(attr->config);
break;
case PERF_TYPE_HW_CACHE :
val = ihk_mc_hw_cache_event_map(attr->config);
extra_config = ihk_mc_hw_cache_extra_reg_map(attr->config);
break;
case PERF_TYPE_RAW :
val = attr->config;
break;
default:
// Unexpected type
kfree(event);
return NULL;
}
if (val == 0) {
kfree(event);
return NULL;
}
event->hw_config = val;
event->hw_config_ext = extra_config;
ereg_id = ihk_mc_get_extra_reg_id(event->hw_config, event->hw_config_ext);
if (ereg_id >= 0) {
event->extra_reg.config = event->hw_config_ext;
event->extra_reg.reg = ihk_mc_get_extra_reg_msr(ereg_id);
event->extra_reg.idx = ihk_mc_get_extra_reg_idx(ereg_id);
}
return event;
}
SYSCALL_DECLARE(perf_event_open)
{
struct syscall_request request IHK_DMA_ALIGN;
@ -3812,6 +3884,7 @@ SYSCALL_DECLARE(perf_event_open)
struct process *proc = thread->proc;
struct mckfd *sfd, *cfd;
int fd;
int counter_idx;
long irqstate;
struct perf_event_attr *attr = (void *)ihk_mc_syscall_arg0(ctx);
int pid = ihk_mc_syscall_arg1(ctx);
@ -3822,47 +3895,54 @@ SYSCALL_DECLARE(perf_event_open)
int not_supported_flag = 0;
#ifndef ENABLE_PERF
return -ENOSYS;
#endif // ENABLE_PERF
// check for unsupported options
if(cpu > 0) {
if (cpu > 0) {
not_supported_flag = 1;
}
if(flags > 0) {
if (flags > 0) {
not_supported_flag = 1;
}
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
if ((attr->type != PERF_TYPE_RAW) &&
(attr->type != PERF_TYPE_HARDWARE) &&
(attr->type != PERF_TYPE_HW_CACHE)) {
not_supported_flag = 1;
}
if(attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
not_supported_flag = 1;
}
if(attr->read_format & PERF_FORMAT_ID) {
if (attr->read_format &
(PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID)) {
not_supported_flag = 1;
}
if(not_supported_flag) {
if (not_supported_flag) {
return -1;
}
// process of perf_event_open
event = kmalloc(sizeof(struct mc_perf_event), IHK_MC_AP_NOWAIT);
if(!event)
return -ENOMEM;
event->attr = (struct perf_event_attr)*attr;
event = mc_perf_event_alloc((struct perf_event_attr*)attr);
if (!event) {
return -1;
}
event->sample_freq = attr->sample_freq;
event->nr_siblings = 0;
event->count = 0L;
event->child_count_total = 0;
event->parent = NULL;
event->pid = pid;
INIT_LIST_HEAD(&event->group_entry);
INIT_LIST_HEAD(&event->sibling_list);
if(group_fd == -1) {
counter_idx = perf_counter_alloc(thread);
if (counter_idx < 0) {
kfree(event);
return -1;
}
event->counter_id = counter_idx;
if (group_fd == -1) {
event->group_leader = event;
event->pmc_status = 0x0UL;
} else {
for(cfd = proc->mckfd; cfd; cfd = cfd->next) {
if(cfd->fd == group_fd) {
}
else {
for (cfd = proc->mckfd; cfd; cfd = cfd->next) {
if (cfd->fd == group_fd) {
event->group_leader = (struct mc_perf_event*)cfd->data;
list_add_tail(&event->group_entry, &event->group_leader->sibling_list);
event->group_leader->nr_siblings++;
@ -3871,10 +3951,7 @@ SYSCALL_DECLARE(perf_event_open)
}
}
if(perf_counter_alloc(event) < 0)
return -1;
if(event->counter_id < 0)
return -1;
event->group_leader->pmc_status |= (1UL << counter_idx);
request.number = __NR_perf_event_open;
request.args[0] = 0;
@ -3883,6 +3960,8 @@ SYSCALL_DECLARE(perf_event_open)
return fd;
}
thread->pmc_alloc_map |= 1UL << counter_idx;
sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT);
if(!sfd)
return -ENOMEM;

View File

@ -77,6 +77,15 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
void *stack_pointer,
void (*next_function)(void));
int ihk_mc_get_extra_reg_id(unsigned long hw_config, unsigned long hw_config_ext);
unsigned int ihk_mc_get_nr_extra_regs();
int ihk_mc_get_extra_reg_idx(int id);
unsigned int ihk_mc_get_extra_reg_msr(int id);
unsigned long ihk_mc_get_extra_reg_event(int id);
unsigned long ihk_mc_hw_event_map(unsigned long hw_event);
unsigned long ihk_mc_hw_cache_event_map(unsigned long hw_cache_event);
unsigned long ihk_mc_hw_cache_extra_reg_map(unsigned long hw_cache_event);
/* returns the 'prev' argument of the call that caused the switch to the context returned. */
void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx,
ihk_mc_kernel_context_t *new_ctx,

View File

@ -18,6 +18,8 @@
#include <types.h>
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
#include <mc_perf_event.h>
#define PERFCTR_USER_MODE 0x01
#define PERFCTR_KERNEL_MODE 0x02
@ -60,6 +62,7 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode);
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode);
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode);
#endif/*POSTK_DEBUG_TEMP_FIX_29*/
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event);
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_start(int counter);
int ihk_mc_perfctr_stop(int counter);

View File

@ -2,9 +2,9 @@
#ifndef MC_PERF_EVENT_H
#define MC_PERF_EVENT_H
#ifdef POSTK_DEBUG_TEMP_FIX_32
//#ifdef POSTK_DEBUG_TEMP_FIX_32
#include <list.h>
#endif /*POSTK_DEBUG_TEMP_FIX_32*/
//#endif /*POSTK_DEBUG_TEMP_FIX_32*/
#include <march.h>
struct perf_event_attr;
@ -105,6 +105,40 @@ enum perf_hw_id {
PERF_COUNT_HW_MAX, /* non-ABI */
};
/*
* Generalized hardware cache events:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_NODE = 6,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
/*
* Hardware event_id to monitor via a performance monitoring event:
*/
@ -218,8 +252,18 @@ struct perf_event_attr {
#endif
};
struct hw_perf_event_extra {
unsigned long config;
unsigned int reg;
int idx;
};
struct mc_perf_event {
struct perf_event_attr attr;
struct hw_perf_event_extra extra_reg;
unsigned long hw_config;
unsigned long hw_config_ext;
int cpu_id;
int counter_id; // allocated counter index
unsigned long count; // counter value

test/perf_event/Makefile (new file)

@ -0,0 +1,3 @@
all: perf_test
perf_test: perf_test.o perftool.o
perftool.o: perftool.c perftool.h

test/perf_event/go_perf_test.sh (new executable file)

@ -0,0 +1,66 @@
#!/bin/sh
MCEXEC="mcexec"
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3
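# HW_CACHE config encoding (matches hw_cache_build() in perftool.c):
#   config = cache_id | (op_id << 8) | (result_id << 16)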
echo "【PERF_TYPE_HARDWARE all spase】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE no exclude】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done
echo "【HARDWARE exclude user space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE exclude user space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done
echo "【HARDWARE exclude kernel space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE exclude kernel space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done

test/perf_event/no_mc_go.sh (new executable file)

@ -0,0 +1,66 @@
#!/bin/sh
MCEXEC=""
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3
echo "【PERF_TYPE_HARDWARE all spase】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE no exclude】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 0 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done
echo "【HARDWARE exclude user space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE exclude user space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 1 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done
echo "【HARDWARE exclude kernel space】, "
for id in `seq 0 ${PERF_HW_ID_MAX}`
do
${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HARDWARE} ${id}
done
echo "【HW_CACHE exclude kernel space】, "
for i in `seq 0 ${PERF_COUNT_HW_CACHE_MAX}`
do
for j in `seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}`
do
for k in `seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}`
do
${MCEXEC} ./perf_test 2 1 ${PERF_TYPE_HW_CACHE} `expr ${k} \* 65536 + ${j} \* 256 + ${i}`
done
done
done

test/perf_event/perf_test.c (new file)

@ -0,0 +1,82 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <langinfo.h>
#include <locale.h>
#include <sys/mman.h>
#include "perftool.h"
void
usage()
{
printf("Usage: perf_test mode cntr_num [<type> <config>]...\n");
exit(1);
}
int
main(int argc, char** argv)
{
long fds[PERF_CNTR_NUM];
long long counts[PERF_CNTR_NUM];
int types[PERF_CNTR_NUM];
int configs[PERF_CNTR_NUM];
int cntr_num;
int mode;
int i;
if (argc < 3) {
printf("ERROR: Too few arguments.\n");
usage();
}
mode = atoi(argv[1]);
cntr_num = atoi(argv[2]);
if (cntr_num > PERF_CNTR_NUM) {
printf("ERROR: Too many counters (max %d).\n", PERF_CNTR_NUM);
usage();
}
/* parse args */
if (argc < cntr_num * 2 + 3) {
printf("ERROR: Too few arguments.\n");
usage();
}
for (i = 0; i < cntr_num; i++) {
types[i] = atoi(argv[i * 2 + 3]);
configs[i] = atoi(argv[i * 2 + 4]);
}
/* perf_event_open */
for (i = 0; i < cntr_num; i++) {
fds[i] = pe_opener(-1, mode, types[i], configs[i]);
}
/* perf_start */
for (i = 0; i < cntr_num; i++) {
PERF_BEGIN(fds[i]);
}
memory_task();
calc_task();
/* perf_end and read */
for (i = 0; i < cntr_num; i++) {
PERF_END(fds[i]);
PERF_READ(fds[i], counts[i]);
close(fds[i]);
}
/* print result */
for (i = 0; i < cntr_num; i++) {
if (types[i] == PERF_TYPE_HARDWARE) {
printf("%s,%ld\n", hw_event_names[configs[i]], counts[i]);
}
else if (types[i] == PERF_TYPE_HW_CACHE) {
printf("%s_%s_%s,%ld\n",
id_names[(configs[i] >> 0) & 0xff],
op_id_names[(configs[i] >> 8) & 0xff],
result_names[(configs[i] >> 16) & 0xff],
counts[i]);
}
}
return 0;
}

test/perf_event/perftool.c (new file)

@ -0,0 +1,99 @@
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
//#include "perftool.h"
#define WORKSIZE (1024 * 1024 * 32)
#define LOOPSIZE 1000000
#define REP 1000
long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
long
pe_opener(long group_fd, int mode, int type, unsigned long config)
{
struct perf_event_attr pe;
int fd;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = type;
pe.size = sizeof(struct perf_event_attr);
pe.config = config;
pe.disabled = 1;
if (mode & 0x01) {
pe.exclude_user = 1; // EXCLUDE EVENTS THAT HAPPEN IN USER-SPACE
} else {
pe.exclude_user = 0; // INCLUDE EVENTS THAT HAPPEN IN USER-SPACE
}
if (mode & 0x02) {
pe.exclude_kernel = 1; // EXCLUDE EVENTS THAT HAPPEN IN KERNEL-SPACE
} else {
pe.exclude_kernel = 0; // INCLUDE EVENTS THAT HAPPEN IN KERNEL-SPACE
}
pe.exclude_hv = 1;
if (group_fd > 0) {
pe.read_format = PERF_FORMAT_GROUP;
}
fd = perf_event_open(&pe, 0, -1, group_fd, 0);
return fd;
}
long long
hw_cache_build(long long id, long long op_id, long long op_result_id)
{
return (id) | (op_id << 8) | (op_result_id << 16);
}
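/* touch random bytes in a 32 MiB work buffer to generate data-cache and TLB events */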
void
memory_task()
{
char* work = malloc(WORKSIZE);
char* fromaddr;
char* toaddr;
double r;
int offset;
int i;
for (i = 0; i < LOOPSIZE; i++) {
r = drand48();
offset = (int)(r * (double)WORKSIZE);
fromaddr = work + offset;
r = drand48();
offset = (int)(r * (double)WORKSIZE);
toaddr = work + offset;
*toaddr = *fromaddr;
}
}
void
calc_task()
{
int i, j;
double tmp;
for (i = 0; i < REP; i++) {
for (j = 0; j < REP; j++) {
tmp = drand48() * drand48();
}
}
}

test/perf_event/perftool.h (new file)

@ -0,0 +1,99 @@
#ifndef __PERFTOOL_H__
#define __PERFTOOL_H__
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
extern long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags);
extern long perf_instructions();
extern long perf_count_hw_cache_l1d_read_miss();
extern long perf_count_hw_cache_dtlb_read_miss();
extern long pe_opener(long group_fd, int mode, int type, unsigned long config);
extern long long hw_cache_build(long long id, long long op_id, long long op_result_id);
extern void memory_task();
extern void calc_task();
#define PERF_CNTR_NUM 4
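/* helpers: reset+enable, disable, and read one counter; a non-positive fd is treated as "not opened" */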
#define PERF_BEGIN(fd) \
if (fd > 0) ioctl(fd, PERF_EVENT_IOC_RESET, 0); \
if (fd > 0) ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
#define PERF_END(fd) \
if (fd > 0) ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
#define PERF_READ(fd, count) \
if (fd > 0) read(fd, &count, sizeof(long long)); else count = -1;
char* hw_event_names[] = {
"CPU_CYCLES ",
"INSTRUCTIONS ",
"CACHE_REFERENCES ",
"CACHE_MISSES ",
"BRANCH_INSTRUCTIONS ",
"BRANCH_MISSES ",
"BUS_CYCLES ",
"STALLED_CYCLES_FRONTEND",
"STALLED_CYCLES_BACKEND ",
"REF_CPU_CYCLES ",
};
char* id_names[] = {
"L1D ",
"L1I ",
"LL ",
"DTLB",
"ITLB",
"BPU ",
"NODE",
};
char* op_id_names[] = {
"OP_READ ",
"OP_WRITE ",
"OP_PREFETCH",
};
char* result_names[] = {
"ACCESS",
"MISS ",
};
long long ids[] = {
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_L1I,
PERF_COUNT_HW_CACHE_LL,
PERF_COUNT_HW_CACHE_DTLB,
PERF_COUNT_HW_CACHE_ITLB,
PERF_COUNT_HW_CACHE_BPU,
PERF_COUNT_HW_CACHE_NODE,
};
long long op_ids[] = {
PERF_COUNT_HW_CACHE_OP_READ,
PERF_COUNT_HW_CACHE_OP_WRITE,
PERF_COUNT_HW_CACHE_OP_PREFETCH,
};
long long result_ids[] = {
PERF_COUNT_HW_CACHE_RESULT_ACCESS,
PERF_COUNT_HW_CACHE_RESULT_MISS,
};
int hw_configs[] = {
PERF_COUNT_HW_CPU_CYCLES,
PERF_COUNT_HW_INSTRUCTIONS,
PERF_COUNT_HW_CACHE_REFERENCES,
PERF_COUNT_HW_CACHE_MISSES,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
PERF_COUNT_HW_BRANCH_MISSES,
PERF_COUNT_HW_BUS_CYCLES,
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
PERF_COUNT_HW_REF_CPU_CYCLES,
};
#endif