support PERF_TYPE_{HARDWARE|HW_CACHE} in perf_event_open

refs #829
This commit is contained in:
Ken Sato
2017-10-12 16:51:47 +09:00
parent 2ae6883a8b
commit 12840601e1
15 changed files with 685 additions and 51 deletions

3
test/perf_event/Makefile Normal file
View File

@ -0,0 +1,3 @@
# Build the perf_event_open test program using make's built-in
# compile and link rules (no explicit recipes are needed).
.PHONY: all
all: perf_test

# perf_test is linked from both object files.
perf_test: perf_test.o perftool.o

# perf_test.c includes perftool.h, so the object must be rebuilt
# whenever the header changes.
perf_test.o: perf_test.c perftool.h
perftool.o: perftool.c perftool.h

66
test/perf_event/go_perf_test.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
# Sweep every PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE event through
# ./perf_test, launched via mcexec, once for each mode value 0, 1 and 2.

# Launcher placed in front of ./perf_test.
MCEXEC="mcexec"

# Upper bounds (inclusive) of the event indices that are swept.
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

# Event type numbers handed to perf_test.
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# Run perf_test once per hardware event id.
#   $1: mode argument for perf_test
sweep_hardware() {
	for id in $(seq 0 ${PERF_HW_ID_MAX}); do
		${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HARDWARE} ${id}
	done
}

# Run perf_test once per (cache id, operation, result) combination.
# The config is packed as: result * 65536 + op * 256 + cache id.
#   $1: mode argument for perf_test
sweep_hw_cache() {
	for i in $(seq 0 ${PERF_COUNT_HW_CACHE_MAX}); do
		for j in $(seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}); do
			for k in $(seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}); do
				${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HW_CACHE} $((${k} * 65536 + ${j} * 256 + ${i}))
			done
		done
	done
}

echo "【PERF_TYPE_HARDWARE all spase】, "
sweep_hardware 0

echo "【HW_CACHE no exclude】, "
sweep_hw_cache 0

echo "【HARDWARE exclude user space】, "
sweep_hardware 1

echo "【HW_CACHE exclude user space】, "
sweep_hw_cache 1

echo "【HARDWARE exclude kernel space】, "
sweep_hardware 2

echo "【HW_CACHE exclude kernel space】, "
sweep_hw_cache 2

66
test/perf_event/no_mc_go.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
# Sweep every PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE event through
# ./perf_test, once for each mode value 0, 1 and 2.
# MCEXEC is empty here, so ./perf_test is started directly.

# Launcher placed in front of ./perf_test (intentionally empty).
MCEXEC=""

# Upper bounds (inclusive) of the event indices that are swept.
PERF_HW_ID_MAX=9
PERF_COUNT_HW_CACHE_MAX=6
PERF_COUNT_HW_CACHE_OP_MAX=2
PERF_COUNT_HW_CACHE_RESULT_MAX=1

# Event type numbers handed to perf_test.
PERF_TYPE_HARDWARE=0
PERF_TYPE_HW_CACHE=3

# Run perf_test once per hardware event id.
#   $1: mode argument for perf_test
sweep_hardware() {
	for id in $(seq 0 ${PERF_HW_ID_MAX}); do
		${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HARDWARE} ${id}
	done
}

# Run perf_test once per (cache id, operation, result) combination.
# The config is packed as: result * 65536 + op * 256 + cache id.
#   $1: mode argument for perf_test
sweep_hw_cache() {
	for i in $(seq 0 ${PERF_COUNT_HW_CACHE_MAX}); do
		for j in $(seq 0 ${PERF_COUNT_HW_CACHE_OP_MAX}); do
			for k in $(seq 0 ${PERF_COUNT_HW_CACHE_RESULT_MAX}); do
				${MCEXEC} ./perf_test $1 1 ${PERF_TYPE_HW_CACHE} $((${k} * 65536 + ${j} * 256 + ${i}))
			done
		done
	done
}

echo "【PERF_TYPE_HARDWARE all spase】, "
sweep_hardware 0

echo "【HW_CACHE no exclude】, "
sweep_hw_cache 0

echo "【HARDWARE exclude user space】, "
sweep_hardware 1

echo "【HW_CACHE exclude user space】, "
sweep_hw_cache 1

echo "【HARDWARE exclude kernel space】, "
sweep_hardware 2

echo "【HW_CACHE exclude kernel space】, "
sweep_hw_cache 2

View File

@ -0,0 +1,82 @@
#include <langinfo.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#include "perftool.h"
/*
 * Print the command-line synopsis on stdout and terminate the
 * process with exit status 1. Never returns.
 */
void
usage()
{
	fputs("Usage: perf_test mode cntr_num [<type> <config>]...\n", stdout);
	exit(1);
}
int
main(int argc, char** argv)
{
long fds[PERF_CNTR_NUM];
long long counts[PERF_CNTR_NUM];
int types[PERF_CNTR_NUM];
int configs[PERF_CNTR_NUM];
int cntr_num;
int mode;
int i;
if (argc < 3) {
printf("ERROR: Too few arguments.\n");
usage();
}
mode = atoi(argv[1]);
cntr_num = atoi(argv[2]);
/* perse args */
if (argc < cntr_num * 2 + 2) {
printf("ERROR: Too few arguments.\n");
usage();
}
for (i = 0; i < cntr_num; i++) {
types[i] = atoi(argv[i * 2 + 3]);
configs[i] = atoi(argv[i * 2 + 4]);
}
/* perf_event_open */
for (i = 0; i < cntr_num; i++) {
fds[i] = pe_opener(-1, mode, types[i], configs[i]);
}
/* perf_start */
for (i = 0; i < cntr_num; i++) {
PERF_BEGIN(fds[i]);
}
memory_task();
calc_task();
/* perf_end and read */
for (i = 0; i < cntr_num; i++) {
PERF_END(fds[i]);
PERF_READ(fds[i], counts[i]);
close(fds[i]);
}
/* print result */
for (i = 0; i < cntr_num; i++) {
if (types[i] == PERF_TYPE_HARDWARE) {
printf("%s,%ld\n", hw_event_names[configs[i]], counts[i]);
}
else if (types[i] == PERF_TYPE_HW_CACHE) {
printf("%s_%s_%s,%ld\n",
id_names[(configs[i] >> 0) & 0xff],
op_id_names[(configs[i] >> 8) & 0xff],
result_names[(configs[i] >> 16) & 0xff],
counts[i]);
}
}
return 0;
}

View File

@ -0,0 +1,99 @@
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
//#include "perftool.h"
/* Size in bytes of the scratch buffer allocated by memory_task(). */
#define WORKSIZE (1024 * 1024 * 32)
/* Number of random single-byte copies performed by memory_task(). */
#define LOOPSIZE 1000000
/* Iteration count of each of the two nested loops in calc_task(). */
#define REP 1000
/*
 * Invoke the perf_event_open system call through syscall(), passing
 * all five arguments through unchanged.
 *
 * The raw result is narrowed to int before being returned as long.
 */
long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
		int cpu, int group_fd, unsigned long flags)
{
	const int result = syscall(__NR_perf_event_open, hw_event, pid, cpu,
				   group_fd, flags);

	return result;
}
/*
 * Build a perf_event_attr and open one counter with perf_event_open().
 *
 * group_fd - forwarded as the group_fd argument; when it is greater
 *            than 0 the counter also gets read_format PERF_FORMAT_GROUP
 * mode     - bit 0 (0x01) set: exclude events that happen in user space
 *            bit 1 (0x02) set: exclude events that happen in kernel space
 * type     - stored in attr.type
 * config   - stored in attr.config
 *
 * The counter is created disabled and always excludes hypervisor events.
 * It is opened with pid 0 and cpu -1.
 *
 * Returns the value returned by perf_event_open().
 */
long
pe_opener(long group_fd, int mode, int type, unsigned long config)
{
	struct perf_event_attr pe;

	memset(&pe, 0, sizeof(pe));
	pe.type = type;
	pe.size = sizeof(pe);
	pe.config = config;
	pe.disabled = 1;	/* the caller enables it explicitly later */
	pe.exclude_user = (mode & 0x01) ? 1 : 0;
	pe.exclude_kernel = (mode & 0x02) ? 1 : 0;
	pe.exclude_hv = 1;

	if (group_fd > 0) {
		pe.read_format = PERF_FORMAT_GROUP;
	}

	return perf_event_open(&pe, 0, -1, group_fd, 0);
}
/*
 * Pack the three fields of a hardware-cache event config into one value:
 * id in the low bits, op_id shifted left by 8, op_result_id shifted
 * left by 16, all OR-ed together.
 */
long long
hw_cache_build(long long id, long long op_id, long long op_result_id)
{
	const long long op_field = op_id << 8;
	const long long result_field = op_result_id << 16;

	return id | op_field | result_field;
}
/*
 * Memory workload: allocate a WORKSIZE-byte buffer and perform LOOPSIZE
 * single-byte copies between offsets chosen with drand48().
 *
 * The buffer is released before returning. If the allocation fails a
 * message is written to stderr and no work is done.
 */
void
memory_task()
{
	/*
	 * volatile keeps the compiler from dropping the loads and stores,
	 * which are the whole point of this workload.
	 */
	volatile char *work = malloc(WORKSIZE);
	volatile char *fromaddr;
	volatile char *toaddr;
	double r;
	int offset;
	int i;

	if (work == NULL) {
		fprintf(stderr, "memory_task: malloc(%d) failed\n", WORKSIZE);
		return;
	}

	for (i = 0; i < LOOPSIZE; i++) {
		/* random source byte */
		r = drand48();
		offset = (int)(r * (double)WORKSIZE);
		fromaddr = work + offset;

		/* random destination byte */
		r = drand48();
		offset = (int)(r * (double)WORKSIZE);
		toaddr = work + offset;

		*toaddr = *fromaddr;
	}

	free((void *)work);
}
/*
 * Arithmetic workload: REP * REP times, multiply two drand48() values.
 * The product is stored in a local and otherwise unused.
 */
void
calc_task()
{
	double product;
	int outer = 0;

	while (outer < REP) {
		int inner = 0;

		while (inner < REP) {
			product = drand48() * drand48();
			inner++;
		}
		outer++;
	}
}

View File

@ -0,0 +1,99 @@
#ifndef __PERFTOOL_H__
#define __PERFTOOL_H__
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
/* Wrapper that issues the perf_event_open system call. */
extern long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags);
/*
 * NOTE(review): no definition of the next three functions is visible
 * alongside this header -- confirm they exist or drop the declarations.
 */
extern long perf_instructions();
extern long perf_count_hw_cache_l1d_read_miss();
extern long perf_count_hw_cache_dtlb_read_miss();
/*
 * Fill in a perf_event_attr from mode/type/config and open the counter;
 * returns the result of perf_event_open().
 */
extern long pe_opener(long group_fd, int mode, int type, unsigned long config);
/* Pack id, op_id and op_result_id as id | (op_id << 8) | (op_result_id << 16). */
extern long long hw_cache_build(long long id, long long op_id, long long op_result_id);
/* Workloads executed between enabling and disabling the counters. */
extern void memory_task();
extern void calc_task();
/* Maximum number of counters handled in one run. */
#define PERF_CNTR_NUM 4

/*
 * Counter control helpers. Each one expands to a single statement, so it
 * is safe in an unbraced if/else, and each is a no-op for fd <= 0.
 * Note that the arguments are evaluated more than once.
 */

/* Reset the counter behind fd to zero and start it. */
#define PERF_BEGIN(fd) \
	do { \
		if ((fd) > 0) { \
			ioctl((fd), PERF_EVENT_IOC_RESET, 0); \
			ioctl((fd), PERF_EVENT_IOC_ENABLE, 0); \
		} \
	} while (0)

/* Stop the counter behind fd. */
#define PERF_END(fd) \
	do { \
		if ((fd) > 0) { \
			ioctl((fd), PERF_EVENT_IOC_DISABLE, 0); \
		} \
	} while (0)

/*
 * Read one long long from fd into count. count is set to -1 when fd is
 * not usable or when the read does not deliver a complete value.
 */
#define PERF_READ(fd, count) \
	do { \
		if ((fd) <= 0 || \
		    read((fd), &(count), sizeof(long long)) != \
		    (long)sizeof(long long)) { \
			(count) = -1; \
		} \
	} while (0)
/*
 * Printable event names.
 *
 * The tables are static and read-only so this header can be included
 * from more than one translation unit without producing duplicate
 * symbol definitions at link time.
 */

/* Names of PERF_TYPE_HARDWARE events, indexed by PERF_COUNT_HW_* config. */
static const char *const hw_event_names[] = {
	[PERF_COUNT_HW_CPU_CYCLES]              = "CPU_CYCLES ",
	[PERF_COUNT_HW_INSTRUCTIONS]            = "INSTRUCTIONS ",
	[PERF_COUNT_HW_CACHE_REFERENCES]        = "CACHE_REFERENCES ",
	[PERF_COUNT_HW_CACHE_MISSES]            = "CACHE_MISSES ",
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = "BRANCH_INSTRUCTIONS ",
	[PERF_COUNT_HW_BRANCH_MISSES]           = "BRANCH_MISSES ",
	[PERF_COUNT_HW_BUS_CYCLES]              = "BUS_CYCLES ",
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "STALLED_CYCLES_FRONTEND",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = "STALLED_CYCLES_BACKEND ",
	[PERF_COUNT_HW_REF_CPU_CYCLES]          = "REF_CPU_CYCLES ",
};

/* Cache names, indexed by the low byte of a PERF_TYPE_HW_CACHE config. */
static const char *const id_names[] = {
	[PERF_COUNT_HW_CACHE_L1D]  = "L1D ",
	[PERF_COUNT_HW_CACHE_L1I]  = "L1I ",
	[PERF_COUNT_HW_CACHE_LL]   = "LL ",
	[PERF_COUNT_HW_CACHE_DTLB] = "DTLB",
	[PERF_COUNT_HW_CACHE_ITLB] = "ITLB",
	[PERF_COUNT_HW_CACHE_BPU]  = "BPU ",
	[PERF_COUNT_HW_CACHE_NODE] = "NODE",
};

/* Operation names, indexed by bits 8-15 of a PERF_TYPE_HW_CACHE config. */
static const char *const op_id_names[] = {
	[PERF_COUNT_HW_CACHE_OP_READ]     = "OP_READ ",
	[PERF_COUNT_HW_CACHE_OP_WRITE]    = "OP_WRITE ",
	[PERF_COUNT_HW_CACHE_OP_PREFETCH] = "OP_PREFETCH",
};

/* Result names, indexed by bits 16-23 of a PERF_TYPE_HW_CACHE config. */
static const char *const result_names[] = {
	[PERF_COUNT_HW_CACHE_RESULT_ACCESS] = "ACCESS",
	[PERF_COUNT_HW_CACHE_RESULT_MISS]   = "MISS ",
};
/*
 * Event id tables.
 *
 * Declared static and const so that including this header from several
 * translation units does not define the same objects more than once.
 */

/* Cache ids usable as the first field of a PERF_TYPE_HW_CACHE config. */
static const long long ids[] = {
	PERF_COUNT_HW_CACHE_L1D,
	PERF_COUNT_HW_CACHE_L1I,
	PERF_COUNT_HW_CACHE_LL,
	PERF_COUNT_HW_CACHE_DTLB,
	PERF_COUNT_HW_CACHE_ITLB,
	PERF_COUNT_HW_CACHE_BPU,
	PERF_COUNT_HW_CACHE_NODE,
};

/* Cache operation ids. */
static const long long op_ids[] = {
	PERF_COUNT_HW_CACHE_OP_READ,
	PERF_COUNT_HW_CACHE_OP_WRITE,
	PERF_COUNT_HW_CACHE_OP_PREFETCH,
};

/* Cache operation result ids. */
static const long long result_ids[] = {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
	PERF_COUNT_HW_CACHE_RESULT_MISS,
};

/* Config values for PERF_TYPE_HARDWARE events. */
static const int hw_configs[] = {
	PERF_COUNT_HW_CPU_CYCLES,
	PERF_COUNT_HW_INSTRUCTIONS,
	PERF_COUNT_HW_CACHE_REFERENCES,
	PERF_COUNT_HW_CACHE_MISSES,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	PERF_COUNT_HW_BRANCH_MISSES,
	PERF_COUNT_HW_BUS_CYCLES,
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	PERF_COUNT_HW_REF_CPU_CYCLES,
};
#endif