relu test added

This commit is contained in:
Nico Castaneda
2023-10-06 13:20:31 -07:00
parent d69a64c32c
commit 8296e6be0f
11 changed files with 2434 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ all:
$(MAKE) -C no_mf_ext
$(MAKE) -C no_smem
$(MAKE) -C prefetch
$(MAKE) -C relu
run-simx:
$(MAKE) -C basic run-simx
@@ -25,6 +26,7 @@ run-simx:
$(MAKE) -C no_mf_ext run-simx
$(MAKE) -C no_smem run-simx
$(MAKE) -C prefetch run-simx
$(MAKE) -C relu run-simx
run-rtlsim:
$(MAKE) -C basic run-rtlsim
@@ -39,6 +41,7 @@ run-rtlsim:
$(MAKE) -C no_mf_ext run-rtlsim
$(MAKE) -C no_smem run-rtlsim
$(MAKE) -C prefetch run-rtlsim
$(MAKE) -C relu run-rtlsim
run-vlsim:
$(MAKE) -C basic run-vlsim
@@ -53,6 +56,7 @@ run-vlsim:
$(MAKE) -C no_mf_ext run-vlsim
$(MAKE) -C no_smem run-vlsim
$(MAKE) -C prefetch run-vlsim
$(MAKE) -C relu run-vlsim
clean:
$(MAKE) -C basic clean
@@ -67,6 +71,7 @@ clean:
$(MAKE) -C no_mf_ext clean
$(MAKE) -C no_smem clean
$(MAKE) -C prefetch clean
$(MAKE) -C relu clean
clean-all:
$(MAKE) -C basic clean-all
@@ -81,3 +86,4 @@ clean-all:
$(MAKE) -C no_mf_ext clean-all
$(MAKE) -C no_smem clean-all
$(MAKE) -C prefetch clean-all
$(MAKE) -C relu clean-all

View File

@@ -0,0 +1,2 @@
main.o: main.cpp \
/home/eecs/nicolas.a.castaneda/vortex/driver/include/vortex.h common.h

View File

@@ -0,0 +1,77 @@
# Builds the relu regression test: the host program $(PROJECT) and the device
# kernel images (kernel.elf, kernel.bin, kernel.dump), plus run-* launchers.
XLEN ?= 32
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
# Arguments handed to the host program by the run-* targets.
OPTS ?= -n64

# Device-side (RISC-V) toolchain and flags.
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(LLVM_PREFIX)/bin/llvm-objdump
VX_CP = $(LLVM_PREFIX)/bin/llvm-objcopy
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_SRCS = kernel.c

# Host-side compiler and linker flags.
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex

# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif

PROJECT = relu
SRCS = main.cpp
# Header included by both main.cpp and kernel.c; listed explicitly so that
# editing it rebuilds the host program and the kernel.
HDRS = common.h
# run-<name> puts $(VORTEX_DRV_PATH)/<name> on LD_LIBRARY_PATH.
RUN_TARGETS = run-simx run-fpga run-asesim run-vlsim run-rtlsim

# None of these names is a file; without this a stray file called "clean"
# or "all" would make the target appear up to date.
.PHONY: all clean clean-all $(RUN_TARGETS)

# kernel.dump and .depend are written through shell redirection; remove the
# partial file if the producing command fails.
.DELETE_ON_ERROR:

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

kernel.elf: $(VX_SRCS) $(HDRS)
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@

# The recipe names $(SRCS) rather than $^ so header prerequisites (explicit
# or pulled in from .depend) are never passed to the compiler as inputs.
$(PROJECT): $(SRCS) $(HDRS)
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@

# One static pattern rule instead of five copies; $* is the part after "run-".
$(RUN_TARGETS): run-%: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/$*:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# -MT makes the generated dependencies apply to $(PROJECT); the default
# target name (main.o) is not built by any rule in this file.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM -MT $(PROJECT) $^ > $@

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all: clean
	rm -rf *.elf *.bin *.dump

# Skip dependency generation for both cleaning goals; otherwise clean-all
# would build .depend only to delete it again.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif

View File

@@ -0,0 +1,12 @@
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used below; include it here so this header compiles no matter
// what the including file pulled in beforehand.
#include <stdint.h>

// Device address where the host copies the kernel_arg_t block and from which
// the kernel reads it.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Arguments passed from the host program to the device kernel.
typedef struct {
  uint32_t num_points;  // number of int32 elements to process
  uint32_t src_addr;    // device address of the input buffer
  uint32_t dst_addr;    // device address of the output buffer
} kernel_arg_t;

#endif

BIN
tests/regression/relu/kernel.bin Executable file

Binary file not shown.

View File

@@ -0,0 +1,26 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
// Task callback: applies ReLU to one element.
// Reads src[task_id], replaces a negative value with 0, and stores the result
// in dst[task_id]. The buffer addresses come from the kernel argument block.
//
// task_id: index of the element handled by this task.
// arg:     kernel arguments (source and destination device addresses).
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
  int32_t* src_ptr = (int32_t*)arg->src_addr;
  int32_t* dst_ptr = (int32_t*)arg->dst_addr;

  int32_t value = src_ptr[task_id];
  // Kept as a branch on the loaded value, as in the original code.
  if (value < 0) {
    value = 0;
  }
  dst_ptr[task_id] = value;
}
// Kernel entry point: locates the argument block at KERNEL_ARG_DEV_MEM_ADDR
// and hands kernel_body to vx_spawn_tasks with num_points as the task count.
void main() {
  kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
  vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
}

File diff suppressed because it is too large Load Diff

BIN
tests/regression/relu/kernel.elf Executable file

Binary file not shown.

View File

@@ -0,0 +1,218 @@
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <string.h>
#include <unistd.h>
#include <vortex.h>
#include "common.h"
// Evaluates _expr once as an int. On a non-zero result it prints the failing
// expression and its value, calls cleanup(), and terminates with exit(-1).
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (_ret != 0) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
///////////////////////////////////////////////////////////////////////////////
// Path of the kernel binary uploaded to the device; replaced by the -k option.
const char* kernel_file = "kernel.bin";
// Value of the -n option; stays 0 until parse_args() assigns it.
uint32_t count = 0;
// Host-side input values filled in by gen_input_data().
std::vector<int32_t> src_data;
// Expected results filled in by gen_ref_data().
std::vector<int32_t> ref_data;
// Device handle; opened in main() and closed by cleanup().
vx_device_h device = nullptr;
// Buffer used for every copy to and from the device; freed by cleanup().
vx_buffer_h staging_buf = nullptr;
// Kernel arguments (point count and device buffer addresses) copied to the device.
kernel_arg_t kernel_arg;
// Prints the program banner and the accepted command-line options to stdout.
static void show_usage() {
  std::cout << "Vortex Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
// Parses the command line with getopt:
//   -n <value>  stored in `count` (converted with atoi)
//   -k <path>   stored in `kernel_file`
//   -h or '?'   prints the usage text and exits with status 0
// Any other getopt result prints the usage text and exits with status -1.
static void parse_args(int argc, char **argv) {
  for (int opt = getopt(argc, argv, "n:k:h?"); opt != -1;
       opt = getopt(argc, argv, "n:k:h?")) {
    if (opt == 'n') {
      count = atoi(optarg);
    } else if (opt == 'k') {
      kernel_file = optarg;
    } else if (opt == 'h' || opt == '?') {
      show_usage();
      exit(0);
    } else {
      show_usage();
      exit(-1);
    }
  }
}
// Releases what the test acquired: the staging buffer if one was allocated,
// then, if a device is open, both device buffers and the device handle.
void cleanup() {
  if (staging_buf != nullptr) {
    vx_buf_free(staging_buf);
  }

  // Nothing device-side to release when the device was never opened.
  if (device == nullptr) {
    return;
  }

  vx_mem_free(device, kernel_arg.src_addr);
  vx_mem_free(device, kernel_arg.dst_addr);
  vx_dev_close(device);
}
// Resizes src_data to num_points and fills it, in index order, with
// std::rand() shifted down by RAND_MAX / 2 so values of both signs occur.
void gen_input_data(uint32_t num_points) {
  src_data.resize(num_points);
  for (auto& sample : src_data) {
    const int shifted = std::rand() - (RAND_MAX / 2);
    sample = shifted;
  }
}
// Resizes ref_data to num_points and stores the expected ReLU result for each
// input: the value itself when positive, otherwise 0. Uses bounds-checked
// access, so it throws if src_data holds fewer than num_points elements.
void gen_ref_data(uint32_t num_points) {
  ref_data.resize(num_points);
  uint32_t idx = 0;
  while (idx < num_points) {
    const int32_t input = src_data.at(idx);
    ref_data.at(idx) = (input > 0) ? input : 0;
    ++idx;
  }
}
// Starts the device, waits for it to finish, copies buf_size bytes from the
// destination buffer into the staging buffer, and compares the first
// num_points results against ref_data.
//
// Returns 0 when every element matches and 1 otherwise (each mismatch is
// printed). A failing runtime call does not return: RT_CHECK exits the process.
int run_test(const kernel_arg_t& kernel_arg,
             uint32_t buf_size,
             uint32_t num_points) {
  // start device
  std::cout << "start device" << std::endl;
  RT_CHECK(vx_start(device));

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));

  // download destination buffer
  std::cout << "download destination buffer" << std::endl;
  RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));

  // verify result
  std::cout << "verify result" << std::endl;
  auto results = (int32_t*)vx_host_ptr(staging_buf);
  int mismatches = 0;
  for (uint32_t idx = 0; idx != num_points; ++idx) {
    const int expected = ref_data.at(idx);
    const int actual = results[idx];
    if (actual == expected) {
      continue;
    }
    std::cout << "error at result #" << std::dec << idx
              << std::hex << ": actual 0x" << actual << ", expected 0x" << expected << std::endl;
    ++mismatches;
  }

  if (mismatches == 0) {
    return 0;
  }

  std::cout << "Found " << std::dec << mismatches << " errors!" << std::endl;
  std::cout << "FAILED!" << std::endl;
  return 1;
}
// Host driver for the relu test.
// Parses the options, opens the device, generates input and reference data,
// uploads the kernel, its arguments and the input buffer, pre-fills the
// destination buffer with a marker value, runs the kernel through run_test(),
// and prints "PASSED!" on success. Any failing runtime call, or a result
// mismatch, ends the process through RT_CHECK with exit(-1).
int main(int argc, char *argv[]) {
  size_t value;

  // parse command arguments
  parse_args(argc, argv);

  // Honor -n, which was previously parsed but never used. The former fixed
  // size of 256 points remains the default when -n is absent or zero.
  const uint32_t num_points = (count != 0) ? count : 256;

  // Fixed seed so every run generates the same input data.
  std::srand(50);

  // open device connection
  std::cout << "open device connection" << std::endl;
  RT_CHECK(vx_dev_open(&device));

  // generate input data
  gen_input_data(num_points);

  // generate reference data
  gen_ref_data(num_points);

  uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
  uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t);

  std::cout << "number of points: " << num_points << std::endl;
  std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;

  // upload program
  std::cout << "upload program" << std::endl;
  RT_CHECK(vx_upload_kernel_file(device, kernel_file));

  // allocate device memory
  std::cout << "allocate device memory" << std::endl;
  RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
  kernel_arg.src_addr = value;
  RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
  kernel_arg.dst_addr = value;

  kernel_arg.num_points = num_points;

  std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;

  // allocate shared memory
  // One staging buffer is reused for every transfer, so it must hold the
  // largest of the source buffer, destination buffer and argument block.
  std::cout << "allocate shared memory" << std::endl;
  uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
                                std::max<uint32_t>(dst_buf_size,
                                  sizeof(kernel_arg_t)));
  RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));

  // upload kernel argument
  std::cout << "upload kernel argument" << std::endl;
  {
    auto buf_ptr = (int*)vx_host_ptr(staging_buf);
    memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
    RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
  }

  // upload source buffer
  {
    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < num_points; ++i) {
      buf_ptr[i] = src_data.at(i);
    }
  }
  std::cout << "upload source buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));

  // clear destination buffer
  // The marker value makes elements the kernel never wrote show up as
  // mismatches during verification.
  {
    auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
    for (uint32_t i = 0; i < num_points; ++i) {
      buf_ptr[i] = 0xdeadbeef;
    }
  }
  std::cout << "clear destination buffer" << std::endl;
  RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));

  // run tests
  std::cout << "run tests" << std::endl;
  RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));

  // cleanup
  std::cout << "cleanup" << std::endl;
  cleanup();

  std::cout << "PASSED!" << std::endl;

  return 0;
}

View File

@@ -0,0 +1,278 @@
ramulator.active_cycles_0 1072 # Total active cycles for level _0
ramulator.busy_cycles_0 1072 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0
ramulator.serving_requests_0 1496 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0
ramulator.average_serving_requests_0 0.130336 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
ramulator.active_cycles_0_0 1072 # Total active cycles for level _0_0
ramulator.busy_cycles_0_0 1384 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0
ramulator.serving_requests_0_0 1496 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0
ramulator.average_serving_requests_0_0 0.130336 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
ramulator.active_cycles_0_0_0 1030 # Total active cycles for level _0_0_0
ramulator.busy_cycles_0_0_0 1030 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0
ramulator.serving_requests_0_0_0 1440 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
ramulator.average_serving_requests_0_0_0 0.125457 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
ramulator.active_cycles_0_0_0_0 1030 # Total active cycles for level _0_0_0_0
ramulator.busy_cycles_0_0_0_0 1030 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_0
ramulator.serving_requests_0_0_0_0 1440 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
ramulator.average_serving_requests_0_0_0_0 0.125457 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
ramulator.active_cycles_0_0_0_1 0 # Total active cycles for level _0_0_0_1
ramulator.busy_cycles_0_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_1
ramulator.serving_requests_0_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
ramulator.average_serving_requests_0_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
ramulator.active_cycles_0_0_0_2 0 # Total active cycles for level _0_0_0_2
ramulator.busy_cycles_0_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_2
ramulator.serving_requests_0_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_2
ramulator.average_serving_requests_0_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_2
ramulator.active_cycles_0_0_0_3 0 # Total active cycles for level _0_0_0_3
ramulator.busy_cycles_0_0_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_3
ramulator.serving_requests_0_0_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_3
ramulator.average_serving_requests_0_0_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_3
ramulator.active_cycles_0_0_1 0 # Total active cycles for level _0_0_1
ramulator.busy_cycles_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1
ramulator.serving_requests_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1
ramulator.average_serving_requests_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1
ramulator.active_cycles_0_0_1_0 0 # Total active cycles for level _0_0_1_0
ramulator.busy_cycles_0_0_1_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_0
ramulator.serving_requests_0_0_1_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_0
ramulator.average_serving_requests_0_0_1_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_0
ramulator.active_cycles_0_0_1_1 0 # Total active cycles for level _0_0_1_1
ramulator.busy_cycles_0_0_1_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_1
ramulator.serving_requests_0_0_1_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_1
ramulator.average_serving_requests_0_0_1_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_1
ramulator.active_cycles_0_0_1_2 0 # Total active cycles for level _0_0_1_2
ramulator.busy_cycles_0_0_1_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_2
ramulator.serving_requests_0_0_1_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_2
ramulator.average_serving_requests_0_0_1_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_2
ramulator.active_cycles_0_0_1_3 0 # Total active cycles for level _0_0_1_3
ramulator.busy_cycles_0_0_1_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_3
ramulator.serving_requests_0_0_1_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_3
ramulator.average_serving_requests_0_0_1_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_3
ramulator.active_cycles_0_0_2 0 # Total active cycles for level _0_0_2
ramulator.busy_cycles_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2
ramulator.serving_requests_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2
ramulator.average_serving_requests_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2
ramulator.active_cycles_0_0_2_0 0 # Total active cycles for level _0_0_2_0
ramulator.busy_cycles_0_0_2_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_0
ramulator.serving_requests_0_0_2_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_0
ramulator.average_serving_requests_0_0_2_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_0
ramulator.active_cycles_0_0_2_1 0 # Total active cycles for level _0_0_2_1
ramulator.busy_cycles_0_0_2_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_1
ramulator.serving_requests_0_0_2_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_1
ramulator.average_serving_requests_0_0_2_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_1
ramulator.active_cycles_0_0_2_2 0 # Total active cycles for level _0_0_2_2
ramulator.busy_cycles_0_0_2_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_2
ramulator.serving_requests_0_0_2_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_2
ramulator.average_serving_requests_0_0_2_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_2
ramulator.active_cycles_0_0_2_3 0 # Total active cycles for level _0_0_2_3
ramulator.busy_cycles_0_0_2_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_3
ramulator.serving_requests_0_0_2_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_3
ramulator.average_serving_requests_0_0_2_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_3
ramulator.active_cycles_0_0_3 42 # Total active cycles for level _0_0_3
ramulator.busy_cycles_0_0_3 42 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3
ramulator.serving_requests_0_0_3 56 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3
ramulator.average_serving_requests_0_0_3 0.004879 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3
ramulator.active_cycles_0_0_3_0 0 # Total active cycles for level _0_0_3_0
ramulator.busy_cycles_0_0_3_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_0
ramulator.serving_requests_0_0_3_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_0
ramulator.average_serving_requests_0_0_3_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_0
ramulator.active_cycles_0_0_3_1 0 # Total active cycles for level _0_0_3_1
ramulator.busy_cycles_0_0_3_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_1
ramulator.serving_requests_0_0_3_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_1
ramulator.average_serving_requests_0_0_3_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_1
ramulator.active_cycles_0_0_3_2 0 # Total active cycles for level _0_0_3_2
ramulator.busy_cycles_0_0_3_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_2
ramulator.serving_requests_0_0_3_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_2
ramulator.average_serving_requests_0_0_3_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_2
ramulator.active_cycles_0_0_3_3 42 # Total active cycles for level _0_0_3_3
ramulator.busy_cycles_0_0_3_3 42 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_3
ramulator.serving_requests_0_0_3_3 56 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_3
ramulator.average_serving_requests_0_0_3_3 0.004879 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_3
ramulator.read_transaction_bytes_0 3712 # The total byte of read transaction per channel
ramulator.write_transaction_bytes_0 16000 # The total byte of write transaction per channel
ramulator.row_hits_channel_0_core 296 # Number of row hits per channel per core
ramulator.row_misses_channel_0_core 3 # Number of row misses per channel per core
ramulator.row_conflicts_channel_0_core 9 # Number of row conflicts per channel per core
ramulator.read_row_hits_channel_0_core 51 # Number of row hits for read requests per channel per core
[0] 51.0 #
ramulator.read_row_misses_channel_0_core 2 # Number of row misses for read requests per channel per core
[0] 2.0 #
ramulator.read_row_conflicts_channel_0_core 5 # Number of row conflicts for read requests per channel per core
[0] 5.0 #
ramulator.write_row_hits_channel_0_core 245 # Number of row hits for write requests per channel per core
[0] 245.0 #
ramulator.write_row_misses_channel_0_core 1 # Number of row misses for write requests per channel per core
[0] 1.0 #
ramulator.write_row_conflicts_channel_0_core 4 # Number of row conflicts for write requests per channel per core
[0] 4.0 #
ramulator.useless_activates_0_core 0 # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
ramulator.read_latency_avg_0 41.689655 # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
ramulator.read_latency_sum_0 2418 # The memory latency cycles (in memory time domain) sum for all read requests in this channel
ramulator.req_queue_length_avg_0 0.993466 # Average of read and write queue length per memory cycle per channel.
ramulator.req_queue_length_sum_0 11403 # Sum of read and write queue length per memory cycle per channel.
ramulator.read_req_queue_length_avg_0 0.200906 # Read queue length average per memory cycle per channel.
ramulator.read_req_queue_length_sum_0 2306 # Read queue length sum per memory cycle per channel.
ramulator.write_req_queue_length_avg_0 0.792560 # Write queue length average per memory cycle per channel.
ramulator.write_req_queue_length_sum_0 9097 # Write queue length sum per memory cycle per channel.
ramulator.record_read_hits 0.0 # record read hit count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_read_misses 0.0 # record_read_miss count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_read_conflicts 0.0 # record read conflict count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_hits 0.0 # record write hit count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_misses 0.0 # record write miss count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_conflicts 0.0 # record write conflict for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.active_cycles_1 1071 # Total active cycles for level _1
ramulator.busy_cycles_1 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1
ramulator.serving_requests_1 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1
ramulator.average_serving_requests_1 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
ramulator.active_cycles_1_0 1071 # Total active cycles for level _1_0
ramulator.busy_cycles_1_0 1383 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0
ramulator.serving_requests_1_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0
ramulator.average_serving_requests_1_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
ramulator.active_cycles_1_0_0 1071 # Total active cycles for level _1_0_0
ramulator.busy_cycles_1_0_0 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0
ramulator.serving_requests_1_0_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
ramulator.average_serving_requests_1_0_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
ramulator.active_cycles_1_0_0_0 1071 # Total active cycles for level _1_0_0_0
ramulator.busy_cycles_1_0_0_0 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_0
ramulator.serving_requests_1_0_0_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
ramulator.average_serving_requests_1_0_0_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
ramulator.active_cycles_1_0_0_1 0 # Total active cycles for level _1_0_0_1
ramulator.busy_cycles_1_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_1
ramulator.serving_requests_1_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
ramulator.average_serving_requests_1_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
ramulator.active_cycles_1_0_0_2 0 # Total active cycles for level _1_0_0_2
ramulator.busy_cycles_1_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_2
ramulator.serving_requests_1_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_2
ramulator.average_serving_requests_1_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_2
ramulator.active_cycles_1_0_0_3 0 # Total active cycles for level _1_0_0_3
ramulator.busy_cycles_1_0_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_3
ramulator.serving_requests_1_0_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_3
ramulator.average_serving_requests_1_0_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_3
ramulator.active_cycles_1_0_1 0 # Total active cycles for level _1_0_1
ramulator.busy_cycles_1_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1
ramulator.serving_requests_1_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1
ramulator.average_serving_requests_1_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1
ramulator.active_cycles_1_0_1_0 0 # Total active cycles for level _1_0_1_0
ramulator.busy_cycles_1_0_1_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_0
ramulator.serving_requests_1_0_1_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_0
ramulator.average_serving_requests_1_0_1_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_0
ramulator.active_cycles_1_0_1_1 0 # Total active cycles for level _1_0_1_1
ramulator.busy_cycles_1_0_1_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_1
ramulator.serving_requests_1_0_1_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_1
ramulator.average_serving_requests_1_0_1_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_1
ramulator.active_cycles_1_0_1_2 0 # Total active cycles for level _1_0_1_2
ramulator.busy_cycles_1_0_1_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_2
ramulator.serving_requests_1_0_1_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_2
ramulator.average_serving_requests_1_0_1_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_2
ramulator.active_cycles_1_0_1_3 0 # Total active cycles for level _1_0_1_3
ramulator.busy_cycles_1_0_1_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_3
ramulator.serving_requests_1_0_1_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_3
ramulator.average_serving_requests_1_0_1_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_3
ramulator.active_cycles_1_0_2 0 # Total active cycles for level _1_0_2
ramulator.busy_cycles_1_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2
ramulator.serving_requests_1_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2
ramulator.average_serving_requests_1_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2
ramulator.active_cycles_1_0_2_0 0 # Total active cycles for level _1_0_2_0
ramulator.busy_cycles_1_0_2_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_0
ramulator.serving_requests_1_0_2_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_0
ramulator.average_serving_requests_1_0_2_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_0
ramulator.active_cycles_1_0_2_1 0 # Total active cycles for level _1_0_2_1
ramulator.busy_cycles_1_0_2_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_1
ramulator.serving_requests_1_0_2_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_1
ramulator.average_serving_requests_1_0_2_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_1
ramulator.active_cycles_1_0_2_2 0 # Total active cycles for level _1_0_2_2
ramulator.busy_cycles_1_0_2_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_2
ramulator.serving_requests_1_0_2_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_2
ramulator.average_serving_requests_1_0_2_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_2
ramulator.active_cycles_1_0_2_3 0 # Total active cycles for level _1_0_2_3
ramulator.busy_cycles_1_0_2_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_3
ramulator.serving_requests_1_0_2_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_3
ramulator.average_serving_requests_1_0_2_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_3
ramulator.active_cycles_1_0_3 0 # Total active cycles for level _1_0_3
ramulator.busy_cycles_1_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3
ramulator.serving_requests_1_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3
ramulator.average_serving_requests_1_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3
ramulator.active_cycles_1_0_3_0 0 # Total active cycles for level _1_0_3_0
ramulator.busy_cycles_1_0_3_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_0
ramulator.serving_requests_1_0_3_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_0
ramulator.average_serving_requests_1_0_3_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_0
ramulator.active_cycles_1_0_3_1 0 # Total active cycles for level _1_0_3_1
ramulator.busy_cycles_1_0_3_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_1
ramulator.serving_requests_1_0_3_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_1
ramulator.average_serving_requests_1_0_3_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_1
ramulator.active_cycles_1_0_3_2 0 # Total active cycles for level _1_0_3_2
ramulator.busy_cycles_1_0_3_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_2
ramulator.serving_requests_1_0_3_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_2
ramulator.average_serving_requests_1_0_3_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_2
ramulator.active_cycles_1_0_3_3 0 # Total active cycles for level _1_0_3_3
ramulator.busy_cycles_1_0_3_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_3
ramulator.serving_requests_1_0_3_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_3
ramulator.average_serving_requests_1_0_3_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_3
ramulator.read_transaction_bytes_1 3584 # The total byte of read transaction per channel
ramulator.write_transaction_bytes_1 14848 # The total byte of write transaction per channel
ramulator.row_hits_channel_1_core 276 # Number of row hits per channel per core
ramulator.row_misses_channel_1_core 2 # Number of row misses per channel per core
ramulator.row_conflicts_channel_1_core 10 # Number of row conflicts per channel per core
ramulator.read_row_hits_channel_1_core 49 # Number of row hits for read requests per channel per core
[0] 49.0 #
ramulator.read_row_misses_channel_1_core 1 # Number of row misses for read requests per channel per core
[0] 1.0 #
ramulator.read_row_conflicts_channel_1_core 6 # Number of row conflicts for read requests per channel per core
[0] 6.0 #
ramulator.write_row_hits_channel_1_core 227 # Number of row hits for write requests per channel per core
[0] 227.0 #
ramulator.write_row_misses_channel_1_core 1 # Number of row misses for write requests per channel per core
[0] 1.0 #
ramulator.write_row_conflicts_channel_1_core 4 # Number of row conflicts for write requests per channel per core
[0] 4.0 #
ramulator.useless_activates_1_core 0 # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
ramulator.read_latency_avg_1 34.642857 # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
ramulator.read_latency_sum_1 1940 # The memory latency cycles (in memory time domain) sum for all read requests in this channel
ramulator.req_queue_length_avg_1 0.524830 # Average of read and write queue length per memory cycle per channel.
ramulator.req_queue_length_sum_1 6024 # Sum of read and write queue length per memory cycle per channel.
ramulator.read_req_queue_length_avg_1 0.159261 # Read queue length average per memory cycle per channel.
ramulator.read_req_queue_length_sum_1 1828 # Read queue length sum per memory cycle per channel.
ramulator.write_req_queue_length_avg_1 0.365569 # Write queue length average per memory cycle per channel.
ramulator.write_req_queue_length_sum_1 4196 # Write queue length sum per memory cycle per channel.
ramulator.record_read_hits 0.0 # record read hit count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_read_misses 0.0 # record_read_miss count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_read_conflicts 0.0 # record read conflict count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_hits 0.0 # record write hit count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_misses 0.0 # record write miss count for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_conflicts 0.0 # record write conflict for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.dram_capacity 8589934592 # Number of bytes in simulated DRAM
ramulator.dram_cycles 11478 # Number of DRAM cycles simulated
ramulator.incoming_requests 596 # Number of incoming requests to DRAM
ramulator.read_requests 114 # Number of incoming read requests to DRAM per core
[0] 114.0 #
ramulator.write_requests 482 # Number of incoming write requests to DRAM per core
[0] 482.0 #
ramulator.ramulator_active_cycles 2049 # The total number of cycles that the DRAM part is active (serving R/W)
ramulator.incoming_requests_per_channel 596.0 # Number of incoming requests to each DRAM channel
[0] 308.0 #
[1] 288.0 #
ramulator.incoming_read_reqs_per_channel 114.0 # Number of incoming read requests to each DRAM channel
[0] 58.0 #
[1] 56.0 #
ramulator.physical_page_replacement 0 # The number of times that physical page replacement happens.
ramulator.maximum_bandwidth 38400000000 # The theoretical maximum bandwidth (Bps)
ramulator.in_queue_req_num_sum 17427 # Sum of read/write queue length
ramulator.in_queue_read_req_num_sum 4134 # Sum of read queue length
ramulator.in_queue_write_req_num_sum 13293 # Sum of write queue length
ramulator.in_queue_req_num_avg 1.518296 # Average of read/write queue length per memory cycle
ramulator.in_queue_read_req_num_avg 0.360167 # Average of read queue length per memory cycle
ramulator.in_queue_write_req_num_avg 1.158129 # Average of write queue length per memory cycle
ramulator.record_read_requests 0.0 # record read requests for this core when it reaches request limit or to the end
[0] 0.0 #
ramulator.record_write_requests 0.0 # record write requests for this core when it reaches request limit or to the end
[0] 0.0 #

BIN
tests/regression/relu/relu Executable file

Binary file not shown.