Compare commits
35 Commits
wu-archite
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2f4fd11c93 | ||
|
|
6563ed696e | ||
|
|
92ed21f83f | ||
|
|
46a60cf58e | ||
|
|
6ffb8c37e9 | ||
|
|
7da7a1a983 | ||
|
|
5b89ff2741 | ||
|
|
8296e6be0f | ||
|
|
719b8048ab | ||
|
|
d34177ea9c | ||
|
|
b97e94b8ed | ||
|
|
576e7aab78 | ||
|
|
905b1877fb | ||
|
|
3846d2ae59 | ||
|
|
c90fe56588 | ||
|
|
9efdd2ebb7 | ||
|
|
0caf3ad471 | ||
|
|
2fa94b9c21 | ||
|
|
62ebe0312f | ||
|
|
19734fc5b6 | ||
|
|
fdc0fdc958 | ||
|
|
3e290f6321 | ||
|
|
c24916b5e0 | ||
|
|
0c40864522 | ||
|
|
f2f1249b93 | ||
|
|
f46383f350 | ||
|
|
8caf476b1a | ||
|
|
9cf5a29917 | ||
|
|
d81e4085e2 | ||
|
|
b9d1684582 | ||
|
|
f8fc305cbd | ||
|
|
70e1e2089d | ||
|
|
547216d43f | ||
|
|
bb4f38d000 | ||
|
|
7c39cc2b5b |
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
*.dump
|
||||
*.o
|
||||
*.bin
|
||||
*.elf
|
||||
.depend
|
||||
*.a
|
||||
*.so
|
||||
*.log
|
||||
*.vcd
|
||||
blackbox.*.cache
|
||||
@@ -29,6 +29,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(hansung): This is code running on the CPU, but CPU is still the one
|
||||
// that keeps track of allocation of the GPU memory. GPU kernel simply runs
|
||||
// assuming that CPU has done the right thing and returned a safe and valid
|
||||
// chunk of memory.
|
||||
int allocate(uint64_t size, uint64_t* addr) {
|
||||
if (size == 0 || addr == nullptr)
|
||||
return -1;
|
||||
@@ -403,4 +407,4 @@ private:
|
||||
page_t* pages_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
@@ -7,5 +7,12 @@ uint64_t aligned_size(uint64_t size, uint64_t alignment);
|
||||
bool is_aligned(uint64_t addr, uint64_t alignment);
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x00000000
|
||||
#define LOCAL_MEM_SIZE 4294967296 // 4 GB
|
||||
// NOTE(hansung): This is changed to something more akin to be in a heap area
|
||||
// for a CPU userspace program, since that works better with Chipyard's default
|
||||
// memory mapping scheme (0x80000000 and above). This gives us a pretty small
|
||||
// space though.
|
||||
#define ALLOC_BASE_ADDR 0xc0000000ul
|
||||
#define LOCAL_MEM_SIZE 0x40000000ul // 1 GB
|
||||
// #define ALLOC_BASE_ADDR 0x00000000
|
||||
// #define LOCAL_MEM_SIZE 4294967296 // 4 GB
|
||||
#define DEVICE_MAX_ADDR 0xfffffffful
|
||||
|
||||
@@ -86,7 +86,7 @@ public:
|
||||
|
||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > LOCAL_MEM_SIZE)
|
||||
if (dest_addr + asize > DEVICE_MAX_ADDR)
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
|
||||
@@ -104,7 +104,7 @@ public:
|
||||
|
||||
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > LOCAL_MEM_SIZE)
|
||||
if (src_addr + asize > DEVICE_MAX_ADDR)
|
||||
return -1;
|
||||
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
@@ -352,4 +352,4 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ public:
|
||||
|
||||
int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > LOCAL_MEM_SIZE)
|
||||
if (dest_addr + asize > DEVICE_MAX_ADDR)
|
||||
return -1;
|
||||
|
||||
ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
|
||||
@@ -108,7 +108,7 @@ public:
|
||||
|
||||
int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > LOCAL_MEM_SIZE)
|
||||
if (src_addr + asize > DEVICE_MAX_ADDR)
|
||||
return -1;
|
||||
|
||||
ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
|
||||
@@ -354,4 +354,4 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
7
env.my-pocl-riscv32.sh
Normal file
7
env.my-pocl-riscv32.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
export VORTEX_ENV="my-pocl-riscv32"
|
||||
export LLVM_PREFIX=/scratch/hansung/build/llvm-riscv32-unknown-linux-gnu-10.0.1
|
||||
export POCL_CC_PATH=/scratch/hansung/build/pocl-riscv32/compiler
|
||||
export POCL_RT_PATH=/scratch/hansung/build/pocl-riscv32/runtime
|
||||
export VERILATOR_ROOT=/scratch/hansung/build/vortex-toolchain-prebuilt/verilator
|
||||
export RISCV_TOOLCHAIN_PATH=/scratch/hansung/build/vortex-toolchain-prebuilt
|
||||
export PATH="/scratch/hansung/build/vortex-toolchain-prebuilt/verilator/bin:$PATH"
|
||||
7
env.my-pocl.sh
Normal file
7
env.my-pocl.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
export VORTEX_ENV="my-pocl"
|
||||
export LLVM_PREFIX=/scratch/hansung/build/vortex-toolchain-prebuilt/llvm-riscv/
|
||||
export POCL_CC_PATH=/scratch/hansung/build/pocl-vortex/compiler
|
||||
export POCL_RT_PATH=/scratch/hansung/build/pocl-vortex/runtime
|
||||
export VERILATOR_ROOT=/scratch/hansung/build/vortex-toolchain-prebuilt/verilator
|
||||
export RISCV_TOOLCHAIN_PATH=/scratch/hansung/build/vortex-toolchain-prebuilt
|
||||
export PATH="/scratch/hansung/build/vortex-toolchain-prebuilt/verilator/bin:$PATH"
|
||||
17
env.vortex-prebuilt.sh
Normal file
17
env.vortex-prebuilt.sh
Normal file
@@ -0,0 +1,17 @@
|
||||
if [ -n "$VORTEX_ENV" ]
|
||||
then
|
||||
echo "VORTEX_ENV already set. Exiting."
|
||||
return
|
||||
fi
|
||||
|
||||
# PREBUILT_DIR=/scratch/hansung/build/vortex-toolchain-prebuilt-d2ba5df-230831
|
||||
PREBUILT_DIR=/scratch/hansung/build/vortex-toolchain-prebuilt-230831
|
||||
|
||||
export VORTEX_ENV="vortex-prebuilt"
|
||||
export LLVM_PREFIX=$PREBUILT_DIR/llvm-riscv/
|
||||
export POCL_CC_PATH=$PREBUILT_DIR/pocl/compiler
|
||||
export POCL_RT_PATH=$PREBUILT_DIR/pocl/runtime
|
||||
export VERILATOR_ROOT=$PREBUILT_DIR/verilator
|
||||
export RISCV_TOOLCHAIN_PATH=$PREBUILT_DIR/
|
||||
export PATH="$BUILDDIR/vortex-toolchain-prebuilt-d2ba5df-230831/verilator/bin:$PATH"
|
||||
export PS1="($VORTEX_ENV) $PS1"
|
||||
7
env.vortex-prebuilt2.sh
Normal file
7
env.vortex-prebuilt2.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
export VORTEX_ENV="vortex-prebuilt2"
|
||||
export LLVM_PREFIX=/scratch/hansung/build/vortex-toolchain-prebuilt/llvm-riscv2
|
||||
export POCL_CC_PATH=/scratch/hansung/build/vortex-toolchain-prebuilt/pocl2/compiler
|
||||
export POCL_RT_PATH=/scratch/hansung/build/vortex-toolchain-prebuilt/pocl2/runtime
|
||||
export VERILATOR_ROOT=/scratch/hansung/build/vortex-toolchain-prebuilt/verilator
|
||||
export RISCV_TOOLCHAIN_PATH=/scratch/hansung/build/vortex-toolchain-prebuilt
|
||||
export PATH="/scratch/hansung/build/vortex-toolchain-prebuilt/verilator/bin:$PATH"
|
||||
@@ -9,6 +9,7 @@ module VX_execute #(
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
// NOTE(hansung): this comes out of VX_lsu_unit
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
@@ -234,4 +235,4 @@ module VX_execute #(
|
||||
&& (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
@@ -34,7 +34,7 @@ module VX_lsu_unit #(
|
||||
wire [`INST_LSU_BITS-1:0] req_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire req_wb;
|
||||
wire req_wb; // NOTE(hansung): 0:load, 1:store
|
||||
wire [`NW_BITS-1:0] req_wid;
|
||||
wire [31:0] req_pc;
|
||||
wire req_is_dup;
|
||||
@@ -369,4 +369,4 @@ module VX_lsu_unit #(
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
2
hw/rtl/cache/VX_cache.sv
vendored
2
hw/rtl/cache/VX_cache.sv
vendored
@@ -250,6 +250,8 @@ module VX_cache #(
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_c;
|
||||
wire mem_rsp_ready_c;
|
||||
|
||||
// NOTE(hansung): non-cacheable addresses. Although is this applied for
|
||||
// all address range?
|
||||
if (NC_ENABLE) begin
|
||||
VX_nc_bypass #(
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
|
||||
1
hw/rtl/cache/VX_cache_define.vh
vendored
1
hw/rtl/cache/VX_cache_define.vh
vendored
@@ -55,6 +55,7 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// NOTE(hansung): what does CORE_TAG_ID_BITS == 0 mean?
|
||||
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
|
||||
|
||||
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||
|
||||
11
hw/rtl/cache/VX_core_req_bank_sel.sv
vendored
11
hw/rtl/cache/VX_core_req_bank_sel.sv
vendored
@@ -53,6 +53,7 @@ module VX_core_req_bank_sel #(
|
||||
|
||||
wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel;
|
||||
// NOTE(hansung): "bank id"
|
||||
wire [NUM_REQS-1:0][`UP(`BANK_SELECT_BITS)-1:0] core_req_bid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
@@ -123,6 +124,9 @@ module VX_core_req_bank_sel #(
|
||||
per_bank_core_req_tid_r = 'x;
|
||||
req_select_table_r = 'x;
|
||||
|
||||
// NOTE(hansung): if we're simply overwriting assignment in
|
||||
// a loop with decrementing index, wouldn't this be unfair
|
||||
// for reqs with higher index?
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
per_bank_core_req_valid_r[core_req_bid[i]] = 1;
|
||||
@@ -184,6 +188,8 @@ module VX_core_req_bank_sel #(
|
||||
end
|
||||
|
||||
end else begin
|
||||
// NOTE(hansung): this is what the default config elaborates, i.e.
|
||||
// NUM_REQS > 1, NUM_PORTS == 1
|
||||
|
||||
always @(*) begin
|
||||
per_bank_core_req_valid_r = 0;
|
||||
@@ -204,6 +210,8 @@ module VX_core_req_bank_sel #(
|
||||
per_bank_core_req_byteen_r[core_req_bid[i]]= core_req_byteen[i];
|
||||
per_bank_core_req_data_r[core_req_bid[i]] = core_req_data[i];
|
||||
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
|
||||
// NOTE(hansung): this marks which req 'won' mapping
|
||||
// to this bank eventually
|
||||
per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i);
|
||||
end
|
||||
end
|
||||
@@ -216,6 +224,7 @@ module VX_core_req_bank_sel #(
|
||||
core_req_ready_r = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_r[i]) begin
|
||||
// NOTE(hansung): this flows back to upstream
|
||||
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
|
||||
end
|
||||
end
|
||||
@@ -311,4 +320,4 @@ module VX_core_req_bank_sel #(
|
||||
assign bank_stalls = bank_stalls_r;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
ifeq ($(XLEN),32)
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv64-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf-
|
||||
|
||||
@@ -97,6 +97,8 @@ static void spawn_tasks_rem_cb(int thread_mask) {
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
// NOTE(hansung): where is this used? The main section in the POCL binary calls
|
||||
// `vx_spawn_kernel` but not this one
|
||||
void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
||||
// device specs
|
||||
int NC = vx_num_cores();
|
||||
@@ -281,9 +283,12 @@ void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
|
||||
char log2X = fast_log2(X);
|
||||
|
||||
//--
|
||||
wspawn_kernel_args_t wspawn_args = {
|
||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
|
||||
};
|
||||
wspawn_kernel_args_t wspawn_args = {
|
||||
ctx, callback, arg, core_id * wgs_per_core /*offset*/,
|
||||
fW /*N*/, rW /*R*/, 0 /*NW*/, isXYpow2,
|
||||
isXpow2, log2XY, log2X};
|
||||
|
||||
// NOTE(hansung): core_id is capped at NUM_CORES_MAX = 32
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
@@ -304,4 +309,4 @@ void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -77,11 +77,14 @@ init_regs:
|
||||
# allocate stack region for a threads on the processor
|
||||
# set stack pointer
|
||||
li sp, SMEM_BASE_ADDR # load stack base address
|
||||
#if SM_ENABLE
|
||||
csrr a0, CSR_LTID # get local thread id
|
||||
#else
|
||||
# NOTE(hansung): Force per-global-thread stack allocation, since
|
||||
# we're experimenting with different memory hierarchy (i.e. no private cache)
|
||||
# and it's easy to miss setting SM_ENABLE accordingly.
|
||||
# #if SM_ENABLE
|
||||
# csrr a0, CSR_LTID # get local thread id
|
||||
# #else
|
||||
csrr a0, CSR_GTID # get global thread id
|
||||
#endif
|
||||
# #endif
|
||||
sll a1, a0, STACK_LOG2_SIZE
|
||||
sub sp, sp, a1
|
||||
|
||||
@@ -107,4 +110,4 @@ RETURN:
|
||||
.weak __dso_handle
|
||||
__dso_handle:
|
||||
.long 0
|
||||
|
||||
|
||||
|
||||
@@ -683,4 +683,4 @@ bool Core::check_exit() const {
|
||||
bool Core::running() const {
|
||||
bool is_running = (committed_instrs_ != issued_instrs_);
|
||||
return is_running;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -690,8 +690,14 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t mem_data = 0;
|
||||
core_->dcache_read(&mem_data, mem_addr, mem_bytes);
|
||||
trace->mem_addrs.at(t).push_back({mem_addr, mem_bytes});
|
||||
DP(4, "LOAD MEM: ADDRESS=0x" << std::hex << mem_addr << ", DATA=0x" << mem_data);
|
||||
trace->mem_addrs.at(t).push_back({mem_addr, mem_bytes});
|
||||
DP(2, "LOAD MEM: CYCLE=" << std::dec << SimPlatform::instance().cycles()
|
||||
<< ", CORE=" << core_->id()
|
||||
<< ", WARP=" << id_
|
||||
<< ", THREAD=" << t
|
||||
<< ", ADDRESS=0x" << std::hex << mem_addr
|
||||
<< ", DATA=0x" << mem_data << std::dec
|
||||
<< ", BYTES=" << mem_bytes);
|
||||
switch (func3) {
|
||||
case 0:
|
||||
// RV32I: LB
|
||||
@@ -731,7 +737,14 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
core_->dcache_read(&mem_data, mem_addr, 4);
|
||||
Word *result_ptr = (Word *)(vd.data() + i);
|
||||
*result_ptr = mem_data;
|
||||
DP(4, "LOAD MEM: ADDRESS=0x" << std::hex << mem_addr << ", DATA=0x" << mem_data);
|
||||
DP(2, "LOAD MEM: CYCLE=" << std::dec << SimPlatform::instance().cycles()
|
||||
<< ", CORE=" << core_->id()
|
||||
<< ", WARP=" << id_
|
||||
<< ", VLEN=" << vl_
|
||||
<< ", VID=" << i
|
||||
<< ", ADDRESS=0x" << std::hex << mem_addr
|
||||
<< ", DATA=0x" << mem_data << std::dec
|
||||
<< ", BYTES=" << 4);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -762,7 +775,13 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
mem_data &= mask;
|
||||
}
|
||||
trace->mem_addrs.at(t).push_back({mem_addr, mem_bytes});
|
||||
DP(4, "STORE MEM: ADDRESS=0x" << std::hex << mem_addr << ", DATA=0x" << mem_data);
|
||||
DP(2, "STORE MEM: CYCLE=" << std::dec << SimPlatform::instance().cycles()
|
||||
<< ", CORE=" << core_->id()
|
||||
<< ", WARP=" << id_
|
||||
<< ", THREAD=" << t
|
||||
<< ", ADDRESS=0x" << std::hex << mem_addr
|
||||
<< ", DATA=0x" << mem_data << std::dec
|
||||
<< ", BYTES=" << mem_bytes);
|
||||
switch (func3) {
|
||||
case 0:
|
||||
case 1:
|
||||
@@ -782,7 +801,14 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
// store word and unit strided (not checking for unit stride)
|
||||
uint32_t mem_data = *(uint32_t *)(vreg_file_.at(instr.getVs3()).data() + i);
|
||||
core_->dcache_write(&mem_data, mem_addr, 4);
|
||||
DP(4, "STORE MEM: ADDRESS=0x" << std::hex << mem_addr << ", DATA=0x" << mem_data);
|
||||
DP(2, "STORE MEM: CYCLE=" << std::dec << SimPlatform::instance().cycles()
|
||||
<< ", CORE=" << core_->id()
|
||||
<< ", WARP=" << id_
|
||||
<< ", VLEN=" << vl_
|
||||
<< ", VID=" << i
|
||||
<< ", ADDRESS=0x" << std::hex << mem_addr
|
||||
<< ", DATA=0x" << mem_data << std::dec
|
||||
<< ", BYTES=" << 4);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -878,6 +904,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
case FENCE: {
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->lsu.type = LsuType::FENCE;
|
||||
DP(2, "FENCE MEM");
|
||||
break;
|
||||
}
|
||||
case FCI: {
|
||||
@@ -1304,6 +1331,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
} else {
|
||||
tmask_.reset();
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
// NOTE(hansung): `ts` is the left-most lane currently enabled.
|
||||
// Doing this only respects the operand of that lane, even though
|
||||
// every lane might have different operand for the tmask.
|
||||
tmask_.set(t, rsdata.at(ts)[0].i & (1 << t));
|
||||
}
|
||||
}
|
||||
@@ -1397,6 +1427,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->lsu.type = LsuType::PREFETCH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
DP(2, "PREFETCH MEM");
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
continue;
|
||||
@@ -2349,4 +2380,4 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
DP(3, "*** Next PC: " << std::hex << nextPC << std::dec);
|
||||
PC_ = nextPC;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,4 +175,4 @@ void Processor::attach_ram(RAM* mem) {
|
||||
|
||||
int Processor::run() {
|
||||
return impl_->run();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
@@ -25,4 +26,4 @@ private:
|
||||
Core* core_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ PROJECT=DotProduct
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
lib$(PROJECT).a: DotProduct.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib:$(LLVM_PREFIX)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a DotProduct.cl
|
||||
|
||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
6
tests/opencl/convolution/.depend
Normal file
6
tests/opencl/convolution/.depend
Normal file
@@ -0,0 +1,6 @@
|
||||
main.o: main.cpp \
|
||||
/scratch/hansung/build/vortex-toolchain-prebuilt/pocl/runtime/include/CL/cl.h \
|
||||
/scratch/hansung/build/vortex-toolchain-prebuilt/pocl/runtime/include/CL/cl_version.h \
|
||||
/scratch/hansung/build/vortex-toolchain-prebuilt/pocl/runtime/include/CL/cl_platform.h \
|
||||
utils.h
|
||||
utils.o: utils.cpp utils.h
|
||||
@@ -17,7 +17,7 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I$(POCL_RT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/simx -Wl,-rpath $(VORTEX_DRV_PATH)/simx -lOpenCL -lvortex
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
||||
@@ -37,7 +37,7 @@ PROJECT=reduce0
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
lib$(PROJECT).a: oclReduction_kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib:$(LLVM_PREFIX)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a oclReduction_kernel.cl
|
||||
|
||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
@@ -37,7 +37,7 @@ SRCS = main.cc
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
@@ -78,6 +78,25 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_operand_file(const char* filename, void* data, size_t size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "wb");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t wsize = fwrite(data, size, 1, fp);
|
||||
if (wsize != 1) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
///
|
||||
@@ -209,6 +228,11 @@ int main(int argc, char **argv) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||
}
|
||||
|
||||
// NOTE(hansung): Dump operand buffer to a file
|
||||
if (write_operand_file("saxpy.input.src.bin", h_src, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
||||
free(h_src);
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ SRCS = main.cc
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
@@ -52,6 +52,25 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_operand_file(const char* filename, void* data, size_t size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "wb");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t wsize = fwrite(data, size, 1, fp);
|
||||
if (wsize != 1) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void matmul(float *C, const float* A, const float *B, int M, int N, int K) {
|
||||
for (int m = 0; m < M; ++m) {
|
||||
for (int n = 0; n < N; ++n) {
|
||||
@@ -194,6 +213,12 @@ int main (int argc, char **argv) {
|
||||
//printf("*** [%d]: h_a=%f, h_b=%f\n", i, h_a[i], h_b[i]);
|
||||
}
|
||||
|
||||
// NOTE(hansung): Dump operand buffer to a file
|
||||
if (write_operand_file("sgemm.input.a.bin", h_a, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
if (write_operand_file("sgemm.input.b.bin", h_b, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
|
||||
|
||||
|
||||
1
tests/opencl/tid/.gitignore
vendored
Normal file
1
tests/opencl/tid/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
tid
|
||||
71
tests/opencl/tid/Makefile
Normal file
71
tests/opencl/tid/Makefile
Normal file
@@ -0,0 +1,71 @@
|
||||
XLEN ?= 32
|
||||
|
||||
LLVM_PREFIX ?= /opt/llvm-riscv
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
|
||||
POCL_CC_PATH ?= /opt/pocl/compiler
|
||||
POCL_RT_PATH ?= /opt/pocl/runtime
|
||||
|
||||
OPTS ?= -n64
|
||||
|
||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
|
||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex -float-abi=hard -code-model=small"
|
||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
|
||||
|
||||
CXXFLAGS += -I$(POCL_RT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = tid
|
||||
|
||||
SRCS = main.cc
|
||||
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-asesim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-vlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-all: clean
|
||||
rm -rf *.pocl *.dump
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
6
tests/opencl/tid/kernel.cl
Normal file
6
tests/opencl/tid/kernel.cl
Normal file
@@ -0,0 +1,6 @@
|
||||
__kernel void tid()
|
||||
{
|
||||
__global int *out = (__global int *)0xc0000000;
|
||||
int gid = get_global_id(0);
|
||||
out[gid] = gid;
|
||||
}
|
||||
221
tests/opencl/tid/main.cc
Normal file
221
tests/opencl/tid/main.cc
Normal file
@@ -0,0 +1,221 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <CL/opencl.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <chrono>
|
||||
|
||||
#define KERNEL_NAME "tid"
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
cl_int _err = _expr; \
|
||||
if (_err == CL_SUCCESS) \
|
||||
break; \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (0)
|
||||
|
||||
#define CL_CHECK2(_expr) \
|
||||
({ \
|
||||
cl_int _err = CL_INVALID_VALUE; \
|
||||
decltype(_expr) _ret = _expr; \
|
||||
if (_err != CL_SUCCESS) { \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_operand_file(const char* filename, void* data, size_t size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "wb");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t wsize = fwrite(data, size, 1, fp);
|
||||
if (wsize != 1) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_mem a_memobj = NULL;
|
||||
cl_mem b_memobj = NULL;
|
||||
cl_mem c_memobj = NULL;
|
||||
float *h_a = NULL;
|
||||
float *h_b = NULL;
|
||||
float *h_c = NULL;
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
static void cleanup() {
|
||||
if (commandQueue) clReleaseCommandQueue(commandQueue);
|
||||
if (kernel) clReleaseKernel(kernel);
|
||||
if (program) clReleaseProgram(program);
|
||||
if (a_memobj) clReleaseMemObject(a_memobj);
|
||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||
if (context) clReleaseContext(context);
|
||||
if (device_id) clReleaseDevice(device_id);
|
||||
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
if (h_a) free(h_a);
|
||||
if (h_b) free(h_b);
|
||||
if (h_c) free(h_c);
|
||||
}
|
||||
|
||||
int size = 64;
|
||||
|
||||
static void show_usage() {
|
||||
printf("Usage: [-n size] [-h: help]\n");
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
size = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
printf("Workload size=%d\n", size);
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
cl_platform_id platform_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
printf("Create context\n");
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
printf("Allocate device buffers\n");
|
||||
size_t nbytes = size * sizeof(float);
|
||||
// a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
// b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
// c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||
|
||||
printf("Create program from kernel source\n");
|
||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
||||
if (program == NULL) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
// Create kernel
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
// Set kernel arguments
|
||||
// CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
// CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
// CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
|
||||
// Allocate memories for input arrays and output arrays.
|
||||
h_a = (float*)malloc(nbytes);
|
||||
h_b = (float*)malloc(nbytes);
|
||||
h_c = (float*)malloc(nbytes);
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(
|
||||
context, device_id, 0 /* command-queue properties */, &_err));
|
||||
|
||||
// printf("Upload source buffers\n");
|
||||
// CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a, 0, NULL, NULL));
|
||||
// CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b, 0, NULL, NULL));
|
||||
|
||||
printf("Execute the kernel\n");
|
||||
size_t global_work_size[1] = {size};
|
||||
size_t local_work_size[1] = {1};
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
// printf("Download destination buffer\n");
|
||||
// CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c, 0, NULL, NULL));
|
||||
|
||||
// printf("Verify result\n");
|
||||
// int errors = 0;
|
||||
// for (int i = 0; i < size; ++i) {
|
||||
// float ref = h_a[i] + h_b[i];
|
||||
// if (!almost_equal(h_c[i], ref)) {
|
||||
// if (errors < 100)
|
||||
// printf("*** error: [%d] expected=%f, actual=%f, a=%f, b=%f\n", i, ref, h_c[i], h_a[i], h_b[i]);
|
||||
// ++errors;
|
||||
// }
|
||||
// }
|
||||
// if (0 == errors) {
|
||||
// printf("PASSED!\n");
|
||||
// } else {
|
||||
// printf("FAILED! - %d errors\n", errors);
|
||||
// }
|
||||
|
||||
// Clean up
|
||||
cleanup();
|
||||
|
||||
// return errors;
|
||||
return 0;
|
||||
}
|
||||
2
tests/opencl/vecadd-loop/.gitignore
vendored
Normal file
2
tests/opencl/vecadd-loop/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
vecadd-loop
|
||||
*.ll
|
||||
72
tests/opencl/vecadd-loop/Makefile
Normal file
72
tests/opencl/vecadd-loop/Makefile
Normal file
@@ -0,0 +1,72 @@
|
||||
XLEN ?= 32
|
||||
|
||||
LLVM_PREFIX ?= /opt/llvm-riscv
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
|
||||
POCL_CC_PATH ?= /opt/pocl/compiler
|
||||
POCL_RT_PATH ?= /opt/pocl/runtime
|
||||
|
||||
OPTS ?= -n64
|
||||
|
||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
|
||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex -float-abi=hard -code-model=small"
|
||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
|
||||
|
||||
CXXFLAGS += -I$(POCL_RT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = vecadd-loop
|
||||
|
||||
SRCS = main.cc
|
||||
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-asesim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-vlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
cp -f args.bin $(PROJECT).args.bin
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-all: clean
|
||||
rm -rf *.pocl *.dump
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
0
tests/opencl/vecadd-loop/README
Normal file
0
tests/opencl/vecadd-loop/README
Normal file
12
tests/opencl/vecadd-loop/kernel.alll1hit.loop1000.cl
Normal file
12
tests/opencl/vecadd-loop/kernel.alll1hit.loop1000.cl
Normal file
@@ -0,0 +1,12 @@
|
||||
__kernel void vecadd (__global const float *A,
|
||||
__global const float *B,
|
||||
__global float *C)
|
||||
{
|
||||
int gid = get_global_id(0);
|
||||
float sum = 0.;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
int addr = gid + (i % 2);
|
||||
sum += A[addr] + B[addr];
|
||||
}
|
||||
C[gid] = sum;
|
||||
}
|
||||
13
tests/opencl/vecadd-loop/kernel.cl
Normal file
13
tests/opencl/vecadd-loop/kernel.cl
Normal file
@@ -0,0 +1,13 @@
|
||||
__kernel void vecadd_loop (__global volatile const float *A,
|
||||
__global volatile const float *B,
|
||||
__global volatile float *C)
|
||||
{
|
||||
int gid = get_global_id(0);
|
||||
float sum = 0.;
|
||||
for (int i = 0; i < 500; i++) {
|
||||
// int addr = gid + (i % 2);
|
||||
int addr = gid;
|
||||
C[addr] += A[addr] + B[addr];
|
||||
}
|
||||
// C[gid] = sum;
|
||||
}
|
||||
9
tests/opencl/vecadd-loop/kernel.cl.loop
Normal file
9
tests/opencl/vecadd-loop/kernel.cl.loop
Normal file
@@ -0,0 +1,9 @@
|
||||
__kernel void vecadd_loop (__global volatile const float *A,
|
||||
__global volatile const float *B,
|
||||
__global volatile float *C)
|
||||
{
|
||||
int gid = get_global_id(0);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
C[gid] = A[gid] + B[gid];
|
||||
}
|
||||
}
|
||||
BIN
tests/opencl/vecadd-loop/kernel.pocl
Normal file
BIN
tests/opencl/vecadd-loop/kernel.pocl
Normal file
Binary file not shown.
250
tests/opencl/vecadd-loop/main.cc
Normal file
250
tests/opencl/vecadd-loop/main.cc
Normal file
@@ -0,0 +1,250 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <CL/opencl.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <chrono>
|
||||
|
||||
#define KERNEL_NAME "vecadd_loop"
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
cl_int _err = _expr; \
|
||||
if (_err == CL_SUCCESS) \
|
||||
break; \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (0)
|
||||
|
||||
#define CL_CHECK2(_expr) \
|
||||
({ \
|
||||
cl_int _err = CL_INVALID_VALUE; \
|
||||
decltype(_expr) _ret = _expr; \
|
||||
if (_err != CL_SUCCESS) { \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_operand_file(const char* filename, void* data, size_t size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "wb");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t wsize = fwrite(data, size, 1, fp);
|
||||
if (wsize != 1) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool almost_equal(float a, float b, int ulp = 4) {
|
||||
union fi_t { int i; float f; };
|
||||
fi_t fa, fb;
|
||||
fa.f = a;
|
||||
fb.f = b;
|
||||
return std::abs(fa.i - fb.i) <= ulp;
|
||||
}
|
||||
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_mem a_memobj = NULL;
|
||||
cl_mem b_memobj = NULL;
|
||||
cl_mem c_memobj = NULL;
|
||||
float *h_a = NULL;
|
||||
float *h_b = NULL;
|
||||
float *h_c = NULL;
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
static void cleanup() {
|
||||
if (commandQueue) clReleaseCommandQueue(commandQueue);
|
||||
if (kernel) clReleaseKernel(kernel);
|
||||
if (program) clReleaseProgram(program);
|
||||
if (a_memobj) clReleaseMemObject(a_memobj);
|
||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||
if (context) clReleaseContext(context);
|
||||
if (device_id) clReleaseDevice(device_id);
|
||||
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
if (h_a) free(h_a);
|
||||
if (h_b) free(h_b);
|
||||
if (h_c) free(h_c);
|
||||
}
|
||||
|
||||
int size = 64;
|
||||
|
||||
static void show_usage() {
|
||||
printf("Usage: [-n size] [-h: help]\n");
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
size = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
printf("Workload size=%d\n", size);
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
cl_platform_id platform_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status;
|
||||
|
||||
// read kernel binary from file
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
printf("Create context\n");
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
printf("Allocate device buffers\n");
|
||||
size_t nbytes = size * sizeof(float);
|
||||
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||
|
||||
printf("Create program from kernel source\n");
|
||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
||||
if (program == NULL) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
// Create kernel
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
// Set kernel arguments
|
||||
// NOTE(hansung): clSetKernelArg doesn't seem to incur any device-specific
|
||||
// operation
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
|
||||
// Allocate memories for input arrays and output arrays.
|
||||
h_a = (float*)malloc(nbytes);
|
||||
h_b = (float*)malloc(nbytes);
|
||||
h_c = (float*)malloc(nbytes);
|
||||
|
||||
// Initialize values for array members.
|
||||
for (int i = 0; i < size; ++i) {
|
||||
h_a[i] = sinf(i)*sinf(i);
|
||||
h_b[i] = cosf(i)*cosf(i);
|
||||
h_c[i] = 0xdeadbeef;
|
||||
//printf("*** [%d]: h_a=%f, h_b=%f\n", i, h_a[i], h_b[i]);
|
||||
}
|
||||
|
||||
// NOTE(hansung): Dump operand buffer to a file
|
||||
if (write_operand_file("vecadd.input.a.bin", h_a, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
if (write_operand_file("vecadd.input.b.bin", h_b, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// Creating command queue
|
||||
// NOTE(hansung): The 3rd properties arg is a bit-field, where fields like
|
||||
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE can be set. With value of 0,
|
||||
// nothing is set and the commands in the queue will be completed in-order.
|
||||
// See OpenCL 1.2 spec, section 5.1
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(
|
||||
context, device_id, 0 /* command-queue properties */, &_err));
|
||||
|
||||
printf("Upload source buffers\n");
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a, 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b, 0, NULL, NULL));
|
||||
|
||||
printf("Execute the kernel\n");
|
||||
size_t global_work_size[1] = {size};
|
||||
size_t local_work_size[1] = {1};
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));
|
||||
// NOTE(hansung): clFinish blocks until all kernels in the command queue are
|
||||
// finished. This seems to be what actually kicks off kernel execution.
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
printf("Download destination buffer\n");
|
||||
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c, 0, NULL, NULL));
|
||||
|
||||
printf("Verify result\n");
|
||||
int errors = 0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
float ref = h_a[i] + h_b[i];
|
||||
if (!almost_equal(h_c[i], ref)) {
|
||||
if (errors < 100)
|
||||
printf("*** error: [%d] expected=%f, actual=%f, a=%f, b=%f\n", i, ref, h_c[i], h_a[i], h_b[i]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (0 == errors) {
|
||||
printf("PASSED!\n");
|
||||
} else {
|
||||
printf("FAILED! - %d errors\n", errors);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
cleanup();
|
||||
|
||||
return errors;
|
||||
}
|
||||
@@ -37,7 +37,7 @@ SRCS = main.cc
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
@@ -53,6 +53,7 @@ run-vlsim: $(PROJECT) kernel.pocl
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
cp -f args.bin $(PROJECT).args.bin
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
12
tests/opencl/vecadd/kernel.alll1hit.loop1000.cl
Normal file
12
tests/opencl/vecadd/kernel.alll1hit.loop1000.cl
Normal file
@@ -0,0 +1,12 @@
|
||||
__kernel void vecadd (__global const float *A,
|
||||
__global const float *B,
|
||||
__global float *C)
|
||||
{
|
||||
int gid = get_global_id(0);
|
||||
float sum = 0.;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
int addr = gid + (i % 2);
|
||||
sum += A[addr] + B[addr];
|
||||
}
|
||||
C[gid] = sum;
|
||||
}
|
||||
@@ -52,6 +52,25 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_operand_file(const char* filename, void* data, size_t size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "wb");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t wsize = fwrite(data, size, 1, fp);
|
||||
if (wsize != 1) {
|
||||
fprintf(stderr, "Failed to write operand data.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool almost_equal(float a, float b, int ulp = 4) {
|
||||
union fi_t { int i; float f; };
|
||||
fi_t fa, fb;
|
||||
@@ -156,6 +175,8 @@ int main (int argc, char **argv) {
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
// Set kernel arguments
|
||||
// NOTE(hansung): clSetKernelArg doesn't seem to incur any device-specific
|
||||
// operation
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
@@ -173,10 +194,21 @@ int main (int argc, char **argv) {
|
||||
//printf("*** [%d]: h_a=%f, h_b=%f\n", i, h_a[i], h_b[i]);
|
||||
}
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
|
||||
// NOTE(hansung): Dump operand buffer to a file
|
||||
if (write_operand_file("vecadd.input.a.bin", h_a, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
if (write_operand_file("vecadd.input.b.bin", h_b, nbytes) != 0)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
printf("Upload source buffers\n");
|
||||
// Creating command queue
|
||||
// NOTE(hansung): The 3rd properties arg is a bit-field, where fields like
|
||||
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE can be set. With value of 0,
|
||||
// nothing is set and the commands in the queue will be completed in-order.
|
||||
// See OpenCL 1.2 spec, section 5.1
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(
|
||||
context, device_id, 0 /* command-queue properties */, &_err));
|
||||
|
||||
printf("Upload source buffers\n");
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a, 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b, 0, NULL, NULL));
|
||||
|
||||
@@ -185,6 +217,8 @@ int main (int argc, char **argv) {
|
||||
size_t local_work_size[1] = {1};
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));
|
||||
// NOTE(hansung): clFinish blocks until all kernels in the command queue are
|
||||
// finished. This seems to be what actually kicks off kernel execution.
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
|
||||
2004
tests/opencl/vecadd/vecadd.dump.annotated
Normal file
2004
tests/opencl/vecadd/vecadd.dump.annotated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,6 +11,7 @@ all:
|
||||
$(MAKE) -C no_mf_ext
|
||||
$(MAKE) -C no_smem
|
||||
$(MAKE) -C prefetch
|
||||
$(MAKE) -C relu
|
||||
|
||||
run-simx:
|
||||
$(MAKE) -C basic run-simx
|
||||
@@ -25,6 +26,7 @@ run-simx:
|
||||
$(MAKE) -C no_mf_ext run-simx
|
||||
$(MAKE) -C no_smem run-simx
|
||||
$(MAKE) -C prefetch run-simx
|
||||
$(MAKE) -C relu run-simx
|
||||
|
||||
run-rtlsim:
|
||||
$(MAKE) -C basic run-rtlsim
|
||||
@@ -39,6 +41,7 @@ run-rtlsim:
|
||||
$(MAKE) -C no_mf_ext run-rtlsim
|
||||
$(MAKE) -C no_smem run-rtlsim
|
||||
$(MAKE) -C prefetch run-rtlsim
|
||||
$(MAKE) -C relu run-rtlsim
|
||||
|
||||
run-vlsim:
|
||||
$(MAKE) -C basic run-vlsim
|
||||
@@ -53,6 +56,7 @@ run-vlsim:
|
||||
$(MAKE) -C no_mf_ext run-vlsim
|
||||
$(MAKE) -C no_smem run-vlsim
|
||||
$(MAKE) -C prefetch run-vlsim
|
||||
$(MAKE) -C relu run-vlsim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C basic clean
|
||||
@@ -67,6 +71,7 @@ clean:
|
||||
$(MAKE) -C no_mf_ext clean
|
||||
$(MAKE) -C no_smem clean
|
||||
$(MAKE) -C prefetch clean
|
||||
$(MAKE) -C relu clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C basic clean-all
|
||||
@@ -81,3 +86,4 @@ clean-all:
|
||||
$(MAKE) -C no_mf_ext clean-all
|
||||
$(MAKE) -C no_smem clean-all
|
||||
$(MAKE) -C prefetch clean-all
|
||||
$(MAKE) -C relu clean-all
|
||||
|
||||
2
tests/regression/relu/.depend
Normal file
2
tests/regression/relu/.depend
Normal file
@@ -0,0 +1,2 @@
|
||||
main.o: main.cpp \
|
||||
/home/eecs/nicolas.a.castaneda/vortex/driver/include/vortex.h common.h
|
||||
77
tests/regression/relu/Makefile
Normal file
77
tests/regression/relu/Makefile
Normal file
@@ -0,0 +1,77 @@
|
||||
XLEN ?= 32
|
||||
|
||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
|
||||
OPTS ?= -n64
|
||||
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(LLVM_PREFIX)/bin/llvm-objdump
|
||||
VX_CP = $(LLVM_PREFIX)/bin/llvm-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = relu
|
||||
|
||||
SRCS = main.cpp
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CP) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(VX_SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
run-simx: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-fpga: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-asesim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-vlsim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.bin
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-all: clean
|
||||
rm -rf *.elf *.bin *.dump
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
12
tests/regression/relu/common.h
Normal file
12
tests/regression/relu/common.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_points;
|
||||
uint32_t src_addr;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
BIN
tests/regression/relu/kernel.bin
Executable file
BIN
tests/regression/relu/kernel.bin
Executable file
Binary file not shown.
28
tests/regression/relu/kernel.c
Normal file
28
tests/regression/relu/kernel.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
|
||||
uint32_t num_points = arg->num_points;
|
||||
uint32_t points_per_core = num_points / vx_num_warps();
|
||||
int tid = vx_thread_lid();
|
||||
int32_t* src_ptr = (int32_t*)arg->src_addr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_addr;
|
||||
|
||||
int32_t ref_value = src_ptr[task_id];
|
||||
int ref_negative = ref_value < 0;
|
||||
vx_split(ref_negative);
|
||||
if (ref_negative) {
|
||||
ref_value = 0;
|
||||
}
|
||||
vx_join();
|
||||
|
||||
dst_ptr[task_id] = ref_value;
|
||||
}
|
||||
|
||||
void main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
int num_warps = vx_num_warps();
|
||||
vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
|
||||
}
|
||||
1818
tests/regression/relu/kernel.dump
Normal file
1818
tests/regression/relu/kernel.dump
Normal file
File diff suppressed because it is too large
Load Diff
BIN
tests/regression/relu/kernel.elf
Executable file
BIN
tests/regression/relu/kernel.elf
Executable file
Binary file not shown.
218
tests/regression/relu/main.cpp
Normal file
218
tests/regression/relu/main.cpp
Normal file
@@ -0,0 +1,218 @@
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include <vector>
|
||||
#include "common.h"
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
uint32_t count = 0;
|
||||
|
||||
std::vector<int32_t> src_data;
|
||||
std::vector<int32_t> ref_data;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Test." << std::endl;
|
||||
std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.src_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
void gen_input_data(uint32_t num_points) {
|
||||
src_data.resize(num_points);
|
||||
|
||||
for (uint32_t i = 0; i < src_data.size(); ++i) {
|
||||
int value = std::rand() - (RAND_MAX / 2);
|
||||
src_data[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
void gen_ref_data(uint32_t num_points) {
|
||||
ref_data.resize(num_points);
|
||||
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int32_t ref_value = src_data.at(i);
|
||||
ref_data.at(i) = std::max(0, ref_value);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, MAX_TIMEOUT));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
{
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = ref_data.at(i);
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
std::cout << "error at result #" << std::dec << i
|
||||
<< std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
std::srand(50);
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
uint32_t num_points = 256;
|
||||
|
||||
// generate input data
|
||||
gen_input_data(num_points);
|
||||
|
||||
// generate reference data
|
||||
gen_ref_data(num_points);
|
||||
|
||||
uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
|
||||
uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_mem_alloc(device, src_buf_size, &value));
|
||||
kernel_arg.src_addr = value;
|
||||
RT_CHECK(vx_mem_alloc(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_addr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_addr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_addr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t)));
|
||||
RT_CHECK(vx_buf_alloc(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = src_data.at(i);
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_addr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
278
tests/regression/relu/ramulator.ddr4.log
Normal file
278
tests/regression/relu/ramulator.ddr4.log
Normal file
@@ -0,0 +1,278 @@
|
||||
ramulator.active_cycles_0 1072 # Total active cycles for level _0
|
||||
ramulator.busy_cycles_0 1072 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0
|
||||
ramulator.serving_requests_0 1496 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0
|
||||
ramulator.average_serving_requests_0 0.130336 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
|
||||
ramulator.active_cycles_0_0 1072 # Total active cycles for level _0_0
|
||||
ramulator.busy_cycles_0_0 1384 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0
|
||||
ramulator.serving_requests_0_0 1496 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0
|
||||
ramulator.average_serving_requests_0_0 0.130336 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
|
||||
ramulator.active_cycles_0_0_0 1030 # Total active cycles for level _0_0_0
|
||||
ramulator.busy_cycles_0_0_0 1030 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0
|
||||
ramulator.serving_requests_0_0_0 1440 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
|
||||
ramulator.average_serving_requests_0_0_0 0.125457 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
|
||||
ramulator.active_cycles_0_0_0_0 1030 # Total active cycles for level _0_0_0_0
|
||||
ramulator.busy_cycles_0_0_0_0 1030 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_0
|
||||
ramulator.serving_requests_0_0_0_0 1440 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
|
||||
ramulator.average_serving_requests_0_0_0_0 0.125457 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
|
||||
ramulator.active_cycles_0_0_0_1 0 # Total active cycles for level _0_0_0_1
|
||||
ramulator.busy_cycles_0_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_1
|
||||
ramulator.serving_requests_0_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
|
||||
ramulator.average_serving_requests_0_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
|
||||
ramulator.active_cycles_0_0_0_2 0 # Total active cycles for level _0_0_0_2
|
||||
ramulator.busy_cycles_0_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_2
|
||||
ramulator.serving_requests_0_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_2
|
||||
ramulator.average_serving_requests_0_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_2
|
||||
ramulator.active_cycles_0_0_0_3 0 # Total active cycles for level _0_0_0_3
|
||||
ramulator.busy_cycles_0_0_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_3
|
||||
ramulator.serving_requests_0_0_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_3
|
||||
ramulator.average_serving_requests_0_0_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_3
|
||||
ramulator.active_cycles_0_0_1 0 # Total active cycles for level _0_0_1
|
||||
ramulator.busy_cycles_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1
|
||||
ramulator.serving_requests_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1
|
||||
ramulator.average_serving_requests_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1
|
||||
ramulator.active_cycles_0_0_1_0 0 # Total active cycles for level _0_0_1_0
|
||||
ramulator.busy_cycles_0_0_1_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_0
|
||||
ramulator.serving_requests_0_0_1_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_0
|
||||
ramulator.average_serving_requests_0_0_1_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_0
|
||||
ramulator.active_cycles_0_0_1_1 0 # Total active cycles for level _0_0_1_1
|
||||
ramulator.busy_cycles_0_0_1_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_1
|
||||
ramulator.serving_requests_0_0_1_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_1
|
||||
ramulator.average_serving_requests_0_0_1_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_1
|
||||
ramulator.active_cycles_0_0_1_2 0 # Total active cycles for level _0_0_1_2
|
||||
ramulator.busy_cycles_0_0_1_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_2
|
||||
ramulator.serving_requests_0_0_1_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_2
|
||||
ramulator.average_serving_requests_0_0_1_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_2
|
||||
ramulator.active_cycles_0_0_1_3 0 # Total active cycles for level _0_0_1_3
|
||||
ramulator.busy_cycles_0_0_1_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_1_3
|
||||
ramulator.serving_requests_0_0_1_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_3
|
||||
ramulator.average_serving_requests_0_0_1_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_1_3
|
||||
ramulator.active_cycles_0_0_2 0 # Total active cycles for level _0_0_2
|
||||
ramulator.busy_cycles_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2
|
||||
ramulator.serving_requests_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2
|
||||
ramulator.average_serving_requests_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2
|
||||
ramulator.active_cycles_0_0_2_0 0 # Total active cycles for level _0_0_2_0
|
||||
ramulator.busy_cycles_0_0_2_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_0
|
||||
ramulator.serving_requests_0_0_2_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_0
|
||||
ramulator.average_serving_requests_0_0_2_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_0
|
||||
ramulator.active_cycles_0_0_2_1 0 # Total active cycles for level _0_0_2_1
|
||||
ramulator.busy_cycles_0_0_2_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_1
|
||||
ramulator.serving_requests_0_0_2_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_1
|
||||
ramulator.average_serving_requests_0_0_2_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_1
|
||||
ramulator.active_cycles_0_0_2_2 0 # Total active cycles for level _0_0_2_2
|
||||
ramulator.busy_cycles_0_0_2_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_2
|
||||
ramulator.serving_requests_0_0_2_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_2
|
||||
ramulator.average_serving_requests_0_0_2_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_2
|
||||
ramulator.active_cycles_0_0_2_3 0 # Total active cycles for level _0_0_2_3
|
||||
ramulator.busy_cycles_0_0_2_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_2_3
|
||||
ramulator.serving_requests_0_0_2_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_3
|
||||
ramulator.average_serving_requests_0_0_2_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_2_3
|
||||
ramulator.active_cycles_0_0_3 42 # Total active cycles for level _0_0_3
|
||||
ramulator.busy_cycles_0_0_3 42 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3
|
||||
ramulator.serving_requests_0_0_3 56 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3
|
||||
ramulator.average_serving_requests_0_0_3 0.004879 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3
|
||||
ramulator.active_cycles_0_0_3_0 0 # Total active cycles for level _0_0_3_0
|
||||
ramulator.busy_cycles_0_0_3_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_0
|
||||
ramulator.serving_requests_0_0_3_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_0
|
||||
ramulator.average_serving_requests_0_0_3_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_0
|
||||
ramulator.active_cycles_0_0_3_1 0 # Total active cycles for level _0_0_3_1
|
||||
ramulator.busy_cycles_0_0_3_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_1
|
||||
ramulator.serving_requests_0_0_3_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_1
|
||||
ramulator.average_serving_requests_0_0_3_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_1
|
||||
ramulator.active_cycles_0_0_3_2 0 # Total active cycles for level _0_0_3_2
|
||||
ramulator.busy_cycles_0_0_3_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_2
|
||||
ramulator.serving_requests_0_0_3_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_2
|
||||
ramulator.average_serving_requests_0_0_3_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_2
|
||||
ramulator.active_cycles_0_0_3_3 42 # Total active cycles for level _0_0_3_3
|
||||
ramulator.busy_cycles_0_0_3_3 42 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_3_3
|
||||
ramulator.serving_requests_0_0_3_3 56 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_3
|
||||
ramulator.average_serving_requests_0_0_3_3 0.004879 # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_3_3
|
||||
ramulator.read_transaction_bytes_0 3712 # The total byte of read transaction per channel
|
||||
ramulator.write_transaction_bytes_0 16000 # The total byte of write transaction per channel
|
||||
ramulator.row_hits_channel_0_core 296 # Number of row hits per channel per core
|
||||
ramulator.row_misses_channel_0_core 3 # Number of row misses per channel per core
|
||||
ramulator.row_conflicts_channel_0_core 9 # Number of row conflicts per channel per core
|
||||
ramulator.read_row_hits_channel_0_core 51 # Number of row hits for read requests per channel per core
|
||||
[0] 51.0 #
|
||||
ramulator.read_row_misses_channel_0_core 2 # Number of row misses for read requests per channel per core
|
||||
[0] 2.0 #
|
||||
ramulator.read_row_conflicts_channel_0_core 5 # Number of row conflicts for read requests per channel per core
|
||||
[0] 5.0 #
|
||||
ramulator.write_row_hits_channel_0_core 245 # Number of row hits for write requests per channel per core
|
||||
[0] 245.0 #
|
||||
ramulator.write_row_misses_channel_0_core 1 # Number of row misses for write requests per channel per core
|
||||
[0] 1.0 #
|
||||
ramulator.write_row_conflicts_channel_0_core 4 # Number of row conflicts for write requests per channel per core
|
||||
[0] 4.0 #
|
||||
ramulator.useless_activates_0_core 0 # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
|
||||
ramulator.read_latency_avg_0 41.689655 # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
|
||||
ramulator.read_latency_sum_0 2418 # The memory latency cycles (in memory time domain) sum for all read requests in this channel
|
||||
ramulator.req_queue_length_avg_0 0.993466 # Average of read and write queue length per memory cycle per channel.
|
||||
ramulator.req_queue_length_sum_0 11403 # Sum of read and write queue length per memory cycle per channel.
|
||||
ramulator.read_req_queue_length_avg_0 0.200906 # Read queue length average per memory cycle per channel.
|
||||
ramulator.read_req_queue_length_sum_0 2306 # Read queue length sum per memory cycle per channel.
|
||||
ramulator.write_req_queue_length_avg_0 0.792560 # Write queue length average per memory cycle per channel.
|
||||
ramulator.write_req_queue_length_sum_0 9097 # Write queue length sum per memory cycle per channel.
|
||||
ramulator.record_read_hits 0.0 # record read hit count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_read_misses 0.0 # record_read_miss count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_read_conflicts 0.0 # record read conflict count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_hits 0.0 # record write hit count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_misses 0.0 # record write miss count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_conflicts 0.0 # record write conflict for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.active_cycles_1 1071 # Total active cycles for level _1
|
||||
ramulator.busy_cycles_1 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1
|
||||
ramulator.serving_requests_1 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1
|
||||
ramulator.average_serving_requests_1 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
|
||||
ramulator.active_cycles_1_0 1071 # Total active cycles for level _1_0
|
||||
ramulator.busy_cycles_1_0 1383 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0
|
||||
ramulator.serving_requests_1_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0
|
||||
ramulator.average_serving_requests_1_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
|
||||
ramulator.active_cycles_1_0_0 1071 # Total active cycles for level _1_0_0
|
||||
ramulator.busy_cycles_1_0_0 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0
|
||||
ramulator.serving_requests_1_0_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
|
||||
ramulator.average_serving_requests_1_0_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
|
||||
ramulator.active_cycles_1_0_0_0 1071 # Total active cycles for level _1_0_0_0
|
||||
ramulator.busy_cycles_1_0_0_0 1071 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_0
|
||||
ramulator.serving_requests_1_0_0_0 1472 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
|
||||
ramulator.average_serving_requests_1_0_0_0 0.128245 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
|
||||
ramulator.active_cycles_1_0_0_1 0 # Total active cycles for level _1_0_0_1
|
||||
ramulator.busy_cycles_1_0_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_1
|
||||
ramulator.serving_requests_1_0_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
|
||||
ramulator.average_serving_requests_1_0_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
|
||||
ramulator.active_cycles_1_0_0_2 0 # Total active cycles for level _1_0_0_2
|
||||
ramulator.busy_cycles_1_0_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_2
|
||||
ramulator.serving_requests_1_0_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_2
|
||||
ramulator.average_serving_requests_1_0_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_2
|
||||
ramulator.active_cycles_1_0_0_3 0 # Total active cycles for level _1_0_0_3
|
||||
ramulator.busy_cycles_1_0_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_3
|
||||
ramulator.serving_requests_1_0_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_3
|
||||
ramulator.average_serving_requests_1_0_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_3
|
||||
ramulator.active_cycles_1_0_1 0 # Total active cycles for level _1_0_1
|
||||
ramulator.busy_cycles_1_0_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1
|
||||
ramulator.serving_requests_1_0_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1
|
||||
ramulator.average_serving_requests_1_0_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1
|
||||
ramulator.active_cycles_1_0_1_0 0 # Total active cycles for level _1_0_1_0
|
||||
ramulator.busy_cycles_1_0_1_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_0
|
||||
ramulator.serving_requests_1_0_1_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_0
|
||||
ramulator.average_serving_requests_1_0_1_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_0
|
||||
ramulator.active_cycles_1_0_1_1 0 # Total active cycles for level _1_0_1_1
|
||||
ramulator.busy_cycles_1_0_1_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_1
|
||||
ramulator.serving_requests_1_0_1_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_1
|
||||
ramulator.average_serving_requests_1_0_1_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_1
|
||||
ramulator.active_cycles_1_0_1_2 0 # Total active cycles for level _1_0_1_2
|
||||
ramulator.busy_cycles_1_0_1_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_2
|
||||
ramulator.serving_requests_1_0_1_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_2
|
||||
ramulator.average_serving_requests_1_0_1_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_2
|
||||
ramulator.active_cycles_1_0_1_3 0 # Total active cycles for level _1_0_1_3
|
||||
ramulator.busy_cycles_1_0_1_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_1_3
|
||||
ramulator.serving_requests_1_0_1_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_3
|
||||
ramulator.average_serving_requests_1_0_1_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_1_3
|
||||
ramulator.active_cycles_1_0_2 0 # Total active cycles for level _1_0_2
|
||||
ramulator.busy_cycles_1_0_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2
|
||||
ramulator.serving_requests_1_0_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2
|
||||
ramulator.average_serving_requests_1_0_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2
|
||||
ramulator.active_cycles_1_0_2_0 0 # Total active cycles for level _1_0_2_0
|
||||
ramulator.busy_cycles_1_0_2_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_0
|
||||
ramulator.serving_requests_1_0_2_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_0
|
||||
ramulator.average_serving_requests_1_0_2_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_0
|
||||
ramulator.active_cycles_1_0_2_1 0 # Total active cycles for level _1_0_2_1
|
||||
ramulator.busy_cycles_1_0_2_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_1
|
||||
ramulator.serving_requests_1_0_2_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_1
|
||||
ramulator.average_serving_requests_1_0_2_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_1
|
||||
ramulator.active_cycles_1_0_2_2 0 # Total active cycles for level _1_0_2_2
|
||||
ramulator.busy_cycles_1_0_2_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_2
|
||||
ramulator.serving_requests_1_0_2_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_2
|
||||
ramulator.average_serving_requests_1_0_2_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_2
|
||||
ramulator.active_cycles_1_0_2_3 0 # Total active cycles for level _1_0_2_3
|
||||
ramulator.busy_cycles_1_0_2_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_2_3
|
||||
ramulator.serving_requests_1_0_2_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_3
|
||||
ramulator.average_serving_requests_1_0_2_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_2_3
|
||||
ramulator.active_cycles_1_0_3 0 # Total active cycles for level _1_0_3
|
||||
ramulator.busy_cycles_1_0_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3
|
||||
ramulator.serving_requests_1_0_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3
|
||||
ramulator.average_serving_requests_1_0_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3
|
||||
ramulator.active_cycles_1_0_3_0 0 # Total active cycles for level _1_0_3_0
|
||||
ramulator.busy_cycles_1_0_3_0 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_0
|
||||
ramulator.serving_requests_1_0_3_0 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_0
|
||||
ramulator.average_serving_requests_1_0_3_0 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_0
|
||||
ramulator.active_cycles_1_0_3_1 0 # Total active cycles for level _1_0_3_1
|
||||
ramulator.busy_cycles_1_0_3_1 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_1
|
||||
ramulator.serving_requests_1_0_3_1 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_1
|
||||
ramulator.average_serving_requests_1_0_3_1 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_1
|
||||
ramulator.active_cycles_1_0_3_2 0 # Total active cycles for level _1_0_3_2
|
||||
ramulator.busy_cycles_1_0_3_2 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_2
|
||||
ramulator.serving_requests_1_0_3_2 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_2
|
||||
ramulator.average_serving_requests_1_0_3_2 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_2
|
||||
ramulator.active_cycles_1_0_3_3 0 # Total active cycles for level _1_0_3_3
|
||||
ramulator.busy_cycles_1_0_3_3 0 # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_3_3
|
||||
ramulator.serving_requests_1_0_3_3 0 # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_3
|
||||
ramulator.average_serving_requests_1_0_3_3 0.000000 # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_3_3
|
||||
ramulator.read_transaction_bytes_1 3584 # The total byte of read transaction per channel
|
||||
ramulator.write_transaction_bytes_1 14848 # The total byte of write transaction per channel
|
||||
ramulator.row_hits_channel_1_core 276 # Number of row hits per channel per core
|
||||
ramulator.row_misses_channel_1_core 2 # Number of row misses per channel per core
|
||||
ramulator.row_conflicts_channel_1_core 10 # Number of row conflicts per channel per core
|
||||
ramulator.read_row_hits_channel_1_core 49 # Number of row hits for read requests per channel per core
|
||||
[0] 49.0 #
|
||||
ramulator.read_row_misses_channel_1_core 1 # Number of row misses for read requests per channel per core
|
||||
[0] 1.0 #
|
||||
ramulator.read_row_conflicts_channel_1_core 6 # Number of row conflicts for read requests per channel per core
|
||||
[0] 6.0 #
|
||||
ramulator.write_row_hits_channel_1_core 227 # Number of row hits for write requests per channel per core
|
||||
[0] 227.0 #
|
||||
ramulator.write_row_misses_channel_1_core 1 # Number of row misses for write requests per channel per core
|
||||
[0] 1.0 #
|
||||
ramulator.write_row_conflicts_channel_1_core 4 # Number of row conflicts for write requests per channel per core
|
||||
[0] 4.0 #
|
||||
ramulator.useless_activates_1_core 0 # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
|
||||
ramulator.read_latency_avg_1 34.642857 # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
|
||||
ramulator.read_latency_sum_1 1940 # The memory latency cycles (in memory time domain) sum for all read requests in this channel
|
||||
ramulator.req_queue_length_avg_1 0.524830 # Average of read and write queue length per memory cycle per channel.
|
||||
ramulator.req_queue_length_sum_1 6024 # Sum of read and write queue length per memory cycle per channel.
|
||||
ramulator.read_req_queue_length_avg_1 0.159261 # Read queue length average per memory cycle per channel.
|
||||
ramulator.read_req_queue_length_sum_1 1828 # Read queue length sum per memory cycle per channel.
|
||||
ramulator.write_req_queue_length_avg_1 0.365569 # Write queue length average per memory cycle per channel.
|
||||
ramulator.write_req_queue_length_sum_1 4196 # Write queue length sum per memory cycle per channel.
|
||||
ramulator.record_read_hits 0.0 # record read hit count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_read_misses 0.0 # record_read_miss count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_read_conflicts 0.0 # record read conflict count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_hits 0.0 # record write hit count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_misses 0.0 # record write miss count for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_conflicts 0.0 # record write conflict for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.dram_capacity 8589934592 # Number of bytes in simulated DRAM
|
||||
ramulator.dram_cycles 11478 # Number of DRAM cycles simulated
|
||||
ramulator.incoming_requests 596 # Number of incoming requests to DRAM
|
||||
ramulator.read_requests 114 # Number of incoming read requests to DRAM per core
|
||||
[0] 114.0 #
|
||||
ramulator.write_requests 482 # Number of incoming write requests to DRAM per core
|
||||
[0] 482.0 #
|
||||
ramulator.ramulator_active_cycles 2049 # The total number of cycles that the DRAM part is active (serving R/W)
|
||||
ramulator.incoming_requests_per_channel 596.0 # Number of incoming requests to each DRAM channel
|
||||
[0] 308.0 #
|
||||
[1] 288.0 #
|
||||
ramulator.incoming_read_reqs_per_channel 114.0 # Number of incoming read requests to each DRAM channel
|
||||
[0] 58.0 #
|
||||
[1] 56.0 #
|
||||
ramulator.physical_page_replacement 0 # The number of times that physical page replacement happens.
|
||||
ramulator.maximum_bandwidth 38400000000 # The theoretical maximum bandwidth (Bps)
|
||||
ramulator.in_queue_req_num_sum 17427 # Sum of read/write queue length
|
||||
ramulator.in_queue_read_req_num_sum 4134 # Sum of read queue length
|
||||
ramulator.in_queue_write_req_num_sum 13293 # Sum of write queue length
|
||||
ramulator.in_queue_req_num_avg 1.518296 # Average of read/write queue length per memory cycle
|
||||
ramulator.in_queue_read_req_num_avg 0.360167 # Average of read queue length per memory cycle
|
||||
ramulator.in_queue_write_req_num_avg 1.158129 # Average of write queue length per memory cycle
|
||||
ramulator.record_read_requests 0.0 # record read requests for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
ramulator.record_write_requests 0.0 # record write requests for this core when it reaches request limit or to the end
|
||||
[0] 0.0 #
|
||||
BIN
tests/regression/relu/relu
Executable file
BIN
tests/regression/relu/relu
Executable file
Binary file not shown.
@@ -1,9 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
ifeq ($(XLEN),32)
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv64-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf-
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
ifeq ($(XLEN),32)
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv64-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf-
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
ifeq ($(XLEN),32)
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH = /opt/riscv64-gnu-toolchain
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf-
|
||||
|
||||
219
vortex-rtlsim.rc
Normal file
219
vortex-rtlsim.rc
Normal file
@@ -0,0 +1,219 @@
|
||||
Magic 271485
|
||||
Revision Verdi_S-2021.09-SP1-1
|
||||
|
||||
; Window Layout <x> <y> <width> <height> <signalwidth> <valuewidth>
|
||||
viewPort 0 33 3840 1560 374 148
|
||||
|
||||
; File list:
|
||||
; openDirFile [-d delimiter] [-s time_offset] [-rf auto_bus_rule_file] path_name file_name
|
||||
openDirFile -d / "" "/scratch/hansung/src/vortex/trace.vcd.fsdb"
|
||||
|
||||
; file time scale:
|
||||
; fileTimeScale ### s|ms|us|ns|ps
|
||||
|
||||
; signal spacing:
|
||||
signalSpacing 5
|
||||
|
||||
; windowTimeUnit is used for zoom, cursor & marker
|
||||
; waveform viewport range
|
||||
zoom 75133.753950 75225.192159
|
||||
cursor 75155.000000
|
||||
marker 0.000000
|
||||
|
||||
; user define markers
|
||||
; userMarker time_pos marker_name color linestyle
|
||||
; visible top row signal index
|
||||
top 42
|
||||
; marker line index
|
||||
markerPos 78
|
||||
|
||||
; event list
|
||||
; addEvent event_name event_expression
|
||||
; curEvent event_name
|
||||
|
||||
|
||||
|
||||
COMPLEX_EVENT_BEGIN
|
||||
|
||||
|
||||
COMPLEX_EVENT_END
|
||||
|
||||
|
||||
|
||||
; toolbar current search type
|
||||
; curSTATUS search_type
|
||||
curSTATUS ByValue
|
||||
|
||||
|
||||
addGroup "G1"
|
||||
activeDirFile "" "/scratch/hansung/src/vortex/trace.vcd.fsdb"
|
||||
addSignal -h 30 /TOP/clk
|
||||
addSignal -h 30 -holdScope reset
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/dcache_req_valid[3:0]
|
||||
addSignal -h 30 -holdScope dcache_rsp_ready
|
||||
addSubGroup "Issue"
|
||||
addSubGroup "Ibuffer"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/ibuffer/ibuffer_if/PC[31:0]
|
||||
addSignal -h 30 -holdScope rs1[5:0]
|
||||
addSignal -h 30 -holdScope wid[1:0]
|
||||
endSubGroup "Ibuffer"
|
||||
addSubGroup "gpr_rsp_if"
|
||||
addSignal -h 30 -UNSIGNED -HEX /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/dispatch/gpr_rsp_if/\rs1_data[0] [31:0]
|
||||
addSignal -h 30 -holdScope \rs1_data[1] [31:0]
|
||||
addSignal -h 30 -holdScope \rs1_data[2] [31:0]
|
||||
addSignal -h 30 -holdScope \rs1_data[3] [31:0]
|
||||
endSubGroup "gpr_rsp_if"
|
||||
addSubGroup "Dispatch"
|
||||
endSubGroup "Dispatch"
|
||||
endSubGroup "Issue"
|
||||
addSubGroup "Execute"
|
||||
addSubGroup "LSU"
|
||||
addSubGroup "lsu_req_if" -e FALSE
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_req_if/ready
|
||||
addSignal -h 30 -holdScope valid
|
||||
addSignal -h 30 -UNSIGNED -HEX -holdScope PC[31:0]
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/lsu_req_if/\base_addr[0] [31:0]
|
||||
addSignal -h 30 -holdScope \base_addr[1] [31:0]
|
||||
addSignal -h 30 -holdScope \base_addr[2] [31:0]
|
||||
addSignal -h 30 -holdScope \base_addr[3] [31:0]
|
||||
addSignal -h 30 -holdScope offset[31:0]
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_req_if/tmask[3:0]
|
||||
addSignal -h 30 -holdScope op_type[3:0]
|
||||
endSubGroup "lsu_req_if"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/req_valid
|
||||
addSignal -h 30 -holdScope req_pc[31:0]
|
||||
addSignal -h 30 -holdScope dcache_req_ready
|
||||
addSignal -h 30 -holdScope req_sent_mask[3:0]
|
||||
endSubGroup "LSU"
|
||||
addSubGroup "dcache_req_if"
|
||||
addSignal -h 30 -UNSIGNED -HEX /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/dcache_req_if/ready[3:0]
|
||||
addSignal -h 30 -holdScope valid[3:0]
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/dcache_req_if/\addr[0] [29:0]
|
||||
addSignal -h 30 -holdScope \addr[1] [29:0]
|
||||
addSignal -h 30 -holdScope \addr[2] [29:0]
|
||||
addSignal -h 30 -holdScope \addr[3] [29:0]
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/\per_bank_core_req_addr[0] [25:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_addr[1] [25:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_addr[2] [25:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_addr[3] [25:0]
|
||||
endSubGroup "dcache_req_if"
|
||||
addSubGroup "dcache_rsp_if"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/dcache_rsp_if/ready
|
||||
addSignal -h 30 -holdScope valid
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/dcache_rsp_if/tag[48:0]
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/dcache_rsp_if/tmask[3:0]
|
||||
endSubGroup "dcache_rsp_if"
|
||||
addSubGroup "alu_req_if" -e FALSE
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/alu_req_if/PC[31:0]
|
||||
addSignal -h 30 -holdScope tmask[3:0]
|
||||
addSignal -h 30 -holdScope ready
|
||||
addSignal -h 30 -holdScope valid
|
||||
endSubGroup "alu_req_if"
|
||||
endSubGroup "Execute"
|
||||
addSubGroup "Decode" -e FALSE
|
||||
addSignal -h 30 -UNSIGNED -HEX /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/decode/decode_if/tmask[3:0]
|
||||
endSubGroup "Decode"
|
||||
addGroup "L1 Dcache"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_ready[3:0]
|
||||
addSignal -h 30 -UNSIGNED -HEX -holdScope core_req_valid[3:0]
|
||||
addSignal -h 30 -holdScope core_req_rw[3:0]
|
||||
addSignal -h 30 -holdScope \core_req_addr[0] [29:0]
|
||||
addSignal -h 30 -holdScope \core_req_addr[1] [29:0]
|
||||
addSignal -h 30 -holdScope \core_req_addr[2] [29:0]
|
||||
addSignal -h 30 -holdScope \core_req_addr[3] [29:0]
|
||||
addSubGroup "BankSel"
|
||||
addSignal -h 30 -UNSIGNED -HEX /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel/\core_req_bid[0] [1:0]
|
||||
addSignal -h 30 -holdScope \core_req_bid[1] [1:0]
|
||||
addSignal -h 30 -holdScope \core_req_bid[2] [1:0]
|
||||
addSignal -h 30 -holdScope \core_req_bid[3] [1:0]
|
||||
addSignal -h 30 -holdScope \core_req_line_addr[0] [25:0]
|
||||
addSignal -h 30 -holdScope \core_req_line_addr[1] [25:0]
|
||||
addSignal -h 30 -holdScope \core_req_line_addr[2] [25:0]
|
||||
addSignal -h 30 -holdScope \core_req_line_addr[3] [25:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_tid_r[0][0] [1:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_tid_r[1][0] [1:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_tid_r[2][0] [1:0]
|
||||
addSignal -h 30 -holdScope \per_bank_core_req_tid_r[3][0] [1:0]
|
||||
addSignal -h 30 -UNSIGNED -BIN -holdScope per_bank_core_req_valid[3:0]
|
||||
addSignal -h 30 -holdScope core_req_ready[3:0]
|
||||
endSubGroup "BankSel"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_rsp_valid[0:0]
|
||||
addSignal -h 30 -holdScope mem_req_valid
|
||||
addSignal -h 30 -holdScope mem_req_rw
|
||||
addSignal -h 30 -holdScope mem_rsp_valid
|
||||
addGroup "L2"
|
||||
addSignal -h 30 -UNSIGNED -HEX /TOP/Vortex/\genblk2[0] /cluster/genblk3/l2cache/core_req_ready[1:0]
|
||||
addSignal -h 30 -holdScope core_req_valid[1:0]
|
||||
addSignal -h 30 -holdScope mem_req_valid
|
||||
addGroup "DRAM"
|
||||
addSignal -h 30 /TOP/Vortex/\genblk2[0] /cluster/genblk3/l2cache/mem_req_valid
|
||||
addSignal -h 30 -holdScope mem_rsp_valid
|
||||
addGroup "G3"
|
||||
|
||||
; getSignalForm Scope Hierarchy Status
|
||||
; active file of getSignalForm
|
||||
activeDirFile "" "/scratch/hansung/src/vortex/trace.vcd.fsdb"
|
||||
|
||||
GETSIGNALFORM_SCOPE_HIERARCHY_BEGIN
|
||||
getSignalForm close
|
||||
|
||||
"/TOP"
|
||||
"/TOP/Vortex"
|
||||
"/TOP/Vortex/\genblk2[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/dcache_rsp_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/\genblk7[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/mem_req_arb"
|
||||
"/TOP/Vortex/genblk3"
|
||||
|
||||
SCOPE_LIST_BEGIN
|
||||
"/TOP"
|
||||
"/TOP/Vortex"
|
||||
"/TOP/Vortex/genblk3"
|
||||
"/TOP/Vortex/\genblk2[0]"
|
||||
"/TOP/Vortex/\genblk2[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/dcache_rsp_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/dcache_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/dcache_rsp_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/dcache_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache_mem_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/mem_req_arb"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/\genblk7[0] "
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/mem_req_arb/genblk1"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/lsu_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/execute/lsu_unit/lsu_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/dispatch"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/dispatch/ibuffer_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/dispatch/gpr_rsp_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/ibuffer"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/ibuffer_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/ibuffer/ibuffer_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/ibuffer/decode_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/gpr_rsp_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/pipeline/issue/gpr_req_if"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel/genblk5"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_rsp_merge"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel/genblk5/genblk1"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel/genblk5/genblk1/genblk1"
|
||||
"/TOP/Vortex/\genblk2[0] /cluster/\genblk2[0] /core/mem_unit/dcache/core_req_bank_sel/genblk5/genblk1/genblk1/unnamedblk7"
|
||||
SCOPE_LIST_END
|
||||
|
||||
GETSIGNALFORM_SCOPE_HIERARCHY_END
|
||||
|
||||
|
||||
Reference in New Issue
Block a user