Fixed fp_noncomp bug, updated CI toolchain script, increased DRAM latency to 100 cycles

This commit is contained in:
Blaise Tine
2020-11-23 11:59:40 -08:00
parent e281d32138
commit 2d4fef6dd6
6 changed files with 50 additions and 28 deletions

View File

@@ -5,6 +5,8 @@ set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
DESTDIR="${DESTDIR:=/opt}"
riscv() riscv()
{ {
for x in {a..o} for x in {a..o}
@@ -14,7 +16,7 @@ riscv()
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2 cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
tar -xvf riscv-gnu-toolchain.tar.bz2 tar -xvf riscv-gnu-toolchain.tar.bz2
rm -f riscv-gnu-toolchain.tar.bz2* rm -f riscv-gnu-toolchain.tar.bz2*
sudo cp -r riscv-gnu-toolchain /opt/ cp -r riscv-gnu-toolchain $DESTDIR
rm -rf riscv-gnu-toolchain rm -rf riscv-gnu-toolchain
} }
@@ -27,7 +29,7 @@ llvm()
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2 cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
tar -xvf llvm-riscv.tar.bz2 tar -xvf llvm-riscv.tar.bz2
rm -f llvm-riscv.tar.bz2* rm -f llvm-riscv.tar.bz2*
sudo cp -r llvm-riscv /opt/ cp -r llvm-riscv $DESTDIR
rm -rf llvm-riscv rm -rf llvm-riscv
} }
@@ -36,7 +38,7 @@ pocl()
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2 wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
tar -xvf pocl.tar.bz2 tar -xvf pocl.tar.bz2
rm -f pocl.tar.bz2 rm -f pocl.tar.bz2
sudo cp -r pocl /opt/ cp -r pocl $DESTDIR
rm -rf pocl rm -rf pocl
} }
@@ -45,7 +47,7 @@ verilator()
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2 wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
tar -xvf verilator.tar.bz2 tar -xvf verilator.tar.bz2
rm -f verilator.tar.bz2 rm -f verilator.tar.bz2
sudo cp -r verilator /opt/ cp -r verilator $DESTDIR
rm -rf verilator rm -rf verilator
} }

View File

@@ -9,7 +9,7 @@
#define CCI_WQ_SIZE 16 #define CCI_WQ_SIZE 16
#define ENABLE_DRAM_STALLS #define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4 #define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16 #define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16 #define DRAM_STALLS_MODULO 16
@@ -261,14 +261,14 @@ void opae_sim::avs_bus() {
if (dram_rd_it != dram_reads_.end()) { if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1; vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag; uint32_t addr = dram_rd_it->addr;
dram_reads_.erase(dram_rd_it); dram_reads_.erase(dram_rd_it);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag); /*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) { for (auto& req : dram_reads_) {
if (req.cycles_left != 0) if (req.cycles_left != 0)
printf(" !%0x", req.tag); printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else else
printf(" %0x", req.tag); printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
} }
printf("}\n");*/ printf("}\n");*/
} }
@@ -300,18 +300,27 @@ void opae_sim::avs_bus() {
} }
if (vortex_afu_->avs_read) { if (vortex_afu_->avs_read) {
assert(0 == vortex_afu_->mem_bank_select); assert(0 == vortex_afu_->mem_bank_select);
dram_rd_req_t dram_req; dram_rd_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); dram_req.addr = vortex_afu_->avs_address;
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr; ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_reads_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}
dram_reads_.emplace_back(dram_req); dram_reads_.emplace_back(dram_req);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr); /*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) { for (auto& req : dram_reads_) {
if (req.cycles_left != 0) if (req.cycles_left != 0)
printf(" !%0x", req.tag); printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else else
printf(" %0x", req.tag); printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
} }
printf("}\n");*/ printf("}\n");*/
} }

View File

@@ -41,7 +41,7 @@ private:
typedef struct { typedef struct {
int cycles_left; int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block; std::array<uint8_t, CACHE_BLOCK_SIZE> block;
uint32_t tag; uint32_t addr;
} dram_rd_req_t; } dram_rd_req_t;
typedef struct { typedef struct {

View File

@@ -147,7 +147,7 @@ module VX_fp_noncomp #(
case (frm_r) // use LSB to distinguish MIN and MAX case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i]; 3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i]; 4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value default: fminmax_res[i] = 'x; // don't care value
endcase endcase
end end
end end
@@ -160,7 +160,7 @@ module VX_fp_noncomp #(
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]}; 0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]}; 1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]}; 2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value default: fsgnj_res[i] = 'x; // don't care value
endcase endcase
end end
end end
@@ -192,8 +192,8 @@ module VX_fp_noncomp #(
`FRM_RDN: begin `FRM_RDN: begin
if (a_type[i].is_nan || b_type[i].is_nan) begin if (a_type[i].is_nan || b_type[i].is_nan) begin
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
// ** FEQS only raise NV flag when either operand is signaling NaN // FEQS only raise NV flag when either operand is signaling NaN
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0}; fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
end end
else begin else begin
fcmp_res[i] = {31'h0, ab_equal[i]}; fcmp_res[i] = {31'h0, ab_equal[i]};
@@ -201,7 +201,7 @@ module VX_fp_noncomp #(
end end
end end
default: begin default: begin
fcmp_res[i] = 32'hdeadbeaf; // don't care value fcmp_res[i] = 'x; // don't care value
fcmp_excp[i] = 5'h0; fcmp_excp[i] = 5'h0;
end end
endcase endcase
@@ -226,7 +226,7 @@ module VX_fp_noncomp #(
end end
//`FPU_MISC: //`FPU_MISC:
default: begin default: begin
case (frm) case (frm_r)
0,1,2: begin 0,1,2: begin
tmp_result[i] = fsgnj_res[i]; tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;

View File

@@ -4,7 +4,7 @@
#include <iomanip> #include <iomanip>
#define ENABLE_DRAM_STALLS #define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4 #define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16 #define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16 #define DRAM_STALLS_MODULO 16
@@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() {
} }
} else { } else {
dram_req_t dram_req; dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.tag = vortex_->dram_req_tag; dram_req.tag = vortex_->dram_req_tag;
dram_req.addr = vortex_->dram_req_addr;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_rsp_vec_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}
dram_rsp_vec_.emplace_back(dram_req); dram_rsp_vec_.emplace_back(dram_req);
} }
} }

View File

@@ -51,7 +51,8 @@ private:
typedef struct { typedef struct {
int cycles_left; int cycles_left;
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block; std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
unsigned tag; uint32_t tag;
uint32_t addr;
} dram_req_t; } dram_req_t;
std::unordered_map<int, std::stringstream> print_bufs_; std::unordered_map<int, std::stringstream> print_bufs_;