fixed fp_noncomp bug, ci toolchain script update, increased DRAM latency to 100 cycles
This commit is contained in:
@@ -5,6 +5,8 @@ set -e
|
|||||||
|
|
||||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||||
|
|
||||||
|
DESTDIR="${DESTDIR:=/opt}"
|
||||||
|
|
||||||
riscv()
|
riscv()
|
||||||
{
|
{
|
||||||
for x in {a..o}
|
for x in {a..o}
|
||||||
@@ -14,7 +16,7 @@ riscv()
|
|||||||
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
|
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
|
||||||
tar -xvf riscv-gnu-toolchain.tar.bz2
|
tar -xvf riscv-gnu-toolchain.tar.bz2
|
||||||
rm -f riscv-gnu-toolchain.tar.bz2*
|
rm -f riscv-gnu-toolchain.tar.bz2*
|
||||||
sudo cp -r riscv-gnu-toolchain /opt/
|
cp -r riscv-gnu-toolchain $DESTDIR
|
||||||
rm -rf riscv-gnu-toolchain
|
rm -rf riscv-gnu-toolchain
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,7 +29,7 @@ llvm()
|
|||||||
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
|
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
|
||||||
tar -xvf llvm-riscv.tar.bz2
|
tar -xvf llvm-riscv.tar.bz2
|
||||||
rm -f llvm-riscv.tar.bz2*
|
rm -f llvm-riscv.tar.bz2*
|
||||||
sudo cp -r llvm-riscv /opt/
|
cp -r llvm-riscv $DESTDIR
|
||||||
rm -rf llvm-riscv
|
rm -rf llvm-riscv
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -36,7 +38,7 @@ pocl()
|
|||||||
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
|
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
|
||||||
tar -xvf pocl.tar.bz2
|
tar -xvf pocl.tar.bz2
|
||||||
rm -f pocl.tar.bz2
|
rm -f pocl.tar.bz2
|
||||||
sudo cp -r pocl /opt/
|
cp -r pocl $DESTDIR
|
||||||
rm -rf pocl
|
rm -rf pocl
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -45,7 +47,7 @@ verilator()
|
|||||||
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
|
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
|
||||||
tar -xvf verilator.tar.bz2
|
tar -xvf verilator.tar.bz2
|
||||||
rm -f verilator.tar.bz2
|
rm -f verilator.tar.bz2
|
||||||
sudo cp -r verilator /opt/
|
cp -r verilator $DESTDIR
|
||||||
rm -rf verilator
|
rm -rf verilator
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
#define CCI_WQ_SIZE 16
|
#define CCI_WQ_SIZE 16
|
||||||
|
|
||||||
#define ENABLE_DRAM_STALLS
|
#define ENABLE_DRAM_STALLS
|
||||||
#define DRAM_LATENCY 4
|
#define DRAM_LATENCY 100
|
||||||
#define DRAM_RQ_SIZE 16
|
#define DRAM_RQ_SIZE 16
|
||||||
#define DRAM_STALLS_MODULO 16
|
#define DRAM_STALLS_MODULO 16
|
||||||
|
|
||||||
@@ -261,14 +261,14 @@ void opae_sim::avs_bus() {
|
|||||||
if (dram_rd_it != dram_reads_.end()) {
|
if (dram_rd_it != dram_reads_.end()) {
|
||||||
vortex_afu_->avs_readdatavalid = 1;
|
vortex_afu_->avs_readdatavalid = 1;
|
||||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||||
uint32_t tag = dram_rd_it->tag;
|
uint32_t addr = dram_rd_it->addr;
|
||||||
dram_reads_.erase(dram_rd_it);
|
dram_reads_.erase(dram_rd_it);
|
||||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
|
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
|
||||||
for (auto& req : dram_reads_) {
|
for (auto& req : dram_reads_) {
|
||||||
if (req.cycles_left != 0)
|
if (req.cycles_left != 0)
|
||||||
printf(" !%0x", req.tag);
|
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||||
else
|
else
|
||||||
printf(" %0x", req.tag);
|
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||||
}
|
}
|
||||||
printf("}\n");*/
|
printf("}\n");*/
|
||||||
}
|
}
|
||||||
@@ -300,18 +300,27 @@ void opae_sim::avs_bus() {
|
|||||||
}
|
}
|
||||||
if (vortex_afu_->avs_read) {
|
if (vortex_afu_->avs_read) {
|
||||||
assert(0 == vortex_afu_->mem_bank_select);
|
assert(0 == vortex_afu_->mem_bank_select);
|
||||||
dram_rd_req_t dram_req;
|
dram_rd_req_t dram_req;
|
||||||
dram_req.cycles_left = DRAM_LATENCY;
|
|
||||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
dram_req.addr = vortex_afu_->avs_address;
|
||||||
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
|
|
||||||
dram_req.tag = base_addr;
|
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||||
|
|
||||||
|
dram_req.cycles_left = DRAM_LATENCY;
|
||||||
|
for (auto& req : dram_reads_) {
|
||||||
|
if (req.addr == dram_req.addr) {
|
||||||
|
dram_req.cycles_left = req.cycles_left;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dram_reads_.emplace_back(dram_req);
|
dram_reads_.emplace_back(dram_req);
|
||||||
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
|
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
|
||||||
for (auto& req : dram_reads_) {
|
for (auto& req : dram_reads_) {
|
||||||
if (req.cycles_left != 0)
|
if (req.cycles_left != 0)
|
||||||
printf(" !%0x", req.tag);
|
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||||
else
|
else
|
||||||
printf(" %0x", req.tag);
|
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||||
}
|
}
|
||||||
printf("}\n");*/
|
printf("}\n");*/
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ private:
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
int cycles_left;
|
int cycles_left;
|
||||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||||
uint32_t tag;
|
uint32_t addr;
|
||||||
} dram_rd_req_t;
|
} dram_rd_req_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ module VX_fp_noncomp #(
|
|||||||
case (frm_r) // use LSB to distinguish MIN and MAX
|
case (frm_r) // use LSB to distinguish MIN and MAX
|
||||||
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
||||||
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
||||||
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
default: fminmax_res[i] = 'x; // don't care value
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -160,7 +160,7 @@ module VX_fp_noncomp #(
|
|||||||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
default: fsgnj_res[i] = 'x; // don't care value
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -192,8 +192,8 @@ module VX_fp_noncomp #(
|
|||||||
`FRM_RDN: begin
|
`FRM_RDN: begin
|
||||||
if (a_type[i].is_nan || b_type[i].is_nan) begin
|
if (a_type[i].is_nan || b_type[i].is_nan) begin
|
||||||
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
|
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
|
||||||
// ** FEQS only raise NV flag when either operand is signaling NaN
|
// FEQS only raise NV flag when either operand is signaling NaN
|
||||||
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
|
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
|
||||||
end
|
end
|
||||||
else begin
|
else begin
|
||||||
fcmp_res[i] = {31'h0, ab_equal[i]};
|
fcmp_res[i] = {31'h0, ab_equal[i]};
|
||||||
@@ -201,7 +201,7 @@ module VX_fp_noncomp #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
default: begin
|
default: begin
|
||||||
fcmp_res[i] = 32'hdeadbeaf; // don't care value
|
fcmp_res[i] = 'x; // don't care value
|
||||||
fcmp_excp[i] = 5'h0;
|
fcmp_excp[i] = 5'h0;
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
@@ -226,7 +226,7 @@ module VX_fp_noncomp #(
|
|||||||
end
|
end
|
||||||
//`FPU_MISC:
|
//`FPU_MISC:
|
||||||
default: begin
|
default: begin
|
||||||
case (frm)
|
case (frm_r)
|
||||||
0,1,2: begin
|
0,1,2: begin
|
||||||
tmp_result[i] = fsgnj_res[i];
|
tmp_result[i] = fsgnj_res[i];
|
||||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
|
||||||
#define ENABLE_DRAM_STALLS
|
#define ENABLE_DRAM_STALLS
|
||||||
#define DRAM_LATENCY 4
|
#define DRAM_LATENCY 100
|
||||||
#define DRAM_RQ_SIZE 16
|
#define DRAM_RQ_SIZE 16
|
||||||
#define DRAM_STALLS_MODULO 16
|
#define DRAM_STALLS_MODULO 16
|
||||||
|
|
||||||
@@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dram_req_t dram_req;
|
dram_req_t dram_req;
|
||||||
dram_req.cycles_left = DRAM_LATENCY;
|
|
||||||
dram_req.tag = vortex_->dram_req_tag;
|
dram_req.tag = vortex_->dram_req_tag;
|
||||||
|
dram_req.addr = vortex_->dram_req_addr;
|
||||||
|
|
||||||
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
||||||
|
|
||||||
|
dram_req.cycles_left = DRAM_LATENCY;
|
||||||
|
for (auto& req : dram_rsp_vec_) {
|
||||||
|
if (req.addr == dram_req.addr) {
|
||||||
|
dram_req.cycles_left = req.cycles_left;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
dram_rsp_vec_.emplace_back(dram_req);
|
dram_rsp_vec_.emplace_back(dram_req);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,7 +51,8 @@ private:
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
int cycles_left;
|
int cycles_left;
|
||||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||||
unsigned tag;
|
uint32_t tag;
|
||||||
|
uint32_t addr;
|
||||||
} dram_req_t;
|
} dram_req_t;
|
||||||
|
|
||||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||||
|
|||||||
Reference in New Issue
Block a user