Add support for a multi-bank memory bus

This commit is contained in:
Blaise Tine
2021-05-04 07:32:03 -07:00
parent bdbf99c5b0
commit bde6a69ea0
11 changed files with 276 additions and 477 deletions

View File

@@ -137,16 +137,19 @@ void opae_sim::flush() {
void opae_sim::reset() {
host_buffers_.clear();
mem_reads_.clear();
host_buffers_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
vortex_afu_->avs_readdatavalid = 0;
vortex_afu_->avs_waitrequest = 0;
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
mem_reads_[b].clear();
vortex_afu_->avs_readdatavalid[b] = 0;
vortex_afu_->avs_waitrequest[b] = 0;
}
vortex_afu_->reset = 1;
@@ -268,79 +271,29 @@ void opae_sim::sTxPort_bus() {
}
void opae_sim::avs_bus() {
// update memory responses schedule
for (auto& rsp : mem_reads_) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_.end());
if (!mem_reads_.empty()
&& (0 == mem_reads_.begin()->cycles_left)) {
mem_rd_it = mem_reads_.begin();
}
// send memory response
vortex_afu_->avs_readdatavalid = 0;
if (mem_rd_it != mem_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_.erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
// update memory responses schedule
for (auto& rsp : mem_reads_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
printf("}\n");*/
}
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_.size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
if (vortex_afu_->avs_write) {
uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = vortex_afu_->avs_address * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
if (!mem_reads_[b].empty()
&& (0 == mem_reads_[b].begin()->cycles_left)) {
mem_rd_it = mem_reads_[b].begin();
}
if (vortex_afu_->avs_read) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_.emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
// send memory response
vortex_afu_->avs_readdatavalid[b] = 0;
if (mem_rd_it != mem_reads_[b].end()) {
vortex_afu_->avs_readdatavalid[b] = 1;
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_[b].erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
@@ -348,7 +301,59 @@ void opae_sim::avs_bus() {
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest = mem_stalled;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
if (vortex_afu_->avs_write[b]) {
uint64_t byteen = vortex_afu_->avs_byteenable[b];
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read[b]) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address[b];
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_[b]) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest[b] = mem_stalled;
}
}

View File

@@ -1,8 +1,7 @@
#pragma once
#include "verilated.h"
#include "verilated_stub.h"
//#include "verilated_stub.h"
#include "Vvortex_afu_shim.h"
#include "Vvortex_afu_shim__Syms.h"
@@ -20,7 +19,7 @@
#include <unordered_map>
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (Vvortex_afu_shim::VL_BITS_avs_writedata / 8)
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
@@ -83,7 +82,7 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::list<mem_rd_req_t> mem_reads_;
std::list<mem_rd_req_t> mem_reads_ [PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;

View File

@@ -1,126 +0,0 @@
#pragma once
// Signal-declaration macro overrides.
//
// Each macro below replaces any earlier definition of the same name (hence
// the #undef) and, in addition to declaring the data member `name`, emits an
// anonymous enum that records the signal's bit range as compile-time
// constants in the enclosing scope:
//
//   VL_MSB_<name>   most-significant bit index  (msb)
//   VL_LSB_<name>   least-significant bit index (lsb)
//   VL_BITS_<name>  width in bits               (msb - lsb + 1)
//
// The member types (CData, SData, IData, QData, WData) are not defined here;
// they must already be visible where the macros are expanded (presumably
// provided by Verilator's verilated.h -- confirm against the including file).
//
// The msb/lsb arguments are parenthesized so that expression arguments such
// as `4 + 3` or `2 - 1` yield the correct width; without the parentheses
// `msb - lsb + 1` would be evaluated with the wrong operator grouping.

// ---- VL_ST_SIG*: CData / SData / QData / IData / WData[words] members ----
#undef VL_ST_SIG8
#define VL_ST_SIG8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_ST_SIG16
#define VL_ST_SIG16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_ST_SIG64
#define VL_ST_SIG64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_ST_SIG
#define VL_ST_SIG(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_ST_SIGW
#define VL_ST_SIGW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// ---- VL_SIG*: same member types as above ----
#undef VL_SIG8
#define VL_SIG8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_SIG16
#define VL_SIG16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_SIG64
#define VL_SIG64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_SIG
#define VL_SIG(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_SIGW
#define VL_SIGW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// ---- VL_IN*: same member types as above ----
#undef VL_IN8
#define VL_IN8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_IN16
#define VL_IN16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_IN64
#define VL_IN64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_IN
#define VL_IN(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_INW
#define VL_INW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// ---- VL_INOUT*: same member types as above ----
#undef VL_INOUT8
#define VL_INOUT8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_INOUT16
#define VL_INOUT16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_INOUT64
#define VL_INOUT64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_INOUT
#define VL_INOUT(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_INOUTW
#define VL_INOUTW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// ---- VL_OUT*: same member types as above ----
#undef VL_OUT8
#define VL_OUT8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_OUT16
#define VL_OUT16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_OUT64
#define VL_OUT64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_OUT
#define VL_OUT(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_OUTW
#define VL_OUTW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

View File

@@ -72,17 +72,15 @@ module vortex_afu_shim (
output t_ccip_mmioData af2cp_sTxPort_c2_data,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)-1:0] mem_bank_select
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
);
t_if_ccip_Rx cp2af_sRxPort;
@@ -103,8 +101,7 @@ vortex_afu #(
.avs_read(avs_read),
.avs_byteenable(avs_byteenable),
.avs_burstcount(avs_burstcount),
.avs_readdatavalid(avs_readdatavalid),
.mem_bank_select(mem_bank_select)
.avs_readdatavalid(avs_readdatavalid)
);
t_if_ccip_c0_RxHdr c0_RxHdr;