diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index d62319a5..98a52ce6 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -137,16 +137,19 @@ void opae_sim::flush() { void opae_sim::reset() { - host_buffers_.clear(); - mem_reads_.clear(); + host_buffers_.clear(); cci_reads_.clear(); cci_writes_.clear(); vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0; vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0; - vortex_afu_->avs_readdatavalid = 0; - vortex_afu_->avs_waitrequest = 0; + + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + mem_reads_[b].clear(); + vortex_afu_->avs_readdatavalid[b] = 0; + vortex_afu_->avs_waitrequest[b] = 0; + } vortex_afu_->reset = 1; @@ -268,79 +271,29 @@ void opae_sim::sTxPort_bus() { } void opae_sim::avs_bus() { - // update memory responses schedule - for (auto& rsp : mem_reads_) { - if (rsp.cycles_left > 0) - rsp.cycles_left -= 1; - } - - // schedule memory responses in FIFO order - std::list::iterator mem_rd_it(mem_reads_.end()); - if (!mem_reads_.empty() - && (0 == mem_reads_.begin()->cycles_left)) { - mem_rd_it = mem_reads_.begin(); - } - - // send memory response - vortex_afu_->avs_readdatavalid = 0; - if (mem_rd_it != mem_reads_.end()) { - vortex_afu_->avs_readdatavalid = 1; - memcpy(vortex_afu_->avs_readdata, mem_rd_it->data.data(), MEM_BLOCK_SIZE); - uint32_t addr = mem_rd_it->addr; - mem_reads_.erase(mem_rd_it); - /*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE); - for (auto& req : mem_reads_) { - if (req.cycles_left != 0) - printf(" !%0x", req.addr * MEM_BLOCK_SIZE); - else - printf(" %0x", req.addr * MEM_BLOCK_SIZE); + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + // update memory responses schedule + for (auto& rsp : mem_reads_[b]) { + if (rsp.cycles_left > 0) + rsp.cycles_left -= 1; } - printf("}\n");*/ - } - // handle memory stalls - bool mem_stalled = false; -#ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_reads_.size() >= MEM_RQ_SIZE) { - mem_stalled = true; - } -#endif - - // process memory requests - if (!mem_stalled) { - assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write); - if (vortex_afu_->avs_write) { - uint64_t byteen = vortex_afu_->avs_byteenable; - unsigned base_addr = vortex_afu_->avs_address * MEM_BLOCK_SIZE; - uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - ram_[base_addr + i] = data[i]; - } - } - /*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]); - } - printf("\n");*/ + // schedule memory responses in FIFO order + std::list::iterator mem_rd_it(mem_reads_[b].end()); + if (!mem_reads_[b].empty() + && (0 == mem_reads_[b].begin()->cycles_left)) { + mem_rd_it = mem_reads_[b].begin(); } - if (vortex_afu_->avs_read) { - mem_rd_req_t mem_req; - mem_req.addr = vortex_afu_->avs_address; - ram_.read(vortex_afu_->avs_address * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data()); - mem_req.cycles_left = MEM_LATENCY; - for (auto& rsp : mem_reads_) { - if (mem_req.addr == rsp.addr) { - mem_req.cycles_left = rsp.cycles_left; - break; - } - } - mem_reads_.emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE); - for (auto& req : mem_reads_) { + + // send memory response + vortex_afu_->avs_readdatavalid[b] = 0; + if (mem_rd_it != mem_reads_[b].end()) { + vortex_afu_->avs_readdatavalid[b] = 1; + memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE); + uint32_t addr = mem_rd_it->addr; + mem_reads_[b].erase(mem_rd_it); + /*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE); + for (auto& req : mem_reads_[b]) { if (req.cycles_left != 0) printf(" !%0x", req.addr * MEM_BLOCK_SIZE); else @@ -348,7 +301,59 @@ void opae_sim::avs_bus() { } printf("}\n");*/ } - } - vortex_afu_->avs_waitrequest = mem_stalled; + // handle memory stalls + bool mem_stalled = false; + #ifdef ENABLE_MEM_STALLS + if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { + mem_stalled = true; + } else + if (mem_reads_[b].size() >= MEM_RQ_SIZE) { + mem_stalled = true; + } + #endif + + // process memory requests + if (!mem_stalled) { + assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]); + if (vortex_afu_->avs_write[b]) { + uint64_t byteen = vortex_afu_->avs_byteenable[b]; + unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE; + uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + ram_[base_addr + i] = data[i]; + } + } + /*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n");*/ + } + if (vortex_afu_->avs_read[b]) { + mem_rd_req_t mem_req; + mem_req.addr = vortex_afu_->avs_address[b]; + ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data()); + mem_req.cycles_left = MEM_LATENCY; + for (auto& rsp : mem_reads_[b]) { + if (mem_req.addr == rsp.addr) { + mem_req.cycles_left = rsp.cycles_left; + break; + } + } + mem_reads_[b].emplace_back(mem_req); + /*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE); + for (auto& req : mem_reads_[b]) { + if (req.cycles_left != 0) + printf(" !%0x", req.addr * MEM_BLOCK_SIZE); + else + printf(" %0x", req.addr * MEM_BLOCK_SIZE); + } + printf("}\n");*/ + } + } + + vortex_afu_->avs_waitrequest[b] = mem_stalled; + } } \ No newline at end of file diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 1d9bef54..e8ecd4a3 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -1,8 +1,7 @@ #pragma once #include "verilated.h" -#include "verilated_stub.h" - +//#include "verilated_stub.h" #include "Vvortex_afu_shim.h" #include "Vvortex_afu_shim__Syms.h" @@ -20,7 +19,7 @@ #include #undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (Vvortex_afu_shim::VL_BITS_avs_writedata / 8) +#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) #define CACHE_BLOCK_SIZE 64 @@ -83,7 +82,7 @@ private: std::unordered_map host_buffers_; - std::list mem_reads_; + std::list mem_reads_ [PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; std::list cci_reads_; diff --git a/driver/opae/vlsim/verilated_stub.h b/driver/opae/vlsim/verilated_stub.h deleted file mode 100644 index ad79adac..00000000 --- a/driver/opae/vlsim/verilated_stub.h +++ /dev/null @@ -1,126 +0,0 @@ -#pragma once - -#undef VL_ST_SIG8 -#define VL_ST_SIG8(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - CData name - -#undef VL_ST_SIG16 -#define VL_ST_SIG16(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - SData name - -#undef VL_ST_SIG64 -#define VL_ST_SIG64(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - QData name - -#undef VL_ST_SIG -#define VL_ST_SIG(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - IData name - -#undef VL_ST_SIGW -#define VL_ST_SIGW(name, msb, lsb, words) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - WData name[words] - -#undef VL_SIG8 -#define VL_SIG8(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - CData name - -#undef VL_SIG16 -#define VL_SIG16(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - SData name - -#undef VL_SIG64 -#define VL_SIG64(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - QData name - -#undef VL_SIG -#define VL_SIG(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - IData name - -#undef VL_SIGW -#define VL_SIGW(name, msb, lsb, words) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - WData name[words] - -#undef VL_IN8 -#define VL_IN8(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - CData name - -#undef VL_IN16 -#define VL_IN16(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - SData name - -#undef VL_IN64 -#define VL_IN64(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - QData name - -#undef VL_IN -#define VL_IN(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - IData name - -#undef VL_INW -#define VL_INW(name, msb, lsb, words) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - WData name[words] - -#undef VL_INOUT8 -#define VL_INOUT8(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - CData name - -#undef VL_INOUT16 -#define VL_INOUT16(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - SData name - -#undef VL_INOUT64 -#define VL_INOUT64(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - QData name - -#undef VL_INOUT -#define VL_INOUT(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - IData name - -#undef VL_INOUTW -#define VL_INOUTW(name, msb, lsb, words) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - WData name[words] - -#undef VL_OUT8 -#define VL_OUT8(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - CData name - -#undef VL_OUT16 -#define VL_OUT16(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - SData name - -#undef VL_OUT64 -#define VL_OUT64(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - QData name - -#undef VL_OUT -#define VL_OUT(name, msb, lsb) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - IData name - -#undef VL_OUTW -#define VL_OUTW(name, msb, lsb, words) \ - enum { VL_MSB_##name = msb, VL_LSB_##name = lsb, VL_BITS_##name = (msb - lsb + 1) }; \ - WData name[words] diff --git a/driver/opae/vlsim/vortex_afu_shim.sv b/driver/opae/vlsim/vortex_afu_shim.sv index ed7537c5..9255bfaa 100644 --- a/driver/opae/vlsim/vortex_afu_shim.sv +++ b/driver/opae/vlsim/vortex_afu_shim.sv @@ -72,17 +72,15 @@ module vortex_afu_shim ( output t_ccip_mmioData af2cp_sTxPort_c2_data, // Avalon signals for local memory access - output t_local_mem_data avs_writedata, - input t_local_mem_data avs_readdata, - output t_local_mem_addr avs_address, - input logic avs_waitrequest, - output logic avs_write, - output logic avs_read, - output t_local_mem_byte_mask avs_byteenable, - output t_local_mem_burst_cnt avs_burstcount, - input avs_readdatavalid, - - output logic [$clog2(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)-1:0] mem_bank_select + output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], + input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] ); t_if_ccip_Rx cp2af_sRxPort; @@ -103,8 +101,7 @@ vortex_afu #( .avs_read(avs_read), .avs_byteenable(avs_byteenable), .avs_burstcount(avs_burstcount), - .avs_readdatavalid(avs_readdatavalid), - .mem_bank_select(mem_bank_select) + .avs_readdatavalid(avs_readdatavalid) ); t_if_ccip_c0_RxHdr c0_RxHdr; diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index ff7d1dd1..81dc78bc 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -1,6 +1,7 @@ `include "VX_define.vh" module VX_avs_wrapper #( + parameter NUM_BANKS = 1, parameter AVS_DATA_WIDTH = 1, parameter AVS_ADDR_WIDTH = 1, parameter AVS_BURST_WIDTH = 1, @@ -31,103 +32,141 @@ module VX_avs_wrapper #( input wire mem_rsp_ready, // AVS bus - output wire [AVS_DATA_WIDTH-1:0] avs_writedata, - input wire [AVS_DATA_WIDTH-1:0] avs_readdata, - output wire [AVS_ADDR_WIDTH-1:0] avs_address, - input wire avs_waitrequest, - output wire avs_write, - output wire avs_read, - output wire [AVS_BYTEENW-1:0] avs_byteenable, - output wire [AVS_BURST_WIDTH-1:0] avs_burstcount, - input avs_readdatavalid, - output wire [AVS_BANKS_BITS-1:0] avs_bankselect + output wire [AVS_DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], + input wire [AVS_DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], + output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + input wire avs_waitrequest [NUM_BANKS], + output wire avs_write [NUM_BANKS], + output wire avs_read [NUM_BANKS], + output wire [AVS_BYTEENW-1:0] avs_byteenable [NUM_BANKS], + output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS], + input avs_readdatavalid [NUM_BANKS] ); - reg [AVS_BANKS_BITS-1:0] avs_bankselect_r; - reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r; - wire avs_reqq_push = mem_req_valid && mem_req_ready && !mem_req_rw; - wire avs_reqq_pop = mem_rsp_valid && mem_rsp_ready; + localparam BANK_ADDRW = $clog2(NUM_BANKS); - wire avs_rspq_push = avs_readdatavalid; - wire avs_rspq_pop = avs_reqq_pop; - wire avs_rspq_empty; + // Requests handling - wire rsp_queue_going_full; - wire [RD_QUEUE_ADDR_WIDTH-1:0] rsp_queue_size; - VX_pending_size #( - .SIZE (RD_QUEUE_SIZE) - ) pending_size ( - .clk (clk), - .reset (reset), - .push (avs_reqq_push), - .pop (avs_rspq_pop), - `UNUSED_PIN (empty), - .full (rsp_queue_going_full), - .size (rsp_queue_size) - ); - `UNUSED_VAR (rsp_queue_size) - - always @(posedge clk) begin - avs_burstcount_r <= 1; - avs_bankselect_r <= 0; - end + reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r; - VX_fifo_queue #( - .DATAW (REQ_TAG_WIDTH), - .SIZE (RD_QUEUE_SIZE) - ) rd_req_queue ( - .clk (clk), - .reset (reset), - .push (avs_reqq_push), - .pop (avs_reqq_pop), - .data_in (mem_req_tag), - .data_out (mem_rsp_tag), - `UNUSED_PIN (empty), - `UNUSED_PIN (full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (size) + wire [NUM_BANKS-1:0] avs_reqq_pop; + wire [NUM_BANKS-1:0] req_queue_going_full; + wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size; + wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_data_out; + + wire [BANK_ADDRW-1:0] req_bank_sel = mem_req_addr [BANK_ADDRW-1:0]; + + wire avs_reqq_push = mem_req_valid && !mem_req_rw && mem_req_ready; + + for (genvar i = 0; i < NUM_BANKS; i++) begin + + VX_pending_size #( + .SIZE (RD_QUEUE_SIZE) + ) pending_size ( + .clk (clk), + .reset (reset), + .push (avs_reqq_push && (req_bank_sel == i)), + .pop (avs_reqq_pop[i]), + `UNUSED_PIN (empty), + .full (req_queue_going_full[i]), + .size (req_queue_size[i]) + ); + `UNUSED_VAR (req_queue_size) + + always @(posedge clk) begin + avs_burstcount_r <= 1; + end + + VX_fifo_queue #( + .DATAW (REQ_TAG_WIDTH), + .SIZE (RD_QUEUE_SIZE) + ) rd_req_queue ( + .clk (clk), + .reset (reset), + .push (avs_reqq_push && (req_bank_sel == i)), + .pop (avs_reqq_pop[i]), + .data_in (mem_req_tag), + .data_out (avs_reqq_data_out[i]), + `UNUSED_PIN (empty), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) + ); + end + + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign avs_read[i] = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i); + assign avs_write[i] = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i); + assign avs_address[i] = mem_req_addr; + assign avs_byteenable[i] = mem_req_byteen; + assign avs_writedata[i] = mem_req_data; + assign avs_burstcount[i] = avs_burstcount_r; + end + + assign mem_req_ready = !(avs_waitrequest[req_bank_sel] || req_queue_going_full[req_bank_sel]); + + // Responses handling + + wire [NUM_BANKS-1:0] rsp_arb_valid_in; + wire [NUM_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in; + wire [NUM_BANKS-1:0] rsp_arb_ready_in; + + wire [NUM_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out; + wire [NUM_BANKS-1:0] avs_rspq_empty; + + for (genvar i = 0; i < NUM_BANKS; i++) begin + + VX_fifo_queue #( + .DATAW (AVS_DATA_WIDTH), + .SIZE (RD_QUEUE_SIZE) + ) rd_rsp_queue ( + .clk (clk), + .reset (reset), + .push (avs_readdatavalid[i]), + .pop (avs_reqq_pop[i]), + .data_in (avs_readdata[i]), + .data_out (avs_rspq_data_out[i]), + .empty (avs_rspq_empty[i]), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) + ); + + end + + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign rsp_arb_valid_in[i] = !avs_rspq_empty[i]; + assign rsp_arb_data_in[i] = {avs_rspq_data_out[i], avs_reqq_data_out[i]}; + assign avs_reqq_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i]; + end + + VX_stream_arbiter #( + .NUM_REQS (NUM_BANKS), + .DATAW (AVS_DATA_WIDTH+REQ_TAG_WIDTH), + .BUFFERED (0) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (rsp_arb_valid_in), + .data_in (rsp_arb_data_in), + .ready_in (rsp_arb_ready_in), + .valid_out (mem_rsp_valid), + .data_out ({mem_rsp_data, mem_rsp_tag}), + .ready_out (mem_rsp_ready) ); - VX_fifo_queue #( - .DATAW (AVS_DATA_WIDTH), - .SIZE (RD_QUEUE_SIZE) - ) rd_rsp_queue ( - .clk (clk), - .reset (reset), - .push (avs_rspq_push), - .pop (avs_rspq_pop), - .data_in (avs_readdata), - .data_out (mem_rsp_data), - .empty (avs_rspq_empty), - `UNUSED_PIN (full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (size) - ); - - assign avs_read = mem_req_valid && !mem_req_rw && !rsp_queue_going_full; - assign avs_write = mem_req_valid && mem_req_rw && !rsp_queue_going_full; - assign avs_address = mem_req_addr; - assign avs_byteenable = mem_req_byteen; - assign avs_writedata = mem_req_data; - assign avs_burstcount = avs_burstcount_r; - assign avs_bankselect = avs_bankselect_r; - - assign mem_req_ready = !avs_waitrequest && !rsp_queue_going_full; - - assign mem_rsp_valid = !avs_rspq_empty; - `ifdef DBG_PRINT_AVS always @(posedge clk) begin if (mem_req_valid && mem_req_ready) begin if (mem_req_rw) $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data); else - $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, rsp_queue_size); + $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size); end if (mem_rsp_valid && mem_rsp_ready) begin - $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, rsp_queue_size); + $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, req_queue_size); end end `endif diff --git a/hw/rtl/afu/ccip_std_afu.sv b/hw/rtl/afu/ccip_std_afu.sv index 4534d40c..2adea591 100644 --- a/hw/rtl/afu/ccip_std_afu.sv +++ b/hw/rtl/afu/ccip_std_afu.sv @@ -77,30 +77,28 @@ module ccip_std_afu #( // User AFU goes here // ==================================================================== - // - // vortex_afu depends on CCI-P and local memory being in the same - // clock domain. This is accomplished by choosing a common clock - // in the AFU's JSON description. The platform instantiates clock- - // crossing shims automatically, as needed. - // + t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS]; + logic avs_waitrequest [NUM_LOCAL_MEM_BANKS]; + t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS]; + logic avs_readdatavalid [NUM_LOCAL_MEM_BANKS]; + t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS]; + t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS]; + t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS]; + logic avs_write [NUM_LOCAL_MEM_BANKS]; + logic avs_read [NUM_LOCAL_MEM_BANKS]; - // - // Memory banks are used very simply here. Only bank is active at - // a time, selected by mem_bank_select. mem_bank_select is set - // by a CSR from the host. - // - t_local_mem_byte_mask avs_byteenable; - logic avs_waitrequest; - t_local_mem_data avs_readdata; - logic avs_readdatavalid; - t_local_mem_burst_cnt avs_burstcount; - t_local_mem_data avs_writedata; - t_local_mem_addr avs_address; - logic avs_write; - logic avs_read; - - // choose which memory bank to test - logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select; + for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin + assign local_mem[b].burstcount = avs_burstcount[b]; + assign local_mem[b].writedata = avs_writedata[b]; + assign local_mem[b].address = avs_address[b]; + assign local_mem[b].byteenable = avs_byteenable[b]; + assign local_mem[b].write = avs_write[b]; + assign local_mem[b].read = avs_read[b]; + + assign avs_waitrequest[b] = local_mem[b].waitrequest; + assign avs_readdata[b] = local_mem[b].readdata; + assign avs_readdatavalid[b] = local_mem[b].readdatavalid; + end vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) @@ -108,6 +106,9 @@ module ccip_std_afu #( .clk (clk), .reset (reset_T1), + .cp2af_sRxPort (cp2af_sRx_T1), + .af2cp_sTxPort (af2cp_sTx_T0), + .avs_writedata (avs_writedata), .avs_readdata (avs_readdata), .avs_address (avs_address), @@ -116,52 +117,7 @@ module ccip_std_afu #( .avs_read (avs_read), .avs_byteenable (avs_byteenable), .avs_burstcount (avs_burstcount), - .avs_readdatavalid (avs_readdatavalid), - .mem_bank_select (mem_bank_select), - - .cp2af_sRxPort (cp2af_sRx_T1), - .af2cp_sTxPort (af2cp_sTx_T0) - ); - - // - // Export the local memory interface signals as vectors so that bank - // selection can use array syntax. - // - logic avs_waitrequest_v[NUM_LOCAL_MEM_BANKS]; - t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS]; - logic avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS]; - - genvar b; - generate - for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1) - begin : lmb - always_comb - begin - // Local memory to AFU signals - avs_waitrequest_v[b] = local_mem[b].waitrequest; - avs_readdata_v[b] = local_mem[b].readdata; - avs_readdatavalid_v[b] = local_mem[b].readdatavalid; - - // Replicate address and write data to all banks. Only - // the request signals have to be bank-specific. - local_mem[b].burstcount = avs_burstcount; - local_mem[b].writedata = avs_writedata; - local_mem[b].address = avs_address; - local_mem[b].byteenable = avs_byteenable; - - // Request a write to this bank? - local_mem[b].write = avs_write && - ($bits(mem_bank_select)'(b) == mem_bank_select); - - // Request a read from this bank? - local_mem[b].read = avs_read && - ($bits(mem_bank_select)'(b) == mem_bank_select); - end - end - endgenerate - - assign avs_waitrequest = avs_waitrequest_v[mem_bank_select]; - assign avs_readdata = avs_readdata_v[mem_bank_select]; - assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select]; + .avs_readdatavalid (avs_readdatavalid) + ); endmodule diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 392c775d..2ac0f075 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -26,17 +26,15 @@ module vortex_afu #( output t_if_ccip_Tx af2cp_sTxPort, // Avalon signals for local memory access - output t_local_mem_data avs_writedata, - input t_local_mem_data avs_readdata, - output t_local_mem_addr avs_address, - input logic avs_waitrequest, - output logic avs_write, - output logic avs_read, - output t_local_mem_byte_mask avs_byteenable, - output t_local_mem_burst_cnt avs_burstcount, - input avs_readdatavalid, - - output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select + output t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS], + input t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS], + output t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS], + input logic avs_waitrequest [NUM_LOCAL_MEM_BANKS], + output logic avs_write [NUM_LOCAL_MEM_BANKS], + output logic avs_read [NUM_LOCAL_MEM_BANKS], + output t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS], + output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS], + input avs_readdatavalid [NUM_LOCAL_MEM_BANKS] ); localparam RESET_DELAY = 3; @@ -636,6 +634,7 @@ VX_mem_arb #( //-- VX_avs_wrapper #( + .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .AVS_DATA_WIDTH (LMEM_LINE_WIDTH), .AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH), .AVS_BURST_WIDTH (LMEM_BURST_CTRW), @@ -670,8 +669,7 @@ VX_avs_wrapper #( .avs_read (avs_read), .avs_byteenable (avs_byteenable), .avs_burstcount (avs_burstcount), - .avs_readdatavalid(avs_readdatavalid), - .avs_bankselect (mem_bank_select) + .avs_readdatavalid(avs_readdatavalid) ); // CCI-P Read Request /////////////////////////////////////////////////////////// diff --git a/hw/rtl/tex_unit/VX_tex_mgr.v b/hw/rtl/tex_unit/VX_tex_mgr.v deleted file mode 100644 index a7de9180..00000000 --- a/hw/rtl/tex_unit/VX_tex_mgr.v +++ /dev/null @@ -1,19 +0,0 @@ -`include "VX_platform.vh" - -module VX_tex_mgr ( - input wire clk, - input wire reset -); - - //-- - -endmodule - - - - - - - - - diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v deleted file mode 100644 index a7c38cfe..00000000 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ /dev/null @@ -1,50 +0,0 @@ -`include "VX_platform.vh" - -module VX_tex_unit #( - parameter TADDRW = 32, - parameter MADDRW = 32, - parameter DATAW = 32, - parameter MAXWTW = 8, - parameter MAXHTW = 8, - parameter MAXFTW = 2, - parameter MAXFMW = 1, - parameter MAXAMW = 2, - parameter TAGW = 16, - - parameter NUMCRQS = 32 -) ( - input wire clk, - input wire reset, - - // Texture Request - input wire tex_req_valid, - input wire [TADDRW-1:0] tex_req_u, - input wire [TADDRW-1:0] tex_req_v, - input wire [MADDRW-1:0] tex_req_addr, - input wire [MAXWTW-1:0] tex_req_width, - input wire [MAXHTW-1:0] tex_req_height, - input wire [MAXFTW-1:0] tex_req_format, - input wire [MAXFMW-1:0] tex_req_filter, - input wire [MAXAMW-1:0] tex_req_clamp, - input wire [TAGW-1:0] tex_req_tag, - output wire tex_req_ready, - - // Texture Response - output wire tex_rsp_valid, - output wire [TAGW-1:0] tex_rsp_tag, - input wire [DATAW-1:0] tex_rsp_data, - input wire tex_rsp_ready, - - // Cache Request - output wire [NUMCRQS-1:0] cache_req_valids, - output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs, - input wire cache_req_ready, - - // Cache Response - input wire cache_rsp_valid, - input wire [MADDRW-1:0] cache_rsp_addr, - input wire [DATAW-1:0] cache_rsp_data, - output wire cache_rsp_ready -); - -endmodule \ No newline at end of file diff --git a/hw/syn/opae/Makefile b/hw/syn/opae/Makefile index 38f21fba..d820df9a 100644 --- a/hw/syn/opae/Makefile +++ b/hw/syn/opae/Makefile @@ -1,6 +1,6 @@ -ASE_BUILD_DIR ?= build_ase -FPGA_BUILD_DIR ?= build_fpga DEVICE_FAMILY ?= arria10 +ASE_BUILD_DIR ?= build_ase_$(DEVICE_FAMILY) +FPGA_BUILD_DIR ?= build_fpga_$(DEVICE_FAMILY) RTL_DIR=../../rtl ifeq ($(shell which qsub-synth),) diff --git a/hw/syn/quartus/Makefile b/hw/syn/quartus/Makefile index 4dd40a40..fa002563 100644 --- a/hw/syn/quartus/Makefile +++ b/hw/syn/quartus/Makefile @@ -3,18 +3,18 @@ BUILDIR ?= build .PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64 unittest: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p unittest/$(BUILDIR) + cp core/Makefile unittest/$(BUILDIR) $(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest//$(BUILDIR)build.log 2>&1 & pipeline: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p pipeline/$(BUILDIR) + cp core/Makefile pipeline/$(BUILDIR) $(MAKE) -C pipeline/$(BUILDIR) clean && $(MAKE) -C pipeline/$(BUILDIR) > pipeline/$(BUILDIR)/build.log 2>&1 & cache: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p cache/$(BUILDIR) + cp core/Makefile cache/$(BUILDIR) $(MAKE) -C cache/$(BUILDIR) clean && $(MAKE) -C cache/$(BUILDIR) > cache/$(BUILDIR)/build.log 2>&1 & core: @@ -23,41 +23,41 @@ core: $(MAKE) -C core/$(BUILDIR) clean && $(MAKE) -C core/$(BUILDIR) > core/$(BUILDIR)/build.log 2>&1 & vortex: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p vortex/$(BUILDIR) + cp core/Makefile vortex/$(BUILDIR) $(MAKE) -C vortex/$(BUILDIR) clean && $(MAKE) -C vortex/$(BUILDIR) > vortex/$(BUILDIR)/build.log 2>&1 & top1: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top1/$(BUILDIR) + cp core/Makefile top1/$(BUILDIR) $(MAKE) -C top1/$(BUILDIR) clean && $(MAKE) -C top1/$(BUILDIR) > top1/$(BUILDIR)/build.log 2>&1 & top2: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top2/$(BUILDIR) + cp core/Makefile top2/$(BUILDIR) $(MAKE) -C top2/$(BUILDIR) clean && $(MAKE) -C top2/$(BUILDIR) > top2/$(BUILDIR)/build.log 2>&1 & top4: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top4/$(BUILDIR) + cp core/Makefile top4/$(BUILDIR) $(MAKE) -C top4/$(BUILDIR) clean && $(MAKE) -C top4/$(BUILDIR) > top4/$(BUILDIR)/build.log 2>&1 & top8: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top8/$(BUILDIR) + cp core/Makefile top8/$(BUILDIR) $(MAKE) -C top8/$(BUILDIR) clean && $(MAKE) -C top8/$(BUILDIR) > top8/$(BUILDIR)/build.log 2>&1 & top16: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top16/$(BUILDIR) + cp core/Makefile top16/$(BUILDIR) $(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)build.log 2>&1 & top32: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top32/$(BUILDIR) + cp core/Makefile top32/$(BUILDIR) $(MAKE) -C top32/$(BUILDIR) clean && $(MAKE) -C top32/$(BUILDIR) > top32/$(BUILDIR)/build.log 2>&1 & top64: - mkdir -p core/$(BUILDIR) - cp core/Makefile core/$(BUILDIR) + mkdir -p top64/$(BUILDIR) + cp core/Makefile top64/$(BUILDIR) $(MAKE) -C top64/$(BUILDIR) clean && $(MAKE) -C top64/$(BUILDIR) > top64/$(BUILDIR)/build.log 2>&1 & \ No newline at end of file