diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index 4f713358..b0391efd 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -11,7 +11,7 @@ #define RESET_DELAY 2 #define ENABLE_DRAM_STALLS -#define DRAM_LATENCY 300 +#define DRAM_LATENCY 24 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16 diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 34476b11..4cd04425 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -77,8 +77,7 @@ module VX_lsu_unit #( VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), - .RESETW (1), - .DEPTH (0) + .RESETW (1) ) req_pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 610935f7..fb20ad12 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -98,7 +98,7 @@ module VX_bank #( `endif wire drsq_pop; - wire drsq_empty; + wire drsq_empty, drsp_empty_next; wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata; @@ -108,11 +108,9 @@ module VX_bank #( wire drsq_full; assign dram_rsp_ready = !drsq_full; - VX_fifo_queue #( + VX_input_queue #( .DATAW ($bits(dram_rsp_data)), - .SIZE (DRSQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) + .SIZE (DRSQ_SIZE) ) dram_rsp_queue ( .clk (clk), .reset (reset), @@ -121,72 +119,98 @@ module VX_bank #( .data_in (dram_rsp_data), .data_out(drsq_filldata), .empty (drsq_empty), + `UNUSED_PIN (data_out_next), + .empty_next(drsp_empty_next), .full (drsq_full), `UNUSED_PIN (size) ); end else begin `UNUSED_VAR (dram_rsp_valid) `UNUSED_VAR (dram_rsp_data) - assign drsq_empty = 1; - assign drsq_filldata = 0; - assign dram_rsp_ready = 0; + assign drsq_empty = 1; + assign drsp_empty_next = 1; + assign drsq_filldata = 0; + assign dram_rsp_ready = 0; end - wire creq_pop; - wire creq_empty; + wire creq_pop; wire creq_full; - wire [`REQS_BITS-1:0] creq_tid_st0; - wire creq_rw_st0; - wire [WORD_SIZE-1:0] 
creq_byteen_st0; + wire creq_empty; + wire [`REQS_BITS-1:0] creq_tid_next; + wire creq_rw_next; + wire [WORD_SIZE-1:0] creq_byteen_next; `IGNORE_WARNINGS_BEGIN - wire [`WORD_ADDR_WIDTH-1:0] creq_addr_unqual; + wire [`WORD_ADDR_WIDTH-1:0] creq_addr_next_unqual; `IGNORE_WARNINGS_END - wire [`LINE_ADDR_WIDTH-1:0] creq_addr_st0; - wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel_st0; - wire [`WORD_WIDTH-1:0] creq_writeword_st0; - wire [CORE_TAG_WIDTH-1:0] creq_tag_st0; + wire [`LINE_ADDR_WIDTH-1:0] creq_addr_next; + wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel_next; + wire [`WORD_WIDTH-1:0] creq_writeword_next; + wire [CORE_TAG_WIDTH-1:0] creq_tag_next; wire creq_push = (| core_req_valid) && core_req_ready; assign core_req_ready = !creq_full; if (BANK_ADDR_OFFSET == 0) begin - assign creq_addr_st0 = `LINE_SELECT_ADDR0(creq_addr_unqual); + assign creq_addr_next = `LINE_SELECT_ADDR0(creq_addr_next_unqual); end else begin - assign creq_addr_st0 = `LINE_SELECT_ADDRX(creq_addr_unqual); - end + assign creq_addr_next = `LINE_SELECT_ADDRX(creq_addr_next_unqual); + end - assign creq_wsel_st0 = creq_addr_unqual[`UP(`WORD_SELECT_BITS)-1:0]; + if (`WORD_SELECT_BITS != 0) begin + assign creq_wsel_next = creq_addr_next_unqual[`WORD_SELECT_BITS-1:0]; + end else begin + assign creq_wsel_next = 0; + end - VX_fifo_queue #( + VX_input_queue #( .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), - .SIZE (CREQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) + .SIZE (CREQ_SIZE) ) core_req_queue ( .clk (clk), .reset (reset), .push (creq_push), .pop (creq_pop), .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}), - .data_out({creq_tag_st0, creq_tid_st0, creq_rw_st0, creq_byteen_st0, creq_addr_unqual, creq_writeword_st0}), - .empty (creq_empty), + .data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}), + `UNUSED_PIN (empty_next), + `UNUSED_PIN (data_out), + 
.empty (creq_empty), .full (creq_full), `UNUSED_PIN (size) - ); + ); + + wire mshr_valid; + wire mshr_valid_next; + wire [`REQS_BITS-1:0] mshr_tid_next; + wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_next; + wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel_next; + wire [`WORD_WIDTH-1:0] mshr_writeword_next; + wire [`REQ_TAG_WIDTH-1:0] mshr_tag_next; + wire mshr_rw_next; + wire [WORD_SIZE-1:0] mshr_byteen_next; + + reg [`LINE_ADDR_WIDTH-1:0] creq_addr; + reg [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; + reg [`REQ_TAG_WIDTH-1:0] creq_tag; + reg creq_mem_rw; + reg [WORD_SIZE-1:0] creq_byteen; + reg [`WORD_WIDTH-1:0] creq_writeword; + reg [`REQS_BITS-1:0] creq_tid; + + always @(posedge clk) begin + creq_addr <= (mshr_valid_next || !drsp_empty_next) ? mshr_addr_next : creq_addr_next; + creq_wsel <= mshr_valid_next ? mshr_wsel_next : creq_wsel_next; + creq_mem_rw <= mshr_valid_next ? mshr_rw_next : creq_rw_next; + creq_byteen <= mshr_valid_next ? mshr_byteen_next : creq_byteen_next; + creq_writeword <= mshr_valid_next ? mshr_writeword_next : creq_writeword_next; + creq_tid <= mshr_valid_next ? mshr_tid_next : creq_tid_next; + creq_tag <= mshr_valid_next ? 
`REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next); + end wire mshr_pop; reg [MSHR_SIZE_BITS-1:0] mshr_pending_size; wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n; - reg mshr_going_full; - - wire mshr_valid_st0; - wire [`REQS_BITS-1:0] mshr_tid_st0; - wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0; - wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel_st0; - wire [`WORD_WIDTH-1:0] mshr_writeword_st0; - wire [`REQ_TAG_WIDTH-1:0] mshr_tag_st0; - wire mshr_rw_st0; - wire [WORD_SIZE-1:0] mshr_byteen_st0; + reg mshr_going_full; wire mshr_pending_hazard_unqual_st0; wire valid_st0, valid_st1; @@ -233,7 +257,7 @@ module VX_bank #( && !pipeline_stall; // determine which queue to pop next in piority order - wire mshr_pop_unqual = mshr_valid_st0; + wire mshr_pop_unqual = mshr_valid; wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty; wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full; @@ -255,26 +279,18 @@ module VX_bank #( end end - assign is_mshr_st0 = mshr_pop_unqual; - assign is_fill_st0 = drsq_pop_unqual; - assign valid_st0 = mshr_pop || drsq_pop || creq_pop; - assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0; - assign tag_st0 = creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : `REQ_TAG_WIDTH'(mshr_tag_st0); - assign mem_rw_st0 = creq_pop_unqual ? creq_rw_st0 : mshr_rw_st0; - assign byteen_st0 = creq_pop_unqual ? creq_byteen_st0 : mshr_byteen_st0; - assign req_tid_st0 = creq_pop_unqual ? creq_tid_st0 : mshr_tid_st0; - assign writeword_st0 = creq_pop_unqual ? 
creq_writeword_st0 : mshr_writeword_st0; + assign is_mshr_st0 = mshr_pop_unqual; + assign is_fill_st0 = drsq_pop_unqual; + assign addr_st0 = creq_addr; + assign wsel_st0 = creq_wsel; + assign mem_rw_st0 = creq_mem_rw; + assign byteen_st0 = creq_byteen; + assign writeword_st0 = creq_writeword; + assign req_tid_st0 = creq_tid; + assign tag_st0 = creq_tag; assign filldata_st0 = drsq_filldata; - if (`WORD_SELECT_BITS != 0) begin - assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_wsel_st0; - end else begin - `UNUSED_VAR (creq_wsel_st0) - `UNUSED_VAR (mshr_wsel_st0) - assign wsel_st0 = 0; - end - `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin assign {debug_pc_st0, debug_wid_st0} = tag_st0[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; @@ -331,7 +347,7 @@ if (DRAM_ENABLE) begin // we have a miss in mshr for the current address wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || (valid_st1 && (miss_st1 || force_miss_st1) && (creq_addr_st0 == addr_st1)); + || (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1)); // force miss to ensure commit order when a new request has pending previous requests to same block assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0; @@ -461,7 +477,7 @@ end && !crsq_push_stall && !dreq_push_stall; - wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == mshr_addr_st0)); + wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == addr_st0)); if (DRAM_ENABLE) begin @@ -506,9 +522,12 @@ end // schedule .schedule (mshr_pop), - .schedule_valid (mshr_valid_st0), - .schedule_addr (mshr_addr_st0), - .schedule_data ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}), + .schedule_valid (mshr_valid), + .schedule_valid_next(mshr_valid_next), + .schedule_addr_next (mshr_addr_next), + .schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}), + 
`UNUSED_PIN (schedule_addr), + `UNUSED_PIN (schedule_data), // dequeue .dequeue (mshr_dequeue_st1) @@ -522,14 +541,15 @@ end `UNUSED_VAR (byteen_st1) `UNUSED_VAR (incoming_fill_st1) assign mshr_pending_hazard_unqual_st0 = 0; - assign mshr_valid_st0 = 0; - assign mshr_addr_st0 = 0; - assign mshr_wsel_st0 = 0; - assign mshr_writeword_st0 = 0; - assign mshr_tid_st0 = 0; - assign mshr_tag_st0 = 0; - assign mshr_rw_st0 = 0; - assign mshr_byteen_st0 = 0; + assign mshr_valid = 0; + assign mshr_valid_next = 0; + assign mshr_addr_next = 0; + assign mshr_wsel_next = 0; + assign mshr_writeword_next = 0; + assign mshr_tid_next = 0; + assign mshr_tag_next = 0; + assign mshr_rw_next = 0; + assign mshr_byteen_next = 0; end // Enqueue core response diff --git a/hw/rtl/cache/VX_input_queue.v b/hw/rtl/cache/VX_input_queue.v new file mode 100644 index 00000000..5f94f21f --- /dev/null +++ b/hw/rtl/cache/VX_input_queue.v @@ -0,0 +1,119 @@ +`include "VX_platform.vh" + +module VX_input_queue #( /* FIFO queue that exposes its registered output and empty flag together with their next-cycle values */ + parameter DATAW = 1, + parameter SIZE = 2, + parameter ADDRW = $clog2(SIZE), + parameter SIZEW = $clog2(SIZE+1) +) ( + input wire clk, + input wire reset, + input wire push, + input wire pop, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out, + output wire empty, + output wire [DATAW-1:0] data_out_next, /* value data_out takes after the next clock edge */ + output wire empty_next, /* value empty takes after the next clock edge */ + output wire full, + output wire [SIZEW-1:0] size +); + wire [DATAW-1:0] dout; + reg [DATAW-1:0] dout_r, dout_n_r; + reg [ADDRW-1:0] wr_ptr_r; + reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n_r; + reg full_r; + reg empty_r, empty_n_r; + reg [ADDRW-1:0] used_r; + + always @(*) begin /* combinational next state of the empty flag; registered into empty_r below */ + empty_n_r = empty_r; + if (reset) begin + empty_n_r = 1; + end else begin + if (push && !pop) begin + empty_n_r = 0; + end + if (pop && !push) begin + if (used_r == ADDRW'(1)) begin + empty_n_r = 1; + end; + end + end + end + + always @(*) begin /* combinational next state of the output register: bypass data_in when the queue is (or becomes) empty, else take the RAM read data on pop */ + dout_n_r = dout_r; + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + dout_n_r = data_in; + end else if (pop) begin +
dout_n_r = dout; + end + end + + always @(posedge clk) begin + if (reset) begin + full_r <= 0; + used_r <= 0; + end else begin + assert(!push || !full); + assert(!pop || !empty_r); + if (push && !pop) begin + if (used_r == ADDRW'(SIZE-1)) begin + full_r <= 1; + end + end + if (pop && !push) begin + full_r <= 0; + end + used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop))); + end + empty_r <= empty_n_r; + dout_r <= dout_n_r; + end + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= 0; + rd_ptr_r <= 0; + rd_ptr_n_r <= 1; + end else begin + if (push) begin + wr_ptr_r <= wr_ptr_r + ADDRW'(1); + end + if (pop) begin + rd_ptr_r <= rd_ptr_n_r; + if (SIZE > 2) begin + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); + end else begin // (SIZE == 2); + rd_ptr_n_r <= ~rd_ptr_n_r; + end + end + end + end + + VX_dp_ram #( + .DATAW(DATAW), + .SIZE(SIZE), + .BUFFERED(0), + .RWCHECK(1), + .FASTRAM(1) + ) dp_ram ( + .clk(clk), + .waddr(wr_ptr_r), + .raddr(rd_ptr_n_r), /* RAM is read through the look-ahead pointer rd_ptr_n_r (kept one entry ahead of rd_ptr_r), not rd_ptr_r itself */ + .wren(push), + .byteen(1'b1), + .rden(1'b1), + .din(data_in), + .dout(dout) + ); + + assign data_out = dout_r; + assign data_out_next = dout_n_r; + assign empty = empty_r; + assign empty_next = empty_n_r; + assign full = full_r; + assign size = {full_r, used_r}; + +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 8c9c1057..1d238b52 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -48,6 +48,9 @@ module VX_miss_resrv #( output wire schedule_valid, output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr, output wire [`MSHR_DATA_WIDTH-1:0] schedule_data, + output wire schedule_valid_next, + output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr_next, + output wire [`MSHR_DATA_WIDTH-1:0] schedule_data_next, // dequeue input wire dequeue @@ -56,14 +59,15 @@ module VX_miss_resrv #( reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table; - reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, schedule_n_ptr, restore_ptr; + reg
[`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, schedule_n_ptr; + reg [`LOG2UP(MSHR_SIZE)-1:0] restore_ptr; reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr; reg [`LOG2UP(MSHR_SIZE)-1:0] used_r; reg full_r; - reg [`MSHR_DATA_WIDTH-1:0] dout_r; - reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r; - reg schedule_valid_r; + reg schedule_valid_r, schedule_valid_n_r; + reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r, schedule_addr_n_r; + reg [`MSHR_DATA_WIDTH-1:0] dout_r, dout_n_r; wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin @@ -86,7 +90,14 @@ module VX_miss_resrv #( restore_ptr <= 0; head_ptr <= 0; tail_ptr <= 0; - end else begin + end else begin + + // WARNING: lookup should happen before enqueue for ready_table's correct update + if (lookup_ready) begin + // unlock pending requests for scheduling + ready_table <= ready_table | valid_address_match; + end + if (enqueue) begin if (enqueue_is_mshr) begin // restore schedule, returning missed msrq entry @@ -109,13 +120,9 @@ module VX_miss_resrv #( valid_table[head_ptr] <= 0; end - if (lookup_ready) begin - ready_table <= ready_table | valid_address_match; - end - if (schedule) begin // schedule next entry - assert(schedule_valid); + assert(schedule_valid_r); valid_table[schedule_ptr] <= 0; ready_table[schedule_ptr] <= 0; @@ -153,6 +160,31 @@ module VX_miss_resrv #( .dout(dout) ); + always @(*) begin + schedule_valid_n_r = schedule_valid_r; + if (reset) begin + schedule_valid_n_r = 0; + end else begin + if (lookup_ready) begin + schedule_valid_n_r = 1; + end else if (schedule) begin + schedule_valid_n_r = ready_table[schedule_n_ptr]; + end + end + end + + always @(*) begin + schedule_addr_n_r = schedule_addr_r; + dout_n_r = dout_r; + if ((push_new && (used_r == 0 || (used_r == 1 && schedule))) || restore) begin + schedule_addr_n_r = enqueue_addr; + dout_n_r = enqueue_data; + end else if (schedule) begin + schedule_addr_n_r = addr_table[schedule_n_ptr]; + dout_n_r = dout; + end + end + always @(posedge clk) 
begin if (reset) begin used_r <= 0; @@ -161,36 +193,20 @@ module VX_miss_resrv #( used_r <= used_r + $bits(used_r)'($signed(2'(enqueue) - 2'(schedule))); full_r <= (used_r == $bits(used_r)'(MSHR_SIZE-1)) && enqueue; end - end - - always @(posedge clk) begin - if (reset) begin - schedule_valid_r <= 0; - end else begin - if (lookup_ready) begin - schedule_valid_r <= 1; - end else if (schedule) begin - schedule_valid_r <= ready_table[schedule_n_ptr]; - end - end - end - - always @(posedge clk) begin - if ((push_new && (used_r == 0 || (used_r == 1 && schedule))) - || restore) begin - schedule_addr_r <= enqueue_addr; - dout_r <= enqueue_data; - end else if (schedule) begin - schedule_addr_r <= addr_table[schedule_n_ptr]; - dout_r <= dout; - end + schedule_valid_r <= schedule_valid_n_r; + schedule_addr_r <= schedule_addr_n_r; + dout_r <= dout_n_r; end assign schedule_valid = schedule_valid_r; assign schedule_addr = schedule_addr_r; assign schedule_data = dout_r; -`ifdef DBG_PRINT_CACHE_MSHR + assign schedule_valid_next = schedule_valid_n_r; + assign schedule_addr_next = schedule_addr_n_r; + assign schedule_data_next = dout_n_r; + +/*`ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin if (lookup_ready || schedule || enqueue || dequeue) begin if (schedule) @@ -215,6 +231,6 @@ module VX_miss_resrv #( $write("\n"); end end -`endif +`endif*/ endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_fifo_queue.v b/hw/rtl/libs/VX_fifo_queue.v index fabef871..305379fb 100644 --- a/hw/rtl/libs/VX_fifo_queue.v +++ b/hw/rtl/libs/VX_fifo_queue.v @@ -159,7 +159,7 @@ module VX_fifo_queue #( if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin dout_r <= data_in; end else if (pop) begin - dout_r <= dout; // BRAM R/W collision + dout_r <= dout; end end diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 21197e16..d16aa406 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -6,7 +6,7 @@ #define RESET_DELAY 2 #define 
ENABLE_DRAM_STALLS -#define DRAM_LATENCY 300 +#define DRAM_LATENCY 24 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16