non-cacheable memory address critical paths optimizations
This commit is contained in:
@@ -222,9 +222,9 @@ module VX_cluster #(
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`L2MEM_LINE_WIDTH),
|
||||
.DATA_WIDTH (`L2MEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
|
||||
@@ -54,11 +54,12 @@ module VX_lsu_unit #(
|
||||
assign word_addr[i] = full_addr[i][REQ_ASHIFT +: REQ_ADDRW];
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
wire [`NUM_THREADS-1:0] addr_matches;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign addr_matches[i] = (word_addr[0] == word_addr[i]) || ~lsu_req_if.tmask[i];
|
||||
end
|
||||
wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches);
|
||||
wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
// is non-cacheable address
|
||||
@@ -84,8 +85,8 @@ module VX_lsu_unit #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_in),
|
||||
.data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
.data_in ({lsu_req_if.valid, lsu_is_dup, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
);
|
||||
|
||||
// Can accept new request?
|
||||
@@ -105,9 +106,9 @@ module VX_lsu_unit #(
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
wire req_ready_all;
|
||||
reg is_req_start;
|
||||
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
||||
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
|
||||
@@ -119,9 +120,9 @@ module VX_lsu_unit #(
|
||||
|
||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
wire mbuf_push = (| dcache_req_fire)
|
||||
&& (0 == req_sent_mask) // first submission only
|
||||
&& req_wb; // loads only
|
||||
wire mbuf_push = (| dcache_req_fire)
|
||||
&& is_req_start // first submission only
|
||||
&& req_wb; // loads only
|
||||
|
||||
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
@@ -144,23 +145,27 @@ module VX_lsu_unit #(
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
assign req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask);
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
wire req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
|
||||
wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_sent_mask <= 0;
|
||||
is_req_start <= 1;
|
||||
end else begin
|
||||
if (req_ready_all)
|
||||
if (req_ready_all) begin
|
||||
req_sent_mask <= 0;
|
||||
else
|
||||
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
|
||||
is_req_start <= 1;
|
||||
end else begin
|
||||
req_sent_mask <= req_sent_mask_n;
|
||||
is_req_start <= (0 == req_sent_mask_n);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire is_req_start = (0 == req_sent_mask);
|
||||
|
||||
// need to hold the acquired tag index until the full request is submitted
|
||||
reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
|
||||
wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
|
||||
@@ -168,9 +173,7 @@ module VX_lsu_unit #(
|
||||
if (mbuf_push) begin
|
||||
req_tag_hold <= mbuf_waddr;
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
end
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
|
||||
|
||||
@@ -184,46 +187,42 @@ module VX_lsu_unit #(
|
||||
end
|
||||
|
||||
// ensure all dependencies for the requests are resolved
|
||||
wire req_dep_ready = (req_wb && (~mbuf_full || ~is_req_start))
|
||||
wire req_dep_ready = (req_wb && ~(mbuf_full && is_req_start))
|
||||
|| (~req_wb && st_commit_if.ready);
|
||||
|
||||
// DCache Request
|
||||
|
||||
reg [`NUM_THREADS-1:0][29:0] mem_req_addr;
|
||||
reg [`NUM_THREADS-1:0][3:0] mem_req_byteen;
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_req_data;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
mem_req_byteen[i] = {4{req_wb}};
|
||||
reg [3:0] mem_req_byteen;
|
||||
reg [31:0] mem_req_data;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen = {4{req_wb}};
|
||||
case (`LSU_WSIZE(req_type))
|
||||
0: mem_req_byteen[i][req_offset[i]] = 1;
|
||||
0: mem_req_byteen[req_offset[i]] = 1;
|
||||
1: begin
|
||||
mem_req_byteen[i][req_offset[i]] = 1;
|
||||
mem_req_byteen[i][{req_addr[i][1], 1'b1}] = 1;
|
||||
mem_req_byteen[req_offset[i]] = 1;
|
||||
mem_req_byteen[{req_addr[i][1], 1'b1}] = 1;
|
||||
end
|
||||
default : mem_req_byteen[i] = {4{1'b1}};
|
||||
default : mem_req_byteen = {4{1'b1}};
|
||||
endcase
|
||||
|
||||
mem_req_data[i] = 'x;
|
||||
mem_req_data = 'x;
|
||||
case (req_offset[i])
|
||||
1: mem_req_data[i][31:8] = req_data[i][23:0];
|
||||
2: mem_req_data[i][31:16] = req_data[i][15:0];
|
||||
3: mem_req_data[i][31:24] = req_data[i][7:0];
|
||||
default: mem_req_data[i] = req_data[i];
|
||||
1: mem_req_data[31:8] = req_data[i][23:0];
|
||||
2: mem_req_data[31:16] = req_data[i][15:0];
|
||||
3: mem_req_data[31:24] = req_data[i][7:0];
|
||||
default: mem_req_data = req_data[i];
|
||||
endcase
|
||||
|
||||
mem_req_addr[i] = req_addr[i][31:2];
|
||||
end
|
||||
end
|
||||
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_dep_ready}} & req_tmask_dup & ~req_sent_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
|
||||
assign dcache_req_if.addr = mem_req_addr;
|
||||
assign dcache_req_if.byteen = mem_req_byteen;
|
||||
assign dcache_req_if.data = mem_req_data;
|
||||
assign dcache_req_if.valid[i] = req_valid && req_dep_ready && req_tmask_dup[i] && !req_sent_mask[i];
|
||||
assign dcache_req_if.rw[i] = ~req_wb;
|
||||
assign dcache_req_if.addr[i] = req_addr[i][31:2];
|
||||
assign dcache_req_if.byteen[i] = mem_req_byteen;
|
||||
assign dcache_req_if.data[i] = mem_req_data;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]};
|
||||
`else
|
||||
@@ -252,22 +251,17 @@ module VX_lsu_unit #(
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||
|
||||
reg [31:0] rsp_data_shifted;
|
||||
always @(*) begin
|
||||
rsp_data_shifted[31:16] = src_data[31:16];
|
||||
rsp_data_shifted[15:0] = rsp_offset[i][1] ? src_data[31:16] : src_data[15:0];
|
||||
rsp_data_shifted[7:0] = rsp_offset[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
|
||||
end
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||
wire [15:0] rsp_data16 = rsp_offset[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`LSU_FMT(rsp_type))
|
||||
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0]));
|
||||
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0]));
|
||||
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0]));
|
||||
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0]));
|
||||
default: rsp_data[i] = rsp_data_shifted;
|
||||
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
default: rsp_data[i] = rsp_data32;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
@@ -3,14 +3,15 @@
|
||||
module VX_mem_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_OUT_WIDTH = 1,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "R",
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -50,20 +51,21 @@ module VX_mem_arb #(
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign req_merged_data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
assign req_data_in_merged[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
.BUFFERED (BUFFERED_REQ),
|
||||
.TYPE (TYPE)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_merged_data_in),
|
||||
.data_in (req_data_in_merged),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
@@ -72,11 +74,11 @@ module VX_mem_arb #(
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0];
|
||||
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
|
||||
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
|
||||
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
|
||||
end
|
||||
|
||||
VX_stream_demux #(
|
||||
@@ -91,7 +93,7 @@ module VX_mem_arb #(
|
||||
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_merged_data_out),
|
||||
.data_out (rsp_data_out_merged),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
|
||||
@@ -288,7 +288,6 @@ module VX_mem_unit # (
|
||||
.DATA_WIDTH (`DMEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (0)
|
||||
) mem_arb (
|
||||
|
||||
@@ -18,7 +18,7 @@ module VX_smem_arb (
|
||||
// output response
|
||||
VX_dcache_core_rsp_if core_rsp_if
|
||||
);
|
||||
localparam REQ_DATAW = 1 + `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH - 1;
|
||||
localparam REQ_DATAW = `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + (`DCORE_TAG_WIDTH-1);
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
|
||||
//
|
||||
@@ -26,71 +26,59 @@ module VX_smem_arb (
|
||||
//
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire cache_req_valid_out;
|
||||
wire cache_req_ready_out;
|
||||
wire is_smem_addr_out;
|
||||
|
||||
wire is_smem_addr_in = core_req_if.tag[i][0];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) out_buffer (
|
||||
wire [1:0][REQ_DATAW-1:0] req_data_out;
|
||||
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (2),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (0)
|
||||
) rsp_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_ready_out)
|
||||
);
|
||||
.sel (core_req_if.tag[i][0]),
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out ({smem_req_if.valid[i], cache_req_if.valid[i]}),
|
||||
.data_out (req_data_out),
|
||||
.ready_out ({smem_req_if.ready[i], cache_req_if.ready[i]})
|
||||
);
|
||||
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
|
||||
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
|
||||
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
|
||||
|
||||
assign smem_req_if.addr[i] = cache_req_if.addr[i];
|
||||
assign smem_req_if.rw[i] = cache_req_if.rw[i];
|
||||
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
|
||||
assign smem_req_if.data[i] = cache_req_if.data[i];
|
||||
assign smem_req_if.tag[i] = cache_req_if.tag[i];
|
||||
assign {cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]} = req_data_out[0];
|
||||
assign {smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]} = req_data_out[1];
|
||||
end
|
||||
|
||||
//
|
||||
// handle responses
|
||||
//
|
||||
|
||||
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
|
||||
wire [1:0] rsp_valid_in;
|
||||
wire [1:0] rsp_ready_in;
|
||||
|
||||
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
|
||||
wire [`NUM_THREADS-1:0] core_rsp_tmask;
|
||||
wire core_rsp_valid;
|
||||
wire [`NUM_THREADS-1:0] core_rsp_valid_tmask;
|
||||
|
||||
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
|
||||
assign rsp_valid_in[1] = (| smem_rsp_if.valid);
|
||||
|
||||
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}};
|
||||
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}};
|
||||
|
||||
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
|
||||
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in (rsp_data_in),
|
||||
.ready_in (rsp_ready_in),
|
||||
.ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}),
|
||||
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
|
||||
.ready_out (core_rsp_if.ready)
|
||||
);
|
||||
|
||||
assign cache_rsp_if.ready = rsp_ready_in[0];
|
||||
assign smem_rsp_if.ready = rsp_ready_in[1];
|
||||
|
||||
assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask;
|
||||
assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_tmask;
|
||||
|
||||
endmodule
|
||||
@@ -225,8 +225,8 @@ module Vortex (
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3MEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (`L3MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L3MEM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
|
||||
@@ -586,23 +586,23 @@ wire [AVS_REQ_TAGW:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (LMEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (AVS_REQ_TAGW),
|
||||
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (LMEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (AVS_REQ_TAGW),
|
||||
.TYPE ("X")
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({cci_mem_req_arb_valid, vx_mem_req_arb_valid}),
|
||||
.req_rw_in ({cci_mem_req_arb_rw, vx_mem_req_arb_rw}),
|
||||
.req_byteen_in ({cci_mem_req_arb_byteen, vx_mem_req_arb_byteen}),
|
||||
.req_addr_in ({cci_mem_req_arb_addr, vx_mem_req_arb_addr}),
|
||||
.req_data_in ({cci_mem_req_arb_data, vx_mem_req_arb_data}),
|
||||
.req_tag_in ({cci_mem_req_arb_tag, vx_mem_req_arb_tag}),
|
||||
.req_ready_in ({cci_mem_req_arb_ready, vx_mem_req_arb_ready}),
|
||||
.req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}),
|
||||
.req_rw_in ({vx_mem_req_arb_rw, cci_mem_req_arb_rw}),
|
||||
.req_byteen_in ({vx_mem_req_arb_byteen, cci_mem_req_arb_byteen}),
|
||||
.req_addr_in ({vx_mem_req_arb_addr, cci_mem_req_arb_addr}),
|
||||
.req_data_in ({vx_mem_req_arb_data, cci_mem_req_arb_data}),
|
||||
.req_tag_in ({vx_mem_req_arb_tag, cci_mem_req_arb_tag}),
|
||||
.req_ready_in ({vx_mem_req_arb_ready, cci_mem_req_arb_ready}),
|
||||
|
||||
// Memory request
|
||||
.req_valid_out (mem_req_valid),
|
||||
@@ -614,10 +614,10 @@ VX_mem_arb #(
|
||||
.req_ready_out (mem_req_ready),
|
||||
|
||||
// Source response
|
||||
.rsp_valid_out ({cci_mem_rsp_arb_valid, vx_mem_rsp_arb_valid}),
|
||||
.rsp_data_out ({cci_mem_rsp_arb_data, vx_mem_rsp_arb_data}),
|
||||
.rsp_tag_out ({cci_mem_rsp_arb_tag, vx_mem_rsp_arb_tag}),
|
||||
.rsp_ready_out ({cci_mem_rsp_arb_ready, vx_mem_rsp_arb_ready}),
|
||||
.rsp_valid_out ({vx_mem_rsp_arb_valid, cci_mem_rsp_arb_valid}),
|
||||
.rsp_data_out ({vx_mem_rsp_arb_data, cci_mem_rsp_arb_data}),
|
||||
.rsp_tag_out ({vx_mem_rsp_arb_tag, cci_mem_rsp_arb_tag}),
|
||||
.rsp_ready_out ({vx_mem_rsp_arb_ready, cci_mem_rsp_arb_ready}),
|
||||
|
||||
// Memory response
|
||||
.rsp_valid_in (mem_rsp_valid),
|
||||
|
||||
191
hw/rtl/cache/VX_nc_bypass.v
vendored
191
hw/rtl/cache/VX_nc_bypass.v
vendored
@@ -97,8 +97,7 @@ module VX_nc_bypass #(
|
||||
reg [NUM_REQS-1:0] core_req_ready_in_r;
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [CORE_REQ_TIDW-1:0] core_req_nc_tid;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid_in_nc[i] = core_req_valid_in[i] && core_req_tag_in[i][NC_TAG_BIT];
|
||||
end
|
||||
@@ -107,14 +106,45 @@ module VX_nc_bypass #(
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid_in_nc[i]) begin
|
||||
core_req_valid_out_r[i] = 0;
|
||||
core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i));
|
||||
end else begin
|
||||
core_req_valid_out_r[i] = core_req_valid_in[i];
|
||||
core_req_ready_in_r[i] = core_req_ready_out[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) core_req_sel (
|
||||
.data_in (core_req_valid_in_nc),
|
||||
.index (core_req_nc_tid),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (core_req_nc_valid)
|
||||
);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid_in_nc[i]) begin
|
||||
core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i));
|
||||
end else begin
|
||||
core_req_ready_in_r[i] = core_req_ready_out[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_tid)
|
||||
always @(*) begin
|
||||
if (core_req_valid_in_nc) begin
|
||||
core_req_ready_in_r = mem_req_ready_out;
|
||||
end else begin
|
||||
core_req_ready_in_r = core_req_ready_out;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign core_req_valid_out = core_req_valid_out_r;
|
||||
assign core_req_rw_out = core_req_rw_in;
|
||||
assign core_req_addr_out = core_req_addr_in;
|
||||
@@ -131,57 +161,92 @@ module VX_nc_bypass #(
|
||||
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
|
||||
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
|
||||
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
|
||||
reg mem_req_ready_in_r;
|
||||
|
||||
wire core_req_nc_valid;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) core_req_sel (
|
||||
.data_in (core_req_valid_in_nc),
|
||||
.index (core_req_nc_tid),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (core_req_nc_valid)
|
||||
);
|
||||
reg mem_req_ready_in_r;
|
||||
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_valid_out_r = 1;
|
||||
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
|
||||
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
|
||||
for (integer i = 0; i < P; ++i) begin
|
||||
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
|
||||
end
|
||||
mem_req_ready_in_r = 0;
|
||||
mem_req_valid_out_r = 1;
|
||||
mem_req_ready_in_r = 0;
|
||||
end else begin
|
||||
mem_req_valid_out_r = mem_req_valid_in;
|
||||
mem_req_rw_out_r = mem_req_rw_in;
|
||||
mem_req_addr_out_r = mem_req_addr_in;
|
||||
mem_req_data_out_r = mem_req_data_in;
|
||||
mem_req_ready_in_r = mem_req_ready_out;
|
||||
mem_req_valid_out_r = mem_req_valid_in;
|
||||
mem_req_ready_in_r = mem_req_ready_out;
|
||||
end
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
|
||||
if (NUM_REQS > 1) begin
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = 0;
|
||||
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
|
||||
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
|
||||
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
|
||||
for (integer i = 0; i < P; ++i) begin
|
||||
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
|
||||
end
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
mem_req_rw_out_r = mem_req_rw_in;
|
||||
mem_req_addr_out_r = mem_req_addr_in;
|
||||
mem_req_data_out_r = mem_req_data_in;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = 0;
|
||||
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
|
||||
mem_req_rw_out_r = core_req_rw_in;
|
||||
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
|
||||
for (integer i = 0; i < P; ++i) begin
|
||||
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
|
||||
end
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
mem_req_rw_out_r = mem_req_rw_in;
|
||||
mem_req_addr_out_r = mem_req_addr_in;
|
||||
mem_req_data_out_r = mem_req_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = 0;
|
||||
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (core_req_nc_valid) begin
|
||||
mem_req_byteen_out_r = core_req_byteen_in;
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
|
||||
end else begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -201,26 +266,41 @@ module VX_nc_bypass #(
|
||||
reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r;
|
||||
reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r;
|
||||
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
|
||||
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
core_rsp_valid_out_r = 0;
|
||||
core_rsp_valid_out_r[rsp_tid] = 1;
|
||||
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
|
||||
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
|
||||
end
|
||||
core_rsp_ready_in_r = 0;
|
||||
end else begin
|
||||
core_rsp_valid_out_r = core_rsp_valid_in;
|
||||
core_rsp_tag_out_r = core_rsp_tag_in;
|
||||
core_rsp_ready_in_r = core_rsp_ready_out;
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
if (NUM_RSP_TAGS > 1) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin
|
||||
core_rsp_valid_out_r[i] = 1;
|
||||
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
|
||||
core_rsp_ready_in_r[i] = 0;
|
||||
end else begin
|
||||
core_rsp_valid_out_r[i] = core_rsp_valid_in[i];
|
||||
core_rsp_tag_out_r[i] = core_rsp_tag_in[i];
|
||||
core_rsp_ready_in_r[i] = core_rsp_ready_out[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
core_rsp_valid_out_r = 0;
|
||||
core_rsp_valid_out_r[rsp_tid] = 1;
|
||||
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
|
||||
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
|
||||
end
|
||||
core_rsp_ready_in_r = 0;
|
||||
end else begin
|
||||
core_rsp_valid_out_r = core_rsp_valid_in;
|
||||
core_rsp_tag_out_r = core_rsp_tag_in;
|
||||
core_rsp_ready_in_r = core_rsp_ready_out;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
core_rsp_valid_out_r = 1;
|
||||
@@ -276,6 +356,7 @@ module VX_nc_bypass #(
|
||||
end
|
||||
|
||||
if (NUM_RSP_TAGS > 1) begin
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
mem_rsp_ready_in_r = core_rsp_ready_out[rsp_tid];
|
||||
|
||||
@@ -18,6 +18,12 @@ module VX_priority_encoder #(
|
||||
assign index = 0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
end else if (N == 2) begin
|
||||
|
||||
assign onehot = {!data_in[REVERSE], data_in[REVERSE]};
|
||||
assign index = !data_in[REVERSE];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end else if (FAST) begin
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
@@ -27,7 +27,6 @@ module VX_stream_arbiter #(
|
||||
wire [NUM_REQS-1:0] sel_1hot;
|
||||
|
||||
if (TYPE == "X") begin
|
||||
|
||||
VX_fixed_arbiter #(
|
||||
.NUM_REQS(NUM_REQS),
|
||||
.LOCK_ENABLE(1)
|
||||
@@ -40,9 +39,7 @@ module VX_stream_arbiter #(
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end else if (TYPE == "R") begin
|
||||
|
||||
VX_rr_arbiter #(
|
||||
.NUM_REQS(NUM_REQS),
|
||||
.LOCK_ENABLE(1)
|
||||
@@ -55,9 +52,7 @@ module VX_stream_arbiter #(
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end else if (TYPE == "F") begin
|
||||
|
||||
VX_fair_arbiter #(
|
||||
.NUM_REQS(NUM_REQS),
|
||||
.LOCK_ENABLE(1)
|
||||
@@ -70,9 +65,7 @@ module VX_stream_arbiter #(
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end else if (TYPE == "M") begin
|
||||
|
||||
VX_matrix_arbiter #(
|
||||
.NUM_REQS(NUM_REQS),
|
||||
.LOCK_ENABLE(1)
|
||||
@@ -85,8 +78,9 @@ module VX_stream_arbiter #(
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end
|
||||
end else begin
|
||||
$error ("invalid parameter");
|
||||
end
|
||||
|
||||
wire ready_out_unqual;
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS
|
||||
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG8 := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG32 := -DNUM_CLUSTERS=8 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)
|
||||
|
||||
Reference in New Issue
Block a user