Non-cacheable memory address critical-path optimizations

This commit is contained in:
Blaise Tine
2021-06-10 12:47:18 -07:00
parent 41069ba188
commit adf033b0aa
11 changed files with 258 additions and 194 deletions

View File

@@ -222,9 +222,9 @@ module VX_cluster #(
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2MEM_LINE_WIDTH),
.DATA_WIDTH (`L2MEM_LINE_WIDTH),
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2MEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (

View File

@@ -54,11 +54,12 @@ module VX_lsu_unit #(
assign word_addr[i] = full_addr[i][REQ_ASHIFT +: REQ_ADDRW];
end
// detect duplicate addresses
wire [`NUM_THREADS-1:0] addr_matches;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign addr_matches[i] = (word_addr[0] == word_addr[i]) || ~lsu_req_if.tmask[i];
end
wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches);
wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);
for (genvar i = 0; i < `NUM_THREADS; i++) begin
// is non-cacheable address
@@ -84,8 +85,8 @@ module VX_lsu_unit #(
.clk (clk),
.reset (reset),
.enable (!stall_in),
.data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
.data_in ({lsu_req_if.valid, lsu_is_dup, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
);
// Can accept new request?
@@ -105,9 +106,9 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_tmask;
reg [`NUM_THREADS-1:0] req_sent_mask;
wire req_ready_all;
reg is_req_start;
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
@@ -119,9 +120,9 @@ module VX_lsu_unit #(
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire mbuf_push = (| dcache_req_fire)
&& (0 == req_sent_mask) // first submission only
&& req_wb; // loads only
wire mbuf_push = (| dcache_req_fire)
&& is_req_start // first submission only
&& req_wb; // loads only
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
@@ -144,23 +145,27 @@ module VX_lsu_unit #(
`UNUSED_PIN (empty)
);
assign req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask);
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
wire req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire;
always @(posedge clk) begin
if (reset) begin
req_sent_mask <= 0;
is_req_start <= 1;
end else begin
if (req_ready_all)
if (req_ready_all) begin
req_sent_mask <= 0;
else
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
is_req_start <= 1;
end else begin
req_sent_mask <= req_sent_mask_n;
is_req_start <= (0 == req_sent_mask_n);
end
end
end
wire is_req_start = (0 == req_sent_mask);
// need to hold the acquired tag index until the full request is submitted
reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
@@ -168,9 +173,7 @@ module VX_lsu_unit #(
if (mbuf_push) begin
req_tag_hold <= mbuf_waddr;
end
end
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
end
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
@@ -184,46 +187,42 @@ module VX_lsu_unit #(
end
// ensure all dependencies for the requests are resolved
wire req_dep_ready = (req_wb && (~mbuf_full || ~is_req_start))
wire req_dep_ready = (req_wb && ~(mbuf_full && is_req_start))
|| (~req_wb && st_commit_if.ready);
// DCache Request
reg [`NUM_THREADS-1:0][29:0] mem_req_addr;
reg [`NUM_THREADS-1:0][3:0] mem_req_byteen;
reg [`NUM_THREADS-1:0][31:0] mem_req_data;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
mem_req_byteen[i] = {4{req_wb}};
reg [3:0] mem_req_byteen;
reg [31:0] mem_req_data;
always @(*) begin
mem_req_byteen = {4{req_wb}};
case (`LSU_WSIZE(req_type))
0: mem_req_byteen[i][req_offset[i]] = 1;
0: mem_req_byteen[req_offset[i]] = 1;
1: begin
mem_req_byteen[i][req_offset[i]] = 1;
mem_req_byteen[i][{req_addr[i][1], 1'b1}] = 1;
mem_req_byteen[req_offset[i]] = 1;
mem_req_byteen[{req_addr[i][1], 1'b1}] = 1;
end
default : mem_req_byteen[i] = {4{1'b1}};
default : mem_req_byteen = {4{1'b1}};
endcase
mem_req_data[i] = 'x;
mem_req_data = 'x;
case (req_offset[i])
1: mem_req_data[i][31:8] = req_data[i][23:0];
2: mem_req_data[i][31:16] = req_data[i][15:0];
3: mem_req_data[i][31:24] = req_data[i][7:0];
default: mem_req_data[i] = req_data[i];
1: mem_req_data[31:8] = req_data[i][23:0];
2: mem_req_data[31:16] = req_data[i][15:0];
3: mem_req_data[31:24] = req_data[i][7:0];
default: mem_req_data = req_data[i];
endcase
mem_req_addr[i] = req_addr[i][31:2];
end
end
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_dep_ready}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
assign dcache_req_if.addr = mem_req_addr;
assign dcache_req_if.byteen = mem_req_byteen;
assign dcache_req_if.data = mem_req_data;
assign dcache_req_if.valid[i] = req_valid && req_dep_ready && req_tmask_dup[i] && !req_sent_mask[i];
assign dcache_req_if.rw[i] = ~req_wb;
assign dcache_req_if.addr[i] = req_addr[i][31:2];
assign dcache_req_if.byteen[i] = mem_req_byteen;
assign dcache_req_if.data[i] = mem_req_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
`ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]};
`else
@@ -252,22 +251,17 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
reg [31:0] rsp_data_shifted;
always @(*) begin
rsp_data_shifted[31:16] = src_data[31:16];
rsp_data_shifted[15:0] = rsp_offset[i][1] ? src_data[31:16] : src_data[15:0];
rsp_data_shifted[7:0] = rsp_offset[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
end
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
wire [15:0] rsp_data16 = rsp_offset[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
always @(*) begin
case (`LSU_FMT(rsp_type))
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0]));
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0]));
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0]));
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0]));
default: rsp_data[i] = rsp_data_shifted;
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
default: rsp_data[i] = rsp_data32;
endcase
end
end

View File

@@ -3,14 +3,15 @@
module VX_mem_arb #(
parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter ADDR_WIDTH = 1,
parameter TAG_IN_WIDTH = 1,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
parameter TYPE = "R",
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
) (
input wire clk,
input wire reset,
@@ -50,20 +51,21 @@ module VX_mem_arb #(
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign req_merged_data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
assign req_data_in_merged[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #(
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.DATAW (REQ_DATAW),
.BUFFERED (BUFFERED_REQ)
.BUFFERED (BUFFERED_REQ),
.TYPE (TYPE)
) req_arb (
.clk (clk),
.reset (reset),
.valid_in (req_valid_in),
.data_in (req_merged_data_in),
.data_in (req_data_in_merged),
.ready_in (req_ready_in),
.valid_out (req_valid_out),
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
@@ -72,11 +74,11 @@ module VX_mem_arb #(
///////////////////////////////////////////////////////////////////////
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0];
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
end
VX_stream_demux #(
@@ -91,7 +93,7 @@ module VX_mem_arb #(
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out (rsp_merged_data_out),
.data_out (rsp_data_out_merged),
.ready_out (rsp_ready_out)
);

View File

@@ -288,7 +288,6 @@ module VX_mem_unit # (
.DATA_WIDTH (`DMEM_LINE_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`XMEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (0)
) mem_arb (

View File

@@ -18,7 +18,7 @@ module VX_smem_arb (
// output response
VX_dcache_core_rsp_if core_rsp_if
);
localparam REQ_DATAW = 1 + `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH - 1;
localparam REQ_DATAW = `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + (`DCORE_TAG_WIDTH-1);
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
//
@@ -26,71 +26,59 @@ module VX_smem_arb (
//
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire cache_req_valid_out;
wire cache_req_ready_out;
wire is_smem_addr_out;
wire is_smem_addr_in = core_req_if.tag[i][0];
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) out_buffer (
wire [1:0][REQ_DATAW-1:0] req_data_out;
VX_stream_demux #(
.NUM_REQS (2),
.DATAW (REQ_DATAW),
.BUFFERED (0)
) rsp_demux (
.clk (clk),
.reset (reset),
.valid_in (core_req_if.valid[i]),
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}),
.ready_in (core_req_if.ready[i]),
.valid_out (cache_req_valid_out),
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_ready_out)
);
.sel (core_req_if.tag[i][0]),
.valid_in (core_req_if.valid[i]),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}),
.ready_in (core_req_if.ready[i]),
.valid_out ({smem_req_if.valid[i], cache_req_if.valid[i]}),
.data_out (req_data_out),
.ready_out ({smem_req_if.ready[i], cache_req_if.ready[i]})
);
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
assign smem_req_if.addr[i] = cache_req_if.addr[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
assign smem_req_if.data[i] = cache_req_if.data[i];
assign smem_req_if.tag[i] = cache_req_if.tag[i];
assign {cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]} = req_data_out[0];
assign {smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]} = req_data_out[1];
end
//
// handle responses
//
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
wire [1:0] rsp_valid_in;
wire [1:0] rsp_ready_in;
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
wire core_rsp_valid;
wire [`NUM_THREADS-1:0] core_rsp_valid_tmask;
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid);
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}};
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
VX_stream_arbiter #(
.NUM_REQS (2),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.BUFFERED (1)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask;
assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_tmask;
endmodule

View File

@@ -225,8 +225,8 @@ module Vortex (
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3MEM_LINE_WIDTH),
.ADDR_WIDTH (`L3MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3MEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (

View File

@@ -586,23 +586,23 @@ wire [AVS_REQ_TAGW:0] mem_rsp_tag;
wire mem_rsp_ready;
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (LMEM_LINE_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
.NUM_REQS (2),
.DATA_WIDTH (LMEM_LINE_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TYPE ("X")
) mem_arb (
.clk (clk),
.reset (reset),
// Source request
.req_valid_in ({cci_mem_req_arb_valid, vx_mem_req_arb_valid}),
.req_rw_in ({cci_mem_req_arb_rw, vx_mem_req_arb_rw}),
.req_byteen_in ({cci_mem_req_arb_byteen, vx_mem_req_arb_byteen}),
.req_addr_in ({cci_mem_req_arb_addr, vx_mem_req_arb_addr}),
.req_data_in ({cci_mem_req_arb_data, vx_mem_req_arb_data}),
.req_tag_in ({cci_mem_req_arb_tag, vx_mem_req_arb_tag}),
.req_ready_in ({cci_mem_req_arb_ready, vx_mem_req_arb_ready}),
.req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}),
.req_rw_in ({vx_mem_req_arb_rw, cci_mem_req_arb_rw}),
.req_byteen_in ({vx_mem_req_arb_byteen, cci_mem_req_arb_byteen}),
.req_addr_in ({vx_mem_req_arb_addr, cci_mem_req_arb_addr}),
.req_data_in ({vx_mem_req_arb_data, cci_mem_req_arb_data}),
.req_tag_in ({vx_mem_req_arb_tag, cci_mem_req_arb_tag}),
.req_ready_in ({vx_mem_req_arb_ready, cci_mem_req_arb_ready}),
// Memory request
.req_valid_out (mem_req_valid),
@@ -614,10 +614,10 @@ VX_mem_arb #(
.req_ready_out (mem_req_ready),
// Source response
.rsp_valid_out ({cci_mem_rsp_arb_valid, vx_mem_rsp_arb_valid}),
.rsp_data_out ({cci_mem_rsp_arb_data, vx_mem_rsp_arb_data}),
.rsp_tag_out ({cci_mem_rsp_arb_tag, vx_mem_rsp_arb_tag}),
.rsp_ready_out ({cci_mem_rsp_arb_ready, vx_mem_rsp_arb_ready}),
.rsp_valid_out ({vx_mem_rsp_arb_valid, cci_mem_rsp_arb_valid}),
.rsp_data_out ({vx_mem_rsp_arb_data, cci_mem_rsp_arb_data}),
.rsp_tag_out ({vx_mem_rsp_arb_tag, cci_mem_rsp_arb_tag}),
.rsp_ready_out ({vx_mem_rsp_arb_ready, cci_mem_rsp_arb_ready}),
// Memory response
.rsp_valid_in (mem_rsp_valid),

View File

@@ -97,8 +97,7 @@ module VX_nc_bypass #(
reg [NUM_REQS-1:0] core_req_ready_in_r;
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [CORE_REQ_TIDW-1:0] core_req_nc_tid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid_in_nc[i] = core_req_valid_in[i] && core_req_tag_in[i][NC_TAG_BIT];
end
@@ -107,14 +106,45 @@ module VX_nc_bypass #(
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_valid_out_r[i] = 0;
core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i));
end else begin
core_req_valid_out_r[i] = core_req_valid_in[i];
core_req_ready_in_r[i] = core_req_ready_out[i];
end
end
end
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire core_req_nc_valid;
VX_priority_encoder #(
.N (NUM_REQS)
) core_req_sel (
.data_in (core_req_valid_in_nc),
.index (core_req_nc_tid),
`UNUSED_PIN (onehot),
.valid_out (core_req_nc_valid)
);
if (NUM_REQS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i));
end else begin
core_req_ready_in_r[i] = core_req_ready_out[i];
end
end
end
end else begin
`UNUSED_VAR (core_req_nc_tid)
always @(*) begin
if (core_req_valid_in_nc) begin
core_req_ready_in_r = mem_req_ready_out;
end else begin
core_req_ready_in_r = core_req_ready_out;
end
end
end
assign core_req_valid_out = core_req_valid_out_r;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
@@ -131,57 +161,92 @@ module VX_nc_bypass #(
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
reg mem_req_ready_in_r;
wire core_req_nc_valid;
VX_priority_encoder #(
.N (NUM_REQS)
) core_req_sel (
.data_in (core_req_valid_in_nc),
.index (core_req_nc_tid),
`UNUSED_PIN (onehot),
.valid_out (core_req_nc_valid)
);
reg mem_req_ready_in_r;
always @(*) begin
if (core_req_nc_valid) begin
mem_req_valid_out_r = 1;
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
end
mem_req_ready_in_r = 0;
mem_req_valid_out_r = 1;
mem_req_ready_in_r = 0;
end else begin
mem_req_valid_out_r = mem_req_valid_in;
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
mem_req_ready_in_r = mem_req_ready_out;
mem_req_valid_out_r = mem_req_valid_in;
mem_req_ready_in_r = mem_req_ready_out;
end
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
if (NUM_REQS > 1) begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
end
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end
end
end else begin
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
mem_req_rw_out_r = core_req_rw_in;
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
end
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end
end
@@ -201,26 +266,41 @@ module VX_nc_bypass #(
reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r;
reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r;
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
if (NUM_REQS > 1) begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
if (NUM_RSP_TAGS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin
core_rsp_valid_out_r[i] = 1;
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
core_rsp_ready_in_r[i] = 0;
end else begin
core_rsp_valid_out_r[i] = core_rsp_valid_in[i];
core_rsp_tag_out_r[i] = core_rsp_tag_in[i];
core_rsp_ready_in_r[i] = core_rsp_ready_out[i];
end
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
end
end
end else begin
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 1;
@@ -276,6 +356,7 @@ module VX_nc_bypass #(
end
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
always @(*) begin
if (is_mem_rsp_nc) begin
mem_rsp_ready_in_r = core_rsp_ready_out[rsp_tid];

View File

@@ -18,6 +18,12 @@ module VX_priority_encoder #(
assign index = 0;
assign valid_out = data_in;
end else if (N == 2) begin
assign onehot = {!data_in[REVERSE], data_in[REVERSE]};
assign index = !data_in[REVERSE];
assign valid_out = (| data_in);
end else if (FAST) begin
wire [N-1:0] scan_lo;

View File

@@ -27,7 +27,6 @@ module VX_stream_arbiter #(
wire [NUM_REQS-1:0] sel_1hot;
if (TYPE == "X") begin
VX_fixed_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
@@ -40,9 +39,7 @@ module VX_stream_arbiter #(
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "R") begin
VX_rr_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
@@ -55,9 +52,7 @@ module VX_stream_arbiter #(
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "F") begin
VX_fair_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
@@ -70,9 +65,7 @@ module VX_stream_arbiter #(
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "M") begin
VX_matrix_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
@@ -85,8 +78,9 @@ module VX_stream_arbiter #(
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end
end else begin
$error ("invalid parameter");
end
wire ready_out_unqual;

View File

@@ -30,7 +30,7 @@ CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG8 := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG32 := -DNUM_CLUSTERS=8 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)