Cache Working on Mem Copy
This commit is contained in:
@@ -27,6 +27,7 @@ assign VX_writeback_inter.rd = VX_writeback_temp.rd;
|
||||
assign VX_writeback_inter.write_data = VX_writeback_temp.write_data;
|
||||
assign VX_writeback_inter.wb_valid = VX_writeback_temp.wb_valid;
|
||||
assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
|
||||
assign VX_writeback_inter.wb_pc = VX_writeback_temp.wb_pc;
|
||||
|
||||
// assign VX_writeback_inter(VX_writeback_temp);
|
||||
|
||||
|
||||
@@ -294,11 +294,22 @@ module VX_bank
|
||||
);
|
||||
|
||||
wire stall_bank_pipe;
|
||||
reg is_fill_in_pipe;
|
||||
|
||||
genvar p_stage;
|
||||
always @(*) begin
|
||||
assign is_fill_in_pipe = 0;
|
||||
for (p_stage = 0; p_stage < STAGE_1_CYCLES; p_stage=p_stage+1) begin
|
||||
if (is_fill_st1[p_stage]) assign is_fill_in_pipe = 1;
|
||||
end
|
||||
|
||||
if (is_fill_st2) assign is_fill_in_pipe = 1;
|
||||
end
|
||||
|
||||
|
||||
assign dfpq_pop = !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
|
||||
assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
|
||||
assign reqq_pop = !mrvq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !reqq_hazard_st0;
|
||||
assign reqq_pop = !mrvq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !(reqq_hazard_st0 || (mrvq_valid_st0 && mrvq_hazard_st0)) && !is_fill_in_pipe;
|
||||
assign snrq_pop = !reqq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0;
|
||||
|
||||
|
||||
@@ -495,14 +506,15 @@ module VX_bank
|
||||
|
||||
|
||||
// Enqueue to miss reserv if it's a valid miss
|
||||
assign miss_add = valid_st2 && miss_st2;
|
||||
assign miss_add = valid_st2 && miss_st2 && !stall_bank_pipe && !mrvq_full && !(dirty_st2 && dwbq_full);
|
||||
assign miss_add_pc = pc_st2;
|
||||
assign miss_add_addr = addr_st2;
|
||||
assign miss_add_data = writeword_st2;
|
||||
assign {miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2;
|
||||
|
||||
|
||||
// Enqueue to CWB Queue
|
||||
wire cwbq_push = (valid_st2 && !miss_st2);
|
||||
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full & !llvq_full;
|
||||
wire [31:0] cwbq_data = readword_st2;
|
||||
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
|
||||
wire [4:0] cwbq_rd = miss_add_rd;
|
||||
@@ -527,8 +539,8 @@ module VX_bank
|
||||
);
|
||||
|
||||
// Enqueue to DWB Queue
|
||||
wire dwbq_push = (valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2;
|
||||
wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]};
|
||||
wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !(!fill_saw_dirty_st2 && mrvq_full);
|
||||
wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
wire[`BANK_LINE_SIZE_RNG][31:0] dwbq_req_data = readdata_st2;
|
||||
wire dwbq_empty;
|
||||
wire dwbq_full;
|
||||
@@ -536,6 +548,7 @@ module VX_bank
|
||||
|
||||
wire invalidate_fill;
|
||||
wire possible_fill = valid_st2 && miss_st2;
|
||||
wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
VX_fill_invalidator #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
@@ -560,16 +573,16 @@ module VX_bank
|
||||
.reset (reset),
|
||||
.possible_fill (possible_fill),
|
||||
.success_fill (is_fill_st2),
|
||||
.fill_addr (addr_st2),
|
||||
.fill_addr (fill_invalidator_addr),
|
||||
|
||||
.invalidate_fill (invalidate_fill)
|
||||
);
|
||||
|
||||
// Enqueue in dram_fill_req
|
||||
assign dram_fill_req = valid_st2 && miss_st2 && !invalidate_fill;
|
||||
assign dram_fill_req = valid_st2 && miss_st2 && !invalidate_fill && !dram_fill_req_queue_full;
|
||||
assign dram_because_of_snp = is_snp_st2 && valid_st2 && miss_st2;
|
||||
assign dram_snp_full = snrq_full && snp_req;
|
||||
assign dram_fill_req_addr = addr_st2;
|
||||
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
|
||||
assign dram_wb_req = !dwbq_empty;
|
||||
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * 32)), .SIZE(DWBQ_SIZE)) dwb_queue(
|
||||
@@ -589,7 +602,7 @@ module VX_bank
|
||||
// Lower Cache Hit
|
||||
wire llvq_empty;
|
||||
wire llvq_full;
|
||||
wire llvq_push = valid_st2 && !miss_st2;
|
||||
wire llvq_push = valid_st2 && !miss_st2 && !llvq_full && !cwbq_full;
|
||||
wire[`BANK_LINE_SIZE_RNG][31:0] llvq_push_data = readdata_st2;
|
||||
wire[31:0] llvq_addr = addr_st2;
|
||||
wire[`vx_clog2(NUMBER_REQUESTS)-1:0] llvq_tid = miss_add_tid;
|
||||
@@ -608,7 +621,7 @@ module VX_bank
|
||||
);
|
||||
|
||||
|
||||
assign stall_bank_pipe = (cwbq_push && cwbq_full) || (dwbq_push && dwbq_full) || (miss_add && mrvq_full) || (dram_fill_req && dram_fill_req_queue_full);
|
||||
assign stall_bank_pipe = (cwbq_push && cwbq_full) || (llvq_push && llvq_full) || (dwbq_push && dwbq_full) || (miss_add && mrvq_full) || (dram_fill_req && dram_fill_req_queue_full);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
@@ -254,7 +254,7 @@ module VX_cache
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_cache_core_req_bank_sel
|
||||
VX_cache_core_wb_sel_merge
|
||||
(
|
||||
.per_bank_wb_valid (per_bank_wb_valid),
|
||||
.per_bank_wb_tid (per_bank_wb_tid),
|
||||
|
||||
@@ -115,7 +115,7 @@
|
||||
`define TAG_SELECT_SIZE_RNG `TAG_SELECT_SIZE_END-1:0
|
||||
|
||||
|
||||
`define BASE_ADDR_MASK (~((1<<`WORD_SELECT_ADDR_END)-1))
|
||||
`define BASE_ADDR_MASK (~((1<<(`WORD_SELECT_ADDR_END+1))-1))
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
@@ -91,7 +91,7 @@ module VX_cache_dfq_queue
|
||||
);
|
||||
|
||||
|
||||
assign qual_bank_dram_fill_req = use_empty ? out_per_bank_dram_fill_req : use_per_bank_dram_fill_req;
|
||||
assign qual_bank_dram_fill_req = use_empty ? (out_per_bank_dram_fill_req & {NUMBER_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req & {NUMBER_BANKS{!use_empty}});
|
||||
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
|
||||
|
||||
wire[`vx_clog2(NUMBER_BANKS)-1:0] qual_request_index;
|
||||
|
||||
@@ -101,7 +101,7 @@ module VX_cache_dram_req_arb
|
||||
);
|
||||
|
||||
|
||||
assign per_bank_dram_wb_queue_pop = per_bank_dram_wb_req & (~(1 << dwb_bank));
|
||||
assign per_bank_dram_wb_queue_pop = per_bank_dram_wb_req & ((1 << dwb_bank));
|
||||
|
||||
|
||||
assign dram_req = dwb_valid || dfqq_req;
|
||||
|
||||
@@ -95,7 +95,7 @@ module VX_cache_miss_resrv
|
||||
|
||||
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire[`vx_clog2(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
wire[`vx_clog2(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
reg[MRVQ_SIZE-1:0] make_ready;
|
||||
genvar curr_e;
|
||||
|
||||
@@ -98,14 +98,12 @@ module VX_fill_invalidator
|
||||
wire enqueue_found;
|
||||
|
||||
VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) VX_sel_bank(
|
||||
.valids(fills_active),
|
||||
.valids(~fills_active),
|
||||
.index (enqueue_index),
|
||||
.found (enqueue_found)
|
||||
);
|
||||
|
||||
|
||||
reg[FILL_INVALIDAOR_SIZE-1:0] new_valids;
|
||||
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -113,7 +111,7 @@ module VX_fill_invalidator
|
||||
fills_active <= 0;
|
||||
fills_address <= 0;
|
||||
end else begin
|
||||
if (enqueue_found && !invalidate_fill) begin
|
||||
if (possible_fill && !invalidate_fill) begin
|
||||
fills_active[enqueue_index] <= 1;
|
||||
fills_address[enqueue_index] <= fill_addr;
|
||||
end
|
||||
|
||||
122
rtl/VX_cache/VX_mrv_queue.v
Normal file
122
rtl/VX_cache/VX_mrv_queue.v
Normal file
@@ -0,0 +1,122 @@
|
||||
|
||||
// VX_mrv_queue: single-clock FIFO with a registered read port and a
// write-to-read bypass register.
//
// Parameters:
//   DATAW - width in bits of one queue entry.
//   SIZE  - number of entries.
//             SIZE == 0 : stub, always empty, never full, out_data = 0.
//             SIZE == 1 : single holding register.
//             SIZE >= 2 : circular buffer. The write and read pointers wrap
//                         at SIZE-1, so depths that are not a power of two
//                         (such as the default) never index past the storage
//                         array.
//
// Ports:
//   clk, reset - clock and synchronous reset (reset is sampled on posedge clk).
//   push       - request to enqueue in_data; ignored while `full` is high.
//   in_data    - entry to enqueue.
//   pop        - request to dequeue the head entry; ignored while `empty` is high.
//   out_data   - current head entry (only meaningful while `empty` is low).
//   empty/full - occupancy flags.
module VX_mrv_queue
    #(
    parameter DATAW = 4,
    parameter SIZE  = 277
    )
    (
    input  wire             clk,
    input  wire             reset,
    input  wire             push,
    input  wire[DATAW-1:0]  in_data,

    input  wire             pop,
    output wire[DATAW-1:0]  out_data,
    output wire             empty,
    output wire             full
    );

    if (SIZE == 0) begin

        // Degenerate queue: nothing can ever be stored.
        assign empty    = 1;
        assign out_data = 0;
        assign full     = 0;

    end else begin

        reg[DATAW-1:0] data[SIZE-1:0], curr_r, head_r;
        reg[$clog2(SIZE+1)-1:0] size_r;
        reg[$clog2(SIZE)-1:0]   wr_ctr_r;
        reg[$clog2(SIZE)-1:0]   rd_ptr_r, rd_next_ptr_r;
        reg                     empty_r, full_r, bypass_r;
        wire                    reading, writing;

        // A request only takes effect when the queue can honour it.
        assign reading = pop  && !empty;
        assign writing = push && !full;

        if (SIZE == 1) begin

            // Single-entry queue: head_r is the only storage element.
            always @(posedge clk) begin
                if (reset) begin
                    size_r <= 0;
                end else begin
                    if (writing && !reading) begin
                        size_r <= 1;
                    end else if (reading && !writing) begin
                        size_r <= 0;
                    end

                    if (writing) begin
                        head_r <= in_data;
                    end
                end
            end

            assign out_data = head_r;
            assign empty    = (size_r == 0);
            // A pop in the same cycle frees the slot, so a push is accepted.
            assign full     = (size_r != 0) && !pop;

        end else begin

            // Successor of each pointer, wrapping at SIZE-1 rather than at the
            // natural 2^$clog2(SIZE) overflow. For power-of-two depths both
            // wrap points coincide.
            wire[$clog2(SIZE)-1:0] wr_ctr_succ =
                (wr_ctr_r == SIZE-1) ? {$clog2(SIZE){1'b0}} : (wr_ctr_r + 1'b1);
            wire[$clog2(SIZE)-1:0] rd_next_succ =
                (rd_next_ptr_r == SIZE-1) ? {$clog2(SIZE){1'b0}} : (rd_next_ptr_r + 1'b1);

            // Write pointer.
            always @(posedge clk) begin
                if (reset) begin
                    wr_ctr_r <= 0;
                end else begin
                    if (writing)
                        wr_ctr_r <= wr_ctr_succ;
                end
            end

            // Occupancy counter and registered empty/full flags. A
            // simultaneous push and pop leaves all three unchanged.
            always @(posedge clk) begin
                if (reset) begin
                    size_r  <= 0;
                    empty_r <= 1;
                    full_r  <= 0;
                end else begin
                    if (writing && !reading) begin
                        size_r  <= size_r + 1;
                        empty_r <= 0;
                        if (size_r == SIZE-1)
                            full_r <= 1;
                    end else if (reading && !writing) begin
                        size_r <= size_r - 1;
                        if (size_r == 1)
                            empty_r <= 1;
                        full_r <= 0;
                    end
                end
            end

            // Storage write (no reset: contents are don't-care while empty).
            always @(posedge clk) begin
                if (writing) begin
                    data[wr_ctr_r] <= in_data;
                end
            end

            // Read side. rd_next_ptr_r always holds the successor of
            // rd_ptr_r so that the next head can be fetched in the same
            // cycle as a pop.
            always @(posedge clk) begin
                if (reset) begin
                    rd_ptr_r      <= 0;
                    rd_next_ptr_r <= 1;
                    bypass_r      <= 0;
                end else begin
                    if (reading) begin
                        rd_ptr_r      <= rd_next_ptr_r;
                        rd_next_ptr_r <= rd_next_succ;
                    end

                    // The entry being written becomes the head next cycle
                    // when the queue is empty, or when its only entry is
                    // being popped; the storage read below would return
                    // stale data in that case, so forward in_data instead.
                    bypass_r <= writing && (empty_r || (1 == size_r) && reading);
                    curr_r   <= in_data;
                    head_r   <= data[reading ? rd_next_ptr_r : rd_ptr_r];
                end
            end

            assign out_data = bypass_r ? curr_r : head_r;
            assign empty    = empty_r;
            assign full     = full_r;

        end

    end

endmodule
|
||||
@@ -71,12 +71,12 @@ module VX_tag_data_access
|
||||
);
|
||||
|
||||
|
||||
reg[`BANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||
reg[`BANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-2:0];
|
||||
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg[`BANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-2:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-2:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-2:0];
|
||||
reg[`BANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-2:0];
|
||||
|
||||
|
||||
wire qual_read_valid_st1;
|
||||
@@ -142,7 +142,7 @@ module VX_tag_data_access
|
||||
|
||||
genvar curr_stage;
|
||||
generate
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-2; curr_stage = curr_stage + 1) begin
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`BANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
@@ -155,13 +155,13 @@ module VX_tag_data_access
|
||||
endgenerate
|
||||
|
||||
|
||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1];
|
||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1];
|
||||
assign use_read_tag_st1e = read_tag_st1c [STAGE_1_CYCLES-1];
|
||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-2];
|
||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-2];
|
||||
assign use_read_tag_st1e = read_tag_st1c [STAGE_1_CYCLES-2];
|
||||
|
||||
genvar curr_w;
|
||||
for (curr_w = 0; curr_w < `BANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
|
||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
|
||||
for (curr_w = 0; curr_w < `BANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-2][curr_w][31:0];
|
||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-2];
|
||||
|
||||
/////////////////////// LOAD LOGIC ///////////////////
|
||||
|
||||
@@ -179,12 +179,12 @@ module VX_tag_data_access
|
||||
wire b2 = (byte_select == 2);
|
||||
wire b3 = (byte_select == 3);
|
||||
|
||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-1][0][31:0];
|
||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-1][1][31:0];
|
||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-1][2][31:0];
|
||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-1][3][31:0];
|
||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-2][0][31:0];
|
||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-2][1][31:0];
|
||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-2][2][31:0];
|
||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-2][3][31:0];
|
||||
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-2][block_offset][31:0];
|
||||
|
||||
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
||||
b1 ? (data_unmod >> 8) :
|
||||
@@ -231,14 +231,14 @@ module VX_tag_data_access
|
||||
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
|
||||
|
||||
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
|
||||
wire force_write = writefill_st1e && valid_req_st1e && miss_st1e;
|
||||
wire force_write = writefill_st1e && valid_req_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
|
||||
|
||||
wire[`BANK_LINE_SIZE_RNG][3:0] we;
|
||||
wire[`BANK_LINE_SIZE_RNG][31:0] data_write;
|
||||
genvar g;
|
||||
generate
|
||||
for (g = 0; g < `BANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables
|
||||
wire normal_write = (block_offset == g) && should_write;
|
||||
wire normal_write = (block_offset == g) && should_write && !writefill_st1e;
|
||||
|
||||
assign we[g] = (force_write) ? 4'b1111 :
|
||||
(normal_write && sw) ? 4'b1111 :
|
||||
|
||||
@@ -27,21 +27,11 @@ module VX_fetch (
|
||||
|
||||
|
||||
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
|
||||
reg stall_might_be_branch;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
stall_might_be_branch <= 0;
|
||||
end else if ((stall_might_be_branch == 1'b1) && !icache_stage_delay && !schedule_delay) begin
|
||||
stall_might_be_branch <= 0;
|
||||
end else if (scheduled_warp == 1'b1) begin
|
||||
stall_might_be_branch <= 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// Locals
|
||||
|
||||
|
||||
assign pipe_stall = schedule_delay || icache_stage_delay || (stall_might_be_branch && (icache_stage_wid == warp_num)) ;
|
||||
assign pipe_stall = schedule_delay || icache_stage_delay;
|
||||
|
||||
VX_warp_scheduler warp_scheduler(
|
||||
.clk (clk),
|
||||
@@ -68,6 +58,10 @@ module VX_fetch (
|
||||
.wstall (VX_wstall.wstall),
|
||||
.wstall_warp_num (VX_wstall.warp_num),
|
||||
|
||||
// Lock/release Stuff
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
||||
// Join
|
||||
.is_join (VX_join.is_join),
|
||||
.join_warp_num (VX_join.join_warp_num),
|
||||
@@ -100,7 +94,7 @@ module VX_fetch (
|
||||
);
|
||||
|
||||
assign fe_inst_meta_fi.warp_num = warp_num;
|
||||
assign fe_inst_meta_fi.valid = thread_mask && {`NT{!stall_might_be_branch}};
|
||||
assign fe_inst_meta_fi.valid = thread_mask;
|
||||
assign fe_inst_meta_fi.instruction = 32'h0;
|
||||
assign fe_inst_meta_fi.inst_pc = warp_pc;
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ module VX_icache_stage (
|
||||
assign fe_inst_meta_id.valid = fe_inst_meta_fi.valid & {`NT{!icache_stage_delay}};
|
||||
|
||||
assign icache_stage_wid = fe_inst_meta_fi.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_fi.valid;
|
||||
assign icache_stage_valids = fe_inst_meta_fi.valid & {`NT{!icache_stage_delay}};
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -9,11 +9,14 @@ module VX_scheduler (
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
output wire schedule_delay
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
|
||||
);
|
||||
|
||||
reg[31:0] count_valid;
|
||||
|
||||
assign is_empty = count_valid == 0;
|
||||
|
||||
reg[31:0][`NT-1:0] rename_table[`NW-1:0];
|
||||
|
||||
@@ -67,6 +70,10 @@ module VX_scheduler (
|
||||
end else begin
|
||||
if (valid_wb ) rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] <= rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid);
|
||||
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.warp_num ][VX_bckE_req.rd ] <= VX_bckE_req.valid;
|
||||
|
||||
if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1;
|
||||
if (!schedule_delay && wb_inc) count_valid = count_valid + 1;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -54,7 +54,10 @@ module VX_warp_scheduler (
|
||||
output wire[`NW_M1:0] warp_num,
|
||||
output wire[31:0] warp_pc,
|
||||
output wire out_ebreak,
|
||||
output wire scheduled_warp
|
||||
output wire scheduled_warp,
|
||||
|
||||
input wire[`NW_M1:0] icache_stage_wid,
|
||||
input wire[`NT-1:0] icache_stage_valids
|
||||
|
||||
);
|
||||
|
||||
@@ -76,8 +79,10 @@ module VX_warp_scheduler (
|
||||
reg[`NW-1:0] warp_active;
|
||||
reg[`NW-1:0] warp_stalled;
|
||||
|
||||
reg[`NW-1:0] visible_active;
|
||||
wire[`NW-1:0] use_active;
|
||||
reg [`NW-1:0] visible_active;
|
||||
wire[`NW-1:0] use_active;
|
||||
|
||||
reg [`NW-1:0] warp_lock;
|
||||
|
||||
wire wstall_this_cycle;
|
||||
|
||||
@@ -188,7 +193,7 @@ module VX_warp_scheduler (
|
||||
|
||||
// Refilling active warps
|
||||
if (update_visible_active) begin
|
||||
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall);
|
||||
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock;
|
||||
end
|
||||
|
||||
// Don't change state if stall
|
||||
@@ -208,6 +213,15 @@ module VX_warp_scheduler (
|
||||
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
|
||||
warp_stalled[branch_warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1'b1;
|
||||
end
|
||||
if (|icache_stage_valids && !stall) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
@@ -294,7 +308,7 @@ module VX_warp_scheduler (
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
|
||||
assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall)) : visible_active;
|
||||
assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock)) : visible_active;
|
||||
|
||||
// Choosing a warp to schedule
|
||||
VX_priority_encoder choose_schedule(
|
||||
|
||||
26
rtl/Vortex.v
26
rtl/Vortex.v
@@ -40,6 +40,11 @@ module Vortex
|
||||
output wire out_ebreak
|
||||
);
|
||||
|
||||
wire scheduler_empty;
|
||||
wire out_ebreak_unqual;
|
||||
|
||||
assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
|
||||
|
||||
|
||||
reg[31:0] icache_banks = `ICACHE_BANKS;
|
||||
reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;
|
||||
@@ -63,6 +68,7 @@ module Vortex
|
||||
// Dcache Interface
|
||||
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual();
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_res();
|
||||
@@ -88,10 +94,21 @@ module Vortex
|
||||
endgenerate
|
||||
|
||||
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
wire[31:0] temp_io_data = VX_dcache_req.core_req_valid[0];
|
||||
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
|
||||
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~io_valid}};
|
||||
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
|
||||
|
||||
VX_icache_response_inter icache_response_fe();
|
||||
VX_icache_request_inter icache_request_fe();
|
||||
@@ -145,7 +162,7 @@ VX_front_end vx_front_end(
|
||||
.icache_request_fe (icache_request_fe),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.fetch_ebreak (out_ebreak)
|
||||
.fetch_ebreak (out_ebreak_unqual)
|
||||
);
|
||||
|
||||
VX_scheduler schedule(
|
||||
@@ -156,7 +173,8 @@ VX_scheduler schedule(
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.VX_writeback_inter(VX_writeback_inter),
|
||||
.schedule_delay (schedule_delay)
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end vx_back_end(
|
||||
@@ -184,7 +202,7 @@ VX_dmem_controller VX_dmem_controller(
|
||||
.VX_dram_req_rsp_icache (VX_dram_req_rsp_icache),
|
||||
.VX_icache_req (icache_request_fe),
|
||||
.VX_icache_rsp (icache_response_fe),
|
||||
.VX_dcache_req (VX_dcache_req),
|
||||
.VX_dcache_req (VX_dcache_req_qual),
|
||||
.VX_dcache_rsp (VX_dcache_rsp)
|
||||
);
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ interface VX_gpu_dcache_req_inter
|
||||
wire [31:0] core_req_pc;
|
||||
|
||||
// Can't WB
|
||||
wire core_no_wb_slot;
|
||||
wire core_no_wb_slot;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
Reference in New Issue
Block a user