Fix for Single-Threaded
This commit is contained in:
@@ -311,8 +311,8 @@ module VX_bank
|
|||||||
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
|
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
|
||||||
|
|
||||||
|
|
||||||
assign dfpq_pop = !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
|
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
|
||||||
assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
|
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
|
||||||
assign reqq_pop = !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !(reqq_hazard_st0 || (mrvq_valid_st0 && mrvq_hazard_st0)) && !is_fill_in_pipe;
|
assign reqq_pop = !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !(reqq_hazard_st0 || (mrvq_valid_st0 && mrvq_hazard_st0)) && !is_fill_in_pipe;
|
||||||
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0;
|
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0;
|
||||||
|
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ module VX_fill_invalidator
|
|||||||
|
|
||||||
if (success_fill) begin
|
if (success_fill) begin
|
||||||
success_found = 1;
|
success_found = 1;
|
||||||
success_index = curr_fill[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0];
|
success_index = curr_fill;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -73,12 +73,12 @@ module VX_tag_data_access
|
|||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-2:0];
|
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||||
|
|
||||||
reg read_valid_st1c[STAGE_1_CYCLES-2:0];
|
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
|
||||||
reg read_dirty_st1c[STAGE_1_CYCLES-2:0];
|
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
|
||||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-2:0];
|
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
|
||||||
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-2:0];
|
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||||
|
|
||||||
|
|
||||||
wire qual_read_valid_st1;
|
wire qual_read_valid_st1;
|
||||||
@@ -94,6 +94,9 @@ module VX_tag_data_access
|
|||||||
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data;
|
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data;
|
||||||
|
|
||||||
|
|
||||||
|
wire real_writefill = writefill_st1e && miss_st1e;
|
||||||
|
|
||||||
|
|
||||||
wire fill_sent;
|
wire fill_sent;
|
||||||
wire invalidate_line;
|
wire invalidate_line;
|
||||||
VX_tag_data_structure #(
|
VX_tag_data_structure #(
|
||||||
@@ -128,13 +131,14 @@ module VX_tag_data_access
|
|||||||
|
|
||||||
.invalidate (invalidate_line),
|
.invalidate (invalidate_line),
|
||||||
.write_enable(use_write_enable),
|
.write_enable(use_write_enable),
|
||||||
.write_fill (writefill_st1e),
|
.write_fill (real_writefill),
|
||||||
.write_addr (writeaddr_st1e),
|
.write_addr (writeaddr_st1e),
|
||||||
.write_data (use_write_data),
|
.write_data (use_write_data),
|
||||||
.fill_sent (fill_sent)
|
.fill_sent (fill_sent)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
|
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
|
||||||
|
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.stall(stall),
|
.stall(stall),
|
||||||
@@ -145,7 +149,7 @@ module VX_tag_data_access
|
|||||||
|
|
||||||
genvar curr_stage;
|
genvar curr_stage;
|
||||||
generate
|
generate
|
||||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-2; curr_stage = curr_stage + 1) begin
|
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
|
||||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
|
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
@@ -158,13 +162,13 @@ module VX_tag_data_access
|
|||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|
||||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-2] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
|
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
|
||||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-2] && (FUNC_ID == `DFUNC_ID); // Dirty only applies in Dcache
|
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache
|
||||||
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-2]; // Tag is always the same in SM
|
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
|
||||||
|
|
||||||
genvar curr_w;
|
genvar curr_w;
|
||||||
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-2][curr_w][31:0];
|
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
|
||||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-2];
|
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
|
||||||
|
|
||||||
/////////////////////// LOAD LOGIC ///////////////////
|
/////////////////////// LOAD LOGIC ///////////////////
|
||||||
|
|
||||||
@@ -182,12 +186,12 @@ module VX_tag_data_access
|
|||||||
wire b2 = (byte_select == 2);
|
wire b2 = (byte_select == 2);
|
||||||
wire b3 = (byte_select == 3);
|
wire b3 = (byte_select == 3);
|
||||||
|
|
||||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-2][0][31:0];
|
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-1][0][31:0];
|
||||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-2][1][31:0];
|
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-1][1][31:0];
|
||||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-2][2][31:0];
|
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-1][2][31:0];
|
||||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-2][3][31:0];
|
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-1][3][31:0];
|
||||||
|
|
||||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-2][block_offset][31:0];
|
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
|
||||||
|
|
||||||
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
||||||
b1 ? (data_unmod >> 8) :
|
b1 ? (data_unmod >> 8) :
|
||||||
@@ -234,7 +238,7 @@ module VX_tag_data_access
|
|||||||
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
|
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
|
||||||
|
|
||||||
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
|
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
|
||||||
wire force_write = writefill_st1e && valid_req_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
|
wire force_write = writefill_st1e && valid_req_st1e && miss_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
|
||||||
|
|
||||||
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
|
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
|
||||||
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
|
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
|
||||||
@@ -262,7 +266,7 @@ module VX_tag_data_access
|
|||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
if (FUNC_ID == `LLFUNC_ID) begin
|
if (FUNC_ID == `LLFUNC_ID) begin
|
||||||
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-2];
|
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||||
end else begin
|
end else begin
|
||||||
assign readword_st1e = data_Qual;
|
assign readword_st1e = data_Qual;
|
||||||
end
|
end
|
||||||
@@ -272,7 +276,7 @@ module VX_tag_data_access
|
|||||||
assign readdata_st1e = use_read_data_st1e;
|
assign readdata_st1e = use_read_data_st1e;
|
||||||
assign readtag_st1e = use_read_tag_st1e;
|
assign readtag_st1e = use_read_tag_st1e;
|
||||||
assign fill_sent = miss_st1e;
|
assign fill_sent = miss_st1e;
|
||||||
assign fill_saw_dirty_st1e = force_write && dirty_st1e;
|
assign fill_saw_dirty_st1e = force_write && dirty_st1e && miss_st1e;
|
||||||
assign invalidate_line = is_snp_st1e && !miss_st1e;
|
assign invalidate_line = is_snp_st1e && !miss_st1e;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -92,6 +92,7 @@ module VX_tag_data_structure
|
|||||||
end
|
end
|
||||||
end else if (fill_sent) begin
|
end else if (fill_sent) begin
|
||||||
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||||
|
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (invalidate) begin
|
if (invalidate) begin
|
||||||
|
|||||||
@@ -127,7 +127,7 @@
|
|||||||
|
|
||||||
`define NUMBER_CORES (`NUMBER_CORES_PER_CLUSTER*`NUMBER_CLUSTERS)
|
`define NUMBER_CORES (`NUMBER_CORES_PER_CLUSTER*`NUMBER_CLUSTERS)
|
||||||
|
|
||||||
// `define SINGLE_CORE_BENCH 0
|
`define SINGLE_CORE_BENCH 1
|
||||||
`define GLOBAL_BLOCK_SIZE_BYTES 16
|
`define GLOBAL_BLOCK_SIZE_BYTES 16
|
||||||
// ========================================= Dcache Configurable Knobs =========================================
|
// ========================================= Dcache Configurable Knobs =========================================
|
||||||
|
|
||||||
@@ -141,7 +141,7 @@
|
|||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
`define DNUMBER_REQUESTS `NT
|
`define DNUMBER_REQUESTS `NT
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
`define DSTAGE_1_CYCLES 2
|
`define DSTAGE_1_CYCLES 1
|
||||||
// Function ID
|
// Function ID
|
||||||
`define DFUNC_ID 0
|
`define DFUNC_ID 0
|
||||||
|
|
||||||
@@ -172,7 +172,7 @@
|
|||||||
`define DFFSQ_SIZE 8
|
`define DFFSQ_SIZE 8
|
||||||
|
|
||||||
// Fill Invalidator Size {Fill invalidator must be active}
|
// Fill Invalidator Size {Fill invalidator must be active}
|
||||||
`define DFILL_INVALIDAOR_SIZE 16
|
`define DFILL_INVALIDAOR_SIZE 0
|
||||||
|
|
||||||
// Dram knobs
|
// Dram knobs
|
||||||
`define DSIMULATED_DRAM_LATENCY_CYCLES 10
|
`define DSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||||
@@ -192,7 +192,7 @@
|
|||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
`define INUMBER_REQUESTS 1
|
`define INUMBER_REQUESTS 1
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
`define ISTAGE_1_CYCLES 2
|
`define ISTAGE_1_CYCLES 1
|
||||||
// Function ID
|
// Function ID
|
||||||
`define IFUNC_ID 1
|
`define IFUNC_ID 1
|
||||||
|
|
||||||
@@ -214,16 +214,16 @@
|
|||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
`define ICWBQ_SIZE `IREQQ_SIZE
|
`define ICWBQ_SIZE `IREQQ_SIZE
|
||||||
// Dram Writeback Queue Size
|
// Dram Writeback Queue Size
|
||||||
`define IDWBQ_SIZE 0
|
`define IDWBQ_SIZE 16
|
||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
`define IDFQQ_SIZE `IREQQ_SIZE
|
`define IDFQQ_SIZE `IREQQ_SIZE
|
||||||
// Lower Level Cache Hit Queue Size
|
// Lower Level Cache Hit Queue Size
|
||||||
`define ILLVQ_SIZE 0
|
`define ILLVQ_SIZE 16
|
||||||
// Fill Forward SNP Queue
|
// Fill Forward SNP Queue
|
||||||
`define IFFSQ_SIZE 8
|
`define IFFSQ_SIZE 8
|
||||||
|
|
||||||
// Fill Invalidator Size {Fill invalidator must be active}
|
// Fill Invalidator Size {Fill invalidator must be active}
|
||||||
`define IFILL_INVALIDAOR_SIZE 16
|
`define IFILL_INVALIDAOR_SIZE 0
|
||||||
|
|
||||||
// Dram knobs
|
// Dram knobs
|
||||||
`define ISIMULATED_DRAM_LATENCY_CYCLES 10
|
`define ISIMULATED_DRAM_LATENCY_CYCLES 10
|
||||||
@@ -244,7 +244,7 @@
|
|||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
`define SNUMBER_REQUESTS `NT
|
`define SNUMBER_REQUESTS `NT
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
`define SSTAGE_1_CYCLES 2
|
`define SSTAGE_1_CYCLES 1
|
||||||
// Function ID
|
// Function ID
|
||||||
`define SFUNC_ID 2
|
`define SFUNC_ID 2
|
||||||
|
|
||||||
@@ -258,24 +258,24 @@
|
|||||||
// Miss Reservation Queue Knob
|
// Miss Reservation Queue Knob
|
||||||
`define SMRVQ_SIZE `SREQQ_SIZE
|
`define SMRVQ_SIZE `SREQQ_SIZE
|
||||||
// Dram Fill Rsp Queue Size
|
// Dram Fill Rsp Queue Size
|
||||||
`define SDFPQ_SIZE 0
|
`define SDFPQ_SIZE 16
|
||||||
// Snoop Req Queue
|
// Snoop Req Queue
|
||||||
`define SSNRQ_SIZE 0
|
`define SSNRQ_SIZE 16
|
||||||
|
|
||||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
`define SCWBQ_SIZE `SREQQ_SIZE
|
`define SCWBQ_SIZE `SREQQ_SIZE
|
||||||
// Dram Writeback Queue Size
|
// Dram Writeback Queue Size
|
||||||
`define SDWBQ_SIZE 0
|
`define SDWBQ_SIZE 16
|
||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
`define SDFQQ_SIZE 0
|
`define SDFQQ_SIZE 16
|
||||||
// Lower Level Cache Hit Queue Size
|
// Lower Level Cache Hit Queue Size
|
||||||
`define SLLVQ_SIZE 0
|
`define SLLVQ_SIZE 16
|
||||||
// Fill Forward SNP Queue
|
// Fill Forward SNP Queue
|
||||||
`define SFFSQ_SIZE 0
|
`define SFFSQ_SIZE 16
|
||||||
|
|
||||||
// Fill Invalidator Size {Fill invalidator must be active}
|
// Fill Invalidator Size {Fill invalidator must be active}
|
||||||
`define SFILL_INVALIDAOR_SIZE 16
|
`define SFILL_INVALIDAOR_SIZE 0
|
||||||
|
|
||||||
// Dram knobs
|
// Dram knobs
|
||||||
`define SSIMULATED_DRAM_LATENCY_CYCLES 10
|
`define SSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||||
@@ -296,7 +296,7 @@
|
|||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
`define LLNUMBER_REQUESTS (2*`NUMBER_CORES_PER_CLUSTER)
|
`define LLNUMBER_REQUESTS (2*`NUMBER_CORES_PER_CLUSTER)
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
`define LLSTAGE_1_CYCLES 2
|
`define LLSTAGE_1_CYCLES 1
|
||||||
// Function ID
|
// Function ID
|
||||||
`define LLFUNC_ID 3
|
`define LLFUNC_ID 3
|
||||||
|
|
||||||
@@ -322,12 +322,12 @@
|
|||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
`define LLDFQQ_SIZE `LLREQQ_SIZE
|
`define LLDFQQ_SIZE `LLREQQ_SIZE
|
||||||
// Lower Level Cache Hit Queue Size
|
// Lower Level Cache Hit Queue Size
|
||||||
`define LLLLVQ_SIZE 0
|
`define LLLLVQ_SIZE 16
|
||||||
// Fill Forward SNP Queue
|
// Fill Forward SNP Queue
|
||||||
`define LLFFSQ_SIZE 8
|
`define LLFFSQ_SIZE 8
|
||||||
|
|
||||||
// Fill Invalidator Size {Fill invalidator must be active}
|
// Fill Invalidator Size {Fill invalidator must be active}
|
||||||
`define LLFILL_INVALIDAOR_SIZE 16
|
`define LLFILL_INVALIDAOR_SIZE 0
|
||||||
|
|
||||||
// Dram knobs
|
// Dram knobs
|
||||||
`define LLSIMULATED_DRAM_LATENCY_CYCLES 10
|
`define LLSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||||
@@ -348,7 +348,7 @@
|
|||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
`define L3NUMBER_REQUESTS (`NUMBER_CLUSTERS)
|
`define L3NUMBER_REQUESTS (`NUMBER_CLUSTERS)
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
`define L3STAGE_1_CYCLES 2
|
`define L3STAGE_1_CYCLES 1
|
||||||
// Function ID
|
// Function ID
|
||||||
`define L3FUNC_ID 3
|
`define L3FUNC_ID 3
|
||||||
|
|
||||||
@@ -379,7 +379,7 @@
|
|||||||
`define L3FFSQ_SIZE 8
|
`define L3FFSQ_SIZE 8
|
||||||
|
|
||||||
// Fill Invalidator Size {Fill invalidator must be active}
|
// Fill Invalidator Size {Fill invalidator must be active}
|
||||||
`define L3FILL_INVALIDAOR_SIZE 16
|
`define L3FILL_INVALIDAOR_SIZE 0
|
||||||
|
|
||||||
// Dram knobs
|
// Dram knobs
|
||||||
`define L3SIMULATED_DRAM_LATENCY_CYCLES 10
|
`define L3SIMULATED_DRAM_LATENCY_CYCLES 10
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
`ifndef VX_DEFINE_SYNTH
|
`ifndef VX_DEFINE_SYNTH
|
||||||
`define VX_DEFINE_SYNTH
|
`define VX_DEFINE_SYNTH
|
||||||
|
|
||||||
`define NT 4
|
`define NT 8
|
||||||
`define NW 8
|
`define NW 8
|
||||||
`define NUMBER_CORES_PER_CLUSTER 2
|
`define NUMBER_CORES_PER_CLUSTER 1
|
||||||
`define NUMBER_CLUSTERS 1
|
`define NUMBER_CLUSTERS 1
|
||||||
`define DCACHE_SIZE_BYTES 4096
|
`define DCACHE_SIZE_BYTES 4096
|
||||||
`define ICACHE_SIZE_BYTES 1024
|
`define ICACHE_SIZE_BYTES 1024
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
module VX_generic_register
|
module VX_generic_register
|
||||||
#( parameter N = 1)
|
#( parameter N = 1, parameter Valid = 1)
|
||||||
(
|
(
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
@@ -10,6 +10,12 @@ module VX_generic_register
|
|||||||
output wire[(N-1):0] out
|
output wire[(N-1):0] out
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (Valid == 0) begin
|
||||||
|
|
||||||
|
assign out = in;
|
||||||
|
|
||||||
|
end else begin
|
||||||
|
|
||||||
reg[(N-1):0] value;
|
reg[(N-1):0] value;
|
||||||
|
|
||||||
always @(posedge clk or posedge reset) begin
|
always @(posedge clk or posedge reset) begin
|
||||||
@@ -24,4 +30,6 @@ module VX_generic_register
|
|||||||
|
|
||||||
assign out = value;
|
assign out = value;
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -13,10 +13,11 @@ module VX_gpgpu_inst (
|
|||||||
wire is_split = (VX_gpu_inst_req.is_split);
|
wire is_split = (VX_gpu_inst_req.is_split);
|
||||||
|
|
||||||
wire[`NT_M1:0] tmc_new_mask;
|
wire[`NT_M1:0] tmc_new_mask;
|
||||||
|
wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0];
|
||||||
genvar curr_t;
|
genvar curr_t;
|
||||||
generate
|
generate
|
||||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init
|
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||||
assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
|
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
@@ -32,11 +33,12 @@ module VX_gpgpu_inst (
|
|||||||
|
|
||||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||||
|
wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0];
|
||||||
wire[`NW-1:0] wspawn_new_active;
|
wire[`NW-1:0] wspawn_new_active;
|
||||||
genvar curr_w;
|
genvar curr_w;
|
||||||
generate
|
generate
|
||||||
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init
|
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||||
assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0];
|
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|||||||
@@ -218,9 +218,11 @@ module VX_warp_scheduler (
|
|||||||
// Lock/Release
|
// Lock/Release
|
||||||
if (scheduled_warp && !stall) begin
|
if (scheduled_warp && !stall) begin
|
||||||
warp_lock[warp_num] <= 1'b1;
|
warp_lock[warp_num] <= 1'b1;
|
||||||
|
// warp_lock <= {`NW{1'b1}};
|
||||||
end
|
end
|
||||||
if (|icache_stage_valids && !stall) begin
|
if (|icache_stage_valids && !stall) begin
|
||||||
warp_lock[icache_stage_wid] <= 1'b0;
|
warp_lock[icache_stage_wid] <= 1'b0;
|
||||||
|
// warp_lock <= {`NW{1'b0}};
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
@@ -292,7 +294,7 @@ module VX_warp_scheduler (
|
|||||||
|
|
||||||
assign hazard = (should_jal || should_bra) && schedule;
|
assign hazard = (should_jal || should_bra) && schedule;
|
||||||
|
|
||||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule];
|
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
||||||
|
|
||||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
|
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
|
|
||||||
COMP = riscv32-unknown-elf-gcc
|
COMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-gcc
|
||||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld -ffreestanding -nostdlib
|
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld -ffreestanding -nostdlib
|
||||||
|
|
||||||
DMP = riscv32-unknown-elf-objdump
|
DMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-objdump
|
||||||
CPY = riscv32-unknown-elf-objcopy
|
CPY = /opt/riscv-new/drops/bin/riscv32-unknown-elf-objcopy
|
||||||
|
|
||||||
|
|
||||||
NEWLIB = ../../newlib/newlib.c
|
NEWLIB = ../../newlib/newlib.c
|
||||||
@@ -13,7 +13,7 @@ VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
|||||||
VX_API = ../../vx_api/vx_api.c
|
VX_API = ../../vx_api/vx_api.c
|
||||||
VX_TEST = ../../tests/tests.c
|
VX_TEST = ../../tests/tests.c
|
||||||
VX_FIO = ../../fileio/fileio.s
|
VX_FIO = ../../fileio/fileio.s
|
||||||
LIBS = ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
LIBS = /opt/riscv-new/drops/riscv32-unknown-elf/lib/libc.a /opt/riscv-new/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||||
|
|
||||||
VX_MAIN = vx_simple_main
|
VX_MAIN = vx_simple_main
|
||||||
|
|
||||||
|
|||||||
@@ -52,6 +52,18 @@ int main()
|
|||||||
// Main is called with all threads active of warp 0
|
// Main is called with all threads active of warp 0
|
||||||
vx_tmc(1);
|
vx_tmc(1);
|
||||||
|
|
||||||
|
vx_print_str("Let's start...\n");
|
||||||
|
unsigned what[36];
|
||||||
|
for (int i = 0; i < 36; i++)
|
||||||
|
{
|
||||||
|
what[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 36; i++)
|
||||||
|
{
|
||||||
|
vx_printf("Value: ", what[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
vx_print_str("Simple Main\n");
|
vx_print_str("Simple Main\n");
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -20,22 +20,22 @@ _start:
|
|||||||
# Initialize SP
|
# Initialize SP
|
||||||
# la sp, __stack_top
|
# la sp, __stack_top
|
||||||
la a1, vx_set_sp
|
la a1, vx_set_sp
|
||||||
li a0, 32
|
li a0, 4
|
||||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||||
jal vx_set_sp
|
jal vx_set_sp
|
||||||
li a0, 1
|
# li a0, 1
|
||||||
.word 0x0005006b # tmc 1
|
# .word 0x0005006b # tmc 1
|
||||||
# Initialize global pointer
|
# Initialize global pointer
|
||||||
# call __cxx_global_var_init
|
# call __cxx_global_var_init
|
||||||
# Clear the bss segment
|
# Clear the bss segment
|
||||||
la a0, _edata
|
# la a0, _edata
|
||||||
la a2, _end
|
# la a2, _end
|
||||||
sub a2, a2, a0
|
# sub a2, a2, a0
|
||||||
li a1, 0
|
# li a1, 0
|
||||||
call memset
|
# call memset
|
||||||
la a0, __libc_fini_array # Register global termination functions
|
# la a0, __libc_fini_array # Register global termination functions
|
||||||
call atexit # to be called upon exit
|
# call atexit # to be called upon exit
|
||||||
call __libc_init_array # Run global initialization functions
|
# call __libc_init_array # Run global initialization functions
|
||||||
# li a0, 4
|
# li a0, 4
|
||||||
# .word 0x0005006b # tmc 4
|
# .word 0x0005006b # tmc 4
|
||||||
call main
|
call main
|
||||||
@@ -46,7 +46,7 @@ _start:
|
|||||||
.type vx_set_sp, @function
|
.type vx_set_sp, @function
|
||||||
.global vx_set_sp
|
.global vx_set_sp
|
||||||
vx_set_sp:
|
vx_set_sp:
|
||||||
li a0, 32
|
li a0, 4
|
||||||
.word 0x0005006b # tmc 4
|
.word 0x0005006b # tmc 4
|
||||||
|
|
||||||
.option push
|
.option push
|
||||||
@@ -55,7 +55,7 @@ vx_set_sp:
|
|||||||
addi gp, gp, %pcrel_lo(1b)
|
addi gp, gp, %pcrel_lo(1b)
|
||||||
.option pop
|
.option pop
|
||||||
|
|
||||||
csrr a3, 0x21 # get wid
|
csrr a3, 0x22 # get wid
|
||||||
slli a3, a3, 0x1a # shift by wid
|
slli a3, a3, 0x1a # shift by wid
|
||||||
csrr a2, 0x20 # get tid
|
csrr a2, 0x20 # get tid
|
||||||
slli a1, a2, 10 # multiply tid by 1024
|
slli a1, a2, 10 # multiply tid by 1024
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ void test_tmc()
|
|||||||
vx_tmc(4);
|
vx_tmc(4);
|
||||||
|
|
||||||
unsigned tid = vx_threadID(); // Get TID
|
unsigned tid = vx_threadID(); // Get TID
|
||||||
|
|
||||||
tmc_array[tid] = tid;
|
tmc_array[tid] = tid;
|
||||||
|
|
||||||
vx_tmc(1);
|
vx_tmc(1);
|
||||||
@@ -85,6 +86,7 @@ void simple_kernel()
|
|||||||
|
|
||||||
wsapwn_arr[wid] = wid;
|
wsapwn_arr[wid] = wid;
|
||||||
|
|
||||||
|
wid = vx_warpID();
|
||||||
if (wid != 0)
|
if (wid != 0)
|
||||||
{
|
{
|
||||||
vx_tmc(0);
|
vx_tmc(0);
|
||||||
|
|||||||
Reference in New Issue
Block a user