Merge branch 'fpga_synthesis' of https://github.gatech.edu/casl/Vortex into fpga_synthesis

This commit is contained in:
Blaise Tine
2020-03-29 01:17:32 -04:00
16 changed files with 28844 additions and 28661 deletions

View File

@@ -558,7 +558,7 @@ module VX_bank
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data = readdata_st2;
wire dwbq_empty;
wire possible_fill = valid_st2 && miss_st2;
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_queue_full;
wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
VX_fill_invalidator #(
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
@@ -590,7 +590,7 @@ module VX_bank
);
// Enqueue in dram_fill_req
assign dram_fill_req = valid_st2 && miss_st2 && !invalidate_fill && !dram_fill_req_queue_full;
assign dram_fill_req = possible_fill && !invalidate_fill;
assign dram_because_of_snp = is_snp_st2 && valid_st2 && miss_st2;
assign dram_snp_full = snrq_full && snp_req;
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;

View File

@@ -118,6 +118,10 @@ module VX_cache_miss_resrv
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index];
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
wire update_ready = (|make_ready);
integer i;
always @(posedge clk) begin
@@ -128,8 +132,7 @@ module VX_cache_miss_resrv
addr_table <= 0;
pc_table <= 0;
end else begin
if (miss_add && enqueue_possible && (MRVQ_SIZE != 2)) begin
size <= size + 1;
if (mrvq_push) begin
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= 0;
pc_table[enqueue_index] <= miss_add_pc;
@@ -142,8 +145,7 @@ module VX_cache_miss_resrv
ready_table <= ready_table | make_ready;
end
if (miss_resrv_pop && dequeue_possible) begin
size <= size - 1;
if (mrvq_pop) begin
valid_table[dequeue_index] <= 0;
ready_table[dequeue_index] <= 0;
addr_table[dequeue_index] <= 0;
@@ -152,6 +154,16 @@ module VX_cache_miss_resrv
head_ptr <= head_ptr + 1;
end
if (!(mrvq_push && mrvq_pop)) begin
if (mrvq_push) begin
size <= size + 1;
end
if (mrvq_pop) begin
size <= size - 1;
end
end
end
end

View File

@@ -68,35 +68,21 @@ module VX_fill_invalidator
reg[FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address;
reg success_found;
reg[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] success_index;
integer curr_fill;
reg[FILL_INVALIDAOR_SIZE-1:0] matched_fill;
wire matched;
integer fi;
always @(*) begin
invalidate_fill = 0;
success_found = 0;
success_index = 0;
for (curr_fill = 0; curr_fill < FILL_INVALIDAOR_SIZE; curr_fill=curr_fill+1) begin
if (fill_addr[31:`LINE_SELECT_ADDR_START] == fills_address[curr_fill][31:`LINE_SELECT_ADDR_START]) begin
if (possible_fill && fills_active[curr_fill]) begin
invalidate_fill = 1;
end
if (success_fill) begin
success_found = 1;
success_index = curr_fill;
end
end
for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi+=1) begin
matched_fill[fi] = fills_active[fi] && (fills_address[fi][31:`LINE_SELECT_ADDR_START] == fill_addr[31:`LINE_SELECT_ADDR_START]);
end
end
assign matched = (|(matched_fill));
wire [(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
wire [(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
wire enqueue_found;
VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) VX_sel_bank(
.valids(~fills_active),
.index (enqueue_index),
@@ -104,6 +90,7 @@ module VX_fill_invalidator
);
assign invalidate_fill = possible_fill && matched;
always @(posedge clk) begin
@@ -111,19 +98,74 @@ module VX_fill_invalidator
fills_active <= 0;
fills_address <= 0;
end else begin
if (possible_fill && !invalidate_fill) begin
fills_active[enqueue_index] <= 1;
fills_address[enqueue_index] <= fill_addr;
end
if (success_found) begin
fills_active[success_index] <= 0;
if (possible_fill && !matched && enqueue_found) begin
fills_active [enqueue_index] <= 1;
fills_address[enqueue_index] <= fill_addr;
end else if (success_fill && matched) begin
fills_active <= fills_active & (~matched_fill);
end
end
end
// reg success_found;
// reg[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] success_index;
// integer curr_fill;
// always @(*) begin
// invalidate_fill = 0;
// success_found = 0;
// success_index = 0;
// for (curr_fill = 0; curr_fill < FILL_INVALIDAOR_SIZE; curr_fill=curr_fill+1) begin
// if (fill_addr[31:`LINE_SELECT_ADDR_START] == fills_address[curr_fill][31:`LINE_SELECT_ADDR_START]) begin
// if (possible_fill && fills_active[curr_fill]) begin
// invalidate_fill = 1;
// end
// if (success_fill) begin
// success_found = 1;
// success_index = curr_fill;
// end
// end
// end
// end
// wire [(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
// wire enqueue_found;
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) VX_sel_bank(
// .valids(~fills_active),
// .index (enqueue_index),
// .found (enqueue_found)
// );
// always @(posedge clk) begin
// if (reset) begin
// fills_active <= 0;
// fills_address <= 0;
// end else begin
// if (possible_fill && !invalidate_fill) begin
// fills_active[enqueue_index] <= 1;
// fills_address[enqueue_index] <= fill_addr;
// end
// if (success_found) begin
// fills_active[success_index] <= 0;
// end
// end
// end
end

View File

@@ -239,7 +239,7 @@ module VX_tag_data_access
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
wire force_write = real_writefill && valid_req_st1e && miss_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
wire force_write = real_writefill;
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
@@ -249,7 +249,7 @@ module VX_tag_data_access
wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill;
assign we[g] = (force_write) ? 4'b1111 :
(normal_write && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 :
(should_write && !real_writefill && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 :
(normal_write && sb) ? sb_mask :
(normal_write && sh) ? sh_mask :
@@ -277,7 +277,7 @@ module VX_tag_data_access
assign readdata_st1e = use_read_data_st1e;
assign readtag_st1e = use_read_tag_st1e;
assign fill_sent = miss_st1e;
assign fill_saw_dirty_st1e = force_write && dirty_st1e && miss_st1e;
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
assign invalidate_line = is_snp_st1e && !miss_st1e;
endmodule

View File

@@ -93,7 +93,7 @@ module VX_tag_data_structure
end
end else if (fill_sent) begin
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
// valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
end
if (invalidate) begin

View File

@@ -162,7 +162,7 @@
// Size of cache in bytes
`ifndef DCACHE_SIZE_BYTES
`define DCACHE_SIZE_BYTES 4096
`define DCACHE_SIZE_BYTES 2048
`endif
// Size of line inside a bank in bytes
@@ -219,7 +219,7 @@
// Dram Fill Rsp Queue Size
`ifndef DDFPQ_SIZE
`define DDFPQ_SIZE 2
`define DDFPQ_SIZE 32
`endif
// Snoop Req Queue
@@ -256,7 +256,7 @@
// Fill Invalidator Size {Fill invalidator must be active}
`ifndef DFILL_INVALIDAOR_SIZE
`define DFILL_INVALIDAOR_SIZE 0
`define DFILL_INVALIDAOR_SIZE 32
`endif
// Dram knobs
@@ -270,7 +270,7 @@
// Size of cache in bytes
`ifndef ICACHE_SIZE_BYTES
`define ICACHE_SIZE_BYTES 1024
`define ICACHE_SIZE_BYTES 4096
`endif
// Size of line inside a bank in bytes
@@ -327,7 +327,7 @@
// Dram Fill Rsp Queue Size
`ifndef IDFPQ_SIZE
`define IDFPQ_SIZE 2
`define IDFPQ_SIZE 32
`endif
// Snoop Req Queue
@@ -364,7 +364,7 @@
// Fill Invalidator Size {Fill invalidator must be active}
`ifndef IFILL_INVALIDAOR_SIZE
`define IFILL_INVALIDAOR_SIZE 0
`define IFILL_INVALIDAOR_SIZE 32
`endif
// Dram knobs
@@ -433,7 +433,7 @@
// Dram Fill Rsp Queue Size
`ifndef SDFPQ_SIZE
`define SDFPQ_SIZE 16
`define SDFPQ_SIZE 0
`endif
// Snoop Req Queue
@@ -470,7 +470,7 @@
// Fill Invalidator Size {Fill invalidator must be active}
`ifndef SFILL_INVALIDAOR_SIZE
`define SFILL_INVALIDAOR_SIZE 0
`define SFILL_INVALIDAOR_SIZE 32
`endif
// Dram knobs
@@ -484,7 +484,7 @@
// Size of cache in bytes
`ifndef LLCACHE_SIZE_BYTES
`define LLCACHE_SIZE_BYTES 1024
`define LLCACHE_SIZE_BYTES 4096
`endif
// Size of line inside a bank in bytes
@@ -528,22 +528,22 @@
// Core Request Queue Size
`ifndef LLREQQ_SIZE
`define LLREQQ_SIZE (2*`NUMBER_CORES_PER_CLUSTER)
`define LLREQQ_SIZE 32
`endif
// Miss Reserv Queue Knob
`ifndef LLMRVQ_SIZE
`define LLMRVQ_SIZE (`DNUMBER_BANKS*`NUMBER_CORES_PER_CLUSTER)
`define LLMRVQ_SIZE 32
`endif
// Dram Fill Rsp Queue Size
`ifndef LLDFPQ_SIZE
`define LLDFPQ_SIZE 2
`define LLDFPQ_SIZE 32
`endif
// Snoop Req Queue
`ifndef LLSNRQ_SIZE
`define LLSNRQ_SIZE 8
`define LLSNRQ_SIZE 32
`endif
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
@@ -555,7 +555,7 @@
// Dram Writeback Queue Size
`ifndef LLDWBQ_SIZE
`define LLDWBQ_SIZE 4
`define LLDWBQ_SIZE 16
`endif
// Dram Fill Req Queue Size
@@ -565,17 +565,17 @@
// Lower Level Cache Hit Queue Size
`ifndef LLLLVQ_SIZE
`define LLLLVQ_SIZE 16
`define LLLLVQ_SIZE 32
`endif
// Fill Forward SNP Queue
`ifndef LLFFSQ_SIZE
`define LLFFSQ_SIZE 8
`define LLFFSQ_SIZE 32
`endif
// Fill Invalidator Size {Fill invalidator must be active}
`ifndef LLFILL_INVALIDAOR_SIZE
`define LLFILL_INVALIDAOR_SIZE 0
`define LLFILL_INVALIDAOR_SIZE 32
`endif
// Dram knobs
@@ -589,7 +589,7 @@
// Size of cache in bytes
`ifndef L3CACHE_SIZE_BYTES
`define L3CACHE_SIZE_BYTES 1024
`define L3CACHE_SIZE_BYTES 8192
`endif
// Size of line inside a bank in bytes
@@ -633,22 +633,22 @@
// Core Request Queue Size
`ifndef L3REQQ_SIZE
`define L3REQQ_SIZE (`NT*`NW*`NUMBER_CLUSTERS)
`define L3REQQ_SIZE 32
`endif
// Miss Reserv Queue Knob
`ifndef L3MRVQ_SIZE
`define L3MRVQ_SIZE `LLREQQ_SIZE
`define L3MRVQ_SIZE `L3REQQ_SIZE
`endif
// Dram Fill Rsp Queue Size
`ifndef L3DFPQ_SIZE
`define L3DFPQ_SIZE 2
`define L3DFPQ_SIZE 32
`endif
// Snoop Req Queue
`ifndef L3SNRQ_SIZE
`define L3SNRQ_SIZE 8
`define L3SNRQ_SIZE 32
`endif
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
@@ -660,7 +660,7 @@
// Dram Writeback Queue Size
`ifndef L3DWBQ_SIZE
`define L3DWBQ_SIZE 4
`define L3DWBQ_SIZE 16
`endif
// Dram Fill Req Queue Size
@@ -680,7 +680,7 @@
// Fill Invalidator Size {Fill invalidator must be active}
`ifndef L3FILL_INVALIDAOR_SIZE
`define L3FILL_INVALIDAOR_SIZE 0
`define L3FILL_INVALIDAOR_SIZE 32
`endif
// Dram knobs

View File

@@ -98,5 +98,8 @@ module VX_fetch (
assign fe_inst_meta_fi.instruction = 32'h0;
assign fe_inst_meta_fi.inst_pc = warp_pc;
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
endmodule

View File

@@ -31,29 +31,28 @@ module Vortex
input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
// DRAM Icache Req
output wire I_dram_req,
output wire I_dram_req_write,
output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size,
output wire [31:0] I_dram_req_data[`IBANK_LINE_SIZE_RNG],
output wire [31:0] I_dram_expected_lat,
output wire I_dram_req,
output wire I_dram_req_write,
output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data,
output wire [31:0] I_dram_expected_lat,
// DRAM Icache Res
output wire I_dram_fill_accept,
input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr,
input wire [31:0] I_dram_fill_rsp_data[`IBANK_LINE_SIZE_RNG],
output wire I_dram_fill_accept,
input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data,
// Dcache Snooping
// LLC Snooping
input wire snp_req,
input wire [31:0] snp_req_addr,
output wire snp_req_delay,
// Icache Snooping
input wire I_snp_req,
input wire [31:0] I_snp_req_addr,
output wire I_snp_req_delay,
input wire I_snp_req,
input wire [31:0] I_snp_req_addr,
output wire I_snp_req_delay,
output wire out_ebreak

View File

@@ -3,10 +3,11 @@
#define NUM_TESTS 46
int main(int argc, char **argv)
{
Verilated::commandArgs(argc, argv);
#define ALL_TESTS
// #define ALL_TESTS
#ifdef ALL_TESTS
bool passed = true;
std::string tests[NUM_TESTS] = {

View File

@@ -9,7 +9,7 @@ int main(int argc, char **argv)
Verilated::commandArgs(argc, argv);
#define ALL_TESTS
// #define ALL_TESTS
#ifdef ALL_TESTS
bool passed = true;

View File

@@ -23,6 +23,7 @@ be:
.global vx_printc
vx_printc:
la t0, print_addr
lw t0, 0(t0)
sw a1, 0(t0)
ret

View File

@@ -55,14 +55,33 @@ int main()
vx_print_str("Let's start... (This might take a while)\n");
unsigned what[36];
bool passed = true;
for (int i = 0; i < 36; i++)
{
what[i] = i;
// vx_print_hex(i);
// vx_printf(": ", what[i]);
if (what[i] != i)
{
passed = false;
vx_printf("T1 Fail On ", i);
}
}
for (int i = 0; i < 36; i++)
{
vx_printf("Value: ", what[i]);
// vx_print_hex(i);
// vx_printf(": ", what[i]);
if (what[i] != i)
{
passed = false;
vx_printf("T2 Fail on ", i);
}
}
if (passed)
{
vx_print_str("Wr->read and repeat(Wr) tests passed!\n");
}
@@ -80,8 +99,8 @@ int main()
// Test wspawn
// vx_print_str("test_wspawn\n");
// test_wsapwn();
vx_print_str("test_wspawn\n");
test_wsapwn();
vx_print_str("Shared Memory test\n");
unsigned * ptr = (unsigned *) 0xFFFF0000;
@@ -99,31 +118,34 @@ int main()
}
// vx_print_str("vx_spawnWarps mat_add_kernel\n");
vx_print_str("vx_spawnWarps mat_add_kernel\n");
// mat_add_args_t arguments;
// arguments.x = x;
// arguments.y = y;
// arguments.z = z;
// arguments.numColums = 4;
// arguments.numRows = 4;
mat_add_args_t arguments;
arguments.x = x;
arguments.y = y;
arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
// int numWarps = 4;
// int numThreads = 4;
int numWarps = 4;
int numThreads = 4;
// vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
// for (int i = 0; i < numWarps; i++)
// {
// for (int j = 0; j < numThreads; j++)
// {
// unsigned index = (i * arguments.numColums) + j;
// vx_print_hex(z[index]);
// vx_print_str(" ");
// }
// vx_print_str("\n");
// }
vx_print_str("Waiting to ensure other warps are done... (Takes a while)\n");
for (int i = 0; i < 5000; i++) {}
for (int i = 0; i < numWarps; i++)
{
for (int j = 0; j < numThreads; j++)
{
unsigned index = (i * arguments.numColums) + j;
vx_print_hex(z[index]);
vx_print_str(" ");
}
vx_print_str("\n");
}
return 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -100,6 +100,8 @@ void test_wsapwn()
vx_wspawn(4, func_ptr);
simple_kernel();
for (int i = 0; i < 100; i++) {}
vx_print_hex(wsapwn_arr[0]);
vx_print_str("\n");
vx_print_hex(wsapwn_arr[1]);