round robin warp scheduling

This commit is contained in:
Richard Yan
2024-04-16 23:03:00 -07:00
parent 7ae54bd280
commit 8de5470da4
4 changed files with 91 additions and 30 deletions

View File

@@ -1,5 +1,5 @@
// auto-generated by gen_config.py. DO NOT EDIT // auto-generated by gen_config.py. DO NOT EDIT
// Generated at 2024-01-04 01:43:02.432130 // Generated at 2024-04-08 12:40:13.594321
// Translated from ./rtl/VX_config.vh: // Translated from ./rtl/VX_config.vh:
@@ -84,15 +84,15 @@
#endif #endif
#ifndef NUM_CORES #ifndef NUM_CORES
#define NUM_CORES 1 #define NUM_CORES 2
#endif #endif
#ifndef NUM_WARPS #ifndef NUM_WARPS
#define NUM_WARPS 4 #define NUM_WARPS 8
#endif #endif
#ifndef NUM_THREADS #ifndef NUM_THREADS
#define NUM_THREADS 4 #define NUM_THREADS 8
#endif #endif
#ifndef NUM_BARRIERS #ifndef NUM_BARRIERS
@@ -141,6 +141,18 @@
#endif #endif
#endif #endif
#ifdef L2_ENABLE
#define L2_LINE_SIZE MEM_BLOCK_SIZE
#else
#define L2_LINE_SIZE L1_LINE_SIZE
#endif
#ifdef L3_ENABLE
#define L3_LINE_SIZE MEM_BLOCK_SIZE
#else
#define L3_LINE_SIZE L2_LINE_SIZE
#endif
#ifdef XLEN_64 #ifdef XLEN_64
#ifndef STARTUP_ADDR #ifndef STARTUP_ADDR
@@ -168,7 +180,7 @@
#endif #endif
#ifndef SMEM_LOG_SIZE #ifndef SMEM_LOG_SIZE
#define SMEM_LOG_SIZE 14 #define SMEM_LOG_SIZE 15
#endif #endif
#ifndef IO_BASE_ADDR #ifndef IO_BASE_ADDR
@@ -196,13 +208,21 @@
#define STALL_TIMEOUT (100000 * (1 ** (L2_ENABLED + L3_ENABLED))) #define STALL_TIMEOUT (100000 * (1 ** (L2_ENABLED + L3_ENABLED)))
#endif #endif
#ifndef SV_DPI
#define DPI_DISABLE
#endif
#ifndef FPU_FPNEW #ifndef FPU_FPNEW
#ifndef FPU_DSP #ifndef FPU_DSP
#ifndef FPU_DPI #ifndef FPU_DPI
#ifdef SYNTHESIS #ifndef SYNTHESIS
#define FPU_DSP #ifndef DPI_DISABLE
#else
#define FPU_DPI #define FPU_DPI
#else
#define FPU_DSP
#endif
#else
#define FPU_DSP
#endif #endif
#endif #endif
#endif #endif
@@ -228,18 +248,18 @@
// Number of ALU units // Number of ALU units
#ifndef NUM_ALU_LANES #ifndef NUM_ALU_LANES
#define NUM_ALU_LANES UP(NUM_THREADS / 2) #define NUM_ALU_LANES NUM_THREADS
#endif #endif
#ifndef NUM_ALU_BLOCKS #ifndef NUM_ALU_BLOCKS
#define NUM_ALU_BLOCKS UP(ISSUE_WIDTH / 1) #define NUM_ALU_BLOCKS ISSUE_WIDTH
#endif #endif
// Number of FPU units // Number of FPU units
#ifndef NUM_FPU_LANES #ifndef NUM_FPU_LANES
#define NUM_FPU_LANES UP(NUM_THREADS / 2) #define NUM_FPU_LANES NUM_THREADS
#endif #endif
#ifndef NUM_FPU_BLOCKS #ifndef NUM_FPU_BLOCKS
#define NUM_FPU_BLOCKS UP(ISSUE_WIDTH / 1) #define NUM_FPU_BLOCKS ISSUE_WIDTH
#endif #endif
// Number of LSU units // Number of LSU units
@@ -254,16 +274,19 @@
// Size of Instruction Buffer // Size of Instruction Buffer
#ifndef IBUF_SIZE #ifndef IBUF_SIZE
#define IBUF_SIZE (2 * (NUM_WARPS / ISSUE_WIDTH)) #define IBUF_SIZE (8 * (NUM_WARPS / ISSUE_WIDTH))
#endif #endif
// Size of LSU Request Queue // Size of LSU Request Queue
#ifndef LSUQ_SIZE #ifndef LSUQ_SIZE
#define LSUQ_SIZE (2 * (NUM_THREADS / NUM_LSU_LANES)) #define LSUQ_SIZE (8 * (NUM_THREADS / NUM_LSU_LANES))
#endif #endif
// LSU Duplicate Address Check // LSU Duplicate Address Check
#ifdef LSU_DUP #ifndef LSU_DUP_DISABLE
#define LSU_DUP_ENABLE
#endif
#ifdef LSU_DUP_ENABLE
#define LSU_DUP_ENABLED 1 #define LSU_DUP_ENABLED 1
#else #else
#define LSU_DUP_ENABLED 0 #define LSU_DUP_ENABLED 0
@@ -290,8 +313,8 @@
// Floating-Point Units /////////////////////////////////////////////////////// // Floating-Point Units ///////////////////////////////////////////////////////
// Size of FPU Request Queue // Size of FPU Request Queue
#ifndef FPU_REQ_QUEUE_SIZE #ifndef FPUQ_SIZE
#define FPU_REQ_QUEUE_SIZE (2 * (NUM_THREADS / NUM_FPU_LANES)) #define FPUQ_SIZE (2 * (NUM_THREADS / NUM_FPU_LANES))
#endif #endif
// FNCP Latency // FNCP Latency
@@ -382,7 +405,7 @@
// Number of Cache Units // Number of Cache Units
#ifndef NUM_ICACHES #ifndef NUM_ICACHES
#define NUM_ICACHES UP(NUM_CORES / 4) #define NUM_ICACHES UP(SOCKET_SIZE / 4)
#endif #endif
// Cache Size // Cache Size
@@ -412,7 +435,7 @@
// Number of Associative Ways // Number of Associative Ways
#ifndef ICACHE_NUM_WAYS #ifndef ICACHE_NUM_WAYS
#define ICACHE_NUM_WAYS 2 #define ICACHE_NUM_WAYS 1
#endif #endif
// Dcache Configurable Knobs ////////////////////////////////////////////////// // Dcache Configurable Knobs //////////////////////////////////////////////////
@@ -431,7 +454,7 @@
// Number of Cache Units // Number of Cache Units
#ifndef NUM_DCACHES #ifndef NUM_DCACHES
#define NUM_DCACHES UP(NUM_CORES / 4) #define NUM_DCACHES UP(SOCKET_SIZE / 4)
#endif #endif
// Cache Size // Cache Size
@@ -441,7 +464,7 @@
// Number of Banks // Number of Banks
#ifndef DCACHE_NUM_BANKS #ifndef DCACHE_NUM_BANKS
#define DCACHE_NUM_BANKS (NUM_LSU_LANES) #define DCACHE_NUM_BANKS MIN(NUM_LSU_LANES, 4)
#endif #endif
// Core Response Queue Size // Core Response Queue Size
@@ -466,7 +489,7 @@
// Number of Associative Ways // Number of Associative Ways
#ifndef DCACHE_NUM_WAYS #ifndef DCACHE_NUM_WAYS
#define DCACHE_NUM_WAYS 2 #define DCACHE_NUM_WAYS 1
#endif #endif
// SM Configurable Knobs ////////////////////////////////////////////////////// // SM Configurable Knobs //////////////////////////////////////////////////////
@@ -525,7 +548,7 @@
// Number of Associative Ways // Number of Associative Ways
#ifndef L2_NUM_WAYS #ifndef L2_NUM_WAYS
#define L2_NUM_WAYS 4 #define L2_NUM_WAYS 2
#endif #endif
// L3cache Configurable Knobs ///////////////////////////////////////////////// // L3cache Configurable Knobs /////////////////////////////////////////////////

View File

@@ -274,7 +274,7 @@
// Size of LSU Request Queue // Size of LSU Request Queue
`ifndef LSUQ_SIZE `ifndef LSUQ_SIZE
`define LSUQ_SIZE (8 * (`NUM_THREADS / `NUM_LSU_LANES)) `define LSUQ_SIZE (2 * `NUM_WARPS * (`NUM_THREADS / `NUM_LSU_LANES))
`endif `endif
// LSU Duplicate Address Check // LSU Duplicate Address Check

View File

@@ -308,10 +308,11 @@ module VX_schedule import VX_gpu_pkg::*; #(
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls); wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
VX_lzc #( VX_lzc_rr #(
.N (`NUM_WARPS), .N (`NUM_WARPS)
.REVERSE (1)
) wid_select ( ) wid_select (
.clk (clk),
.reset (reset),
.data_in (ready_warps), .data_in (ready_warps),
.data_out (schedule_wid), .data_out (schedule_wid),
.valid_out (schedule_valid) .valid_out (schedule_valid)

View File

@@ -21,7 +21,7 @@ module VX_lzc #(
) ( ) (
input wire [N-1:0] data_in, input wire [N-1:0] data_in,
output wire [LOGN-1:0] data_out, output wire [LOGN-1:0] data_out,
output wire valid_out output logic valid_out
); );
if (N == 1) begin if (N == 1) begin
@@ -33,11 +33,11 @@ module VX_lzc #(
end else begin end else begin
wire [N-1:0][LOGN-1:0] indices; wire [N-1:0][LOGN-1:0] indices;
for (genvar i = 0; i < N; ++i) begin for (genvar i = 0; i < N; ++i) begin
assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i); assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i);
end end
VX_find_first #( VX_find_first #(
.N (N), .N (N),
.DATAW (LOGN), .DATAW (LOGN),
@@ -51,5 +51,42 @@ module VX_lzc #(
end end
endmodule
// Rotating-priority selector over an N-bit request vector.
//
// Starting at an internal pointer (current_idx) and wrapping modulo N, the
// module reports the index of the first set bit of data_in. The pointer is
// registered: it is cleared while reset is high and moves forward by one
// position on every clock edge where at least one bit of data_in is set.
//
// Parameters:
//   N         - number of request bits in data_in.
//
// Ports:
//   clk       - clock for the pointer register.
//   reset     - synchronous reset; forces the pointer back to 0.
//   data_in   - request vector that is searched.
//   data_out  - index of the selected set bit; 0 when data_in is all zeros.
//   valid_out - high when data_in has at least one set bit.
//
// NOTE(review): the pointer advances by exactly one position per valid cycle,
// not to the position after the index that was just reported. With sparse
// requests the same index can therefore be reported on consecutive cycles.
// Confirm this is the intended arbitration policy.
module VX_lzc_rr #(
    parameter N = 2
) (
    input  wire                   clk,
    input  wire                   reset,
    input  wire [N-1:0]           data_in,
    output logic [$clog2(N)-1:0]  data_out,
    output logic                  valid_out
);
    // Width of the internal pointer; kept at >= 1 bit so that the size casts
    // below stay legal when N == 1 ($clog2(1) is 0).
    localparam IDXW = (N > 1) ? $clog2(N) : 1;

    // Search start position; always in the range [0, N-1] after reset.
    logic [IDXW-1:0] current_idx;

    // Combinational search. The loop runs from the farthest offset down to
    // offset 0 so that the last assignment - the one that survives - belongs
    // to the smallest offset with a set bit. This yields the same result as
    // an ascending loop that stops at the first hit, without relying on
    // 'break'.
    always @(*) begin
        integer i;
        data_out = '0;
        for (i = N - 1; i >= 0; i = i - 1) begin
            if (data_in[(current_idx + i) % N] == 1'b1) begin
                data_out = IDXW'((current_idx + i) % N);
            end
        end
    end

    assign valid_out = |data_in;

    // Pointer register. Non-blocking assignments are required here: a
    // blocking update would change current_idx in the active region of the
    // clock edge and race with the combinational search above and with any
    // other clocked process sampling data_out on the same edge.
    always @(posedge clk) begin
        if (reset) begin
            current_idx <= '0;
        end else if (valid_out) begin
            // Wrap explicitly instead of using '% N'; current_idx never
            // exceeds N-1, so the two forms are equivalent.
            current_idx <= (current_idx == IDXW'(N - 1)) ? '0
                                                         : current_idx + IDXW'(1);
        end
    end

endmodule
`TRACING_ON `TRACING_ON