From fd65ed95eb38196c5b1f06f7db92447a985aff51 Mon Sep 17 00:00:00 2001 From: Shinnung Jeong Date: Tue, 30 Jan 2024 20:45:47 -0500 Subject: [PATCH 01/12] fix bug to access memory address in simx --- sim/simx/exe_unit.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/simx/exe_unit.cpp b/sim/simx/exe_unit.cpp index 4b5cb356..67834939 100644 --- a/sim/simx/exe_unit.cpp +++ b/sim/simx/exe_unit.cpp @@ -207,7 +207,7 @@ void LsuUnit::tick() { for (uint32_t t = 1; t < num_lanes_; ++t) { if (!trace->tmask.test(t0 + t)) continue; - auto mem_addr = trace_data->mem_addrs.at(t).addr & ~addr_mask; + auto mem_addr = trace_data->mem_addrs.at(t + t0).addr & ~addr_mask; matches += (addr0 == mem_addr); } #ifdef LSU_DUP_ENABLE @@ -229,7 +229,7 @@ void LsuUnit::tick() { continue; auto& dcache_req_port = core_->smem_demuxs_.at(t)->ReqIn; - auto mem_addr = trace_data->mem_addrs.at(t); + auto mem_addr = trace_data->mem_addrs.at(t + t0); auto type = core_->get_addr_type(mem_addr.addr); MemReq mem_req; @@ -324,4 +324,4 @@ void SfuUnit::tick() { break; // single block } ++input_idx_; -} \ No newline at end of file +} From e2d1387df805d6a0bcfdfe7f4983b39d51b7429f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 31 Jan 2024 00:39:37 -0800 Subject: [PATCH 02/12] elastic buffers classification --- hw/rtl/libs/VX_bypass_buffer.sv | 31 +++++--- hw/rtl/libs/VX_elastic_buffer.sv | 35 ++++----- hw/rtl/libs/VX_pipe_buffer.sv | 63 +++++++++++++++ hw/rtl/libs/VX_skid_buffer.sv | 131 +++++++------------------------ hw/rtl/libs/VX_stream_buffer | 128 ++++++++++++++++++++++++++++++ hw/rtl/libs/VX_toggle_buffer.sv | 70 +++++++++++++++++ 6 files changed, 323 insertions(+), 135 deletions(-) create mode 100644 hw/rtl/libs/VX_pipe_buffer.sv create mode 100644 hw/rtl/libs/VX_stream_buffer create mode 100644 hw/rtl/libs/VX_toggle_buffer.sv diff --git a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 7e723a45..4eefce44 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -11,6 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +// A bypass elastic buffer operates at full bandwidth where pop can happen if the buffer is empty but is going full +// It has the following benefits: +// + Full-bandwidth throughput +// + use only one register for storage +// It has the following limitations: +// + data_out is not registered +// + ready_in and ready_out are coupled + `include "VX_platform.vh" `TRACING_OFF @@ -35,30 +43,27 @@ module VX_bypass_buffer #( assign data_out = data_in; end else begin reg [DATAW-1:0] buffer; - reg buffer_valid; + reg has_data; always @(posedge clk) begin if (reset) begin - buffer_valid <= 0; + has_data <= 0; end else begin if (ready_out) begin - buffer_valid <= 0; - end - if (valid_in && ~ready_out) begin - `ASSERT(!buffer_valid, ("runtime error")); - buffer_valid <= 1; + has_data <= 0; + end else if (~has_data) begin + has_data <= valid_in; end end - - if (valid_in && ~ready_out) begin + if (~has_data) begin buffer <= data_in; end end - assign ready_in = ready_out || !buffer_valid; - assign data_out = buffer_valid ? buffer : data_in; - assign valid_out = valid_in || buffer_valid; + assign ready_in = ready_out || ~has_data; + assign data_out = has_data ? buffer : data_in; + assign valid_out = valid_in || has_data; end endmodule -`TRACING_ON \ No newline at end of file +`TRACING_ON diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 8cd8a3ab..c6af5197 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -42,34 +42,33 @@ module VX_elastic_buffer #( end else if (SIZE == 1) begin - wire stall = valid_out && ~ready_out; - - VX_pipe_register #( - .DATAW (1 + DATAW), - .RESETW (1) - ) pipe_register ( - .clk (clk), - .reset (reset), - .enable (~stall), - .data_in ({valid_in, data_in}), - .data_out ({valid_out, data_out}) + VX_pipe_buffer #( + .DATAW (DATAW) + ) pipe_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in), + .data_in (data_in), + .ready_in (ready_in), + .valid_out (valid_out), + .data_out (data_out), + .ready_out (ready_out) ); - assign ready_in = ~stall; - end else if (SIZE == 2) begin VX_skid_buffer #( .DATAW (DATAW), + .FULL_BW (OUT_REG != 2), .OUT_REG (OUT_REG) ) skid_buffer ( .clk (clk), .reset (reset), - .valid_in (valid_in), + .valid_in (valid_in), + .data_in (data_in), .ready_in (ready_in), - .data_in (data_in), - .data_out (data_out), .valid_out (valid_out), + .data_out (data_out), .ready_out (ready_out) ); @@ -111,10 +110,10 @@ module VX_elastic_buffer #( .clk (clk), .reset (reset), .valid_in (~empty), - .ready_in (ready_out_t), .data_in (data_out_t), - .data_out (data_out), + .ready_in (ready_out_t), .valid_out (valid_out), + .data_out (data_out), .ready_out (ready_out) ); diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv new file mode 100644 index 00000000..dfdbc43c --- /dev/null +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -0,0 +1,63 @@ +// Copyright 2024 blaise +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A pipelined elastic buffer operates at full bandwidth where push can happen if the buffer is not empty but is going empty +// It has the following benefits: +// + Full-bandwidth throughput +// + use only one register for storage +// + data_out is fully registered +// It has the following limitations: +// + ready_in and ready_out are coupled + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_pipe_buffer #( + parameter DATAW = 1, + parameter PASSTHRU = 0 +) ( + input wire clk, + input wire reset, + input wire valid_in, + output wire ready_in, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out, + input wire ready_out, + output wire valid_out +); + if (PASSTHRU != 0) begin + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + end else begin + wire stall = valid_out && ~ready_out; + + VX_pipe_register #( + .DATAW (1 + DATAW), + .RESETW (1) + ) pipe_register ( + .clk (clk), + .reset (reset), + .enable (~stall), + .data_in ({valid_in, data_in}), + .data_out ({valid_out, data_out}) + ); + + assign ready_in = ~stall; + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_skid_buffer.sv b/hw/rtl/libs/VX_skid_buffer.sv index a6876f5c..4816c1c2 100644 --- a/hw/rtl/libs/VX_skid_buffer.sv +++ b/hw/rtl/libs/VX_skid_buffer.sv @@ -17,6 +17,7 @@ module VX_skid_buffer #( parameter DATAW = 32, parameter PASSTHRU = 0, + parameter FULL_BW = 0, parameter OUT_REG = 0 ) ( input wire clk, @@ -30,8 +31,6 @@ module VX_skid_buffer #( input wire ready_out, output wire valid_out ); - `STATIC_ASSERT ((OUT_REG <= 2), ("invalid parameter")) - if (PASSTHRU != 0) begin `UNUSED_VAR (clk) @@ -41,112 +40,36 @@ module VX_skid_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (OUT_REG == 0) begin + end else if (FULL_BW != 0) begin - reg [1:0][DATAW-1:0] shift_reg; - reg valid_out_r, ready_in_r, rd_ptr_r; - - wire push = valid_in && ready_in; - wire pop = valid_out_r && ready_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - ready_in_r <= 1; - rd_ptr_r <= 1; - end else begin - if (push) begin - if (!pop) begin - ready_in_r <= rd_ptr_r; - valid_out_r <= 1; - end - end else if (pop) begin - ready_in_r <= 1; - valid_out_r <= rd_ptr_r; - end - rd_ptr_r <= rd_ptr_r ^ (push ^ pop); - end - end - - always @(posedge clk) begin - if (push) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end - end - - assign ready_in = ready_in_r; - assign valid_out = valid_out_r; - assign data_out = shift_reg[rd_ptr_r]; - - end else if (OUT_REG == 1) begin - - // Full-bandwidth operation: input is consummed every cycle. - // However, data_out register has an additional multiplexer. - - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg use_buffer; - - wire push = valid_in && ready_in; - wire stall_out = valid_out_r && ~ready_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - use_buffer <= 0; - end else begin - if (ready_out) begin - use_buffer <= 0; - end else if (valid_in && valid_out) begin - use_buffer <= 1; - end - if (~stall_out) begin - valid_out_r <= valid_in || use_buffer; - end - end - end - - always @(posedge clk) begin - if (push) begin - buffer <= data_in; - end - if (~stall_out) begin - data_out_r <= use_buffer ? buffer : data_in; - end - end - - assign ready_in = ~use_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; + VX_stream_buffer #( + .DATAW (DATAW), + .OUT_REG (OUT_REG) + ) stream_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in), + .data_in (data_in), + .ready_in (ready_in), + .valid_out (valid_out), + .data_out (data_out), + .ready_out (ready_out) + ); end else begin - // Half-bandwidth operation: input is consummed every other cycle. - // However, data_out register has no additional multiplexer. - - reg [DATAW-1:0] data_out_r; - reg has_data; - - always @(posedge clk) begin - if (reset) begin - has_data <= 0; - end else begin - if (~has_data) begin - has_data <= valid_in; - end else if (ready_out) begin - has_data <= 0; - end - end - if (~has_data) begin - data_out_r <= data_in; - end - end - - assign ready_in = ~has_data; - assign valid_out = has_data; - assign data_out = data_out_r; + VX_toggle_buffer #( + .DATAW (DATAW) + ) toggle_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in), + .data_in (data_in), + .ready_in (ready_in), + .valid_out (valid_out), + .data_out (data_out), + .ready_out (ready_out) + ); end diff --git a/hw/rtl/libs/VX_stream_buffer b/hw/rtl/libs/VX_stream_buffer new file mode 100644 index 00000000..3bcc5d39 --- /dev/null +++ b/hw/rtl/libs/VX_stream_buffer @@ -0,0 +1,128 @@ +// Copyright 2024 blaise +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously +// It has the following benefits: +// + full-bandwidth throughput +// + ready_in and ready_out are decoupled +// + data_out can be fully registered +// It has the following limitations: +// - requires two registers for storage + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_stream_buffer #( + parameter DATAW = 1, + parameter OUT_REG = 0, + parameter PASSTHRU = 0 +) ( + input wire clk, + input wire reset, + input wire valid_in, + output wire ready_in, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out, + input wire ready_out, + output wire valid_out +); + if (PASSTHRU != 0) begin + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + end else begin + if (OUT_REG != 0) begin + + reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] buffer; + reg valid_out_r; + reg use_buffer; + + wire push = valid_in && ready_in; + wire stall_out = valid_out_r && ~ready_out; + + always @(posedge clk) begin + if (reset) begin + valid_out_r <= 0; + use_buffer <= 0; + end else begin + if (ready_out) begin + use_buffer <= 0; + end else if (valid_in && valid_out) begin + use_buffer <= 1; + end + if (~stall_out) begin + valid_out_r <= valid_in || use_buffer; + end + end + end + + always @(posedge clk) begin + if (push) begin + buffer <= data_in; + end + if (~stall_out) begin + data_out_r <= use_buffer ? buffer : data_in; + end + end + + assign ready_in = ~use_buffer; + assign valid_out = valid_out_r; + assign data_out = data_out_r; + + end else begin + + reg [1:0][DATAW-1:0] shift_reg; + reg valid_out_r, ready_in_r, rd_ptr_r; + + wire push = valid_in && ready_in; + wire pop = valid_out_r && ready_out; + + always @(posedge clk) begin + if (reset) begin + valid_out_r <= 0; + ready_in_r <= 1; + rd_ptr_r <= 1; + end else begin + if (push) begin + if (!pop) begin + ready_in_r <= rd_ptr_r; + valid_out_r <= 1; + end + end else if (pop) begin + ready_in_r <= 1; + valid_out_r <= rd_ptr_r; + end + rd_ptr_r <= rd_ptr_r ^ (push ^ pop); + end + end + + always @(posedge clk) begin + if (push) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; + end + end + + assign ready_in = ready_in_r; + assign valid_out = valid_out_r; + assign data_out = shift_reg[rd_ptr_r]; + end + end + +endmodule +`TRACING_ON + diff --git a/hw/rtl/libs/VX_toggle_buffer.sv b/hw/rtl/libs/VX_toggle_buffer.sv new file mode 100644 index 00000000..e67a7d74 --- /dev/null +++ b/hw/rtl/libs/VX_toggle_buffer.sv @@ -0,0 +1,70 @@ +// Copyright 2024 blaise +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A toggle elastic buffer operates at half-bandwidth where push can only trigger after pop +// It has the following benefits: +// + use only one register for storage +// + ready_in and ready_out are decoupled +// + data_out is fully registered +// It has the following limitations: +// - Half-bandwidth throughput + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_toggle_buffer #( + parameter DATAW = 1, + parameter PASSTHRU = 0 +) ( + input wire clk, + input wire reset, + input wire valid_in, + output wire ready_in, + input wire [DATAW-1:0] data_in, + output wire [DATAW-1:0] data_out, + input wire ready_out, + output wire valid_out +); + if (PASSTHRU != 0) begin + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + end else begin + reg [DATAW-1:0] buffer; + reg has_data; + + always @(posedge clk) begin + if (reset) begin + has_data <= 0; + end else begin + if (~has_data) begin + has_data <= valid_in; + end else if (ready_out) begin + has_data <= 0; + end + end + if (~has_data) begin + buffer <= data_in; + end + end + + assign ready_in = ~has_data; + assign valid_out = has_data; + assign data_out = buffer; + end + +endmodule +`TRACING_ON From 8ab7c590fd2c48d7929cd0a486b34886571767ac Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 31 Jan 2024 06:16:54 -0800 Subject: [PATCH 03/12] disabling fetch's deadlock check when L1 caches are present --- hw/rtl/VX_define.vh | 7 +++++++ hw/rtl/core/VX_decode.sv | 3 ++- hw/rtl/core/VX_fetch.sv | 11 +++++++---- hw/rtl/core/VX_ibuffer.sv | 3 ++- hw/rtl/interfaces/VX_decode_if.sv | 13 +++++++++---- hw/rtl/interfaces/VX_fetch_if.sv | 11 ++++++++--- 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 63f2d42d..093a5fd2 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -291,6 +291,13 @@ /////////////////////////////////////////////////////////////////////////////// +`ifdef ICACHE_ENABLE +`define L1_ENABLE +`endif +`ifdef DCACHE_ENABLE +`define L1_ENABLE +`endif + `ifdef L2_ENABLE `define L2_LINE_SIZE `MEM_BLOCK_SIZE `else diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 0032fe7b..0a6b00ec 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -533,8 +533,9 @@ module VX_decode #( assign decode_sched_if.valid = fetch_fire; assign decode_sched_if.wid = fetch_if.data.wid; assign decode_sched_if.is_wstall = is_wstall; - +`ifndef L1_ENABLE assign fetch_if.ibuf_pop = decode_if.ibuf_pop; +`endif `ifdef DBG_TRACE_CORE_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index ef52ef65..db5a1d73 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -32,7 +32,6 @@ module VX_fetch import VX_gpu_pkg::*; #( ); `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) - localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH); wire icache_req_valid; wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr; @@ -44,8 +43,6 @@ module VX_fetch import VX_gpu_pkg::*; #( wire icache_req_fire = icache_req_valid && icache_req_ready; - wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid); - assign req_tag = schedule_if.data.wid; assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag; @@ -68,9 +65,12 @@ module VX_fetch import VX_gpu_pkg::*; #( .rdata ({rsp_PC, rsp_tmask}) ); +`ifndef L1_ENABLE // Ensure that the ibuffer doesn't fill up. // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request. // This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus. + wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid); + wire [`ISSUE_WIDTH-1:0] pending_ibuf_full; for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin VX_pending_size #( @@ -85,13 +85,16 @@ module VX_fetch import VX_gpu_pkg::*; #( `UNUSED_PIN (empty) ); end + wire ibuf_ready = ~pending_ibuf_full[schedule_isw]; +`else + wire ibuf_ready = 1'b1; +`endif `RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0), ("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid)) // Icache Request - wire ibuf_ready = ~pending_ibuf_full[schedule_isw]; assign icache_req_valid = schedule_if.valid && ibuf_ready; assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2]; assign icache_req_tag = {schedule_if.data.uuid, req_tag}; diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index b6847edc..b465c195 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -66,8 +66,9 @@ module VX_ibuffer import VX_gpu_pkg::*; #( .valid_out (ibuffer_if[i].valid), .ready_out(ibuffer_if[i].ready) ); - + `ifndef L1_ENABLE assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready; + `endif end endmodule diff --git a/hw/rtl/interfaces/VX_decode_if.sv b/hw/rtl/interfaces/VX_decode_if.sv index d433ca47..2a357abd 100644 --- a/hw/rtl/interfaces/VX_decode_if.sv +++ b/hw/rtl/interfaces/VX_decode_if.sv @@ -36,21 +36,26 @@ interface VX_decode_if (); logic valid; data_t data; logic ready; - - wire [`ISSUE_WIDTH-1:0] ibuf_pop; +`ifndef L1_ENABLE + logic [`ISSUE_WIDTH-1:0] ibuf_pop; +`endif modport master ( output valid, output data, - input ibuf_pop, input ready + `ifndef L1_ENABLE + , input ibuf_pop + `endif ); modport slave ( input valid, input data, - output ibuf_pop, output ready + `ifndef L1_ENABLE + , output ibuf_pop + `endif ); endinterface diff --git a/hw/rtl/interfaces/VX_fetch_if.sv b/hw/rtl/interfaces/VX_fetch_if.sv index 06b27d90..cbfecdd2 100644 --- a/hw/rtl/interfaces/VX_fetch_if.sv +++ b/hw/rtl/interfaces/VX_fetch_if.sv @@ -26,21 +26,26 @@ interface VX_fetch_if (); logic valid; data_t data; logic ready; - +`ifndef L1_ENABLE logic [`ISSUE_WIDTH-1:0] ibuf_pop; +`endif modport master ( output valid, output data, - input ibuf_pop, input ready + `ifndef L1_ENABLE + , input ibuf_pop + `endif ); modport slave ( input valid, input data, - output ibuf_pop, output ready + `ifndef L1_ENABLE + , output ibuf_pop + `endif ); endinterface From f9cd8be19efd72255870743f12e434e3fe3d5f4c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 31 Jan 2024 13:35:43 -0800 Subject: [PATCH 04/12] minor update --- hw/rtl/libs/{VX_stream_buffer => VX_stream_buffer.sv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename hw/rtl/libs/{VX_stream_buffer => VX_stream_buffer.sv} (100%) diff --git a/hw/rtl/libs/VX_stream_buffer b/hw/rtl/libs/VX_stream_buffer.sv similarity index 100% rename from hw/rtl/libs/VX_stream_buffer rename to hw/rtl/libs/VX_stream_buffer.sv From b0b7cd2b1e311858f8b7d8d5228efb35e5eeb5a4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 3 Feb 2024 19:09:53 -0800 Subject: [PATCH 05/12] minor updates --- README.md | 6 +++--- hw/rtl/VX_config.vh | 12 ++++++++++++ hw/rtl/VX_define.vh | 12 ------------ sim/simx/cluster.cpp | 8 ++++---- sim/simx/core.cpp | 2 +- sim/simx/execute.cpp | 6 +++--- sim/simx/main.cpp | 2 +- sim/simx/processor.cpp | 6 +++--- sim/simx/socket.cpp | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 0db8e2ea..bca70da0 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md). $ git clone --recursive https://github.com/vortexgpgpu/vortex.git $ cd Vortex ### Install prebuilt toolchain - By default, the toolchain will install to /opt folder. - You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools). - + By default, the toolchain will install to /opt folder which requires sudo access. + You can install the toolchain to a different location of your choice by setting TOOLDIR (e.g. export TOOLDIR=$HOME/tools). + $ export TOOLDIR=/opt $ ./ci/toolchain_install.sh --all $ source ./ci/toolchain_env.sh ### Build Vortex sources diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 23805dcf..eed3cf54 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -136,6 +136,18 @@ `endif `endif +`ifdef L2_ENABLE +`define L2_LINE_SIZE `MEM_BLOCK_SIZE +`else +`define L2_LINE_SIZE `L1_LINE_SIZE +`endif + +`ifdef L3_ENABLE +`define L3_LINE_SIZE `MEM_BLOCK_SIZE +`else +`define L3_LINE_SIZE `L2_LINE_SIZE +`endif + `ifdef XLEN_64 `ifndef STARTUP_ADDR diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 093a5fd2..996c769d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -298,18 +298,6 @@ `define L1_ENABLE `endif -`ifdef L2_ENABLE -`define L2_LINE_SIZE `MEM_BLOCK_SIZE -`else -`define L2_LINE_SIZE `L1_LINE_SIZE -`endif - -`ifdef L3_ENABLE -`define L3_LINE_SIZE `MEM_BLOCK_SIZE -`else -`define L3_LINE_SIZE `L2_LINE_SIZE -`endif - `define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE `define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE)) `define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8) diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index 3ac80cb6..c632165a 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -62,10 +62,10 @@ Cluster::Cluster(const SimContext& ctx, snprintf(sname, 100, "cluster%d-l2cache", cluster_id); l2cache_ = CacheSim::Create(sname, CacheSim::Config{ !L2_ENABLED, - log2ceil(L2_CACHE_SIZE), // C - log2ceil(MEM_BLOCK_SIZE), // L - log2ceil(L2_NUM_WAYS), // W - 0, // A + log2ceil(L2_CACHE_SIZE),// C + log2ceil(MEM_BLOCK_SIZE),// L + log2ceil(L1_LINE_SIZE), // W + log2ceil(L2_NUM_WAYS), // A log2ceil(L2_NUM_BANKS), // B XLEN, // address bits 1, // number of ports diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 1c155011..50137a9c 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -210,7 +210,7 @@ void Core::schedule() { void Core::fetch() { perf_stats_.ifetch_latency += pending_ifetches_; - // handle icache reponse + // handle icache response auto& icache_rsp_port = icache_rsp_ports.at(0); if (!icache_rsp_port.empty()){ auto& mem_rsp = icache_rsp_port.front(); diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 52a95de1..4a8033d0 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -339,7 +339,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { break; } case 1: { - // RV64I: SLLI + // RV32I: SLLI rddata[t].i = rsdata[t][0].i << immsrc; break; } @@ -360,11 +360,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { } case 5: { if (func7) { - // RV64I: SRAI + // RV32I: SRAI Word result = rsdata[t][0].i >> immsrc; rddata[t].i = result; } else { - // RV64I: SRLI + // RV32I: SRLI Word result = rsdata[t][0].u >> immsrc; rddata[t].i = result; } diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 64031bb8..b4dcf4ed 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -34,7 +34,7 @@ static void show_usage() { uint32_t num_threads = NUM_THREADS; uint32_t num_warps = NUM_WARPS; uint32_t num_cores = NUM_CORES; -bool showStats = false;; +bool showStats = false; bool riscv_test = false; const char* program = nullptr; diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 8e8c1062..5382263e 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -33,8 +33,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) !L3_ENABLED, log2ceil(L3_CACHE_SIZE), // C log2ceil(MEM_BLOCK_SIZE), // L - log2ceil(L3_NUM_WAYS), // W - 0, // A + log2ceil(L2_LINE_SIZE), // W + log2ceil(L3_NUM_WAYS), // A log2ceil(L3_NUM_BANKS), // B XLEN, // address bits 1, // number of ports @@ -58,7 +58,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port); } - // set up memory perf recording + // set up memory profiling memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){ __unused (cycle); perf_mem_reads_ += !req.write; diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index dd9f9697..092e89d0 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx, XLEN, // address bits 1, // number of ports 1, // number of inputs - true, // write-through + false, // write-through false, // write response (uint8_t)arch.num_warps(), // mshr 2, // pipeline latency From fe15647f98d4fab96a38a07316a26cbfc4bfa82d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 4 Feb 2024 02:11:53 -0800 Subject: [PATCH 06/12] minor update --- hw/rtl/core/VX_operands.sv | 71 +++++++++++++++++++----------------- hw/rtl/core/VX_scoreboard.sv | 62 +++++++++++++++---------------- 2 files changed, 66 insertions(+), 67 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index ee0c493b..3747502f 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -47,8 +47,6 @@ module VX_operands import VX_gpu_pkg::*; #( reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0]; reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n; - reg valid_out_r; - reg [DATAW-1:0] data_out_r; reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n; reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n; reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n; @@ -60,7 +58,7 @@ module VX_operands import VX_gpu_pkg::*; #( reg rs3_ready, rs3_ready_n; reg data_ready, data_ready_n; - wire ready_out = operands_if[i].ready; + wire stg_valid_in, stg_ready_in; wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0); wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0); @@ -85,7 +83,7 @@ module VX_operands import VX_gpu_pkg::*; #( case (state) STATE_IDLE: begin - if (valid_out_r && ready_out) begin + if (operands_if[i].valid && operands_if[i].ready) begin data_ready_n = 0; end if (scoreboard_if[i].valid && data_ready_n == 0) begin @@ -173,37 +171,15 @@ module VX_operands import VX_gpu_pkg::*; #( end always @(posedge clk) begin - if (reset) begin + if (reset) begin state <= STATE_IDLE; cache_eop <= {ISSUE_RATIO{1'b1}}; data_ready <= 0; - valid_out_r <= 0; end else begin state <= state_n; cache_eop <= cache_eop_n; - data_ready <= data_ready_n; - if (~valid_out_r) begin - valid_out_r <= scoreboard_if[i].valid && data_ready; - end else if (ready_out) begin - valid_out_r <= 0; - end + data_ready <= data_ready_n; end - - if (~valid_out_r) begin - data_out_r <= {scoreboard_if[i].data.uuid, - scoreboard_if[i].data.wis, - scoreboard_if[i].data.tmask, - scoreboard_if[i].data.PC, - scoreboard_if[i].data.wb, - scoreboard_if[i].data.ex_type, - scoreboard_if[i].data.op_type, - scoreboard_if[i].data.op_mod, - scoreboard_if[i].data.use_PC, - scoreboard_if[i].data.use_imm, - scoreboard_if[i].data.imm, - scoreboard_if[i].data.rd}; - end - gpr_rd_rid <= gpr_rd_rid_n; gpr_rd_wis <= gpr_rd_wis_n; rs2_ready <= rs2_ready_n; @@ -216,10 +192,35 @@ module VX_operands import VX_gpu_pkg::*; #( cache_data <= cache_data_n; cache_reg <= cache_reg_n; cache_tmask <= cache_tmask_n; - end + end - assign operands_if[i].valid = valid_out_r; - assign {operands_if[i].data.uuid, + assign stg_valid_in = scoreboard_if[i].valid && data_ready; + assign scoreboard_if[i].ready = stg_ready_in && data_ready; + + VX_toggle_buffer #( + .DATAW (DATAW) + ) staging_buffer ( + .clk (clk), + .reset (reset), + .valid_in (stg_valid_in), + .data_in ({ + scoreboard_if[i].data.uuid, + scoreboard_if[i].data.wis, + scoreboard_if[i].data.tmask, + scoreboard_if[i].data.PC, + scoreboard_if[i].data.wb, + scoreboard_if[i].data.ex_type, + scoreboard_if[i].data.op_type, + scoreboard_if[i].data.op_mod, + scoreboard_if[i].data.use_PC, + scoreboard_if[i].data.use_imm, + scoreboard_if[i].data.imm, + scoreboard_if[i].data.rd + }), + .ready_in (stg_ready_in), + .valid_out (operands_if[i].valid), + .data_out ({ + operands_if[i].data.uuid, operands_if[i].data.wis, operands_if[i].data.tmask, operands_if[i].data.PC, @@ -230,13 +231,15 @@ module VX_operands import VX_gpu_pkg::*; #( operands_if[i].data.use_PC, operands_if[i].data.use_imm, operands_if[i].data.imm, - operands_if[i].data.rd} = data_out_r; + operands_if[i].data.rd + }), + .ready_out (operands_if[i].ready) + ); + assign operands_if[i].data.rs1_data = rs1_data; assign operands_if[i].data.rs2_data = rs2_data; assign operands_if[i].data.rs3_data = rs3_data; - assign scoreboard_if[i].ready = ~valid_out_r && data_ready; - // GPR banks reg [RAM_ADDRW-1:0] gpr_rd_addr; diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index a4792c8d..6b806dd0 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -152,51 +152,47 @@ module VX_scoreboard import VX_gpu_pkg::*; #( assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready; `endif - reg [DATAW-1:0] data_out_r; - reg valid_out_r; - wire ready_out; + wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3}; + wire operands_ready = ~(| operands_busy); + + wire stg_valid_in, stg_ready_in; + assign stg_valid_in = ibuffer_if[i].valid && operands_ready; + assign ibuffer_if[i].ready = stg_ready_in && operands_ready; - wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3}; - wire deps_ready = (& ready_masks); - - wire valid_in = ibuffer_if[i].valid && deps_ready; - wire ready_in = ~valid_out_r && deps_ready; - wire [DATAW-1:0] data_in = ibuffer_if[i].data; - - assign ready_out = scoreboard_if[i].ready; + VX_stream_buffer #( + .DATAW (DATAW) + ) staging_buffer ( + .clk (clk), + .reset (reset), + .valid_in (stg_valid_in), + .data_in (ibuffer_if[i].data), + .ready_in (stg_ready_in), + .valid_out (scoreboard_if[i].valid), + .data_out (scoreboard_if[i].data), + .ready_out (scoreboard_if[i].ready) + ); always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; inuse_regs <= '0; end else begin if (writeback_fire) begin inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0; end - if (~valid_out_r) begin - valid_out_r <= valid_in; - end else if (ready_out) begin - if (scoreboard_if[i].data.wb) begin - inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1; - `ifdef PERF_ENABLE - inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type; - if (scoreboard_if[i].data.ex_type == `EX_SFU) begin - inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type; - end - `endif - end - valid_out_r <= 0; + if (stg_valid_in && stg_ready_in && ibuffer_if[i].data.wb) begin + inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1; end end - if (~valid_out_r) begin - data_out_r <= data_in; + `ifdef PERF_ENABLE + if (stg_valid_in && stg_ready_in && ibuffer_if[i].data.wb) begin + inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type; + if (ibuffer_if[i].data.ex_type == `EX_SFU) begin + inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type; + end end + `endif end - assign ibuffer_if[i].ready = ready_in; - assign scoreboard_if[i].valid = valid_out_r; - assign scoreboard_if[i].data = data_out_r; - `ifdef SIMULATION reg [31:0] timeout_ctr; @@ -208,7 +204,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_CORE_PIPELINE `TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr, - ~ready_masks, ibuffer_if[i].data.uuid)); + operands_busy, ibuffer_if[i].data.uuid)); `endif timeout_ctr <= timeout_ctr + 1; end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin @@ -220,7 +216,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), ("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", $time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr, - ~ready_masks, ibuffer_if[i].data.uuid)); + operands_busy, ibuffer_if[i].data.uuid)); `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0, ("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", From 6f7a389a1f642ec205401c3f54c1a4c4bdfc798e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 4 Feb 2024 20:16:18 -0800 Subject: [PATCH 07/12] arbiters unlock refactoring --- hw/rtl/cache/VX_cache_bypass.sv | 12 +++++----- hw/rtl/libs/VX_cyclic_arbiter.sv | 11 ++++------ hw/rtl/libs/VX_fair_arbiter.sv | 12 ++++------ hw/rtl/libs/VX_generic_arbiter.sv | 35 +++++++++++++++--------------- hw/rtl/libs/VX_matrix_arbiter.sv | 14 ++++++------ hw/rtl/libs/VX_mem_rsp_sel.sv | 16 ++++++++------ hw/rtl/libs/VX_priority_arbiter.sv | 13 ++--------- hw/rtl/libs/VX_rr_arbiter.sv | 26 +++++++++++----------- hw/rtl/libs/VX_stream_arb.sv | 27 +++++++++-------------- hw/rtl/libs/VX_stream_xbar.sv | 5 +---- 10 files changed, 74 insertions(+), 97 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 4a281f19..d10f47e0 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -130,20 +130,20 @@ module VX_cache_bypass #( assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs; - wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in); + wire core_req_nc_ready = ~mem_req_valid_in && mem_req_ready_out; VX_generic_arbiter #( .NUM_REQS (NUM_REQS), .TYPE (PASSTHRU ? "R" : "P"), .LOCK_ENABLE (1) - ) req_arb ( + ) core_req_nc_arb ( .clk (clk), - .reset (reset), - .unlock (core_req_in_fire), + .reset (reset), .requests (core_req_valid_in_nc), .grant_index (core_req_nc_idx), .grant_onehot (core_req_nc_sel), - .grant_valid (core_req_nc_valid) + .grant_valid (core_req_nc_valid), + .grant_unlock (core_req_nc_ready) ); assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs; @@ -164,7 +164,7 @@ module VX_cache_bypass #( end for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) + assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (core_req_nc_ready && core_req_nc_sel[i]) : core_req_ready_out[i]; end diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index cd7d91f9..63b62136 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -21,15 +21,12 @@ module VX_cyclic_arbiter #( ) ( input wire clk, input wire reset, - input wire [NUM_REQS-1:0] requests, - input wire unlock, + input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid + output wire grant_valid, + input wire grant_unlock ); - `UNUSED_PARAM (LOCK_ENABLE) - `UNUSED_VAR (unlock) - if (NUM_REQS == 1) begin `UNUSED_VAR (clk) @@ -51,7 +48,7 @@ module VX_cyclic_arbiter #( end else begin if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; - end else begin + end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); end end diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index c1b1a4b7..acc01971 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -21,17 +21,17 @@ module VX_fair_arbiter #( ) ( input wire clk, input wire reset, - input wire unlock, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid + output wire grant_valid, + input wire grant_unlock ); if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (unlock) + `UNUSED_VAR (grant_unlock) assign grant_index = '0; assign grant_onehot = requests; @@ -48,18 +48,14 @@ module VX_fair_arbiter #( always @(posedge clk) begin if (reset) begin buffer <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin buffer <= buffer_n; end end VX_priority_arbiter #( .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) ) priority_arbiter ( - .clk (clk), - .reset (reset), - .unlock (unlock), .requests (requests_qual), .grant_index (grant_index), .grant_onehot (grant_onehot), diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index adeefc7d..2b7922d9 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -21,22 +21,23 @@ module VX_generic_arbiter #( parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, - input wire unlock, + input wire reset, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid + output wire grant_valid, + input wire grant_unlock ); if (TYPE == "P") begin + `UNUSED_PARAM (LOCK_ENABLE) + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + `UNUSED_VAR (grant_unlock) + VX_priority_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) + .NUM_REQS (NUM_REQS), ) priority_arbiter ( - .clk (clk), - .reset (reset), - .unlock (unlock), .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), @@ -50,12 +51,12 @@ module VX_generic_arbiter #( .LOCK_ENABLE (LOCK_ENABLE) ) rr_arbiter ( .clk (clk), - .reset (reset), - .unlock (unlock), + .reset (reset), .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), - .grant_onehot (grant_onehot) + .grant_onehot (grant_onehot), + .grant_unlock (grant_unlock) ); end else if (TYPE == "F") begin @@ -66,11 +67,11 @@ module VX_generic_arbiter #( ) fair_arbiter ( .clk (clk), .reset (reset), - .unlock (unlock), .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), - .grant_onehot (grant_onehot) + .grant_onehot (grant_onehot), + .grant_unlock (grant_unlock) ); end else if (TYPE == "M") begin @@ -81,11 +82,11 @@ module VX_generic_arbiter #( ) matrix_arbiter ( .clk (clk), .reset (reset), - .unlock (unlock), .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), - .grant_onehot (grant_onehot) + .grant_onehot (grant_onehot), + .grant_unlock (grant_unlock) ); end else if (TYPE == "C") begin @@ -96,11 +97,11 @@ module VX_generic_arbiter #( ) cyclic_arbiter ( .clk (clk), .reset (reset), - .unlock (unlock), .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), - .grant_onehot (grant_onehot) + .grant_onehot (grant_onehot), + .grant_unlock (grant_unlock) ); end else begin diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index e076e06e..9333c1ac 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -20,18 +20,18 @@ module VX_matrix_arbiter #( parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, - input wire unlock, + input wire reset, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid + output wire grant_valid, + input wire grant_unlock ); if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (unlock) + `UNUSED_VAR (grant_unlock) assign grant_index = '0; assign grant_onehot = requests; @@ -71,18 +71,18 @@ module VX_matrix_arbiter #( end if (LOCK_ENABLE == 0) begin - `UNUSED_VAR (unlock) + `UNUSED_VAR (grant_unlock) assign grant_onehot = grant_unqual; end else begin reg [NUM_REQS-1:0] grant_unqual_prev; always @(posedge clk) begin if (reset) begin grant_unqual_prev <= '0; - end else if (unlock) begin + end else if (grant_unlock) begin grant_unqual_prev <= grant_unqual; end end - assign grant_onehot = unlock ? grant_unqual : grant_unqual_prev; + assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev; end VX_onehot_encoder #( diff --git a/hw/rtl/libs/VX_mem_rsp_sel.sv b/hw/rtl/libs/VX_mem_rsp_sel.sv index 120bc80d..8366bfef 100644 --- a/hw/rtl/libs/VX_mem_rsp_sel.sv +++ b/hw/rtl/libs/VX_mem_rsp_sel.sv @@ -21,7 +21,7 @@ module VX_mem_rsp_sel #( parameter TAG_SEL_BITS = 0, parameter OUT_REG = 0 ) ( -input wire clk, + input wire clk, input wire reset, // input response @@ -46,18 +46,20 @@ input wire clk, wire [LOG_NUM_REQS-1:0] grant_index; wire grant_valid; - wire rsp_fire; + wire grant_ready; - VX_priority_arbiter #( - .NUM_REQS (NUM_REQS) + VX_generic_arbiter #( + .NUM_REQS (NUM_REQS), + .LOCK_ENABLE (1), + .TYPE ("P") ) arbiter ( .clk (clk), .reset (reset), - .unlock (rsp_fire), .requests (rsp_valid_in), .grant_valid (grant_valid), .grant_index (grant_index), - `UNUSED_PIN (grant_onehot) + `UNUSED_PIN (grant_onehot), + .grant_unlock(grant_ready) ); reg [NUM_REQS-1:0] rsp_valid_sel; @@ -78,7 +80,7 @@ input wire clk, end end - assign rsp_fire = grant_valid && rsp_ready_unqual; + assign grant_ready = rsp_ready_unqual; VX_elastic_buffer #( .DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)), diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index c47bc63a..e807d860 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -16,22 +16,13 @@ `TRACING_OFF module VX_priority_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( - input wire clk, - input wire reset, - input wire [NUM_REQS-1:0] requests, - input wire unlock, + input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid ); - `UNUSED_PARAM (LOCK_ENABLE) - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (unlock) - if (NUM_REQS == 1) begin assign grant_index = '0; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index d4d6fb1a..c1ee4d77 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -21,18 +21,18 @@ module VX_rr_arbiter #( parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, - input wire unlock, + input wire reset, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid + output wire grant_valid, + input wire grant_unlock ); if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (unlock) + `UNUSED_VAR (grant_unlock) assign grant_index = '0; assign grant_onehot = requests; @@ -55,7 +55,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -85,7 +85,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -121,7 +121,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -165,7 +165,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -219,7 +219,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -285,7 +285,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -365,7 +365,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end @@ -399,7 +399,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin pointer_reg <= {NUM_REQS{1'b1}}; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin if (|req_masked) begin pointer_reg <= mask_higher_pri_regs; end else if (|requests) begin @@ -443,7 +443,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || unlock) begin + end else if (!LOCK_ENABLE || grant_unlock) begin state <= grant_index_r; end end diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index a81be3ef..f3c4196a 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -19,7 +19,6 @@ module VX_stream_arb #( parameter NUM_OUTPUTS = 1, parameter DATAW = 1, parameter `STRING ARBITER = "P", - parameter LOCK_ENABLE = 1, parameter MAX_FANOUT = `MAX_FANOUT, parameter OUT_REG = 0 , parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS, @@ -57,7 +56,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) arb_slice ( @@ -102,7 +100,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) fanout_slice_arb ( @@ -129,7 +126,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (1), .DATAW (DATAW + LOG_NUM_REQS2), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) fanout_join_arb ( @@ -158,25 +154,25 @@ module VX_stream_arb #( wire arb_valid; wire [NUM_REQS_W-1:0] arb_index; wire [NUM_REQS-1:0] arb_onehot; - wire arb_unlock; + wire arb_ready; VX_generic_arbiter #( .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE), + .LOCK_ENABLE (1), .TYPE (ARBITER) ) arbiter ( .clk (clk), .reset (reset), .requests (valid_in), - .unlock (arb_unlock), .grant_valid (arb_valid), .grant_index (arb_index), - .grant_onehot (arb_onehot) + .grant_onehot (arb_onehot), + .grant_unlock (arb_ready) ); assign valid_in_r = arb_valid; assign data_in_r = data_in[arb_index]; - assign arb_unlock = | (valid_in_r & ready_in_r); + assign arb_ready = ready_in_r; for (genvar i = 0; i < NUM_REQS; ++i) begin assign ready_in[i] = ready_in_r & arb_onehot[i]; @@ -217,7 +213,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (BATCH_SIZE), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) arb_slice ( @@ -252,7 +247,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (NUM_BATCHES), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) fanout_fork_arb ( @@ -280,7 +274,6 @@ module VX_stream_arb #( .NUM_OUTPUTS (BATCH_SIZE), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) fanout_slice_arb ( @@ -305,24 +298,24 @@ module VX_stream_arb #( wire [NUM_OUTPUTS-1:0] arb_requests; wire arb_valid; wire [NUM_OUTPUTS-1:0] arb_onehot; - wire arb_unlock; + wire arb_ready; VX_generic_arbiter #( .NUM_REQS (NUM_OUTPUTS), - .LOCK_ENABLE (LOCK_ENABLE), + .LOCK_ENABLE (1), .TYPE (ARBITER) ) arbiter ( .clk (clk), .reset (reset), .requests (arb_requests), - .unlock (arb_unlock), .grant_valid (arb_valid), `UNUSED_PIN (grant_index), - .grant_onehot (arb_onehot) + .grant_onehot (arb_onehot), + .grant_unlock (arb_ready) ); assign arb_requests = ready_in_r; - assign arb_unlock = | (valid_in & ready_in); + assign arb_ready = valid_in[0]; assign ready_in = arb_valid; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 2a8e4bb4..ac8a8dc1 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -21,8 +21,7 @@ module VX_stream_xbar #( parameter IN_WIDTH = `LOG2UP(NUM_INPUTS), parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter ARBITER = "P", - parameter LOCK_ENABLE = 0, - parameter OUT_REG = 0, + parameter OUT_REG = 0, parameter MAX_FANOUT = `MAX_FANOUT, parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) ) ( @@ -66,7 +65,6 @@ module VX_stream_xbar #( .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) xbar_arb ( @@ -95,7 +93,6 @@ module VX_stream_xbar #( .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), - .LOCK_ENABLE (LOCK_ENABLE), .MAX_FANOUT (MAX_FANOUT), .OUT_REG (OUT_REG) ) xbar_arb ( From 8d4b6c804fe3f0558af9e6242908a736cf481c32 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 4 Feb 2024 20:17:12 -0800 Subject: [PATCH 08/12] minor update --- hw/rtl/core/VX_scoreboard.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 6b806dd0..df07ca63 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -111,7 +111,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( reg [`SFU_WIDTH-1:0] sfu_type; always @(*) begin - case (scoreboard_if[i].data.op_type) + case (ibuffer_if[i].data.op_type) `INST_SFU_CSRRW, `INST_SFU_CSRRS, `INST_SFU_CSRRC: sfu_type = `SFU_CSRS; @@ -179,12 +179,12 @@ module VX_scoreboard import VX_gpu_pkg::*; #( if (writeback_fire) begin inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0; end - if (stg_valid_in && stg_ready_in && ibuffer_if[i].data.wb) begin + if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1; end end `ifdef PERF_ENABLE - if (stg_valid_in && stg_ready_in && ibuffer_if[i].data.wb) begin + if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type; if (ibuffer_if[i].data.ex_type == `EX_SFU) begin inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type; From be0db6e1a511551ee3f03414126dea443354fac6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 4 Feb 2024 20:32:05 -0800 Subject: [PATCH 09/12] minor update --- hw/rtl/libs/VX_fair_arbiter.sv | 2 +- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index acc01971..c063b2fb 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -54,7 +54,7 @@ module VX_fair_arbiter #( end VX_priority_arbiter #( - .NUM_REQS (NUM_REQS), + .NUM_REQS (NUM_REQS) ) priority_arbiter ( .requests (requests_qual), .grant_index (grant_index), diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 2b7922d9..4573efb3 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -36,7 +36,7 @@ module VX_generic_arbiter #( `UNUSED_VAR (grant_unlock) VX_priority_arbiter #( - .NUM_REQS (NUM_REQS), + .NUM_REQS (NUM_REQS) ) priority_arbiter ( .requests (requests), .grant_valid (grant_valid), From ae7b01405c8e25c118f97dd2131d8a73f1f8fe5b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 8 Feb 2024 14:10:00 -0800 Subject: [PATCH 10/12] CI minor update --- .travis.yml | 4 ++-- ci/regression.sh | 32 ++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index 236ed3b7..8b6e2878 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,8 +37,8 @@ jobs: script: - rm -rf $HOME/build32 && cp -r $PWD $HOME/build32 - rm -rf $HOME/build64 && cp -r $PWD $HOME/build64 - - make -C $HOME/build32 - - XLEN=64 make -C $HOME/build64 + - make -C $HOME/build32 > /dev/null + - XLEN=64 make -C $HOME/build64 > /dev/null - stage: test name: unittest script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest diff --git a/ci/regression.sh b/ci/regression.sh index abe51129..41ce8332 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -22,7 +22,7 @@ rm -f blackbox.*.cache unittest() { make -C tests/unittest run -make -C hw/unittest +make -C hw/unittest > /dev/null } isa() @@ -31,33 +31,36 @@ echo "begin isa tests..." make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-rtlsim -CONFIGS="-DDPI_DISABLE" make -C tests/riscv/isa run-rtlsim -make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim +make -C sim/rtlsim clean && CONFIGS="-DDPI_DISABLE" make -C sim/rtlsim > /dev/null +make -C tests/riscv/isa run-rtlsim + +make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-32f -make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim +make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-32f -make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim +make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-32f if [ "$XLEN" == "64" ] then - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim + make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-64d || true - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-64fx fi -make -C sim/rtlsim clean && make -C sim/rtlsim +# restore default prebuilt configuration +make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null echo "isa tests done!" } @@ -134,15 +137,16 @@ debug() echo "begin debugging tests..." # test CSV trace generation -make -C sim/simx clean && DEBUG=3 make -C sim/simx -make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim +make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null +make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-simx-32im > run_simx.log make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv diff trace_rtlsim.csv trace_simx.csv -make -C sim/simx clean && make -C sim/simx -make -C sim/rtlsim clean && make -C sim/rtlsim +# restore default prebuilt configuration +make -C sim/simx clean && make -C sim/simx > /dev/null +make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1" From 3fee1a61935240e9145db0235addad9b632f7a5e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Feb 2024 20:34:44 -0800 Subject: [PATCH 11/12] minor update --- README.md | 2 +- RELEASE | 4 ---- TODO | 23 ----------------------- ci/travis_run.py | 6 +++--- 4 files changed, 4 insertions(+), 31 deletions(-) delete mode 100644 RELEASE delete mode 100644 TODO diff --git a/README.md b/README.md index bca70da0..05b1cb93 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. ## Build Instructions More detailed build instructions can be found [here](docs/install_vortex.md). ### Supported OS Platforms -- Ubuntu 18.04 +- Ubuntu 18.04, 20.04 - Centos 7 ### Toolchain Dependencies - [POCL](http://portablecl.org/) diff --git a/RELEASE b/RELEASE deleted file mode 100644 index 48ae100a..00000000 --- a/RELEASE +++ /dev/null @@ -1,4 +0,0 @@ - -Release Notes! - -* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels. \ No newline at end of file diff --git a/TODO b/TODO deleted file mode 100644 index 0e4b84ed..00000000 --- a/TODO +++ /dev/null @@ -1,23 +0,0 @@ - - - -Functionality: -1) vx_cl_warpSpawn() - -> To be used by pocl->ops->run - -2) newlib Integration (LoadFile("")) - -> To be used by the Rhinio benchmarks - -3) POCL OPS Vortex Suite - -Performance: -1) Icache doesn't need SEND_MEM_REQUEST Stage - -> Blocks are never dirty, so why not evict right away - -2) Branch not taken speculation - -3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator - - -Vector: -1) Cycle accurate simulator (would require Cache Simulator) diff --git a/ci/travis_run.py b/ci/travis_run.py index f55a4b0a..8424cd59 100755 --- a/ci/travis_run.py +++ b/ci/travis_run.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -# Copyright © 2019-2023 -# +# Copyright 2019-2023 +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -34,7 +34,7 @@ def monitor(stop): break def execute(command): - process = subprocess.Popen(command, stdout=subprocess.PIPE) + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) while True: output = process.stdout.readline() if output: From 5f2b10b8a6f4da14fbb1ad42433832c228ae5099 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Feb 2024 21:20:23 -0800 Subject: [PATCH 12/12] minor update --- ci/travis_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis_run.py b/ci/travis_run.py index 8424cd59..021e3ff2 100755 --- a/ci/travis_run.py +++ b/ci/travis_run.py @@ -38,7 +38,7 @@ def execute(command): while True: output = process.stdout.readline() if output: - line = output.decode('ascii').rstrip() + line = output.decode('utf-8').rstrip() print(">>> " + line) process.stdout.flush() ret = process.poll()