From d13c5f2986142316aa2682b55ec01ea65ee94bb9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 5 Nov 2023 18:51:31 -0800 Subject: [PATCH] hw unit tests fixes --- ci/regression.sh | 1 + hw/dpi/util_dpi.cpp | 3 +- hw/rtl/cache/VX_cache_cluster.sv | 171 ---------------- hw/rtl/cache/VX_cache_cluster_top.sv | 190 ++++++++++++++++++ hw/rtl/cache/VX_cache_top.sv | 168 ++++++++++++++++ hw/rtl/core/VX_core.sv | 154 -------------- hw/rtl/core/VX_core_top.sv | 168 ++++++++++++++++ hw/rtl/libs/VX_mem_scheduler.sv | 8 +- hw/unit_tests/cache/.Makefile.swp | Bin 12288 -> 0 bytes hw/unit_tests/cache/Makefile | 45 ----- hw/unit_tests/cache2/Makefile | 0 hw/unit_tests/common/vl_simulator.h | 94 --------- hw/unit_tests/generic_queue/Makefile | 30 --- hw/unit_tests/generic_queue/testbench.v | 77 ------- .../mem_streamer/mem_streamer/Makefile | 65 ------ hw/unittest/Makefile | 17 ++ hw/{unit_tests => unittest}/VX_divide_tb.v | 0 hw/unittest/cache/Makefile | 77 +++++++ .../cache/cachesim.cpp | 46 +++-- hw/{unit_tests => unittest}/cache/cachesim.h | 16 +- hw/{unit_tests => unittest}/cache/ram.h | 0 .../cache/testbench.cpp | 4 +- .../common}/vl_simulator.h | 10 +- hw/unittest/generic_queue/Makefile | 65 ++++++ .../generic_queue/main.cpp | 38 +++- hw/unittest/mem_streamer/Makefile | 65 ++++++ .../mem_streamer/memsim.cpp | 50 ++--- .../mem_streamer/memsim.h | 30 +-- .../mem_streamer/ram.cpp | 14 -- .../mem_streamer/ram.h | 0 hw/unittest/top_modules/Makefile | 68 +++++++ hw/unittest/top_modules/main.cpp | 49 +++++ 32 files changed, 987 insertions(+), 736 deletions(-) create mode 100644 hw/rtl/cache/VX_cache_cluster_top.sv create mode 100644 hw/rtl/cache/VX_cache_top.sv create mode 100644 hw/rtl/core/VX_core_top.sv delete mode 100644 hw/unit_tests/cache/.Makefile.swp delete mode 100644 hw/unit_tests/cache/Makefile delete mode 100644 hw/unit_tests/cache2/Makefile delete mode 100644 hw/unit_tests/common/vl_simulator.h delete mode 100644 hw/unit_tests/generic_queue/Makefile delete mode 100644 hw/unit_tests/generic_queue/testbench.v delete mode 100644 hw/unit_tests/mem_streamer/mem_streamer/Makefile create mode 100644 hw/unittest/Makefile rename hw/{unit_tests => unittest}/VX_divide_tb.v (100%) create mode 100644 hw/unittest/cache/Makefile rename hw/{unit_tests => unittest}/cache/cachesim.cpp (91%) rename hw/{unit_tests => unittest}/cache/cachesim.h (92%) rename hw/{unit_tests => unittest}/cache/ram.h (100%) rename hw/{unit_tests => unittest}/cache/testbench.cpp (99%) rename hw/{unit_tests/generic_queue => unittest/common}/vl_simulator.h (95%) create mode 100644 hw/unittest/generic_queue/Makefile rename hw/{unit_tests => unittest}/generic_queue/main.cpp (75%) create mode 100644 hw/unittest/mem_streamer/Makefile rename hw/{unit_tests/mem_streamer => unittest}/mem_streamer/memsim.cpp (79%) rename hw/{unit_tests/mem_streamer => unittest}/mem_streamer/memsim.h (70%) rename hw/{unit_tests/mem_streamer => unittest}/mem_streamer/ram.cpp (90%) rename hw/{unit_tests/mem_streamer => unittest}/mem_streamer/ram.h (100%) create mode 100644 hw/unittest/top_modules/Makefile create mode 100644 hw/unittest/top_modules/main.cpp diff --git a/ci/regression.sh b/ci/regression.sh index d1f9e30b..abe51129 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -22,6 +22,7 @@ rm -f blackbox.*.cache unittest() { make -C tests/unittest run +make -C hw/unittest } isa() diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index faf121cf..7315a471 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -20,7 +20,6 @@ #include "svdpi.h" #include "verilated_vpi.h" -#include "VX_config.h" #include "uuid_gen.h" @@ -163,7 +162,7 @@ void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotie uword_t dividen = a; uword_t divisor = b; - auto inf_neg = uword_t(1) << (XLEN-1); + auto inf_neg = uword_t(1) << (8 * sizeof(iword_t) - 1); if (is_signed) { if (b == 0) { diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 281b2b23..18e26eb2 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -194,174 +194,3 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); endmodule - -/////////////////////////////////////////////////////////////////////////////// - -module VX_cache_cluster_top #( - parameter `STRING INSTANCE_ID = "", - - parameter NUM_UNITS = 2, - parameter NUM_INPUTS = 4, - parameter TAG_SEL_IDX = 0, - - // Number of Word requests per cycle - parameter NUM_REQS = 4, - - // Size of cache in bytes - parameter CACHE_SIZE = 16384, - // Size of line inside a bank in bytes - parameter LINE_SIZE = 16, - // Number of banks - parameter NUM_BANKS = 4, - // Number of associative ways - parameter NUM_WAYS = 4, - // Size of a word in bytes - parameter WORD_SIZE = 4, - - // Core Response Queue Size - parameter CRSQ_SIZE = 2, - // Miss Reserv Queue Knob - parameter MSHR_SIZE = 16, - // Memory Response Queue Size - parameter MRSQ_SIZE = 0, - // Memory Request Queue Size - parameter MREQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, - - // Request debug identifier - parameter UUID_WIDTH = 0, - - // core request tag size - parameter TAG_WIDTH = 16, - - // enable bypass for non-cacheable addresses - parameter NC_ENABLE = 1, - - // Core response output register - parameter CORE_OUT_REG = 2, - - // Memory request output register - parameter MEM_OUT_REG = 2, - - parameter NUM_CACHES = `UP(NUM_UNITS), - parameter PASSTHRU = (NUM_UNITS == 0), - parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES), - parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)) - ) ( - input wire clk, - input wire reset, - - // Core request - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, - output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready, - - // Core response - output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid, - output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data, - output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag, - input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready, - - // Memory request - output wire mem_req_valid, - output wire mem_req_rw, - output wire [LINE_SIZE-1:0] mem_req_byteen, - output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [`CS_LINE_WIDTH-1:0] mem_req_data, - output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, - input wire mem_req_ready, - - // Memory response - input wire mem_rsp_valid, - input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, - input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, - output wire mem_rsp_ready -); - VX_mem_bus_if #( - .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) - ) core_bus_if[NUM_INPUTS * NUM_REQS](); - - VX_mem_bus_if #( - .DATA_SIZE (LINE_SIZE), - .TAG_WIDTH (MEM_TAG_WIDTH) - ) mem_bus_if(); - - // Core request - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar r = 0; r < NUM_REQS; ++r) begin - assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r]; - assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r]; - assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r]; - assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r]; - assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r]; - assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r]; - assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready; - end - end - - // Core response - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar r = 0; r < NUM_REQS; ++r) begin - assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid; - assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data; - assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag; - assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r]; - end - end - - // Memory request - assign mem_req_valid = mem_bus_if.req_valid; - assign mem_req_rw = mem_bus_if.req_data.rw; - assign mem_req_byteen = mem_bus_if.req_data.byteen; - assign mem_req_addr = mem_bus_if.req_data.addr; - assign mem_req_data = mem_bus_if.req_data.data; - assign mem_req_tag = mem_bus_if.req_data.tag; - assign mem_bus_if.req_ready = mem_req_ready; - - // Memory response - assign mem_bus_if.rsp_valid = mem_rsp_valid; - assign mem_bus_if.rsp_data.data = mem_rsp_data; - assign mem_bus_if.rsp_data.tag = mem_rsp_tag; - assign mem_rsp_ready = mem_bus_if.rsp_ready; - - VX_cache_cluster #( - .INSTANCE_ID (INSTANCE_ID), - .NUM_UNITS (NUM_UNITS), - .NUM_INPUTS (NUM_INPUTS), - .TAG_SEL_IDX (TAG_SEL_IDX), - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_WAYS (NUM_WAYS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CRSQ_SIZE (CRSQ_SIZE), - .MSHR_SIZE (MSHR_SIZE), - .MRSQ_SIZE (MRSQ_SIZE), - .MREQ_SIZE (MREQ_SIZE), - .TAG_WIDTH (TAG_WIDTH), - .UUID_WIDTH (UUID_WIDTH), - .WRITE_ENABLE (WRITE_ENABLE), - .CORE_OUT_REG (CORE_OUT_REG), - .MEM_OUT_REG (MEM_OUT_REG) - ) cache ( - `ifdef PERF_ENABLE - .cache_perf (perf_icache), - `endif - .clk (clk), - .reset (reset), - .core_bus_if (core_bus_if), - .mem_bus_if (mem_bus_if) - ); - - endmodule diff --git a/hw/rtl/cache/VX_cache_cluster_top.sv b/hw/rtl/cache/VX_cache_cluster_top.sv new file mode 100644 index 00000000..500f2c87 --- /dev/null +++ b/hw/rtl/cache/VX_cache_cluster_top.sv @@ -0,0 +1,190 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_cache_define.vh" + +module VX_cache_cluster_top import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + + parameter NUM_UNITS = 2, + parameter NUM_INPUTS = 4, + parameter TAG_SEL_IDX = 0, + + // Number of Word requests per cycle + parameter NUM_REQS = 4, + + // Size of cache in bytes + parameter CACHE_SIZE = 16384, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 16, + // Number of banks + parameter NUM_BANKS = 4, + // Number of associative ways + parameter NUM_WAYS = 4, + // Size of a word in bytes + parameter WORD_SIZE = 4, + + // Core Response Queue Size + parameter CRSQ_SIZE = 2, + // Miss Reserv Queue Knob + parameter MSHR_SIZE = 16, + // Memory Response Queue Size + parameter MRSQ_SIZE = 0, + // Memory Request Queue Size + parameter MREQ_SIZE = 4, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Request debug identifier + parameter UUID_WIDTH = 0, + + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 16, + + // enable bypass for non-cacheable addresses + parameter NC_ENABLE = 1, + + // Core response output register + parameter CORE_OUT_REG = 2, + + // Memory request output register + parameter MEM_OUT_REG = 2, + + parameter NUM_CACHES = `UP(NUM_UNITS), + parameter PASSTHRU = (NUM_UNITS == 0), + parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES), + parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : + `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) : + (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)), + parameter MEM_TAG_X_WIDTH = MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1) + ) ( + input wire clk, + input wire reset, + +// PERF +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + // Core request + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, + output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready, + + // Core response + output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid, + output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data, + output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag, + input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready, + + // Memory request + output wire mem_req_valid, + output wire mem_req_rw, + output wire [LINE_SIZE-1:0] mem_req_byteen, + output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr, + output wire [`CS_LINE_WIDTH-1:0] mem_req_data, + output wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag, + input wire mem_req_ready, + + // Memory response + input wire mem_rsp_valid, + input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, + input wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag, + output wire mem_rsp_ready +); + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus_if[NUM_INPUTS * NUM_REQS](); + + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_X_WIDTH) + ) mem_bus_if(); + + // Core request + for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin + assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r]; + assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r]; + assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r]; + assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r]; + assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r]; + assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r]; + assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready; + end + end + + // Core response + for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin + assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid; + assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data; + assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag; + assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r]; + end + end + + // Memory request + assign mem_req_valid = mem_bus_if.req_valid; + assign mem_req_rw = mem_bus_if.req_data.rw; + assign mem_req_byteen = mem_bus_if.req_data.byteen; + assign mem_req_addr = mem_bus_if.req_data.addr; + assign mem_req_data = mem_bus_if.req_data.data; + assign mem_req_tag = mem_bus_if.req_data.tag; + assign mem_bus_if.req_ready = mem_req_ready; + + // Memory response + assign mem_bus_if.rsp_valid = mem_rsp_valid; + assign mem_bus_if.rsp_data.data = mem_rsp_data; + assign mem_bus_if.rsp_data.tag = mem_rsp_tag; + assign mem_rsp_ready = mem_bus_if.rsp_ready; + + VX_cache_cluster #( + .INSTANCE_ID (INSTANCE_ID), + .NUM_UNITS (NUM_UNITS), + .NUM_INPUTS (NUM_INPUTS), + .TAG_SEL_IDX (TAG_SEL_IDX), + .NUM_REQS (NUM_REQS), + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WORD_SIZE (WORD_SIZE), + .CRSQ_SIZE (CRSQ_SIZE), + .MSHR_SIZE (MSHR_SIZE), + .MRSQ_SIZE (MRSQ_SIZE), + .MREQ_SIZE (MREQ_SIZE), + .WRITE_ENABLE (WRITE_ENABLE), + .UUID_WIDTH (UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .NC_ENABLE (NC_ENABLE), + .CORE_OUT_REG (CORE_OUT_REG), + .MEM_OUT_REG (MEM_OUT_REG) + ) cache ( + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .clk (clk), + .reset (reset), + .core_bus_if (core_bus_if), + .mem_bus_if (mem_bus_if) + ); + +endmodule diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv new file mode 100644 index 00000000..9e36d9af --- /dev/null +++ b/hw/rtl/cache/VX_cache_top.sv @@ -0,0 +1,168 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_cache_define.vh" + +module VX_cache_top #( + parameter `STRING INSTANCE_ID = "", + + // Number of Word requests per cycle + parameter NUM_REQS = 4, + + // Size of cache in bytes + parameter CACHE_SIZE = 16384, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 16, + // Number of banks + parameter NUM_BANKS = 4, + // Number of associative ways + parameter NUM_WAYS = 4, + // Size of a word in bytes + parameter WORD_SIZE = 4, + + // Core Response Queue Size + parameter CRSQ_SIZE = 2, + // Miss Reserv Queue Knob + parameter MSHR_SIZE = 16, + // Memory Response Queue Size + parameter MRSQ_SIZE = 0, + // Memory Request Queue Size + parameter MREQ_SIZE = 4, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Request debug identifier + parameter UUID_WIDTH = 0, + + // core request tag size + parameter TAG_WIDTH = 16, + + // Core response output register + parameter CORE_OUT_REG = 2, + + // Memory request output register + parameter MEM_OUT_REG = 2, + + parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS) + ) ( + input wire clk, + input wire reset, + +// PERF +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + // Core request + input wire [NUM_REQS-1:0] core_req_valid, + input wire [NUM_REQS-1:0] core_req_rw, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, + input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, + input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, + output wire [NUM_REQS-1:0] core_req_ready, + + // Core response + output wire [NUM_REQS-1:0] core_rsp_valid, + output wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data, + output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag, + input wire [NUM_REQS-1:0] core_rsp_ready, + + // Memory request + output wire mem_req_valid, + output wire mem_req_rw, + output wire [LINE_SIZE-1:0] mem_req_byteen, + output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr, + output wire [`CS_LINE_WIDTH-1:0] mem_req_data, + output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, + input wire mem_req_ready, + + // Memory response + input wire mem_rsp_valid, + input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, + input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, + output wire mem_rsp_ready +); + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus_if[NUM_REQS](); + + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_if(); + + // Core request + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_bus_if[i].req_valid = core_req_valid[i]; + assign core_bus_if[i].req_data.rw = core_req_rw[i]; + assign core_bus_if[i].req_data.byteen = core_req_byteen[i]; + assign core_bus_if[i].req_data.addr = core_req_addr[i]; + assign core_bus_if[i].req_data.data = core_req_data[i]; + assign core_bus_if[i].req_data.tag = core_req_tag[i]; + assign core_req_ready[i] = core_bus_if[i].req_ready; + end + + // Core response + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_valid[i] = core_bus_if[i].rsp_valid; + assign core_rsp_data[i] = core_bus_if[i].rsp_data.data; + assign core_rsp_tag[i] = core_bus_if[i].rsp_data.tag; + assign core_bus_if[i].rsp_ready = core_rsp_ready[i]; + end + + // Memory request + assign mem_req_valid = mem_bus_if.req_valid; + assign mem_req_rw = mem_bus_if.req_data.rw; + assign mem_req_byteen = mem_bus_if.req_data.byteen; + assign mem_req_addr = mem_bus_if.req_data.addr; + assign mem_req_data = mem_bus_if.req_data.data; + assign mem_req_tag = mem_bus_if.req_data.tag; + assign mem_bus_if.req_ready = mem_req_ready; + + // Memory response + assign mem_bus_if.rsp_valid = mem_rsp_valid; + assign mem_bus_if.rsp_data.data = mem_rsp_data; + assign mem_bus_if.rsp_data.tag = mem_rsp_tag; + assign mem_rsp_ready = mem_bus_if.rsp_ready; + + VX_cache #( + .INSTANCE_ID (INSTANCE_ID), + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQS (NUM_REQS), + .CRSQ_SIZE (CRSQ_SIZE), + .MSHR_SIZE (MSHR_SIZE), + .MRSQ_SIZE (MRSQ_SIZE), + .MREQ_SIZE (MREQ_SIZE), + .TAG_WIDTH (TAG_WIDTH), + .UUID_WIDTH (UUID_WIDTH), + .WRITE_ENABLE (WRITE_ENABLE), + .CORE_OUT_REG (CORE_OUT_REG), + .MEM_OUT_REG (MEM_OUT_REG) + ) cache ( + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .clk (clk), + .reset (reset), + .core_bus_if (core_bus_if), + .mem_bus_if (mem_bus_if) + ); + +endmodule diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index fa76c4a7..d50a3d32 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -334,157 +334,3 @@ module VX_core import VX_gpu_pkg::*; #( `endif endmodule - -/////////////////////////////////////////////////////////////////////////////// - -module VX_core_top -import VX_gpu_pkg::*; -#( - parameter CORE_ID = 0 -) ( - // Clock - input wire clk, - input wire reset, - - input wire dcr_write_valid, - input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr, - input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data, - - output wire [DCACHE_NUM_REQS-1:0] dcache_req_valid, - output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw, - output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen, - output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr, - output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data, - output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag, - input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready, - - input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid, - input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data, - input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag, - output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready, - - output wire icache_req_valid, - output wire icache_req_rw, - output wire [ICACHE_WORD_SIZE-1:0] icache_req_byteen, - output wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr, - output wire [ICACHE_WORD_SIZE*8-1:0] icache_req_data, - output wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag, - input wire icache_req_ready, - - input wire icache_rsp_valid, - input wire [ICACHE_WORD_SIZE*8-1:0] icache_rsp_data, - input wire [ICACHE_TAG_WIDTH-1:0] icache_rsp_tag, - output wire icache_rsp_ready, - -`ifdef GBAR_ENABLE - output wire gbar_req_valid, - output wire [`NB_WIDTH-1:0] gbar_req_id, - output wire [`NC_WIDTH-1:0] gbar_req_size_m1, - output wire [`NC_WIDTH-1:0] gbar_req_core_id, - input wire gbar_req_ready, - input wire gbar_rsp_valid, - input wire [`NB_WIDTH-1:0] gbar_rsp_id, -`endif - - // simulation helper signals - output wire sim_ebreak, - output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value, - - // Status - output wire busy -); - -`ifdef GBAR_ENABLE - VX_gbar_bus_if gbar_bus_if(); - - assign gbar_req_valid = gbar_bus_if.req_valid; - assign gbar_req_id = gbar_bus_if.req_id; - assign gbar_req_size_m1 = gbar_bus_if.req_size_m1; - assign gbar_req_core_id = gbar_bus_if.req_core_id; - assign gbar_bus_if.req_ready = gbar_req_ready; - assign gbar_bus_if.rsp_valid = gbar_rsp_valid; - assign gbar_bus_if.rsp_id = gbar_rsp_id; -`endif - - VX_dcr_bus_if dcr_bus_if(); - - assign dcr_bus_if.write_valid = dcr_write_valid; - assign dcr_bus_if.write_addr = dcr_write_addr; - assign dcr_bus_if.write_data = dcr_write_data; - - VX_mem_bus_if #( - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH) - ) dcache_bus_if[DCACHE_NUM_REQS](); - - for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin - assign dcache_req_valid[i] = dcache_bus_if[i].req_valid; - assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw; - assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen; - assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr; - assign dcache_req_data[i] = dcache_bus_if[i].req_data.data; - assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag; - assign dcache_bus_if[i].req_ready = dcache_req_ready[i]; - - assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i]; - assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i]; - assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i]; - assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready; - end - - VX_mem_bus_if #( - .DATA_SIZE (ICACHE_WORD_SIZE), - .TAG_WIDTH (ICACHE_TAG_WIDTH) - ) icache_bus_if(); - - assign icache_req_valid = icache_bus_if.req_valid; - assign icache_req_rw = icache_bus_if.req_data.rw; - assign icache_req_byteen = icache_bus_if.req_data.byteen; - assign icache_req_addr = icache_bus_if.req_data.addr; - assign icache_req_data = icache_bus_if.req_data.data; - assign icache_req_tag = icache_bus_if.req_data.tag; - assign icache_bus_if.req_ready = icache_req_ready; - - assign icache_bus_if.rsp_valid = icache_rsp_valid; - assign icache_bus_if.rsp_data.tag = icache_rsp_tag; - assign icache_bus_if.rsp_data.data = icache_rsp_data; - assign icache_rsp_ready = icache_bus_if.rsp_ready; - -`ifdef PERF_ENABLE - VX_mem_perf_if mem_perf_if(); -`endif - -`ifdef SCOPE - wire [0:0] scope_reset_w = 1'b0; - wire [0:0] scope_bus_in_w = 1'b0; - wire [0:0] scope_bus_out_w; - `UNUSED_VAR (scope_bus_out_w) -`endif - - VX_core #( - .CORE_ID (0) - ) core ( - `SCOPE_IO_BIND (0) - .clk (clk), - .reset (reset), - - `ifdef PERF_ENABLE - .mem_perf_if (mem_perf_if), - `endif - - .dcr_bus_if (dcr_bus_if), - - .dcache_bus_if (dcache_bus_if), - - .icache_bus_if (icache_bus_if), - - `ifdef GBAR_ENABLE - .gbar_bus_if (gbar_bus_if), - `endif - - .sim_ebreak (sim_ebreak), - .sim_wb_value (sim_wb_value), - .busy (busy) - ); - -endmodule diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv new file mode 100644 index 00000000..8d126f96 --- /dev/null +++ b/hw/rtl/core/VX_core_top.sv @@ -0,0 +1,168 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +`ifdef EXT_F_ENABLE +`include "VX_fpu_define.vh" +`endif + +module VX_core_top import VX_gpu_pkg::*; #( + parameter CORE_ID = 0 +) ( + // Clock + input wire clk, + input wire reset, + + input wire dcr_write_valid, + input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr, + input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data, + + output wire [DCACHE_NUM_REQS-1:0] dcache_req_valid, + output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag, + input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready, + + input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag, + output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready, + + output wire icache_req_valid, + output wire icache_req_rw, + output wire [ICACHE_WORD_SIZE-1:0] icache_req_byteen, + output wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr, + output wire [ICACHE_WORD_SIZE*8-1:0] icache_req_data, + output wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag, + input wire icache_req_ready, + + input wire icache_rsp_valid, + input wire [ICACHE_WORD_SIZE*8-1:0] icache_rsp_data, + input wire [ICACHE_TAG_WIDTH-1:0] icache_rsp_tag, + output wire icache_rsp_ready, + +`ifdef GBAR_ENABLE + output wire gbar_req_valid, + output wire [`NB_WIDTH-1:0] gbar_req_id, + output wire [`NC_WIDTH-1:0] gbar_req_size_m1, + output wire [`NC_WIDTH-1:0] gbar_req_core_id, + input wire gbar_req_ready, + input wire gbar_rsp_valid, + input wire [`NB_WIDTH-1:0] gbar_rsp_id, +`endif + + // simulation helper signals + output wire sim_ebreak, + output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value, + + // Status + output wire busy +); + +`ifdef GBAR_ENABLE + VX_gbar_bus_if gbar_bus_if(); + + assign gbar_req_valid = gbar_bus_if.req_valid; + assign gbar_req_id = gbar_bus_if.req_id; + assign gbar_req_size_m1 = gbar_bus_if.req_size_m1; + assign gbar_req_core_id = gbar_bus_if.req_core_id; + assign gbar_bus_if.req_ready = gbar_req_ready; + assign gbar_bus_if.rsp_valid = gbar_rsp_valid; + assign gbar_bus_if.rsp_id = gbar_rsp_id; +`endif + + VX_dcr_bus_if dcr_bus_if(); + + assign dcr_bus_if.write_valid = dcr_write_valid; + assign dcr_bus_if.write_addr = dcr_write_addr; + assign dcr_bus_if.write_data = dcr_write_data; + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH) + ) dcache_bus_if[DCACHE_NUM_REQS](); + + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign dcache_req_valid[i] = dcache_bus_if[i].req_valid; + assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw; + assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen; + assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr; + assign dcache_req_data[i] = dcache_bus_if[i].req_data.data; + assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag; + assign dcache_bus_if[i].req_ready = dcache_req_ready[i]; + + assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i]; + assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i]; + assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i]; + assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready; + end + + VX_mem_bus_if #( + .DATA_SIZE (ICACHE_WORD_SIZE), + .TAG_WIDTH (ICACHE_TAG_WIDTH) + ) icache_bus_if(); + + assign icache_req_valid = icache_bus_if.req_valid; + assign icache_req_rw = icache_bus_if.req_data.rw; + assign icache_req_byteen = icache_bus_if.req_data.byteen; + assign icache_req_addr = icache_bus_if.req_data.addr; + assign icache_req_data = icache_bus_if.req_data.data; + assign icache_req_tag = icache_bus_if.req_data.tag; + assign icache_bus_if.req_ready = icache_req_ready; + + assign icache_bus_if.rsp_valid = icache_rsp_valid; + assign icache_bus_if.rsp_data.tag = icache_rsp_tag; + assign icache_bus_if.rsp_data.data = icache_rsp_data; + assign icache_rsp_ready = icache_bus_if.rsp_ready; + +`ifdef PERF_ENABLE + VX_mem_perf_if mem_perf_if(); +`endif + +`ifdef SCOPE + wire [0:0] scope_reset_w = 1'b0; + wire [0:0] scope_bus_in_w = 1'b0; + wire [0:0] scope_bus_out_w; + `UNUSED_VAR (scope_bus_out_w) +`endif + + VX_core #( + .CORE_ID (CORE_ID) + ) core ( + `SCOPE_IO_BIND (0) + .clk (clk), + .reset (reset), + + `ifdef PERF_ENABLE + .mem_perf_if (mem_perf_if), + `endif + + .dcr_bus_if (dcr_bus_if), + + .dcache_bus_if (dcache_bus_if), + + .icache_bus_if (icache_bus_if), + + `ifdef GBAR_ENABLE + .gbar_bus_if (gbar_bus_if), + `endif + + .sim_ebreak (sim_ebreak), + .sim_wb_value (sim_wb_value), + .busy (busy) + ); + +endmodule diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 17ccd63f..d4e48e69 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -16,14 +16,14 @@ `TRACING_OFF module VX_mem_scheduler #( parameter `STRING INSTANCE_ID = "", - parameter NUM_REQS = 4, - parameter NUM_BANKS = 4, + parameter NUM_REQS = 1, + parameter NUM_BANKS = 1, parameter ADDR_WIDTH = 32, parameter DATA_WIDTH = 32, - parameter TAG_WIDTH = 32, + parameter TAG_WIDTH = 8, parameter MEM_TAG_ID = 0, // upper section of the tag sent to the memory interface parameter UUID_WIDTH = 0, // upper section of the mem_tag_id containing the UUID - parameter QUEUE_SIZE = 16, + parameter QUEUE_SIZE = 8, parameter RSP_PARTIAL = 0, parameter CORE_OUT_REG = 0, parameter MEM_OUT_REG = 0, diff --git a/hw/unit_tests/cache/.Makefile.swp b/hw/unit_tests/cache/.Makefile.swp deleted file mode 100644 index fe645a32b872279be51fddca51993ea8e17ac230..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&u`;I6vtnIO9ji1-IS9&9TWWAX;&oduS4^d}O?A4pRmoF09 z>qP(D`S&yTc7wiohsblIL`CVq)0-QpnHvuOH}$J|!9s70y0gOB7lmo)Cr+Z`BrbTa zccuzW_gtO$fqME2WmF7^foC%?O-{7ETiy5Wm9jtejk|aF&D-5)t0CE9Kn#chF(3xS zfEW-1Vn7T$*9MZw8hwo&ys+5C?&7)j*mDueLkx%kF(3xSfEW-1Vn7Ut0Wly3#DExh z4h^_NqWzbN)?UHl@&Etj@BgFMh+4n`P4F%t@aR>dpTPIvOYkYE0TXP4E%4VZqCdeO z;1}>S_!0a7z60Na4tO8j1uuaY!S7hZZ{QL570kc|I0scw0T002;4Sb5NZ0*0)-O#A zhygJm2E>3E5CdXB42Xdn4Xj^p(TKI^gvW!2&d`s@3zQok5UuZc!(}ILZE?QP>wV;8 zw{&w9O~V5oO#J}A9v)8|;R-Kn4_Q0m%NhC6d5*ID13pzDt<(C2iu}M$#u0Ds^lCZQ z49hU9-9y$qkC$)CUtqLdu`5j^=(R@Wu*2J%JNjz=GGUuFP0bW0bZotoW5SED4>GEU zyjw>7o^Cj;?s2<&j2!ejZM$wA=ZJc>EMax>=1$$x8(qW9El0JP=Hs@q?*@!p%b!ox za-biMHN(^!wR*#}Dou0QuLW*+I(1JKm-dQ9?Ow6Cj}^LctfGXqz8eGxlgRZHZ)p9A z8oFWPdqnztHE}X5d%L@(5=4$->?~1X?2p5kwR%2Z$mfYr;_@q`d*(7{-zWA*d2ce| z^kVfuh2B{j!Ng)qNL!RBL`hJ%7G(3U^#uMPCfd04RbFxDWM2dZ(-Xx|e3|96+7gzJ^Obb!ADiX-#r+4U zoUY1g)GgD=dP`}a)onUyf2VvOQNLvy^T9OIZS&|VYTCU;lua~h?bWbj*{h-6udYlr Lu0^$@ -#include -#include "verilated.h" - -#ifdef VM_TRACE -#include // Trace file format header -#endif - -template -class vl_simulator { -private: - - T top_; -#ifdef VM_TRACE - VerilatedVcdC tfp_; -#endif - -public: - - vl_simulator() { - top_.clk = 0; - top_.reset = 0; - #ifdef VM_TRACE - Verilated::traceEverOn(true); - top_.trace(&tfp_, 99); - tfp_.open("trace.vcd"); - #endif - } - - ~vl_simulator() { - #ifdef VM_TRACE - tfp_.close(); - #endif - top_.final(); - } - - uint64_t reset(uint64_t ticks) { - top_.reset = 1; - ticks = this->step(ticks, 2); - top_.reset = 0; - return ticks; - } - - uint64_t step(uint64_t ticks, uint32_t count = 1) { - while (count--) { - top_.eval(); - #ifdef VM_TRACE - tfp_.dump(ticks); - #endif - top_.clk = !top_.clk; - ++ticks; - } - return ticks; - } - - T* operator->() { - return &top_; - } -}; - -template -void vl_setw(uint32_t* sig, Args&&... args) { - std::array arr{static_cast(std::forward(args))...}; - for (size_t i = 0; i < sizeof... (Args); ++i) { - sig[i] = arr[i]; - } -} - -template -int vl_cmpw(const uint32_t* sig, Args&&... args) { - std::array arr{static_cast(std::forward(args))...}; - for (size_t i = 0; i < sizeof... (Args); ++i) { - if (sig[i] < arr[i]) - return -1; - if (sig[i] > arr[i]) - return 1; - } - return 0; -} \ No newline at end of file diff --git a/hw/unit_tests/generic_queue/Makefile b/hw/unit_tests/generic_queue/Makefile deleted file mode 100644 index f13d14a1..00000000 --- a/hw/unit_tests/generic_queue/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -TOP = VX_fifo_queue - -PARAMS ?= - -INCLUDE = -I../../rtl/ -I../../rtl/libs - -SRCS = main.cpp - -all: build - -CF += -std=c++11 -fms-extensions -I../.. -VF += $(PARAMS) - -VF += --language 1800-2009 --assert -Wall --trace -VF += -Wno-DECLFILENAME -VF += --x-initial unique -VF += -exe $(SRCS) $(INCLUDE) -VF += $(PARAMS) - -gen: - verilator $(VF) -cc $(TOP).v -CFLAGS '$(CF)' --exe $(SRCS) - -build: gen - (cd obj_dir && make -j -f V$(TOP).mk) - -run: build - (cd obj_dir && ./V$(TOP)) - -clean: - rm -rf obj_dir diff --git a/hw/unit_tests/generic_queue/testbench.v b/hw/unit_tests/generic_queue/testbench.v deleted file mode 100644 index 30b926a5..00000000 --- a/hw/unit_tests/generic_queue/testbench.v +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`timescale 1ns/1ns -`include "VX_fifo_queue.v" - -`define check(x, y) if ((x == y) !== 1) if ((x == y) === 0) $error("x=%h, expected=%h", x, y); else $warning("x=%h, expected=%h", x, y) - -module testbench(); - - reg clk; - reg reset; - reg[3:0] data_in; - reg push; - reg pop; - wire[3:0] data_out; - wire full; - wire empty; - - VX_fifo_queue #( - .DATAW (4), - .DEPTH (4) - ) dut ( - .clk(clk), - .reset(reset), - .data_in(data_in), - .push(push), - .pop(pop), - .data_out(data_out), - .empty(empty), - .full(full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_VAR (size) - ); - - always begin - #1 clk = !clk; - end - - initial begin - $monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h", - $time, clk, reset, push, pop, data_in, empty, full, data_out); - #0 clk=0; reset=1; pop=0; push=0; - #2 reset=0; data_in=4'ha; pop=0; push=1; - #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); - #0 data_in=4'hb; - #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); - #0 data_in=4'hc; - #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0); - #0 data_in=4'hd; - #2 `check(full, 1); `check(data_out, 4'ha); `check(empty, 0); - #0 push=0; pop=1; - #2 `check(full, 0); `check(data_out, 4'hb); `check(empty, 0); - #2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 0); - #2 `check(full, 0); `check(data_out, 4'hd); `check(empty, 0); - #2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 1); - #0 data_in=4'he; push=1; pop=0; - #2 `check(full, 0); `check(data_out, 4'he); `check(empty, 0); - #0 data_in=4'hf; pop=1; - #2 `check(full, 0); `check(data_out, 4'hf); `check(empty, 0); - #0 push=0; - #2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 1); - #1 $finish; - end - -endmodule diff --git a/hw/unit_tests/mem_streamer/mem_streamer/Makefile b/hw/unit_tests/mem_streamer/mem_streamer/Makefile deleted file mode 100644 index f42c5cb3..00000000 --- a/hw/unit_tests/mem_streamer/mem_streamer/Makefile +++ /dev/null @@ -1,65 +0,0 @@ -#--------------------------------------------------------- -# Makefile to compile and test the memory stream unit -#--------------------------------------------------------- - -TOP = VX_mem_scheduler -PARAMS += -GNUM_REQS=4 -GADDRW=8 -GDATAW=8 -GTAGW=8 -GWORD_SIZE=1 -GQUEUE_SIZE=4 - -ifdef RSP_PARTIAL - PARAMS += -GRSP_PARTIAL=$(RSP_PARTIAL) -endif - -ifdef DUPLICATE_ADDR - PARAMS += -GDUPLICATE_ADDR=$(DUPLICATE_ADDR) -endif - -RTL_DIR = ../../../rtl -DPI_DIR = ../../../dpi - -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(DPI_DIR) - -VERILATOR_ROOT = /opt/verilator -VERILATOR ?= $(VERILATOR_ROOT)/bin/verilator - -VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP) -VL_FLAGS += --language 1800-2009 --assert -Wall -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += --trace -VL_FLAGS += $(RTL_INCLUDE) -VL_FLAGS += $(PARAMS) - -SRCS += memsim.cpp ram.cpp $(DPI_DIR)/util_dpi.cpp - -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized -CXXFLAGS += -I../../../../dpi -I../../../../ - -default: run - -gen: $(SRCS) - @echo - @echo "### VERILATE ###" - $(VERILATOR) $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' - -build: gen - @echo - @echo "### BUILD ###" - $(MAKE) -C obj_dir -j 4 -f V$(TOP).mk - -run: build - @echo - @echo "### RUN ###" - obj_dir/V$(TOP) - -waves: trace.vcd - @echo - @echo "### TRACE ###" - gtkwave -o trace.vcd - -clean: - @echo - @echo "### CLEAN ###" - -rm -rf obj_dir *.vcd *.log - -#--------------------------------------------------------- - diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile new file mode 100644 index 00000000..87dc13f4 --- /dev/null +++ b/hw/unittest/Makefile @@ -0,0 +1,17 @@ +all: + $(MAKE) -C cache + $(MAKE) -C generic_queue + $(MAKE) -C mem_streamer + $(MAKE) -C top_modules + +run: + $(MAKE) -C cache run + $(MAKE) -C generic_queue run + $(MAKE) -C mem_streamer run + $(MAKE) -C top_modules run + +clean: + $(MAKE) -C cache clean + $(MAKE) -C generic_queue clean + $(MAKE) -C mem_streamer clean + $(MAKE) -C top_modules clean diff --git a/hw/unit_tests/VX_divide_tb.v b/hw/unittest/VX_divide_tb.v similarity index 100% rename from hw/unit_tests/VX_divide_tb.v rename to hw/unittest/VX_divide_tb.v diff --git a/hw/unittest/cache/Makefile b/hw/unittest/cache/Makefile new file mode 100644 index 00000000..e3737c3c --- /dev/null +++ b/hw/unittest/cache/Makefile @@ -0,0 +1,77 @@ +DESTDIR ?= . +RTL_DIR = ../../rtl +DPI_DIR = ../../dpi + +CONFIGS += +PARAMS += + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -fPIC -Wno-maybe-uninitialized +CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common +CXXFLAGS += $(CONFIGS) + +LDFLAGS += + +# control RTL debug tracing states +DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK +DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR +DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG +DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA + +DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) + +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache + +SRCS = cachesim.cpp testbench.cpp +SRCS += $(DPI_DIR)/util_dpi.cpp + +TOP = VX_cache_top + +VL_FLAGS = --exe +VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += --x-initial unique --x-assign unique +VL_FLAGS += -DSIMULATION +VL_FLAGS += $(CONFIGS) +VL_FLAGS += $(PARAMS) +VL_FLAGS += $(RTL_INCLUDE) +VL_FLAGS += $(RTL_PKGS) +VL_FLAGS += --cc $(TOP) --top-module $(TOP) + +# Enable Verilator multithreaded simulation +THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +VL_FLAGS += -j $(THREADS) +#VL_FLAGS += --threads $(THREADS) + +# Debugigng +ifdef DEBUG + VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) + CXXFLAGS += -g -O0 $(DBG_FLAGS) +else + VL_FLAGS += -DNDEBUG + CXXFLAGS += -O2 -DNDEBUG +endif + +# Enable perf counters +ifdef PERF + VL_FLAGS += -DPERF_ENABLE + CXXFLAGS += -DPERF_ENABLE +endif + +PROJECT = cache + +all: $(DESTDIR)/$(PROJECT) + +$(DESTDIR)/$(PROJECT): $(SRCS) + verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@ + +run: $(DESTDIR)/$(PROJECT) + $(DESTDIR)/$(PROJECT) + +waves: trace.vcd + gtkwave -o trace.vcd + +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) diff --git a/hw/unit_tests/cache/cachesim.cpp b/hw/unittest/cache/cachesim.cpp similarity index 91% rename from hw/unit_tests/cache/cachesim.cpp rename to hw/unittest/cache/cachesim.cpp index e7fea67e..2c35f5e0 100644 --- a/hw/unit_tests/cache/cachesim.cpp +++ b/hw/unittest/cache/cachesim.cpp @@ -16,36 +16,58 @@ #include #include #include -#include +#include -uint64_t timestamp = 0; +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; double sc_time_stamp() { return timestamp; } +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + CacheSim::CacheSim() { // force random values for uninitialized signals Verilated::randReset(2); ram_ = nullptr; - cache_ = new VVX_cache(); + cache_ = new VVX_cache_top(); mem_rsp_active_ = false; snp_req_active_ = false; -//#ifdef VCD_OUTPUT +#ifdef VCD_OUTPUT Verilated::traceEverOn(true); trace_ = new VerilatedVcdC; cache_->trace(trace_, 99); trace_->open("trace.vcd"); -//#endif +#endif } CacheSim::~CacheSim() { -//#ifdef VCD_OUTPUT +#ifdef VCD_OUTPUT trace_->close(); -//#endif +#endif delete cache_; //need to delete the req and rsp vectors } @@ -88,9 +110,9 @@ void CacheSim::step() { void CacheSim::eval() { cache_->eval(); -//#ifdef VCD_OUTPUT +#ifdef VCD_OUTPUT trace_->dump(timestamp); -//#endif +#endif ++timestamp; } @@ -128,7 +150,6 @@ void CacheSim::clear_req(){ cache_->core_req_valid = 0; } - void CacheSim::send_req(core_req_t *req){ core_req_vec_.push(req); unsigned int *data = new unsigned int[4]; @@ -221,7 +242,7 @@ void CacheSim::eval_mem_bus() { cache_->mem_rsp_valid = 1; //copy data from the rsp queue to the cache module - memcpy((uint8_t*)cache_->mem_rsp_data, mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE); + memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE); cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag; free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue @@ -249,7 +270,7 @@ void CacheSim::eval_mem_bus() { if (cache_->mem_req_rw) { //write = 1 uint64_t byteen = cache_->mem_req_byteen; uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE); - uint8_t* data = (uint8_t*)(cache_->mem_req_data); + uint8_t* data = reinterpret_cast(cache_->mem_req_data.data()); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[base_addr + i] = data[i]; @@ -330,4 +351,3 @@ void CacheSim::get_mem_rsp(){ std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl; std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl; } - diff --git a/hw/unit_tests/cache/cachesim.h b/hw/unittest/cache/cachesim.h similarity index 92% rename from hw/unit_tests/cache/cachesim.h rename to hw/unittest/cache/cachesim.h index b0fad6fe..a38ed774 100644 --- a/hw/unit_tests/cache/cachesim.h +++ b/hw/unittest/cache/cachesim.h @@ -13,15 +13,15 @@ #pragma once -#include "VVX_cache.h" -#include "VVX_cache__Syms.h" +#include "VVX_cache_top.h" +#include "VVX_cache_top__Syms.h" #include "verilated.h" -//#ifdef VCD_OUTPUT +#ifdef VCD_OUTPUT #include -//#endif +#endif -//#include +#include #include "ram.h" #include #include @@ -97,9 +97,9 @@ private: uint32_t snp_req_size_; uint32_t pending_snp_reqs_; - VVX_cache *cache_; + VVX_cache_top *cache_; RAM *ram_; -//#ifdef VCD_OUTPUT +#ifdef VCD_OUTPUT VerilatedVcdC *trace_; -//#endif +#endif }; diff --git a/hw/unit_tests/cache/ram.h b/hw/unittest/cache/ram.h similarity index 100% rename from hw/unit_tests/cache/ram.h rename to hw/unittest/cache/ram.h diff --git a/hw/unit_tests/cache/testbench.cpp b/hw/unittest/cache/testbench.cpp similarity index 99% rename from hw/unit_tests/cache/testbench.cpp rename to hw/unittest/cache/testbench.cpp index 64e188e5..bf9dfb34 100644 --- a/hw/unit_tests/cache/testbench.cpp +++ b/hw/unittest/cache/testbench.cpp @@ -54,6 +54,8 @@ int REQ_RSP(CacheSim *sim){ //verified int check = sim->assert_equal(data, write->tag); if (check == 4) return 1; + + return 0; } int HIT_1(CacheSim *sim){ @@ -242,7 +244,5 @@ int main(int argc, char **argv) std::cout << "FAILED" << std::endl; } - - return 0; } diff --git a/hw/unit_tests/generic_queue/vl_simulator.h b/hw/unittest/common/vl_simulator.h similarity index 95% rename from hw/unit_tests/generic_queue/vl_simulator.h rename to hw/unittest/common/vl_simulator.h index 9b2d57ad..a3b11ba2 100644 --- a/hw/unit_tests/generic_queue/vl_simulator.h +++ b/hw/unittest/common/vl_simulator.h @@ -17,7 +17,7 @@ #include #include "verilated.h" -#ifdef VM_TRACE +#ifdef VCD_OUTPUT #include // Trace file format header #endif @@ -26,7 +26,7 @@ class vl_simulator { private: T top_; -#ifdef VM_TRACE +#ifdef VCD_OUTPUT VerilatedVcdC tfp_; #endif @@ -35,7 +35,7 @@ public: vl_simulator() { top_.clk = 0; top_.reset = 0; - #ifdef VM_TRACE + #ifdef VCD_OUTPUT Verilated::traceEverOn(true); top_.trace(&tfp_, 99); tfp_.open("trace.vcd"); @@ -43,7 +43,7 @@ public: } ~vl_simulator() { - #ifdef VM_TRACE + #ifdef VCD_OUTPUT tfp_.close(); #endif top_.final(); @@ -59,7 +59,7 @@ public: uint64_t step(uint64_t ticks, uint32_t count = 1) { while (count--) { top_.eval(); - #ifdef VM_TRACE + #ifdef VCD_OUTPUT tfp_.dump(ticks); #endif top_.clk = !top_.clk; diff --git a/hw/unittest/generic_queue/Makefile b/hw/unittest/generic_queue/Makefile new file mode 100644 index 00000000..9ec4baa1 --- /dev/null +++ b/hw/unittest/generic_queue/Makefile @@ -0,0 +1,65 @@ +DESTDIR ?= . +RTL_DIR = ../../rtl +DPI_DIR = ../../dpi + +CONFIGS += +PARAMS += + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -fPIC -Wno-maybe-uninitialized +CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common +CXXFLAGS += $(CONFIGS) + +LDFLAGS += + +DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) + +RTL_PKGS += + +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs + +SRCS = main.cpp +SRCS += $(DPI_DIR)/util_dpi.cpp + +TOP = VX_fifo_queue + +VL_FLAGS = --exe +VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += --x-initial unique --x-assign unique +VL_FLAGS += -DSIMULATION +VL_FLAGS += $(CONFIGS) +VL_FLAGS += $(PARAMS) +VL_FLAGS += $(RTL_INCLUDE) +VL_FLAGS += $(RTL_PKGS) +VL_FLAGS += --cc $(TOP) --top-module $(TOP) + +# Enable Verilator multithreaded simulation +THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +VL_FLAGS += -j $(THREADS) +#VL_FLAGS += --threads $(THREADS) + +# Debugigng +ifdef DEBUG + VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) + CXXFLAGS += -g -O0 $(DBG_FLAGS) +else + VL_FLAGS += -DNDEBUG + CXXFLAGS += -O2 -DNDEBUG +endif + +PROJECT = generic_queue + +all: $(DESTDIR)/$(PROJECT) + +$(DESTDIR)/$(PROJECT): $(SRCS) + verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@ + +run: $(DESTDIR)/$(PROJECT) + $(DESTDIR)/$(PROJECT) + +waves: trace.vcd + gtkwave -o trace.vcd + +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) diff --git a/hw/unit_tests/generic_queue/main.cpp b/hw/unittest/generic_queue/main.cpp similarity index 75% rename from hw/unit_tests/generic_queue/main.cpp rename to hw/unittest/generic_queue/main.cpp index ba46ebce..d4826bfa 100644 --- a/hw/unit_tests/generic_queue/main.cpp +++ b/hw/unittest/generic_queue/main.cpp @@ -17,6 +17,14 @@ #define MAX_TICKS 20 +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + #define CHECK(x) \ do { \ if (x) \ @@ -25,10 +33,24 @@ std::abort(); \ } while (false) -uint64_t ticks = 0; +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; double sc_time_stamp() { - return ticks; + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; } using Device = VVX_fifo_queue; @@ -40,14 +62,14 @@ int main(int argc, char **argv) { vl_simulator sim; // run test - ticks = sim.reset(0); - while (ticks < MAX_TICKS) { - switch (ticks) { + timestamp = sim.reset(0); + while (timestamp < MAX_TICKS) { + switch (timestamp) { case 0: // initial values sim->pop = 0; sim->push = 0; - ticks = sim.step(ticks, 2); + timestamp = sim.step(timestamp, 2); break; case 2: // Verify outputs @@ -96,11 +118,11 @@ int main(int argc, char **argv) { } // advance clock - ticks = sim.step(ticks, 2); + timestamp = sim.step(timestamp, 2); } std::cout << "PASSED!" << std::endl; - std::cout << "Simulation time: " << std::dec << ticks/2 << " cycles" << std::endl; + std::cout << "Simulation time: " << std::dec << timestamp/2 << " cycles" << std::endl; return 0; } \ No newline at end of file diff --git a/hw/unittest/mem_streamer/Makefile b/hw/unittest/mem_streamer/Makefile new file mode 100644 index 00000000..9ff5e81c --- /dev/null +++ b/hw/unittest/mem_streamer/Makefile @@ -0,0 +1,65 @@ +DESTDIR ?= . +RTL_DIR = ../../rtl +DPI_DIR = ../../dpi + +CONFIGS += +PARAMS += + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -fPIC -Wno-maybe-uninitialized +CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common +CXXFLAGS += $(CONFIGS) + +LDFLAGS += + +DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) + +RTL_PKGS += + +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs + +SRCS = memsim.cpp ram.cpp +SRCS += $(DPI_DIR)/util_dpi.cpp + +TOP = VX_mem_scheduler + +VL_FLAGS = --exe +VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += --x-initial unique --x-assign unique +VL_FLAGS += -DSIMULATION +VL_FLAGS += $(CONFIGS) +VL_FLAGS += $(PARAMS) +VL_FLAGS += $(RTL_INCLUDE) +VL_FLAGS += $(RTL_PKGS) +VL_FLAGS += --cc $(TOP) --top-module $(TOP) + +# Enable Verilator multithreaded simulation +THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +VL_FLAGS += -j $(THREADS) +#VL_FLAGS += --threads $(THREADS) + +# Debugigng +ifdef DEBUG + VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) + CXXFLAGS += -g -O0 $(DBG_FLAGS) +else + VL_FLAGS += -DNDEBUG + CXXFLAGS += -O2 -DNDEBUG +endif + +PROJECT = mem_streamer + +all: $(DESTDIR)/$(PROJECT) + +$(DESTDIR)/$(PROJECT): $(SRCS) + verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@ + +run: $(DESTDIR)/$(PROJECT) + $(DESTDIR)/$(PROJECT) + +waves: trace.vcd + gtkwave -o trace.vcd + +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) diff --git a/hw/unit_tests/mem_streamer/mem_streamer/memsim.cpp b/hw/unittest/mem_streamer/memsim.cpp similarity index 79% rename from hw/unit_tests/mem_streamer/mem_streamer/memsim.cpp rename to hw/unittest/mem_streamer/memsim.cpp index a6958c39..380f7c99 100644 --- a/hw/unit_tests/mem_streamer/mem_streamer/memsim.cpp +++ b/hw/unittest/mem_streamer/memsim.cpp @@ -15,6 +15,14 @@ #include "memsim.h" #include "ram.h" +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + static bool trace_enabled = false; static uint64_t trace_start_time = 0; static uint64_t trace_stop_time = -1ull; @@ -35,15 +43,11 @@ void sim_trace_enable (bool enable) { trace_enabled = enable; } -////////////////////////////////////////////////////// - int generate_rand (int min, int max) { int range = max - min + 1; return rand() % range + min; } -////////////////////////////////////////////////////// - int generate_rand_mask (int mask) { int result = 0; int m = mask; @@ -56,34 +60,34 @@ int generate_rand_mask (int mask) { return result; } -////////////////////////////////////////////////////// - MemSim::MemSim() { - msu_ = new VVX_mem_streamer(); + msu_ = new VVX_mem_scheduler(); // Enable tracing Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC; - msu_->trace(trace_, 99); - trace_->open("trace.vcd"); + +#ifdef VCD_OUTPUT + Verilated::traceEverOn(true); + trace_ = new VerilatedVcdC; + cache_->trace(trace_, 99); + race_->open("trace.vcd"); +#endif } -////////////////////////////////////////////////////// - MemSim::~MemSim() { +#ifdef VCD_OUTPUT trace_->close(); +#endif delete msu_; } -////////////////////////////////////////////////////// - void MemSim::eval() { msu_->eval(); +#ifdef VCD_OUTPUT trace_->dump(timestamp++); +#endif } -////////////////////////////////////////////////////// - void MemSim::step() { msu_->clk = 0; this->eval(); @@ -92,19 +96,14 @@ void MemSim::step() { this->eval(); } -////////////////////////////////////////////////////// - void MemSim::reset() { msu_->reset = 1; this->step(); msu_->reset = 0; this->step(); - } -////////////////////////////////////////////////////// - void MemSim::attach_core() { if (msu_->req_ready) { msu_->req_valid = generate_rand(0, 1); @@ -118,8 +117,6 @@ void MemSim::attach_core() { msu_->rsp_ready = true; } -////////////////////////////////////////////////////// - void MemSim::attach_ram (RAM *ram) { req_t req; @@ -137,7 +134,6 @@ void MemSim::attach_ram (RAM *ram) { rsp = ram->schedule_rsp(); msu_->mem_rsp_valid = rsp.valid; - msu_->mem_rsp_mask = rsp.mask; msu_->mem_rsp_data = rsp.data; msu_->mem_rsp_tag = rsp.tag; rsp.ready = msu_->mem_rsp_ready; @@ -146,8 +142,6 @@ void MemSim::attach_ram (RAM *ram) { ram->halt_rsp(rsp); } -////////////////////////////////////////////////////// - void MemSim::run(RAM *ram) { this->reset(); @@ -160,8 +154,6 @@ void MemSim::run(RAM *ram) { } } -////////////////////////////////////////////////////// - int main (int argc, char** argv, char** env) { Verilated::commandArgs(argc, argv); @@ -172,5 +164,3 @@ int main (int argc, char** argv, char** env) { return 0; } - -////////////////////////////////////////////////////// \ No newline at end of file diff --git a/hw/unit_tests/mem_streamer/mem_streamer/memsim.h b/hw/unittest/mem_streamer/memsim.h similarity index 70% rename from hw/unit_tests/mem_streamer/mem_streamer/memsim.h rename to hw/unittest/mem_streamer/memsim.h index 1fd09d24..482572bb 100644 --- a/hw/unit_tests/mem_streamer/mem_streamer/memsim.h +++ b/hw/unittest/mem_streamer/memsim.h @@ -18,8 +18,8 @@ #include #include #include -#include "VVX_mem_streamer.h" -#include "VVX_mem_streamer__Syms.h" +#include "VVX_mem_scheduler.h" +#include "VVX_mem_scheduler__Syms.h" #include "ram.h" #define SIM_TIME 5000 @@ -28,20 +28,22 @@ int generate_rand (int min, int max); int generate_rand_mask (int mask); class MemSim { - private: - VVX_mem_streamer *msu_; - VerilatedVcdC *trace_; +public: + MemSim(); + virtual ~MemSim(); - void eval(); - void step(); - void reset(); + void run(RAM *ram); - void attach_core(); - void attach_ram(RAM *ram); +private: + VVX_mem_scheduler *msu_; +#ifdef VCD_OUTPUT + VerilatedVcdC *trace_; +#endif - public: - MemSim(); - virtual ~MemSim(); + void eval(); + void step(); + void reset(); - void run(RAM *ram); + void attach_core(); + void attach_ram(RAM *ram); }; diff --git a/hw/unit_tests/mem_streamer/mem_streamer/ram.cpp b/hw/unittest/mem_streamer/ram.cpp similarity index 90% rename from hw/unit_tests/mem_streamer/mem_streamer/ram.cpp rename to hw/unittest/mem_streamer/ram.cpp index 4a4c4780..ff17dfa1 100644 --- a/hw/unit_tests/mem_streamer/mem_streamer/ram.cpp +++ b/hw/unittest/mem_streamer/ram.cpp @@ -21,8 +21,6 @@ RAM::RAM() { is_rsp_stall_ = false; } -////////////////////////////////////////////////////// - bool RAM::check_duplicate_req(req_t req) { for(int i = 0; i < ram_.size(); i++) { if (ram_[i].addr == req.addr) { @@ -33,8 +31,6 @@ bool RAM::check_duplicate_req(req_t req) { return false; } -////////////////////////////////////////////////////// - int RAM::simulate_cycle_delay() { std::cout<<"RAM: # entries: "<check_duplicate_req(req)) && req.valid && !req.rw) { req_t r; @@ -78,15 +72,11 @@ void RAM::insert_req(req_t req) { } } -////////////////////////////////////////////////////// - uint8_t RAM::is_ready() { // return generate_rand(0b1000, 0b1111); return 0b1111; } -////////////////////////////////////////////////////// - rsp_t RAM::schedule_rsp() { rsp_t rsp; int dequeue_index = this->simulate_cycle_delay(); @@ -124,8 +114,6 @@ rsp_t RAM::schedule_rsp() { return rsp; } -////////////////////////////////////////////////////// - // Schedule response for only one cycle void RAM::halt_rsp(rsp_t rsp) { if (is_rsp_active_ && rsp.valid && rsp.ready) { @@ -133,5 +121,3 @@ void RAM::halt_rsp(rsp_t rsp) { is_rsp_active_ = false; } } - -////////////////////////////////////////////////////// diff --git a/hw/unit_tests/mem_streamer/mem_streamer/ram.h b/hw/unittest/mem_streamer/ram.h similarity index 100% rename from hw/unit_tests/mem_streamer/mem_streamer/ram.h rename to hw/unittest/mem_streamer/ram.h diff --git a/hw/unittest/top_modules/Makefile b/hw/unittest/top_modules/Makefile new file mode 100644 index 00000000..2d0319e7 --- /dev/null +++ b/hw/unittest/top_modules/Makefile @@ -0,0 +1,68 @@ +DESTDIR ?= . +RTL_DIR = ../../rtl +DPI_DIR = ../../dpi + +CONFIGS += +PARAMS += + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -fPIC -Wno-maybe-uninitialized +CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common +CXXFLAGS += $(CONFIGS) + +LDFLAGS += + +DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) + +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv + +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core + +SRCS = main.cpp +SRCS += $(DPI_DIR)/util_dpi.cpp + +VL_FLAGS = --exe +VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += --x-initial unique --x-assign unique +VL_FLAGS += -DSIMULATION +VL_FLAGS += $(CONFIGS) +VL_FLAGS += $(PARAMS) +VL_FLAGS += $(RTL_INCLUDE) +VL_FLAGS += $(RTL_PKGS) + +# Enable Verilator multithreaded simulation +THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +VL_FLAGS += -j $(THREADS) +#VL_FLAGS += --threads $(THREADS) + +# Debugigng +ifdef DEBUG + VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) + CXXFLAGS += -g -O0 $(DBG_FLAGS) +else + VL_FLAGS += -DNDEBUG + CXXFLAGS += -O2 -DNDEBUG +endif + +# Enable perf counters +ifdef PERF + VL_FLAGS += -DPERF_ENABLE + CXXFLAGS += -DPERF_ENABLE +endif + +PROJECT = top_modules + +all: build + +build: $(SRCS) + verilator --build $(VL_FLAGS) --cc VX_cache_cluster_top --top-module VX_cache_cluster_top $^ -CFLAGS '$(CXXFLAGS)' + verilator --build $(VL_FLAGS) --cc VX_cache_top --top-module VX_cache_top $^ -CFLAGS '$(CXXFLAGS)' + verilator --build $(VL_FLAGS) --cc VX_core_top --top-module VX_core_top $^ -CFLAGS '$(CXXFLAGS)' + +run: + +waves: + +clean: + rm -rf obj_dir diff --git a/hw/unittest/top_modules/main.cpp b/hw/unittest/top_modules/main.cpp new file mode 100644 index 00000000..5191b443 --- /dev/null +++ b/hw/unittest/top_modules/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file