Merge pull request #654 from ucb-bar/multithreaded-verilator

Multithreaded Verilator
This commit is contained in:
Abraham Gonzalez
2020-08-24 11:25:12 -07:00
committed by GitHub
5 changed files with 137 additions and 22 deletions

View File

@@ -27,9 +27,13 @@ EXTRA_SIM_REQS ?=
#----------------------------------------------------------------------------
HELP_SIMULATION_VARIABLES += \
" EXTRA_SIM_FLAGS = additional runtime simulation flags (passed within +permissive)"
" EXTRA_SIM_FLAGS = additional runtime simulation flags (passed within +permissive)" \
" NUMACTL = set to '1' to wrap simulator in the appropriate numactl command"
EXTRA_SIM_FLAGS ?=
NUMACTL ?= 0
NUMA_PREFIX = $(if $(filter $(NUMACTL),0),,$(shell $(base_dir)/scripts/numa_prefix))
#----------------------------------------------------------------------------
HELP_COMMANDS += \
@@ -165,15 +169,15 @@ verilog: $(sim_vsrcs)
# run normal binary with hardware-logged insn dissassembly
run-binary: $(output_dir) $(sim)
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
# run simulator as fast as possible (no insn disassembly)
run-binary-fast: $(output_dir) $(sim)
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null | tee $(sim_out_name).log)
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null | tee $(sim_out_name).log)
# run simulator with as much debug info as possible
run-binary-debug: $(output_dir) $(sim_debug)
(set -o pipefail && $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(WAVEFORM_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
(set -o pipefail && $(NUMA_PREFIX) $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(WAVEFORM_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
run-fast: run-asm-tests-fast run-bmark-tests-fast
@@ -209,10 +213,10 @@ $(output_dir)/%: $(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa/% $(output_d
ln -sf $< $@
$(output_dir)/%.run: $(output_dir)/% $(sim)
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $< </dev/null | tee $<.log) && touch $@
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $< </dev/null | tee $<.log) && touch $@
$(output_dir)/%.out: $(output_dir)/% $(sim)
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $@) | tee $<.log)
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $@) | tee $<.log)
#########################################################################################
# include build/project specific makefrags made from the generator

View File

@@ -181,3 +181,19 @@ An open-source vcd-capable waveform viewer is `GTKWave <http://gtkwave.sourcefor
For a VCS simulation, this will generate a vpd file (this is a proprietary waveform representation format used by Synopsys) that can be loaded to vpd-supported waveform viewers.
If you have Synopsys licenses, we recommend using the DVE waveform viewer.
.. _sw-sim-verilator-opts:
Additional Verilator Options
-------------------------------
When building the verilator simulator there are some additional options:
.. code-block:: shell
make VERILATOR_THREADS=8 NUMACTL=1
The ``VERILATOR_THREADS=<num>`` option enables the compiled Verilator simulator to use ``<num>`` parallel threads.
On a multi-socket machine, you will want to make sure all threads are on the same socket by using ``NUMACTL=1`` to enable ``numactl``.
By enabling this, you will use CHipyard's ``numa_prefix`` wrapper, which is a simple wrapper around ``numactl`` that runs your verilated simulator like this: ``$(numa_prefix) ./simulator-<name> <simulator-args>``.
Note that both these flags are mutually exclusive, you can use either independently (though it makes sense to use ``NUMACTL`` just with ``VERILATOR_THREADS=8`` during a Verilator simulation).

View File

@@ -1,11 +1,15 @@
// See LICENSE.SiFive for license details.
// See LICENSE.Berkeley for license details.
#include "verilated.h"
#if VM_TRACE
#include <memory>
#if CY_FST_TRACE
#include "verilated_fst_c.h"
#else
#include "verilated.h"
#include "verilated_vcd_c.h"
#endif
#endif // CY_FST_TRACE
#endif // VM_TRACE
#include <fesvr/dtm.h>
#include <fesvr/tsi.h>
#include "remote_bitbang.h"
@@ -110,9 +114,10 @@ int main(int argc, char** argv)
uint64_t max_cycles = -1;
int ret = 0;
bool print_cycles = false;
// Port numbers are 16 bit unsigned integers.
// Port numbers are 16 bit unsigned integers.
uint16_t rbb_port = 0;
#if VM_TRACE
const char* vcdfile_name = NULL;
FILE * vcdfile = NULL;
uint64_t start = 0;
#endif
@@ -157,6 +162,7 @@ int main(int argc, char** argv)
case 'o': opterr = 1; break;
#if VM_TRACE
case 'v': {
vcdfile_name = optarg;
vcdfile = strcmp(optarg, "-") == 0 ? stdout : fopen(optarg, "w");
if (!vcdfile) {
std::cerr << "Unable to open " << optarg << " for VCD write\n";
@@ -264,17 +270,20 @@ done_processing:
#if VM_TRACE
Verilated::traceEverOn(true); // Verilator must compute traced signals
#if CY_FST_TRACE
std::unique_ptr<VerilatedFstC> tfp(new VerilatedFstC);
#else
std::unique_ptr<VerilatedVcdFILE> vcdfd(new VerilatedVcdFILE(vcdfile));
std::unique_ptr<VerilatedVcdC> tfp(new VerilatedVcdC(vcdfd.get()));
if (vcdfile) {
#endif // CY_FST_TRACE
if (vcdfile_name) {
tile->trace(tfp.get(), 99); // Trace 99 levels of hierarchy
tfp->open("");
tfp->open(vcdfile_name);
}
#endif
#endif // VM_TRACE
// RocketChip currently only supports RBB port 0, so this needs to stay here
jtag = new remote_bitbang_t(rbb_port);
dtm = new dtm_t(argc, argv);
tsi = new tsi_t(argc, argv);
signal(SIGTERM, handle_sigterm);
@@ -304,8 +313,7 @@ done_processing:
tile->reset = 0;
done_reset = true;
while (!dtm->done() && !jtag->done() && !tsi->done() &&
!tile->io_success && trace_count < max_cycles) {
do {
tile->clock = 0;
tile->eval();
#if VM_TRACE
@@ -322,6 +330,13 @@ done_processing:
#endif
trace_count++;
}
// for verilator multithreading. need to do 1 loop before checking if
// tsi exists, since tsi is created by verilated thread on the first
// serial_tick.
while ((!dtm || !dtm->done()) &&
(!jtag || !jtag->done()) &&
(!tsi || !tsi->done()) &&
!tile->io_success && trace_count < max_cycles);
#if VM_TRACE
if (tfp)
@@ -330,17 +345,17 @@ done_processing:
fclose(vcdfile);
#endif
if (dtm->exit_code())
if (dtm && dtm->exit_code())
{
fprintf(stderr, "*** FAILED *** via dtm (code = %d, seed %d) after %ld cycles\n", dtm->exit_code(), random_seed, trace_count);
ret = dtm->exit_code();
}
else if (tsi->exit_code())
else if (tsi && tsi->exit_code())
{
fprintf(stderr, "*** FAILED *** (code = %d, seed %d) after %ld cycles\n", tsi->exit_code(), random_seed, trace_count);
ret = tsi->exit_code();
}
else if (jtag->exit_code())
else if (jtag && jtag->exit_code())
{
fprintf(stderr, "*** FAILED *** via jtag (code = %d, seed %d) after %ld cycles\n", jtag->exit_code(), random_seed, trace_count);
ret = jtag->exit_code();

76
scripts/numa_prefix Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python
#============================================================================
# - really simple script, which just prints out the numactl cmd to
# prefix before your actual command. it determines this based on free
# memory size attached to every node.
# - when you run this on a machine without `numactl`, the output is empty,
# so `$(numa_prefix) <cmd> <args>` turns in to `<cmd> <args>`.
# - when the machine has `numactl` installed, regardless of the socket-count
# on the machine, the resulting command is:
# `numactl -m <socket> -C <core-id list> -- <cmd> <args>`
# - example output from `numactl -H` on a 2 socket machine:
# available: 2 nodes (0-1)
# node 0 cpus: 0 2 4 6 8 10 12 14 16 18 20 22
# node 0 size: 131026 MB
# node 0 free: 7934 MB
# node 1 cpus: 1 3 5 7 9 11 13 15 17 19 21 23
# node 1 size: 65536 MB
# node 1 free: 429 MB
# node distances:
# node 0 1
# 0: 10 20
# 1: 20 10
#============================================================================
import subprocess
import re
import sys
which_proc = subprocess.Popen(["which", "numactl"], stdout=subprocess.PIPE)
out, err = which_proc.communicate()
if out != "":
numactl_proc = subprocess.Popen(["numactl", "-H"], stdout=subprocess.PIPE)
out, err = numactl_proc.communicate()
lines = out.split("\n")
line_idx = 0
head_line = lines[line_idx]
line_idx += 1
node_match = re.match(r"^ *available: +(\d+) nodes", head_line)
if node_match:
avail_nodes = node_match.group(1)
best_node_id = ""
best_cpus = ""
best_free_size = 0
# loop through available nodes, selecting the node with the most free mem
for i in avail_nodes:
cpu_line = lines[line_idx]
# mem. size unused. skip and use mem. free
mem_free_line = lines[line_idx + 2]
line_idx += 3
cpu_match = re.match(r"^ *node (\d+) cpus: (\d.*\d)$", cpu_line)
if cpu_match:
node_id = cpu_match.group(1)
cpus = cpu_match.group(2).replace(" ", ",")
mem_free_match = re.match(r"^ *node " + node_id + " free: (\d+) \S+$", mem_free_line)
if mem_free_match:
free_size = mem_free_match.group(1)
if int(free_size) > int(best_free_size):
best_node_id = node_id
best_cpus = cpus
best_free_size = free_size
else:
sys.exit("[ERROR] Malformed mem free line: " + mem_free_line)
else:
sys.exit("[ERROR] Malformed cpus line: " + cpu_line)
sys.stdout.write("numactl -m " + best_node_id + " -C " + best_cpus + " --")
else:
sys.exit("[ERROR] Malformed head line: " + head_line)

View File

@@ -53,6 +53,7 @@ HELP_COMPILATION_VARIABLES += \
" VERILATOR_PROFILE = 'none' if no verilator profiling (default)" \
" 'all' if full verilator runtime profiling" \
" 'threads' if runtime thread profiling only" \
" VERILATOR_THREADS = how many threads the simulator will use (default 1)" \
" VERILATOR_FST_MODE = enable FST waveform instead of VCD. use with debug build"
#########################################################################################
@@ -70,6 +71,9 @@ RUNTIME_PROFILING_VFLAGS := $(if $(filter $(VERILATOR_PROFILE),all),\
$(if $(filter $(VERILATOR_PROFILE),threads),\
--prof-threads,))
VERILATOR_THREADS ?= 1
RUNTIME_THREADS := --threads $(VERILATOR_THREADS) --threads-dpi all
VERILATOR_FST_MODE ?= 0
TRACING_OPTS := $(if $(filter $(VERILATOR_FST_MODE),0),\
--trace,--trace-fst --trace-threads 1)
@@ -122,6 +126,7 @@ PREPROC_DEFINES := \
VERILATOR_NONCC_OPTS = \
$(RUNTIME_PROFILING_VFLAGS) \
$(RUNTIME_THREADS) \
$(VERILATOR_OPT_FLAGS) \
$(PLATFORM_OPTS) \
-Wno-fatal \
@@ -157,7 +162,6 @@ VERILATOR_CXXFLAGS = \
VERILATOR_LDFLAGS = \
$(LDFLAGS) \
$(RUNTIME_PROFILING_CFLAGS) \
-L$(RISCV)/lib \
-Wl,-rpath,$(RISCV)/lib \
-L$(sim_dir) \
@@ -200,7 +204,7 @@ $(model_mk): $(sim_vsrcs) $(sim_common_files) $(EXTRA_SIM_REQS)
$(model_mk_debug): $(sim_vsrcs) $(sim_common_files) $(EXTRA_SIM_REQS)
rm -rf $(model_dir_debug)
mkdir -p $(model_dir_debug)
$(VERILATOR) $(VERILATOR_OPTS) $(EXTRA_SIM_SOURCES) -o $(sim_debug) --trace -Mdir $(model_dir_debug) -CFLAGS "-include $(model_header_debug)"
$(VERILATOR) $(VERILATOR_OPTS) $(EXTRA_SIM_SOURCES) -o $(sim_debug) $(TRACING_OPTS) -Mdir $(model_dir_debug) -CFLAGS "-include $(model_header_debug)"
touch $@
#########################################################################################
@@ -219,7 +223,7 @@ $(sim_debug): $(model_mk_debug) $(dramsim_lib)
$(output_dir)/%.vpd: $(output_dir)/% $(sim_debug)
rm -f $@.vcd && mkfifo $@.vcd
vcd2vpd $@.vcd $@ > /dev/null &
(set -o pipefail && $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) -v$@.vcd $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $<.out) | tee $<.log)
(set -o pipefail && $(NUMA_PREFIX) $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) -v$@.vcd $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $<.out) | tee $<.log)
#########################################################################################
# general cleanup rules