Add support for multi-threaded verilator
This commit is contained in:
16
common.mk
16
common.mk
@@ -27,9 +27,13 @@ EXTRA_SIM_REQS ?=
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
HELP_SIMULATION_VARIABLES += \
|
||||
" EXTRA_SIM_FLAGS = additional runtime simulation flags (passed within +permissive)"
|
||||
" EXTRA_SIM_FLAGS = additional runtime simulation flags (passed within +permissive)" \
|
||||
" NUMACTL = set to '1' to wrap simulator in the appropriate numactl command"
|
||||
|
||||
EXTRA_SIM_FLAGS ?=
|
||||
NUMACTL ?= 0
|
||||
|
||||
NUMA_PREFIX = $(if $(filter $(NUMACTL),0),,$(shell $(base_dir)/scripts/numa_prefix))
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
HELP_COMMANDS += \
|
||||
@@ -165,15 +169,15 @@ verilog: $(sim_vsrcs)
|
||||
|
||||
# run normal binary with hardware-logged insn disassembly
|
||||
run-binary: $(output_dir) $(sim)
|
||||
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
|
||||
|
||||
# run simulator as fast as possible (no insn disassembly)
|
||||
run-binary-fast: $(output_dir) $(sim)
|
||||
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null | tee $(sim_out_name).log)
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null | tee $(sim_out_name).log)
|
||||
|
||||
# run simulator with as much debug info as possible
|
||||
run-binary-debug: $(output_dir) $(sim_debug)
|
||||
(set -o pipefail && $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(WAVEFORM_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(WAVEFORM_FLAG) $(PERMISSIVE_OFF) $(BINARY) </dev/null 2> >(spike-dasm > $(sim_out_name).out) | tee $(sim_out_name).log)
|
||||
|
||||
run-fast: run-asm-tests-fast run-bmark-tests-fast
|
||||
|
||||
@@ -209,10 +213,10 @@ $(output_dir)/%: $(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa/% $(output_d
|
||||
ln -sf $< $@
|
||||
|
||||
$(output_dir)/%.run: $(output_dir)/% $(sim)
|
||||
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $< </dev/null | tee $<.log) && touch $@
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(PERMISSIVE_OFF) $< </dev/null | tee $<.log) && touch $@
|
||||
|
||||
$(output_dir)/%.out: $(output_dir)/% $(sim)
|
||||
(set -o pipefail && $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $@) | tee $<.log)
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $@) | tee $<.log)
|
||||
|
||||
#########################################################################################
|
||||
# include build/project specific makefrags made from the generator
|
||||
|
||||
@@ -181,3 +181,18 @@ An open-source vcd-capable waveform viewer is `GTKWave <http://gtkwave.sourcefor
|
||||
|
||||
For a VCS simulation, this will generate a vpd file (this is a proprietary waveform representation format used by Synopsys) that can be loaded to vpd-supported waveform viewers.
|
||||
If you have Synopsys licenses, we recommend using the DVE waveform viewer.
|
||||
|
||||
.. _sw-sim-verilator-opts:
|
||||
|
||||
Additional Verilator Options
|
||||
-------------------------------
|
||||
|
||||
When building the Verilator simulator, there are some additional options:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
make VERILATOR_THREADS=8
|
||||
|
||||
The ``VERILATOR_THREADS=<num>`` option enables the compiled Verilator simulator to use ``<num>`` parallel threads.
|
||||
On a multi-socket machine, you will want to make sure all threads are on the same socket by using ``numactl``.
|
||||
You can also just use the ``numa_prefix`` wrapper, which is a simple wrapper around ``numactl`` that runs your verilated simulator like this: ``$(numa_prefix) ./simulator-<name> <simulator-args>``.
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
// See LICENSE.Berkeley for license details.
|
||||
|
||||
#include "verilated.h"
|
||||
#if VM_TRACE
|
||||
#include <memory>
|
||||
#if CY_FST_TRACE
|
||||
#include "verilated_fst_c.h"
|
||||
#else
|
||||
#include "verilated.h"
|
||||
#include "verilated_vcd_c.h"
|
||||
#endif
|
||||
#endif // CY_FST_TRACE
|
||||
#endif // VM_TRACE
|
||||
#include <fesvr/dtm.h>
|
||||
#include <fesvr/tsi.h>
|
||||
#include "remote_bitbang.h"
|
||||
@@ -16,6 +20,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <getopt.h>
|
||||
// needed for s_vpi_vlog_info, which is needed for multithreading
|
||||
#include <vpi_user.h>
|
||||
|
||||
// For option parsing, which is split across this file, Verilog, and
|
||||
// FESVR's HTIF, a few external files must be pulled in. The list of
|
||||
@@ -35,6 +41,7 @@
|
||||
extern tsi_t* tsi;
|
||||
extern dtm_t* dtm;
|
||||
extern remote_bitbang_t * jtag;
|
||||
extern int dramsim;
|
||||
|
||||
static uint64_t trace_count = 0;
|
||||
bool verbose = false;
|
||||
@@ -50,6 +57,18 @@ double sc_time_stamp()
|
||||
return trace_count;
|
||||
}
|
||||
|
||||
// need to pull htif_argc/htif_argv out here so the thread that calls tick()
|
||||
// for the HTIF device can initialize properly with the cmdline args. this
|
||||
// was pulled out here for multithreading to work
|
||||
static int htif_argc;
|
||||
static char **htif_argv = NULL;
|
||||
// C-linkage definition of vpi_get_vlog_info.
//
// Copies the command line saved in the file-level htif_argc / htif_argv
// variables into the argc / argv fields of the caller-supplied
// s_vpi_vlog_info structure. No other field of the structure is written.
//
// vlog_info_s: structure to fill in; it is dereferenced without a NULL check.
// Returns: always 1.
//
// NOTE(review): presumably this is queried by the HTIF/DPI side running on a
// Verilator thread (see the comment on htif_argc/htif_argv) -- confirm.
extern "C" int vpi_get_vlog_info(s_vpi_vlog_info *vlog_info_s)
|
||||
{
|
||||
vlog_info_s->argc = htif_argc;
|
||||
vlog_info_s->argv = htif_argv;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void usage(const char * program_name)
|
||||
{
|
||||
printf("Usage: %s [EMULATOR OPTION]... [VERILOG PLUSARG]... [HOST OPTION]... BINARY [TARGET OPTION]...\n",
|
||||
@@ -113,12 +132,13 @@ int main(int argc, char** argv)
|
||||
// Port numbers are 16 bit unsigned integers.
|
||||
uint16_t rbb_port = 0;
|
||||
#if VM_TRACE
|
||||
const char* vcdfile_name = NULL;
|
||||
FILE * vcdfile = NULL;
|
||||
uint64_t start = 0;
|
||||
#endif
|
||||
int verilog_plusargs_legal = 1;
|
||||
|
||||
opterr = 1;
|
||||
dramsim = 0;
|
||||
|
||||
while (1) {
|
||||
static struct option long_options[] = {
|
||||
@@ -128,8 +148,7 @@ int main(int argc, char** argv)
|
||||
{"seed", required_argument, 0, 's' },
|
||||
{"rbb-port", required_argument, 0, 'r' },
|
||||
{"verbose", no_argument, 0, 'V' },
|
||||
{"permissive", no_argument, 0, 'p' },
|
||||
{"permissive-off", no_argument, 0, 'o' },
|
||||
{"dramsim", no_argument, 0, 'D' },
|
||||
#if VM_TRACE
|
||||
{"vcd", required_argument, 0, 'v' },
|
||||
{"dump-start", required_argument, 0, 'x' },
|
||||
@@ -138,9 +157,9 @@ int main(int argc, char** argv)
|
||||
};
|
||||
int option_index = 0;
|
||||
#if VM_TRACE
|
||||
int c = getopt_long(argc, argv, "-chm:s:r:v:Vx:po", long_options, &option_index);
|
||||
int c = getopt_long(argc, argv, "-chm:s:r:v:Vx:D", long_options, &option_index);
|
||||
#else
|
||||
int c = getopt_long(argc, argv, "-chm:s:r:Vpo", long_options, &option_index);
|
||||
int c = getopt_long(argc, argv, "-chm:s:r:VD", long_options, &option_index);
|
||||
#endif
|
||||
if (c == -1) break;
|
||||
retry:
|
||||
@@ -153,10 +172,10 @@ int main(int argc, char** argv)
|
||||
case 's': random_seed = atoi(optarg); break;
|
||||
case 'r': rbb_port = atoi(optarg); break;
|
||||
case 'V': verbose = true; break;
|
||||
case 'p': opterr = 0; break;
|
||||
case 'o': opterr = 1; break;
|
||||
case 'D': dramsim = 1; break;
|
||||
#if VM_TRACE
|
||||
case 'v': {
|
||||
vcdfile_name = optarg;
|
||||
vcdfile = strcmp(optarg, "-") == 0 ? stdout : fopen(optarg, "w");
|
||||
if (!vcdfile) {
|
||||
std::cerr << "Unable to open " << optarg << " for VCD write\n";
|
||||
@@ -188,10 +207,8 @@ int main(int argc, char** argv)
|
||||
#endif
|
||||
else if (arg.substr(0, 12) == "+cycle-count")
|
||||
c = 'c';
|
||||
else if (arg == "+permissive")
|
||||
c = 'p';
|
||||
else if (arg == "+permissive-off")
|
||||
c = 'o';
|
||||
else if (arg == "+dramsim")
|
||||
c = 'D';
|
||||
// If we don't find a legacy '+' EMULATOR argument, it still could be
|
||||
// a VERILOG_PLUSARG and not an error.
|
||||
else if (verilog_plusargs_legal) {
|
||||
@@ -223,13 +240,9 @@ int main(int argc, char** argv)
|
||||
}
|
||||
htif_option++;
|
||||
}
|
||||
if(opterr) {
|
||||
std::cerr << argv[0] << ": invalid plus-arg (Verilog or HTIF) \""
|
||||
<< arg << "\"\n";
|
||||
c = '?';
|
||||
} else {
|
||||
c = 'p';
|
||||
}
|
||||
}
|
||||
goto retry;
|
||||
}
|
||||
@@ -251,6 +264,10 @@ done_processing:
|
||||
usage(argv[0]);
|
||||
return 1;
|
||||
}
|
||||
htif_argc = 1 + argc - optind;
|
||||
htif_argv = (char **) malloc((htif_argc) * sizeof (char *));
|
||||
htif_argv[0] = argv[0];
|
||||
for (int i = 1; optind < argc;) htif_argv[i++] = argv[optind++];
|
||||
|
||||
if (verbose)
|
||||
fprintf(stderr, "using random seed %u\n", random_seed);
|
||||
@@ -264,17 +281,17 @@ done_processing:
|
||||
|
||||
#if VM_TRACE
|
||||
Verilated::traceEverOn(true); // Verilator must compute traced signals
|
||||
#if CY_FST_TRACE
|
||||
std::unique_ptr<VerilatedFstC> tfp(new VerilatedFstC);
|
||||
#else
|
||||
std::unique_ptr<VerilatedVcdFILE> vcdfd(new VerilatedVcdFILE(vcdfile));
|
||||
std::unique_ptr<VerilatedVcdC> tfp(new VerilatedVcdC(vcdfd.get()));
|
||||
if (vcdfile) {
|
||||
#endif // CY_FST_TRACE
|
||||
if (vcdfile_name) {
|
||||
tile->trace(tfp.get(), 99); // Trace 99 levels of hierarchy
|
||||
tfp->open("");
|
||||
tfp->open(vcdfile_name);
|
||||
}
|
||||
#endif
|
||||
|
||||
jtag = new remote_bitbang_t(rbb_port);
|
||||
dtm = new dtm_t(argc, argv);
|
||||
tsi = new tsi_t(argc, argv);
|
||||
#endif // VM_TRACE
|
||||
|
||||
signal(SIGTERM, handle_sigterm);
|
||||
|
||||
@@ -304,8 +321,7 @@ done_processing:
|
||||
tile->reset = 0;
|
||||
done_reset = true;
|
||||
|
||||
while (!dtm->done() && !jtag->done() && !tsi->done() &&
|
||||
!tile->io_success && trace_count < max_cycles) {
|
||||
do {
|
||||
tile->clock = 0;
|
||||
tile->eval();
|
||||
#if VM_TRACE
|
||||
@@ -322,6 +338,13 @@ done_processing:
|
||||
#endif
|
||||
trace_count++;
|
||||
}
|
||||
// for verilator multithreading. need to do 1 loop before checking if
|
||||
// tsi exists, since tsi is created by verilated thread on the first
|
||||
// serial_tick.
|
||||
while ((!dtm || !dtm->done()) &&
|
||||
(!jtag || !jtag->done()) &&
|
||||
(!tsi || !tsi->done()) &&
|
||||
!tile->io_success && trace_count < max_cycles);
|
||||
|
||||
#if VM_TRACE
|
||||
if (tfp)
|
||||
@@ -330,17 +353,17 @@ done_processing:
|
||||
fclose(vcdfile);
|
||||
#endif
|
||||
|
||||
if (dtm->exit_code())
|
||||
if (dtm && dtm->exit_code())
|
||||
{
|
||||
fprintf(stderr, "*** FAILED *** via dtm (code = %d, seed %d) after %ld cycles\n", dtm->exit_code(), random_seed, trace_count);
|
||||
ret = dtm->exit_code();
|
||||
}
|
||||
else if (tsi->exit_code())
|
||||
else if (tsi && tsi->exit_code())
|
||||
{
|
||||
fprintf(stderr, "*** FAILED *** (code = %d, seed %d) after %ld cycles\n", tsi->exit_code(), random_seed, trace_count);
|
||||
ret = tsi->exit_code();
|
||||
}
|
||||
else if (jtag->exit_code())
|
||||
else if (jtag && jtag->exit_code())
|
||||
{
|
||||
fprintf(stderr, "*** FAILED *** via jtag (code = %d, seed %d) after %ld cycles\n", jtag->exit_code(), random_seed, trace_count);
|
||||
ret = jtag->exit_code();
|
||||
@@ -359,5 +382,6 @@ done_processing:
|
||||
if (tsi) delete tsi;
|
||||
if (jtag) delete jtag;
|
||||
if (tile) delete tile;
|
||||
if (htif_argv) free(htif_argv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
67
scripts/numa_prefix
Executable file
67
scripts/numa_prefix
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env perl

#============================================================================
# - really simple script, which just prints out the numactl cmd to
#   prefix before your actual command. it determines this based on free
#   memory size attached to every node.
# - when you run this on a machine without `numactl`, the output is empty,
#   so `$(numa_prefix) <cmd> <args>` turns into `<cmd> <args>`.
# - when the machine has `numactl` installed, regardless of the socket-count
#   on the machine, the resulting command is:
#     `numactl -m <socket> -C <core-id list> -- <cmd> <args>`
# - nodes that report no cpus are skipped; if no usable node is found the
#   output is empty, exactly as if `numactl` were not installed.
# - example output from `numactl -H` on a 2 socket machine:
#     available: 2 nodes (0-1)
#     node 0 cpus: 0 2 4 6 8 10 12 14 16 18 20 22
#     node 0 size: 131026 MB
#     node 0 free: 7934 MB
#     node 1 cpus: 1 3 5 7 9 11 13 15 17 19 21 23
#     node 1 size: 65536 MB
#     node 1 free: 429 MB
#     node distances:
#     node   0   1
#       0:  10  20
#       1:  20  10
#============================================================================

use strict;
use warnings;

# `which` prints nothing on stdout when numactl is not on the PATH
my $path = `which numactl`;
if(length($path) > 0) {
  my ($head_line, @rest) = map {chomp; $_} `numactl -H`;

  # treat missing output like an unparsable header instead of warning on undef
  $head_line = '' unless defined($head_line);

  if($head_line =~ /available: (\d+) nodes/) {
    my $node_count     = $1;
    my $best_node_id   = undef;
    my $best_cpus      = undef;
    my $best_free_size = undef;

    # loop through available nodes, selecting the node with the most free mem
    foreach my $num (1..$node_count) {
      # every node contributes exactly three lines: cpus, size, free
      my $cpus_line     = shift(@rest);
      my $mem_size_line = shift(@rest);   # consumed only to stay in step
      my $mem_free_line = shift(@rest);

      if(!defined($cpus_line) || !defined($mem_free_line)) {
        die("truncated `numactl -H` output while reading node entry $num\n");
      }

      # a node without any cpu cannot run the simulator, so never pick it
      next if($cpus_line =~ /node \d+ cpus:\s*$/);

      # the cpu list may be a single id (e.g. "0") or several separated by
      # whitespace; it always starts and ends with a digit
      if($cpus_line =~ /node (\d+) cpus: (\d(?:.*\d)?)$/) {
        my ($node_id, $cpus) = ($1, $2);
        $cpus =~ s/\s+/,/g;   # numactl -C expects a comma-separated list

        if($mem_free_line =~ /node $node_id free: (\d+) \S+$/) {
          my $free_size = $1;
          if(!defined($best_free_size) || ($free_size > $best_free_size)) {
            $best_node_id   = $node_id;
            $best_cpus      = $cpus;
            $best_free_size = $free_size;
          }
        } else {
          die("malformed mem-free line: $mem_free_line\n");
        }
      } else {
        die("malformed cpus line: $cpus_line\n");
      }
    }

    # only emit a prefix when a node was actually selected; an empty output
    # makes the caller run the command unwrapped
    if(defined($best_node_id)) {
      print("numactl -m $best_node_id -C $best_cpus --");
    }
  } else {
    die("malformed head line: $head_line\n");
  }
}
|
||||
@@ -70,6 +70,9 @@ RUNTIME_PROFILING_VFLAGS := $(if $(filter $(VERILATOR_PROFILE),all),\
|
||||
$(if $(filter $(VERILATOR_PROFILE),threads),\
|
||||
--prof-threads,))
|
||||
|
||||
VERILATOR_THREADS ?= 1
|
||||
RUNTIME_THREADS := --threads $(VERILATOR_THREADS) --threads-dpi all
|
||||
|
||||
VERILATOR_FST_MODE ?= 0
|
||||
TRACING_OPTS := $(if $(filter $(VERILATOR_FST_MODE),0),\
|
||||
--trace,--trace-fst --trace-threads 1)
|
||||
@@ -122,6 +125,7 @@ PREPROC_DEFINES := \
|
||||
|
||||
VERILATOR_NONCC_OPTS = \
|
||||
$(RUNTIME_PROFILING_VFLAGS) \
|
||||
$(RUNTIME_THREADS) \
|
||||
$(VERILATOR_OPT_FLAGS) \
|
||||
$(PLATFORM_OPTS) \
|
||||
-Wno-fatal \
|
||||
@@ -157,7 +161,6 @@ VERILATOR_CXXFLAGS = \
|
||||
|
||||
VERILATOR_LDFLAGS = \
|
||||
$(LDFLAGS) \
|
||||
$(RUNTIME_PROFILING_CFLAGS) \
|
||||
-L$(RISCV)/lib \
|
||||
-Wl,-rpath,$(RISCV)/lib \
|
||||
-L$(sim_dir) \
|
||||
@@ -219,7 +222,7 @@ $(sim_debug): $(model_mk_debug) $(dramsim_lib)
|
||||
$(output_dir)/%.vpd: $(output_dir)/% $(sim_debug)
|
||||
rm -f $@.vcd && mkfifo $@.vcd
|
||||
vcd2vpd $@.vcd $@ > /dev/null &
|
||||
(set -o pipefail && $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) -v$@.vcd $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $<.out) | tee $<.log)
|
||||
(set -o pipefail && $(NUMA_PREFIX) $(sim_debug) $(PERMISSIVE_ON) $(SIM_FLAGS) $(EXTRA_SIM_FLAGS) $(SEED_FLAG) $(VERBOSE_FLAGS) -v$@.vcd $(PERMISSIVE_OFF) $< </dev/null 2> >(spike-dasm > $<.out) | tee $<.log)
|
||||
|
||||
#########################################################################################
|
||||
# general cleanup rules
|
||||
|
||||
Reference in New Issue
Block a user