diff --git a/.gitmodules b/.gitmodules index 26a07f16..0db51e41 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "third_party/cocogfx"] path = third_party/cocogfx url = https://github.com/gtcasl/cocogfx.git +[submodule "third_party/ramulator"] + path = third_party/ramulator + url = https://github.com/CMU-SAFARI/ramulator.git diff --git a/ci/regression.sh b/ci/regression.sh index 4a1336c8..b99754af 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -102,7 +102,7 @@ FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo # adjust l1 block size to match l2 -CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1" +CONFIGS="-DL1_BLOCK_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1" # test cache banking CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr @@ -119,18 +119,12 @@ CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=simx --cores # test 128-bit MEM block CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo -# test 128-bit MEM and DRAM block -CONFIGS="-DMEM_BLOCK_SIZE=16 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo +# test single-bank DRAM +CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo # test 27-bit DRAM address CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo -# test 128-bit DRAM block -CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo - -# test long memory latency -CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo - echo "configuration tests done!" } diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 4626eeb3..72d3a07a 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -5,8 +5,6 @@ CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common -LDFLAGS += $(RTLSIM_DIR)/librtlsim.a - # Position independent code CXXFLAGS += -fPIC @@ -17,6 +15,7 @@ CXXFLAGS += $(CONFIGS) CXXFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread +LDFLAGS += -L. -lrtlsim SRCS = vortex.cpp ../common/vx_utils.cpp @@ -30,9 +29,9 @@ PROJECT = libvortex.so all: $(PROJECT) $(PROJECT): $(SRCS) - $(MAKE) -C $(RTLSIM_DIR) static + DESTDIR=../../driver/rtlsim $(MAKE) -C $(RTLSIM_DIR) ../../driver/rtlsim/librtlsim.so $(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT) clean: - $(MAKE) -C $(RTLSIM_DIR) clean-static + DESTDIR=../../driver/rtlsim $(MAKE) -C $(RTLSIM_DIR) clean rm -rf $(PROJECT) *.o \ No newline at end of file diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index cc16f0d3..52c290cd 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -11,7 +12,7 @@ #include #include #include -#include +#include #define RAM_PAGE_SIZE 4096 @@ -60,7 +61,9 @@ public: vx_device() : ram_(RAM_PAGE_SIZE) , mem_allocation_(ALLOC_BASE_ADDR) - {} + { + processor_.attach_ram(&ram_); + } ~vx_device() { if (future_.valid()) { @@ -121,12 +124,9 @@ public: future_.wait(); } // start new run - simulator_.attach_ram(&ram_); future_ = std::async(std::launch::async, [&]{ - simulator_.reset(); - while (simulator_.is_busy()) { - simulator_.step(); - } + processor_.reset(); + processor_.run(); }); return 0; } @@ -149,7 +149,7 @@ public: private: RAM ram_; - Simulator simulator_; + Processor processor_; uint64_t mem_allocation_; std::future future_; }; diff --git a/driver/simx/Makefile b/driver/simx/Makefile index 14114f2a..b5723972 100644 --- a/driver/simx/Makefile +++ b/driver/simx/Makefile @@ -9,7 +9,7 @@ CXXFLAGS += $(CONFIGS) CXXFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread -LDFLAGS += $(SIMX_DIR)/libsimx.a +LDFLAGS += -L. -lsimx SRCS = vortex.cpp ../common/vx_utils.cpp @@ -18,9 +18,9 @@ PROJECT = libvortex.so all: $(PROJECT) $(PROJECT): $(SRCS) - $(MAKE) -C $(SIMX_DIR) static + DESTDIR=../../driver/simx $(MAKE) -C $(SIMX_DIR) ../../driver/simx/libsimx.so $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ clean: - $(MAKE) -C $(SIMX_DIR) clean-static - rm -rf $(PROJECT) *.o \ No newline at end of file + DESTDIR=../../driver/simx $(MAKE) -C $(SIMX_DIR) clean + rm -rf libsimx.so $(PROJECT) *.o \ No newline at end of file diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index 2aaef1e9..4b086d7e 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -60,7 +60,13 @@ public: : arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS) , ram_(RAM_PAGE_SIZE) , mem_allocation_(ALLOC_BASE_ADDR) - {} + { + // setup memory simulator + memsim_ = MemSim::Create(MemSim::Config{ + DRAM_CHANNELS, + arch_.num_cores() + }); + } ~vx_device() { if (future_.valid()) { @@ -113,13 +119,33 @@ public: if (future_.valid()) { future_.wait(); } + // start new run - SimPlatform::instance().flush(); - processor_ = std::make_shared(arch_); - processor_->attach_ram(&ram_); future_ = std::async(std::launch::async, [&]{ - processor_->run(); + if (processor_) { + // release current processor instance + processor_->MemReqPort.unbind(); + memsim_->MemRspPort.unbind(); + SimPlatform::instance().release_object(processor_); + } + + // create new processor instance + processor_ = Processor::Create(arch_); + processor_->MemReqPort.bind(&memsim_->MemReqPort); + memsim_->MemRspPort.bind(&processor_->MemRspPort); + + // attach memory object + processor_->attach_ram(&ram_); + + // run simulation + int exitcode; + for (;;) { + SimPlatform::instance().step(); + if (processor_->check_exit(&exitcode)) + break; + }; }); + return 0; } @@ -141,6 +167,7 @@ public: private: ArchDef arch_; RAM ram_; + MemSim::Ptr memsim_; Processor::Ptr processor_; uint64_t mem_allocation_; std::future future_; diff --git a/driver/vlsim/Makefile b/driver/vlsim/Makefile index 5608ad11..23c07635 100644 --- a/driver/vlsim/Makefile +++ b/driver/vlsim/Makefile @@ -9,8 +9,6 @@ CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I. -I../include -I../../hw -I$(VLSIM_DIR) -LDFLAGS += $(VLSIM_DIR)/libopae-c-vlsim.a - # Position independent code CXXFLAGS += -fPIC @@ -21,6 +19,7 @@ CXXFLAGS += $(CONFIGS) CXXFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread +LDFLAGS += -L. -lopae-c-vlsim SRCS = ../common/opae.cpp ../common/vx_utils.cpp @@ -47,9 +46,9 @@ scope-defs.h: $(SCRIPT_DIR)/scope.json scope: scope-defs.h $(PROJECT): $(SRCS) $(SCOPE_H) - $(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C $(VLSIM_DIR) static + DESTDIR=../../driver/vlsim $(MAKE) -C $(VLSIM_DIR) ../../driver/vlsim/libopae-c-vlsim.so $(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -o $(PROJECT) clean: - $(MAKE) -C $(VLSIM_DIR) clean-static - rm -rf $(PROJECT) *.o scope-defs.h \ No newline at end of file + DESTDIR=../../driver/vlsim $(MAKE) -C $(VLSIM_DIR) clean + rm -rf libopae-c-vlsim.so $(PROJECT) *.o scope-defs.h \ No newline at end of file diff --git a/sim/common/simobject.h b/sim/common/simobject.h index 3a5ab2b6..2830ea06 100644 --- a/sim/common/simobject.h +++ b/sim/common/simobject.h @@ -51,8 +51,7 @@ public: peer_ = peer; } - void unbind() { - assert(peer_ == nullptr); + void unbind() { peer_ = nullptr; } @@ -292,12 +291,16 @@ public: } template - typename SimObject::Ptr CreateObject(Args&&... args) { + typename SimObject::Ptr create_object(Args&&... args) { auto obj = std::make_shared(SimContext{}, std::forward(args)...); objects_.push_back(obj); return obj; } + void release_object(const SimObjectBase::Ptr& object) { + objects_.remove(object); + } + template void schedule(const typename SimCallEvent::Func& callback, const Pkt& pkt, @@ -352,7 +355,7 @@ private: events_.emplace_back(evt); } - std::vector objects_; + std::list objects_; std::list events_; uint64_t cycles_; @@ -369,7 +372,7 @@ inline SimObjectBase::SimObjectBase(const SimContext&, const char* name) template template typename SimObject::Ptr SimObject::Create(Args&&... args) { - return SimPlatform::instance().CreateObject(std::forward(args)...); + return SimPlatform::instance().create_object(std::forward(args)...); } template diff --git a/sim/common/texturing.h b/sim/common/texturing.h index 9b0e4526..5941594e 100644 --- a/sim/common/texturing.h +++ b/sim/common/texturing.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include using namespace cocogfx; diff --git a/sim/common/util.h b/sim/common/util.h index d66305ee..171bbe68 100644 --- a/sim/common/util.h +++ b/sim/common/util.h @@ -11,4 +11,42 @@ void unused(Args&&...) {} #define __unused(...) unused(__VA_ARGS__) // return file extension -const char* fileExtension(const char* filepath); \ No newline at end of file +const char* fileExtension(const char* filepath); + +#if defined(_MSC_VER) +#define DISABLE_WARNING_PUSH __pragma(warning(push)) +#define DISABLE_WARNING_POP __pragma(warning(pop)) +#define DISABLE_WARNING_UNUSED_PARAMETER \ + __pragma(warning(disable : 4100)) +#define DISABLE_WARNING_UNREFERENCED_FUNCTION __pragma(warning(disable : 4505)) +#define DISABLE_WARNING_ANONYMOUS_STRUCT __pragma(warning(disable : 4201)) +#define DISABLE_WARNING_UNUSED_VARIABLE __pragma(warning(disable : 4189)) +#elif defined(__GNUC__) +#define DISABLE_WARNING_PUSH _Pragma("GCC diagnostic push") +#define DISABLE_WARNING_POP _Pragma("GCC diagnostic pop") +#define DISABLE_WARNING_UNUSED_PARAMETER \ + _Pragma("GCC diagnostic ignored \"-Wunused-parameter\"") +#define DISABLE_WARNING_UNREFERENCED_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#define DISABLE_WARNING_ANONYMOUS_STRUCT \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#define DISABLE_WARNING_UNUSED_VARIABLE \ + _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#elif defined(__clang__) +#define DISABLE_WARNING_PUSH _Pragma("clang diagnostic push") +#define DISABLE_WARNING_POP _Pragma("clang diagnostic pop") +#define DISABLE_WARNING_UNUSED_PARAMETER \ + _Pragma("clang diagnostic ignored \"-Wunused-parameter\"") +#define DISABLE_WARNING_UNREFERENCED_FUNCTION \ + _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#define DISABLE_WARNING_ANONYMOUS_STRUCT \ + _Pragma("clang diagnostic ignored \"-Wgnu-anonymous-struct\"") +#define DISABLE_WARNING_UNUSED_VARIABLE \ + _Pragma("clang diagnostic ignored \"-Wunused-but-set-variable\"") +#else +#define DISABLE_WARNING_PUSH +#define DISABLE_WARNING_POP +#define DISABLE_WARNING_UNUSED_PARAMETER +#define DISABLE_WARNING_UNREFERENCED_FUNCTION +#define DISABLE_WARNING_ANONYMOUS_STRUCT +#endif \ No newline at end of file diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index df9970d5..607dcf41 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -1,3 +1,4 @@ +DESTDIR ?= . RTL_DIR = ../../hw/rtl DPI_DIR = ../../hw/dpi THIRD_PARTY_DIR = ../../third_party @@ -6,8 +7,10 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I../../../hw -I../../common CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include +CXXFLAGS += -I../$(THIRD_PARTY_DIR) LDFLAGS += ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a +LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator # control RTL debug tracing states DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE @@ -31,7 +34,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += main.cpp simulator.cpp +SRCS += processor.cpp ifdef AXI_BUS TOP = Vortex_axi @@ -86,15 +89,11 @@ PROJECT = rtlsim all: $(PROJECT) -$(PROJECT): $(SRCS) - verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) +$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp + verilator --build $(VL_FLAGS) $^ $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$@ -static: $(SRCS) - verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' - $(AR) rcs lib$(PROJECT).a obj_dir/*.o $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o +$(DESTDIR)/lib$(PROJECT).so: $(SRCS) + verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '-shared $(LDFLAGS)' -o ../$@ -clean-static: - rm -rf lib$(PROJECT).a obj_dir - -clean: clean-static - rm -rf $(PROJECT) +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so diff --git a/sim/rtlsim/main.cpp b/sim/rtlsim/main.cpp index 652e550f..c61fbec8 100644 --- a/sim/rtlsim/main.cpp +++ b/sim/rtlsim/main.cpp @@ -5,7 +5,8 @@ #include #include #include -#include "simulator.h" +#include +#include "processor.h" #define RAM_PAGE_SIZE 4096 @@ -52,8 +53,8 @@ int main(int argc, char **argv) { std::cout << "Running " << program << "..." << std::endl; vortex::RAM ram(RAM_PAGE_SIZE); - vortex::Simulator simulator; - simulator.attach_ram(&ram); + vortex::Processor processor; + processor.attach_ram(&ram); std::string program_ext(fileExtension(program)); if (program_ext == "bin") { @@ -65,7 +66,7 @@ int main(int argc, char **argv) { return -1; } - exitcode = simulator.run(); + exitcode = processor.run(); if (riscv_test) { if (1 == exitcode) { diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp new file mode 100644 index 00000000..7c20a442 --- /dev/null +++ b/sim/rtlsim/processor.cpp @@ -0,0 +1,599 @@ +#include "processor.h" + +#include + +#ifdef AXI_BUS +#include "VVortex_axi.h" +#include "VVortex_axi__Syms.h" +#else +#include "VVortex.h" +#include "VVortex__Syms.h" +#endif + +#ifdef VCD_OUTPUT +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define RAMULATOR +#include +#include +#include + +#ifndef MEMORY_BANKS + #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #else + #define MEMORY_BANKS 2 + #endif +#endif + +#define ENABLE_MEM_STALLS + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +#ifndef VERILATOR_RESET_VALUE +#define VERILATOR_RESET_VALUE 2 +#endif + +#define VL_WDATA_GETW(lwp, i, n, w) \ + VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) + +using namespace vortex; + +static uint64_t timestamp = 0; + +double sc_time_stamp() { + return timestamp; +} + +/////////////////////////////////////////////////////////////////////////////// + +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +/////////////////////////////////////////////////////////////////////////////// + +class Processor::Impl { +public: + Impl() { + // force random values for unitialized signals + Verilated::randReset(VERILATOR_RESET_VALUE); + Verilated::randSeed(50); + + // turn off assertion before reset + Verilated::assertOn(false); + + // create RTL module instance + #ifdef AXI_BUS + device_ = new VVortex_axi(); + #else + device_ = new VVortex(); + #endif + + #ifdef VCD_OUTPUT + Verilated::traceEverOn(true); + trace_ = new VerilatedVcdC(); + device_->trace(trace_, 99); + trace_->open("trace.vcd"); + #endif + + ram_ = nullptr; + + // initialize dram simulator + ramulator::Config ram_config; + ram_config.add("standard", "DDR4"); + ram_config.add("channels", std::to_string(MEMORY_BANKS)); + ram_config.add("ranks", "1"); + ram_config.add("speed", "DDR4_2400R"); + ram_config.add("org", "DDR4_4Gb_x8"); + ram_config.add("mapping", "defaultmapping"); + ram_config.set_core_num(1); + dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); + Stats::statlist.output("ramulator.ddr4.log"); + + // reset the device + this->reset(); + } + + ~Impl() { + for (auto& buf : print_bufs_) { + auto str = buf.second.str(); + if (!str.empty()) { + std::cout << "#" << buf.first << ": " << str << std::endl; + } + } + + #ifdef VCD_OUTPUT + trace_->close(); + delete trace_; + #endif + + delete device_; + + if (dram_) { + dram_->finish(); + Stats::statlist.printall(); + delete dram_; + } + } + + void attach_ram(RAM* ram) { + ram_ = ram; + } + + void reset() { + print_bufs_.clear(); + + pending_mem_reqs_.clear(); + + mem_rd_rsp_active_ = false; + mem_wr_rsp_active_ = false; + + #ifdef AXI_BUS + this->reset_axi_bus(); + #else + this->reset_avs_bus(); + #endif + + device_->reset = 1; + + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } + + device_->reset = 0; + + // Turn on assertion after reset + Verilated::assertOn(true); + } + + int run() { + int exitcode = 0; + + #ifndef NDEBUG + std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; + #endif + + // execute program + while (device_->busy) { + if (get_ebreak()) { + exitcode = get_last_wb_value(3); + break; + } + this->step(); + } + + // wait 5 cycles to flush the pipeline + this->wait(5); + + return exitcode; + } + +private: + + void step() { + + device_->clk = 0; + this->eval(); + + #ifdef AXI_BUS + this->eval_axi_bus(0); + #else + this->eval_avs_bus(0); + #endif + + device_->clk = 1; + this->eval(); + + #ifdef AXI_BUS + this->eval_axi_bus(1); + #else + this->eval_avs_bus(1); + #endif + + dram_->tick(); + + #ifndef NDEBUG + fflush(stdout); + #endif + } + + void eval() { + device_->eval(); + #ifdef VCD_OUTPUT + if (sim_trace_enabled()) { + trace_->dump(timestamp); + } + #endif + ++timestamp; + } + +#ifdef AXI_BUS + + void reset_axi_bus() { + device_->m_axi_wready = 0; + device_->m_axi_awready = 0; + device_->m_axi_arready = 0; + device_->m_axi_rvalid = 0; + device_->m_axi_bvalid = 0; + } + + void eval_axi_bus(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = device_->m_axi_rready; + mem_wr_rsp_ready_ = device_->m_axi_bready; + return; + } + + if (ram_ == nullptr) { + device_->m_axi_wready = 0; + device_->m_axi_awready = 0; + device_->m_axi_arready = 0; + return; + } + + // process memory responses + if (mem_rd_rsp_active_ + && device_->m_axi_rvalid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && !(*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_req = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + device_->m_axi_rvalid = 1; + device_->m_axi_rid = mem_req->tag; + device_->m_axi_rresp = 0; + device_->m_axi_rlast = 1; + memcpy((uint8_t*)device_->m_axi_rdata, mem_req->block.data(), MEM_BLOCK_SIZE); + pending_mem_reqs_.erase(mem_rsp_it); + mem_rd_rsp_active_ = true; + delete mem_req; + } else { + device_->m_axi_rvalid = 0; + } + } + + // send memory write response + if (mem_wr_rsp_active_ + && device_->m_axi_bvalid && mem_wr_rsp_ready_) { + mem_wr_rsp_active_ = false; + } + if (!mem_wr_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && (*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_req = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_req->addr); + */ + device_->m_axi_bvalid = 1; + device_->m_axi_bid = mem_req->tag; + device_->m_axi_bresp = 0; + pending_mem_reqs_.erase(mem_rsp_it); + mem_wr_rsp_active_ = true; + delete mem_req; + } else { + device_->m_axi_bvalid = 0; + } + } + + // select the memory bank + uint32_t req_addr = device_->m_axi_wvalid ? device_->m_axi_awaddr : device_->m_axi_araddr; + + // process memory requests + if (device_->m_axi_wvalid || device_->m_axi_arvalid) { + if (device_->m_axi_wvalid) { + uint64_t byteen = device_->m_axi_wstrb; + unsigned base_addr = device_->m_axi_awaddr; + uint8_t* data = (uint8_t*)(device_->m_axi_wdata); + + // check console output + if (base_addr >= IO_COUT_ADDR + && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + /* + printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_awid; + mem_req->addr = device_->m_axi_awaddr; + mem_req->write = true; + mem_req->ready = true; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + ramulator::Request dram_req( + device_->m_axi_awaddr, + ramulator::Request::Type::WRITE, + 0 + ); + dram_->send(dram_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_arid; + mem_req->addr = device_->m_axi_araddr; + ram_->read(mem_req->block.data(), device_->m_axi_araddr, MEM_BLOCK_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + ramulator::Request dram_req( + device_->m_axi_araddr, + ramulator::Request::Type::READ, + std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) { + mem_req->ready = true; + }, placeholders::_1, mem_req), + 0 + ); + dram_->send(dram_req); + } + } + + device_->m_axi_wready = 1; + device_->m_axi_awready = 1; + device_->m_axi_arready = 1; + } + +#else + + void reset_avs_bus() { + device_->mem_req_ready = 0; + device_->mem_rsp_valid = 0; + } + + void eval_avs_bus(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = device_->mem_rsp_ready; + return; + } + + if (ram_ == nullptr) { + device_->mem_req_ready = 0; + return; + } + + // process memory responses + if (mem_rd_rsp_active_ + && device_->mem_rsp_valid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready) { + device_->mem_rsp_valid = 1; + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_req = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + memcpy((uint8_t*)device_->mem_rsp_data, mem_req->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag = mem_req->tag; + pending_mem_reqs_.erase(mem_rsp_it); + mem_rd_rsp_active_ = true; + delete mem_req; + } else { + device_->mem_rsp_valid = 0; + } + } + + // process memory requests + if (device_->mem_req_valid) { + uint32_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); + if (device_->mem_req_rw) { + // process writes + uint64_t byteen = device_->mem_req_byteen; + uint8_t* data = (uint8_t*)(device_->mem_req_data); + + // check console output + if (byte_addr >= IO_COUT_ADDR + && byte_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + /* + printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, byte_addr, byteen); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + // send dram request + ramulator::Request dram_req( + byte_addr, + ramulator::Request::Type::WRITE, + 0 + ); + dram_->send(dram_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag; + mem_req->addr = byte_addr; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + ramulator::Request dram_req( + byte_addr, + ramulator::Request::Type::READ, + std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) { + mem_req->ready = true; + }, placeholders::_1, mem_req), + 0 + ); + dram_->send(dram_req); + } + } + + device_->mem_req_ready = 1; + } + +#endif + + void wait(uint32_t cycles) { + for (int i = 0; i < cycles; ++i) { + this->step(); + } + } + + bool get_ebreak() const { + #ifdef AXI_BUS + return (bool)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; + #else + return (bool)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; + #endif + } + + int get_last_wb_value(int reg) const { + #ifdef AXI_BUS + return (int)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; + #else + return (int)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; + #endif + } + +private: + + typedef struct { + bool ready; + std::array block; + uint64_t addr; + uint64_t tag; + bool write; + } mem_req_t; + +#ifdef AXI_BUS + VVortex_axi *device_; +#else + VVortex *device_; +#endif +#ifdef VCD_OUTPUT + VerilatedVcdC *trace_; +#endif + + std::unordered_map print_bufs_; + + std::list pending_mem_reqs_; + + bool mem_rd_rsp_active_; + bool mem_rd_rsp_ready_; + + bool mem_wr_rsp_active_; + bool mem_wr_rsp_ready_; + + RAM *ram_; + + ramulator::Gem5Wrapper* dram_; +}; + +/////////////////////////////////////////////////////////////////////////////// + +Processor::Processor() + : impl_(new Impl()) +{} + +Processor::~Processor() { + delete impl_; +} + +void Processor::attach_ram(RAM* mem) { + impl_->attach_ram(mem); +} + +void Processor::reset() { + impl_->reset(); +} + +int Processor::run() { + return impl_->run(); +} \ No newline at end of file diff --git a/sim/rtlsim/processor.h b/sim/rtlsim/processor.h new file mode 100644 index 00000000..a877044f --- /dev/null +++ b/sim/rtlsim/processor.h @@ -0,0 +1,25 @@ +#pragma once + +namespace vortex { + +class RAM; + +class Processor { +public: + + Processor(); + virtual ~Processor(); + + void attach_ram(RAM* ram); + + void reset(); + + int run(); + +private: + + class Impl; + Impl* impl_; +}; + +} \ No newline at end of file diff --git a/sim/rtlsim/simulator.cpp b/sim/rtlsim/simulator.cpp deleted file mode 100644 index 0f6df7d7..00000000 --- a/sim/rtlsim/simulator.cpp +++ /dev/null @@ -1,579 +0,0 @@ -#include "simulator.h" - -#include - -#ifdef AXI_BUS -#include "VVortex_axi.h" -#include "VVortex_axi__Syms.h" -#else -#include "VVortex.h" -#include "VVortex__Syms.h" -#endif - -#ifdef VCD_OUTPUT -#include -#endif - -#include -#include -#include -#include - -#define ENABLE_MEM_STALLS - -#ifndef TRACE_START_TIME -#define TRACE_START_TIME 0ull -#endif - -#ifndef TRACE_STOP_TIME -#define TRACE_STOP_TIME -1ull -#endif - -#ifndef MEM_LATENCY -#define MEM_LATENCY 24 -#endif - -#ifndef MEM_RQ_SIZE -#define MEM_RQ_SIZE 16 -#endif - -#ifndef MEM_STALLS_MODULO -#define MEM_STALLS_MODULO 16 -#endif - -#ifndef VERILATOR_RESET_VALUE -#define VERILATOR_RESET_VALUE 2 -#endif - -#define VL_WDATA_GETW(lwp, i, n, w) \ - VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) - -using namespace vortex; - -static uint64_t timestamp = 0; - -double sc_time_stamp() { - return timestamp; -} - -/////////////////////////////////////////////////////////////////////////////// - -static bool trace_enabled = false; -static uint64_t trace_start_time = TRACE_START_TIME; -static uint64_t trace_stop_time = TRACE_STOP_TIME; - -bool sim_trace_enabled() { - if (timestamp >= trace_start_time - && timestamp < trace_stop_time) - return true; - return trace_enabled; -} - -void sim_trace_enable(bool enable) { - trace_enabled = enable; -} - -/////////////////////////////////////////////////////////////////////////////// - -namespace vortex { -class VL_OBJ { -public: -#ifdef AXI_BUS - VVortex_axi *device; -#else - VVortex *device; -#endif -#ifdef VCD_OUTPUT - VerilatedVcdC *trace; -#endif - - VL_OBJ() { - // force random values for unitialized signals - Verilated::randReset(VERILATOR_RESET_VALUE); - Verilated::randSeed(50); - - // Turn off assertion before reset - Verilated::assertOn(false); - - #ifdef AXI_BUS - this->device = new VVortex_axi(); - #else - this->device = new VVortex(); - #endif - - #ifdef VCD_OUTPUT - Verilated::traceEverOn(true); - this->trace = new VerilatedVcdC(); - this->device->trace(this->trace, 99); - this->trace->open("trace.vcd"); - #endif - } - - ~VL_OBJ() { - #ifdef VCD_OUTPUT - this->trace->close(); - delete this->trace; - #endif - delete this->device; - } -}; -} - -/////////////////////////////////////////////////////////////////////////////// - -Simulator::Simulator() { - vl_obj_ = new VL_OBJ(); - ram_ = nullptr; - // reset the device - this->reset(); -} - -Simulator::~Simulator() { - for (auto& buf : print_bufs_) { - auto str = buf.second.str(); - if (!str.empty()) { - std::cout << "#" << buf.first << ": " << str << std::endl; - } - } - delete vl_obj_; -} - -void Simulator::attach_ram(RAM* ram) { - ram_ = ram; - for (int b = 0; b < MEMORY_BANKS; ++b) { - mem_rsp_vec_[b].clear(); - } - last_mem_rsp_bank_ = 0; -} - -void Simulator::reset() { - print_bufs_.clear(); - - for (int b = 0; b < MEMORY_BANKS; ++b) { - mem_rsp_vec_[b].clear(); - } - last_mem_rsp_bank_ = 0; - mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; - -#ifdef AXI_BUS - this->reset_axi_bus(); -#else - this->reset_mem_bus(); -#endif - - vl_obj_->device->reset = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - vl_obj_->device->clk = 0; - this->eval(); - vl_obj_->device->clk = 1; - this->eval(); - } - - vl_obj_->device->reset = 0; - - // Turn on assertion after reset - Verilated::assertOn(true); -} - -void Simulator::step() { - - vl_obj_->device->clk = 0; - this->eval(); - -#ifdef AXI_BUS - this->eval_axi_bus(0); -#else - this->eval_mem_bus(0); -#endif - - vl_obj_->device->clk = 1; - this->eval(); - -#ifdef AXI_BUS - this->eval_axi_bus(1); -#else - this->eval_mem_bus(1); -#endif - -#ifndef NDEBUG - fflush(stdout); -#endif -} - -void Simulator::eval() { - vl_obj_->device->eval(); -#ifdef VCD_OUTPUT - if (sim_trace_enabled()) { - vl_obj_->trace->dump(timestamp); - } -#endif - ++timestamp; -} - -#ifdef AXI_BUS - -void Simulator::reset_axi_bus() { - vl_obj_->device->m_axi_wready = 0; - vl_obj_->device->m_axi_awready = 0; - vl_obj_->device->m_axi_arready = 0; - vl_obj_->device->m_axi_rvalid = 0; - vl_obj_->device->m_axi_bvalid = 0; -} - -void Simulator::eval_axi_bus(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = vl_obj_->device->m_axi_rready; - mem_wr_rsp_ready_ = vl_obj_->device->m_axi_bready; - return; - } - - if (ram_ == nullptr) { - vl_obj_->device->m_axi_wready = 0; - vl_obj_->device->m_axi_awready = 0; - vl_obj_->device->m_axi_arready = 0; - return; - } - - // update memory responses schedule - for (int b = 0; b < MEMORY_BANKS; ++b) { - for (auto& rsp : mem_rsp_vec_[b]) { - if (rsp.cycles_left > 0) - rsp.cycles_left -= 1; - } - } - - bool has_rd_response = false; - bool has_wr_response = false; - - // schedule memory responses that are ready - for (int i = 0; i < MEMORY_BANKS; ++i) { - uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS; - if (!mem_rsp_vec_[b].empty()) { - auto mem_rsp_it = mem_rsp_vec_[b].begin(); - if (mem_rsp_it->cycles_left <= 0) { - has_rd_response = !mem_rsp_it->write; - has_wr_response = mem_rsp_it->write; - last_mem_rsp_bank_ = b; - break; - } - } - } - - // send memory read response - if (mem_rd_rsp_active_ - && vl_obj_->device->m_axi_rvalid && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (has_rd_response) { - auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); - /* - printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); - } - printf("\n"); - */ - vl_obj_->device->m_axi_rvalid = 1; - vl_obj_->device->m_axi_rid = mem_rsp_it->tag; - vl_obj_->device->m_axi_rresp = 0; - vl_obj_->device->m_axi_rlast = 1; - memcpy((uint8_t*)vl_obj_->device->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); - mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - } else { - vl_obj_->device->m_axi_rvalid = 0; - } - } - - // send memory write response - if (mem_wr_rsp_active_ - && vl_obj_->device->m_axi_bvalid && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (has_wr_response) { - auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); - /* - printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); - */ - vl_obj_->device->m_axi_bvalid = 1; - vl_obj_->device->m_axi_bid = mem_rsp_it->tag; - vl_obj_->device->m_axi_bresp = 0; - mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - } else { - vl_obj_->device->m_axi_bvalid = 0; - } - } - - // select the memory bank - uint32_t req_addr = vl_obj_->device->m_axi_wvalid ? vl_obj_->device->m_axi_awaddr : vl_obj_->device->m_axi_araddr; - uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0; - - // handle memory stalls - bool mem_stalled = false; -#ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) { - mem_stalled = true; - } -#endif - - // process memory requests - if (!mem_stalled) { - if (vl_obj_->device->m_axi_wvalid || vl_obj_->device->m_axi_arvalid) { - if (vl_obj_->device->m_axi_wvalid) { - uint64_t byteen = vl_obj_->device->m_axi_wstrb; - unsigned base_addr = vl_obj_->device->m_axi_awaddr; - uint8_t* data = (uint8_t*)(vl_obj_->device->m_axi_wdata); - - // detect stdout write - if (base_addr >= IO_COUT_ADDR - && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - /* - printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - mem_req_t mem_req; - mem_req.tag = vl_obj_->device->m_axi_arid; - mem_req.addr = vl_obj_->device->m_axi_araddr; - mem_req.cycles_left = 0; - mem_req.write = 1; - mem_rsp_vec_[req_bank].emplace_back(mem_req); - } - } else { - mem_req_t mem_req; - mem_req.tag = vl_obj_->device->m_axi_arid; - mem_req.addr = vl_obj_->device->m_axi_araddr; - ram_->read(mem_req.block.data(), vl_obj_->device->m_axi_araddr, MEM_BLOCK_SIZE); - mem_req.cycles_left = MEM_LATENCY; - mem_req.write = 0; - for (auto& rsp : mem_rsp_vec_[req_bank]) { - if (mem_req.addr == rsp.addr) { - // duplicate requests receive the same cycle delay - mem_req.cycles_left = rsp.cycles_left; - break; - } - } - mem_rsp_vec_[req_bank].emplace_back(mem_req); - } - } - } - - vl_obj_->device->m_axi_wready = !mem_stalled; - vl_obj_->device->m_axi_awready = !mem_stalled; - vl_obj_->device->m_axi_arready = !mem_stalled; -} - -#else - -void Simulator::reset_mem_bus() { - vl_obj_->device->mem_req_ready = 0; - vl_obj_->device->mem_rsp_valid = 0; -} - -void Simulator::eval_mem_bus(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = vl_obj_->device->mem_rsp_ready; - return; - } - - if (ram_ == nullptr) { - vl_obj_->device->mem_req_ready = 0; - return; - } - - // update memory responses schedule - for (int b = 0; b < MEMORY_BANKS; ++b) { - for (auto& rsp : mem_rsp_vec_[b]) { - if (rsp.cycles_left > 0) - rsp.cycles_left -= 1; - } - } - - bool has_response = false; - - // schedule memory responses that are ready - for (int i = 0; i < MEMORY_BANKS; ++i) { - uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS; - if (!mem_rsp_vec_[b].empty() - && (mem_rsp_vec_[b].begin()->cycles_left) <= 0) { - has_response = true; - last_mem_rsp_bank_ = b; - break; - } - } - - // send memory response - if (mem_rd_rsp_active_ - && vl_obj_->device->mem_rsp_valid && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (has_response) { - vl_obj_->device->mem_rsp_valid = 1; - auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); - /* - printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); - } - printf("\n"); - */ - memcpy((uint8_t*)vl_obj_->device->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); - vl_obj_->device->mem_rsp_tag = mem_rsp_it->tag; - mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - } else { - vl_obj_->device->mem_rsp_valid = 0; - } - } - - // select the memory bank - uint32_t req_bank = (MEMORY_BANKS >= 2) ? (vl_obj_->device->mem_req_addr % MEMORY_BANKS) : 0; - - // handle memory stalls - bool mem_stalled = false; -#ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) { - mem_stalled = true; - } -#endif - - // process memory requests - if (!mem_stalled) { - if (vl_obj_->device->mem_req_valid) { - if (vl_obj_->device->mem_req_rw) { - uint64_t byteen = vl_obj_->device->mem_req_byteen; - unsigned base_addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE); - uint8_t* data = (uint8_t*)(vl_obj_->device->mem_req_data); - if (base_addr >= IO_COUT_ADDR - && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { - for (int i = 0; i < IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - /* - printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - } - } else { - mem_req_t mem_req; - mem_req.tag = vl_obj_->device->mem_req_tag; - mem_req.addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE); - ram_->read(mem_req.block.data(), vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE); - mem_req.cycles_left = MEM_LATENCY; - for (auto& rsp : mem_rsp_vec_[req_bank]) { - if (mem_req.addr == rsp.addr) { - // duplicate requests receive the same cycle delay - mem_req.cycles_left = rsp.cycles_left; - break; - } - } - mem_rsp_vec_[req_bank].emplace_back(mem_req); - } - } - } - - vl_obj_->device->mem_req_ready = !mem_stalled; -} - -#endif - -void Simulator::wait(uint32_t cycles) { - for (int i = 0; i < cycles; ++i) { - this->step(); - } -} - -bool Simulator::is_busy() const { - return vl_obj_->device->busy; -} - -int Simulator::run() { - int exitcode = 0; - -#ifndef NDEBUG - std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; -#endif - - // execute program - while (vl_obj_->device->busy) { - if (get_ebreak()) { - exitcode = get_last_wb_value(3); - break; - } - this->step(); - } - - // wait 5 cycles to flush the pipeline - this->wait(5); - - return exitcode; -} - -bool Simulator::get_ebreak() const { -#ifdef AXI_BUS - return (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; -#else - return (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; -#endif -} - -int Simulator::get_last_wb_value(int reg) const { -#ifdef AXI_BUS - return (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; -#else - return (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; -#endif -} - -void Simulator::print_stats(std::ostream& out) { - out << std::left; - out << std::setw(24) << "# of total cycles:" << std::dec << timestamp/2 << std::endl; -} \ No newline at end of file diff --git a/sim/rtlsim/simulator.h b/sim/rtlsim/simulator.h deleted file mode 100644 index 3b36c520..00000000 --- a/sim/rtlsim/simulator.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - -namespace vortex { - -class VL_OBJ; -class RAM; - -class Simulator { -public: - - Simulator(); - virtual ~Simulator(); - - void attach_ram(RAM* ram); - - bool is_busy() const; - - void reset(); - void step(); - void wait(uint32_t cycles); - - int run(); - - void print_stats(std::ostream& out); - -private: - - typedef struct { - int cycles_left; - std::array block; - uint64_t addr; - uint64_t tag; - bool write; - } mem_req_t; - - std::unordered_map print_bufs_; - - void eval(); - -#ifdef AXI_BUS - void reset_axi_bus(); - void eval_axi_bus(bool clk); -#else - void reset_mem_bus(); - void eval_mem_bus(bool clk); -#endif - - int get_last_wb_value(int reg) const; - - bool get_ebreak() const; - - std::list mem_rsp_vec_ [MEMORY_BANKS]; - uint32_t last_mem_rsp_bank_; - - bool mem_rd_rsp_active_; - bool mem_rd_rsp_ready_; - - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; - - RAM *ram_; - - VL_OBJ* vl_obj_; -}; - -} \ No newline at end of file diff --git a/sim/simx/Makefile b/sim/simx/Makefile index ad4e38c8..1d081c9e 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -1,3 +1,4 @@ +DESTDIR ?= . RTL_DIR = ../hw/rtl THIRD_PARTY_DIR = ../../third_party @@ -5,15 +6,17 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I. -I../common -I../../hw CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include -CXXFLAGS += -I$(THIRD_PARTY_DIR)/cocogfx/include +CXXFLAGS += -I$(THIRD_PARTY_DIR) CXXFLAGS += $(CONFIGS) -LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx +LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a +LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx +LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator TOP = vx_cache_sim -SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp -SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp main.cpp +SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp +SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS))) VPATH := $(sort $(dir $(SRCS))) @@ -30,23 +33,16 @@ endif PROJECT = simx -all: $(PROJECT) - -$(PROJECT): $(SRCS) +all: $(DESTDIR)/$(PROJECT) + +$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ -obj_dir/%.o: %.cpp - mkdir -p obj_dir - $(CXX) $(CXXFLAGS) -c $< -o $@ - -static: $(OBJS) - $(AR) rcs lib$(PROJECT).a $(OBJS) $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o +$(DESTDIR)/lib$(PROJECT).so: $(SRCS) + $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; -clean-static: - rm -rf lib$(PROJECT).a obj_dir .depend - -clean: clean-static - rm -rf $(PROJECT) \ No newline at end of file +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so \ No newline at end of file diff --git a/sim/simx/cache.cpp b/sim/simx/cache.cpp index 36da1b27..36c03eb9 100644 --- a/sim/simx/cache.cpp +++ b/sim/simx/cache.cpp @@ -116,6 +116,7 @@ struct bank_req_t { bool mshr_replay; uint64_t tag; uint32_t set_id; + uint32_t core_id; std::vector infos; bank_req_t(uint32_t size) @@ -124,6 +125,7 @@ struct bank_req_t { , mshr_replay(false) , tag(0) , set_id(0) + , core_id(0) , infos(size) {} }; @@ -292,7 +294,7 @@ public: auto& mem_rsp = bypass_port.front(); uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1); uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs; - MemRsp core_rsp(tag); + MemRsp core_rsp{tag, mem_rsp.core_id}; simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency); bypass_port.pop(); } @@ -327,7 +329,7 @@ public: auto& core_req = core_req_port.front(); // check cache bypassing - if (core_req.is_io) { + if (core_req.non_cacheable) { // send IO request this->processIORequest(core_req, req_id); @@ -348,6 +350,7 @@ public: bank_req.mshr_replay = false; bank_req.tag = tag; bank_req.set_id = set_id; + bank_req.core_id = core_req.core_id; bank_req.infos.at(port_id) = {true, req_id, core_req.tag}; auto& bank = banks_.at(bank_id); @@ -439,7 +442,8 @@ public: if (pipeline_req.mshr_replay) { // send core response for (auto& info : pipeline_req.infos) { - simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency); + MemRsp core_rsp{info.req_tag, pipeline_req.core_id}; + simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency); } } else { bool hit = false; @@ -480,6 +484,7 @@ public: MemReq mem_req; mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag); mem_req.write = true; + mem_req.core_id = pipeline_req.core_id; mem_req_ports_.at(bank_id).send(mem_req, 1); } else { // mark block as dirty @@ -488,8 +493,9 @@ public: } // send core response if (!pipeline_req.write || config_.write_reponse) { - for (auto& info : pipeline_req.infos) { - simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency); + for (auto& info : pipeline_req.infos) { + MemRsp core_rsp{info.req_tag, pipeline_req.core_id}; + simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency); } } } else { @@ -508,6 +514,7 @@ public: MemReq mem_req; mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag); mem_req.write = true; + mem_req.core_id = pipeline_req.core_id; mem_req_ports_.at(bank_id).send(mem_req, 1); ++perf_stats_.evictions; } @@ -519,12 +526,14 @@ public: MemReq mem_req; mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag); mem_req.write = true; + mem_req.core_id = pipeline_req.core_id; mem_req_ports_.at(bank_id).send(mem_req, 1); } // send core response if (config_.write_reponse) { - for (auto& info : pipeline_req.infos) { - simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency); + for (auto& info : pipeline_req.infos) { + MemRsp core_rsp{info.req_tag, pipeline_req.core_id}; + simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency); } } } else { @@ -540,6 +549,7 @@ public: mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag); mem_req.write = false; mem_req.tag = mshr_id; + mem_req.core_id = pipeline_req.core_id; mem_req_ports_.at(bank_id).send(mem_req, 1); ++pending_fill_reqs_; } diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 7d8daed5..a28bd806 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -2,12 +2,10 @@ #include "types.h" -#ifndef MEM_LATENCY -#define MEM_LATENCY 24 -#endif - #define RAM_PAGE_SIZE 4096 +#define DRAM_CHANNELS 2 + namespace vortex { enum Constants { diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 934ce1f8..0540151c 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -87,12 +87,12 @@ Core::Core(const SimContext& ctx, const ArchDef &arch, Word id) } // register execute units - exe_units_.at((int)ExeType::NOP) = SimPlatform::instance().CreateObject(this); - exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().CreateObject(this); - exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().CreateObject(this); - exe_units_.at((int)ExeType::CSR) = SimPlatform::instance().CreateObject(this); - exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().CreateObject(this); - exe_units_.at((int)ExeType::GPU) = SimPlatform::instance().CreateObject(this); + exe_units_.at((int)ExeType::NOP) = SimPlatform::instance().create_object(this); + exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().create_object(this); + exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().create_object(this); + exe_units_.at((int)ExeType::CSR) = SimPlatform::instance().create_object(this); + exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().create_object(this); + exe_units_.at((int)ExeType::GPU) = SimPlatform::instance().create_object(this); // connect l1 switch icache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[0]); @@ -216,6 +216,7 @@ void Core::fetch(uint64_t cycle) { mem_req.addr = trace->PC; mem_req.write = false; mem_req.tag = pending_icache_.allocate(trace); + mem_req.core_id = id_; icache_->CoreReqPorts.at(0).send(mem_req, 1); DT(3, cycle, "icache-req: addr=" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace); fetch_latch_.pop(); diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index be172830..d1df2637 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -403,7 +403,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { break; case JALR_INST: trace->exe_type = ExeType::ALU; - trace->alu.type = AluType::BRANCH; + trace->alu.type = AluType::BRANCH; trace->used_iregs.set(rsrc0); for (int t = 0; t < num_threads; ++t) { if (!tmask_.test(t)) @@ -535,6 +535,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { Word csr_value; if (func3 == 0) { trace->exe_type = ExeType::ALU; + trace->alu.type = AluType::SYSCALL; trace->fetch_stall = true; switch (csr_addr) { case 0: // ECALL diff --git a/sim/simx/exeunit.cpp b/sim/simx/exeunit.cpp index 1736101c..3b84ee8a 100644 --- a/sim/simx/exeunit.cpp +++ b/sim/simx/exeunit.cpp @@ -143,8 +143,9 @@ void LsuUnit::step(uint64_t cycle) { MemReq mem_req; mem_req.addr = mem_addr.addr; mem_req.write = is_write; + mem_req.non_cacheable = (type == AddrType::IO); mem_req.tag = tag; - mem_req.is_io = (type == AddrType::IO); + mem_req.core_id = core_->id(); if (type == AddrType::Shared) { core_->shared_mem_->Inputs.at(t).send(mem_req, 2); @@ -153,7 +154,7 @@ void LsuUnit::step(uint64_t cycle) { } else { dcache_req_port.send(mem_req, 2); DT(3, cycle, "dcache-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag - << ", type=" << trace->lsu.type << ", tid=" << t << ", io=" << mem_req.is_io << ", " << *trace); + << ", type=" << trace->lsu.type << ", tid=" << t << ", nc=" << mem_req.non_cacheable << ", " << *trace); } if (is_dup) @@ -182,6 +183,7 @@ void AluUnit::step(uint64_t cycle) { switch (trace->alu.type) { case AluType::ARITH: case AluType::BRANCH: + case AluType::SYSCALL: case AluType::CMOV: Output.send(trace, 1); break; @@ -359,6 +361,7 @@ bool GpuUnit::processTexRequest(uint64_t cycle, pipeline_trace_t* trace) { mem_req.addr = mem_addr.addr; mem_req.write = (trace->lsu.type == LsuType::STORE); mem_req.tag = tag; + mem_req.core_id = core_->id(); dcache_req_port.send(mem_req, 3); DT(3, cycle, "tex-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag << ", tid=" << t << ", "<< trace); diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 86829f3a..159fdab6 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -13,7 +13,7 @@ using namespace vortex; int main(int argc, char **argv) { - int exitcode; + int exitcode = 0; std::string archStr("rv32imf"); std::string imgFileName; @@ -54,12 +54,7 @@ int main(int argc, char **argv) { return -1; { - ArchDef arch(archStr, num_cores, num_warps, num_threads); - - Processor processor(arch); - RAM ram(RAM_PAGE_SIZE); - { std::string program_ext(fileExtension(imgFileName.c_str())); if (program_ext == "bin") { @@ -72,25 +67,40 @@ int main(int argc, char **argv) { } } - processor.attach_ram(&ram); + ArchDef arch(archStr, num_cores, num_warps, num_threads); + auto processor = Processor::Create(arch); + processor->attach_ram(&ram); - exitcode = processor.run(); + // setup memory simulator + auto memsim = MemSim::Create(MemSim::Config{ + DRAM_CHANNELS, + arch.num_cores() + }); + processor->MemReqPort.bind(&memsim->MemReqPort); + memsim->MemRspPort.bind(&processor->MemRspPort); - if (riscv_test) { - if (1 == exitcode) { - std::cout << "Passed." << std::endl; - exitcode = 0; - } else { - std::cout << "Failed." << std::endl; - } - } else { - if (exitcode != 0) { - std::cout << "*** error: exitcode=" << exitcode << std::endl; - } - } - } + // run simulation + for (;;) { + SimPlatform::instance().step(); + if (processor->check_exit(&exitcode)) + break; + }; + } SimPlatform::instance().finalize(); + if (riscv_test) { + if (1 == exitcode) { + std::cout << "Passed." << std::endl; + exitcode = 0; + } else { + std::cout << "Failed." << std::endl; + } + } else { + if (exitcode != 0) { + std::cout << "*** error: exitcode=" << exitcode << std::endl; + } + } + return exitcode; } diff --git a/sim/simx/memsim.cpp b/sim/simx/memsim.cpp index 012082d9..74979bc8 100644 --- a/sim/simx/memsim.cpp +++ b/sim/simx/memsim.cpp @@ -1,56 +1,99 @@ #include "memsim.h" #include #include +#include + +DISABLE_WARNING_PUSH +DISABLE_WARNING_UNUSED_PARAMETER +#define RAMULATOR +#include +#include +#include +DISABLE_WARNING_POP + #include "constants.h" +#include "types.h" using namespace vortex; class MemSim::Impl { private: MemSim* simobject_; - uint32_t num_banks_; - uint32_t latency_; + Config config_; PerfStats perf_stats_; + ramulator::Gem5Wrapper* dram_; public: - Impl(MemSim* simobject, uint32_t num_banks, uint32_t latency) + + Impl(MemSim* simobject, const Config& config) : simobject_(simobject) - , num_banks_(num_banks) - , latency_(latency) - {} + , config_(config) + { + ramulator::Config ram_config; + ram_config.add("standard", "DDR4"); + ram_config.add("channels", std::to_string(config.channels)); + ram_config.add("ranks", "1"); + ram_config.add("speed", "DDR4_2400R"); + ram_config.add("org", "DDR4_4Gb_x8"); + ram_config.add("mapping", "defaultmapping"); + ram_config.set_core_num(config.num_cores); + dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); + Stats::statlist.output("ramulator.ddr4.log"); + } + + ~Impl() { + dram_->finish(); + Stats::statlist.printall(); + delete dram_; + } const PerfStats& perf_stats() const { return perf_stats_; } + void dram_callback(ramulator::Request& req, uint32_t tag) { + MemRsp mem_rsp{tag, (uint32_t)req.coreid}; + simobject_->MemRspPort.send(mem_rsp, 1); + } + void step(uint64_t /*cycle*/) { - for (uint32_t i = 0, n = num_banks_; i < n; ++i) { - auto& mem_req_port = simobject_->MemReqPorts.at(i); - if (mem_req_port.empty()) - continue; - auto& mem_req = mem_req_port.front(); - if (!mem_req.write) { - MemRsp mem_rsp; - mem_rsp.tag = mem_req.tag; - simobject_->MemRspPorts.at(i).send(mem_rsp, latency_); - ++perf_stats_.reads; - } else { - ++perf_stats_.writes; - } - mem_req_port.pop(); + dram_->tick(); + + if (simobject_->MemReqPort.empty()) + return; + + auto& mem_req = simobject_->MemReqPort.front(); + + if (mem_req.write) { + ramulator::Request dram_req( + mem_req.addr, + ramulator::Request::Type::WRITE, + mem_req.core_id + ); + dram_->send(dram_req); + ++perf_stats_.writes; + } else { + ramulator::Request dram_req( + mem_req.addr, + ramulator::Request::Type::READ, + std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag), + mem_req.core_id + ); + dram_->send(dram_req); + ++perf_stats_.reads; } + + simobject_->MemReqPort.pop(); } }; /////////////////////////////////////////////////////////////////////////////// -MemSim::MemSim(const SimContext& ctx, - uint32_t num_banks, - uint32_t latency) +MemSim::MemSim(const SimContext& ctx, const Config& config) : SimObject(ctx, "MemSim") - , MemReqPorts(num_banks, this) - , MemRspPorts(num_banks, this) - , impl_(new Impl(this, num_banks, latency)) + , MemReqPort(this) + , MemRspPort(this) + , impl_(new Impl(this, config)) {} MemSim::~MemSim() { diff --git a/sim/simx/memsim.h b/sim/simx/memsim.h index c48361bc..24918a2e 100644 --- a/sim/simx/memsim.h +++ b/sim/simx/memsim.h @@ -8,6 +8,11 @@ namespace vortex { class MemSim : public SimObject{ public: + struct Config { + uint32_t channels; + uint32_t num_cores; + }; + struct PerfStats { uint64_t reads; uint64_t writes; @@ -18,10 +23,10 @@ public: {} }; - std::vector> MemReqPorts; - std::vector> MemRspPorts; + SimPort MemReqPort; + SimPort MemRspPort; - MemSim(const SimContext& ctx, uint32_t num_banks, uint32_t latency); + MemSim(const SimContext& ctx, const Config& config); ~MemSim(); void step(uint64_t cycle); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 6bb46229..bfda986e 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -3,147 +3,173 @@ using namespace vortex; -Processor::Processor(const ArchDef& arch) - : cores_(arch.num_cores()) - , l2caches_(NUM_CLUSTERS) - , l2_mem_switches_(NUM_CLUSTERS) -{ - uint32_t num_cores = arch.num_cores(); - uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS; +class Processor::Impl { +private: + Processor* simobject_; + std::vector cores_; + std::vector l2caches_; + std::vector::Ptr> l2_mem_switches_; + Cache::Ptr l3cache_; + Switch::Ptr l3_mem_switch_; - // create cores - for (uint32_t i = 0; i < num_cores; ++i) { - cores_.at(i) = Core::Create(arch, i); - } +public: + Impl(Processor* simobject, const ArchDef& arch) + : simobject_(simobject) + , cores_(arch.num_cores()) + , l2caches_(NUM_CLUSTERS) + , l2_mem_switches_(NUM_CLUSTERS) + { + uint32_t num_cores = arch.num_cores(); + uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS; - // connect memory sub-systen - memsim_ = MemSim::Create(1, MEM_LATENCY); - std::vector*> mem_req_ports(1); - std::vector*> mem_rsp_ports(1); - - mem_req_ports.at(0) = &memsim_->MemReqPorts.at(0); - mem_rsp_ports.at(0) = &memsim_->MemRspPorts.at(0); - - if (L3_ENABLE) { - l3cache_ = Cache::Create("l3cache", Cache::Config{ - log2ceil(L3_CACHE_SIZE), // C - log2ceil(MEM_BLOCK_SIZE), // B - 2, // W - 0, // A - 32, // address bits - L3_NUM_BANKS, // number of banks - L3_NUM_PORTS, // number of ports - NUM_CLUSTERS, // request size - true, // write-through - false, // write response - 0, // victim size - L3_MSHR_SIZE, // mshr - 2, // pipeline latency - } - ); - - mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort); - l3cache_->MemReqPort.bind(mem_req_ports.at(0)); - - mem_req_ports.resize(NUM_CLUSTERS); - mem_rsp_ports.resize(NUM_CLUSTERS); - - for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { - mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i); - mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i); + // create cores + for (uint32_t i = 0; i < num_cores; ++i) { + cores_.at(i) = Core::Create(arch, i); } - } else if (NUM_CLUSTERS > 1) { - l3_mem_switch_ = Switch::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS); - mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn); - l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0)); + + std::vector*> mem_req_ports(1); + std::vector*> mem_rsp_ports(1); - mem_req_ports.resize(NUM_CLUSTERS); - mem_rsp_ports.resize(NUM_CLUSTERS); + mem_req_ports.at(0) = &simobject_->MemReqPort; + mem_rsp_ports.at(0) = &simobject_->MemRspPort; - for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { - mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i); - mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i); - } - } - - for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { - std::vector*> cluster_mem_req_ports(cores_per_cluster); - std::vector*> cluster_mem_rsp_ports(cores_per_cluster); - - if (L2_ENABLE) { - auto& l2cache = l2caches_.at(i); - l2cache = Cache::Create("l2cache", Cache::Config{ - log2ceil(L2_CACHE_SIZE), // C + if (L3_ENABLE) { + l3cache_ = Cache::Create("l3cache", Cache::Config{ + log2ceil(L3_CACHE_SIZE), // C log2ceil(MEM_BLOCK_SIZE), // B 2, // W 0, // A - 32, // address bits - L2_NUM_BANKS, // number of banks - L2_NUM_PORTS, // number of ports - (uint8_t)cores_per_cluster, // request size + 32, // address bits + L3_NUM_BANKS, // number of banks + L3_NUM_PORTS, // number of ports + NUM_CLUSTERS, // request size true, // write-through false, // write response 0, // victim size - L2_MSHR_SIZE, // mshr + L3_MSHR_SIZE, // mshr 2, // pipeline latency - }); + } + ); + l3cache_->MemReqPort.bind(mem_req_ports.at(0)); + mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort); - mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort); - l2cache->MemReqPort.bind(mem_req_ports.at(i)); + mem_req_ports.resize(NUM_CLUSTERS); + mem_rsp_ports.resize(NUM_CLUSTERS); - for (uint32_t j = 0; j < cores_per_cluster; ++j) { - cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j); - cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j); + for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { + mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i); + mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i); } - } else { - auto& l2_mem_switch = l2_mem_switches_.at(i); - l2_mem_switch = Switch::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster); + } else if (NUM_CLUSTERS > 1) { + l3_mem_switch_ = Switch::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS); + l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0)); + mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn); - mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn); - l2_mem_switch->ReqOut.bind(mem_req_ports.at(i)); + mem_req_ports.resize(NUM_CLUSTERS); + mem_rsp_ports.resize(NUM_CLUSTERS); - for (uint32_t j = 0; j < cores_per_cluster; ++j) { - cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j); - cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j); + for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { + mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i); + mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i); } } - for (uint32_t j = 0; j < cores_per_cluster; ++j) { - auto& core = cores_.at((i * cores_per_cluster) + j); - cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort); - core->MemReqPort.bind(cluster_mem_req_ports.at(j)); + for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) { + std::vector*> cluster_mem_req_ports(cores_per_cluster); + std::vector*> cluster_mem_rsp_ports(cores_per_cluster); + + if (L2_ENABLE) { + auto& l2cache = l2caches_.at(i); + l2cache = Cache::Create("l2cache", Cache::Config{ + log2ceil(L2_CACHE_SIZE), // C + log2ceil(MEM_BLOCK_SIZE), // B + 2, // W + 0, // A + 32, // address bits + L2_NUM_BANKS, // number of banks + L2_NUM_PORTS, // number of ports + (uint8_t)cores_per_cluster, // request size + true, // write-through + false, // write response + 0, // victim size + L2_MSHR_SIZE, // mshr + 2, // pipeline latency + }); + l2cache->MemReqPort.bind(mem_req_ports.at(i)); + mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort); + + for (uint32_t j = 0; j < cores_per_cluster; ++j) { + cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j); + cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j); + } + } else { + auto& l2_mem_switch = l2_mem_switches_.at(i); + l2_mem_switch = Switch::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster); + l2_mem_switch->ReqOut.bind(mem_req_ports.at(i)); + mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn); + + for (uint32_t j = 0; j < cores_per_cluster; ++j) { + cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j); + cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j); + } + } + + for (uint32_t j = 0; j < cores_per_cluster; ++j) { + auto& core = cores_.at((i * cores_per_cluster) + j); + core->MemReqPort.bind(cluster_mem_req_ports.at(j)); + cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort); + } } } -} -void Processor::attach_ram(RAM* ram) { - for (auto core : cores_) { - core->attach_ram(ram); + ~Impl() {} + + void step(uint64_t cycle) { + __unused (cycle); } -} -Processor::~Processor() {} + void attach_ram(RAM* ram) { + for (auto core : cores_) { + core->attach_ram(ram); + } + } -int Processor::run() { - bool running; - int exitcode = 0; - do { - SimPlatform::instance().step(); - - running = false; + bool check_exit(int* exitcode) { + bool running = false; for (auto& core : cores_) { if (core->running()) { running = true; } if (core->check_exit()) { - exitcode = core->getIRegValue(3); - running = false; - break; + *exitcode = core->getIRegValue(3); + return true; } } - } while (running); + return !running; + } +}; - std::cout << std::flush; +/////////////////////////////////////////////////////////////////////////////// - return exitcode; +Processor::Processor(const SimContext& ctx, const ArchDef& arch) + : SimObject(ctx, "Vortex") + , MemReqPort(this) + , MemRspPort(this) + , impl_(new Impl(this, arch)) +{} + +Processor::~Processor() { + delete impl_; +} + +void Processor::attach_ram(RAM* mem) { + impl_->attach_ram(mem); +} + +bool Processor::check_exit(int* exitcode) { + return impl_->check_exit(exitcode); +} + +void Processor::step(uint64_t cycle) { + impl_->step(cycle); } \ No newline at end of file diff --git a/sim/simx/processor.h b/sim/simx/processor.h index e41fd740..cfcde4da 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -4,24 +4,23 @@ namespace vortex { -class Processor { +class Processor : public SimObject { public: - typedef std::shared_ptr Ptr; + SimPort MemReqPort; + SimPort MemRspPort; - Processor(const ArchDef& arch); + Processor(const SimContext& ctx, const ArchDef& arch); ~Processor(); void attach_ram(RAM* mem); - int run(); + bool check_exit(int* exitcode); + + void step(uint64_t cycle); private: - std::vector cores_; - std::vector l2caches_; - std::vector::Ptr> l2_mem_switches_; - Cache::Ptr l3cache_; - Switch::Ptr l3_mem_switch_; - MemSim::Ptr memsim_; + class Impl; + Impl* impl_; }; } \ No newline at end of file diff --git a/sim/simx/sharedmem.h b/sim/simx/sharedmem.h index d984422d..6106ad25 100644 --- a/sim/simx/sharedmem.h +++ b/sim/simx/sharedmem.h @@ -65,8 +65,7 @@ public: if (!core_req.write || config_.write_reponse) { // send response - MemRsp core_rsp; - core_rsp.tag = core_req.tag; + MemRsp core_rsp{core_req.tag, core_req.core_id}; this->Outputs.at(req_id).send(core_rsp, 1); } diff --git a/sim/simx/types.h b/sim/simx/types.h index 7675ab82..67a14b5d 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -70,6 +70,7 @@ inline std::ostream &operator<<(std::ostream &os, const ExeType& type) { enum class AluType { ARITH, BRANCH, + SYSCALL, IMUL, IDIV, CMOV, @@ -77,11 +78,12 @@ enum class AluType { inline std::ostream &operator<<(std::ostream &os, const AluType& type) { switch (type) { - case AluType::ARITH: os << "ARITH"; break; - case AluType::BRANCH: os << "BRANCH"; break; - case AluType::IMUL: os << "IMUL"; break; - case AluType::IDIV: os << "IDIV"; break; - case AluType::CMOV: os << "CMOV"; break; + case AluType::ARITH: os << "ARITH"; break; + case AluType::BRANCH: os << "BRANCH"; break; + case AluType::SYSCALL: os << "SYSCALL"; break; + case AluType::IMUL: os << "IMUL"; break; + case AluType::IDIV: os << "IDIV"; break; + case AluType::CMOV: os << "CMOV"; break; } return os; } @@ -207,24 +209,31 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) { struct MemReq { uint64_t addr; - uint32_t tag; bool write; - bool is_io; + bool non_cacheable; + uint32_t tag; + uint32_t core_id; MemReq(uint64_t _addr = 0, + bool _write = false, + bool _non_cacheable = false, uint64_t _tag = 0, - bool _write = false, - bool _is_io = false + uint32_t _core_id = 0 ) : addr(_addr) - , tag(_tag) , write(_write) - , is_io(_is_io) + , non_cacheable(_non_cacheable) + , tag(_tag) + , core_id(_core_id) {} }; struct MemRsp { uint64_t tag; - MemRsp(uint64_t _tag = 0) : tag (_tag) {} + uint32_t core_id; + MemRsp(uint64_t _tag = 0, uint32_t _core_id = 0) + : tag (_tag) + , core_id(_core_id) + {} }; /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/vlsim/Makefile b/sim/vlsim/Makefile index 879bd954..bd34e60f 100644 --- a/sim/vlsim/Makefile +++ b/sim/vlsim/Makefile @@ -1,3 +1,4 @@ +DESTDIR ?= . RTL_DIR = ../../hw/rtl DPI_DIR = ../../hw/dpi SCRIPT_DIR = ../../hw/scripts @@ -7,8 +8,10 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I.. -I../../../hw -I../../common CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include +CXXFLAGS += -I../$(THIRD_PARTY_DIR) LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a +LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator # control RTL debug tracing states DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE @@ -87,22 +90,15 @@ VL_FLAGS += -DIDIV_DPI FPU_CORE ?= FPU_DPI VL_FLAGS += -D$(FPU_CORE) -PROJECT = libopae-c-vlsim +PROJECT = libopae-c-vlsim.so -all: $(PROJECT).so +all: $(PROJECT) vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh $(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h -$(PROJECT).so: $(SRCS) vortex_afu.h - verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT).so +$(DESTDIR)/$(PROJECT): $(SRCS) vortex_afu.h + verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT) -static: $(SRCS) vortex_afu.h - verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' - $(AR) rcs $(PROJECT).a obj_dir/*.o $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o - -clean-static: - rm -rf $(PROJECT).a obj_dir vortex_afu.h - -clean: clean-static - rm -rf $(PROJECT).so +clean: + rm -rf obj_dir $(DESTDIR)/$(PROJECT) diff --git a/sim/vlsim/opae_sim.cpp b/sim/vlsim/opae_sim.cpp index 5da617b5..d165dba6 100644 --- a/sim/vlsim/opae_sim.cpp +++ b/sim/vlsim/opae_sim.cpp @@ -13,6 +13,31 @@ #include #include +#define RAMULATOR +#include +#include +#include + +#include +#include + +#include +#include +#include + +#ifndef MEMORY_BANKS + #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #else + #define MEMORY_BANKS 2 + #endif +#endif + +#undef MEM_BLOCK_SIZE +#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) + +#define CACHE_BLOCK_SIZE 64 + #define CCI_LATENCY 8 #define CCI_RAND_MOD 8 #define CCI_RQ_SIZE 16 @@ -28,18 +53,6 @@ #define TRACE_STOP_TIME -1ull #endif -#ifndef MEM_LATENCY -#define MEM_LATENCY 24 -#endif - -#ifndef MEM_RQ_SIZE -#define MEM_RQ_SIZE 16 -#endif - -#ifndef MEM_STALLS_MODULO -#define MEM_STALLS_MODULO 16 -#endif - #ifndef VERILATOR_RESET_VALUE #define VERILATOR_RESET_VALUE 2 #endif @@ -88,357 +101,417 @@ void sim_trace_enable(bool enable) { /////////////////////////////////////////////////////////////////////////////// -namespace vortex { -class VL_OBJ { +class opae_sim::Impl { public: -#ifdef AXI_BUS - VVortex_axi *device; -#else - Vvortex_afu_shim *device; -#endif -#ifdef VCD_OUTPUT - VerilatedVcdC *trace; -#endif - - VL_OBJ() { + Impl() + : stop_(false) + , host_buffer_ids_(0) { // force random values for unitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); - // Turn off assertion before reset + // turn off assertion before reset Verilated::assertOn(false); - #ifdef AXI_BUS - this->device = new Vvortex_afu_shim(); - #else - this->device = new Vvortex_afu_shim(); - #endif + // create RTL module instance + device_ = new Vvortex_afu_shim(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - this->trace = new VerilatedVcdC(); - this->device->trace(this->trace, 99); - this->trace->open("trace.vcd"); + trace_ = new VerilatedVcdC(); + device_->trace(this->trace, 99); + trace_->open("trace.vcd"); #endif + + ram_ = new RAM(RAM_PAGE_SIZE); + + // initialize dram simulator + ramulator::Config ram_config; + ram_config.add("standard", "DDR4"); + ram_config.add("channels", std::to_string(MEMORY_BANKS)); + ram_config.add("ranks", "1"); + ram_config.add("speed", "DDR4_2400R"); + ram_config.add("org", "DDR4_4Gb_x8"); + ram_config.add("mapping", "defaultmapping"); + ram_config.set_core_num(1); + dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); + Stats::statlist.output("ramulator.ddr4.log"); + + // reset the device + this->reset(); + + // launch execution thread + future_ = std::async(std::launch::async, [&]{ + while (!stop_) { + std::lock_guard guard(mutex_); + this->step(); + } + }); } - ~VL_OBJ() { + ~Impl() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + for (auto& buffer : host_buffers_) { + __aligned_free(buffer.second.data); + } #ifdef VCD_OUTPUT - this->trace->close(); - delete this->trace; + trace_->close(); + delete trace_; #endif - delete this->device; - } -}; -} + delete device_; + + delete ram_; -/////////////////////////////////////////////////////////////////////////////// - -opae_sim::opae_sim() - : stop_(false) - , host_buffer_ids_(0) { - vl_obj_ = new VL_OBJ(); - ram_ = new RAM(RAM_PAGE_SIZE); - - // reset the device - this->reset(); - - // launch execution thread - future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->step(); - } - }); -} - -opae_sim::~opae_sim() { - stop_ = true; - if (future_.valid()) { - future_.wait(); - } - for (auto& buffer : host_buffers_) { - __aligned_free(buffer.second.data); - } - delete vl_obj_; - delete ram_; -} - -int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { - auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len); - if (alloc == NULL) - return -1; - host_buffer_t buffer; - buffer.data = (uint64_t*)alloc; - buffer.size = len; - buffer.ioaddr = uintptr_t(alloc); - auto buffer_id = host_buffer_ids_++; - host_buffers_.emplace(buffer_id, buffer); - *buf_addr = alloc; - *wsid = buffer_id; - return 0; -} - -void opae_sim::release_buffer(uint64_t wsid) { - auto it = host_buffers_.find(wsid); - if (it != host_buffers_.end()) { - __aligned_free(it->second.data); - host_buffers_.erase(it); - } -} - -void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { - *ioaddr = host_buffers_[wsid].ioaddr; -} - -void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { - std::lock_guard guard(mutex_); - - vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 1; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; - this->step(); - vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 0; - assert(vl_obj_->device->af2cp_sTxPort_c2_mmioRdValid); - *value = vl_obj_->device->af2cp_sTxPort_c2_data; -} - -void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { - std::lock_guard guard(mutex_); - - vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 1; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; - vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; - memcpy(vl_obj_->device->vcp2af_sRxPort_c0_data, &value, 8); - this->step(); - vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 0; -} - -/////////////////////////////////////////////////////////////////////////////// - -void opae_sim::reset() { - cci_reads_.clear(); - cci_writes_.clear(); - vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 0; - vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 0; - vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0; - vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0; - vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = 0; - vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = 0; - - for (int b = 0; b < MEMORY_BANKS; ++b) { - mem_reads_[b].clear(); - vl_obj_->device->avs_readdatavalid[b] = 0; - vl_obj_->device->avs_waitrequest[b] = 0; + if (dram_) { + dram_->finish(); + Stats::statlist.printall(); + delete dram_; + } } - vl_obj_->device->reset = 1; + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { + auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len); + if (alloc == NULL) + return -1; + host_buffer_t buffer; + buffer.data = (uint64_t*)alloc; + buffer.size = len; + buffer.ioaddr = uintptr_t(alloc); + auto buffer_id = host_buffer_ids_++; + host_buffers_.emplace(buffer_id, buffer); + *buf_addr = alloc; + *wsid = buffer_id; + return 0; + } - for (int i = 0; i < RESET_DELAY; ++i) { - vl_obj_->device->clk = 0; + void release_buffer(uint64_t wsid) { + auto it = host_buffers_.find(wsid); + if (it != host_buffers_.end()) { + __aligned_free(it->second.data); + host_buffers_.erase(it); + } + } + + void get_io_address(uint64_t wsid, uint64_t *ioaddr) { + *ioaddr = host_buffers_[wsid].ioaddr; + } + + void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + std::lock_guard guard(mutex_); + + device_->vcp2af_sRxPort_c0_mmioRdValid = 1; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + this->step(); + device_->vcp2af_sRxPort_c0_mmioRdValid = 0; + assert(device_->af2cp_sTxPort_c2_mmioRdValid); + *value = device_->af2cp_sTxPort_c2_data; + } + + void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { + std::lock_guard guard(mutex_); + + device_->vcp2af_sRxPort_c0_mmioWrValid = 1; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + memcpy(device_->vcp2af_sRxPort_c0_data, &value, 8); + this->step(); + device_->vcp2af_sRxPort_c0_mmioWrValid = 0; + } + +private: + + void reset() { + cci_reads_.clear(); + cci_writes_.clear(); + device_->vcp2af_sRxPort_c0_mmioRdValid = 0; + device_->vcp2af_sRxPort_c0_mmioWrValid = 0; + device_->vcp2af_sRxPort_c0_rspValid = 0; + device_->vcp2af_sRxPort_c1_rspValid = 0; + device_->vcp2af_sRxPort_c0_TxAlmFull = 0; + device_->vcp2af_sRxPort_c1_TxAlmFull = 0; + + for (int b = 0; b < MEMORY_BANKS; ++b) { + pending_mem_reqs_[b].clear(); + device_->avs_readdatavalid[b] = 0; + device_->avs_waitrequest[b] = 0; + } + + device_->reset = 1; + + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } + + device_->reset = 0; + + // Turn on assertion after reset + Verilated::assertOn(true); + } + + void step() { + this->sRxPort_bus(); + this->sTxPort_bus(); + this->avs_bus(); + + device_->clk = 0; this->eval(); - vl_obj_->device->clk = 1; + device_->clk = 1; this->eval(); - } - vl_obj_->device->reset = 0; - - // Turn on assertion after reset - Verilated::assertOn(true); -} + dram_->tick(); -void opae_sim::step() { - this->sRxPort_bus(); - this->sTxPort_bus(); - this->avs_bus(); - - vl_obj_->device->clk = 0; - this->eval(); - vl_obj_->device->clk = 1; - this->eval(); - -#ifndef NDEBUG - fflush(stdout); -#endif -} - -void opae_sim::eval() { - vl_obj_->device->eval(); -#ifdef VCD_OUTPUT - if (sim_trace_enabled()) { - vl_obj_->trace->dump(timestamp); - } -#endif - ++timestamp; -} - -void opae_sim::sRxPort_bus() { - // check mmio request - bool mmio_req_enabled = vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid - || vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid; - - // schedule CCI read responses - std::list::iterator cci_rd_it(cci_reads_.end()); - for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { - if (it->cycles_left > 0) - it->cycles_left -= 1; - if ((cci_rd_it == ie) && (it->cycles_left == 0)) { - cci_rd_it = it; - } + #ifndef NDEBUG + fflush(stdout); + #endif } - // schedule CCI write responses - std::list::iterator cci_wr_it(cci_writes_.end()); - for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { - if (it->cycles_left > 0) - it->cycles_left -= 1; - if ((cci_wr_it == ie) && (it->cycles_left == 0)) { - cci_wr_it = it; - } - } - - // send CCI write response - vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0; - if (cci_wr_it != cci_writes_.end()) { - vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 1; - vl_obj_->device->vcp2af_sRxPort_c1_hdr_resp_type = 0; - vl_obj_->device->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata; - cci_writes_.erase(cci_wr_it); - } - - // send CCI read response (ensure mmio disabled) - vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0; - if (!mmio_req_enabled - && (cci_rd_it != cci_reads_.end())) { - vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 1; - vl_obj_->device->vcp2af_sRxPort_c0_hdr_resp_type = 0; - memcpy(vl_obj_->device->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); - vl_obj_->device->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; - /*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata); - for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) - printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]); - printf("\n");*/ - cci_reads_.erase(cci_rd_it); - } -} - -void opae_sim::sTxPort_bus() { - // process read requests - if (vl_obj_->device->af2cp_sTxPort_c0_valid) { - assert(!vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull); - cci_rd_req_t cci_req; - cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); - cci_req.addr = vl_obj_->device->af2cp_sTxPort_c0_hdr_address; - cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c0_hdr_mdata; - auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); - memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE); - //printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vl_obj_->device->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); - cci_reads_.emplace_back(cci_req); - } - - // process write requests - if (vl_obj_->device->af2cp_sTxPort_c1_valid) { - assert(!vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull); - cci_wr_req_t cci_req; - cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); - cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c1_hdr_mdata; - auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE); - memcpy(host_ptr, vl_obj_->device->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); - cci_writes_.emplace_back(cci_req); - } - - // check queues overflow - vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1)); - vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); -} - -void opae_sim::avs_bus() { - for (int b = 0; b < MEMORY_BANKS; ++b) { - // update memory responses schedule - for (auto& rsp : mem_reads_[b]) { - if (rsp.cycles_left > 0) - rsp.cycles_left -= 1; - } - - // schedule memory responses in FIFO order - std::list::iterator mem_rd_it(mem_reads_[b].end()); - if (!mem_reads_[b].empty() - && (0 == mem_reads_[b].begin()->cycles_left)) { - mem_rd_it = mem_reads_[b].begin(); - } - - // send memory response - vl_obj_->device->avs_readdatavalid[b] = 0; - if (mem_rd_it != mem_reads_[b].end()) { - vl_obj_->device->avs_readdatavalid[b] = 1; - memcpy(vl_obj_->device->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE); - uint32_t addr = mem_rd_it->addr; - mem_reads_[b].erase(mem_rd_it); - /*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE); - for (auto& req : mem_reads_[b]) { - if (req.cycles_left != 0) - printf(" !%0x", req.addr * MEM_BLOCK_SIZE); - else - printf(" %0x", req.addr * MEM_BLOCK_SIZE); - } - printf("}\n");*/ - } - - // handle memory stalls - bool mem_stalled = false; - #ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_reads_[b].size() >= MEM_RQ_SIZE) { - mem_stalled = true; + void eval() { + device_->eval(); + #ifdef VCD_OUTPUT + if (sim_trace_enabled()) { + trace_->dump(timestamp); } #endif + ++timestamp; + } - // process memory requests - if (!mem_stalled) { - assert(!vl_obj_->device->avs_read[b] || !vl_obj_->device->avs_write[b]); - if (vl_obj_->device->avs_write[b]) { - uint64_t byteen = vl_obj_->device->avs_byteenable[b]; - unsigned base_addr = vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE; - uint8_t* data = (uint8_t*)(vl_obj_->device->avs_writedata[b]); + void sRxPort_bus() { + // check mmio request + bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid + || device_->vcp2af_sRxPort_c0_mmioWrValid; + + // schedule CCI read responses + std::list::iterator cci_rd_it(cci_reads_.end()); + for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_rd_it == ie) && (it->cycles_left == 0)) { + cci_rd_it = it; + } + } + + // schedule CCI write responses + std::list::iterator cci_wr_it(cci_writes_.end()); + for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_wr_it == ie) && (it->cycles_left == 0)) { + cci_wr_it = it; + } + } + + // send CCI write response + device_->vcp2af_sRxPort_c1_rspValid = 0; + if (cci_wr_it != cci_writes_.end()) { + device_->vcp2af_sRxPort_c1_rspValid = 1; + device_->vcp2af_sRxPort_c1_hdr_resp_type = 0; + device_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata; + cci_writes_.erase(cci_wr_it); + } + + // send CCI read response (ensure mmio disabled) + device_->vcp2af_sRxPort_c0_rspValid = 0; + if (!mmio_req_enabled + && (cci_rd_it != cci_reads_.end())) { + device_->vcp2af_sRxPort_c0_rspValid = 1; + device_->vcp2af_sRxPort_c0_hdr_resp_type = 0; + memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); + device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; + /*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata); + for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) + printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]); + printf("\n");*/ + cci_reads_.erase(cci_rd_it); + } + } + + void sTxPort_bus() { + // process read requests + if (device_->af2cp_sTxPort_c0_valid) { + assert(!device_->vcp2af_sRxPort_c0_TxAlmFull); + cci_rd_req_t cci_req; + cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); + cci_req.addr = device_->af2cp_sTxPort_c0_hdr_address; + cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata; + auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); + memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE); + //printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); + cci_reads_.emplace_back(cci_req); + } + + // process write requests + if (device_->af2cp_sTxPort_c1_valid) { + assert(!device_->vcp2af_sRxPort_c1_TxAlmFull); + cci_wr_req_t cci_req; + cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); + cci_req.mdata = device_->af2cp_sTxPort_c1_hdr_mdata; + auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE); + memcpy(host_ptr, device_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); + cci_writes_.emplace_back(cci_req); + } + + // check queues overflow + device_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1)); + device_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); + } + + void avs_bus() { + for (int b = 0; b < MEMORY_BANKS; ++b) { + // process memory responses + device_->avs_readdatavalid[b] = 0; + if (!pending_mem_reqs_[b].empty() + && (*pending_mem_reqs_[b].begin())->ready) { + auto mem_rd_it = pending_mem_reqs_[b].begin(); + auto mem_req = *mem_rd_it; + device_->avs_readdatavalid[b] = 1; + memcpy(device_->avs_readdata[b], mem_req->data.data(), MEM_BLOCK_SIZE); + uint32_t addr = mem_req->addr; + pending_mem_reqs_[b].erase(mem_rd_it); + delete mem_req; + } + + // process memory requests + assert(!device_->avs_read[b] || !device_->avs_write[b]); + unsigned byte_addr = device_->avs_address[b] * MEM_BLOCK_SIZE; + if (device_->avs_write[b]) { + uint64_t byteen = device_->avs_byteenable[b]; + uint8_t* data = (uint8_t*)(device_->avs_writedata[b]); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; + (*ram_)[byte_addr + i] = data[i]; } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr); + + /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); } printf("\n");*/ + + // send dram request + ramulator::Request dram_req( + byte_addr, + ramulator::Request::Type::WRITE, + 0 + ); + dram_->send(dram_req); } - if (vl_obj_->device->avs_read[b]) { - mem_rd_req_t mem_req; - mem_req.addr = vl_obj_->device->avs_address[b]; - ram_->read(mem_req.data.data(), vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE); - mem_req.cycles_left = MEM_LATENCY; - for (auto& rsp : mem_reads_[b]) { - if (mem_req.addr == rsp.addr) { - // duplicate requests receive the same cycle delay - mem_req.cycles_left = rsp.cycles_left; - break; - } - } - mem_reads_[b].emplace_back(mem_req); + + if (device_->avs_read[b]) { + auto mem_req = new mem_rd_req_t(); + mem_req->addr = device_->avs_address[b]; + ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); + mem_req->ready = false; + pending_mem_reqs_[b].emplace_back(mem_req); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE); - for (auto& req : mem_reads_[b]) { + for (auto& req : pending_mem_reqs_[b]) { if (req.cycles_left != 0) printf(" !%0x", req.addr * MEM_BLOCK_SIZE); else printf(" %0x", req.addr * MEM_BLOCK_SIZE); } printf("}\n");*/ - } - } - vl_obj_->device->avs_waitrequest[b] = mem_stalled; + // send dram request + ramulator::Request dram_req( + byte_addr, + ramulator::Request::Type::READ, + std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) { + mem_req->ready = true; + }, placeholders::_1, mem_req), + 0 + ); + dram_->send(dram_req); + } + + device_->avs_waitrequest[b] = false; + } } + + typedef struct { + bool ready; + std::array data; + uint32_t addr; + } mem_rd_req_t; + + typedef struct { + int cycles_left; + std::array data; + uint64_t addr; + uint32_t mdata; + } cci_rd_req_t; + + typedef struct { + int cycles_left; + uint32_t mdata; + } cci_wr_req_t; + + typedef struct { + uint64_t* data; + size_t size; + uint64_t ioaddr; + } host_buffer_t; + + std::future future_; + bool stop_; + + std::unordered_map host_buffers_; + int64_t host_buffer_ids_; + + std::list pending_mem_reqs_[MEMORY_BANKS]; + + std::list cci_reads_; + + std::list cci_writes_; + + std::mutex mutex_; + + RAM *ram_; + + ramulator::Gem5Wrapper* dram_; + + Vvortex_afu_shim *device_; +#ifdef VCD_OUTPUT + VerilatedVcdC *trace_; +#endif +}; + +/////////////////////////////////////////////////////////////////////////////// + +opae_sim::opae_sim() + : impl_(new Impl()) +{} + +opae_sim::~opae_sim() { + delete impl_; +} + +int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { + return impl_->prepare_buffer(len, buf_addr, wsid, flags); +} + +void opae_sim::release_buffer(uint64_t wsid) { + impl_->release_buffer(wsid); +} + +void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { + impl_->get_io_address(wsid, ioaddr); +} + +void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { + impl_->write_mmio64(mmio_num, offset, value); +} + +void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + impl_->read_mmio64(mmio_num, offset, value); } \ No newline at end of file diff --git a/sim/vlsim/opae_sim.h b/sim/vlsim/opae_sim.h index aa19532f..21010b94 100644 --- a/sim/vlsim/opae_sim.h +++ b/sim/vlsim/opae_sim.h @@ -1,29 +1,8 @@ #pragma once -#include -#include - -#include -#include -#include -#include - -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - -#define CACHE_BLOCK_SIZE 64 - +#include namespace vortex { -class VL_OBJ; class RAM; class opae_sim { @@ -44,57 +23,8 @@ public: private: - typedef struct { - int cycles_left; - std::array data; - uint32_t addr; - } mem_rd_req_t; - - typedef struct { - int cycles_left; - std::array data; - uint64_t addr; - uint32_t mdata; - } cci_rd_req_t; - - typedef struct { - int cycles_left; - uint32_t mdata; - } cci_wr_req_t; - - typedef struct { - uint64_t* data; - size_t size; - uint64_t ioaddr; - } host_buffer_t; - - void reset(); - - void eval(); - - void step(); - - void sRxPort_bus(); - void sTxPort_bus(); - void avs_bus(); - - std::future future_; - bool stop_; - - std::unordered_map host_buffers_; - int64_t host_buffer_ids_; - - std::list mem_reads_ [MEMORY_BANKS]; - - std::list cci_reads_; - - std::list cci_writes_; - - std::mutex mutex_; - - RAM *ram_; - - VL_OBJ* vl_obj_; + class Impl; + Impl* impl_; }; } \ No newline at end of file diff --git a/third_party/Makefile b/third_party/Makefile index 8a9ed890..26b730ff 100644 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -1,4 +1,4 @@ -all: fpnew cocogfx softfloat +all: fpnew cocogfx softfloat ramulator fpnew: @@ -8,8 +8,11 @@ cocogfx: softfloat: SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC +ramulator: + $(MAKE) -C ramulator libramulator.a + clean: $(MAKE) clean -C cocogfx $(MAKE) -C softfloat/build/Linux-x86_64-GCC clean -.PHONY: all fpnew cocogfx softfloat \ No newline at end of file +.PHONY: all fpnew cocogfx softfloat ramulator \ No newline at end of file