SimX timing simulation
This commit is contained in:
427
sim/common/simobject.h
Normal file
427
sim/common/simobject.h
Normal file
@@ -0,0 +1,427 @@
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <assert.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class SimObjectBase;
|
||||
|
||||
// Base class of every scheduled event: holds a countdown and a fire() hook
// that the owner invokes once the countdown has elapsed.
class SimEventBase {
public:
  typedef std::shared_ptr<SimEventBase> Ptr;

  virtual ~SimEventBase() {}

  // Executes the action attached to the event.
  virtual void fire() const = 0;

  // Advances the countdown by one tick.
  // Returns true when the remaining delay has reached zero.
  bool step() {
    // Saturate at zero: an unconditional pre-decrement would wrap a zero
    // delay to 2^64-1 and the event would effectively never become due.
    if (delay_ != 0) {
      --delay_;
    }
    return (0 == delay_);
  }

protected:
  SimEventBase(uint64_t delay) : delay_(delay) {}

  uint64_t delay_; // ticks remaining before the event is due
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Pkt>
|
||||
class SimSimpleEvent : public SimEventBase {
|
||||
public:
|
||||
typedef std::function<void (const Pkt&)> Func;
|
||||
|
||||
template <typename... Args>
|
||||
static Ptr Create(const Func& func, const Pkt& pkt, uint64_t delay) {
|
||||
return std::make_shared<SimSimpleEvent>(func, pkt, delay);
|
||||
}
|
||||
|
||||
SimSimpleEvent(const Func& func, const Pkt& pkt, uint64_t delay)
|
||||
: SimEventBase(delay)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
|
||||
void fire() const override {
|
||||
func_(pkt_);
|
||||
}
|
||||
|
||||
protected:
|
||||
Func func_;
|
||||
Pkt pkt_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Pkt>
|
||||
class SimPortEvent : public SimEventBase {
|
||||
public:
|
||||
typedef std::function<void (const Pkt&, uint32_t)> Func;
|
||||
|
||||
template <typename... Args>
|
||||
static Ptr Create(const Func& func, const Pkt& pkt, uint32_t port_id, uint64_t delay) {
|
||||
return std::make_shared<SimPortEvent>(func, pkt, port_id, delay);
|
||||
}
|
||||
|
||||
SimPortEvent(const Func& func, const Pkt& pkt, uint32_t port_id, uint64_t delay)
|
||||
: SimEventBase(delay)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
, port_id_(port_id)
|
||||
{}
|
||||
|
||||
void fire() const override {
|
||||
func_(pkt_, port_id_);
|
||||
}
|
||||
|
||||
private:
|
||||
Func func_;
|
||||
Pkt pkt_;
|
||||
uint32_t port_id_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimPortBase {
|
||||
public:
|
||||
typedef std::shared_ptr<SimPortBase> Ptr;
|
||||
|
||||
virtual ~SimPortBase() {}
|
||||
|
||||
SimObjectBase* module() const {
|
||||
return module_;
|
||||
}
|
||||
|
||||
uint32_t port_id() const {
|
||||
return port_id_;
|
||||
}
|
||||
|
||||
SimPortBase* peer() const {
|
||||
return peer_;
|
||||
}
|
||||
|
||||
bool connected() const {
|
||||
return (peer_ != nullptr);
|
||||
}
|
||||
|
||||
bool is_slave() const {
|
||||
return is_slave_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
SimPortBase(SimObjectBase* module, bool is_slave);
|
||||
|
||||
void connect(SimPortBase* peer) {
|
||||
assert(peer_ == nullptr);
|
||||
peer_ = peer;
|
||||
}
|
||||
|
||||
void disconnect() {
|
||||
assert(peer_ == nullptr);
|
||||
peer_ = nullptr;
|
||||
}
|
||||
|
||||
SimObjectBase* module_;
|
||||
uint32_t port_id_;
|
||||
bool is_slave_;
|
||||
SimPortBase* peer_;
|
||||
|
||||
template <typename Pkt> friend class MasterPort;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Pkt>
|
||||
class SlavePort : public SimPortBase {
|
||||
public:
|
||||
typedef std::shared_ptr<SlavePort<Ptr>> Ptr;
|
||||
typedef std::function<void (const Pkt&, uint32_t)> Func;
|
||||
|
||||
static Ptr Create(SimObjectBase* module, const Func& func) {
|
||||
return std::make_shared<SlavePort<Pkt>>(module, func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static Ptr Create(SimObjectBase* module, T *obj, void (T::*entry)(const Pkt&, uint32_t)) {
|
||||
return std::make_shared<SlavePort<Pkt>>(module, obj, entry);
|
||||
}
|
||||
|
||||
SlavePort(SimObjectBase* module, const Func& func)
|
||||
: SimPortBase(module, true)
|
||||
, func_(func)
|
||||
{}
|
||||
|
||||
template <typename T>
|
||||
SlavePort(SimObjectBase* module, T *obj, void (T::*entry)(const Pkt&, uint32_t))
|
||||
: SimPortBase(module, true)
|
||||
, func_(std::bind(entry, obj, std::placeholders::_1, std::placeholders::_2))
|
||||
{}
|
||||
|
||||
SlavePort(SimObjectBase* module, SlavePort* peer)
|
||||
: SimPortBase(module, false)
|
||||
{
|
||||
this->connect(peer);
|
||||
}
|
||||
|
||||
void send(const Pkt& pkt, uint64_t delay) const;
|
||||
|
||||
const Func& func() const {
|
||||
return func_;
|
||||
}
|
||||
|
||||
protected:
|
||||
SlavePort& operator=(const SlavePort&);
|
||||
Func func_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Pkt>
|
||||
class MasterPort : public SimPortBase {
|
||||
public:
|
||||
typedef std::shared_ptr<MasterPort<Ptr>> Ptr;
|
||||
typedef std::function<void (const Pkt&, uint32_t)> Func;
|
||||
|
||||
static Ptr Create() {
|
||||
return std::make_shared<MasterPort<Ptr>>(module);
|
||||
}
|
||||
|
||||
MasterPort(SimObjectBase* module) : SimPortBase(module, false) {}
|
||||
|
||||
MasterPort(SimObjectBase* module, MasterPort* peer)
|
||||
: SimPortBase(module, false)
|
||||
{
|
||||
peer->connect(this);
|
||||
}
|
||||
|
||||
void bind(SlavePort<Pkt>* peer) {
|
||||
this->connect(peer);
|
||||
}
|
||||
|
||||
void unbind() {
|
||||
peer_->disconnect();
|
||||
this->disconnect();
|
||||
}
|
||||
|
||||
void send(const Pkt& pkt, uint64_t delay) const {
|
||||
assert(peer_ != nullptr);
|
||||
if (peer_->is_slave()) {
|
||||
auto slave = reinterpret_cast<const SlavePort<Pkt>*>(peer_);
|
||||
slave->send(pkt, delay);
|
||||
} else {
|
||||
auto master = reinterpret_cast<const MasterPort<Pkt>*>(peer_);
|
||||
master->send(pkt, delay);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MasterPort& operator=(const MasterPort&);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimContext;
|
||||
|
||||
class SimObjectBase {
|
||||
public:
|
||||
typedef std::shared_ptr<SimObjectBase> Ptr;
|
||||
|
||||
virtual ~SimObjectBase() {}
|
||||
|
||||
template <typename T, typename Pkt>
|
||||
void schedule(T *obj, void (T::*entry)(const Pkt&), const Pkt& pkt, uint64_t delay);
|
||||
|
||||
virtual void step(uint64_t cycle) = 0;
|
||||
|
||||
const std::string& name() const {
|
||||
return name_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
SimObjectBase(const SimContext& ctx, const char* name);
|
||||
|
||||
uint32_t allocate_port(SimPortBase* port) {
|
||||
uint32_t id = ports_.size();
|
||||
ports_.push_back(port);
|
||||
return id;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
std::vector<SimPortBase*> ports_;
|
||||
|
||||
friend class SimPlatform;
|
||||
friend class SimPortBase;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Impl>
|
||||
class SimObject : public SimObjectBase {
|
||||
public:
|
||||
typedef std::shared_ptr<Impl> Ptr;
|
||||
|
||||
template <typename... Args>
|
||||
static Ptr Create(Args&&... args);
|
||||
|
||||
protected:
|
||||
|
||||
SimObject(const SimContext& ctx, const char* name) : SimObjectBase(ctx, name) {}
|
||||
|
||||
void step(uint64_t cycle) override {
|
||||
this->impl().step(cycle);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
const Impl& impl() const {
|
||||
return static_cast<const Impl&>(*this);
|
||||
}
|
||||
|
||||
Impl& impl() {
|
||||
return static_cast<Impl&>(*this);
|
||||
}
|
||||
};
|
||||
|
||||
// Token type taken by SimObjectBase/SimObject constructors. Its constructor
// is private and only SimObject<Impl>::Create is befriended, so only that
// factory can produce one.
class SimContext {
|
||||
private:
|
||||
SimContext() {}
|
||||
// Grants SimObject<Impl>::Create, for every Impl, access to the constructor.
template <typename Impl> template <typename... Args>
|
||||
friend typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimPlatform {
|
||||
public:
|
||||
static SimPlatform& instance() {
|
||||
static SimPlatform s_inst;
|
||||
return s_inst;
|
||||
}
|
||||
|
||||
bool initialize() {
|
||||
//--
|
||||
return true;
|
||||
}
|
||||
|
||||
void finalize() {
|
||||
instance().clear();
|
||||
}
|
||||
|
||||
void register_object(const SimObjectBase::Ptr& obj) {
|
||||
objects_.push_back(obj);
|
||||
}
|
||||
|
||||
template <typename Pkt>
|
||||
void schedule(const typename SimSimpleEvent<Pkt>::Func& callback,
|
||||
const Pkt& pkt,
|
||||
uint64_t delay) {
|
||||
auto evt = SimSimpleEvent<Pkt>::Create(callback, pkt, delay);
|
||||
assert(delay != 0);
|
||||
events_.emplace_back(evt);
|
||||
}
|
||||
|
||||
template <typename Pkt>
|
||||
void schedule(const typename SimPortEvent<Pkt>::Func& callback,
|
||||
const Pkt& pkt,
|
||||
uint32_t port_id,
|
||||
uint64_t delay) {
|
||||
auto evt = SimPortEvent<Pkt>::Create(callback, pkt, port_id, delay);
|
||||
assert(delay != 0);
|
||||
events_.emplace_back(evt);
|
||||
}
|
||||
|
||||
void step() {
|
||||
// evaluate events
|
||||
auto evt_it = events_.begin();
|
||||
auto evt_it_end = events_.end();
|
||||
while (evt_it != evt_it_end) {
|
||||
auto& event = *evt_it;
|
||||
if (event->step()) {
|
||||
event->fire();
|
||||
evt_it = events_.erase(evt_it);
|
||||
} else {
|
||||
++evt_it;
|
||||
}
|
||||
}
|
||||
// evaluate components
|
||||
for (auto& object : objects_) {
|
||||
object->step(cycles_);
|
||||
}
|
||||
// advance clock
|
||||
++cycles_;
|
||||
}
|
||||
|
||||
uint64_t cycles() const {
|
||||
return cycles_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
SimPlatform() : cycles_(0) {}
|
||||
|
||||
virtual ~SimPlatform() {
|
||||
this->clear();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
objects_.clear();
|
||||
events_.clear();
|
||||
}
|
||||
|
||||
std::vector<SimObjectBase::Ptr> objects_;
|
||||
std::list<SimEventBase::Ptr> events_;
|
||||
uint64_t cycles_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline SimPortBase::SimPortBase(SimObjectBase* module, bool is_slave)
|
||||
: module_(module)
|
||||
, port_id_(module->allocate_port(this))
|
||||
, is_slave_(is_slave)
|
||||
, peer_(nullptr)
|
||||
{}
|
||||
|
||||
inline SimObjectBase::SimObjectBase(const SimContext&, const char* name)
|
||||
: name_(name)
|
||||
{}
|
||||
|
||||
template <typename Impl>
|
||||
template <typename... Args>
|
||||
typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
|
||||
auto obj = std::make_shared<Impl>(SimContext{}, std::forward<Args>(args)...);
|
||||
SimPlatform::instance().register_object(obj);
|
||||
return obj;
|
||||
}
|
||||
|
||||
template <typename Pkt>
|
||||
void SlavePort<Pkt>::send(const Pkt& pkt, uint64_t delay) const {
|
||||
if (func_) {
|
||||
SimPlatform::instance().schedule(func_, pkt, port_id_, delay);
|
||||
} else {
|
||||
assert(peer_ != nullptr);
|
||||
if (peer_->is_slave()) {
|
||||
auto slave = reinterpret_cast<const SlavePort<Pkt>*>(peer_);
|
||||
slave->send(pkt, delay);
|
||||
} else {
|
||||
auto master = reinterpret_cast<const MasterPort<Pkt>*>(peer_);
|
||||
master->send(pkt, delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename Pkt>
|
||||
void SimObjectBase::schedule(T *obj, void (T::*entry)(const Pkt&), const Pkt& pkt, uint64_t delay) {
|
||||
auto callback = std::bind(entry, obj, std::placeholders::_1);
|
||||
SimPlatform::instance().schedule(callback, pkt, delay);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
|
||||
template <typename... Args>
|
||||
@@ -8,24 +9,83 @@ void unused(Args&&...) {}
|
||||
|
||||
#define __unused(...) unused(__VA_ARGS__)
|
||||
|
||||
constexpr bool ispow2(uint64_t value) {
|
||||
// Number of zero bits above the highest set bit; 32 when `value` is zero
// (the builtin itself is undefined for zero).
constexpr uint32_t count_leading_zeros(uint32_t value) {
  return (0 == value) ? 32 : __builtin_clz(value);
}
|
||||
|
||||
// Number of zero bits below the lowest set bit; 32 when `value` is zero
// (the builtin itself is undefined for zero).
constexpr uint32_t count_trailing_zeros(uint32_t value) {
  return (0 == value) ? 32 : __builtin_ctz(value);
}
|
||||
|
||||
// True when exactly one bit of `value` is set (zero is not a power of two).
constexpr bool ispow2(uint32_t value) {
  return (value != 0) && (0 == (value & (value - 1)));
}
|
||||
|
||||
constexpr unsigned log2ceil(uint32_t value) {
|
||||
return 32 - __builtin_clz(value - 1);
|
||||
constexpr uint32_t log2ceil(uint32_t value) {
|
||||
return 32 - count_leading_zeros(value - 1);
|
||||
}
|
||||
|
||||
inline uint64_t align_size(uint64_t size, uint64_t alignment) {
|
||||
inline unsigned log2up(uint32_t value) {
|
||||
return std::max<uint32_t>(1, log2ceil(value));
|
||||
}
|
||||
|
||||
constexpr unsigned log2floor(uint32_t value) {
|
||||
return 31 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
constexpr unsigned ceil2(uint32_t value) {
|
||||
return 32 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
// Returns `bits` with bit `index` (0..63) cleared.
inline uint64_t bit_clr(uint64_t bits, uint32_t index) {
  assert(index <= 63);
  const uint64_t selected = 1ull << index;
  return bits & ~selected;
}
|
||||
|
||||
// Returns `bits` with bit `index` (0..63) set.
inline uint64_t bit_set(uint64_t bits, uint32_t index) {
  assert(index <= 63);
  const uint64_t selected = 1ull << index;
  return bits | selected;
}
|
||||
|
||||
// Returns whether bit `index` (0..63) of `bits` is set.
inline bool bit_get(uint64_t bits, uint32_t index) {
  assert(index <= 63);
  return 0 != (bits & (1ull << index));
}
|
||||
|
||||
// Returns `bits` with the inclusive bit window [start, end] cleared.
inline uint64_t bit_clrw(uint64_t bits, uint32_t start, uint32_t end) {
  assert(end >= start);
  assert(end <= 63);
  // (end - start + 1) low ones, moved up to the window position.
  const uint64_t low_ones = ~0ull >> (63 - (end - start));
  const uint64_t window = low_ones << start;
  return bits & ~window;
}
|
||||
|
||||
inline uint64_t bit_setw(uint64_t bits, uint32_t start, uint32_t end, uint64_t value) {
|
||||
assert(end >= start);
|
||||
assert(end <= 63);
|
||||
uint32_t shift = 63 - end;
|
||||
uint64_t dirty = (value << (shift + start)) >> shift;
|
||||
return bit_clrw(bits, start, end) | dirty;
|
||||
}
|
||||
|
||||
// Extracts the inclusive bit window [start, end] of `bits`, right-aligned.
inline uint64_t bit_getw(uint64_t bits, uint32_t start, uint32_t end) {
  assert(end >= start);
  assert(end <= 63);
  const uint64_t low_ones = ~0ull >> (63 - (end - start)); // window-wide mask
  return (bits >> start) & low_ones;
}
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment` (a power of two).
inline uint64_t aligned_size(uint64_t size, uint32_t alignment) {
  assert(0 == (alignment & (alignment - 1)));
  // Build the mask in 64 bits. Computing ~(alignment - 1) in 32 bits and
  // then widening zero-fills the upper half, which silently cleared bits
  // 32..63 of the result for sizes of 4 GiB and above.
  const uint64_t mask = static_cast<uint64_t>(alignment) - 1;
  return (size + mask) & ~mask;
}
|
||||
|
||||
// Apply integer sign extension
|
||||
inline uint32_t signExt(uint32_t w, uint32_t bit, uint32_t mask) {
|
||||
if (w >> (bit - 1))
|
||||
w |= ~mask;
|
||||
return w;
|
||||
// Sign-extends the low `width` bits of `word` to 32 bits: when bit
// (width - 1) is set, every bit above it is set as well; otherwise `word`
// is returned unchanged.
inline uint32_t sext32(uint32_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 32);
  if (width == 32) {
    // Nothing to extend; also avoids an undefined shift by the full width.
    return word;
  }
  // Unsigned literal: `1 << 31` would overflow a signed int.
  const uint32_t mask = (uint32_t(1) << width) - 1;
  return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
}
|
||||
|
||||
// return file extension
|
||||
|
||||
@@ -11,7 +11,7 @@ LDFLAGS += ../common/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
TOP = vx_cache_sim
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += args.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
|
||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp main.cpp
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
@@ -9,21 +9,31 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
class ArchDef {
|
||||
private:
|
||||
int num_cores_;
|
||||
int num_warps_;
|
||||
int num_threads_;
|
||||
int wsize_;
|
||||
int vsize_;
|
||||
int num_regs_;
|
||||
int num_csrs_;
|
||||
int num_barriers_;
|
||||
|
||||
public:
|
||||
ArchDef(const std::string &/*arch*/,
|
||||
ArchDef(const std::string& /*arch*/,
|
||||
int num_cores,
|
||||
int num_warps,
|
||||
int num_threads) {
|
||||
wsize_ = 4;
|
||||
vsize_ = 16;
|
||||
num_regs_ = 32;
|
||||
num_csrs_ = 4096;
|
||||
num_barriers_= NUM_BARRIERS;
|
||||
num_cores_ = num_cores;
|
||||
num_warps_ = num_warps;
|
||||
num_threads_ = num_threads;
|
||||
}
|
||||
int num_threads)
|
||||
: num_cores_(num_cores)
|
||||
, num_warps_(num_warps)
|
||||
, num_threads_(num_threads)
|
||||
, wsize_(4)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
{}
|
||||
|
||||
int wsize() const {
|
||||
return wsize_;
|
||||
@@ -56,17 +66,6 @@ public:
|
||||
int num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
int wsize_;
|
||||
int vsize_;
|
||||
int num_regs_;
|
||||
int num_csrs_;
|
||||
int num_barriers_;
|
||||
int num_threads_;
|
||||
int num_warps_;
|
||||
int num_cores_;
|
||||
};
|
||||
|
||||
}
|
||||
497
sim/simX/cache.cpp
Normal file
497
sim/simX/cache.cpp
Normal file
@@ -0,0 +1,497 @@
|
||||
#include "cache.h"
|
||||
#include "debug.h"
|
||||
#include <util.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct params_t {
|
||||
uint32_t sets_per_bank;
|
||||
uint32_t blocks_per_set;
|
||||
uint32_t words_per_block;
|
||||
|
||||
uint32_t word_select_addr_start;
|
||||
uint32_t word_select_addr_end;
|
||||
|
||||
uint32_t bank_select_addr_start;
|
||||
uint32_t bank_select_addr_end;
|
||||
|
||||
uint32_t set_select_addr_start;
|
||||
uint32_t set_select_addr_end;
|
||||
|
||||
uint32_t tag_select_addr_start;
|
||||
uint32_t tag_select_addr_end;
|
||||
|
||||
params_t(const CacheConfig& config) {
|
||||
uint32_t bank_bits = log2ceil(config.num_banks);
|
||||
uint32_t offset_bits = config.B - config.W;
|
||||
uint32_t log2_bank_size = config.C - bank_bits;
|
||||
uint32_t index_bits = log2_bank_size - (config.B << config.A);
|
||||
assert(log2_bank_size >= config.B);
|
||||
|
||||
|
||||
this->words_per_block = 1 << offset_bits;
|
||||
this->blocks_per_set = 1 << config.A;
|
||||
this->sets_per_bank = 1 << index_bits;
|
||||
|
||||
assert(config.ports_per_bank <= this->words_per_block);
|
||||
|
||||
// Word select
|
||||
this->word_select_addr_start = config.W;
|
||||
this->word_select_addr_end = (this->word_select_addr_start+offset_bits-1);
|
||||
|
||||
// Bank select
|
||||
this->bank_select_addr_start = (1+this->word_select_addr_end);
|
||||
this->bank_select_addr_end = (this->bank_select_addr_start+bank_bits-1);
|
||||
|
||||
// Set select
|
||||
this->set_select_addr_start = (1+this->bank_select_addr_end);
|
||||
this->set_select_addr_end = (this->set_select_addr_start+index_bits-1);
|
||||
|
||||
// Tag select
|
||||
this->tag_select_addr_start = (1+this->set_select_addr_end);
|
||||
this->tag_select_addr_end = (config.addr_width-1);
|
||||
}
|
||||
|
||||
uint32_t addr_bank_id(uint64_t word_addr) const {
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, bank_select_addr_start, bank_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t addr_set_id(uint64_t word_addr) const {
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, set_select_addr_start, set_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t addr_tag(uint64_t word_addr) const {
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
return bit_getw(word_addr, tag_select_addr_start, tag_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mem_addr(uint32_t bank_id, uint32_t set_id, uint64_t tag) const {
|
||||
uint64_t addr(0);
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
addr = bit_setw(addr, bank_select_addr_start, bank_select_addr_end, bank_id);
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
addr = bit_setw(addr, set_select_addr_start, set_select_addr_end, set_id);
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
addr = bit_setw(addr, tag_select_addr_start, tag_select_addr_end, tag);
|
||||
return addr;
|
||||
}
|
||||
};
|
||||
|
||||
// Bookkeeping for one cache block (one way of a set). No data is stored.
struct block_t {
|
||||
bool valid;
|
||||
bool dirty;
|
||||
uint64_t tag;
|
||||
// reset to 0 on a hit, incremented when another block of the set is looked up
uint32_t lru_ctr;
|
||||
};
|
||||
|
||||
// One cache set: `size` blocks.
struct set_t {
|
||||
std::vector<block_t> blocks;
|
||||
// Intentionally not explicit: bank_t builds its vector of sets from a plain
// block count and relies on this implicit conversion.
set_t(uint32_t size) : blocks(size) {}
|
||||
};
|
||||
|
||||
// Per-port slot of a bank request: which core input issued it and the tag to
// echo back in the response.
struct bank_req_info_t {
|
||||
bool valid;
|
||||
// index of the originating core input
uint32_t req_id;
|
||||
// tag copied from the core request
uint32_t req_tag;
|
||||
};
|
||||
|
||||
struct bank_req_t {
|
||||
bool valid;
|
||||
bool write;
|
||||
bool mshr_replay;
|
||||
uint64_t tag;
|
||||
uint32_t set_id;
|
||||
std::vector<bank_req_info_t> infos;
|
||||
|
||||
bank_req_t(uint32_t size)
|
||||
: valid(false)
|
||||
, write(false)
|
||||
, mshr_replay(false)
|
||||
, tag(0)
|
||||
, set_id(0)
|
||||
, infos(size)
|
||||
{}
|
||||
};
|
||||
|
||||
struct mshr_entry_t : public bank_req_t {
|
||||
uint32_t block_id;
|
||||
|
||||
mshr_entry_t(uint32_t size = 0)
|
||||
: bank_req_t(size)
|
||||
, block_id(0)
|
||||
{}
|
||||
};
|
||||
|
||||
class MSHR {
|
||||
private:
|
||||
std::vector<mshr_entry_t> entries_;
|
||||
uint32_t capacity_;
|
||||
|
||||
public:
|
||||
MSHR(uint32_t size)
|
||||
: entries_(size)
|
||||
, capacity_(0)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return (0 == capacity_);
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (capacity_ == entries_.size());
|
||||
}
|
||||
|
||||
int lookup(const bank_req_t& bank_req) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (entry.valid
|
||||
&& entry.set_id == bank_req.set_id
|
||||
&& entry.tag == bank_req.tag) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int allocate(const bank_req_t& bank_req, uint32_t block_id) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (!entry.valid) {
|
||||
*(bank_req_t*)&entry = bank_req;
|
||||
entry.valid = true;
|
||||
entry.mshr_replay = false;
|
||||
entry.block_id = block_id;
|
||||
++capacity_;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
mshr_entry_t& replay(uint32_t id) {
|
||||
auto& root_entry = entries_.at(id);
|
||||
assert(root_entry.valid);
|
||||
// make all related mshr entries for replay
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.valid
|
||||
&& entry.set_id == root_entry.set_id
|
||||
&& entry.tag == root_entry.tag) {
|
||||
entry.mshr_replay = true;
|
||||
}
|
||||
}
|
||||
return root_entry;
|
||||
}
|
||||
|
||||
bool try_pop(bank_req_t* out) {
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.valid && entry.mshr_replay) {
|
||||
*out = entry;
|
||||
entry.valid = false;
|
||||
--capacity_;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// State of one cache bank: its sets, its MSHR, the queue of requests that
// could not be accepted yet, and the request being processed this cycle.
struct bank_t {
|
||||
std::vector<set_t> sets;
|
||||
MSHR mshr;
|
||||
std::queue<bank_req_t> stall_buffer;
|
||||
bank_req_t active_req;
|
||||
|
||||
// Sizes the bank from the derived geometry (`params`) and the configured MSHR
// size / ports per bank (`config`).
bank_t(const CacheConfig& config,
|
||||
const params_t& params)
|
||||
// relies on set_t's implicit uint32_t constructor for the fill value
: sets(params.sets_per_bank, params.blocks_per_set)
|
||||
, mshr(config.mshr_size)
|
||||
, active_req(config.ports_per_bank)
|
||||
{}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Timing model of the cache. Port callbacks only latch incoming packets;
// all processing happens in step(), once per cycle.
class Cache::Impl {
private:
  Cache* const simobject_;
  CacheConfig config_;
  params_t params_;
  std::vector<bank_t> banks_;
  std::vector<std::pair<bool, MemReq>> core_reqs_; // latched request per input
  std::pair<bool, MemRsp> mem_rsp_;                // latched memory response
  std::vector<std::queue<uint32_t>> core_rsps_;    // pending response tags per input

public:
  Impl(Cache* simobject, const CacheConfig& config)
    : simobject_(simobject)
    , config_(config)
    , params_(config)
    , banks_(config.num_banks, {config, params_})
    , core_reqs_(config.num_inputs)
    , core_rsps_(config.num_inputs)
  {}

  // Memory-side callback: latches the response for the next step().
  void handleMemResponse(const MemRsp& response, uint32_t) {
    mem_rsp_ = {true, response};
  }

  // Core-side callback: latches the request in the slot of the receiving port.
  void handleCoreRequest(const MemReq& request, uint32_t port_id) {
    core_reqs_.at(port_id) = {true, request};
  }

  // Advances the cache by one cycle.
  void step(uint64_t /*cycle*/) {
    // process core responses: at most one per input per cycle
    for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
      auto& core_rsp = core_rsps_.at(req_id);
      if (!core_rsp.empty()) {
        simobject_->CoreRspPorts.at(req_id).send(MemRsp{core_rsp.front()}, config_.latency);
        core_rsp.pop();
      }
    }

    for (auto& bank : banks_) {
      auto& active_req = bank.active_req;

      // try schedule mshr replay
      if (!active_req.valid) {
        bank.mshr.try_pop(&active_req);
      }

      // try schedule stall replay
      if (!active_req.valid
       && !bank.stall_buffer.empty()) {
        active_req = bank.stall_buffer.front();
        bank.stall_buffer.pop();
      }
    }

    // handle memory fills
    if (mem_rsp_.first) {
      mem_rsp_.first = false;
      // the fill tag carries the bank in bits 0..15 and the MSHR slot in 16..31
      auto bank_id = bit_getw(mem_rsp_.second.tag, 0, 15);
      auto mshr_id = bit_getw(mem_rsp_.second.tag, 16, 31);
      this->processMemoryFill(bank_id, mshr_id);
    }

    // handle incoming core requests
    for (uint32_t i = 0, n = core_reqs_.size(); i < n; ++i) {
      auto& entry = core_reqs_.at(i);
      if (!entry.first)
        continue;

      entry.first = false;

      auto& core_req = entry.second;
      auto bank_id = params_.addr_bank_id(core_req.addr);
      auto set_id  = params_.addr_set_id(core_req.addr);
      auto tag     = params_.addr_tag(core_req.addr);
      auto port_id = i % config_.ports_per_bank;

      // create bank request
      bank_req_t bank_req(config_.ports_per_bank);
      bank_req.valid = true;
      bank_req.write = core_req.write;
      bank_req.mshr_replay = false;
      bank_req.tag = tag;
      bank_req.set_id = set_id;
      bank_req.infos.at(port_id) = {true, i, core_req.tag};

      auto& bank = banks_.at(bank_id);

      // check MSHR capacity
      if (bank.mshr.full()) {
        // add to stall buffer
        bank.stall_buffer.emplace(bank_req);
        continue;
      }

      auto& active_req = bank.active_req;

      // check pending MSHR request
      if (active_req.valid
       && active_req.mshr_replay) {
        // add to stall buffer
        bank.stall_buffer.emplace(bank_req);
        continue;
      }

      // check bank conflicts
      if (active_req.valid) {
        // check port conflict
        if (active_req.write != core_req.write
         || active_req.set_id != set_id
         || active_req.tag != tag
         || active_req.infos[port_id].valid) {
          // add to stall buffer
          bank.stall_buffer.emplace(bank_req);
          continue;
        }
        // update pending request infos
        active_req.infos[port_id] = bank_req.infos[port_id];
      } else {
        // schedule new request
        active_req = bank_req;
      }
    }

    // process active request
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      this->processBankRequest(bank_id);
    }
  }

  // Applies a memory fill: marks the waiting MSHR entries for replay and
  // installs the tag in the block chosen at allocation time.
  void processMemoryFill(uint32_t bank_id, uint32_t mshr_id) {
    auto& bank = banks_.at(bank_id);
    auto& root_entry = bank.mshr.replay(mshr_id);
    auto& set = bank.sets.at(root_entry.set_id);
    auto& block = set.blocks.at(root_entry.block_id);
    block.valid = true;
    block.tag = root_entry.tag;
  }

  // Processes the bank's active request, if any: replay, hit, or miss.
  void processBankRequest(uint32_t bank_id) {
    auto& bank = banks_.at(bank_id);
    auto& active_req = bank.active_req;
    if (!active_req.valid)
      return;

    active_req.valid = false;

    auto& set = bank.sets.at(active_req.set_id);

    if (active_req.mshr_replay) {
      // the fill has arrived: answer the waiting requesters
      sendCoreResponses(active_req);
      return;
    }

    bool hit = false;
    bool found_free_block = false;
    int hit_block_id = 0;
    int repl_block_id = 0;
    uint32_t max_cnt = 0;

    for (int i = 0, n = set.blocks.size(); i < n; ++i) {
      auto& block = set.blocks.at(i);
      if (block.valid) {
        if (block.tag == active_req.tag) {
          block.lru_ctr = 0;
          hit_block_id = i;
          hit = true;
        } else {
          ++block.lru_ctr;
        }
        // Only consider evicting a valid block while no free way is known.
        // Previously a later, older valid block could override an earlier
        // free way, and because found_free_block was set, its dirty
        // write-back was skipped as well.
        if (!found_free_block && max_cnt < block.lru_ctr) {
          max_cnt = block.lru_ctr;
          repl_block_id = i;
        }
      } else {
        found_free_block = true;
        repl_block_id = i;
      }
    }

    if (hit) {
      //
      // HIT handling
      //
      if (active_req.write) {
        // handle write hit
        auto& hit_block = set.blocks.at(hit_block_id);
        if (config_.write_through) {
          // forward write request to memory
          MemReq mem_req;
          mem_req.addr  = params_.mem_addr(bank_id, active_req.set_id, hit_block.tag);
          mem_req.write = true;
          mem_req.tag   = 0;
          simobject_->MemReqPort.send(mem_req, 1);
        } else {
          // mark block as dirty
          hit_block.dirty = true;
        }
      }
      // send core response
      sendCoreResponses(active_req);
    } else {
      //
      // MISS handling
      //
      if (!found_free_block && !config_.write_through) {
        // write back dirty block
        auto& repl_block = set.blocks.at(repl_block_id);
        if (repl_block.dirty) {
          MemReq mem_req;
          mem_req.addr  = params_.mem_addr(bank_id, active_req.set_id, repl_block.tag);
          mem_req.write = true;
          mem_req.tag   = 0; // was left unset; matches the other write requests
          simobject_->MemReqPort.send(mem_req, 1);
        }
      }

      if (active_req.write && config_.write_through) {
        // forward write request to memory
        {
          MemReq mem_req;
          mem_req.addr  = params_.mem_addr(bank_id, active_req.set_id, active_req.tag);
          mem_req.write = true;
          mem_req.tag   = 0;
          simobject_->MemReqPort.send(mem_req, 1);
        }
        // send core response
        sendCoreResponses(active_req);
      } else {
        // lookup: is a fill for this block already outstanding?
        int pending = bank.mshr.lookup(active_req);

        // allocate MSHR
        // NOTE(review): allocate() returns -1 when the table is full, and
        // requests replayed from the stall buffer skip the full() check made
        // in step() -- confirm this cannot happen here.
        int mshr_id = bank.mshr.allocate(active_req, repl_block_id);

        // send fill request
        if (pending == -1) {
          MemReq mem_req;
          mem_req.addr  = params_.mem_addr(bank_id, active_req.set_id, active_req.tag);
          mem_req.write = active_req.write;
          mem_req.tag   = bit_setw(0, 0, 15, bank_id);
          mem_req.tag   = bit_setw(mem_req.tag, 16, 31, mshr_id);
          simobject_->MemReqPort.send(mem_req, 1);
        }
      }
    }
  }

private:
  // Queues one response per *valid* info slot of `req`. Unused slots are
  // zero-initialised and previously produced spurious tag-0 responses on
  // input 0.
  void sendCoreResponses(const bank_req_t& req) {
    for (auto& info : req.infos) {
      if (!info.valid)
        continue;
      core_rsps_.at(info.req_id).emplace(info.req_tag);
    }
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the implementation object and the cache's ports.
//
// The per-input port vectors are built element by element. The previous
// `vector(count, port)` form constructed ONE port and copy-constructed the
// rest from it, so every copy carried the same port_id (all core requests
// were latched into slot 0) and the module recorded a pointer to the
// destroyed temporary. The vectors are filled inside the initializer list,
// before the memory ports, so the core request ports still receive the first
// port ids 0..num_inputs-1, which handleCoreRequest uses as the input index.
Cache::Cache(const SimContext& ctx, const char* name, const CacheConfig& config)
  : SimObject<Cache>(ctx, name)
  , impl_(new Impl(this, config))
  , CoreReqPorts([&] {
      std::vector<SlavePort<MemReq>> ports;
      ports.reserve(config.num_inputs); // no reallocation: addresses stay stable
      for (uint32_t i = 0; i < config.num_inputs; ++i) {
        ports.emplace_back(this, impl_, &Cache::Impl::handleCoreRequest);
      }
      return ports;
    }())
  , CoreRspPorts([&] {
      std::vector<MasterPort<MemRsp>> ports;
      ports.reserve(config.num_inputs);
      for (uint32_t i = 0; i < config.num_inputs; ++i) {
        ports.emplace_back(this);
      }
      return ports;
    }())
  , MemReqPort(this)
  , MemRspPort(this, impl_, &Impl::handleMemResponse)
{}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
Cache::~Cache() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
// Per-cycle entry point: forwards to the implementation object.
void Cache::step(uint64_t cycle) {
|
||||
impl_->step(cycle);
|
||||
}
|
||||
40
sim/simX/cache.h
Normal file
40
sim/simX/cache.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "memsim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Plain aggregate describing one cache instance.
// Field order is part of the interface: callers brace-initialise this
// struct positionally, so fields must not be reordered.
struct CacheConfig {
  // Geometry (all four values are log2-encoded)
  uint8_t C;                // cache size (log2)
  uint8_t B;                // block size (log2)
  uint8_t W;                // word size (log2)
  uint8_t A;                // associativity (log2)

  uint8_t addr_width;       // number of word address bits

  // Organisation
  uint8_t num_banks;        // bank count
  uint8_t ports_per_bank;   // port count of each bank
  uint8_t num_inputs;       // input count

  bool write_through;       // true when the cache is write-through

  // Buffering and timing
  uint16_t victim_size;     // size of the victim cache
  uint16_t mshr_size;       // size of the MSHR buffer
  uint8_t latency;          // latency of the pipeline
};
|
||||
|
||||
// Cache simulation object. The actual behaviour lives in the private Impl
// class (pimpl); this class only owns it and exposes the ports.
class Cache : public SimObject<Cache> {
private:
  class Impl;
  Impl* impl_; // owned: allocated in the constructor, deleted in the destructor

public:
  Cache(const SimContext& ctx, const char* name, const CacheConfig& config);
  ~Cache();

  // impl_ is an owning raw pointer, so a member-wise copy would make two
  // objects delete the same Impl. Copying is therefore disabled.
  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // Advances the cache by one simulation cycle.
  void step(uint64_t cycle);

  // Core-facing ports: one request/response pair per configured input.
  std::vector<SlavePort<MemReq>>  CoreReqPorts;
  std::vector<MasterPort<MemRsp>> CoreRspPorts;

  // Memory-facing ports.
  MasterPort<MemReq>              MemReqPort;
  SlavePort<MemRsp>               MemRspPort;
};
|
||||
|
||||
}
|
||||
21
sim/simX/constants.h
Normal file
21
sim/simX/constants.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 18
|
||||
#endif
|
||||
|
||||
namespace vortex {
|
||||
|
||||
struct Constants {
|
||||
|
||||
static constexpr uint32_t CORE_TO_DCACHE_DELAY = 1 + SM_ENABLE;
|
||||
static constexpr uint32_t CORE_TO_ICACHE_DELAY = 1;
|
||||
|
||||
static constexpr uint32_t ICACHE_TO_MEM_DELAY = 2;
|
||||
static constexpr uint32_t DCACHE_TO_MEM_DELAY = 2;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
@@ -12,34 +12,92 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
: id_(id)
|
||||
Core::Core(const SimContext& ctx, const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
: SimObject(ctx, "Core")
|
||||
, id_(id)
|
||||
, arch_(arch)
|
||||
, decoder_(decoder)
|
||||
, mem_(mem)
|
||||
, shared_mem_(1, SMEM_SIZE)
|
||||
, inst_in_schedule_("schedule")
|
||||
, inst_in_fetch_("fetch")
|
||||
, inst_in_decode_("decode")
|
||||
, inst_in_issue_("issue")
|
||||
, inst_in_execute_("execute")
|
||||
, inst_in_writeback_("writeback") {
|
||||
in_use_iregs_.resize(arch.num_warps(), 0);
|
||||
in_use_fregs_.resize(arch.num_warps(), 0);
|
||||
in_use_vregs_.reset();
|
||||
|
||||
csrs_.resize(arch_.num_csrs(), 0);
|
||||
|
||||
fcsrs_.resize(arch_.num_warps(), 0);
|
||||
|
||||
barriers_.resize(arch_.num_barriers(), 0);
|
||||
|
||||
warps_.resize(arch_.num_warps());
|
||||
, warps_(arch.num_warps())
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, csrs_(arch.num_csrs(), 0)
|
||||
, fcsrs_(arch.num_warps(), 0)
|
||||
, ibuffers_(arch.num_warps(), IBUF_SIZE)
|
||||
, scoreboard_(arch_)
|
||||
, exe_units_((int)ExeType::MAX)
|
||||
, icache_(Cache::Create("Icache", CacheConfig{
|
||||
log2ceil(ICACHE_SIZE), // C
|
||||
log2ceil(L1_BLOCK_SIZE),// B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
1, // number of banks
|
||||
1, // number of ports
|
||||
1, // request size
|
||||
true, // write-through
|
||||
0, // victim size
|
||||
NUM_WARPS, // mshr
|
||||
2, // pipeline latency
|
||||
}))
|
||||
, dcache_(Cache::Create("Dcache", CacheConfig{
|
||||
log2ceil(DCACHE_SIZE), // C
|
||||
log2ceil(L1_BLOCK_SIZE),// B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
DCACHE_NUM_BANKS, // number of banks
|
||||
DCACHE_NUM_PORTS, // number of ports
|
||||
(uint8_t)arch.num_threads(), // request size
|
||||
true, // write-through
|
||||
0, // victim size
|
||||
DCACHE_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}))
|
||||
, l1_mem_switch_(Switch<MemReq, MemRsp>::Create("l1_arb", ArbiterType::Priority, 2))
|
||||
, icache_rsp_port_(this, this, &Core::icache_handleCacheReponse)
|
||||
, dcache_rsp_port_(arch.num_threads(), {this, reinterpret_cast<LsuUnit*>(exe_units_.at((int)ExeType::LSU).get()) , &LsuUnit::handleCacheReponse})
|
||||
, fetch_stage_("fetch")
|
||||
, decode_stage_("decode")
|
||||
, issue_stage_("issue")
|
||||
, execute_stage_("execute")
|
||||
, commit_stage_("writeback")
|
||||
, pending_icache_(arch_.num_warps())
|
||||
, stalled_warps_(0)
|
||||
, last_schedule_wid_(0)
|
||||
, pending_instrs_(0)
|
||||
, ebreak_(false)
|
||||
, stats_insts_(0)
|
||||
, stats_loads_(0)
|
||||
, stats_stores_(0)
|
||||
, MemRspPort(this, &l1_mem_switch_->RspIn)
|
||||
, MemReqPort(this, &l1_mem_switch_->ReqOut)
|
||||
{
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
warps_[i] = std::make_shared<Warp>(this, i);
|
||||
warps_.at(i) = std::make_shared<Warp>(this, i);
|
||||
}
|
||||
|
||||
this->clear();
|
||||
// register execute units
|
||||
exe_units_.at((int)ExeType::ALU) = std::make_shared<AluUnit>(this);
|
||||
exe_units_.at((int)ExeType::LSU) = std::make_shared<LsuUnit>(this);
|
||||
exe_units_.at((int)ExeType::CSR) = std::make_shared<CsrUnit>(this);
|
||||
exe_units_.at((int)ExeType::FPU) = std::make_shared<FpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::GPU) = std::make_shared<GpuUnit>(this);
|
||||
|
||||
// connect l1 caches
|
||||
icache_->CoreRspPorts.at(0).bind(&icache_rsp_port_);
|
||||
for (int i = 0; i < arch_.num_threads(); ++i) {
|
||||
dcache_->CoreRspPorts.at(i).bind(&dcache_rsp_port_.at(i));
|
||||
}
|
||||
|
||||
// connect l1 switch
|
||||
icache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[0]);
|
||||
dcache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[1]);
|
||||
l1_mem_switch_->RspOut[0].bind(&icache_->MemRspPort);
|
||||
l1_mem_switch_->RspOut[1].bind(&dcache_->MemRspPort);
|
||||
|
||||
// activate warp0
|
||||
warps_.at(0)->setTmask(0, true);
|
||||
}
|
||||
|
||||
Core::~Core() {
|
||||
@@ -51,194 +109,164 @@ Core::~Core() {
|
||||
}
|
||||
}
|
||||
|
||||
void Core::clear() {
|
||||
for (int w = 0; w < arch_.num_warps(); ++w) {
|
||||
in_use_iregs_[w].reset();
|
||||
in_use_fregs_[w].reset();
|
||||
}
|
||||
stalled_warps_.reset();
|
||||
|
||||
in_use_vregs_.reset();
|
||||
|
||||
for (auto& csr : csrs_) {
|
||||
csr = 0;
|
||||
}
|
||||
|
||||
for (auto& fcsr : fcsrs_) {
|
||||
fcsr = 0;
|
||||
}
|
||||
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
|
||||
for (auto warp : warps_) {
|
||||
warp->clear();
|
||||
}
|
||||
|
||||
inst_in_schedule_.clear();
|
||||
inst_in_fetch_.clear();
|
||||
inst_in_decode_.clear();
|
||||
inst_in_issue_.clear();
|
||||
inst_in_execute_.clear();
|
||||
inst_in_writeback_.clear();
|
||||
print_bufs_.clear();
|
||||
|
||||
steps_ = 0;
|
||||
insts_ = 0;
|
||||
loads_ = 0;
|
||||
stores_ = 0;
|
||||
|
||||
inst_in_schedule_.valid = true;
|
||||
warps_[0]->setTmask(0, true);
|
||||
|
||||
ebreak_ = false;
|
||||
void Core::icache_handleCacheReponse(const MemRsp& response, uint32_t /*port_id*/) {
|
||||
// advance to decode stage
|
||||
uint32_t wid = response.tag;
|
||||
pipeline_state_t state;
|
||||
pending_icache_.remove(wid, &state);
|
||||
auto latency = (SimPlatform::instance().cycles() - state.icache_latency);
|
||||
state.icache_latency = latency;
|
||||
decode_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::step() {
|
||||
void Core::step(uint64_t cycle) {
|
||||
__unused (cycle);
|
||||
D(2, "###########################################################");
|
||||
D(2, std::dec << "Core" << id_ << ": cycle: " << cycle);
|
||||
|
||||
steps_++;
|
||||
D(2, std::dec << "Core" << id_ << ": cycle: " << steps_);
|
||||
|
||||
this->writeback();
|
||||
this->commit();
|
||||
this->execute();
|
||||
this->issue();
|
||||
this->decode();
|
||||
this->fetch();
|
||||
this->schedule();
|
||||
|
||||
DPN(2, std::flush);
|
||||
}
|
||||
|
||||
void Core::schedule() {
|
||||
if (!inst_in_schedule_.enter(&inst_in_fetch_))
|
||||
return;
|
||||
|
||||
void Core::warp_scheduler() {
|
||||
bool foundSchedule = false;
|
||||
int scheduled_warp = inst_in_schedule_.wid;
|
||||
int scheduled_warp = last_schedule_wid_;
|
||||
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
// round robin scheduling
|
||||
// round robin scheduling
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
scheduled_warp = (scheduled_warp + 1) % warps_.size();
|
||||
bool is_active = warps_[scheduled_warp]->active();
|
||||
bool stalled = stalled_warps_[scheduled_warp];
|
||||
if (is_active && !stalled) {
|
||||
bool warp_active = warps_.at(scheduled_warp)->active();
|
||||
bool warp_stalled = stalled_warps_.test(scheduled_warp);
|
||||
if (warp_active && !warp_stalled) {
|
||||
last_schedule_wid_ = scheduled_warp;
|
||||
foundSchedule = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundSchedule)
|
||||
return;
|
||||
return;
|
||||
|
||||
D(2, "Schedule: wid=" << scheduled_warp);
|
||||
inst_in_schedule_.wid = scheduled_warp;
|
||||
// suspend warp until decode
|
||||
stalled_warps_.set(scheduled_warp);
|
||||
|
||||
// advance pipeline
|
||||
inst_in_schedule_.next(&inst_in_fetch_);
|
||||
auto& warp = warps_.at(scheduled_warp);
|
||||
stats_insts_ += warp->getActiveThreads();
|
||||
|
||||
pipeline_state_t state;
|
||||
warp->eval(&state);
|
||||
|
||||
D(4, state);
|
||||
|
||||
// advance to fetch stage
|
||||
++pending_instrs_;
|
||||
fetch_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::fetch() {
|
||||
if (!inst_in_fetch_.enter(&inst_in_issue_))
|
||||
return;
|
||||
// schedule icache request
|
||||
pipeline_state_t state;
|
||||
if (fetch_stage_.try_pop(&state)) {
|
||||
state.icache_latency = SimPlatform::instance().cycles();
|
||||
MemReq mem_req;
|
||||
mem_req.addr = state.PC;
|
||||
mem_req.write = false;
|
||||
mem_req.tag = pending_icache_.allocate(state);
|
||||
icache_->CoreReqPorts.at(0).send(mem_req, 1);
|
||||
}
|
||||
|
||||
int wid = inst_in_fetch_.wid;
|
||||
|
||||
auto active_threads_b = warps_[wid]->getActiveThreads();
|
||||
warps_[wid]->step(&inst_in_fetch_);
|
||||
auto active_threads_a = warps_[wid]->getActiveThreads();
|
||||
|
||||
insts_ += active_threads_b;
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "*** warp#" << wid << " active threads changed to " << active_threads_a);
|
||||
}
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
D(3, "*** warp#" << wid << " fetch stalled");
|
||||
stalled_warps_[wid] = true;
|
||||
}
|
||||
|
||||
D(4, inst_in_fetch_);
|
||||
|
||||
// advance pipeline
|
||||
inst_in_fetch_.next(&inst_in_issue_);
|
||||
// schedule next warp
|
||||
this->warp_scheduler();
|
||||
}
|
||||
|
||||
void Core::decode() {
|
||||
if (!inst_in_decode_.enter(&inst_in_issue_))
|
||||
return;
|
||||
pipeline_state_t state;
|
||||
if (!decode_stage_.try_pop(&state))
|
||||
return;
|
||||
|
||||
// advance pipeline
|
||||
inst_in_decode_.next(&inst_in_issue_);
|
||||
if (state.stall_warp) {
|
||||
D(3, "*** warp#" << state.wid << " fetch stalled");
|
||||
} else {
|
||||
// release warp
|
||||
stalled_warps_.reset(state.wid);
|
||||
}
|
||||
|
||||
// advance to issue stage
|
||||
issue_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::issue() {
|
||||
if (!inst_in_issue_.enter(&inst_in_execute_))
|
||||
return;
|
||||
if (!issue_stage_.empty()) {
|
||||
// insert to ibuffer
|
||||
auto& state = issue_stage_.top();
|
||||
auto& ibuffer = ibuffers_.at(state.wid);
|
||||
if (!ibuffer.full()) {
|
||||
ibuffer.push(state);
|
||||
issue_stage_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// issue ibuffer instructions
|
||||
for (auto& ibuffer : ibuffers_) {
|
||||
if (ibuffer.empty())
|
||||
continue;
|
||||
|
||||
bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_vregs & in_use_vregs_) != 0;
|
||||
auto& state = ibuffer.top();
|
||||
|
||||
// check scoreboard
|
||||
if (scoreboard_.in_use(state))
|
||||
continue;
|
||||
|
||||
// update scoreboard
|
||||
scoreboard_.reserve(state);
|
||||
|
||||
// advance to execute stage
|
||||
execute_stage_.push(state);
|
||||
|
||||
ibuffer.pop();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Core::execute() {
|
||||
// process stage inputs
|
||||
if (!execute_stage_.empty()) {
|
||||
auto& state = execute_stage_.top();
|
||||
auto& exe_unit = exe_units_.at((int)state.exe_type);
|
||||
exe_unit->push_input(state);
|
||||
execute_stage_.pop();
|
||||
}
|
||||
|
||||
// advance execute units
|
||||
for (auto& exe_unit : exe_units_) {
|
||||
exe_unit->step();
|
||||
}
|
||||
|
||||
if (in_use_regs) {
|
||||
D(3, "*** Issue: registers not ready!");
|
||||
inst_in_issue_.stalled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (inst_in_issue_.rdest_type) {
|
||||
case 1:
|
||||
if (inst_in_issue_.rdest)
|
||||
in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
// commit completed instructions
|
||||
for (auto& exe_unit : exe_units_) {
|
||||
pipeline_state_t state;
|
||||
if (exe_unit->pop_output(&state)) {
|
||||
if (state.stall_warp) {
|
||||
stalled_warps_.reset(state.wid);
|
||||
}
|
||||
// advance to commit stage
|
||||
commit_stage_.push(state);
|
||||
}
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_issue_.next(&inst_in_execute_);
|
||||
}
|
||||
|
||||
void Core::execute() {
|
||||
if (!inst_in_execute_.enter(&inst_in_writeback_))
|
||||
void Core::commit() {
|
||||
pipeline_state_t state;
|
||||
if (!commit_stage_.try_pop(&state))
|
||||
return;
|
||||
|
||||
// advance pipeline
|
||||
inst_in_execute_.next(&inst_in_writeback_);
|
||||
}
|
||||
|
||||
void Core::writeback() {
|
||||
if (!inst_in_writeback_.enter(NULL))
|
||||
return;
|
||||
|
||||
switch (inst_in_writeback_.rdest_type) {
|
||||
case 1:
|
||||
in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst_in_writeback_.stall_warp) {
|
||||
stalled_warps_[inst_in_writeback_.wid] = false;
|
||||
D(3, "*** warp#" << inst_in_writeback_.wid << " fetch released");
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_writeback_.next(NULL);
|
||||
// update scoreboard
|
||||
scoreboard_.release(state);
|
||||
}
|
||||
|
||||
Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
@@ -281,16 +309,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
return arch_.num_cores();
|
||||
} else if (addr == CSR_MINSTRET) {
|
||||
// NumInsts
|
||||
return insts_;
|
||||
return stats_insts_;
|
||||
} else if (addr == CSR_MINSTRET_H) {
|
||||
// NumInsts
|
||||
return (Word)(insts_ >> 32);
|
||||
return (Word)(stats_insts_ >> 32);
|
||||
} else if (addr == CSR_MCYCLE) {
|
||||
// NumCycles
|
||||
return (Word)steps_;
|
||||
return (Word)SimPlatform::instance().cycles();
|
||||
} else if (addr == CSR_MCYCLE_H) {
|
||||
// NumCycles
|
||||
return (Word)(steps_ >> 32);
|
||||
return (Word)(SimPlatform::instance().cycles() >> 32);
|
||||
} else {
|
||||
return csrs_.at(addr);
|
||||
}
|
||||
@@ -328,7 +356,7 @@ Word Core::icache_fetch(Addr addr) {
|
||||
}
|
||||
|
||||
Word Core::dcache_read(Addr addr, Size size) {
|
||||
++loads_;
|
||||
++stats_loads_;
|
||||
Word data = 0;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
@@ -342,7 +370,7 @@ Word Core::dcache_read(Addr addr, Size size) {
|
||||
}
|
||||
|
||||
void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
++stores_;
|
||||
++stats_stores_;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SMEM_BASE_ADDR)) {
|
||||
@@ -359,23 +387,19 @@ void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
}
|
||||
|
||||
bool Core::running() const {
|
||||
return inst_in_fetch_.valid
|
||||
|| inst_in_decode_.valid
|
||||
|| inst_in_issue_.valid
|
||||
|| inst_in_execute_.valid
|
||||
|| inst_in_writeback_.valid;
|
||||
return pending_instrs_;
|
||||
}
|
||||
|
||||
void Core::printStats() const {
|
||||
std::cout << "Steps : " << steps_ << std::endl
|
||||
<< "Insts : " << insts_ << std::endl
|
||||
<< "Loads : " << loads_ << std::endl
|
||||
<< "Stores: " << stores_ << std::endl;
|
||||
std::cout << "Cycles: " << SimPlatform::instance().cycles() << std::endl
|
||||
<< "Insts : " << stats_insts_ << std::endl
|
||||
<< "Loads : " << stats_loads_ << std::endl
|
||||
<< "Stores: " << stats_stores_ << std::endl;
|
||||
}
|
||||
|
||||
void Core::writeToStdOut(Addr addr, Word data) {
|
||||
uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
|
||||
auto& ss_buf = print_bufs_[tid];
|
||||
auto& ss_buf = print_bufs_.at(tid);
|
||||
char c = (char)data;
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
|
||||
@@ -4,10 +4,11 @@
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
#include <simobject.h>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
@@ -15,20 +16,21 @@
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "ibuffer.h"
|
||||
#include "scoreboard.h"
|
||||
#include "exeunit.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core {
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
|
||||
|
||||
Core(const SimContext& ctx, const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
|
||||
~Core();
|
||||
|
||||
void clear();
|
||||
|
||||
bool running() const;
|
||||
|
||||
void step();
|
||||
void step(uint64_t cycle);
|
||||
|
||||
void printStats() const;
|
||||
|
||||
@@ -40,7 +42,7 @@ public:
|
||||
return *warps_.at(i);
|
||||
}
|
||||
|
||||
Decoder& decoder() {
|
||||
const Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
@@ -48,16 +50,12 @@ public:
|
||||
return arch_;
|
||||
}
|
||||
|
||||
unsigned long num_insts() const {
|
||||
return insts_;
|
||||
}
|
||||
|
||||
unsigned long num_steps() const {
|
||||
return steps_;
|
||||
unsigned long stats_insts() const {
|
||||
return stats_insts_;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
return warps_[0]->getIRegValue(reg);
|
||||
return warps_.at(0)->getIRegValue(reg);
|
||||
}
|
||||
|
||||
Word get_csr(Addr addr, int tid, int wid);
|
||||
@@ -73,50 +71,66 @@ public:
|
||||
void dcache_write(Addr, Word, Size);
|
||||
|
||||
void trigger_ebreak();
|
||||
|
||||
bool check_ebreak() const;
|
||||
|
||||
private:
|
||||
private:
|
||||
|
||||
void schedule();
|
||||
void fetch();
|
||||
void decode();
|
||||
void issue();
|
||||
void execute();
|
||||
void writeback();
|
||||
void commit();
|
||||
|
||||
void warp_scheduler();
|
||||
|
||||
void icache_handleCacheReponse(const MemRsp& response, uint32_t port_id);
|
||||
|
||||
void writeToStdOut(Addr addr, Word data);
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
RegMask in_use_vregs_;
|
||||
WarpMask stalled_warps_;
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Word> csrs_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
Word id_;
|
||||
const ArchDef &arch_;
|
||||
Decoder &decoder_;
|
||||
MemoryUnit &mem_;
|
||||
const ArchDef& arch_;
|
||||
const Decoder& decoder_;
|
||||
MemoryUnit& mem_;
|
||||
#ifdef SM_ENABLE
|
||||
RAM shared_mem_;
|
||||
#endif
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Word> csrs_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
std::vector<ExeUnit::Ptr> exe_units_;
|
||||
Cache::Ptr icache_;
|
||||
Cache::Ptr dcache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
|
||||
SlavePort<MemRsp> icache_rsp_port_;
|
||||
std::vector<SlavePort<MemRsp>> dcache_rsp_port_;
|
||||
|
||||
PipelineStage fetch_stage_;
|
||||
PipelineStage decode_stage_;
|
||||
PipelineStage issue_stage_;
|
||||
PipelineStage execute_stage_;
|
||||
PipelineStage commit_stage_;
|
||||
|
||||
HashTable<pipeline_state_t> pending_icache_;
|
||||
WarpMask stalled_warps_;
|
||||
uint32_t last_schedule_wid_;
|
||||
uint32_t pending_instrs_;
|
||||
bool ebreak_;
|
||||
|
||||
Pipeline inst_in_schedule_;
|
||||
Pipeline inst_in_fetch_;
|
||||
Pipeline inst_in_decode_;
|
||||
Pipeline inst_in_issue_;
|
||||
Pipeline inst_in_execute_;
|
||||
Pipeline inst_in_writeback_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
uint64_t stats_insts_;
|
||||
uint64_t stats_loads_;
|
||||
uint64_t stats_stores_;
|
||||
|
||||
uint64_t steps_;
|
||||
uint64_t insts_;
|
||||
uint64_t loads_;
|
||||
uint64_t stores_;
|
||||
friend class LsuUnit;
|
||||
|
||||
public:
|
||||
SlavePort<MemRsp> MemRspPort;
|
||||
MasterPort<MemReq> MemReqPort;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
@@ -281,7 +281,7 @@ Decoder::Decoder(const ArchDef &arch) {
|
||||
v_imm_mask_ = 0x7ff;
|
||||
}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) {
|
||||
std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) const {
|
||||
auto instr = std::make_shared<Instr>();
|
||||
Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
|
||||
instr->setOpcode(op);
|
||||
@@ -351,9 +351,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) {
|
||||
instr->setFunc3(func3);
|
||||
instr->setFunc7(func7);
|
||||
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
|
||||
instr->setImm(signExt(rs2, 5, reg_mask_));
|
||||
instr->setImm(sext32(rs2, 5));
|
||||
} else {
|
||||
instr->setImm(signExt(code >> shift_rs2_, 12, i_imm_mask_));
|
||||
instr->setImm(sext32(code >> shift_rs2_, 12));
|
||||
}
|
||||
} break;
|
||||
|
||||
@@ -366,7 +366,7 @@ std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) {
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
Word imeed = (func7 << reg_s_) | rd;
|
||||
instr->setImm(signExt(imeed, 12, s_imm_mask_));
|
||||
instr->setImm(sext32(imeed, 12));
|
||||
} break;
|
||||
|
||||
case InstType::B_TYPE: {
|
||||
@@ -378,12 +378,12 @@ std::shared_ptr<Instr> Decoder::decode(Word code, Word PC) {
|
||||
Word bit_10_5 = func7 & 0x3f;
|
||||
Word bit_12 = func7 >> 6;
|
||||
Word imeed = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
instr->setImm(signExt(imeed, 13, b_imm_mask_));
|
||||
instr->setImm(sext32(imeed, 13));
|
||||
} break;
|
||||
|
||||
case InstType::U_TYPE:
|
||||
instr->setDestReg(rd);
|
||||
instr->setImm(signExt(code >> shift_func3_, 20, u_imm_mask_));
|
||||
instr->setImm(sext32(code >> shift_func3_, 20));
|
||||
break;
|
||||
|
||||
case InstType::J_TYPE: {
|
||||
|
||||
@@ -13,7 +13,7 @@ class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
|
||||
std::shared_ptr<Instr> decode(Word code, Word PC);
|
||||
std::shared_ptr<Instr> decode(Word code, Word PC) const;
|
||||
|
||||
private:
|
||||
|
||||
|
||||
2517
sim/simX/execute.cpp
2517
sim/simX/execute.cpp
File diff suppressed because it is too large
Load Diff
152
sim/simX/exeunit.cpp
Normal file
152
sim/simX/exeunit.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
#include "exeunit.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "core.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Creates the load/store unit for `core`. The thread count is read from the
// core's architecture definition; the table of in-flight dcache requests
// holds LSUQ_SIZE entries and no fence is pending initially.
LsuUnit::LsuUnit(Core* core)
  : ExeUnit("LSU")
  , core_(core)
  , num_threads_(core->arch().num_threads())
  , pending_dcache_(LSUQ_SIZE)
  , fence_lock_(false)
{}
|
||||
|
||||
void LsuUnit::handleCacheReponse(const MemRsp& response, uint32_t port_id) {
|
||||
auto entry = pending_dcache_.at(response.tag);
|
||||
entry.second.reset(port_id); // track remaining blocks
|
||||
if (!entry.second.any()) {
|
||||
auto latency = (SimPlatform::instance().cycles() - entry.first.dcache_latency);
|
||||
entry.first.dcache_latency = latency;
|
||||
this->schedule_output(entry.first, 1);
|
||||
pending_dcache_.release(response.tag);
|
||||
}
|
||||
}
|
||||
|
||||
void LsuUnit::step() {
|
||||
if (fence_lock_) {
|
||||
// wait for all pending memory operations to complete
|
||||
if (!pending_dcache_.empty())
|
||||
return;
|
||||
this->schedule_output(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
}
|
||||
|
||||
if (inputs_.empty())
|
||||
return;
|
||||
|
||||
auto state = inputs_.top();
|
||||
|
||||
if (state.lsu.fence) {
|
||||
// schedule fence lock
|
||||
fence_state_ = state;
|
||||
fence_lock_ = true;
|
||||
inputs_.pop();
|
||||
return;
|
||||
}
|
||||
|
||||
// send dcache requests
|
||||
if (!pending_dcache_.full()) {
|
||||
state.dcache_latency = SimPlatform::instance().cycles();
|
||||
auto tag = pending_dcache_.allocate({state, state.tmask});
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
if (!state.tmask.test(t))
|
||||
continue;
|
||||
MemReq mem_req;
|
||||
mem_req.addr = state.mem_addrs.at(t);
|
||||
mem_req.write = state.lsu.store;
|
||||
mem_req.tag = tag;
|
||||
core_->dcache_->CoreReqPorts.at(t).send(mem_req, 1);
|
||||
}
|
||||
inputs_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The ALU does not use the owning core; it only registers its name.
AluUnit::AluUnit(Core* /*core*/)
  : ExeUnit("ALU")
{}
|
||||
|
||||
void AluUnit::step() {
|
||||
pipeline_state_t state;
|
||||
if (!inputs_.try_pop(&state))
|
||||
return;
|
||||
switch (state.alu.type) {
|
||||
case AluType::ARITH:
|
||||
this->schedule_output(state, 1);
|
||||
break;
|
||||
case AluType::BRANCH:
|
||||
this->schedule_output(state, 1);
|
||||
break;
|
||||
case AluType::IMUL:
|
||||
this->schedule_output(state, LATENCY_IMUL);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
this->schedule_output(state, XLEN);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The CSR unit does not use the owning core; it only registers its name.
CsrUnit::CsrUnit(Core* /*core*/)
  : ExeUnit("CSR")
{}
|
||||
|
||||
void CsrUnit::step() {
|
||||
pipeline_state_t state;
|
||||
if (!inputs_.try_pop(&state))
|
||||
return;
|
||||
this->schedule_output(state, 1);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The FPU does not use the owning core; it only registers its name.
FpuUnit::FpuUnit(Core* /*core*/)
  : ExeUnit("FPU")
{}
|
||||
|
||||
void FpuUnit::step() {
|
||||
pipeline_state_t state;
|
||||
if (!inputs_.try_pop(&state))
|
||||
return;
|
||||
switch (state.fpu.type) {
|
||||
case FpuType::FNCP:
|
||||
this->schedule_output(state, 1);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
this->schedule_output(state, LATENCY_FMA);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
this->schedule_output(state, LATENCY_FDIV);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
this->schedule_output(state, LATENCY_FSQRT);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
this->schedule_output(state, LATENCY_FCVT);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The GPU unit does not use the owning core; it only registers its name.
GpuUnit::GpuUnit(Core* /*core*/)
  : ExeUnit("GPU")
{}
|
||||
|
||||
void GpuUnit::step() {
|
||||
pipeline_state_t state;
|
||||
if (!inputs_.try_pop(&state))
|
||||
return;
|
||||
switch (state.gpu.type) {
|
||||
case GpuType::TMC:
|
||||
case GpuType::WSPAWN:
|
||||
case GpuType::SPLIT:
|
||||
case GpuType::JOIN:
|
||||
case GpuType::BAR:
|
||||
this->schedule_output(state, 1);
|
||||
break;
|
||||
case GpuType::TEX:
|
||||
/* TODO */
|
||||
break;
|
||||
}
|
||||
}
|
||||
103
sim/simX/exeunit.h
Normal file
103
sim/simX/exeunit.h
Normal file
@@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
|
||||
class ExeUnit {
|
||||
protected:
|
||||
const char* name_;
|
||||
Queue<pipeline_state_t> inputs_;
|
||||
Queue<pipeline_state_t> outputs_;
|
||||
|
||||
void schedule_output(const pipeline_state_t& state, uint32_t delay) {
|
||||
if (delay > 1) {
|
||||
SimPlatform::instance().schedule(
|
||||
[&](const pipeline_state_t& req) {
|
||||
outputs_.push(req);
|
||||
},
|
||||
state,
|
||||
(delay - 1)
|
||||
);
|
||||
} else {
|
||||
outputs_.push(state);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
typedef std::shared_ptr<ExeUnit> Ptr;
|
||||
|
||||
ExeUnit(const char* name) : name_(name) {}
|
||||
|
||||
virtual ~ExeUnit() {}
|
||||
|
||||
void push_input(const pipeline_state_t& state) {
|
||||
inputs_.push(state);
|
||||
}
|
||||
|
||||
bool pop_output(pipeline_state_t* state) {
|
||||
return outputs_.try_pop(state);
|
||||
}
|
||||
|
||||
virtual void step() = 0;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LsuUnit : public ExeUnit {
|
||||
private:
|
||||
Core* core_;
|
||||
uint32_t num_threads_;
|
||||
HashTable<std::pair<pipeline_state_t, ThreadMask>> pending_dcache_;
|
||||
pipeline_state_t fence_state_;
|
||||
bool fence_lock_;
|
||||
|
||||
public:
|
||||
LsuUnit(Core*);
|
||||
|
||||
void handleCacheReponse(const MemRsp& response, uint32_t port_id);
|
||||
|
||||
void step();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class AluUnit : public ExeUnit {
|
||||
public:
|
||||
AluUnit(Core*);
|
||||
|
||||
void step();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class CsrUnit : public ExeUnit {
|
||||
public:
|
||||
CsrUnit(Core*);
|
||||
|
||||
void step();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class FpuUnit : public ExeUnit {
|
||||
public:
|
||||
FpuUnit(Core*);
|
||||
|
||||
void step();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class GpuUnit : public ExeUnit {
|
||||
public:
|
||||
GpuUnit(Core*);
|
||||
|
||||
void step();
|
||||
};
|
||||
|
||||
}
|
||||
39
sim/simX/ibuffer.h
Normal file
39
sim/simX/ibuffer.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class IBuffer {
|
||||
private:
|
||||
std::queue<pipeline_state_t> entries_;
|
||||
uint32_t capacity_;
|
||||
|
||||
public:
|
||||
IBuffer(uint32_t size)
|
||||
: capacity_(size)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return entries_.empty();
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (entries_.size() == capacity_);
|
||||
}
|
||||
|
||||
const pipeline_state_t& top() const {
|
||||
return entries_.front();
|
||||
}
|
||||
|
||||
void push(const pipeline_state_t& state) {
|
||||
entries_.emplace(state);
|
||||
}
|
||||
|
||||
void pop() {
|
||||
return entries_.pop();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -113,15 +113,12 @@ private:
|
||||
int num_rsrcs_;
|
||||
bool has_imm_;
|
||||
int rdest_type_;
|
||||
int isrc_mask_;
|
||||
int fsrc_mask_;
|
||||
int vsrc_mask_;
|
||||
Word imm_;
|
||||
int rsrc_type_[MAX_REG_SOURCES];
|
||||
int rsrc_[MAX_REG_SOURCES];
|
||||
int rdest_;
|
||||
Word func3_;
|
||||
Word func7_;
|
||||
Word func6_;
|
||||
|
||||
//Vector
|
||||
Word vmask_;
|
||||
@@ -132,7 +129,7 @@ private:
|
||||
Word vlmul_;
|
||||
Word vsew_;
|
||||
Word vediv_;
|
||||
Word func6_;
|
||||
Word func7_;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Instr&);
|
||||
};
|
||||
|
||||
@@ -5,28 +5,26 @@
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "core.h"
|
||||
#include "processor.h"
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int ret;
|
||||
|
||||
std::string archString("rv32imf");
|
||||
std::string archStr("rv32imf");
|
||||
std::string imgFileName;
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
std::string imgFileName;
|
||||
int num_threads(NUM_THREADS);
|
||||
bool showHelp(false);
|
||||
bool showStats(false);
|
||||
bool riscv_test(false);
|
||||
|
||||
/* Read the command line arguments. */
|
||||
CommandLineArgFlag fh("-h", "--help", "", showHelp);
|
||||
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archString);
|
||||
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archStr);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
|
||||
@@ -48,62 +46,18 @@ int main(int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ArchDef arch(archString, num_cores, num_warps, num_threads);
|
||||
|
||||
Decoder decoder(arch);
|
||||
MemoryUnit mu(0, arch.wsize(), true);
|
||||
std::cout << "Running " << imgFileName << "..." << std::endl;
|
||||
|
||||
RAM ram((1<<12), (1<<20));
|
||||
|
||||
std::string program_ext(fileExtension(imgFileName.c_str()));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(imgFileName.c_str(), STARTUP_ADDR);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(imgFileName.c_str());
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
if (!SimPlatform::instance().initialize())
|
||||
return -1;
|
||||
}
|
||||
|
||||
mu.attach(ram, 0, 0xFFFFFFFF);
|
||||
{
|
||||
ArchDef arch(archStr, num_cores, num_warps, num_threads);
|
||||
Processor processor(arch);
|
||||
ret = processor.run(imgFileName, riscv_test, showStats);
|
||||
}
|
||||
|
||||
struct stat hello;
|
||||
fstat(0, &hello);
|
||||
SimPlatform::instance().finalize();
|
||||
|
||||
std::vector<std::shared_ptr<Core>> cores(num_cores);
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
running = false;
|
||||
for (auto& core : cores) {
|
||||
core->step();
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_ebreak()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
return ret;
|
||||
}
|
||||
|
||||
58
sim/simX/memsim.cpp
Normal file
58
sim/simX/memsim.cpp
Normal file
@@ -0,0 +1,58 @@
|
||||
#include "memsim.h"
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "constants.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Private implementation of MemSim: one request queue per bank, served at a
// rate of one request per bank per step().
class MemSim::Impl {
public:
  Impl(MemSim* simobject, uint32_t num_banks, uint32_t latency)
    : simobject_(simobject)
    , inputs_(num_banks)
    , latency_(latency)
  {}

  // Port callback: queues the request on the bank identified by `port_id`.
  void handleMemRequest(const MemReq& mem_req, uint32_t port_id) {
    inputs_.at(port_id).push(mem_req);
  }

  // Retires the oldest pending request of every non-empty bank. Reads are
  // answered on the matching response port after `latency_`; writes are
  // dropped without a response.
  void step(uint64_t /*cycle*/) {
    const uint32_t num_banks = inputs_.size();
    for (uint32_t bank = 0; bank < num_banks; ++bank) {
      auto& pending = inputs_.at(bank);
      if (!pending.empty()) {
        const auto& req = pending.front();
        if (!req.write) {
          MemRsp mem_rsp;
          mem_rsp.tag = req.tag;
          simobject_->MemRspPorts.at(bank).send(mem_rsp, latency_);
        }
        pending.pop();
      }
    }
  }

private:
  MemSim* simobject_;                       // owner, used to reach the response ports
  std::vector<std::queue<MemReq>> inputs_;  // pending requests, one queue per bank
  uint32_t latency_;                        // delay passed to send() for read responses
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds a memory simulator with `num_banks` request/response port pairs and
// a heap-allocated Impl that serves them with the given `latency`.
// impl_ is declared before the port vectors in the class, so it is already
// set when the request ports are constructed with it and
// Impl::handleMemRequest.
MemSim::MemSim(const SimContext& ctx,
|
||||
uint32_t num_banks,
|
||||
uint32_t latency)
|
||||
: SimObject<MemSim>(ctx, "MemSim")
|
||||
, impl_(new Impl(this, num_banks, latency))
|
||||
, MemReqPorts(num_banks, {this, impl_, &Impl::handleMemRequest})
|
||||
, MemRspPorts(num_banks, this)
|
||||
{}
|
||||
|
||||
// Frees the Impl object that the constructor allocated with `new`.
MemSim::~MemSim() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
// Forwards the step to the implementation object, which serves the pending
// request queues; `cycle` is passed through unchanged.
void MemSim::step(uint64_t cycle) {
|
||||
impl_->step(cycle);
|
||||
}
|
||||
35
sim/simX/memsim.h
Normal file
35
sim/simX/memsim.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Memory request message carried over the request ports.
struct MemReq {
|
||||
// Target address of the access.
uint64_t addr;
|
||||
// Requester-chosen identifier; MemSim copies it into the matching MemRsp.
uint32_t tag;
|
||||
// True for a write. MemSim only sends a response when this is false.
bool write;
|
||||
};
|
||||
|
||||
// Memory response message carried over the response ports.
struct MemRsp {
|
||||
// Tag of the request this response answers.
uint32_t tag;
|
||||
};
|
||||
|
||||
// Simple memory model exposed as a simulation object: one request port and
// one response port per bank, with the behavior hidden behind Impl.
class MemSim : public SimObject<MemSim> {
private:
  class Impl;
  Impl* impl_;  // owned; allocated in the constructor, freed in the destructor

public:
  // `num_banks` is the number of request/response port pairs; `latency` is the
  // delay applied to read responses. (Parameter name matches the definition.)
  MemSim(const SimContext& ctx, uint32_t num_banks, uint32_t latency);
  ~MemSim();

  // impl_ is an owning raw pointer and Impl keeps a back-pointer to this
  // object, so a member-wise copy would double-delete and dangle.
  MemSim(const MemSim&) = delete;
  MemSim& operator=(const MemSim&) = delete;

  // Serves the pending requests for one simulation step.
  void step(uint64_t cycle);

  // Declared after impl_ on purpose: the request ports are constructed with it.
  std::vector<SlavePort<MemReq>>  MemReqPorts;
  std::vector<MasterPort<MemRsp>> MemRspPorts;
};
|
||||
|
||||
};
|
||||
@@ -1,63 +0,0 @@
|
||||
#include <iostream>
|
||||
#include "pipeline.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
namespace vortex {
|
||||
// Dumps the stage state, one "<name>: field=value" line per field.
// The PC is printed in hexadecimal; the stream's previous format flags are
// restored afterwards so std::hex does not leak into the caller's later output.
std::ostream &operator<<(std::ostream &os, const Pipeline& pipeline) {
  const std::ios_base::fmtflags saved_flags = os.flags();
  os << pipeline.name_ << ": valid=" << pipeline.valid << std::endl;
  os << pipeline.name_ << ": stalled=" << pipeline.stalled << std::endl;
  os << pipeline.name_ << ": stall_warp=" << pipeline.stall_warp << std::endl;
  os << pipeline.name_ << ": wid=" << pipeline.wid << std::endl;
  os << pipeline.name_ << ": PC=" << std::hex << pipeline.PC << std::endl;
  os << pipeline.name_ << ": used_iregs=" << pipeline.used_iregs << std::endl;
  os << pipeline.name_ << ": used_fregs=" << pipeline.used_fregs << std::endl;
  os << pipeline.name_ << ": used_vregs=" << pipeline.used_vregs << std::endl;
  os.flags(saved_flags);
  return os;
}
|
||||
}
|
||||
|
||||
// Names the stage and starts it from the cleared state.
Pipeline::Pipeline(const char* name) : name_(name) {
  clear();
}
|
||||
|
||||
void Pipeline::clear() {
|
||||
valid = false;
|
||||
stalled = false;
|
||||
stall_warp = false;
|
||||
wid = 0;
|
||||
PC = 0;
|
||||
used_iregs.reset();
|
||||
used_fregs.reset();
|
||||
used_vregs.reset();
|
||||
}
|
||||
|
||||
// Decides whether this stage may run. If `drain` is stalled the stall is
// propagated to this stage and false is returned; otherwise `drain` is
// invalidated, this stage is un-stalled, and the result is whether this stage
// holds a valid instruction.
bool Pipeline::enter(Pipeline *drain) {
  if (drain != nullptr) {
    if (drain->stalled) {
      stalled = true;
      return false;
    }
    drain->valid = false;
  }
  stalled = false;
  return valid;
}
|
||||
|
||||
// Copies this stage's state into `drain`; does nothing when `drain` is null.
void Pipeline::next(Pipeline *drain) {
  if (drain == nullptr)
    return;

  Pipeline& dst = *drain;
  dst.valid      = valid;
  dst.stalled    = stalled;
  dst.stall_warp = stall_warp;
  dst.wid        = wid;
  dst.PC         = PC;
  dst.rdest      = rdest;
  dst.rdest_type = rdest_type;
  dst.used_iregs = used_iregs;
  dst.used_fregs = used_fregs;
  dst.used_vregs = used_vregs;
}
|
||||
@@ -2,47 +2,75 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Instr;
|
||||
|
||||
class Pipeline {
|
||||
public:
|
||||
Pipeline(const char* name);
|
||||
|
||||
void clear();
|
||||
|
||||
bool enter(Pipeline* drain);
|
||||
|
||||
void next(Pipeline* drain);
|
||||
|
||||
//--
|
||||
bool valid;
|
||||
|
||||
//--
|
||||
bool stalled;
|
||||
bool stall_warp;
|
||||
|
||||
struct pipeline_state_t {
|
||||
//--
|
||||
int wid;
|
||||
Word PC;
|
||||
int wid;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
int rdest_type;
|
||||
int rdest;
|
||||
RegMask used_iregs;
|
||||
RegMask used_fregs;
|
||||
RegMask used_vregs;
|
||||
bool stall_warp;
|
||||
int rdest_type;
|
||||
int rdest;
|
||||
RegMask used_iregs;
|
||||
RegMask used_fregs;
|
||||
RegMask used_vregs;
|
||||
|
||||
private:
|
||||
//-
|
||||
ExeType exe_type;
|
||||
std::vector<uint64_t> mem_addrs;
|
||||
|
||||
//--
|
||||
union {
|
||||
struct {
|
||||
uint8_t load : 1;
|
||||
uint8_t store: 1;
|
||||
uint8_t fence : 1;
|
||||
uint8_t prefetch: 1;
|
||||
} lsu;
|
||||
struct {
|
||||
AluType type;
|
||||
} alu;
|
||||
struct {
|
||||
FpuType type;
|
||||
} fpu;
|
||||
struct {
|
||||
GpuType type;
|
||||
} gpu;
|
||||
};
|
||||
|
||||
// stats
|
||||
uint64_t icache_latency;
|
||||
uint64_t dcache_latency;
|
||||
};
|
||||
|
||||
class PipelineStage : public Queue<pipeline_state_t> {
|
||||
protected:
|
||||
const char* name_;
|
||||
friend std::ostream &operator<<(std::ostream &, const pipeline_state_t&);
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Pipeline&);
|
||||
};
|
||||
public:
|
||||
PipelineStage(const char* name = nullptr)
|
||||
: name_(name)
|
||||
{}
|
||||
};
|
||||
|
||||
// Prints a one-line summary of a pipeline state followed by a newline.
// The PC is printed in hexadecimal; the stream's previous format flags are
// restored before returning so std::hex does not leak into later output.
inline std::ostream &operator<<(std::ostream &os, const pipeline_state_t& state) {
  const std::ios_base::fmtflags saved_flags = os.flags();
  os << "stall_warp=" << state.stall_warp;
  os << ", wid=" << state.wid;
  os << ", PC=" << std::hex << state.PC;
  os << ", used_iregs=" << state.used_iregs;
  os << ", used_fregs=" << state.used_fregs;
  os << ", used_vregs=" << state.used_vregs;
  os << std::endl;
  os.flags(saved_flags);
  return os;
}
|
||||
|
||||
}
|
||||
189
sim/simX/processor.h
Normal file
189
sim/simX/processor.h
Normal file
@@ -0,0 +1,189 @@
|
||||
#pragma once
|
||||
|
||||
#include "constants.h"
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "core.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Processor {
|
||||
private:
|
||||
ArchDef arch_;
|
||||
Decoder decoder_;
|
||||
MemoryUnit mu_;
|
||||
RAM ram_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Cache::Ptr> l2caches_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;
|
||||
Cache::Ptr l3cache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;
|
||||
MemSim::Ptr memsim_;
|
||||
|
||||
public:
|
||||
Processor(const ArchDef& arch)
|
||||
: arch_(arch)
|
||||
, decoder_(arch)
|
||||
, mu_(0, arch.wsize(), true)
|
||||
, ram_((1<<12), (1<<20))
|
||||
, cores_(arch.num_cores())
|
||||
, l2caches_(NUM_CLUSTERS)
|
||||
, l2_mem_switches_(NUM_CLUSTERS)
|
||||
{
|
||||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
|
||||
// bind RAM to memory unit
|
||||
mu_.attach(ram_, 0, 0xFFFFFFFF);
|
||||
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
cores_.at(i) = Core::Create(arch, decoder_, mu_, i);
|
||||
}
|
||||
|
||||
// connect memory sub-systen
|
||||
memsim_ = MemSim::Create(1, MEM_LATENCY);
|
||||
std::vector<SlavePort<MemReq>*> mem_req_ports(1);
|
||||
std::vector<MasterPort<MemRsp>*> mem_rsp_ports(1);
|
||||
mem_req_ports.at(0) = &memsim_->MemReqPorts.at(0);
|
||||
mem_rsp_ports.at(0) = &memsim_->MemRspPorts.at(0);
|
||||
|
||||
if (L3_ENABLE) {
|
||||
l3cache_ = Cache::Create("l3cache", CacheConfig{
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
L3_NUM_PORTS, // number of ports
|
||||
NUM_CLUSTERS, // request size
|
||||
true, // write-throught
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);
|
||||
l3cache_->MemReqPort.bind(mem_req_ports.at(0));
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
}
|
||||
} else if (NUM_CLUSTERS > 1) {
|
||||
l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
|
||||
mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);
|
||||
l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", CacheConfig{
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
NUM_CORES, // request size
|
||||
true, // write-throught
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
|
||||
mem_req_ports.resize(cores_per_cluster);
|
||||
mem_rsp_ports.resize(cores_per_cluster);
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
}
|
||||
} else if (cores_per_cluster > 1) {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, NUM_CORES);
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
|
||||
mem_req_ports.resize(cores_per_cluster);
|
||||
mem_rsp_ports.resize(cores_per_cluster);
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * NUM_CLUSTERS) + j);
|
||||
mem_rsp_ports.at(i)->bind(&core->MemRspPort);
|
||||
core->MemReqPort.bind(mem_req_ports.at(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~Processor() {}
|
||||
|
||||
int run(const std::string& program, bool riscv_test, bool /*showStats*/) {
|
||||
{
|
||||
std::string program_ext(fileExtension(program.c_str()));
|
||||
if (program_ext == "bin") {
|
||||
ram_.loadBinImage(program.c_str(), STARTUP_ADDR);
|
||||
} else if (program_ext == "hex") {
|
||||
ram_.loadHexImage(program.c_str());
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().step();
|
||||
|
||||
running = false;
|
||||
for (auto& core : cores_) {
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_ebreak()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
|
||||
// get error status
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
71
sim/simX/scoreboard.h
Normal file
71
sim/simX/scoreboard.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Scoreboard {
|
||||
private:
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::vector<RegMask> in_use_vregs_;
|
||||
|
||||
public:
|
||||
Scoreboard(const ArchDef &arch)
|
||||
: in_use_iregs_(arch.num_warps())
|
||||
, in_use_fregs_(arch.num_warps())
|
||||
, in_use_vregs_(arch.num_warps())
|
||||
{
|
||||
for (int w = 0; w < arch.num_warps(); ++w) {
|
||||
in_use_iregs_.at(w).reset();
|
||||
in_use_fregs_.at(w).reset();
|
||||
in_use_vregs_.at(w).reset();
|
||||
}
|
||||
}
|
||||
|
||||
bool in_use(const pipeline_state_t& state) const {
|
||||
return (state.used_iregs & in_use_iregs_.at(state.wid)) != 0
|
||||
|| (state.used_fregs & in_use_fregs_.at(state.wid)) != 0
|
||||
|| (state.used_vregs & in_use_vregs_.at(state.wid)) != 0;
|
||||
}
|
||||
|
||||
void reserve(const pipeline_state_t& state) {
|
||||
if (!state.rdest)
|
||||
return;
|
||||
|
||||
switch (state.rdest_type) {
|
||||
case 1:
|
||||
in_use_iregs_.at(state.wid).set(state.rdest);
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_.at(state.wid).set(state.rdest);
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_.at(state.wid).set(state.rdest);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void release(const pipeline_state_t& state) {
|
||||
if (!state.rdest)
|
||||
return;
|
||||
switch (state.rdest_type) {
|
||||
case 1:
|
||||
in_use_iregs_.at(state.wid).reset(state.rdest);
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_.at(state.wid).reset(state.rdest);
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_.at(state.wid).reset(state.rdest);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
240
sim/simX/types.h
240
sim/simX/types.h
@@ -2,7 +2,10 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <bitset>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <VX_config.h>
|
||||
#include <simobject.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
@@ -14,9 +17,242 @@ typedef uint32_t Addr;
|
||||
typedef uint32_t Size;
|
||||
|
||||
typedef std::bitset<32> RegMask;
|
||||
|
||||
typedef std::bitset<32> ThreadMask;
|
||||
|
||||
typedef std::bitset<32> WarpMask;
|
||||
|
||||
// Execution-unit category of an instruction (stored in pipeline_state_t::exe_type).
// NOTE(review): MAX looks like a count sentinel rather than a real unit — confirm.
enum class ExeType {
|
||||
ALU,
|
||||
LSU,
|
||||
CSR,
|
||||
FPU,
|
||||
GPU,
|
||||
MAX,
|
||||
};
|
||||
|
||||
// Sub-category of an ALU instruction (stored in pipeline_state_t::alu.type).
enum class AluType {
|
||||
ARITH,
|
||||
BRANCH,
|
||||
IMUL,
|
||||
IDIV,
|
||||
};
|
||||
|
||||
// Sub-category of an FPU instruction (stored in pipeline_state_t::fpu.type).
// NOTE(review): FNCP presumably covers the non-computational float ops — confirm.
enum class FpuType {
|
||||
FNCP,
|
||||
FMA,
|
||||
FDIV,
|
||||
FSQRT,
|
||||
FCVT,
|
||||
};
|
||||
|
||||
// Sub-category of a GPU instruction (stored in pipeline_state_t::gpu.type).
enum class GpuType {
|
||||
TMC,
|
||||
WSPAWN,
|
||||
SPLIT,
|
||||
JOIN,
|
||||
BAR,
|
||||
TEX,
|
||||
};
|
||||
|
||||
// Arbitration policy used by Switch when several inputs have a request.
enum class ArbiterType {
|
||||
// The scan start never moves, so the lowest-numbered valid input wins.
Priority,
|
||||
// The scan start moves to the input after the last granted one.
RoundRobin
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Thin FIFO wrapper around std::queue with a non-throwing try_pop().
template <typename T>
class Queue {
protected:
  std::queue<T> queue_;

public:
  Queue() {}

  // True when nothing is queued.
  bool empty() const { return queue_.empty(); }

  // Oldest element; the queue must not be empty.
  const T& top() const { return queue_.front(); }

  // Appends a copy of `value`.
  void push(const T& value) { queue_.push(value); }

  // Removes the oldest element; the queue must not be empty.
  void pop() { queue_.pop(); }

  // Moves the oldest element into `*value` and removes it.
  // Returns false (leaving `*value` untouched) when the queue is empty.
  bool try_pop(T* value) {
    if (!queue_.empty()) {
      *value = queue_.front();
      queue_.pop();
      return true;
    }
    return false;
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Fixed-size slot table: allocate() stores a value in the first free slot and
// returns its index, which is then used to look the value up or free the slot.
template <typename T>
class HashTable {
private:
  std::vector<std::pair<bool, T>> entries_;  // (occupied flag, value) per slot
  uint32_t size_;                            // number of occupied slots

  // Marks an occupied slot as free and keeps the occupancy count in sync.
  void vacate(std::pair<bool, T>& entry) {
    if (entry.first) {
      entry.first = false;
      --size_;
    }
  }

public:
  // Creates a table with `size` free slots.
  HashTable(uint32_t size)
    : entries_(size)
    , size_(0)
  {}

  // True when no slot is occupied.
  bool empty() const {
    return (0 == size_);
  }

  // True when every slot is occupied.
  bool full() const {
    return (size_ == entries_.size());
  }

  // True when slot `index` is occupied. Throws std::out_of_range for a bad index.
  bool contains(uint32_t index) const {
    return entries_.at(index).first;
  }

  // Value stored in slot `index`; the slot must be occupied.
  const T& at(uint32_t index) const {
    auto& entry = entries_.at(index);
    assert(entry.first);
    return entry.second;
  }

  T& at(uint32_t index) {
    auto& entry = entries_.at(index);
    assert(entry.first);
    return entry.second;
  }

  // Stores `value` in the first free slot and returns that slot's index,
  // or uint32_t(-1) when the table is full.
  uint32_t allocate(const T& value) {
    for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
      auto& entry = entries_.at(i);
      if (!entry.first) {
        entry.first = true;
        entry.second = value;
        ++size_;
        return i;
      }
    }
    return static_cast<uint32_t>(-1);
  }

  // Frees slot `index`; the slot must be occupied.
  // The occupancy count is decremented so full()/empty() stay accurate.
  void release(uint32_t index) {
    auto& entry = entries_.at(index);
    assert(entry.first);
    vacate(entry);
  }

  // Copies the value of slot `index` into `*value` and frees the slot;
  // the slot must be occupied.
  void remove(uint32_t index, T* value) {
    auto& entry = entries_.at(index);
    assert(entry.first);
    *value = entry.second;
    vacate(entry);
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp, uint32_t MaxInputs = 32>
|
||||
class Switch : public SimObject<Switch<Req, Rsp>> {
|
||||
private:
|
||||
struct req_t {
|
||||
std::vector<Req> data;
|
||||
std::bitset<MaxInputs> valid;
|
||||
req_t() {}
|
||||
req_t(uint32_t size) : data(size) {}
|
||||
};
|
||||
|
||||
void handleIncomingRequest(const Req& req, uint32_t port_id) {
|
||||
cur_req_.data.at(port_id) = req;
|
||||
cur_req_.valid.set(port_id);
|
||||
}
|
||||
|
||||
void handleIncomingResponse(const Rsp& rsp, uint32_t) {
|
||||
rsps_.push(rsp);
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
std::queue<req_t> reqs_;
|
||||
std::queue<Rsp> rsps_;
|
||||
req_t cur_req_;
|
||||
uint32_t delay_;
|
||||
uint32_t cursor_;
|
||||
std::unordered_map<uint32_t, uint32_t> addr_table_;
|
||||
|
||||
public:
|
||||
Switch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<Switch<Req, Rsp, MaxInputs>>(ctx, name)
|
||||
, type_(type)
|
||||
, cur_req_(num_inputs)
|
||||
, delay_(delay)
|
||||
, cursor_(0)
|
||||
, ReqIn(num_inputs, {this, this, &Switch<Req, Rsp, MaxInputs>::handleIncomingRequest})
|
||||
, ReqOut(this)
|
||||
, RspIn(this, this, &Switch<Req, Rsp, MaxInputs>::handleIncomingResponse)
|
||||
, RspOut(num_inputs, this)
|
||||
{
|
||||
assert(delay_ != 0);
|
||||
assert(num_inputs <= MaxInputs);
|
||||
}
|
||||
|
||||
void step(uint64_t /*cycle*/) {
|
||||
if (cur_req_.valid.any()) {
|
||||
reqs_.push(cur_req_);
|
||||
cur_req_.valid.reset();
|
||||
}
|
||||
|
||||
while (!reqs_.empty()) {
|
||||
auto& entry = reqs_.front();
|
||||
bool found = false;
|
||||
for (uint32_t i = 0, n = entry.data.size(); i < n; ++i) {
|
||||
auto j = (cursor_ + i) % n;
|
||||
if (entry.valid.test(j)) {
|
||||
auto& req = entry.data.at(j);
|
||||
addr_table_[req.tag] = j;
|
||||
ReqOut.send(req, delay_);
|
||||
entry.valid.reset(j);
|
||||
this->update_cursor(j);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found)
|
||||
break;
|
||||
reqs_.pop();
|
||||
}
|
||||
|
||||
if (!rsps_.empty()) {
|
||||
auto& rsp = rsps_.front();
|
||||
auto port_id = addr_table_.at(rsp.tag);
|
||||
RspOut.at(port_id).send(rsp, 1);
|
||||
rsps_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
void update_cursor(uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursor_ = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<SlavePort<Req>> ReqIn;
|
||||
MasterPort<Req> ReqOut;
|
||||
SlavePort<Rsp> RspIn;
|
||||
std::vector<MasterPort<Rsp>> RspOut;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -12,25 +12,21 @@ using namespace vortex;
|
||||
|
||||
Warp::Warp(Core *core, Word id)
|
||||
: id_(id)
|
||||
, core_(core) {
|
||||
, core_(core)
|
||||
, active_(false)
|
||||
, PC_(STARTUP_ADDR)
|
||||
, tmask_(0) {
|
||||
iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
|
||||
this->clear();
|
||||
}
|
||||
|
||||
void Warp::clear() {
|
||||
PC_ = STARTUP_ADDR;
|
||||
tmask_.reset();
|
||||
active_ = false;
|
||||
}
|
||||
|
||||
void Warp::step(Pipeline *pipeline) {
|
||||
void Warp::eval(pipeline_state_t *pipeline_state) {
|
||||
assert(tmask_.any());
|
||||
|
||||
DPH(2, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_ << ", tmask=");
|
||||
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i)
|
||||
DPN(2, tmask_[n-i-1]);
|
||||
DPN(2, tmask_.test(n-i-1));
|
||||
DPN(2, "\n");
|
||||
|
||||
/* Fetch and decode. */
|
||||
@@ -38,55 +34,24 @@ void Warp::step(Pipeline *pipeline) {
|
||||
Word fetched = core_->icache_fetch(PC_);
|
||||
auto instr = core_->decoder().decode(fetched, PC_);
|
||||
|
||||
// Update pipeline
|
||||
pipeline->valid = true;
|
||||
pipeline->PC = PC_;
|
||||
pipeline->rdest = instr->getRDest();
|
||||
pipeline->rdest_type = instr->getRDType();
|
||||
pipeline->used_iregs.reset();
|
||||
pipeline->used_fregs.reset();
|
||||
pipeline->used_vregs.reset();
|
||||
|
||||
switch (pipeline->rdest_type) {
|
||||
case 1:
|
||||
pipeline->used_iregs[pipeline->rdest] = 1;
|
||||
break;
|
||||
case 2:
|
||||
pipeline->used_fregs[pipeline->rdest] = 1;
|
||||
break;
|
||||
case 3:
|
||||
pipeline->used_vregs[pipeline->rdest] = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 0; i < instr->getNRSrc(); ++i) {
|
||||
int type = instr->getRSType(i);
|
||||
int reg = instr->getRSrc(i);
|
||||
switch (type) {
|
||||
case 1:
|
||||
pipeline->used_iregs[reg] = 1;
|
||||
break;
|
||||
case 2:
|
||||
pipeline->used_fregs[reg] = 1;
|
||||
break;
|
||||
case 3:
|
||||
pipeline->used_vregs[reg] = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Update state
|
||||
pipeline_state->wid = id_;
|
||||
pipeline_state->PC = PC_;
|
||||
pipeline_state->tmask = tmask_;
|
||||
pipeline_state->rdest = instr->getRDest();
|
||||
pipeline_state->rdest_type = instr->getRDType();
|
||||
pipeline_state->used_iregs.reset();
|
||||
pipeline_state->used_fregs.reset();
|
||||
pipeline_state->used_vregs.reset();
|
||||
|
||||
// Execute
|
||||
this->execute(*instr, pipeline);
|
||||
this->execute(*instr, pipeline_state);
|
||||
|
||||
D(4, "Register state:");
|
||||
for (int i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (int j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace vortex {
|
||||
|
||||
class Core;
|
||||
class Instr;
|
||||
class Pipeline;
|
||||
class pipeline_state_t;
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
@@ -41,8 +41,6 @@ struct vtype {
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, Word id);
|
||||
|
||||
void clear();
|
||||
|
||||
bool active() const {
|
||||
return active_;
|
||||
@@ -71,7 +69,7 @@ public:
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_[index] = value;
|
||||
tmask_.set(index, value);
|
||||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
@@ -82,18 +80,18 @@ public:
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
return iRegFile_[0][reg];
|
||||
return iRegFile_.at(0).at(reg);
|
||||
}
|
||||
|
||||
void step(Pipeline *);
|
||||
void eval(pipeline_state_t *);
|
||||
|
||||
private:
|
||||
|
||||
void execute(const Instr &instr, Pipeline *);
|
||||
void execute(const Instr &instr, pipeline_state_t *pipeline_state);
|
||||
|
||||
Word id_;
|
||||
bool active_;
|
||||
Core *core_;
|
||||
bool active_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
Reference in New Issue
Block a user