// Copyright © 2019-2023 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include #include #include #include "uuid_gen.h" #include "debug.h" namespace vortex { typedef uint8_t Byte; #if (XLEN == 32) typedef uint32_t Word; typedef int32_t WordI; typedef uint64_t DWord; typedef int64_t DWordI; typedef uint32_t WordF; #elif (XLEN == 64) typedef uint64_t Word; typedef int64_t WordI; typedef __uint128_t DWord; typedef __int128_t DWordI; typedef uint64_t WordF; #else #error unsupported XLEN #endif #define MAX_NUM_CORES 1024 #define MAX_NUM_THREADS 32 #define MAX_NUM_WARPS 32 #define MAX_NUM_REGS 32 typedef std::bitset CoreMask; typedef std::bitset RegMask; typedef std::bitset ThreadMask; typedef std::bitset WarpMask; typedef std::unordered_map CSRs; /////////////////////////////////////////////////////////////////////////////// enum class RegType { None, Integer, Float, Vector }; inline std::ostream &operator<<(std::ostream &os, const RegType& type) { switch (type) { case RegType::None: break; case RegType::Integer: os << "x"; break; case RegType::Float: os << "f"; break; case RegType::Vector: os << "v"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class ExeType { ALU, LSU, FPU, SFU, ExeTypeCount }; inline std::ostream &operator<<(std::ostream &os, const ExeType& type) { switch (type) { case ExeType::ALU: os << "ALU"; break; case ExeType::LSU: os << "LSU"; break; case ExeType::FPU: os << "FPU"; break; case ExeType::SFU: os << "SFU"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class AluType { ARITH, BRANCH, SYSCALL, IMUL, IDIV }; inline std::ostream &operator<<(std::ostream &os, const AluType& type) { switch (type) { case AluType::ARITH: os << "ARITH"; break; case AluType::BRANCH: os << "BRANCH"; break; case AluType::SYSCALL: os << "SYSCALL"; break; case AluType::IMUL: os << "IMUL"; break; case AluType::IDIV: os << "IDIV"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class LsuType { LOAD, STORE, FENCE }; inline std::ostream &operator<<(std::ostream &os, const LsuType& type) { switch (type) { case LsuType::LOAD: os << "LOAD"; break; case LsuType::STORE: os << "STORE"; break; case LsuType::FENCE: os << "FENCE"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class AddrType { Global, Shared, IO }; inline std::ostream &operator<<(std::ostream &os, const AddrType& type) { switch (type) { case AddrType::Global: os << "Global"; break; case AddrType::Shared: os << "Shared"; break; case AddrType::IO: os << "IO"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// struct mem_addr_size_t { uint64_t addr; uint32_t size; }; /////////////////////////////////////////////////////////////////////////////// enum class FpuType { FNCP, FMA, FDIV, FSQRT, FCVT }; inline std::ostream &operator<<(std::ostream &os, const FpuType& type) { switch (type) { case FpuType::FNCP: os << "FNCP"; break; case FpuType::FMA: os << "FMA"; break; case FpuType::FDIV: os << "FDIV"; break; case FpuType::FSQRT: os << "FSQRT"; break; case FpuType::FCVT: os << "FCVT"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class SfuType { TMC, WSPAWN, SPLIT, JOIN, BAR, PRED, CSRRW, CSRRS, CSRRC, CMOV }; inline std::ostream &operator<<(std::ostream &os, const SfuType& type) { switch (type) { case SfuType::TMC: os << "TMC"; break; case SfuType::WSPAWN: os << "WSPAWN"; break; case SfuType::SPLIT: os << "SPLIT"; break; case SfuType::JOIN: os << "JOIN"; break; case SfuType::BAR: os << "BAR"; break; case SfuType::PRED: os << "PRED"; break; case SfuType::CSRRW: os << "CSRRW"; break; case SfuType::CSRRS: os << "CSRRS"; break; case SfuType::CSRRC: os << "CSRRC"; break; case SfuType::CMOV: os << "CMOV"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// enum class ArbiterType { Priority, RoundRobin }; inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) { switch (type) { case ArbiterType::Priority: os << "Priority"; break; case ArbiterType::RoundRobin: os << "RoundRobin"; break; default: assert(false); } return os; } /////////////////////////////////////////////////////////////////////////////// struct MemReq { uint64_t addr; bool write; AddrType type; uint32_t tag; uint32_t cid; uint64_t uuid; MemReq(uint64_t _addr = 0, bool _write = false, AddrType _type = AddrType::Global, uint64_t _tag = 0, uint32_t _cid = 0, uint64_t _uuid = 0 ) : addr(_addr) , write(_write) , type(_type) , tag(_tag) , cid(_cid) , uuid(_uuid) {} }; inline std::ostream &operator<<(std::ostream &os, const MemReq& req) { os << "mem-" << (req.write ? "wr" : "rd") << ": "; os << "addr=0x" << std::hex << req.addr << ", type=" << req.type; os << std::dec << ", tag=" << req.tag << ", cid=" << req.cid; os << " (#" << std::dec << req.uuid << ")"; return os; } /////////////////////////////////////////////////////////////////////////////// struct MemRsp { uint64_t tag; uint32_t cid; uint64_t uuid; MemRsp(uint64_t _tag = 0, uint32_t _cid = 0, uint64_t _uuid = 0) : tag (_tag) , cid(_cid) , uuid(_uuid) {} }; inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) { os << "mem-rsp: tag=" << rsp.tag << ", cid=" << rsp.cid; os << " (#" << std::dec << rsp.uuid << ")"; return os; } /////////////////////////////////////////////////////////////////////////////// template class HashTable { public: HashTable(uint32_t capacity) : entries_(capacity) , size_(0) {} bool empty() const { return (0 == size_); } bool full() const { return (size_ == entries_.size()); } uint32_t size() const { return size_; } bool contains(uint32_t index) const { return entries_.at(index).first; } const T& at(uint32_t index) const { auto& entry = entries_.at(index); assert(entry.first); return entry.second; } T& at(uint32_t index) { auto& entry = entries_.at(index); assert(entry.first); return entry.second; } uint32_t allocate(const T& value) { for (uint32_t i = 0, n = entries_.size(); i < n; ++i) { auto& entry = entries_.at(i); if (!entry.first) { entry.first = true; entry.second = value; ++size_; return i; } } assert(false); return -1; } void release(uint32_t index) { auto& entry = entries_.at(index); assert(entry.first); entry.first = false; --size_; } void clear() { for (uint32_t i = 0, n = entries_.size(); i < n; ++i) { auto& entry = entries_.at(i); entry.first = false; } size_ = 0; } private: std::vector> entries_; uint32_t size_; }; /////////////////////////////////////////////////////////////////////////////// template class Mux : public SimObject> { public: std::vector> Inputs; std::vector> Outputs; Mux( const SimContext& ctx, const char* name, ArbiterType type, uint32_t num_inputs, uint32_t num_outputs = 1, uint32_t delay = 1 ) : SimObject>(ctx, name) , Inputs(num_inputs, this) , Outputs(num_outputs, this) , type_(type) , delay_(delay) , cursors_(num_outputs, 0) , num_reqs_(num_inputs / num_outputs) { assert(delay != 0); assert(num_inputs <= 32); assert(num_outputs <= 32); assert(num_inputs >= num_outputs); // bypass mode if (num_inputs == num_outputs) { for (uint32_t i = 0; i < num_inputs; ++i) { Inputs.at(i).bind(&Outputs.at(i)); } } } void reset() { for (auto& cursor : cursors_) { cursor = 0; } } void tick() { uint32_t I = Inputs.size(); uint32_t O = Outputs.size(); uint32_t R = num_reqs_; // skip bypass mode if (I == O) return; // process inputs for (uint32_t o = 0; o < O; ++o) { for (uint32_t r = 0; r < R; ++r) { uint32_t i = (cursors_.at(o) + r) & (R-1); uint32_t j = o * R + i; if (j >= I) continue; auto& req_in = Inputs.at(j); if (!req_in.empty()) { auto& req = req_in.front(); DT(4, this->name() << "-" << req); Outputs.at(o).send(req, delay_); req_in.pop(); this->update_cursor(o, i); break; } } } } private: void update_cursor(uint32_t index, uint32_t grant) { if (type_ == ArbiterType::RoundRobin) { cursors_.at(index) = grant + 1; } } ArbiterType type_; uint32_t delay_; std::vector cursors_; uint32_t num_reqs_; }; /////////////////////////////////////////////////////////////////////////////// template class Switch : public SimObject> { public: std::vector> ReqIn; std::vector> RspIn; std::vector> ReqOut; std::vector> RspOut; Switch( const SimContext& ctx, const char* name, ArbiterType type, uint32_t num_inputs, uint32_t num_outputs = 1, uint32_t delay = 1 ) : SimObject>(ctx, name) , ReqIn(num_inputs, this) , RspIn(num_inputs, this) , ReqOut(num_outputs, this) , RspOut(num_outputs, this) , type_(type) , delay_(delay) , cursors_(num_outputs, 0) , lg_num_reqs_(log2ceil(num_inputs / num_outputs)) { assert(delay != 0); assert(num_inputs <= 32); assert(num_outputs <= 32); assert(num_inputs >= num_outputs); // bypass mode if (num_inputs == num_outputs) { for (uint32_t i = 0; i < num_inputs; ++i) { ReqIn.at(i).bind(&ReqOut.at(i)); RspOut.at(i).bind(&RspIn.at(i)); } } } void reset() { for (auto& cursor : cursors_) { cursor = 0; } } void tick() { uint32_t I = ReqIn.size(); uint32_t O = ReqOut.size(); uint32_t R = 1 << lg_num_reqs_; // skip bypass mode if (I == O) return; // process incomming requests for (uint32_t o = 0; o < O; ++o) { for (uint32_t r = 0; r < R; ++r) { uint32_t i = (cursors_.at(o) + r) & (R-1); uint32_t j = o * R + i; if (j >= I) continue; auto& req_in = ReqIn.at(j); if (!req_in.empty()) { auto& req = req_in.front(); if (lg_num_reqs_ != 0) { req.tag = (req.tag << lg_num_reqs_) | i; } DT(4, this->name() << "-" << req); ReqOut.at(o).send(req, delay_); req_in.pop(); this->update_cursor(o, i); break; } } // process incoming reponses if (!RspOut.at(o).empty()) { auto& rsp = RspOut.at(o).front(); uint32_t i = 0; if (lg_num_reqs_ != 0) { i = rsp.tag & (R-1); rsp.tag >>= lg_num_reqs_; } DT(4, this->name() << "-" << rsp); uint32_t j = o * R + i; RspIn.at(j).send(rsp, 1); RspOut.at(o).pop(); } } } void update_cursor(uint32_t index, uint32_t grant) { if (type_ == ArbiterType::RoundRobin) { cursors_.at(index) = grant + 1; } } private: ArbiterType type_; uint32_t delay_; std::vector cursors_; uint32_t lg_num_reqs_; }; /////////////////////////////////////////////////////////////////////////////// class SMemDemux : public SimObject { public: SimPort ReqIn; SimPort RspIn; SimPort ReqSM; SimPort RspSM; SimPort ReqDC; SimPort RspDC; SMemDemux( const SimContext& ctx, const char* name, uint32_t delay = 1 ) : SimObject(ctx, name) , ReqIn(this) , RspIn(this) , ReqSM(this) , RspSM(this) , ReqDC(this) , RspDC(this) , delay_(delay) {} void reset() {} void tick() { // process incoming reponses if (!RspSM.empty()) { auto& rsp = RspSM.front(); DT(4, this->name() << "-" << rsp); RspIn.send(rsp, 1); RspSM.pop(); } if (!RspDC.empty()) { auto& rsp = RspDC.front(); DT(4, this->name() << "-" << rsp); RspIn.send(rsp, 1); RspDC .pop(); } // process incomming requests if (!ReqIn.empty()) { auto& req = ReqIn.front(); DT(4, this->name() << "-" << req); if (req.type == AddrType::Shared) { ReqSM.send(req, delay_); } else { ReqDC.send(req, delay_); } ReqIn.pop(); } } private: uint32_t delay_; }; /////////////////////////////////////////////////////////////////////////////// using MemSwitch = Switch; }