adding tracking for SFU stalls
This commit is contained in:
@@ -18,20 +18,20 @@ using namespace vortex;
|
||||
Cluster::Cluster(const SimContext& ctx,
|
||||
uint32_t cluster_id,
|
||||
ProcessorImpl* processor,
|
||||
const Arch &arch, const
|
||||
DCRS &dcrs)
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "cluster")
|
||||
, mem_req_port(this)
|
||||
, mem_rsp_port(this)
|
||||
, cluster_id_(cluster_id)
|
||||
, sockets_(NUM_SOCKETS)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, processor_(processor)
|
||||
, sockets_(NUM_SOCKETS)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, cores_per_socket_(arch.socket_size())
|
||||
{
|
||||
char sname[100];
|
||||
|
||||
auto sockets_per_cluster = sockets_.size();
|
||||
uint32_t sockets_per_cluster = sockets_.size();
|
||||
|
||||
// create sockets
|
||||
|
||||
@@ -43,7 +43,10 @@ Cluster::Cluster(const SimContext& ctx,
|
||||
|
||||
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
|
||||
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
|
||||
auto socket = Socket::Create(socket_id, this, arch, dcrs);
|
||||
auto socket = Socket::Create(socket_id,
|
||||
this,
|
||||
arch,
|
||||
dcrs);
|
||||
|
||||
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
|
||||
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
|
||||
@@ -154,7 +157,7 @@ void Cluster::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
|
||||
}
|
||||
|
||||
Cluster::PerfStats Cluster::perf_stats() const {
|
||||
Cluster::PerfStats perf;
|
||||
perf.l2cache = l2cache_->perf_stats();
|
||||
return perf;
|
||||
PerfStats perf_stats;
|
||||
perf_stats.l2cache = l2cache_->perf_stats();
|
||||
return perf_stats;
|
||||
}
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "dcrs.h"
|
||||
#include "arch.h"
|
||||
#include "cache_cluster.h"
|
||||
#include "shared_mem.h"
|
||||
#include "core.h"
|
||||
#include "socket.h"
|
||||
#include "constants.h"
|
||||
@@ -27,13 +28,8 @@ class ProcessorImpl;
|
||||
|
||||
class Cluster : public SimObject<Cluster> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
struct PerfStats {
|
||||
CacheSim::PerfStats l2cache;
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->l2cache += rhs.l2cache;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
SimPort<MemReq> mem_req_port;
|
||||
@@ -67,15 +63,15 @@ public:
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
Cluster::PerfStats perf_stats() const;
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
uint32_t cluster_id_;
|
||||
std::vector<Socket::Ptr> sockets_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
CacheSim::Ptr l2cache_;
|
||||
ProcessorImpl* processor_;
|
||||
uint32_t cores_per_socket_;
|
||||
uint32_t cluster_id_;
|
||||
ProcessorImpl* processor_;
|
||||
std::vector<Socket::Ptr> sockets_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
CacheSim::Ptr l2cache_;
|
||||
uint32_t cores_per_socket_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
@@ -28,13 +28,18 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &arch, const DCRS &dcrs)
|
||||
Core::Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Socket* socket,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "core")
|
||||
, icache_req_ports(1, this)
|
||||
, icache_rsp_ports(1, this)
|
||||
, dcache_req_ports(NUM_LSU_LANES, this)
|
||||
, dcache_rsp_ports(NUM_LSU_LANES, this)
|
||||
, core_id_(core_id)
|
||||
, socket_(socket)
|
||||
, arch_(arch)
|
||||
, dcrs_(dcrs)
|
||||
, decoder_(arch)
|
||||
@@ -42,7 +47,7 @@ Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, fcsrs_(arch.num_warps(), 0)
|
||||
, ibuffers_(arch.num_warps(), IBUF_SIZE)
|
||||
, scoreboard_(arch_)
|
||||
, scoreboard_(arch_)
|
||||
, operands_(ISSUE_WIDTH)
|
||||
, dispatchers_((uint32_t)ExeType::ExeTypeCount)
|
||||
, exe_units_((uint32_t)ExeType::ExeTypeCount)
|
||||
@@ -50,8 +55,7 @@ Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &
|
||||
, fetch_latch_("fetch")
|
||||
, decode_latch_("decode")
|
||||
, pending_icache_(arch_.num_warps())
|
||||
, csrs_(arch.num_warps())
|
||||
, socket_(socket)
|
||||
, csrs_(arch.num_warps())
|
||||
, commit_arbs_(ISSUE_WIDTH)
|
||||
{
|
||||
char sname[100];
|
||||
@@ -69,6 +73,7 @@ Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &
|
||||
}
|
||||
|
||||
// initialize shared memory
|
||||
snprintf(sname, 100, "core%d-shared_mem", core_id);
|
||||
shared_mem_ = SharedMem::Create(sname, SharedMem::Config{
|
||||
(1 << SMEM_LOG_SIZE),
|
||||
sizeof(Word),
|
||||
@@ -77,17 +82,17 @@ Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &
|
||||
false
|
||||
});
|
||||
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
|
||||
snprintf(sname, 100, "smem_demux%d_%d", core_id, i);
|
||||
auto smem_demux = SMemDemux::Create(sname);
|
||||
|
||||
smem_demux->ReqDC.bind(&dcache_req_ports.at(i));
|
||||
dcache_rsp_ports.at(i).bind(&smem_demux->RspDC);
|
||||
snprintf(sname, 100, "core%d-smem_demux%d", core_id, i);
|
||||
auto smem_demux = SMemDemux::Create(sname);
|
||||
|
||||
smem_demux->ReqDC.bind(&dcache_req_ports.at(i));
|
||||
dcache_rsp_ports.at(i).bind(&smem_demux->RspDC);
|
||||
|
||||
smem_demux->ReqSM.bind(&shared_mem_->Inputs.at(i));
|
||||
shared_mem_->Outputs.at(i).bind(&smem_demux->RspSM);
|
||||
smem_demux->ReqSM.bind(&shared_mem_->Inputs.at(i));
|
||||
shared_mem_->Outputs.at(i).bind(&smem_demux->RspSM);
|
||||
|
||||
smem_demuxs_.at(i) = smem_demux;
|
||||
}
|
||||
smem_demuxs_.at(i) = smem_demux;
|
||||
}
|
||||
|
||||
// initialize dispatchers
|
||||
dispatchers_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_ALU_BLOCKS, NUM_ALU_LANES);
|
||||
@@ -103,7 +108,7 @@ Core::Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &
|
||||
|
||||
// bind commit arbiters
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
snprintf(sname, 100, "commit-arb%d", i);
|
||||
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
|
||||
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)ExeType::ExeTypeCount, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)ExeType::ExeTypeCount; ++j) {
|
||||
exe_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
|
||||
@@ -128,7 +133,7 @@ void Core::reset() {
|
||||
for (auto& exe_unit : exe_units_) {
|
||||
exe_unit->reset();
|
||||
}
|
||||
|
||||
|
||||
for (auto& commit_arb : commit_arbs_) {
|
||||
commit_arb->reset();
|
||||
}
|
||||
@@ -184,7 +189,7 @@ void Core::schedule() {
|
||||
}
|
||||
}
|
||||
if (scheduled_warp == -1) {
|
||||
++perf_stats_.sched_idles;
|
||||
++perf_stats_.sched_idle;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -229,7 +234,7 @@ void Core::fetch() {
|
||||
mem_req.uuid = trace->uuid;
|
||||
icache_req_ports.at(0).send(mem_req, 2);
|
||||
DT(3, "icache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace);
|
||||
fetch_latch_.pop();
|
||||
fetch_latch_.pop();
|
||||
++perf_stats_.ifetches;
|
||||
++pending_ifetches_;
|
||||
}
|
||||
@@ -311,7 +316,21 @@ void Core::issue() {
|
||||
case ExeType::ALU: ++perf_stats_.scrb_alu; break;
|
||||
case ExeType::FPU: ++perf_stats_.scrb_fpu; break;
|
||||
case ExeType::LSU: ++perf_stats_.scrb_lsu; break;
|
||||
case ExeType::SFU: ++perf_stats_.scrb_sfu; break;
|
||||
case ExeType::SFU: {
|
||||
++perf_stats_.scrb_sfu;
|
||||
switch (use.sfu_type) {
|
||||
case SfuType::TMC:
|
||||
case SfuType::WSPAWN:
|
||||
case SfuType::SPLIT:
|
||||
case SfuType::JOIN:
|
||||
case SfuType::BAR:
|
||||
case SfuType::PRED: ++perf_stats_.scrb_wctl; break;
|
||||
case SfuType::CSRRW:
|
||||
case SfuType::CSRRS:
|
||||
case SfuType::CSRRC: ++perf_stats_.scrb_csrs; break;
|
||||
default: assert(false);
|
||||
}
|
||||
} break;
|
||||
default: assert(false);
|
||||
}
|
||||
}
|
||||
@@ -356,7 +375,6 @@ void Core::commit() {
|
||||
auto& commit_arb = commit_arbs_.at(i);
|
||||
if (commit_arb->Outputs.at(0).empty())
|
||||
continue;
|
||||
|
||||
auto trace = commit_arb->Outputs.at(0).front();
|
||||
|
||||
// advance to commit stage
|
||||
@@ -558,8 +576,8 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
break;
|
||||
case VX_DCR_MPM_CLASS_CORE: {
|
||||
switch (addr) {
|
||||
case VX_CSR_MPM_SCHED_ID: return perf_stats_.sched_idles & 0xffffffff;
|
||||
case VX_CSR_MPM_SCHED_ID_H:return perf_stats_.sched_idles >> 32;
|
||||
case VX_CSR_MPM_SCHED_ID: return perf_stats_.sched_idle & 0xffffffff;
|
||||
case VX_CSR_MPM_SCHED_ID_H:return perf_stats_.sched_idle >> 32;
|
||||
case VX_CSR_MPM_SCHED_ST: return perf_stats_.sched_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_SCHED_ST_H:return perf_stats_.sched_stalls >> 32;
|
||||
case VX_CSR_MPM_IBUF_ST: return perf_stats_.ibuf_stalls & 0xffffffff;
|
||||
@@ -574,6 +592,10 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
case VX_CSR_MPM_SCRB_LSU_H:return perf_stats_.scrb_lsu >> 32;
|
||||
case VX_CSR_MPM_SCRB_SFU: return perf_stats_.scrb_sfu & 0xffffffff;
|
||||
case VX_CSR_MPM_SCRB_SFU_H:return perf_stats_.scrb_sfu >> 32;
|
||||
case VX_CSR_MPM_SCRB_WCTL: return perf_stats_.scrb_wctl & 0xffffffff;
|
||||
case VX_CSR_MPM_SCRB_WCTL_H: return perf_stats_.scrb_wctl >> 32;
|
||||
case VX_CSR_MPM_SCRB_CSRS: return perf_stats_.scrb_csrs & 0xffffffff;
|
||||
case VX_CSR_MPM_SCRB_CSRS_H: return perf_stats_.scrb_csrs >> 32;
|
||||
case VX_CSR_MPM_IFETCHES: return perf_stats_.ifetches & 0xffffffff;
|
||||
case VX_CSR_MPM_IFETCHES_H: return perf_stats_.ifetches >> 32;
|
||||
case VX_CSR_MPM_LOADS: return perf_stats_.loads & 0xffffffff;
|
||||
@@ -588,6 +610,7 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
} break;
|
||||
case VX_DCR_MPM_CLASS_MEM: {
|
||||
auto proc_perf = socket_->cluster()->processor()->perf_stats();
|
||||
auto cluster_perf = socket_->cluster()->perf_stats();
|
||||
auto socket_perf = socket_->perf_stats();
|
||||
auto smem_perf = shared_mem_->perf_stats();
|
||||
switch (addr) {
|
||||
@@ -611,18 +634,18 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
case VX_CSR_MPM_DCACHE_MSHR_ST: return socket_perf.dcache.mshr_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_DCACHE_MSHR_ST_H: return socket_perf.dcache.mshr_stalls >> 32;
|
||||
|
||||
case VX_CSR_MPM_L2CACHE_READS: return proc_perf.clusters.l2cache.reads & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_READS_H: return proc_perf.clusters.l2cache.reads >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_WRITES: return proc_perf.clusters.l2cache.writes & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_WRITES_H: return proc_perf.clusters.l2cache.writes >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_R: return proc_perf.clusters.l2cache.read_misses & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_R_H: return proc_perf.clusters.l2cache.read_misses >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_W: return proc_perf.clusters.l2cache.write_misses & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_W_H: return proc_perf.clusters.l2cache.write_misses >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_BANK_ST: return proc_perf.clusters.l2cache.bank_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_BANK_ST_H:return proc_perf.clusters.l2cache.bank_stalls >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MSHR_ST: return proc_perf.clusters.l2cache.mshr_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MSHR_ST_H:return proc_perf.clusters.l2cache.mshr_stalls >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_READS: return cluster_perf.l2cache.reads & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_READS_H: return cluster_perf.l2cache.reads >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_WRITES: return cluster_perf.l2cache.writes & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_WRITES_H: return cluster_perf.l2cache.writes >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_R: return cluster_perf.l2cache.read_misses & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_R_H: return cluster_perf.l2cache.read_misses >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_W: return cluster_perf.l2cache.write_misses & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MISS_W_H: return cluster_perf.l2cache.write_misses >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_BANK_ST: return cluster_perf.l2cache.bank_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_BANK_ST_H:return cluster_perf.l2cache.bank_stalls >> 32;
|
||||
case VX_CSR_MPM_L2CACHE_MSHR_ST: return cluster_perf.l2cache.mshr_stalls & 0xffffffff;
|
||||
case VX_CSR_MPM_L2CACHE_MSHR_ST_H:return cluster_perf.l2cache.mshr_stalls >> 32;
|
||||
|
||||
case VX_CSR_MPM_L3CACHE_READS: return proc_perf.l3cache.reads & 0xffffffff;
|
||||
case VX_CSR_MPM_L3CACHE_READS_H: return proc_perf.l3cache.reads >> 32;
|
||||
@@ -638,7 +661,7 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
case VX_CSR_MPM_L3CACHE_MSHR_ST_H:return proc_perf.l3cache.mshr_stalls >> 32;
|
||||
|
||||
case VX_CSR_MPM_MEM_READS: return proc_perf.mem_reads & 0xffffffff;
|
||||
case VX_CSR_MPM_MEM_READS_H: return proc_perf.mem_reads >> 32;
|
||||
case VX_CSR_MPM_MEM_READS_H: return proc_perf.mem_reads >> 32;
|
||||
case VX_CSR_MPM_MEM_WRITES: return proc_perf.mem_writes & 0xffffffff;
|
||||
case VX_CSR_MPM_MEM_WRITES_H: return proc_perf.mem_writes >> 32;
|
||||
case VX_CSR_MPM_MEM_LT: return proc_perf.mem_latency & 0xffffffff;
|
||||
@@ -652,6 +675,10 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
case VX_CSR_MPM_SMEM_BANK_ST_H: return smem_perf.bank_stalls >> 32;
|
||||
}
|
||||
} break;
|
||||
default: {
|
||||
std::cout << std::dec << "Error: invalid MPM CLASS: value=" << perf_class << std::endl;
|
||||
std::abort();
|
||||
} break;
|
||||
}
|
||||
} else {
|
||||
std::cout << std::hex << "Error: invalid CSR read addr=0x" << addr << std::endl;
|
||||
|
||||
@@ -49,7 +49,7 @@ public:
|
||||
struct PerfStats {
|
||||
uint64_t cycles;
|
||||
uint64_t instrs;
|
||||
uint64_t sched_idles;
|
||||
uint64_t sched_idle;
|
||||
uint64_t sched_stalls;
|
||||
uint64_t ibuf_stalls;
|
||||
uint64_t scrb_stalls;
|
||||
@@ -57,6 +57,8 @@ public:
|
||||
uint64_t scrb_fpu;
|
||||
uint64_t scrb_lsu;
|
||||
uint64_t scrb_sfu;
|
||||
uint64_t scrb_wctl;
|
||||
uint64_t scrb_csrs;
|
||||
uint64_t ifetches;
|
||||
uint64_t loads;
|
||||
uint64_t stores;
|
||||
@@ -66,7 +68,7 @@ public:
|
||||
PerfStats()
|
||||
: cycles(0)
|
||||
, instrs(0)
|
||||
, sched_idles(0)
|
||||
, sched_idle(0)
|
||||
, sched_stalls(0)
|
||||
, ibuf_stalls(0)
|
||||
, scrb_stalls(0)
|
||||
@@ -74,6 +76,8 @@ public:
|
||||
, scrb_fpu(0)
|
||||
, scrb_lsu(0)
|
||||
, scrb_sfu(0)
|
||||
, scrb_wctl(0)
|
||||
, scrb_csrs(0)
|
||||
, ifetches(0)
|
||||
, loads(0)
|
||||
, stores(0)
|
||||
@@ -88,7 +92,11 @@ public:
|
||||
std::vector<SimPort<MemReq>> dcache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
|
||||
|
||||
Core(const SimContext& ctx, uint32_t core_id, Socket* socket, const Arch &arch, const DCRS &dcrs);
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Socket* socket,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs);
|
||||
|
||||
~Core();
|
||||
|
||||
@@ -158,6 +166,7 @@ private:
|
||||
void cout_flush();
|
||||
|
||||
uint32_t core_id_;
|
||||
Socket* socket_;
|
||||
const Arch& arch_;
|
||||
const DCRS &dcrs_;
|
||||
|
||||
@@ -193,10 +202,9 @@ private:
|
||||
|
||||
PerfStats perf_stats_;
|
||||
|
||||
Socket* socket_;
|
||||
|
||||
std::vector<TraceSwitch::Ptr> commit_arbs_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
uint32_t ibuffer_idx_;
|
||||
|
||||
friend class Warp;
|
||||
|
||||
@@ -113,6 +113,7 @@ void ProcessorImpl::reset() {
|
||||
perf_mem_writes_ = 0;
|
||||
perf_mem_latency_ = 0;
|
||||
perf_mem_pending_reads_ = 0;
|
||||
|
||||
}
|
||||
|
||||
void ProcessorImpl::write_dcr(uint32_t addr, uint32_t value) {
|
||||
@@ -125,9 +126,6 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
|
||||
perf.mem_writes = perf_mem_writes_;
|
||||
perf.mem_latency = perf_mem_latency_;
|
||||
perf.l3cache = l3cache_->perf_stats();
|
||||
for (auto cluster : clusters_) {
|
||||
perf.clusters += cluster->perf_stats();
|
||||
}
|
||||
return perf;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,17 +24,10 @@ namespace vortex {
|
||||
class ProcessorImpl {
|
||||
public:
|
||||
struct PerfStats {
|
||||
CacheSim::PerfStats l3cache;
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
CacheSim::PerfStats l3cache;
|
||||
Cluster::PerfStats clusters;
|
||||
|
||||
PerfStats()
|
||||
: mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
ProcessorImpl(const Arch& arch);
|
||||
@@ -46,7 +39,7 @@ public:
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
ProcessorImpl::PerfStats perf_stats() const;
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
|
||||
@@ -55,7 +48,7 @@ private:
|
||||
const Arch& arch_;
|
||||
std::vector<std::shared_ptr<Cluster>> clusters_;
|
||||
DCRS dcrs_;
|
||||
MemSim::Ptr memsim_;
|
||||
MemSim::Ptr memsim_;
|
||||
CacheSim::Ptr l3cache_;
|
||||
uint64_t perf_mem_reads_;
|
||||
uint64_t perf_mem_writes_;
|
||||
|
||||
@@ -25,6 +25,7 @@ public:
|
||||
RegType reg_type;
|
||||
uint32_t reg_id;
|
||||
ExeType exe_type;
|
||||
SfuType sfu_type;
|
||||
uint64_t uuid;
|
||||
};
|
||||
|
||||
@@ -62,7 +63,7 @@ public:
|
||||
if (used_iregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Integer, r, owner->exe_type, owner->uuid});
|
||||
out.push_back({RegType::Integer, r, owner->exe_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,7 +71,7 @@ public:
|
||||
if (used_fregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Float, r, owner->exe_type, owner->uuid});
|
||||
out.push_back({RegType::Float, r, owner->exe_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,7 +79,7 @@ public:
|
||||
if (used_vregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Vector;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Vector, r, owner->exe_type, owner->uuid});
|
||||
out.push_back({RegType::Vector, r, owner->exe_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,16 +19,16 @@ using namespace vortex;
|
||||
Socket::Socket(const SimContext& ctx,
|
||||
uint32_t socket_id,
|
||||
Cluster* cluster,
|
||||
const Arch &arch, const
|
||||
DCRS &dcrs)
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "socket")
|
||||
, icache_mem_req_port(this)
|
||||
, icache_mem_rsp_port(this)
|
||||
, dcache_mem_req_port(this)
|
||||
, dcache_mem_rsp_port(this)
|
||||
, socket_id_(socket_id)
|
||||
, cores_(arch.socket_size())
|
||||
, cluster_(cluster)
|
||||
, cores_(arch.socket_size())
|
||||
{
|
||||
auto cores_per_socket = cores_.size();
|
||||
|
||||
@@ -77,7 +77,10 @@ Socket::Socket(const SimContext& ctx,
|
||||
|
||||
for (uint32_t i = 0; i < cores_per_socket; ++i) {
|
||||
uint32_t core_id = socket_id * cores_per_socket + i;
|
||||
cores_.at(i) = Core::Create(core_id, this, arch, dcrs);
|
||||
cores_.at(i) = Core::Create(core_id,
|
||||
this,
|
||||
arch,
|
||||
dcrs);
|
||||
|
||||
cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
|
||||
icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));
|
||||
@@ -139,8 +142,8 @@ void Socket::resume(uint32_t core_index) {
|
||||
}
|
||||
|
||||
Socket::PerfStats Socket::perf_stats() const {
|
||||
Socket::PerfStats perf;
|
||||
perf.icache = icaches_->perf_stats();
|
||||
perf.dcache = dcaches_->perf_stats();
|
||||
return perf;
|
||||
PerfStats perf_stats;
|
||||
perf_stats.icache = icaches_->perf_stats();
|
||||
perf_stats.dcache = dcaches_->perf_stats();
|
||||
return perf_stats;
|
||||
}
|
||||
@@ -30,12 +30,6 @@ public:
|
||||
struct PerfStats {
|
||||
CacheSim::PerfStats icache;
|
||||
CacheSim::PerfStats dcache;
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->icache += rhs.icache;
|
||||
this->dcache += rhs.dcache;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
SimPort<MemReq> icache_mem_req_port;
|
||||
@@ -74,14 +68,14 @@ public:
|
||||
|
||||
void resume(uint32_t core_id);
|
||||
|
||||
Socket::PerfStats perf_stats() const;
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
uint32_t socket_id_;
|
||||
uint32_t socket_id_;
|
||||
Cluster* cluster_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
CacheCluster::Ptr icaches_;
|
||||
CacheCluster::Ptr dcaches_;
|
||||
Cluster* cluster_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
Reference in New Issue
Block a user