Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
all:
|
||||
$(MAKE) -C simx
|
||||
$(MAKE) -C rtlsim
|
||||
$(MAKE) -C vlsim
|
||||
$(MAKE) -C opaesim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C simx clean
|
||||
$(MAKE) -C rtlsim clean
|
||||
$(MAKE) -C vlsim clean
|
||||
$(MAKE) -C opaesim clean
|
||||
@@ -1,7 +1,19 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
|
||||
constexpr uint32_t count_leading_zeros(uint32_t value) {
|
||||
@@ -77,5 +89,15 @@ T sext(const T& word, uint32_t width) {
|
||||
if (width == (sizeof(T) * 8))
|
||||
return word;
|
||||
T mask((static_cast<T>(1) << width) - 1);
|
||||
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
|
||||
}
|
||||
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : (word & mask);
|
||||
}
|
||||
|
||||
template <typename T = uint32_t>
|
||||
T zext(const T& word, uint32_t width) {
|
||||
assert(width > 1);
|
||||
assert(width <= (sizeof(T) * 8));
|
||||
if (width == (sizeof(T) * 8))
|
||||
return word;
|
||||
T mask((static_cast<T>(1) << width) - 1);
|
||||
return word & mask;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mem.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
@@ -20,8 +33,9 @@ RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize)
|
||||
contents_.push_back(input.get());
|
||||
} while (input);
|
||||
|
||||
while (contents_.size() & (wordSize-1))
|
||||
while (contents_.size() & (wordSize-1)) {
|
||||
contents_.push_back(0x00);
|
||||
}
|
||||
}
|
||||
|
||||
RamMemDevice::RamMemDevice(uint64_t size, uint32_t wordSize)
|
||||
@@ -29,7 +43,7 @@ RamMemDevice::RamMemDevice(uint64_t size, uint32_t wordSize)
|
||||
, wordSize_(wordSize)
|
||||
{}
|
||||
|
||||
void RamMemDevice::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
@@ -44,7 +58,7 @@ void RamMemDevice::read(void *data, uint64_t addr, uint64_t size) {
|
||||
}
|
||||
}
|
||||
|
||||
void RamMemDevice::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
@@ -68,26 +82,26 @@ void RomMemDevice::write(const void* /*data*/, uint64_t /*addr*/, uint64_t /*siz
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool MemoryUnit::ADecoder::lookup(uint64_t a, uint32_t wordSize, mem_accessor_t* ma) {
|
||||
uint64_t e = a + (wordSize - 1);
|
||||
assert(e >= a);
|
||||
bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor_t* ma) {
|
||||
uint64_t end = addr + (wordSize - 1);
|
||||
assert(end >= addr);
|
||||
for (auto iter = entries_.rbegin(), iterE = entries_.rend(); iter != iterE; ++iter) {
|
||||
if (a >= iter->start && e <= iter->end) {
|
||||
if (addr >= iter->start && end <= iter->end) {
|
||||
ma->md = iter->md;
|
||||
ma->addr = a - iter->start;
|
||||
ma->addr = addr - iter->start;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::map(uint64_t a, uint64_t e, MemDevice &m) {
|
||||
assert(e >= a);
|
||||
entry_t entry{&m, a, e};
|
||||
void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
|
||||
assert(end >= start);
|
||||
entry_t entry{&md, start, end};
|
||||
entries_.emplace_back(entry);
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
@@ -96,7 +110,7 @@ void MemoryUnit::ADecoder::read(void *data, uint64_t addr, uint64_t size) {
|
||||
ma.md->read(data, ma.addr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
@@ -107,11 +121,11 @@ void MemoryUnit::ADecoder::write(const void *data, uint64_t addr, uint64_t size)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MemoryUnit::MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm)
|
||||
MemoryUnit::MemoryUnit(uint64_t pageSize)
|
||||
: pageSize_(pageSize)
|
||||
, addrBytes_(addrBytes)
|
||||
, disableVM_(disableVm) {
|
||||
if (!disableVm) {
|
||||
, enableVM_(pageSize != 0)
|
||||
, amo_reservation_({0x0, false}) {
|
||||
if (pageSize != 0) {
|
||||
tlb_[0] = TLBEntry(0, 077);
|
||||
}
|
||||
}
|
||||
@@ -133,30 +147,38 @@ MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) {
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryUnit::read(void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 8 : 1;
|
||||
if (enableVM_) {
|
||||
TLBEntry t = this->tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
} else {
|
||||
pAddr = addr;
|
||||
}
|
||||
return pAddr;
|
||||
}
|
||||
|
||||
void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1);
|
||||
return decoder_.read(data, pAddr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::write(const void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 16 : 2;
|
||||
TLBEntry t = tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
}
|
||||
void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1);
|
||||
decoder_.write(data, pAddr, size);
|
||||
amo_reservation_.valid = false;
|
||||
}
|
||||
|
||||
void MemoryUnit::amo_reserve(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
amo_reservation_.addr = pAddr;
|
||||
amo_reservation_.valid = true;
|
||||
}
|
||||
|
||||
bool MemoryUnit::amo_check(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
|
||||
}
|
||||
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) {
|
||||
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags);
|
||||
}
|
||||
@@ -168,12 +190,14 @@ void MemoryUnit::tlbRm(uint64_t va) {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
RAM::RAM(uint32_t page_size)
|
||||
: size_(0)
|
||||
RAM::RAM(uint32_t page_size, uint64_t capacity)
|
||||
: capacity_(capacity)
|
||||
, page_bits_(log2ceil(page_size))
|
||||
, last_page_(nullptr)
|
||||
, last_page_index_(0) {
|
||||
assert(ispow2(page_size));
|
||||
assert(0 == capacity || ispow2(capacity));
|
||||
assert(0 == (capacity % page_size));
|
||||
}
|
||||
|
||||
RAM::~RAM() {
|
||||
@@ -191,6 +215,9 @@ uint64_t RAM::size() const {
|
||||
}
|
||||
|
||||
uint8_t *RAM::get(uint64_t address) const {
|
||||
if (capacity_ != 0 && address >= capacity_) {
|
||||
throw OutOfRange();
|
||||
}
|
||||
uint32_t page_size = 1 << page_bits_;
|
||||
uint32_t page_offset = address & (page_size - 1);
|
||||
uint64_t page_index = address >> page_bits_;
|
||||
@@ -218,14 +245,14 @@ uint8_t *RAM::get(uint64_t address) const {
|
||||
return page + page_offset;
|
||||
}
|
||||
|
||||
void RAM::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void RAM::read(void* data, uint64_t addr, uint64_t size) {
|
||||
uint8_t* d = (uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
d[i] = *this->get(addr + i);
|
||||
}
|
||||
}
|
||||
|
||||
void RAM::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void RAM::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
const uint8_t* d = (const uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
*this->get(addr + i) = d[i];
|
||||
@@ -236,6 +263,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
|
||||
std::ifstream ifs(filename);
|
||||
if (!ifs) {
|
||||
std::cout << "error: " << filename << " not found" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
ifs.seekg(0, ifs.end);
|
||||
@@ -268,6 +296,7 @@ void RAM::loadHexImage(const char* filename) {
|
||||
std::ifstream ifs(filename);
|
||||
if (!ifs) {
|
||||
std::cout << "error: " << filename << " not found" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
ifs.seekg(0, ifs.end);
|
||||
@@ -313,4 +342,4 @@ void RAM::loadHexImage(const char* filename) {
|
||||
++line;
|
||||
--size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
@@ -7,13 +20,14 @@
|
||||
|
||||
namespace vortex {
|
||||
struct BadAddress {};
|
||||
struct OutOfRange {};
|
||||
|
||||
class MemDevice {
|
||||
public:
|
||||
virtual ~MemDevice() {}
|
||||
virtual uint64_t size() const = 0;
|
||||
virtual void read(void *data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void write(const void *data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void read(void* data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void write(const void* data, uint64_t addr, uint64_t size) = 0;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -21,11 +35,11 @@ public:
|
||||
class RamMemDevice : public MemDevice {
|
||||
public:
|
||||
RamMemDevice(uint64_t size, uint32_t wordSize);
|
||||
RamMemDevice(const char *filename, uint32_t wordSize);
|
||||
RamMemDevice(const char* filename, uint32_t wordSize);
|
||||
~RamMemDevice() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void read(void* data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
virtual uint64_t size() const {
|
||||
return contents_.size();
|
||||
@@ -50,7 +64,7 @@ public:
|
||||
|
||||
~RomMemDevice();
|
||||
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -63,47 +77,56 @@ public:
|
||||
: faultAddr(a)
|
||||
, notFound(nf)
|
||||
{}
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
};
|
||||
|
||||
MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm = false);
|
||||
MemoryUnit(uint64_t pageSize = 0);
|
||||
|
||||
void attach(MemDevice &m, uint64_t start, uint64_t end);
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
void read(void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
|
||||
void amo_reserve(uint64_t addr);
|
||||
bool amo_check(uint64_t addr);
|
||||
|
||||
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags);
|
||||
void tlbRm(uint64_t va);
|
||||
void tlbRm(uint64_t vaddr);
|
||||
void tlbFlush() {
|
||||
tlb_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
struct amo_reservation_t {
|
||||
uint64_t addr;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
class ADecoder {
|
||||
public:
|
||||
ADecoder() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size);
|
||||
void write(const void *data, uint64_t addr, uint64_t size);
|
||||
void read(void* data, uint64_t addr, uint64_t size);
|
||||
void write(const void* data, uint64_t addr, uint64_t size);
|
||||
|
||||
void map(uint64_t start, uint64_t end, MemDevice &md);
|
||||
|
||||
private:
|
||||
|
||||
struct mem_accessor_t {
|
||||
MemDevice* md;
|
||||
uint64_t addr;
|
||||
MemDevice* md;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
struct entry_t {
|
||||
MemDevice *md;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
MemDevice* md;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
};
|
||||
|
||||
bool lookup(uint64_t a, uint32_t wordSize, mem_accessor_t*);
|
||||
bool lookup(uint64_t addr, uint32_t wordSize, mem_accessor_t*);
|
||||
|
||||
std::vector<entry_t> entries_;
|
||||
};
|
||||
@@ -120,11 +143,14 @@ private:
|
||||
|
||||
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
|
||||
|
||||
uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask);
|
||||
|
||||
std::unordered_map<uint64_t, TLBEntry> tlb_;
|
||||
uint64_t pageSize_;
|
||||
uint64_t addrBytes_;
|
||||
ADecoder decoder_;
|
||||
bool disableVM_;
|
||||
uint64_t pageSize_;
|
||||
ADecoder decoder_;
|
||||
bool enableVM_;
|
||||
|
||||
amo_reservation_t amo_reservation_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -132,15 +158,15 @@ private:
|
||||
class RAM : public MemDevice {
|
||||
public:
|
||||
|
||||
RAM(uint32_t page_size);
|
||||
RAM(uint32_t page_size, uint64_t capacity = 0);
|
||||
~RAM();
|
||||
|
||||
void clear();
|
||||
|
||||
uint64_t size() const override;
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void read(void* data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
void loadBinImage(const char* filename, uint64_t destination);
|
||||
void loadHexImage(const char* filename);
|
||||
@@ -157,11 +183,11 @@ private:
|
||||
|
||||
uint8_t *get(uint64_t address) const;
|
||||
|
||||
uint64_t size_;
|
||||
uint64_t capacity_;
|
||||
uint32_t page_bits_;
|
||||
mutable std::unordered_map<uint64_t, uint8_t*> pages_;
|
||||
mutable uint8_t* last_page_;
|
||||
mutable uint64_t last_page_index_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stack>
|
||||
@@ -18,8 +31,9 @@ public:
|
||||
void* allocate() {
|
||||
void* mem;
|
||||
if (!free_list_.empty()) {
|
||||
mem = static_cast<void*>(free_list_.top());
|
||||
auto entry = free_list_.top();
|
||||
free_list_.pop();
|
||||
mem = static_cast<void*>(entry);
|
||||
} else {
|
||||
mem = ::operator new(sizeof(T));
|
||||
}
|
||||
@@ -36,12 +50,13 @@ public:
|
||||
|
||||
void flush() {
|
||||
while (!free_list_.empty()) {
|
||||
::operator delete(free_list_.top());
|
||||
auto entry = free_list_.top();
|
||||
free_list_.pop();
|
||||
::operator delete(entry);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::stack<void*> free_list_;
|
||||
std::stack<T*> free_list_;
|
||||
uint32_t max_size_;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "rvfloats.h"
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -16,12 +29,9 @@ inline float64_t to_float64_t(uint64_t x) { return float64_t{x}; }
|
||||
inline uint32_t from_float32_t(float32_t x) { return uint32_t(x.v); }
|
||||
inline uint64_t from_float64_t(float64_t x) { return uint64_t(x.v); }
|
||||
|
||||
inline uint32_t get_fflags() {
|
||||
uint32_t fflags = softfloat_exceptionFlags;
|
||||
if (fflags) {
|
||||
softfloat_exceptionFlags = 0;
|
||||
}
|
||||
return fflags;
|
||||
inline void rv_init(uint32_t frm) {
|
||||
softfloat_exceptionFlags = 0;
|
||||
softfloat_roundingMode = frm;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -29,289 +39,296 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_add(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_add(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_sub(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_sub(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_mul(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_mul(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_div(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_div(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_sqrt(to_float32_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_sqrt(to_float64_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_i32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_i32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_ui32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_ui32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_i64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_i64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_ui64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_ui64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_lt(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_lt(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_le(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_le(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_eq(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_eq(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -324,12 +341,13 @@ uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
@@ -342,12 +360,13 @@ uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -360,12 +379,13 @@ uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
@@ -378,7 +398,7 @@ uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
#ifndef RVFLOATS_H
|
||||
#define RVFLOATS_H
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
@@ -78,5 +90,3 @@ uint64_t rv_ftod(uint32_t a);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
@@ -84,33 +97,39 @@ public:
|
||||
}
|
||||
|
||||
uint64_t pop() {
|
||||
auto cycle = queue_.front().cycle;
|
||||
auto cycles = queue_.front().cycles;
|
||||
queue_.pop();
|
||||
return cycle;
|
||||
return cycles;
|
||||
}
|
||||
|
||||
void tx_callback(const TxCallback& callback) {
|
||||
tx_cb_ = callback;
|
||||
}
|
||||
|
||||
uint64_t arrival_time() const {
|
||||
if (queue_.empty())
|
||||
return 0;
|
||||
return queue_.front().cycles;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct timed_pkt_t {
|
||||
Pkt pkt;
|
||||
uint64_t cycle;
|
||||
uint64_t cycles;
|
||||
};
|
||||
|
||||
std::queue<timed_pkt_t> queue_;
|
||||
SimPort* peer_;
|
||||
TxCallback tx_cb_;
|
||||
|
||||
void push(const Pkt& data, uint64_t cycle) {
|
||||
void push(const Pkt& data, uint64_t cycles) {
|
||||
if (tx_cb_) {
|
||||
tx_cb_(data, cycle);
|
||||
tx_cb_(data, cycles);
|
||||
}
|
||||
if (peer_) {
|
||||
peer_->push(data, cycle);
|
||||
peer_->push(data, cycles);
|
||||
} else {
|
||||
queue_.push({data, cycle});
|
||||
queue_.push({data, cycles});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,14 +148,14 @@ public:
|
||||
|
||||
virtual void fire() const = 0;
|
||||
|
||||
uint64_t time() const {
|
||||
return time_;
|
||||
uint64_t cycles() const {
|
||||
return cycles_;
|
||||
}
|
||||
|
||||
protected:
|
||||
SimEventBase(uint64_t time) : time_(time) {}
|
||||
SimEventBase(uint64_t cycles) : cycles_(cycles) {}
|
||||
|
||||
uint64_t time_;
|
||||
uint64_t cycles_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -150,8 +169,8 @@ public:
|
||||
|
||||
typedef std::function<void (const Pkt&)> Func;
|
||||
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t time)
|
||||
: SimEventBase(time)
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
@@ -180,11 +199,11 @@ template <typename Pkt>
|
||||
class SimPortEvent : public SimEventBase {
|
||||
public:
|
||||
void fire() const override {
|
||||
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, time_);
|
||||
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, cycles_);
|
||||
}
|
||||
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t time)
|
||||
: SimEventBase(time)
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, port_(port)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
@@ -330,7 +349,7 @@ public:
|
||||
auto evt_it_end = events_.end();
|
||||
while (evt_it != evt_it_end) {
|
||||
auto& event = *evt_it;
|
||||
if (cycles_ >= event->time()) {
|
||||
if (cycles_ >= event->cycles()) {
|
||||
event->fire();
|
||||
evt_it = events_.erase(evt_it);
|
||||
} else {
|
||||
@@ -395,5 +414,5 @@ void SimPort<Pkt>::send(const Pkt& pkt, uint64_t delay) const {
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->send(pkt, delay);
|
||||
} else {
|
||||
SimPlatform::instance().schedule(this, pkt, delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
78
sim/common/stringutil.h
Normal file
78
sim/common/stringutil.h
Normal file
@@ -0,0 +1,78 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
class ByteStream : public std::istream {
|
||||
public:
|
||||
ByteStream(const void *buf, std::size_t size) : buf_(buf), size_(size) {}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ByteStream& obj) {
|
||||
auto oldflags = os.flags();
|
||||
auto oldwidth = os.width();
|
||||
auto oldfill = os.fill();
|
||||
for (std::size_t i = 0, n = obj.size_; i < n; ++i) {
|
||||
int byte = *((uint8_t*)obj.buf_ + (n - 1 - i));
|
||||
os << std::hex << std::setw(2) << std::setfill('0') << byte;
|
||||
}
|
||||
os.fill(oldfill);
|
||||
os.width(oldwidth);
|
||||
os.flags(oldflags);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
const void *buf_;
|
||||
std::size_t size_;
|
||||
};
|
||||
|
||||
class IndentStream : public std::streambuf {
|
||||
public:
|
||||
explicit IndentStream(std::streambuf* dest, int indent = 4)
|
||||
: dest_(dest)
|
||||
, isBeginLine_(true)
|
||||
, indent_(indent, ' ')
|
||||
, owner_(nullptr)
|
||||
{}
|
||||
|
||||
explicit IndentStream(std::ostream& dest, int indent = 4)
|
||||
: dest_(dest.rdbuf())
|
||||
, isBeginLine_(true)
|
||||
, indent_(indent, ' ')
|
||||
, owner_(&dest) {
|
||||
owner_->rdbuf(this);
|
||||
}
|
||||
|
||||
virtual ~IndentStream() {
|
||||
if (owner_)
|
||||
owner_->rdbuf(dest_);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual int overflow(int ch) {
|
||||
if (isBeginLine_ && ch != '\n') {
|
||||
dest_->sputn(indent_.data(), indent_.size());
|
||||
}
|
||||
isBeginLine_ = ch == '\n';
|
||||
return dest_->sputc(ch);
|
||||
}
|
||||
|
||||
private:
|
||||
std::streambuf* dest_;
|
||||
bool isBeginLine_;
|
||||
std::string indent_;
|
||||
std::ostream* owner_;
|
||||
};
|
||||
@@ -1,237 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cocogfx/include/fixed.h>
|
||||
#include <bitmanip.h>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
enum class WrapMode {
|
||||
Clamp,
|
||||
Repeat,
|
||||
Mirror,
|
||||
};
|
||||
|
||||
enum class TexFormat {
|
||||
A8R8G8B8,
|
||||
R5G6B5,
|
||||
A1R5G5B5,
|
||||
A4R4G4B4,
|
||||
A8L8,
|
||||
L8,
|
||||
A8,
|
||||
};
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
T Clamp(Fixed<F,T> fx, WrapMode mode) {
|
||||
switch (mode) {
|
||||
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > Fixed<F,T>::MASK) ? Fixed<F,T>::MASK : fx.data());
|
||||
case WrapMode::Repeat: return (fx.data() & Fixed<F,T>::MASK);
|
||||
case WrapMode::Mirror: return (bit_get(fx.data(), Fixed<F,T>::FRAC) ? ~fx.data() : fx.data());
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t Stride(TexFormat format) {
|
||||
switch (format) {
|
||||
case TexFormat::A8R8G8B8:
|
||||
return 4;
|
||||
case TexFormat::R5G6B5:
|
||||
case TexFormat::A1R5G5B5:
|
||||
case TexFormat::A4R4G4B4:
|
||||
case TexFormat::A8L8:
|
||||
return 2;
|
||||
case TexFormat::L8:
|
||||
case TexFormat::A8:
|
||||
return 1;
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Unpack8888(TexFormat format,
|
||||
uint32_t texel,
|
||||
uint32_t* lo,
|
||||
uint32_t* hi) {
|
||||
uint32_t r, g, b, a;
|
||||
switch (format) {
|
||||
case TexFormat::A8R8G8B8:
|
||||
r = (texel >> 16) & 0xff;
|
||||
g = (texel >> 8) & 0xff;
|
||||
b = texel & 0xff;
|
||||
a = texel >> 24;
|
||||
break;
|
||||
case TexFormat::R5G6B5:
|
||||
r = ((texel >> 11) << 3) | (texel >> 13);
|
||||
g = ((texel >> 3) & 0xfc) | ((texel >> 9) & 0x3);
|
||||
b = ((texel & 0x1f) << 3) | ((texel & 0x1c) >> 2);
|
||||
a = 0xff;
|
||||
break;
|
||||
case TexFormat::A1R5G5B5:
|
||||
r = ((texel >> 7) & 0xf8) | ((texel << 1) >> 13);
|
||||
g = ((texel >> 2) & 0xf8) | ((texel >> 7) & 7);
|
||||
b = ((texel & 0x1f) << 3) | ((texel & 0x1c) >> 2);
|
||||
a = 0xff * (texel >> 15);
|
||||
break;
|
||||
case TexFormat::A4R4G4B4:
|
||||
r = ((texel >> 4) & 0xf0) | ((texel >> 8) & 0x0f);
|
||||
g = ((texel & 0xf0) >> 0) | ((texel & 0xf0) >> 4);
|
||||
b = ((texel & 0x0f) << 4) | ((texel & 0x0f) >> 0);
|
||||
a = ((texel >> 8) & 0xf0) | (texel >> 12);
|
||||
break;
|
||||
case TexFormat::A8L8:
|
||||
r = texel & 0xff;
|
||||
g = r;
|
||||
b = r;
|
||||
a = texel >> 8;
|
||||
break;
|
||||
case TexFormat::L8:
|
||||
r = texel & 0xff;
|
||||
g = r;
|
||||
b = r;
|
||||
a = 0xff;
|
||||
break;
|
||||
case TexFormat::A8:
|
||||
r = 0xff;
|
||||
g = 0xff;
|
||||
b = 0xff;
|
||||
a = texel & 0xff;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
*lo = (r << 16) + b;
|
||||
*hi = (a << 16) + g;
|
||||
}
|
||||
|
||||
inline void Unpack8888(uint32_t texel, uint32_t* lo, uint32_t* hi) {
|
||||
*lo = texel & 0x00ff00ff;
|
||||
*hi = (texel >> 8) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
inline uint32_t Pack8888(uint32_t lo, uint32_t hi) {
|
||||
return (hi << 8) | lo;
|
||||
}
|
||||
|
||||
inline uint32_t Lerp8888(uint32_t a, uint32_t b, uint32_t f) {
|
||||
return (a + (((b - a) * f) >> 8)) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressLinear(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
WrapMode wrapv,
|
||||
uint32_t* addr00,
|
||||
uint32_t* addr01,
|
||||
uint32_t* addr10,
|
||||
uint32_t* addr11,
|
||||
uint32_t* alpha,
|
||||
uint32_t* beta
|
||||
) {
|
||||
auto delta_x = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_width);
|
||||
auto delta_y = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_height);
|
||||
|
||||
uint32_t u0 = Clamp(fu - delta_x, wrapu);
|
||||
uint32_t u1 = Clamp(fu + delta_x, wrapu);
|
||||
uint32_t v0 = Clamp(fv - delta_y, wrapv);
|
||||
uint32_t v1 = Clamp(fv + delta_y, wrapv);
|
||||
|
||||
uint32_t shift_u = (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t shift_v = (Fixed<F,T>::FRAC - log_height);
|
||||
|
||||
uint32_t x0s = (u0 << 8) >> shift_u;
|
||||
uint32_t y0s = (v0 << 8) >> shift_v;
|
||||
|
||||
uint32_t x0 = x0s >> 8;
|
||||
uint32_t y0 = y0s >> 8;
|
||||
uint32_t x1 = u1 >> shift_u;
|
||||
uint32_t y1 = v1 >> shift_v;
|
||||
|
||||
*addr00 = x0 + (y0 << log_width);
|
||||
*addr01 = x1 + (y0 << log_width);
|
||||
*addr10 = x0 + (y1 << log_width);
|
||||
*addr11 = x1 + (y1 << log_width);
|
||||
|
||||
*alpha = x0s & 0xff;
|
||||
*beta = y0s & 0xff;
|
||||
|
||||
//printf("*** fu=0x%x, fv=0x%x, u0=0x%x, u1=0x%x, v0=0x%x, v1=0x%x, x0=0x%x, x1=0x%x, y0=0x%x, y1=0x%x, addr00=0x%x, addr01=0x%x, addr10=0x%x, addr11=0x%x\n", fu.data(), fv.data(), u0, u1, v0, v1, x0, x1, y0, y1, *addr00, *addr01, *addr10, *addr11);
|
||||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressPoint(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
WrapMode wrapv,
|
||||
uint32_t* addr
|
||||
) {
|
||||
uint32_t u = Clamp(fu, wrapu);
|
||||
uint32_t v = Clamp(fv, wrapv);
|
||||
|
||||
uint32_t x = u >> (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t y = v >> (Fixed<F,T>::FRAC - log_height);
|
||||
|
||||
*addr = x + (y << log_width);
|
||||
|
||||
//printf("*** fu=0x%x, fv=0x%x, u=0x%x, v=0x%x, x=0x%x, y=0x%x, addr=0x%x\n", fu.data(), fv.data(), u, v, x, y, *addr);
|
||||
}
|
||||
|
||||
inline uint32_t TexFilterLinear(
|
||||
TexFormat format,
|
||||
uint32_t texel00,
|
||||
uint32_t texel01,
|
||||
uint32_t texel10,
|
||||
uint32_t texel11,
|
||||
uint32_t alpha,
|
||||
uint32_t beta
|
||||
) {
|
||||
uint32_t c01l, c01h;
|
||||
{
|
||||
uint32_t c0l, c0h, c1l, c1h;
|
||||
Unpack8888(format, texel00, &c0l, &c0h);
|
||||
Unpack8888(format, texel01, &c1l, &c1h);
|
||||
c01l = Lerp8888(c0l, c1l, alpha);
|
||||
c01h = Lerp8888(c0h, c1h, alpha);
|
||||
}
|
||||
|
||||
uint32_t c23l, c23h;
|
||||
{
|
||||
uint32_t c2l, c2h, c3l, c3h;
|
||||
Unpack8888(format, texel10, &c2l, &c2h);
|
||||
Unpack8888(format, texel11, &c3l, &c3h);
|
||||
c23l = Lerp8888(c2l, c3l, alpha);
|
||||
c23h = Lerp8888(c2h, c3h, alpha);
|
||||
}
|
||||
|
||||
uint32_t color;
|
||||
{
|
||||
uint32_t cl = Lerp8888(c01l, c23l, beta);
|
||||
uint32_t ch = Lerp8888(c01h, c23h, beta);
|
||||
color = Pack8888(cl, ch);
|
||||
}
|
||||
|
||||
//printf("*** texel00=0x%x, texel01=0x%x, texel10=0x%x, texel11=0x%x, color=0x%x\n", texel00, texel01, texel10, texel11, color);
|
||||
|
||||
return color;
|
||||
}
|
||||
|
||||
inline uint32_t TexFilterPoint(TexFormat format, uint32_t texel) {
|
||||
uint32_t color;
|
||||
{
|
||||
uint32_t cl, ch;
|
||||
Unpack8888(format, texel, &cl, &ch);
|
||||
color = Pack8888(cl, ch);
|
||||
}
|
||||
|
||||
//printf("*** texel=0x%x, color=0x%x\n", texel, color);
|
||||
|
||||
return color;
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "util.h"
|
||||
#include <string.h>
|
||||
|
||||
@@ -7,4 +20,20 @@ const char* fileExtension(const char* filepath) {
|
||||
if (ext == NULL || ext == filepath)
|
||||
return "";
|
||||
return ext + 1;
|
||||
}
|
||||
|
||||
void* aligned_malloc(size_t size, size_t alignment) {
|
||||
// reserve margin for alignment and storing of unaligned address
|
||||
assert((alignment & (alignment - 1)) == 0); // Power of 2 alignment.
|
||||
size_t margin = (alignment-1) + sizeof(void*);
|
||||
void *unaligned_addr = malloc(size + margin);
|
||||
void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
|
||||
aligned_addr[-1] = unaligned_addr;
|
||||
return aligned_addr;
|
||||
}
|
||||
|
||||
void aligned_free(void *ptr) {
|
||||
// retreive the stored unaligned address and use it to free the allocation
|
||||
void* unaligned_addr = ((void**)ptr)[-1];
|
||||
free(unaligned_addr);
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
@@ -49,4 +62,7 @@ const char* fileExtension(const char* filepath);
|
||||
#define DISABLE_WARNING_UNUSED_PARAMETER
|
||||
#define DISABLE_WARNING_UNREFERENCED_FUNCTION
|
||||
#define DISABLE_WARNING_ANONYMOUS_STRUCT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void *aligned_malloc(size_t size, size_t alignment);
|
||||
void aligned_free(void *ptr);
|
||||
55
sim/common/uuid_gen.h
Normal file
55
sim/common/uuid_gen.h
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class UUIDGenerator {
|
||||
public:
|
||||
UUIDGenerator() : ids_(0) {}
|
||||
virtual ~UUIDGenerator() {}
|
||||
|
||||
uint32_t get_uuid(uint64_t PC) {
|
||||
uint32_t id;
|
||||
uint32_t ref;
|
||||
auto it = uuid_map_.find(PC);
|
||||
if (it != uuid_map_.end()) {
|
||||
uint64_t value = it->second;
|
||||
id = value & 0xffff;
|
||||
ref = value >> 16;
|
||||
} else {
|
||||
id = ids_++;
|
||||
ref = -1;
|
||||
}
|
||||
++ref;
|
||||
uint64_t ret = (uint64_t(ref) << 16) | id;
|
||||
uuid_map_[PC] = ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
uuid_map_.clear();
|
||||
ids_ = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::unordered_map<uint64_t, uint32_t> uuid_map_;
|
||||
uint32_t ids_;
|
||||
};
|
||||
|
||||
}
|
||||
134
sim/opaesim/Makefile
Normal file
134
sim/opaesim/Makefile
Normal file
@@ -0,0 +1,134 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
AFU_DIR = $(RTL_DIR)/afu/opae
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common -I$(abspath $(DESTDIR))
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
# Control logic analyzer monitors
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
|
||||
|
||||
# AFU parameters
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
|
||||
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += $(DBG_SCOPE_FLAGS)
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O3 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SCOPE_JSON = $(DESTDIR)/scope.json
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
PROJECT = libopae-c-sim.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/vortex.xml:
|
||||
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $(DESTDIR)/vortex.xml
|
||||
|
||||
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml
|
||||
$(SCRIPT_DIR)/scope.py $(DESTDIR)/vortex.xml -o $(DESTDIR)/scope.json
|
||||
|
||||
$(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(AFU_DIR)/vortex_afu.vh -o $(DESTDIR)/vortex_afu.h
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
||||
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/vortex.xml $(DESTDIR)/scope.json $(DESTDIR)/vortex_afu.h $(DESTDIR)/$(PROJECT)
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
@@ -8,10 +21,61 @@
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
|
||||
__unused (token, prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
|
||||
__unused (prop, objtype);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
|
||||
__unused (prop, guid);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyProperties(fpga_properties *prop) {
|
||||
__unused (prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
|
||||
__unused (filters, num_filters, num_filters, tokens, max_tokens);
|
||||
if (num_matches) {
|
||||
*num_matches = 1;
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyToken(fpga_token *token) {
|
||||
__unused (token);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filters, uint64_t* lms) {
|
||||
__unused (filters);
|
||||
if (lms) {
|
||||
#if (XLEN == 64)
|
||||
*lms = 0x200000000; // 8 GB
|
||||
#else
|
||||
*lms = 0x100000000; // 4 GB
|
||||
#endif
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
|
||||
__unused (token);
|
||||
if (NULL == handle || flags != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
auto sim = new opae_sim();
|
||||
@@ -83,4 +147,8 @@ extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_
|
||||
|
||||
extern const char *fpgaErrStr(fpga_result e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -1,7 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -21,28 +34,21 @@ typedef enum {
|
||||
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
|
||||
} fpga_result;
|
||||
|
||||
typedef enum {
|
||||
FPGA_DEVICE = 0,
|
||||
FPGA_ACCELERATOR
|
||||
} fpga_objtype;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
|
||||
typedef void *fpga_properties;
|
||||
|
||||
fpga_result fpgaClose(fpga_handle handle);
|
||||
|
||||
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
|
||||
|
||||
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
const char *fpgaErrStr(fpga_result e);
|
||||
typedef uint8_t fpga_guid[16];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __FPGA_H__
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "opae_sim.h"
|
||||
|
||||
#include <verilated.h>
|
||||
@@ -25,6 +38,7 @@
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
@@ -62,6 +76,8 @@
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
#define CPU_GPU_LATENCY 200
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
@@ -70,23 +86,6 @@ double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
static void *__aligned_malloc(size_t alignment, size_t size) {
|
||||
// reserve margin for alignment and storing of unaligned address
|
||||
size_t margin = (alignment-1) + sizeof(void*);
|
||||
void *unaligned_addr = malloc(size + margin);
|
||||
void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
|
||||
aligned_addr[-1] = unaligned_addr;
|
||||
return aligned_addr;
|
||||
}
|
||||
|
||||
static void __aligned_free(void *ptr) {
|
||||
// retreive the stored unaligned address and use it to free the allocation
|
||||
void* unaligned_addr = ((void**)ptr)[-1];
|
||||
free(unaligned_addr);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
@@ -158,7 +157,7 @@ public:
|
||||
future_.wait();
|
||||
}
|
||||
for (auto& buffer : host_buffers_) {
|
||||
__aligned_free(buffer.second.data);
|
||||
aligned_free(buffer.second.data);
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
trace_->close();
|
||||
@@ -176,9 +175,13 @@ public:
|
||||
}
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
|
||||
auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < len; ++i) {
|
||||
((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
}
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
@@ -193,7 +196,7 @@ public:
|
||||
void release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
__aligned_free(it->second.data);
|
||||
aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
@@ -205,6 +208,11 @@ public:
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
@@ -217,7 +225,12 @@ public:
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
@@ -254,7 +267,14 @@ private:
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
device_->reset = 0;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
@@ -289,7 +309,7 @@ private:
|
||||
#endif
|
||||
}
|
||||
|
||||
void eval() {
|
||||
void eval() {
|
||||
device_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
@@ -396,10 +416,10 @@ private:
|
||||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
unsigned byte_addr = device_->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE;
|
||||
if (device_->avs_write[b]) {
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
uint8_t* data = (uint8_t*)device_->avs_writedata[b].data();
|
||||
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
@@ -419,8 +439,7 @@ private:
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
}
|
||||
|
||||
} else
|
||||
if (device_->avs_read[b]) {
|
||||
auto mem_req = new mem_rd_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
@@ -491,7 +510,7 @@ private:
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM *ram_;
|
||||
RAM* ram_;
|
||||
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
@@ -531,4 +550,4 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
impl_->read_mmio64(mmio_num, offset, value);
|
||||
}
|
||||
}
|
||||
43
sim/opaesim/opae_sim.h
Normal file
43
sim/opaesim/opae_sim.h
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
8
sim/opaesim/verilator.vlt
Normal file
8
sim/opaesim/verilator.vlt
Normal file
@@ -0,0 +1,8 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
|
||||
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
|
||||
lint_off -file "*/fpnew/src/*"
|
||||
|
||||
lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv"
|
||||
lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv"
|
||||
@@ -1,16 +1,24 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`include "vortex_afu.vh"
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module vortex_afu_shim (
|
||||
module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
||||
// global signals
|
||||
input clk,
|
||||
input reset,
|
||||
@@ -167,4 +175,4 @@ assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid;
|
||||
assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid;
|
||||
assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
@@ -1,3 +1,4 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
@@ -8,6 +9,7 @@ CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
@@ -23,13 +25,18 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
@@ -42,14 +49,18 @@ else
|
||||
TOP = Vortex
|
||||
endif
|
||||
|
||||
VL_FLAGS = --exe --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-EOFNEWLINE
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
VL_FLAGS += -DSIMULATION
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
@@ -59,8 +70,8 @@ VL_FLAGS += -j $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
@@ -72,20 +83,12 @@ ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = rtlsim
|
||||
|
||||
all: $(PROJECT)
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$@
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS) -DSTARTUP_ADDR=0x80000000' -LDFLAGS '$(LDFLAGS)' -o ../$@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '-shared $(LDFLAGS)' -o ../$@
|
||||
|
||||
@@ -1,11 +1,24 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <unistd.h>
|
||||
#include <unistd.h>
|
||||
#include <util.h>
|
||||
#include <mem.h>
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include "processor.h"
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
@@ -13,11 +26,11 @@
|
||||
using namespace vortex;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Usage: [-r] [-h: help] programs.." << std::endl;
|
||||
std::cout << "Usage: [-r: riscv-test] [-h: help] <program>" << std::endl;
|
||||
}
|
||||
|
||||
bool riscv_test = false;
|
||||
std::vector<const char*> programs;
|
||||
const char* program = nullptr;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
@@ -35,56 +48,68 @@ static void parse_args(int argc, char **argv) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = optind; i < argc; ++i) {
|
||||
programs.push_back(argv[i]);
|
||||
if (optind < argc) {
|
||||
program = argv[optind];
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
} else {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
int exitcode = 0;
|
||||
bool failed = false;
|
||||
|
||||
parse_args(argc, argv);
|
||||
parse_args(argc, argv);
|
||||
|
||||
// create memory module
|
||||
vortex::RAM ram(RAM_PAGE_SIZE);
|
||||
|
||||
// create processor
|
||||
vortex::Processor processor;
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
for (auto program : programs) {
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
// setup base DCRs
|
||||
const uint64_t startup_addr(STARTUP_ADDR);
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
|
||||
#if (XLEN == 64)
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32);
|
||||
#endif
|
||||
processor.write_dcr(VX_DCR_BASE_MPM_CLASS, 0);
|
||||
|
||||
// load program
|
||||
{
|
||||
std::string program_ext(fileExtension(program));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(program, STARTUP_ADDR);
|
||||
ram.loadBinImage(program, startup_addr);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(program);
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
exitcode = processor.run();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed" << std::endl;
|
||||
} else {
|
||||
std::cout << "Failed: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed)
|
||||
break;
|
||||
}
|
||||
|
||||
return failed ? exitcode : 0;
|
||||
// run simulation
|
||||
exitcode = processor.run();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed" << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed" << std::endl;
|
||||
exitcode = 1;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "processor.h"
|
||||
|
||||
#include <verilated.h>
|
||||
@@ -56,6 +69,14 @@
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
#if (XLEN == 32)
|
||||
typedef uint32_t Word;
|
||||
#elif (XLEN == 64)
|
||||
typedef uint64_t Word;
|
||||
#else
|
||||
#error unsupported XLEN
|
||||
#endif
|
||||
|
||||
#define VL_WDATA_GETW(lwp, i, n, w) \
|
||||
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
|
||||
|
||||
@@ -71,7 +92,7 @@ double sc_time_stamp() {
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
@@ -126,6 +147,9 @@ public:
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
@@ -165,27 +189,46 @@ public:
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
|
||||
// reset device
|
||||
this->reset();
|
||||
// start execution
|
||||
running_ = true;
|
||||
device_->reset = 0;
|
||||
|
||||
// execute program
|
||||
// wait on device to go busy
|
||||
while (!device_->busy) {
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// wait on device to go idle
|
||||
while (device_->busy) {
|
||||
if (get_ebreak()) {
|
||||
exitcode = get_last_wb_value(3);
|
||||
exitcode = (int)get_last_wb_value(3);
|
||||
break;
|
||||
}
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// reset device
|
||||
this->reset();
|
||||
|
||||
// wait 5 cycles to flush the pipeline
|
||||
this->wait(5);
|
||||
this->cout_flush();
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value) {
|
||||
device_->dcr_wr_valid = 1;
|
||||
device_->dcr_wr_addr = addr;
|
||||
device_->dcr_wr_data = value;
|
||||
while (device_->dcr_wr_valid) {
|
||||
this->tick();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void reset() {
|
||||
void reset() {
|
||||
running_ = false;
|
||||
|
||||
print_bufs_.clear();
|
||||
|
||||
pending_mem_reqs_.clear();
|
||||
@@ -199,6 +242,8 @@ private:
|
||||
this->reset_avs_bus();
|
||||
#endif
|
||||
|
||||
this->reset_dcr_bus();
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
@@ -206,14 +251,7 @@ private:
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
|
||||
this->cout_flush();
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
@@ -226,6 +264,7 @@ private:
|
||||
#else
|
||||
this->eval_avs_bus(0);
|
||||
#endif
|
||||
this->eval_dcr_bus(0);
|
||||
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
@@ -235,6 +274,7 @@ private:
|
||||
#else
|
||||
this->eval_avs_bus(1);
|
||||
#endif
|
||||
this->eval_dcr_bus(1);
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
@@ -260,6 +300,8 @@ private:
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
trace_->dump(timestamp);
|
||||
} else {
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
@@ -268,30 +310,30 @@ private:
|
||||
#ifdef AXI_BUS
|
||||
|
||||
void reset_axi_bus() {
|
||||
device_->m_axi_wready = 0;
|
||||
device_->m_axi_awready = 0;
|
||||
device_->m_axi_arready = 0;
|
||||
device_->m_axi_rvalid = 0;
|
||||
device_->m_axi_bvalid = 0;
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
|
||||
void eval_axi_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = device_->m_axi_rready;
|
||||
mem_wr_rsp_ready_ = device_->m_axi_bready;
|
||||
mem_rd_rsp_ready_ = device_->m_axi_rready[0];
|
||||
mem_wr_rsp_ready_ = device_->m_axi_bready[0];
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
device_->m_axi_wready = 0;
|
||||
device_->m_axi_awready = 0;
|
||||
device_->m_axi_arready = 0;
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// process memory responses
|
||||
if (mem_rd_rsp_active_
|
||||
&& device_->m_axi_rvalid && mem_rd_rsp_ready_) {
|
||||
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
@@ -299,30 +341,30 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& !(*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
device_->m_axi_rvalid = 1;
|
||||
device_->m_axi_rid = mem_req->tag;
|
||||
device_->m_axi_rresp = 0;
|
||||
device_->m_axi_rlast = 1;
|
||||
memcpy((uint8_t*)device_->m_axi_rdata, mem_req->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->m_axi_rvalid[0] = 1;
|
||||
device_->m_axi_rid[0] = mem_rsp->tag;
|
||||
device_->m_axi_rresp[0] = 0;
|
||||
device_->m_axi_rlast[0] = 1;
|
||||
memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_rvalid = 0;
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory write response
|
||||
if (mem_wr_rsp_active_
|
||||
&& device_->m_axi_bvalid && mem_wr_rsp_ready_) {
|
||||
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
@@ -330,34 +372,34 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& (*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
*/
|
||||
device_->m_axi_bvalid = 1;
|
||||
device_->m_axi_bid = mem_req->tag;
|
||||
device_->m_axi_bresp = 0;
|
||||
device_->m_axi_bvalid[0] = 1;
|
||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||
device_->m_axi_bresp[0] = 0;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_wr_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_bvalid = 0;
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_addr = device_->m_axi_wvalid ? device_->m_axi_awaddr : device_->m_axi_araddr;
|
||||
uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0];
|
||||
|
||||
// process memory requests
|
||||
if (device_->m_axi_wvalid || device_->m_axi_arvalid) {
|
||||
if (device_->m_axi_wvalid) {
|
||||
uint64_t byteen = device_->m_axi_wstrb;
|
||||
unsigned base_addr = device_->m_axi_awaddr;
|
||||
uint8_t* data = (uint8_t*)(device_->m_axi_wdata);
|
||||
if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) {
|
||||
if (device_->m_axi_wvalid[0]) {
|
||||
uint64_t byteen = device_->m_axi_wstrb[0];
|
||||
uint64_t base_addr = device_->m_axi_awaddr[0];
|
||||
uint8_t* data = (uint8_t*)device_->m_axi_wdata[0].data();
|
||||
|
||||
// check console output
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
|
||||
if (base_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
@@ -384,15 +426,15 @@ private:
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_awid;
|
||||
mem_req->addr = device_->m_axi_awaddr;
|
||||
mem_req->tag = device_->m_axi_awid[0];
|
||||
mem_req->addr = device_->m_axi_awaddr[0];
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_awaddr,
|
||||
device_->m_axi_awaddr[0],
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
@@ -401,18 +443,18 @@ private:
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_arid;
|
||||
mem_req->addr = device_->m_axi_araddr;
|
||||
ram_->read(mem_req->block.data(), device_->m_axi_araddr, MEM_BLOCK_SIZE);
|
||||
mem_req->tag = device_->m_axi_arid[0];
|
||||
mem_req->addr = device_->m_axi_araddr[0];
|
||||
ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_araddr,
|
||||
device_->m_axi_araddr[0],
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
@@ -421,9 +463,9 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
device_->m_axi_wready = 1;
|
||||
device_->m_axi_awready = 1;
|
||||
device_->m_axi_arready = 1;
|
||||
device_->m_axi_wready[0] = running_;
|
||||
device_->m_axi_awready[0] = running_;
|
||||
device_->m_axi_arready[0] = running_;
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -454,35 +496,35 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready) {
|
||||
device_->mem_rsp_valid = 1;
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, tag=%0lx, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(device_->mem_rsp_data.data(), mem_req->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_req->tag;
|
||||
memcpy(device_->mem_rsp_data.data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_rsp->tag;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->mem_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid) {
|
||||
uint32_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_valid && running_) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw) {
|
||||
// process writes
|
||||
uint64_t byteen = device_->mem_req_byteen;
|
||||
uint8_t* data = (uint8_t*)device_->mem_req_data.data();
|
||||
uint8_t* data = (uint8_t*)(device_->mem_req_data.data());
|
||||
|
||||
// check console output
|
||||
if (byte_addr >= IO_COUT_ADDR
|
||||
&& byte_addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
@@ -496,7 +538,7 @@ private:
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, byte_addr, byteen);
|
||||
printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
@@ -515,7 +557,7 @@ private:
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
@@ -526,11 +568,13 @@ private:
|
||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
@@ -539,11 +583,24 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
device_->mem_req_ready = 1;
|
||||
device_->mem_req_ready = running_;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void reset_dcr_bus() {
|
||||
device_->dcr_wr_valid = 0;
|
||||
}
|
||||
|
||||
void eval_dcr_bus(bool clk) {
|
||||
if (!clk) {
|
||||
return;
|
||||
}
|
||||
if (device_->dcr_wr_valid) {
|
||||
device_->dcr_wr_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->tick();
|
||||
@@ -552,17 +609,17 @@ private:
|
||||
|
||||
bool get_ebreak() const {
|
||||
#ifdef AXI_BUS
|
||||
return (bool)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
return (bool)device_->Vortex_axi->vortex->sim_ebreak;
|
||||
#else
|
||||
return (bool)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
return (bool)device_->Vortex->sim_ebreak;
|
||||
#endif
|
||||
}
|
||||
|
||||
int get_last_wb_value(int reg) const {
|
||||
uint64_t get_last_wb_value(int reg) const {
|
||||
#ifdef AXI_BUS
|
||||
return (int)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
return ((Word*)device_->Vortex_axi->vortex->sim_wb_value.data())[reg];
|
||||
#else
|
||||
return (int)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
return ((Word*)device_->Vortex->sim_wb_value.data())[reg];
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -600,6 +657,8 @@ private:
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
|
||||
bool running_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -618,4 +677,8 @@ void Processor::attach_ram(RAM* mem) {
|
||||
|
||||
int Processor::run() {
|
||||
return impl_->run();
|
||||
}
|
||||
|
||||
void Processor::write_dcr(uint32_t addr, uint32_t value) {
|
||||
return impl_->write_dcr(addr, value);
|
||||
}
|
||||
@@ -1,5 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
@@ -14,6 +29,8 @@ public:
|
||||
|
||||
int run();
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
private:
|
||||
|
||||
class Impl;
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
|
||||
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
|
||||
lint_off -file "*/fpnew/src/*"
|
||||
|
||||
@@ -1,45 +1,36 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../hw/rtl
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../common -I../../hw
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
#$(info OBJS is $(OBJS))
|
||||
#$(info VPATH is $(VPATH))
|
||||
SRCS += processor.cpp cluster.cpp core.cpp warp.cpp decode.cpp execute.cpp exe_unit.cpp cache_sim.cpp mem_sim.cpp shared_mem.cpp dcrs.cpp
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
||||
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# XLEN parameterization
|
||||
ifdef XLEN
|
||||
CXXFLAGS += -DXLEN=$(XLEN)
|
||||
endif
|
||||
|
||||
PROJECT = simx
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
$(CXX) $(CXXFLAGS) -DSTARTUP_ADDR=0x80000000 $^ $(LDFLAGS) -o $@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
|
||||
@@ -48,4 +39,4 @@ $(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
rm -rf $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
|
||||
87
sim/simx/arch.h
Normal file
87
sim/simx/arch.h
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Arch {
|
||||
private:
|
||||
uint16_t num_threads_;
|
||||
uint16_t num_warps_;
|
||||
uint16_t num_cores_;
|
||||
uint16_t num_clusters_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_regs_;
|
||||
uint16_t num_csrs_;
|
||||
uint16_t num_barriers_;
|
||||
uint16_t ipdom_size_;
|
||||
|
||||
public:
|
||||
Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint16_t num_clusters)
|
||||
: num_threads_(num_threads)
|
||||
, num_warps_(num_warps)
|
||||
, num_cores_(num_cores)
|
||||
, num_clusters_(num_clusters)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
, ipdom_size_((num_threads-1) * 2)
|
||||
{}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
uint16_t num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
uint16_t num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
uint16_t num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
||||
uint16_t ipdom_size() const {
|
||||
return ipdom_size_;
|
||||
}
|
||||
|
||||
uint16_t num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
uint16_t num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
uint16_t num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
uint16_t num_clusters() const {
|
||||
return num_clusters_;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
private:
|
||||
uint16_t num_cores_;
|
||||
uint16_t num_warps_;
|
||||
uint16_t num_threads_;
|
||||
uint16_t wsize_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_regs_;
|
||||
uint16_t num_csrs_;
|
||||
uint16_t num_barriers_;
|
||||
|
||||
public:
|
||||
ArchDef(uint16_t num_cores,
|
||||
uint16_t num_warps,
|
||||
uint16_t num_threads)
|
||||
: num_cores_(num_cores)
|
||||
, num_warps_(num_warps)
|
||||
, num_threads_(num_threads)
|
||||
, wsize_(4)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
{}
|
||||
|
||||
uint16_t wsize() const {
|
||||
return wsize_;
|
||||
}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
uint16_t num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
uint16_t num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
uint16_t num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
||||
uint16_t num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
uint16_t num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
uint16_t num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
using std::string;
|
||||
|
||||
std::string CommandLineArg::helpString_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::longArgs_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::shortArgs_;
|
||||
|
||||
CommandLineArg::CommandLineArg(string s, string l, const char *helpText) {
|
||||
helpString_ += helpText;
|
||||
longArgs_[l] = this;
|
||||
shortArgs_[s] = this;
|
||||
}
|
||||
|
||||
CommandLineArg::CommandLineArg(string l, const char *helpText) {
|
||||
helpString_ += helpText;
|
||||
longArgs_[l] = this;
|
||||
}
|
||||
|
||||
void CommandLineArg::readArgs(int argc, char **argv) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
std::unordered_map<string, CommandLineArg *>::iterator
|
||||
s = shortArgs_.find(std::string(argv[i])),
|
||||
l = longArgs_.find(std::string(argv[i]));
|
||||
|
||||
if (s != shortArgs_.end()) {
|
||||
i += s->second->read(argc - i, &argv[i]);
|
||||
} else if (l != longArgs_.end()) {
|
||||
i += l->second->read(argc - i, &argv[i]);
|
||||
} else {
|
||||
throw BadArg(string(argv[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommandLineArg::clearArgs() {
|
||||
shortArgs_.clear();
|
||||
longArgs_.clear();
|
||||
helpString_ = "";
|
||||
}
|
||||
|
||||
void CommandLineArg::showHelp(std::ostream &os) {
|
||||
os << helpString_;
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
struct BadArg { BadArg(std::string s) : arg(s) {} std::string arg; };
|
||||
|
||||
class CommandLineArg {
|
||||
public:
|
||||
CommandLineArg(std::string s, std::string l, const char *helpText);
|
||||
CommandLineArg(std::string l, const char *helpText);
|
||||
virtual int read(int argc, char** argv) = 0;
|
||||
|
||||
static void readArgs(int argc, char **argv);
|
||||
static void clearArgs();
|
||||
static void showHelp(std::ostream &os);
|
||||
|
||||
private:
|
||||
static std::string helpString_;
|
||||
static std::unordered_map<std::string, CommandLineArg *> longArgs_;
|
||||
static std::unordered_map<std::string, CommandLineArg *> shortArgs_;
|
||||
};
|
||||
|
||||
template <typename T> class CommandLineArgSetter : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) {}
|
||||
|
||||
CommandLineArgSetter(std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(l, ht), arg_(x) {}
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused (argc);
|
||||
std::istringstream iss(argv[1]);
|
||||
iss >> arg_;
|
||||
return 1;
|
||||
}
|
||||
private:
|
||||
T &arg_;
|
||||
};
|
||||
|
||||
class CommandLineArgFlag : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
CommandLineArgFlag(std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused (argc, argv);
|
||||
arg_ = true;
|
||||
return 0;
|
||||
}
|
||||
private:
|
||||
bool &arg_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,637 +0,0 @@
|
||||
#include "cache.h"
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include <util.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct params_t {
|
||||
uint32_t sets_per_bank;
|
||||
uint32_t blocks_per_set;
|
||||
uint32_t words_per_block;
|
||||
uint32_t log2_num_inputs;
|
||||
|
||||
uint32_t word_select_addr_start;
|
||||
uint32_t word_select_addr_end;
|
||||
|
||||
uint32_t bank_select_addr_start;
|
||||
uint32_t bank_select_addr_end;
|
||||
|
||||
uint32_t set_select_addr_start;
|
||||
uint32_t set_select_addr_end;
|
||||
|
||||
uint32_t tag_select_addr_start;
|
||||
uint32_t tag_select_addr_end;
|
||||
|
||||
params_t(const Cache::Config& config) {
|
||||
uint32_t bank_bits = log2ceil(config.num_banks);
|
||||
uint32_t offset_bits = config.B - config.W;
|
||||
uint32_t log2_bank_size = config.C - bank_bits;
|
||||
uint32_t index_bits = log2_bank_size - (config.B << config.A);
|
||||
assert(log2_bank_size >= config.B);
|
||||
|
||||
this->log2_num_inputs = log2ceil(config.num_inputs);
|
||||
|
||||
this->words_per_block = 1 << offset_bits;
|
||||
this->blocks_per_set = 1 << config.A;
|
||||
this->sets_per_bank = 1 << index_bits;
|
||||
|
||||
assert(config.ports_per_bank <= this->words_per_block);
|
||||
|
||||
// Word select
|
||||
this->word_select_addr_start = config.W;
|
||||
this->word_select_addr_end = (this->word_select_addr_start+offset_bits-1);
|
||||
|
||||
// Bank select
|
||||
this->bank_select_addr_start = (1+this->word_select_addr_end);
|
||||
this->bank_select_addr_end = (this->bank_select_addr_start+bank_bits-1);
|
||||
|
||||
// Set select
|
||||
this->set_select_addr_start = (1+this->bank_select_addr_end);
|
||||
this->set_select_addr_end = (this->set_select_addr_start+index_bits-1);
|
||||
|
||||
// Tag select
|
||||
this->tag_select_addr_start = (1+this->set_select_addr_end);
|
||||
this->tag_select_addr_end = (config.addr_width-1);
|
||||
}
|
||||
|
||||
uint32_t addr_bank_id(uint64_t word_addr) const {
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, bank_select_addr_start, bank_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t addr_set_id(uint64_t word_addr) const {
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, set_select_addr_start, set_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t addr_tag(uint64_t word_addr) const {
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
return bit_getw(word_addr, tag_select_addr_start, tag_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mem_addr(uint32_t bank_id, uint32_t set_id, uint64_t tag) const {
|
||||
uint64_t addr(0);
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
addr = bit_setw(addr, bank_select_addr_start, bank_select_addr_end, bank_id);
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
addr = bit_setw(addr, set_select_addr_start, set_select_addr_end, set_id);
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
addr = bit_setw(addr, tag_select_addr_start, tag_select_addr_end, tag);
|
||||
return addr;
|
||||
}
|
||||
};
|
||||
|
||||
struct block_t {
|
||||
bool valid;
|
||||
bool dirty;
|
||||
uint64_t tag;
|
||||
uint32_t lru_ctr;
|
||||
};
|
||||
|
||||
struct set_t {
|
||||
std::vector<block_t> blocks;
|
||||
set_t(uint32_t size) : blocks(size) {}
|
||||
|
||||
void clear() {
|
||||
for (auto& block : blocks) {
|
||||
block.valid = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_req_info_t {
|
||||
bool valid;
|
||||
uint32_t req_id;
|
||||
uint64_t req_tag;
|
||||
};
|
||||
|
||||
struct bank_req_t {
|
||||
bool valid;
|
||||
bool write;
|
||||
bool mshr_replay;
|
||||
uint64_t tag;
|
||||
uint32_t set_id;
|
||||
uint32_t core_id;
|
||||
uint64_t uuid;
|
||||
std::vector<bank_req_info_t> infos;
|
||||
|
||||
bank_req_t(uint32_t size)
|
||||
: valid(false)
|
||||
, write(false)
|
||||
, mshr_replay(false)
|
||||
, tag(0)
|
||||
, set_id(0)
|
||||
, core_id(0)
|
||||
, uuid(0)
|
||||
, infos(size)
|
||||
{}
|
||||
};
|
||||
|
||||
struct mshr_entry_t : public bank_req_t {
|
||||
uint32_t block_id;
|
||||
|
||||
mshr_entry_t(uint32_t size = 0)
|
||||
: bank_req_t(size)
|
||||
, block_id(0)
|
||||
{}
|
||||
};
|
||||
|
||||
class MSHR {
|
||||
private:
|
||||
std::vector<mshr_entry_t> entries_;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
MSHR(uint32_t size)
|
||||
: entries_(size)
|
||||
, size_(0)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return (0 == size_);
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (size_ == entries_.size());
|
||||
}
|
||||
|
||||
int lookup(const bank_req_t& bank_req) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (entry.valid
|
||||
&& entry.set_id == bank_req.set_id
|
||||
&& entry.tag == bank_req.tag) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int allocate(const bank_req_t& bank_req, uint32_t block_id) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (!entry.valid) {
|
||||
*(bank_req_t*)&entry = bank_req;
|
||||
entry.valid = true;
|
||||
entry.mshr_replay = false;
|
||||
entry.block_id = block_id;
|
||||
++size_;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
mshr_entry_t& replay(uint32_t id) {
|
||||
auto& root_entry = entries_.at(id);
|
||||
assert(root_entry.valid);
|
||||
// make all related mshr entries for replay
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.valid
|
||||
&& entry.set_id == root_entry.set_id
|
||||
&& entry.tag == root_entry.tag) {
|
||||
entry.mshr_replay = true;
|
||||
}
|
||||
}
|
||||
return root_entry;
|
||||
}
|
||||
|
||||
bool pop(bank_req_t* out) {
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.valid && entry.mshr_replay) {
|
||||
*out = entry;
|
||||
entry.valid = false;
|
||||
--size_;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.valid && entry.mshr_replay) {
|
||||
entry.valid = false;
|
||||
}
|
||||
}
|
||||
size_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_t {
|
||||
std::vector<set_t> sets;
|
||||
MSHR mshr;
|
||||
|
||||
bank_t(const Cache::Config& config,
|
||||
const params_t& params)
|
||||
: sets(params.sets_per_bank, params.blocks_per_set)
|
||||
, mshr(config.mshr_size)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
mshr.clear();
|
||||
for (auto& set : sets) {
|
||||
set.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class Cache::Impl {
|
||||
private:
|
||||
Cache* const simobject_;
|
||||
Config config_;
|
||||
params_t params_;
|
||||
std::vector<bank_t> banks_;
|
||||
Switch<MemReq, MemRsp>::Ptr mem_switch_;
|
||||
Switch<MemReq, MemRsp>::Ptr bypass_switch_;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports_;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
|
||||
uint32_t flush_cycles_;
|
||||
PerfStats perf_stats_;
|
||||
uint64_t pending_read_reqs_;
|
||||
uint64_t pending_write_reqs_;
|
||||
uint64_t pending_fill_reqs_;
|
||||
|
||||
public:
|
||||
Impl(Cache* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, params_(config)
|
||||
, banks_(config.num_banks, {config, params_})
|
||||
, mem_req_ports_(config.num_banks, simobject)
|
||||
, mem_rsp_ports_(config.num_banks, simobject)
|
||||
{
|
||||
bypass_switch_ = Switch<MemReq, MemRsp>::Create("bypass_arb", ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.bind(&simobject->MemReqPort);
|
||||
simobject->MemRspPort.bind(&bypass_switch_->RspIn);
|
||||
|
||||
if (config.num_banks > 1) {
|
||||
mem_switch_ = Switch<MemReq, MemRsp>::Create("mem_arb", ArbiterType::RoundRobin, config.num_banks);
|
||||
for (uint32_t i = 0, n = config.num_banks; i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&mem_switch_->ReqIn.at(i));
|
||||
mem_switch_->RspOut.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
mem_switch_->ReqOut.bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspOut.at(0).bind(&mem_switch_->RspIn);
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspOut.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
|
||||
// calculate tag flush cycles
|
||||
flush_cycles_ = params_.sets_per_bank * params_.blocks_per_set;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& bank : banks_) {
|
||||
bank.clear();
|
||||
}
|
||||
perf_stats_ = PerfStats();
|
||||
pending_read_reqs_ = 0;
|
||||
pending_write_reqs_ = 0;
|
||||
pending_fill_reqs_ = 0;
|
||||
}
|
||||
|
||||
void tick() {
|
||||
// wait on flush cycles
|
||||
if (flush_cycles_ != 0) {
|
||||
--flush_cycles_;
|
||||
return;
|
||||
}
|
||||
|
||||
// per-bank pipeline request
|
||||
std::vector<bank_req_t> pipeline_reqs(config_.num_banks, config_.ports_per_bank);
|
||||
|
||||
// calculate memory latency
|
||||
perf_stats_.mem_latency += pending_fill_reqs_;
|
||||
|
||||
// handle bypasss responses
|
||||
auto& bypass_port = bypass_switch_->RspOut.at(1);
|
||||
if (!bypass_port.empty()) {
|
||||
auto& mem_rsp = bypass_port.front();
|
||||
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
|
||||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp{tag, mem_rsp.core_id, mem_rsp.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-" << core_rsp);
|
||||
bypass_port.pop();
|
||||
}
|
||||
|
||||
// handle MSHR replay
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& pipeline_req = pipeline_reqs.at(bank_id);
|
||||
bank.mshr.pop(&pipeline_req);
|
||||
}
|
||||
|
||||
// handle memory fills
|
||||
std::vector<bool> pending_fill_req(config_.num_banks, false);
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
|
||||
if (!mem_rsp_port.empty()) {
|
||||
auto& mem_rsp = mem_rsp_port.front();
|
||||
this->processMemoryFill(bank_id, mem_rsp.tag);
|
||||
pending_fill_req.at(bank_id) = true;
|
||||
mem_rsp_port.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// handle incoming core requests
|
||||
for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
|
||||
auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
|
||||
if (core_req_port.empty())
|
||||
continue;
|
||||
|
||||
auto& core_req = core_req_port.front();
|
||||
|
||||
// check cache bypassing
|
||||
if (core_req.non_cacheable) {
|
||||
// send IO request
|
||||
this->processIORequest(core_req, req_id);
|
||||
|
||||
// remove request
|
||||
core_req_port.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bank_id = params_.addr_bank_id(core_req.addr);
|
||||
auto set_id = params_.addr_set_id(core_req.addr);
|
||||
auto tag = params_.addr_tag(core_req.addr);
|
||||
auto port_id = req_id % config_.ports_per_bank;
|
||||
|
||||
// create bank request
|
||||
bank_req_t bank_req(config_.ports_per_bank);
|
||||
bank_req.valid = true;
|
||||
bank_req.write = core_req.write;
|
||||
bank_req.mshr_replay = false;
|
||||
bank_req.tag = tag;
|
||||
bank_req.set_id = set_id;
|
||||
bank_req.core_id = core_req.core_id;
|
||||
bank_req.uuid = core_req.uuid;
|
||||
bank_req.infos.at(port_id) = {true, req_id, core_req.tag};
|
||||
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& pipeline_req = pipeline_reqs.at(bank_id);
|
||||
|
||||
// check pending MSHR replay
|
||||
if (pipeline_req.valid
|
||||
&& pipeline_req.mshr_replay) {
|
||||
// stall
|
||||
continue;
|
||||
}
|
||||
|
||||
// check pending fill request
|
||||
if (pending_fill_req.at(bank_id)) {
|
||||
// stall
|
||||
continue;
|
||||
}
|
||||
|
||||
// check MSHR capacity if read or writeback
|
||||
if ((!core_req.write || !config_.write_through)
|
||||
&& bank.mshr.full()) {
|
||||
++perf_stats_.mshr_stalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
// check bank conflicts
|
||||
if (pipeline_req.valid) {
|
||||
// check port conflict
|
||||
if (pipeline_req.write != core_req.write
|
||||
|| pipeline_req.set_id != set_id
|
||||
|| pipeline_req.tag != tag
|
||||
|| pipeline_req.infos[port_id].valid) {
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
// update pending request infos
|
||||
pipeline_req.infos[port_id] = bank_req.infos[port_id];
|
||||
} else {
|
||||
// schedule new request
|
||||
pipeline_req = bank_req;
|
||||
}
|
||||
|
||||
if (core_req.write)
|
||||
++perf_stats_.writes;
|
||||
else
|
||||
++perf_stats_.reads;
|
||||
|
||||
// remove request
|
||||
auto time = core_req_port.pop();
|
||||
perf_stats_.pipeline_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
// process active request
|
||||
this->processBankRequest(pipeline_reqs);
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void processIORequest(const MemReq& core_req, uint32_t req_id) {
|
||||
{
|
||||
MemReq mem_req(core_req);
|
||||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
bypass_switch_->ReqIn.at(1).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
}
|
||||
|
||||
if (core_req.write && config_.write_reponse) {
|
||||
MemRsp core_rsp{core_req.tag, core_req.core_id, core_req.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
|
||||
DT(3, simobject_->name() << "-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
||||
void processMemoryFill(uint32_t bank_id, uint32_t mshr_id) {
|
||||
// update block
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& entry = bank.mshr.replay(mshr_id);
|
||||
auto& set = bank.sets.at(entry.set_id);
|
||||
auto& block = set.blocks.at(entry.block_id);
|
||||
block.valid = true;
|
||||
block.tag = entry.tag;
|
||||
--pending_fill_reqs_;
|
||||
}
|
||||
|
||||
void processBankRequest(const std::vector<bank_req_t>& pipeline_reqs) {
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& pipeline_req = pipeline_reqs.at(bank_id);
|
||||
if (!pipeline_req.valid)
|
||||
continue;
|
||||
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& set = bank.sets.at(pipeline_req.set_id);
|
||||
|
||||
if (pipeline_req.mshr_replay) {
|
||||
// send core response
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-" << core_rsp);
|
||||
}
|
||||
} else {
|
||||
bool hit = false;
|
||||
bool found_free_block = false;
|
||||
uint32_t hit_block_id = 0;
|
||||
uint32_t repl_block_id = 0;
|
||||
uint32_t max_cnt = 0;
|
||||
|
||||
for (uint32_t i = 0, n = set.blocks.size(); i < n; ++i) {
|
||||
auto& block = set.blocks.at(i);
|
||||
if (block.valid) {
|
||||
if (block.tag == pipeline_req.tag) {
|
||||
block.lru_ctr = 0;
|
||||
hit_block_id = i;
|
||||
hit = true;
|
||||
} else {
|
||||
++block.lru_ctr;
|
||||
}
|
||||
if (max_cnt < block.lru_ctr) {
|
||||
max_cnt = block.lru_ctr;
|
||||
repl_block_id = i;
|
||||
}
|
||||
} else {
|
||||
found_free_block = true;
|
||||
repl_block_id = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (hit) {
|
||||
//
|
||||
// Hit handling
|
||||
//
|
||||
if (pipeline_req.write) {
|
||||
// handle write hit
|
||||
auto& hit_block = set.blocks.at(hit_block_id);
|
||||
if (config_.write_through) {
|
||||
// forward write request to memory
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
} else {
|
||||
// mark block as dirty
|
||||
hit_block.dirty = true;
|
||||
}
|
||||
}
|
||||
// send core response
|
||||
if (!pipeline_req.write || config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//
|
||||
// Miss handling
|
||||
//
|
||||
if (pipeline_req.write)
|
||||
++perf_stats_.write_misses;
|
||||
else
|
||||
++perf_stats_.read_misses;
|
||||
|
||||
if (!found_free_block && !config_.write_through) {
|
||||
// write back dirty block
|
||||
auto& repl_block = set.blocks.at(repl_block_id);
|
||||
if (repl_block.dirty) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
}
|
||||
|
||||
if (pipeline_req.write && config_.write_through) {
|
||||
// forward write request to memory
|
||||
{
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// MSHR lookup
|
||||
int pending = bank.mshr.lookup(pipeline_req);
|
||||
|
||||
// allocate MSHR
|
||||
int mshr_id = bank.mshr.allocate(pipeline_req, repl_block_id);
|
||||
|
||||
// send fill request
|
||||
if (pending == -1) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = false;
|
||||
mem_req.tag = mshr_id;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Cache::Cache(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<Cache>(ctx, name)
|
||||
, CoreReqPorts(config.num_inputs, this)
|
||||
, CoreRspPorts(config.num_inputs, this)
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
Cache::~Cache() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
void Cache::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void Cache::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
const Cache::PerfStats& Cache::perf_stats() const {
|
||||
return impl_->perf_stats();
|
||||
}
|
||||
106
sim/simx/cache_cluster.h
Normal file
106
sim/simx/cache_cluster.h
Normal file
@@ -0,0 +1,106 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cache_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class CacheCluster : public SimObject<CacheCluster> {
|
||||
public:
|
||||
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
|
||||
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
CacheCluster(const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_units,
|
||||
uint32_t num_caches,
|
||||
uint32_t num_requests,
|
||||
const CacheSim::Config& config)
|
||||
: SimObject(ctx, name)
|
||||
, CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
|
||||
, CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, caches_(MAX(num_caches, 0x1)) {
|
||||
|
||||
CacheSim::Config config2(config);
|
||||
if (0 == num_caches) {
|
||||
num_caches = 1;
|
||||
config2.bypass = true;
|
||||
}
|
||||
|
||||
char sname[100];
|
||||
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> unit_arbs(num_units);
|
||||
for (uint32_t u = 0; u < num_units; ++u) {
|
||||
snprintf(sname, 100, "%s-unit-arb-%d", name, u);
|
||||
unit_arbs.at(u) = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
|
||||
for (uint32_t i = 0; i < num_requests; ++i) {
|
||||
this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
|
||||
unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> mem_arbs(config.num_inputs);
|
||||
for (uint32_t i = 0; i < config.num_inputs; ++i) {
|
||||
snprintf(sname, 100, "%s-mem-arb-%d", name, i);
|
||||
mem_arbs.at(i) = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
|
||||
for (uint32_t u = 0; u < num_units; ++u) {
|
||||
unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
|
||||
mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(sname, 100, "%s-cache-arb", name);
|
||||
auto cache_arb = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
|
||||
|
||||
for (uint32_t i = 0; i < num_caches; ++i) {
|
||||
snprintf(sname, 100, "%s-cache%d", name, i);
|
||||
caches_.at(i) = CacheSim::Create(sname, config2);
|
||||
|
||||
for (uint32_t j = 0; j < config.num_inputs; ++j) {
|
||||
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
|
||||
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
||||
}
|
||||
|
||||
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
|
||||
}
|
||||
|
||||
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
||||
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
|
||||
}
|
||||
|
||||
~CacheCluster() {}
|
||||
|
||||
void reset() {}
|
||||
|
||||
void tick() {}
|
||||
|
||||
CacheSim::PerfStats perf_stats() const {
|
||||
CacheSim::PerfStats perf;
|
||||
for (auto cache : caches_) {
|
||||
perf += cache->perf_stats();
|
||||
}
|
||||
return perf;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<CacheSim::Ptr> caches_;
|
||||
};
|
||||
|
||||
}
|
||||
707
sim/simx/cache_sim.cpp
Normal file
707
sim/simx/cache_sim.cpp
Normal file
@@ -0,0 +1,707 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cache_sim.h"
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include <util.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct params_t {
|
||||
uint32_t sets_per_bank;
|
||||
uint32_t lines_per_set;
|
||||
uint32_t words_per_line;
|
||||
uint32_t log2_num_inputs;
|
||||
|
||||
uint32_t word_select_addr_start;
|
||||
uint32_t word_select_addr_end;
|
||||
|
||||
uint32_t bank_select_addr_start;
|
||||
uint32_t bank_select_addr_end;
|
||||
|
||||
uint32_t set_select_addr_start;
|
||||
uint32_t set_select_addr_end;
|
||||
|
||||
uint32_t tag_select_addr_start;
|
||||
uint32_t tag_select_addr_end;
|
||||
|
||||
params_t(const CacheSim::Config& config) {
|
||||
int32_t bank_bits = log2ceil(config.num_banks);
|
||||
int32_t offset_bits = config.B - config.W;
|
||||
int32_t log2_bank_size = config.C - bank_bits;
|
||||
int32_t index_bits = log2_bank_size - (config.B + config.A);
|
||||
assert(log2_bank_size > 0);
|
||||
assert(offset_bits >= 0);
|
||||
assert(index_bits >= 0);
|
||||
|
||||
this->log2_num_inputs = log2ceil(config.num_inputs);
|
||||
|
||||
this->words_per_line = 1 << offset_bits;
|
||||
this->lines_per_set = 1 << config.A;
|
||||
this->sets_per_bank = 1 << index_bits;
|
||||
|
||||
assert(config.ports_per_bank <= this->words_per_line);
|
||||
|
||||
// Word select
|
||||
this->word_select_addr_start = config.W;
|
||||
this->word_select_addr_end = (this->word_select_addr_start+offset_bits-1);
|
||||
|
||||
// Bank select
|
||||
this->bank_select_addr_start = (1+this->word_select_addr_end);
|
||||
this->bank_select_addr_end = (this->bank_select_addr_start+bank_bits-1);
|
||||
|
||||
// Set select
|
||||
this->set_select_addr_start = (1+this->bank_select_addr_end);
|
||||
this->set_select_addr_end = (this->set_select_addr_start+index_bits-1);
|
||||
|
||||
// Tag select
|
||||
this->tag_select_addr_start = (1+this->set_select_addr_end);
|
||||
this->tag_select_addr_end = (config.addr_width-1);
|
||||
}
|
||||
|
||||
uint32_t addr_bank_id(uint64_t word_addr) const {
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, bank_select_addr_start, bank_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t addr_set_id(uint64_t word_addr) const {
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, set_select_addr_start, set_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t addr_tag(uint64_t word_addr) const {
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
return bit_getw(word_addr, tag_select_addr_start, tag_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mem_addr(uint32_t bank_id, uint32_t set_id, uint64_t tag) const {
|
||||
uint64_t addr(0);
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
addr = bit_setw(addr, bank_select_addr_start, bank_select_addr_end, bank_id);
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
addr = bit_setw(addr, set_select_addr_start, set_select_addr_end, set_id);
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
addr = bit_setw(addr, tag_select_addr_start, tag_select_addr_end, tag);
|
||||
return addr;
|
||||
}
|
||||
};
|
||||
|
||||
struct line_t {
|
||||
uint64_t tag;
|
||||
uint32_t lru_ctr;
|
||||
bool valid;
|
||||
bool dirty;
|
||||
|
||||
void clear() {
|
||||
valid = false;
|
||||
dirty = false;
|
||||
}
|
||||
};
|
||||
|
||||
struct set_t {
|
||||
std::vector<line_t> lines;
|
||||
|
||||
set_t(uint32_t num_ways)
|
||||
: lines(num_ways)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& line : lines) {
|
||||
line.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_req_port_t {
|
||||
uint32_t req_id;
|
||||
uint64_t req_tag;
|
||||
bool valid;
|
||||
|
||||
void clear() {
|
||||
valid = false;
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_req_t {
|
||||
|
||||
enum ReqType {
|
||||
None = 0,
|
||||
Fill = 1,
|
||||
Replay = 2,
|
||||
Core = 3
|
||||
};
|
||||
|
||||
std::vector<bank_req_port_t> ports;
|
||||
uint64_t tag;
|
||||
uint32_t set_id;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
ReqType type;
|
||||
bool write;
|
||||
|
||||
bank_req_t(uint32_t num_ports)
|
||||
: ports(num_ports)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& port : ports) {
|
||||
port.clear();
|
||||
}
|
||||
type = ReqType::None;
|
||||
}
|
||||
};
|
||||
|
||||
struct mshr_entry_t {
|
||||
bank_req_t bank_req;
|
||||
uint32_t line_id;
|
||||
|
||||
mshr_entry_t(uint32_t num_ports)
|
||||
: bank_req(num_ports)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
bank_req.clear();
|
||||
}
|
||||
};
|
||||
|
||||
class MSHR {
|
||||
private:
|
||||
std::vector<mshr_entry_t> entries_;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
MSHR(uint32_t size, uint32_t num_ports)
|
||||
: entries_(size, num_ports)
|
||||
, size_(0)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return (0 == size_);
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (size_ == entries_.size());
|
||||
}
|
||||
|
||||
bool lookup(const bank_req_t& bank_req) {
|
||||
for (auto& entry : entries_) {;
|
||||
if (entry.bank_req.type != bank_req_t::None
|
||||
&& entry.bank_req.set_id == bank_req.set_id
|
||||
&& entry.bank_req.tag == bank_req.tag) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int allocate(const bank_req_t& bank_req, uint32_t line_id) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (entry.bank_req.type == bank_req_t::None) {
|
||||
entry.bank_req = bank_req;
|
||||
entry.line_id = line_id;
|
||||
++size_;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
mshr_entry_t& replay(uint32_t id) {
|
||||
auto& root_entry = entries_.at(id);
|
||||
assert(root_entry.bank_req.type == bank_req_t::Core);
|
||||
// mark all related mshr entries for replay
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.bank_req.type == bank_req_t::Core
|
||||
&& entry.bank_req.set_id == root_entry.bank_req.set_id
|
||||
&& entry.bank_req.tag == root_entry.bank_req.tag) {
|
||||
entry.bank_req.type = bank_req_t::Replay;
|
||||
}
|
||||
}
|
||||
return root_entry;
|
||||
}
|
||||
|
||||
bool pop(bank_req_t* out) {
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.bank_req.type == bank_req_t::Replay) {
|
||||
*out = entry.bank_req;
|
||||
entry.bank_req.type = bank_req_t::None;
|
||||
--size_;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (auto& entry : entries_) {
|
||||
entry.clear();
|
||||
}
|
||||
size_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_t {
|
||||
std::vector<set_t> sets;
|
||||
MSHR mshr;
|
||||
|
||||
bank_t(const CacheSim::Config& config,
|
||||
const params_t& params)
|
||||
: sets(params.sets_per_bank, params.lines_per_set)
|
||||
, mshr(config.mshr_size, config.ports_per_bank)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& set : sets) {
|
||||
set.clear();
|
||||
}
|
||||
mshr.clear();
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class CacheSim::Impl {
|
||||
private:
|
||||
CacheSim* const simobject_;
|
||||
Config config_;
|
||||
params_t params_;
|
||||
std::vector<bank_t> banks_;
|
||||
Switch<MemReq, MemRsp>::Ptr bank_switch_;
|
||||
Switch<MemReq, MemRsp>::Ptr bypass_switch_;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports_;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
|
||||
std::vector<bank_req_t> pipeline_reqs_;
|
||||
uint32_t init_cycles_;
|
||||
PerfStats perf_stats_;
|
||||
uint64_t pending_read_reqs_;
|
||||
uint64_t pending_write_reqs_;
|
||||
uint64_t pending_fill_reqs_;
|
||||
|
||||
public:
|
||||
Impl(CacheSim* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, params_(config)
|
||||
, banks_(config.num_banks, {config, params_})
|
||||
, mem_req_ports_(config.num_banks, simobject)
|
||||
, mem_rsp_ports_(config.num_banks, simobject)
|
||||
, pipeline_reqs_(config.num_banks, config.ports_per_bank)
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
|
||||
|
||||
if (config_.bypass) {
|
||||
bypass_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
|
||||
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
|
||||
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
}
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
||||
return;
|
||||
}
|
||||
|
||||
bypass_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
|
||||
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
|
||||
|
||||
if (config.num_banks > 1) {
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, config.num_banks);
|
||||
for (uint32_t i = 0, n = config.num_banks; i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
|
||||
// calculate cache initialization cycles
|
||||
init_cycles_ = params_.sets_per_bank * params_.lines_per_set;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
if (config_.bypass)
|
||||
return;
|
||||
|
||||
for (auto& bank : banks_) {
|
||||
bank.clear();
|
||||
}
|
||||
perf_stats_ = PerfStats();
|
||||
pending_read_reqs_ = 0;
|
||||
pending_write_reqs_ = 0;
|
||||
pending_fill_reqs_ = 0;
|
||||
}
|
||||
|
||||
void tick() {
|
||||
if (config_.bypass)
|
||||
return;
|
||||
|
||||
// wait on cache initialization cycles
|
||||
if (init_cycles_ != 0) {
|
||||
--init_cycles_;
|
||||
return;
|
||||
}
|
||||
|
||||
// handle cache bypasss responses
|
||||
{
|
||||
auto& bypass_port = bypass_switch_->RspIn.at(1);
|
||||
if (!bypass_port.empty()) {
|
||||
auto& mem_rsp = bypass_port.front();
|
||||
this->processBypassResponse(mem_rsp);
|
||||
bypass_port.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// initialize pipeline request
|
||||
for (auto& pipeline_req : pipeline_reqs_) {
|
||||
pipeline_req.clear();
|
||||
}
|
||||
|
||||
// schedule MSHR replay
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& pipeline_req = pipeline_reqs_.at(bank_id);
|
||||
bank.mshr.pop(&pipeline_req);
|
||||
}
|
||||
|
||||
// schedule memory fill
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
|
||||
if (mem_rsp_port.empty())
|
||||
continue;
|
||||
|
||||
auto& pipeline_req = pipeline_reqs_.at(bank_id);
|
||||
if (pipeline_req.type != bank_req_t::None)
|
||||
continue;
|
||||
|
||||
auto& mem_rsp = mem_rsp_port.front();
|
||||
DT(3, simobject_->name() << "-dram-" << mem_rsp);
|
||||
pipeline_req.type = bank_req_t::Fill;
|
||||
pipeline_req.tag = mem_rsp.tag;
|
||||
mem_rsp_port.pop();
|
||||
}
|
||||
|
||||
// schedule core requests
|
||||
for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
|
||||
auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
|
||||
if (core_req_port.empty())
|
||||
continue;
|
||||
|
||||
auto& core_req = core_req_port.front();
|
||||
|
||||
// check cache bypassing
|
||||
if (core_req.type == AddrType::IO) {
|
||||
// send bypass request
|
||||
this->processBypassRequest(core_req, req_id);
|
||||
// remove request
|
||||
core_req_port.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bank_id = params_.addr_bank_id(core_req.addr);
|
||||
auto set_id = params_.addr_set_id(core_req.addr);
|
||||
auto tag = params_.addr_tag(core_req.addr);
|
||||
auto port_id = req_id % config_.ports_per_bank;
|
||||
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& pipeline_req = pipeline_reqs_.at(bank_id);
|
||||
|
||||
// check MSHR capacity
|
||||
if ((!core_req.write || !config_.write_through)
|
||||
&& bank.mshr.full()) {
|
||||
++perf_stats_.mshr_stalls;
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
// check bank conflicts
|
||||
if (pipeline_req.type == bank_req_t::Core) {
|
||||
// check port conflict
|
||||
if (pipeline_req.write != core_req.write
|
||||
|| pipeline_req.set_id != set_id
|
||||
|| pipeline_req.tag != tag
|
||||
|| pipeline_req.ports.at(port_id).valid) {
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
// extend request ports
|
||||
pipeline_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
|
||||
} else if (pipeline_req.type == bank_req_t::None) {
|
||||
// schedule new request
|
||||
bank_req_t bank_req(config_.ports_per_bank);
|
||||
bank_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
|
||||
bank_req.tag = tag;
|
||||
bank_req.set_id = set_id;
|
||||
bank_req.cid = core_req.cid;
|
||||
bank_req.uuid = core_req.uuid;
|
||||
bank_req.type = bank_req_t::Core;
|
||||
bank_req.write = core_req.write;
|
||||
pipeline_req = bank_req;
|
||||
} else {
|
||||
// bank in use
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (core_req.write)
|
||||
++perf_stats_.writes;
|
||||
else
|
||||
++perf_stats_.reads;
|
||||
|
||||
// remove request
|
||||
DT(3, simobject_->name() << "-core-" << core_req);
|
||||
auto time = core_req_port.pop();
|
||||
perf_stats_.pipeline_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
// process active request
|
||||
this->processBankRequests();
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void processBypassResponse(const MemRsp& mem_rsp) {
|
||||
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
|
||||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
|
||||
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
|
||||
DT(3, simobject_->name() << "-core-" << core_req);
|
||||
|
||||
{
|
||||
MemReq mem_req(core_req);
|
||||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
bypass_switch_->ReqIn.at(1).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
}
|
||||
|
||||
if (core_req.write && config_.write_reponse) {
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
||||
void processBankRequests() {
|
||||
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto pipeline_req = pipeline_reqs_.at(bank_id);
|
||||
|
||||
switch (pipeline_req.type) {
|
||||
case bank_req_t::None:
|
||||
break;
|
||||
case bank_req_t::Fill: {
|
||||
// update cache line
|
||||
auto& bank = banks_.at(bank_id);
|
||||
auto& entry = bank.mshr.replay(pipeline_req.tag);
|
||||
auto& set = bank.sets.at(entry.bank_req.set_id);
|
||||
auto& line = set.lines.at(entry.line_id);
|
||||
line.valid = true;
|
||||
line.tag = entry.bank_req.tag;
|
||||
--pending_fill_reqs_;
|
||||
} break;
|
||||
case bank_req_t::Replay: {
|
||||
// send core response
|
||||
if (!pipeline_req.write || config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.ports) {
|
||||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case bank_req_t::Core: {
|
||||
bool hit = false;
|
||||
bool found_free_line = false;
|
||||
uint32_t hit_line_id = 0;
|
||||
uint32_t repl_line_id = 0;
|
||||
uint32_t max_cnt = 0;
|
||||
|
||||
auto& set = bank.sets.at(pipeline_req.set_id);
|
||||
|
||||
// tag lookup
|
||||
for (uint32_t i = 0, n = set.lines.size(); i < n; ++i) {
|
||||
auto& line = set.lines.at(i);
|
||||
if (line.valid) {
|
||||
if (line.tag == pipeline_req.tag) {
|
||||
line.lru_ctr = 0;
|
||||
hit_line_id = i;
|
||||
hit = true;
|
||||
} else {
|
||||
++line.lru_ctr;
|
||||
}
|
||||
if (max_cnt < line.lru_ctr) {
|
||||
max_cnt = line.lru_ctr;
|
||||
repl_line_id = i;
|
||||
}
|
||||
} else {
|
||||
found_free_line = true;
|
||||
repl_line_id = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (hit) {
|
||||
//
|
||||
// Hit handling
|
||||
//
|
||||
if (pipeline_req.write) {
|
||||
// handle write hit
|
||||
auto& hit_line = set.lines.at(hit_line_id);
|
||||
if (config_.write_through) {
|
||||
// forward write request to memory
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_line.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
} else {
|
||||
// mark line as dirty
|
||||
hit_line.dirty = true;
|
||||
}
|
||||
}
|
||||
// send core response
|
||||
if (!pipeline_req.write || config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.ports) {
|
||||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//
|
||||
// Miss handling
|
||||
//
|
||||
if (pipeline_req.write)
|
||||
++perf_stats_.write_misses;
|
||||
else
|
||||
++perf_stats_.read_misses;
|
||||
|
||||
if (!found_free_line && !config_.write_through) {
|
||||
// write back dirty line
|
||||
auto& repl_line = set.lines.at(repl_line_id);
|
||||
if (repl_line.dirty) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_line.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
}
|
||||
|
||||
if (pipeline_req.write && config_.write_through) {
|
||||
// forward write request to memory
|
||||
{
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.ports) {
|
||||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// MSHR lookup
|
||||
auto mshr_pending = bank.mshr.lookup(pipeline_req);
|
||||
|
||||
// allocate MSHR
|
||||
auto mshr_id = bank.mshr.allocate(pipeline_req, repl_line_id);
|
||||
|
||||
// send fill request
|
||||
if (!mshr_pending) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = false;
|
||||
mem_req.tag = mshr_id;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
}
|
||||
}
|
||||
// calculate memory latency
|
||||
perf_stats_.mem_latency += pending_fill_reqs_;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<CacheSim>(ctx, name)
|
||||
, CoreReqPorts(config.num_inputs, this)
|
||||
, CoreRspPorts(config.num_inputs, this)
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
CacheSim::~CacheSim() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
void CacheSim::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void CacheSim::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
const CacheSim::PerfStats& CacheSim::perf_stats() const {
|
||||
return impl_->perf_stats();
|
||||
}
|
||||
@@ -1,13 +1,27 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "memsim.h"
|
||||
#include "mem_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Cache : public SimObject<Cache> {
|
||||
class CacheSim : public SimObject<CacheSim> {
|
||||
public:
|
||||
struct Config {
|
||||
bool bypass; // cache bypass
|
||||
uint8_t C; // log2 cache size
|
||||
uint8_t B; // log2 block size
|
||||
uint8_t W; // log2 word size
|
||||
@@ -45,6 +59,19 @@ public:
|
||||
, mshr_stalls(0)
|
||||
, mem_latency(0)
|
||||
{}
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->reads += rhs.reads;
|
||||
this->writes += rhs.writes;
|
||||
this->read_misses += rhs.read_misses;
|
||||
this->write_misses += rhs.write_misses;
|
||||
this->evictions += rhs.evictions;
|
||||
this->pipeline_stalls += rhs.pipeline_stalls;
|
||||
this->bank_stalls += rhs.bank_stalls;
|
||||
this->mshr_stalls += rhs.mshr_stalls;
|
||||
this->mem_latency += rhs.mem_latency;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<SimPort<MemReq>> CoreReqPorts;
|
||||
@@ -52,8 +79,8 @@ public:
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
Cache(const SimContext& ctx, const char* name, const Config& config);
|
||||
~Cache();
|
||||
CacheSim(const SimContext& ctx, const char* name, const Config& config);
|
||||
~CacheSim();
|
||||
|
||||
void reset();
|
||||
|
||||
219
sim/simx/cluster.cpp
Normal file
219
sim/simx/cluster.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cluster.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Cluster::Cluster(const SimContext& ctx,
|
||||
uint32_t cluster_id,
|
||||
ProcessorImpl* processor,
|
||||
const Arch &arch, const
|
||||
DCRS &dcrs)
|
||||
: SimObject(ctx, "cluster")
|
||||
, mem_req_port(this)
|
||||
, mem_rsp_port(this)
|
||||
, cluster_id_(cluster_id)
|
||||
, cores_(arch.num_cores())
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, sharedmems_(arch.num_cores())
|
||||
, processor_(processor)
|
||||
{
|
||||
auto num_cores = arch.num_cores();
|
||||
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||
!L2_ENABLED,
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
log2ceil(L2_NUM_WAYS), // W
|
||||
0, // A
|
||||
XLEN, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
1, // number of ports
|
||||
5, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
l2cache_->MemReqPort.bind(&this->mem_req_port);
|
||||
this->mem_rsp_port.bind(&l2cache_->MemRspPort);
|
||||
|
||||
snprintf(sname, 100, "cluster%d-icaches", cluster_id);
|
||||
icaches_ = CacheCluster::Create(sname, num_cores, NUM_ICACHES, 1, CacheSim::Config{
|
||||
!ICACHE_ENABLED,
|
||||
log2ceil(ICACHE_SIZE), // C
|
||||
log2ceil(L1_LINE_SIZE), // B
|
||||
log2ceil(sizeof(uint32_t)), // W
|
||||
log2ceil(ICACHE_NUM_WAYS),// A
|
||||
XLEN, // address bits
|
||||
1, // number of banks
|
||||
1, // number of ports
|
||||
1, // number of inputs
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
(uint8_t)arch.num_warps(), // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
icaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(0));
|
||||
l2cache_->CoreRspPorts.at(0).bind(&icaches_->MemRspPort);
|
||||
|
||||
snprintf(sname, 100, "cluster%d-dcaches", cluster_id);
|
||||
dcaches_ = CacheCluster::Create(sname, num_cores, NUM_DCACHES, NUM_LSU_LANES, CacheSim::Config{
|
||||
!DCACHE_ENABLED,
|
||||
log2ceil(DCACHE_SIZE), // C
|
||||
log2ceil(L1_LINE_SIZE), // B
|
||||
log2ceil(sizeof(Word)), // W
|
||||
log2ceil(DCACHE_NUM_WAYS),// A
|
||||
XLEN, // address bits
|
||||
DCACHE_NUM_BANKS, // number of banks
|
||||
1, // number of ports
|
||||
DCACHE_NUM_BANKS, // number of inputs
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
DCACHE_MSHR_SIZE, // mshr
|
||||
4, // pipeline latency
|
||||
});
|
||||
|
||||
dcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(1));
|
||||
l2cache_->CoreRspPorts.at(1).bind(&dcaches_->MemRspPort);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// create shared memory blocks
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
snprintf(sname, 100, "cluster%d-shared_mem%d", cluster_id, i);
|
||||
sharedmems_.at(i) = SharedMem::Create(sname, SharedMem::Config{
|
||||
(1 << SMEM_LOG_SIZE),
|
||||
sizeof(Word),
|
||||
NUM_LSU_LANES,
|
||||
NUM_LSU_LANES,
|
||||
false
|
||||
});
|
||||
}
|
||||
|
||||
// create cores
|
||||
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
uint32_t core_id = cluster_id * num_cores + i;
|
||||
cores_.at(i) = Core::Create(core_id,
|
||||
this,
|
||||
arch,
|
||||
dcrs,
|
||||
sharedmems_.at(i));
|
||||
|
||||
cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
|
||||
icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));
|
||||
|
||||
for (uint32_t j = 0; j < NUM_LSU_LANES; ++j) {
|
||||
snprintf(sname, 100, "cluster%d-smem_demux%d_%d", cluster_id, i, j);
|
||||
auto smem_demux = SMemDemux::Create(sname);
|
||||
|
||||
cores_.at(i)->dcache_req_ports.at(j).bind(&smem_demux->ReqIn);
|
||||
smem_demux->RspIn.bind(&cores_.at(i)->dcache_rsp_ports.at(j));
|
||||
|
||||
smem_demux->ReqDc.bind(&dcaches_->CoreReqPorts.at(i).at(j));
|
||||
dcaches_->CoreRspPorts.at(i).at(j).bind(&smem_demux->RspDc);
|
||||
|
||||
smem_demux->ReqSm.bind(&sharedmems_.at(i)->Inputs.at(j));
|
||||
sharedmems_.at(i)->Outputs.at(j).bind(&smem_demux->RspSm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Cluster::~Cluster() {
|
||||
//--
|
||||
}
|
||||
|
||||
void Cluster::reset() {
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void Cluster::tick() {
|
||||
//--
|
||||
}
|
||||
|
||||
void Cluster::attach_ram(RAM* ram) {
|
||||
for (auto core : cores_) {
|
||||
core->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
|
||||
bool Cluster::running() const {
|
||||
for (auto& core : cores_) {
|
||||
if (core->running())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Cluster::check_exit(Word* exitcode, bool riscv_test) const {
|
||||
bool done = true;
|
||||
Word exitcode_ = 0;
|
||||
for (auto& core : cores_) {
|
||||
Word ec;
|
||||
if (core->check_exit(&ec, riscv_test)) {
|
||||
exitcode_ |= ec;
|
||||
} else {
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
*exitcode = exitcode_;
|
||||
return done;
|
||||
}
|
||||
|
||||
void Cluster::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
|
||||
uint32_t local_core_id = core_id % cores_.size();
|
||||
barrier.set(local_core_id);
|
||||
|
||||
DP(3, "*** Suspend core #" << core_id << " at barrier #" << bar_id);
|
||||
|
||||
if (barrier.count() == (size_t)count) {
|
||||
// resume all suspended cores
|
||||
for (uint32_t i = 0; i < cores_.size(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
|
||||
cores_.at(i)->resume();
|
||||
}
|
||||
}
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
ProcessorImpl* Cluster::processor() const {
|
||||
return processor_;
|
||||
}
|
||||
|
||||
Cluster::PerfStats Cluster::perf_stats() const {
|
||||
Cluster::PerfStats perf;
|
||||
perf.icache = icaches_->perf_stats();
|
||||
perf.dcache = dcaches_->perf_stats();
|
||||
perf.l2cache = l2cache_->perf_stats();
|
||||
|
||||
for (auto sharedmem : sharedmems_) {
|
||||
perf.sharedmem += sharedmem->perf_stats();
|
||||
}
|
||||
|
||||
return perf;
|
||||
}
|
||||
86
sim/simx/cluster.h
Normal file
86
sim/simx/cluster.h
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "dcrs.h"
|
||||
#include "arch.h"
|
||||
#include "cache_cluster.h"
|
||||
#include "shared_mem.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ProcessorImpl;
|
||||
|
||||
class Cluster : public SimObject<Cluster> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
CacheSim::PerfStats icache;
|
||||
CacheSim::PerfStats dcache;
|
||||
SharedMem::PerfStats sharedmem;
|
||||
CacheSim::PerfStats l2cache;
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->icache += rhs.icache;
|
||||
this->dcache += rhs.dcache;
|
||||
this->sharedmem += rhs.sharedmem;
|
||||
this->l2cache += rhs.l2cache;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
SimPort<MemReq> mem_req_port;
|
||||
SimPort<MemRsp> mem_rsp_port;
|
||||
|
||||
Cluster(const SimContext& ctx,
|
||||
uint32_t cluster_id,
|
||||
ProcessorImpl* processor,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs);
|
||||
|
||||
~Cluster();
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
bool check_exit(Word* exitcode, bool riscv_test) const;
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
ProcessorImpl* processor() const;
|
||||
|
||||
Cluster::PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
uint32_t cluster_id_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
CacheSim::Ptr l2cache_;
|
||||
CacheCluster::Ptr icaches_;
|
||||
CacheCluster::Ptr dcaches_;
|
||||
std::vector<SharedMem::Ptr> sharedmems_;
|
||||
CacheCluster::Ptr tcaches_;
|
||||
CacheCluster::Ptr ocaches_;
|
||||
CacheCluster::Ptr rcaches_;
|
||||
ProcessorImpl* processor_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef RAM_PAGE_SIZE
|
||||
@@ -10,14 +23,4 @@
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
|
||||
namespace vortex {
|
||||
|
||||
enum Constants {
|
||||
|
||||
SMEM_BANK_OFFSET = log2ceil(sizeof(uint32_t)) + log2ceil(STACK_SIZE / sizeof(uint32_t)),
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
149
sim/simx/core.h
149
sim/simx/core.h
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
@@ -11,101 +24,104 @@
|
||||
#include <simobject.h>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "decode.h"
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "sharedmem.h"
|
||||
#include "cache_sim.h"
|
||||
#include "shared_mem.h"
|
||||
#include "ibuffer.h"
|
||||
#include "scoreboard.h"
|
||||
#include "exeunit.h"
|
||||
#include "tex_unit.h"
|
||||
#include "operand.h"
|
||||
#include "dispatcher.h"
|
||||
#include "exe_unit.h"
|
||||
#include "dcrs.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Cluster;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t cycles;
|
||||
uint64_t instrs;
|
||||
uint64_t ibuf_stalls;
|
||||
uint64_t scrb_stalls;
|
||||
uint64_t alu_stalls;
|
||||
uint64_t lsu_stalls;
|
||||
uint64_t csr_stalls;
|
||||
uint64_t fpu_stalls;
|
||||
uint64_t gpu_stalls;
|
||||
uint64_t sfu_stalls;
|
||||
uint64_t ifetches;
|
||||
uint64_t loads;
|
||||
uint64_t stores;
|
||||
uint64_t branches;
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
uint64_t tex_reads;
|
||||
uint64_t tex_latency;
|
||||
uint64_t ifetch_latency;
|
||||
uint64_t load_latency;
|
||||
|
||||
PerfStats()
|
||||
: instrs(0)
|
||||
: cycles(0)
|
||||
, instrs(0)
|
||||
, ibuf_stalls(0)
|
||||
, scrb_stalls(0)
|
||||
, alu_stalls(0)
|
||||
, lsu_stalls(0)
|
||||
, csr_stalls(0)
|
||||
, fpu_stalls(0)
|
||||
, gpu_stalls(0)
|
||||
, sfu_stalls(0)
|
||||
, ifetches(0)
|
||||
, loads(0)
|
||||
, stores(0)
|
||||
, branches(0)
|
||||
, mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
, tex_reads(0)
|
||||
, tex_latency(0)
|
||||
, ifetch_latency(0)
|
||||
, load_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
std::vector<SimPort<MemReq>> icache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> icache_rsp_ports;
|
||||
|
||||
std::vector<SimPort<MemReq>> dcache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
|
||||
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs,
|
||||
SharedMem::Ptr sharedmem);
|
||||
|
||||
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
|
||||
~Core();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void resume();
|
||||
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
return core_id_;
|
||||
}
|
||||
|
||||
const Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
const ArchDef& arch() const {
|
||||
const Arch& arch() const {
|
||||
return arch_;
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
uint32_t getIRegValue(int reg) const {
|
||||
return warps_.at(0)->getIRegValue(reg);
|
||||
const DCRS& dcrs() const {
|
||||
return dcrs_;
|
||||
}
|
||||
|
||||
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
|
||||
|
||||
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
|
||||
|
||||
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
|
||||
void wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
|
||||
AddrType get_addr_type(uint64_t addr);
|
||||
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
@@ -113,19 +129,22 @@ public:
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
void dcache_amo_reserve(uint64_t addr);
|
||||
|
||||
bool dcache_amo_check(uint64_t addr);
|
||||
|
||||
void trigger_ecall();
|
||||
|
||||
void trigger_ebreak();
|
||||
|
||||
bool check_exit() const;
|
||||
bool check_exit(Word* exitcode, bool riscv_test) const;
|
||||
|
||||
private:
|
||||
|
||||
void schedule();
|
||||
void fetch();
|
||||
void decode();
|
||||
void issue();
|
||||
void execute();
|
||||
void commit();
|
||||
|
||||
@@ -133,49 +152,51 @@ private:
|
||||
|
||||
void cout_flush();
|
||||
|
||||
uint32_t id_;
|
||||
const ArchDef arch_;
|
||||
uint32_t core_id_;
|
||||
const Arch& arch_;
|
||||
const DCRS &dcrs_;
|
||||
|
||||
const Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
RAM smem_;
|
||||
std::vector<TexUnit> tex_units_;
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<uint32_t> csrs_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
std::vector<Operand::Ptr> operands_;
|
||||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<ExeUnit::Ptr> exe_units_;
|
||||
Cache::Ptr icache_;
|
||||
Cache::Ptr dcache_;
|
||||
SharedMem::Ptr shared_mem_;
|
||||
Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> dcache_switch_;
|
||||
SharedMem::Ptr sharedmem_;
|
||||
|
||||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
||||
HashTable<pipeline_trace_t*> pending_icache_;
|
||||
std::vector<pipeline_trace_t*> committed_traces_;
|
||||
WarpMask active_warps_;
|
||||
WarpMask stalled_warps_;
|
||||
uint32_t last_schedule_wid_;
|
||||
uint64_t issued_instrs_;
|
||||
uint64_t committed_instrs_;
|
||||
uint32_t csr_tex_unit_;
|
||||
bool ecall_;
|
||||
bool ebreak_;
|
||||
bool exited_;
|
||||
|
||||
uint64_t pending_ifetches_;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::vector<std::vector<CSRs>> csrs_;
|
||||
|
||||
PerfStats perf_stats_;
|
||||
uint64_t perf_mem_pending_reads_;
|
||||
|
||||
Cluster* cluster_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
|
||||
friend class Warp;
|
||||
friend class LsuUnit;
|
||||
friend class AluUnit;
|
||||
friend class CsrUnit;
|
||||
friend class FpuUnit;
|
||||
friend class GpuUnit;
|
||||
friend class SfuUnit;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
28
sim/simx/dcrs.cpp
Normal file
28
sim/simx/dcrs.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dcrs.h"
|
||||
#include <iostream>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
void DCRS::write(uint32_t addr, uint32_t value) {
|
||||
if (addr >= VX_DCR_BASE_STATE_BEGIN
|
||||
&& addr < VX_DCR_BASE_STATE_END) {
|
||||
base_dcrs.write(addr, value);
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << std::hex << "Error: invalid global DCR addr=0x" << addr << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
45
sim/simx/dcrs.h
Normal file
45
sim/simx/dcrs.h
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <util.h>
|
||||
#include <VX_types.h>
|
||||
#include <array>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class BaseDCRS {
|
||||
public:
|
||||
uint32_t read(uint32_t addr) const {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
return states_.at(state);
|
||||
}
|
||||
|
||||
void write(uint32_t addr, uint32_t value) {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
states_.at(state) = value;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
||||
};
|
||||
|
||||
class DCRS {
|
||||
public:
|
||||
void write(uint32_t addr, uint32_t value);
|
||||
|
||||
BaseDCRS base_dcrs;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef DEBUG_LEVEL
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
@@ -9,41 +22,36 @@
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "decode.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "instr.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct InstTableEntry_t {
|
||||
bool controlFlow;
|
||||
InstType iType;
|
||||
};
|
||||
|
||||
static const std::unordered_map<Opcode, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::NOP, {false, InstType::N_TYPE}},
|
||||
{Opcode::R_INST, {false, InstType::R_TYPE}},
|
||||
{Opcode::L_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::I_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::S_INST, {false, InstType::S_TYPE}},
|
||||
{Opcode::B_INST, {true , InstType::B_TYPE}},
|
||||
{Opcode::LUI_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::AUIPC_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::JAL_INST, {true , InstType::J_TYPE}},
|
||||
{Opcode::JALR_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::SYS_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::FENCE, {true , InstType::I_TYPE}},
|
||||
{Opcode::FL, {false, InstType::I_TYPE}},
|
||||
{Opcode::FS, {false, InstType::S_TYPE}},
|
||||
{Opcode::FCI, {false, InstType::R_TYPE}},
|
||||
{Opcode::FMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::VSET, {false, InstType::V_TYPE}},
|
||||
{Opcode::GPGPU, {false, InstType::R_TYPE}},
|
||||
{Opcode::GPU, {false, InstType::R4_TYPE}},
|
||||
{Opcode::R_INST_W, {false, InstType::R_TYPE}},
|
||||
{Opcode::I_INST_W, {false, InstType::I_TYPE}},
|
||||
static const std::unordered_map<Opcode, InstType> sc_instTable = {
|
||||
{Opcode::R_INST, InstType::R_TYPE},
|
||||
{Opcode::L_INST, InstType::I_TYPE},
|
||||
{Opcode::I_INST, InstType::I_TYPE},
|
||||
{Opcode::S_INST, InstType::S_TYPE},
|
||||
{Opcode::B_INST, InstType::B_TYPE},
|
||||
{Opcode::LUI_INST, InstType::U_TYPE},
|
||||
{Opcode::AUIPC_INST, InstType::U_TYPE},
|
||||
{Opcode::JAL_INST, InstType::J_TYPE},
|
||||
{Opcode::JALR_INST, InstType::I_TYPE},
|
||||
{Opcode::SYS_INST, InstType::I_TYPE},
|
||||
{Opcode::FENCE, InstType::I_TYPE},
|
||||
{Opcode::AMO, InstType::R_TYPE},
|
||||
{Opcode::FL, InstType::I_TYPE},
|
||||
{Opcode::FS, InstType::S_TYPE},
|
||||
{Opcode::FCI, InstType::R_TYPE},
|
||||
{Opcode::FMADD, InstType::R4_TYPE},
|
||||
{Opcode::FMSUB, InstType::R4_TYPE},
|
||||
{Opcode::FMNMADD, InstType::R4_TYPE},
|
||||
{Opcode::FMNMSUB, InstType::R4_TYPE},
|
||||
{Opcode::VSET, InstType::V_TYPE},
|
||||
{Opcode::EXT1, InstType::R_TYPE},
|
||||
{Opcode::EXT2, InstType::R4_TYPE},
|
||||
{Opcode::R_INST_W, InstType::R_TYPE},
|
||||
{Opcode::I_INST_W, InstType::I_TYPE},
|
||||
};
|
||||
|
||||
enum Constants {
|
||||
@@ -58,6 +66,8 @@ enum Constants {
|
||||
width_i_imm = 12,
|
||||
width_j_imm = 20,
|
||||
width_v_imm = 11,
|
||||
width_aq = 1,
|
||||
width_rl = 1,
|
||||
|
||||
shift_opcode= 0,
|
||||
shift_rd = width_opcode,
|
||||
@@ -72,15 +82,15 @@ enum Constants {
|
||||
shift_func6 = shift_func7 + width_vmask,
|
||||
shift_vset = shift_func7 + width_func6,
|
||||
|
||||
mask_opcode = (1<<width_opcode)-1,
|
||||
mask_reg = (1<<width_reg)-1,
|
||||
mask_func2 = (1<<width_func2)-1,
|
||||
mask_func3 = (1<<width_func3)-1,
|
||||
mask_func6 = (1<<width_func6)-1,
|
||||
mask_func7 = (1<<width_func7)-1,
|
||||
mask_i_imm = (1<<width_i_imm)-1,
|
||||
mask_j_imm = (1<<width_j_imm)-1,
|
||||
mask_v_imm = (1<<width_v_imm)-1,
|
||||
mask_opcode = (1 << width_opcode) - 1,
|
||||
mask_reg = (1 << width_reg) - 1,
|
||||
mask_func2 = (1 << width_func2) - 1,
|
||||
mask_func3 = (1 << width_func3) - 1,
|
||||
mask_func6 = (1 << width_func6) - 1,
|
||||
mask_func7 = (1 << width_func7) - 1,
|
||||
mask_i_imm = (1 << width_i_imm) - 1,
|
||||
mask_j_imm = (1 << width_j_imm) - 1,
|
||||
mask_v_imm = (1 << width_v_imm) - 1,
|
||||
};
|
||||
|
||||
static const char* op_string(const Instr &instr) {
|
||||
@@ -92,7 +102,6 @@ static const char* op_string(const Instr &instr) {
|
||||
auto imm = instr.getImm();
|
||||
|
||||
switch (opcode) {
|
||||
case Opcode::NOP: return "NOP";
|
||||
case Opcode::LUI_INST: return "LUI";
|
||||
case Opcode::AUIPC_INST: return "AUIPC";
|
||||
case Opcode::R_INST:
|
||||
@@ -116,7 +125,7 @@ static const char* op_string(const Instr &instr) {
|
||||
case 2: return "SLT";
|
||||
case 3: return "SLTU";
|
||||
case 4: return "XOR";
|
||||
case 5: return func7 ? "SRA" : "SRL";
|
||||
case 5: return (func7 & 0x20) ? "SRA" : "SRL";
|
||||
case 6: return "OR";
|
||||
case 7: return "AND";
|
||||
default:
|
||||
@@ -130,7 +139,7 @@ static const char* op_string(const Instr &instr) {
|
||||
case 2: return "SLTI";
|
||||
case 3: return "SLTIU";
|
||||
case 4: return "XORI";
|
||||
case 5: return func7 ? "SRAI" : "SRLI";
|
||||
case 5: return (func7 & 0x20) ? "SRAI" : "SRLI";
|
||||
case 6: return "ORI";
|
||||
case 7: return "ANDI";
|
||||
default:
|
||||
@@ -151,8 +160,8 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::JALR_INST: return "JALR";
|
||||
case Opcode::L_INST:
|
||||
switch (func3) {
|
||||
case 0: return "LBI";
|
||||
case 1: return "LHI";
|
||||
case 0: return "LB";
|
||||
case 1: return "LH";
|
||||
case 2: return "LW";
|
||||
case 3: return "LD";
|
||||
case 4: return "LBU";
|
||||
@@ -192,11 +201,11 @@ static const char* op_string(const Instr &instr) {
|
||||
}
|
||||
case Opcode::I_INST_W:
|
||||
switch (func3) {
|
||||
case 0: return "ADDIW";
|
||||
case 1: return "SLLIW";
|
||||
case 5: return func7 ? "SRAIW" : "SRLIW";
|
||||
default:
|
||||
std::abort();
|
||||
case 0: return "ADDIW";
|
||||
case 1: return "SLLIW";
|
||||
case 5: return func7 ? "SRAIW" : "SRLIW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::SYS_INST:
|
||||
switch (func3) {
|
||||
@@ -222,20 +231,59 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL:
|
||||
switch (func3) {
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
std::abort();
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FS:
|
||||
switch (func3) {
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::AMO: {
|
||||
auto amo_type = func7 >> 2;
|
||||
switch (func3) {
|
||||
case 0x2:
|
||||
switch (amo_type) {
|
||||
case 0x00: return "AMOADD.W";
|
||||
case 0x01: return "AMOSWAP.W";
|
||||
case 0x02: return "LR.W";
|
||||
case 0x03: return "SC.W";
|
||||
case 0x04: return "AMOXOR.W";
|
||||
case 0x08: return "AMOOR.W";
|
||||
case 0x0c: return "AMOAND.W";
|
||||
case 0x10: return "AMOMIN.W";
|
||||
case 0x14: return "AMOMAX.W";
|
||||
case 0x18: return "AMOMINU.W";
|
||||
case 0x1c: return "AMOMAXU.W";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x3:
|
||||
switch (amo_type) {
|
||||
case 0x00: return "AMOADD.D";
|
||||
case 0x01: return "AMOSWAP.D";
|
||||
case 0x02: return "LR.D";
|
||||
case 0x03: return "SC.D";
|
||||
case 0x04: return "AMOXOR.D";
|
||||
case 0x08: return "AMOOR.D";
|
||||
case 0x0c: return "AMOAND.D";
|
||||
case 0x10: return "AMOMIN.D";
|
||||
case 0x14: return "AMOMAX.D";
|
||||
case 0x18: return "AMOMINU.D";
|
||||
case 0x1c: return "AMOMAXU.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x00: return "FADD.S";
|
||||
@@ -332,9 +380,9 @@ static const char* op_string(const Instr &instr) {
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.W";
|
||||
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.S";
|
||||
case 0x71: return func3 ? "FCLASS.D" : "FMV.X.D";
|
||||
case 0x78: return "FMV.W.X";
|
||||
case 0x78: return "FMV.S.X";
|
||||
case 0x79: return "FMV.D.X";
|
||||
default:
|
||||
std::abort();
|
||||
@@ -344,23 +392,27 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S";
|
||||
case Opcode::FMNMSUB: return func2 ? "FNMSUB.D" : "FNMSUB.S";
|
||||
case Opcode::VSET: return "VSET";
|
||||
case Opcode::GPGPU:
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
case 5: return "PREFETCH";
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
case 5: return "PRED";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::GPU:
|
||||
case Opcode::EXT2:
|
||||
switch (func3) {
|
||||
case 0: return "TEX";
|
||||
case 1: {
|
||||
switch (func2) {
|
||||
case 0: return "CMOV";
|
||||
case 0: return "CMOV";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
@@ -375,43 +427,36 @@ static const char* op_string(const Instr &instr) {
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
auto opcode = instr.getOpcode();
|
||||
auto func2 = instr.getFunc2();
|
||||
auto opcode = instr.getOpcode();
|
||||
auto func3 = instr.getFunc3();
|
||||
|
||||
os << op_string(instr) << ": ";
|
||||
|
||||
if (opcode == S_INST
|
||||
|| opcode == FS) {
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "] <- ";
|
||||
os << instr.getRSType(1) << std::dec << instr.getRSrc(1);
|
||||
} else
|
||||
if (opcode == L_INST
|
||||
|| opcode == FL) {
|
||||
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "]";
|
||||
} else {
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
|
||||
}
|
||||
uint32_t i = 0;
|
||||
for (; i < instr.getNRSrc(); ++i) {
|
||||
if (i) os << ", ";
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
}
|
||||
if (instr.hasImm()) {
|
||||
if (i) os << ", ";
|
||||
os << "imm=0x" << std::hex << instr.getImm();
|
||||
}
|
||||
if (opcode == GPU && func3 == 0) {
|
||||
os << ", unit=" << std::dec << func2;
|
||||
}
|
||||
os << op_string(instr);
|
||||
|
||||
int sep = 0;
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << instr.getRDType() << std::dec << instr.getRDest();
|
||||
}
|
||||
for (uint32_t i = 0; i < instr.getNRSrc(); ++i) {
|
||||
if (instr.getRSType(i) == RegType::None)
|
||||
continue;
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
}
|
||||
if (instr.hasImm()) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getImm();
|
||||
}
|
||||
if (opcode == Opcode::SYS_INST && func3 >= 5) {
|
||||
// CSRs with immediate values
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getRSrc(0);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
Decoder::Decoder(const ArchDef&) {}
|
||||
Decoder::Decoder(const Arch&) {}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
auto instr = std::make_shared<Instr>();
|
||||
@@ -434,7 +479,7 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto iType = op_it->second.iType;
|
||||
auto iType = op_it->second;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
if (func3 != 0x2 && func3 != 0x3) {
|
||||
iType = InstType::V_TYPE;
|
||||
@@ -442,57 +487,88 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
|
||||
switch (iType) {
|
||||
case InstType::N_TYPE:
|
||||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
if (op == Opcode::FCI) {
|
||||
switch (func7) {
|
||||
switch (op) {
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x2c: // FSQRT.S
|
||||
case 0x2d: // FSQRT.D
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x50: // FLE.S, FLT.S, FEQ.S
|
||||
case 0x51: // FLE.D, FLT.D, FEQ.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
case 0x60: // FCVT.W.D, FCVT.WU.D, FCVT.L.D, FCVT.LU.D
|
||||
case 0x61: // FCVT.WU.S, FCVT.W.S, FCVT.L.S, FCVT.LU.S
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::None);
|
||||
break;
|
||||
case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU
|
||||
case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::None);
|
||||
break;
|
||||
case 0x70: // FCLASS.S, FMV.X.W
|
||||
case 0x70: // FCLASS.S, FMV.X.S
|
||||
case 0x71: // FCLASS.D, FMV.X.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x78: // FMV.W.X
|
||||
case 0x78: // FMV.S.X
|
||||
case 0x79: // FMV.D.X
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
case 0: // TMC
|
||||
case 3: // JOIN
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
case 1: // WSPAWN
|
||||
case 4: // BAR
|
||||
case 5: // PRED
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 2: // SPLIT
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
instr->setFunc7(func7);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FL) {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
} else {
|
||||
@@ -503,15 +579,23 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
switch (op) {
|
||||
case Opcode::SYS_INST:
|
||||
if (func3 != 0) {
|
||||
// RV32I: CSR*
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
}
|
||||
// RV32I: CSR
|
||||
if (func3 >= 5) {
|
||||
// rs1 holds zimm
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::None);
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
}
|
||||
// uint12
|
||||
instr->setImm(code >> shift_rs2);
|
||||
break;
|
||||
case Opcode::FENCE:
|
||||
// uint12
|
||||
instr->setImm(code >> shift_rs2);
|
||||
instr->setDestReg(rd, RegType::None);
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
break;
|
||||
case Opcode::I_INST:
|
||||
case Opcode::I_INST_W:
|
||||
@@ -538,11 +622,11 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
} break;
|
||||
case InstType::S_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FS) {
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
} else {
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
auto imm = (func7 << width_reg) | rd;
|
||||
@@ -550,8 +634,8 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
} break;
|
||||
|
||||
case InstType::B_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
auto bit_11 = rd & 0x1;
|
||||
auto bits_4_1 = rd >> 1;
|
||||
@@ -581,8 +665,8 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
case InstType::V_TYPE:
|
||||
switch (op) {
|
||||
case Opcode::VSET: {
|
||||
instr->setDestVReg(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setFunc3(func3);
|
||||
if (func3 == 7) {
|
||||
instr->setImm(!(code >> shift_vset));
|
||||
@@ -593,20 +677,20 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
instr->setVediv((immed >> 4) & 0x3);
|
||||
instr->setVsew((immed >> 2) & 0x3);
|
||||
} else {
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
}
|
||||
} else {
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setFunc6(func6);
|
||||
}
|
||||
} break;
|
||||
|
||||
case Opcode::FL:
|
||||
instr->setDestVReg(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
@@ -614,9 +698,9 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
|
||||
case Opcode::FS:
|
||||
instr->setVs3(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
@@ -627,16 +711,28 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
break;
|
||||
case R4_TYPE:
|
||||
if (op == Opcode::GPU) {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->setSrcReg(rs3, RegType::Integer);
|
||||
if (op == Opcode::EXT2) {
|
||||
switch (func3) {
|
||||
case 1:
|
||||
switch (func2) {
|
||||
case 0: // CMOV
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs3, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->setSrcReg(rs3, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
}
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
@@ -5,12 +18,12 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Arch;
|
||||
class Instr;
|
||||
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
Decoder(const Arch &);
|
||||
|
||||
std::shared_ptr<Instr> decode(uint32_t code) const;
|
||||
};
|
||||
|
||||
141
sim/simx/dispatcher.h
Normal file
141
sim/simx/dispatcher.h
Normal file
@@ -0,0 +1,141 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Dispatcher : public SimObject<Dispatcher> {
|
||||
public:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
|
||||
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
|
||||
: SimObject<Dispatcher>(ctx, "Dispatcher")
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, Inputs_(ISSUE_WIDTH, this)
|
||||
, arch_(arch)
|
||||
, queues_(ISSUE_WIDTH, std::queue<pipeline_trace_t*>())
|
||||
, buf_size_(buf_size)
|
||||
, block_size_(block_size)
|
||||
, num_lanes_(num_lanes)
|
||||
, batch_count_(ISSUE_WIDTH / block_size)
|
||||
, pid_count_(arch.num_threads() / num_lanes)
|
||||
, batch_idx_(0)
|
||||
, start_p_(block_size, 0)
|
||||
{}
|
||||
|
||||
virtual ~Dispatcher() {}
|
||||
|
||||
virtual void reset() {
|
||||
batch_idx_ = 0;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
start_p_.at(b) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& queue = queues_.at(i);
|
||||
if (queue.empty())
|
||||
continue;
|
||||
auto trace = queue.front();
|
||||
Inputs_.at(i).send(trace, 1);
|
||||
queue.pop();
|
||||
}
|
||||
|
||||
uint32_t block_sent = 0;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
uint32_t i = batch_idx_ * block_size_ + b;
|
||||
auto& input = Inputs_.at(i);
|
||||
if (input.empty()) {
|
||||
++block_sent;
|
||||
continue;
|
||||
}
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
if (pid_count_ != 1) {
|
||||
auto start_p = start_p_.at(b);
|
||||
if (start_p == -1) {
|
||||
++block_sent;
|
||||
continue;
|
||||
}
|
||||
int start(-1), end(-1);
|
||||
for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
|
||||
if (!trace->tmask.test(j))
|
||||
continue;
|
||||
if (start == -1)
|
||||
start = j;
|
||||
end = j;
|
||||
}
|
||||
start /= num_lanes_;
|
||||
end /= num_lanes_;
|
||||
auto new_trace = new pipeline_trace_t(*trace);
|
||||
new_trace->tmask.reset();
|
||||
for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
|
||||
new_trace->tmask[j] = trace->tmask[j];
|
||||
}
|
||||
new_trace->pid = start;
|
||||
new_trace->sop = (start_p == 0);
|
||||
if (start == end) {
|
||||
new_trace->eop = 1;
|
||||
start_p_.at(b) = -1;
|
||||
input.pop();
|
||||
++block_sent;
|
||||
delete trace;
|
||||
} else {
|
||||
new_trace->eop = 0;
|
||||
start_p_.at(b) = start + 1;
|
||||
}
|
||||
output.send(new_trace, 1);
|
||||
DT(3, "pipeline-dispatch: " << *new_trace);
|
||||
} else {
|
||||
trace->pid = 0;
|
||||
input.pop();
|
||||
output.send(trace, 1);
|
||||
DT(3, "pipeline-dispatch: " << *trace);
|
||||
++block_sent;
|
||||
}
|
||||
}
|
||||
if (block_sent == block_size_) {
|
||||
batch_idx_ = (batch_idx_ + 1) % batch_count_;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
start_p_.at(b) = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool push(uint32_t issue_index, pipeline_trace_t* trace) {
|
||||
auto& queue = queues_.at(issue_index);
|
||||
if (queue.size() >= buf_size_)
|
||||
return false;
|
||||
queue.push(trace);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs_;
|
||||
const Arch& arch_;
|
||||
std::vector<std::queue<pipeline_trace_t*>> queues_;
|
||||
uint32_t buf_size_;
|
||||
uint32_t block_size_;
|
||||
uint32_t num_lanes_;
|
||||
uint32_t batch_count_;
|
||||
uint32_t pid_count_;
|
||||
uint32_t batch_idx_;
|
||||
std::vector<int> start_p_;
|
||||
};
|
||||
|
||||
}
|
||||
329
sim/simx/exe_unit.cpp
Normal file
329
sim/simx/exe_unit.cpp
Normal file
@@ -0,0 +1,329 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "exe_unit.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
#include "cache_sim.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
AluUnit::AluUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "ALU") {}
|
||||
|
||||
void AluUnit::tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
switch (trace->alu_type) {
|
||||
case AluType::ARITH:
|
||||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::IMUL:
|
||||
output.send(trace, LATENCY_IMUL+1);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
output.send(trace, XLEN+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->alu_type << ", " << *trace);
|
||||
if (trace->eop && trace->fetch_stall) {
|
||||
assert(core_->stalled_warps_.test(trace->wid));
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.alu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "FPU") {}
|
||||
|
||||
void FpuUnit::tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
switch (trace->fpu_type) {
|
||||
case FpuType::FNCP:
|
||||
output.send(trace, 2);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
output.send(trace, LATENCY_FMA+1);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
output.send(trace, LATENCY_FDIV+1);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
output.send(trace, LATENCY_FSQRT+1);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
output.send(trace, LATENCY_FCVT+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->fpu_type << ", " << *trace);
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
|
||||
: ExeUnit(ctx, core, "LSU")
|
||||
, pending_rd_reqs_(LSUQ_SIZE)
|
||||
, num_lanes_(NUM_LSU_LANES)
|
||||
, pending_loads_(0)
|
||||
, fence_lock_(false)
|
||||
, input_idx_(0)
|
||||
{}
|
||||
|
||||
void LsuUnit::reset() {
|
||||
pending_rd_reqs_.clear();
|
||||
pending_loads_ = 0;
|
||||
fence_lock_ = false;
|
||||
}
|
||||
|
||||
void LsuUnit::tick() {
|
||||
core_->perf_stats_.load_latency += pending_loads_;
|
||||
|
||||
// handle dcache response
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_rsp_ports.at(t);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
DT(3, "dcache-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.count);
|
||||
--entry.count; // track remaining addresses
|
||||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
--pending_loads_;
|
||||
}
|
||||
|
||||
// handle shared memory response
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
auto& smem_rsp_port = core_->sharedmem_->Outputs.at(t);
|
||||
if (smem_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = smem_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
DT(3, "smem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type << ", tid=" << t << ", " << *trace);
|
||||
assert(entry.count);
|
||||
--entry.count; // track remaining addresses
|
||||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
smem_rsp_port.pop();
|
||||
--pending_loads_;
|
||||
}
|
||||
|
||||
if (fence_lock_) {
|
||||
// wait for all pending memory operations to complete
|
||||
if (!pending_rd_reqs_.empty())
|
||||
return;
|
||||
int iw = fence_state_->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
DT(3, "fence-unlock: " << fence_state_);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
int iw = (input_idx_ + i) % ISSUE_WIDTH;
|
||||
auto& input = Inputs.at(iw);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(iw);
|
||||
auto trace = input.front();
|
||||
auto trace_data = std::dynamic_pointer_cast<LsuTraceData>(trace->data);
|
||||
|
||||
auto t0 = trace->pid * num_lanes_;
|
||||
|
||||
if (trace->lsu_type == LsuType::FENCE) {
|
||||
// schedule fence lock
|
||||
fence_state_ = trace;
|
||||
fence_lock_ = true;
|
||||
DT(3, "fence-lock: " << *trace);
|
||||
// remove input
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
break;
|
||||
}
|
||||
|
||||
// check pending queue capacity
|
||||
if (pending_rd_reqs_.full()) {
|
||||
if (!trace->log_once(true)) {
|
||||
DT(3, "*** " << this->name() << "-lsu-queue-stall: " << *trace);
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
trace->log_once(false);
|
||||
}
|
||||
|
||||
bool is_write = (trace->lsu_type == LsuType::STORE);
|
||||
|
||||
// duplicates detection
|
||||
bool is_dup = false;
|
||||
if (trace->tmask.test(t0)) {
|
||||
uint64_t addr_mask = sizeof(uint32_t)-1;
|
||||
uint32_t addr0 = trace_data->mem_addrs.at(0).addr & ~addr_mask;
|
||||
uint32_t matches = 1;
|
||||
for (uint32_t t = 1; t < num_lanes_; ++t) {
|
||||
if (!trace->tmask.test(t0 + t))
|
||||
continue;
|
||||
auto mem_addr = trace_data->mem_addrs.at(t).addr & ~addr_mask;
|
||||
matches += (addr0 == mem_addr);
|
||||
}
|
||||
is_dup = (matches == trace->tmask.count());
|
||||
}
|
||||
|
||||
uint32_t addr_count;
|
||||
if (is_dup) {
|
||||
addr_count = 1;
|
||||
} else {
|
||||
addr_count = trace->tmask.count();
|
||||
}
|
||||
|
||||
auto tag = pending_rd_reqs_.allocate({trace, addr_count});
|
||||
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
if (!trace->tmask.test(t0 + t))
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->dcache_req_ports.at(t);
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto type = core_->get_addr_type(mem_addr.addr);
|
||||
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.type = type;
|
||||
mem_req.tag = tag;
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
dcache_req_port.send(mem_req, 2);
|
||||
DT(3, "dcache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", tid=" << t << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
||||
++pending_loads_;
|
||||
++core_->perf_stats_.loads;
|
||||
if (is_dup)
|
||||
break;
|
||||
}
|
||||
|
||||
// do not wait on writes
|
||||
if (is_write) {
|
||||
pending_rd_reqs_.release(tag);
|
||||
output.send(trace, 1);
|
||||
++core_->perf_stats_.stores;
|
||||
}
|
||||
|
||||
// remove input
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
|
||||
break; // single block
|
||||
}
|
||||
++input_idx_;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SfuUnit::SfuUnit(const SimContext& ctx, Core* core)
|
||||
: ExeUnit(ctx, core, "SFU")
|
||||
, input_idx_(0)
|
||||
{}
|
||||
|
||||
void SfuUnit::tick() {
|
||||
// check input queue
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
int iw = (input_idx_ + i) % ISSUE_WIDTH;
|
||||
auto& input = Inputs.at(iw);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(iw);
|
||||
auto trace = input.front();
|
||||
auto sfu_type = trace->sfu_type;
|
||||
bool release_warp = trace->fetch_stall;
|
||||
|
||||
switch (sfu_type) {
|
||||
case SfuType::TMC:
|
||||
case SfuType::WSPAWN:
|
||||
case SfuType::SPLIT:
|
||||
case SfuType::JOIN:
|
||||
case SfuType::PRED:
|
||||
case SfuType::CSRRW:
|
||||
case SfuType::CSRRS:
|
||||
case SfuType::CSRRC:
|
||||
output.send(trace, 1);
|
||||
break;
|
||||
case SfuType::BAR: {
|
||||
output.send(trace, 1);
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
if (trace->eop) {
|
||||
core_->barrier(trace_data->bar.id, trace_data->bar.count, trace->wid);
|
||||
}
|
||||
release_warp = false;
|
||||
} break;
|
||||
case SfuType::CMOV:
|
||||
output.send(trace, 3);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
DT(3, "pipeline-execute: op=" << trace->sfu_type << ", " << *trace);
|
||||
if (trace->eop && release_warp) {
|
||||
assert(core_->stalled_warps_.test(trace->wid));
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
|
||||
auto time = input.pop();
|
||||
auto stalls = (SimPlatform::instance().cycles() - time);
|
||||
|
||||
core_->perf_stats_.sfu_stalls += stalls;
|
||||
|
||||
break; // single block
|
||||
}
|
||||
++input_idx_;
|
||||
}
|
||||
@@ -1,8 +1,21 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "cache_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
@@ -10,13 +23,13 @@ class Core;
|
||||
|
||||
class ExeUnit : public SimObject<ExeUnit> {
|
||||
public:
|
||||
SimPort<pipeline_trace_t*> Input;
|
||||
SimPort<pipeline_trace_t*> Output;
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs;
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
|
||||
ExeUnit(const SimContext& ctx, Core* core, const char* name)
|
||||
: SimObject<ExeUnit>(ctx, name)
|
||||
, Input(this)
|
||||
, Output(this)
|
||||
, Inputs(ISSUE_WIDTH, this)
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, core_(core)
|
||||
{}
|
||||
|
||||
@@ -32,32 +45,6 @@ protected:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class NopUnit : public ExeUnit {
|
||||
public:
|
||||
NopUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LsuUnit : public ExeUnit {
|
||||
private:
|
||||
uint32_t num_threads_;
|
||||
HashTable<std::pair<pipeline_trace_t*, uint32_t>> pending_rd_reqs_;
|
||||
pipeline_trace_t* fence_state_;
|
||||
bool fence_lock_;
|
||||
|
||||
public:
|
||||
LsuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class AluUnit : public ExeUnit {
|
||||
public:
|
||||
AluUnit(const SimContext& ctx, Core*);
|
||||
@@ -67,15 +54,6 @@ public:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class CsrUnit : public ExeUnit {
|
||||
public:
|
||||
CsrUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class FpuUnit : public ExeUnit {
|
||||
public:
|
||||
FpuUnit(const SimContext& ctx, Core*);
|
||||
@@ -85,19 +63,37 @@ public:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class GpuUnit : public ExeUnit {
|
||||
private:
|
||||
uint32_t num_threads_;
|
||||
HashTable<std::pair<pipeline_trace_t*, uint32_t>> pending_tex_reqs_;
|
||||
|
||||
bool processTexRequest(pipeline_trace_t* trace);
|
||||
|
||||
class LsuUnit : public ExeUnit {
|
||||
public:
|
||||
GpuUnit(const SimContext& ctx, Core*);
|
||||
LsuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
private:
|
||||
struct pending_req_t {
|
||||
pipeline_trace_t* trace;
|
||||
uint32_t count;
|
||||
};
|
||||
HashTable<pending_req_t> pending_rd_reqs_;
|
||||
uint32_t num_lanes_;
|
||||
pipeline_trace_t* fence_state_;
|
||||
uint64_t pending_loads_;
|
||||
bool fence_lock_;
|
||||
uint32_t input_idx_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SfuUnit : public ExeUnit {
|
||||
public:
|
||||
SfuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
|
||||
private:
|
||||
uint32_t input_idx_;
|
||||
};
|
||||
|
||||
}
|
||||
1173
sim/simx/execute.cpp
1173
sim/simx/execute.cpp
File diff suppressed because it is too large
Load Diff
@@ -1,383 +0,0 @@
|
||||
#include "exeunit.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
NopUnit::NopUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "NOP") {}
|
||||
|
||||
void NopUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
Output.send(trace, 1);
|
||||
Input.pop();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
|
||||
: ExeUnit(ctx, core, "LSU")
|
||||
, num_threads_(core->arch().num_threads())
|
||||
, pending_rd_reqs_(LSUQ_SIZE)
|
||||
, fence_lock_(false)
|
||||
{}
|
||||
|
||||
void LsuUnit::reset() {
|
||||
pending_rd_reqs_.clear();
|
||||
fence_lock_ = false;
|
||||
}
|
||||
|
||||
void LsuUnit::tick() {
|
||||
// handle dcache response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_switch_.at(t)->RspOut.at(0);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "dcache-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu.type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
}
|
||||
|
||||
// handle shared memory response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& smem_rsp_port = core_->shared_mem_->Outputs.at(t);
|
||||
if (smem_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = smem_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "smem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu.type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
smem_rsp_port.pop();
|
||||
}
|
||||
|
||||
if (fence_lock_) {
|
||||
// wait for all pending memory operations to complete
|
||||
if (!pending_rd_reqs_.empty())
|
||||
return;
|
||||
Output.send(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
DT(3, "fence-unlock: " << fence_state_);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
if (Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = Input.front();
|
||||
|
||||
if (trace->lsu.type == LsuType::FENCE) {
|
||||
// schedule fence lock
|
||||
fence_state_ = trace;
|
||||
fence_lock_ = true;
|
||||
DT(3, "fence-lock: " << *trace);
|
||||
// remove input
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
return;
|
||||
}
|
||||
|
||||
// check pending queue capacity
|
||||
if (pending_rd_reqs_.full()) {
|
||||
if (!trace->suspend()) {
|
||||
DT(3, "*** lsu-queue-stall: " << *trace);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
trace->resume();
|
||||
}
|
||||
|
||||
bool is_write = (trace->lsu.type == LsuType::STORE);
|
||||
|
||||
// duplicates detection
|
||||
bool is_dup = false;
|
||||
if (trace->tmask.test(0)) {
|
||||
uint64_t addr_mask = sizeof(uint32_t)-1;
|
||||
uint32_t addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
|
||||
uint32_t matches = 1;
|
||||
for (uint32_t t = 1; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
auto mem_addr = trace->mem_addrs.at(t).at(0).addr & ~addr_mask;
|
||||
matches += (addr0 == mem_addr);
|
||||
}
|
||||
is_dup = (matches == trace->tmask.count());
|
||||
}
|
||||
|
||||
uint32_t valid_addrs = 0;
|
||||
if (is_dup) {
|
||||
valid_addrs = 1;
|
||||
} else {
|
||||
for (auto& mem_addr : trace->mem_addrs) {
|
||||
valid_addrs += mem_addr.size();
|
||||
}
|
||||
}
|
||||
|
||||
auto tag = pending_rd_reqs_.allocate({trace, valid_addrs});
|
||||
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->dcache_switch_.at(t)->ReqIn.at(0);
|
||||
auto mem_addr = trace->mem_addrs.at(t).at(0);
|
||||
auto type = get_addr_type(mem_addr.addr, mem_addr.size);
|
||||
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.non_cacheable = (type == AddrType::IO);
|
||||
mem_req.tag = tag;
|
||||
mem_req.core_id = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
if (type == AddrType::Shared) {
|
||||
core_->shared_mem_->Inputs.at(t).send(mem_req, 2);
|
||||
DT(3, "smem-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", " << *trace);
|
||||
} else {
|
||||
dcache_req_port.send(mem_req, 2);
|
||||
DT(3, "dcache-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", nc=" << mem_req.non_cacheable << ", " << *trace);
|
||||
}
|
||||
|
||||
if (is_dup)
|
||||
break;
|
||||
}
|
||||
|
||||
// do not wait on writes
|
||||
if (is_write) {
|
||||
pending_rd_reqs_.release(tag);
|
||||
Output.send(trace, 1);
|
||||
}
|
||||
|
||||
// remove input
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
AluUnit::AluUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "ALU") {}
|
||||
|
||||
void AluUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
switch (trace->alu.type) {
|
||||
case AluType::ARITH:
|
||||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::CMOV:
|
||||
Output.send(trace, 1);
|
||||
break;
|
||||
case AluType::IMUL:
|
||||
Output.send(trace, LATENCY_IMUL+1);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
Output.send(trace, XLEN+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->alu.type << ", " << *trace);
|
||||
if (trace->fetch_stall) {
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.alu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
CsrUnit::CsrUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "CSR") {}
|
||||
|
||||
void CsrUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
Output.send(trace, 1);
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.csr_stalls += (SimPlatform::instance().cycles() - time);
|
||||
DT(3, "pipeline-execute: op=CSR, " << *trace);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : ExeUnit(ctx, core, "FPU") {}
|
||||
|
||||
void FpuUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
switch (trace->fpu.type) {
|
||||
case FpuType::FNCP:
|
||||
Output.send(trace, 2);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
Output.send(trace, LATENCY_FMA+1);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
Output.send(trace, LATENCY_FDIV+1);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
Output.send(trace, LATENCY_FSQRT+1);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
Output.send(trace, LATENCY_FCVT+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->fpu.type << ", " << *trace);
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
GpuUnit::GpuUnit(const SimContext& ctx, Core* core)
|
||||
: ExeUnit(ctx, core, "GPU")
|
||||
, num_threads_(core->arch().num_threads())
|
||||
, pending_tex_reqs_(TEXQ_SIZE)
|
||||
{}
|
||||
|
||||
void GpuUnit::reset() {
|
||||
pending_tex_reqs_.clear();
|
||||
}
|
||||
|
||||
void GpuUnit::tick() {
|
||||
#ifdef EXT_TEX_ENABLE
|
||||
// handle memory response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_switch_.at(t)->RspOut.at(1);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_tex_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "tex-rsp: tag=" << mem_rsp.tag << ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_tex_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
}
|
||||
#endif
|
||||
|
||||
// check input queue
|
||||
if (Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = Input.front();
|
||||
|
||||
bool issued = false;
|
||||
|
||||
switch (trace->gpu.type) {
|
||||
case GpuType::TMC:
|
||||
Output.send(trace, 1);
|
||||
core_->active_warps_.set(trace->wid, trace->gpu.active_warps.test(trace->wid));
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::WSPAWN:
|
||||
Output.send(trace, 1);
|
||||
core_->active_warps_ = trace->gpu.active_warps;
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::SPLIT:
|
||||
case GpuType::JOIN:
|
||||
Output.send(trace, 1);
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::BAR:
|
||||
Output.send(trace, 1);
|
||||
if (trace->gpu.active_warps != 0)
|
||||
core_->active_warps_ |= trace->gpu.active_warps;
|
||||
else
|
||||
core_->active_warps_.reset(trace->wid);
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::TEX:
|
||||
if (this->processTexRequest(trace))
|
||||
issued = true;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
if (issued) {
|
||||
DT(3, "pipeline-execute: op=" << trace->gpu.type << ", " << *trace);
|
||||
if (trace->fetch_stall) {
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
bool GpuUnit::processTexRequest(pipeline_trace_t* trace) {
|
||||
// check pending queue capacity
|
||||
if (pending_tex_reqs_.full()) {
|
||||
if (!trace->suspend()) {
|
||||
DT(3, "*** tex-queue-stall: " << *trace);
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
trace->resume();
|
||||
}
|
||||
|
||||
// send memory request
|
||||
|
||||
uint32_t valid_addrs = 0;
|
||||
for (auto& mem_addr : trace->mem_addrs) {
|
||||
valid_addrs += mem_addr.size();
|
||||
}
|
||||
|
||||
auto tag = pending_tex_reqs_.allocate({trace, valid_addrs});
|
||||
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->dcache_switch_.at(t)->ReqIn.at(1);
|
||||
for (auto& mem_addr : trace->mem_addrs.at(t)) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = (trace->lsu.type == LsuType::STORE);
|
||||
mem_req.tag = tag;
|
||||
mem_req.core_id = core_->id();
|
||||
mem_req.uuid = trace->uuid;
|
||||
dcache_req_port.send(mem_req, 3);
|
||||
DT(3, "tex-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", tid=" << t << ", "<< trace);
|
||||
++ core_->perf_stats_.tex_reads;
|
||||
++ core_->perf_stats_.tex_latency += pending_tex_reqs_.size();
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
@@ -6,10 +19,6 @@
|
||||
namespace vortex {
|
||||
|
||||
class IBuffer {
|
||||
private:
|
||||
std::queue<pipeline_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
|
||||
public:
|
||||
IBuffer(uint32_t size)
|
||||
: capacity_(size)
|
||||
@@ -39,6 +48,10 @@ public:
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::swap(entries_, empty );
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<pipeline_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
@@ -7,7 +20,7 @@ namespace vortex {
|
||||
class Warp;
|
||||
|
||||
enum Opcode {
|
||||
NOP = 0,
|
||||
NONE = 0,
|
||||
R_INST = 0x33,
|
||||
L_INST = 0x3,
|
||||
I_INST = 0x13,
|
||||
@@ -19,6 +32,7 @@ enum Opcode {
|
||||
JALR_INST = 0x67,
|
||||
SYS_INST = 0x73,
|
||||
FENCE = 0x0f,
|
||||
AMO = 0x2f,
|
||||
// F Extension
|
||||
FL = 0x7,
|
||||
FS = 0x27,
|
||||
@@ -26,19 +40,20 @@ enum Opcode {
|
||||
FMADD = 0x43,
|
||||
FMSUB = 0x47,
|
||||
FMNMSUB = 0x4b,
|
||||
FMNMADD = 0x4f,
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// GPGPU Extension
|
||||
GPGPU = 0x6b,
|
||||
GPU = 0x5b,
|
||||
// RV64 Standard Extensions
|
||||
FMNMADD = 0x4f,
|
||||
// RV64 Standard Extension
|
||||
R_INST_W = 0x3b,
|
||||
I_INST_W = 0x1b,
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// Custom Extensions
|
||||
EXT1 = 0x0b,
|
||||
EXT2 = 0x2b,
|
||||
EXT3 = 0x5b,
|
||||
EXT4 = 0x7b
|
||||
};
|
||||
|
||||
enum InstType {
|
||||
N_TYPE,
|
||||
enum InstType {
|
||||
R_TYPE,
|
||||
I_TYPE,
|
||||
S_TYPE,
|
||||
@@ -52,25 +67,45 @@ enum InstType {
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
: opcode_(Opcode::NOP)
|
||||
: opcode_(Opcode::NONE)
|
||||
, num_rsrcs_(0)
|
||||
, has_imm_(false)
|
||||
, rdest_type_(RegType::None)
|
||||
, imm_(0)
|
||||
, rdest_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func6_(0)
|
||||
, func7_(0) {
|
||||
, func7_(0)
|
||||
, vmask_(0)
|
||||
, vlsWidth_(0)
|
||||
, vMop_(0)
|
||||
, vNf_(0)
|
||||
, vs3_(0)
|
||||
, vlmul_(0)
|
||||
, vsew_(0)
|
||||
, vediv_(0) {
|
||||
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = RegType::None;
|
||||
rsrc_[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(uint32_t destReg, RegType type) { rdest_type_ = type; rdest_ = destReg; }
|
||||
void setSrcReg(uint32_t srcReg, RegType type) { rsrc_type_[num_rsrcs_] = type; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestVReg(uint32_t destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
|
||||
void setSrcVReg(uint32_t srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestReg(uint32_t destReg, RegType type) {
|
||||
rdest_type_ = type;
|
||||
rdest_ = destReg;
|
||||
}
|
||||
void addSrcReg(uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[num_rsrcs_] = type;
|
||||
rsrc_[num_rsrcs_] = srcReg;
|
||||
++num_rsrcs_;
|
||||
}
|
||||
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[index] = type;
|
||||
rsrc_[index] = srcReg;
|
||||
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
|
||||
}
|
||||
void setFunc2(uint32_t func2) { func2_ = func2; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; }
|
||||
void setFunc7(uint32_t func7) { func7_ = func7; }
|
||||
@@ -85,17 +120,17 @@ public:
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; }
|
||||
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
uint32_t getFunc2() const { return func2_; }
|
||||
uint32_t getFunc3() const { return func3_; }
|
||||
uint32_t getFunc6() const { return func6_; }
|
||||
uint32_t getFunc7() const { return func7_; }
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
uint32_t getRDest() const { return rdest_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
uint32_t getVlsWidth() const { return vlsWidth_; }
|
||||
uint32_t getVmop() const { return vMop_; }
|
||||
|
||||
@@ -1,98 +1,132 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include "processor.h"
|
||||
#include "archdef.h"
|
||||
#include "mem.h"
|
||||
#include "constants.h"
|
||||
#include <util.h>
|
||||
#include "args.h"
|
||||
#include "core.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Usage: [-c <cores>] [-w <warps>] [-t <threads>] [-r: riscv-test] [-s: stats] [-h: help] <program>" << std::endl;
|
||||
}
|
||||
|
||||
uint32_t num_threads = NUM_THREADS;
|
||||
uint32_t num_warps = NUM_WARPS;
|
||||
uint32_t num_cores = NUM_CORES;
|
||||
uint32_t num_clusters = NUM_CLUSTERS;
|
||||
bool showStats = false;;
|
||||
bool riscv_test = false;
|
||||
const char* program = nullptr;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "t:w:c:g:rsh?")) != -1) {
|
||||
switch (c) {
|
||||
case 't':
|
||||
num_threads = atoi(optarg);
|
||||
break;
|
||||
case 'w':
|
||||
num_warps = atoi(optarg);
|
||||
break;
|
||||
case 'c':
|
||||
num_cores = atoi(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
num_clusters = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
riscv_test = true;
|
||||
break;
|
||||
case 's':
|
||||
showStats = true;
|
||||
break;
|
||||
case 'h':
|
||||
case '?':
|
||||
show_usage();
|
||||
exit(0);
|
||||
break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (optind < argc) {
|
||||
program = argv[optind];
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
} else {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int exitcode = 0;
|
||||
|
||||
std::string imgFileName;
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
bool showHelp(false);
|
||||
bool showStats(false);
|
||||
bool riscv_test(false);
|
||||
parse_args(argc, argv);
|
||||
|
||||
// parse the command line arguments
|
||||
CommandLineArgFlag fh("-h", "--help", "show command line options", showHelp);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "program binary", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "number of cores", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "number of warps", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "number of threads", num_threads);
|
||||
CommandLineArgFlag fr("-r", "--riscv", "enable riscv tests", riscv_test);
|
||||
CommandLineArgFlag fs("-s", "--stats", "show stats", showStats);
|
||||
|
||||
CommandLineArg::readArgs(argc - 1, argv + 1);
|
||||
|
||||
if (showHelp || imgFileName.empty()) {
|
||||
std::cout << "Vortex emulator command line arguments:\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -r, --riscv riscv test\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::cout << "Running " << imgFileName << "..." << std::endl;
|
||||
|
||||
{
|
||||
// create processor configuation
|
||||
ArchDef arch(num_cores, num_warps, num_threads);
|
||||
Arch arch(num_threads, num_warps, num_cores, num_clusters);
|
||||
|
||||
// create memory module
|
||||
RAM ram(RAM_PAGE_SIZE);
|
||||
|
||||
// create processor
|
||||
Processor processor(arch);
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
// setup base DCRs
|
||||
const uint64_t startup_addr(STARTUP_ADDR);
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
|
||||
#if (XLEN == 64)
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32);
|
||||
#endif
|
||||
processor.write_dcr(VX_DCR_BASE_MPM_CLASS, 0);
|
||||
|
||||
// load program
|
||||
{
|
||||
std::string program_ext(fileExtension(imgFileName.c_str()));
|
||||
{
|
||||
std::string program_ext(fileExtension(program));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(imgFileName.c_str(), STARTUP_ADDR);
|
||||
ram.loadBinImage(program, startup_addr);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(imgFileName.c_str());
|
||||
ram.loadHexImage(program);
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// create processor
|
||||
Processor processor(arch);
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
// run simulation
|
||||
exitcode = processor.run();
|
||||
exitcode = processor.run(riscv_test);
|
||||
}
|
||||
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,17 @@
|
||||
#include "memsim.h"
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mem_sim.h"
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <stdlib.h>
|
||||
@@ -83,7 +96,7 @@ public:
|
||||
mem_req.addr,
|
||||
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
|
||||
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
|
||||
mem_req.core_id
|
||||
mem_req.cid
|
||||
);
|
||||
|
||||
if (!dram_->send(dram_req))
|
||||
@@ -1,8 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "types.h"
|
||||
#include <vector>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
61
sim/simx/operand.h
Normal file
61
sim/simx/operand.h
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Operand : public SimObject<Operand> {
|
||||
public:
|
||||
SimPort<pipeline_trace_t*> Input;
|
||||
SimPort<pipeline_trace_t*> Output;
|
||||
|
||||
Operand(const SimContext& ctx)
|
||||
: SimObject<Operand>(ctx, "Operand")
|
||||
, Input(this)
|
||||
, Output(this)
|
||||
{}
|
||||
|
||||
virtual ~Operand() {}
|
||||
|
||||
virtual void reset() {}
|
||||
|
||||
virtual void tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
|
||||
int delay = 1;
|
||||
for (int i = 0; i < MAX_NUM_REGS; ++i) {
|
||||
bool is_iregs = trace->used_iregs.test(i);
|
||||
bool is_fregs = trace->used_fregs.test(i);
|
||||
bool is_vregs = trace->used_vregs.test(i);
|
||||
if (is_iregs || is_fregs || is_vregs) {
|
||||
if (is_iregs && i == 0)
|
||||
continue;
|
||||
++delay;
|
||||
}
|
||||
}
|
||||
|
||||
Output.send(trace, delay);
|
||||
|
||||
DT(3, "pipeline-operands: " << *trace);
|
||||
|
||||
Input.pop();
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -5,14 +18,38 @@
|
||||
#include <iostream>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ITraceData {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<ITraceData>;
|
||||
ITraceData() {}
|
||||
virtual ~ITraceData() {}
|
||||
};
|
||||
|
||||
struct LsuTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<LsuTraceData>;
|
||||
std::vector<mem_addr_size_t> mem_addrs;
|
||||
LsuTraceData(uint32_t num_threads) : mem_addrs(num_threads) {}
|
||||
};
|
||||
|
||||
struct SFUTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<SFUTraceData>;
|
||||
struct {
|
||||
uint32_t id;
|
||||
uint32_t count;
|
||||
} bar;
|
||||
SFUTraceData(uint32_t bar_id, uint32_t bar_count) : bar{bar_id, bar_count} {}
|
||||
};
|
||||
|
||||
struct pipeline_trace_t {
|
||||
public:
|
||||
//--
|
||||
uint64_t uuid;
|
||||
const uint64_t uuid;
|
||||
const Arch& arch;
|
||||
|
||||
//--
|
||||
uint32_t cid;
|
||||
@@ -21,12 +58,9 @@ struct pipeline_trace_t {
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
bool fetch_stall;
|
||||
|
||||
//--
|
||||
bool wb;
|
||||
RegType rdest_type;
|
||||
uint32_t rdest;
|
||||
RegType rdest_type;
|
||||
bool wb;
|
||||
|
||||
//--
|
||||
RegMask used_iregs;
|
||||
@@ -36,73 +70,104 @@ struct pipeline_trace_t {
|
||||
//-
|
||||
ExeType exe_type;
|
||||
|
||||
//--
|
||||
std::vector<std::vector<mem_addr_size_t>> mem_addrs;
|
||||
|
||||
//--
|
||||
union {
|
||||
struct {
|
||||
LsuType type;
|
||||
} lsu;
|
||||
struct {
|
||||
AluType type;
|
||||
} alu;
|
||||
struct {
|
||||
FpuType type;
|
||||
} fpu;
|
||||
struct {
|
||||
GpuType type;
|
||||
WarpMask active_warps;
|
||||
} gpu;
|
||||
uint32_t unit_type;
|
||||
LsuType lsu_type;
|
||||
AluType alu_type;
|
||||
FpuType fpu_type;
|
||||
SfuType sfu_type;
|
||||
};
|
||||
|
||||
bool stalled;
|
||||
ITraceData::Ptr data;
|
||||
|
||||
pipeline_trace_t(uint64_t uuid_, const ArchDef& arch) {
|
||||
uuid = uuid_;
|
||||
cid = 0;
|
||||
wid = 0;
|
||||
tmask.reset();
|
||||
PC = 0;
|
||||
fetch_stall = false;
|
||||
wb = false;
|
||||
rdest = 0;
|
||||
rdest_type = RegType::None;
|
||||
used_iregs.reset();
|
||||
used_fregs.reset();
|
||||
used_vregs.reset();
|
||||
exe_type = ExeType::NOP;
|
||||
mem_addrs.resize(arch.num_threads());
|
||||
stalled = false;
|
||||
}
|
||||
int pid;
|
||||
bool sop;
|
||||
bool eop;
|
||||
|
||||
bool suspend() {
|
||||
bool old = stalled;
|
||||
stalled = true;
|
||||
bool fetch_stall;
|
||||
|
||||
pipeline_trace_t(uint64_t uuid, const Arch& arch)
|
||||
: uuid(uuid)
|
||||
, arch(arch)
|
||||
, cid(0)
|
||||
, wid(0)
|
||||
, tmask(0)
|
||||
, PC(0)
|
||||
, rdest(0)
|
||||
, rdest_type(RegType::None)
|
||||
, wb(false)
|
||||
, used_iregs(0)
|
||||
, used_fregs(0)
|
||||
, used_vregs(0)
|
||||
, exe_type(ExeType::ALU)
|
||||
, unit_type(0)
|
||||
, data(nullptr)
|
||||
, pid(-1)
|
||||
, sop(true)
|
||||
, eop(true)
|
||||
, fetch_stall(false)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
pipeline_trace_t(const pipeline_trace_t& rhs)
|
||||
: uuid(rhs.uuid)
|
||||
, arch(rhs.arch)
|
||||
, cid(rhs.cid)
|
||||
, wid(rhs.wid)
|
||||
, tmask(rhs.tmask)
|
||||
, PC(rhs.PC)
|
||||
, rdest(rhs.rdest)
|
||||
, rdest_type(rhs.rdest_type)
|
||||
, wb(rhs.wb)
|
||||
, used_iregs(rhs.used_iregs)
|
||||
, used_fregs(rhs.used_fregs)
|
||||
, used_vregs(rhs.used_vregs)
|
||||
, exe_type(rhs.exe_type)
|
||||
, unit_type(rhs.unit_type)
|
||||
, data(rhs.data)
|
||||
, pid(rhs.pid)
|
||||
, sop(rhs.sop)
|
||||
, eop(rhs.eop)
|
||||
, fetch_stall(rhs.fetch_stall)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
~pipeline_trace_t() {}
|
||||
|
||||
bool log_once(bool enable) {
|
||||
bool old = log_once_;
|
||||
log_once_ = enable;
|
||||
return old;
|
||||
}
|
||||
|
||||
void resume() {
|
||||
stalled = false;
|
||||
}
|
||||
private:
|
||||
bool log_once_;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const pipeline_trace_t& state) {
|
||||
os << "coreid=" << state.cid << ", wid=" << state.wid << ", PC=" << std::hex << state.PC;
|
||||
os << "cid=" << state.cid;
|
||||
os << ", wid=" << state.wid;
|
||||
os << ", tmask=";
|
||||
for (uint32_t i = 0, n = state.arch.num_threads(); i < n; ++i) {
|
||||
os << state.tmask.test(i);
|
||||
}
|
||||
os << ", PC=0x" << std::hex << state.PC;
|
||||
os << ", wb=" << state.wb;
|
||||
if (state.wb) {
|
||||
os << ", rd=" << state.rdest_type << std::dec << state.rdest;
|
||||
}
|
||||
os << ", ex=" << state.exe_type;
|
||||
if (state.pid != -1) {
|
||||
os << ", pid=" << state.pid;
|
||||
os << ", sop=" << state.sop;
|
||||
os << ", eop=" << state.eop;
|
||||
}
|
||||
os << " (#" << std::dec << state.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
class PipelineLatch {
|
||||
protected:
|
||||
const char* name_;
|
||||
std::queue<pipeline_trace_t*> queue_;
|
||||
|
||||
public:
|
||||
PipelineLatch(const char* name = nullptr)
|
||||
: name_(name)
|
||||
@@ -132,6 +197,10 @@ public:
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::swap(queue_, empty );
|
||||
}
|
||||
|
||||
protected:
|
||||
const char* name_;
|
||||
std::queue<pipeline_trace_t*> queue_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,168 +1,141 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "processor.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
#include "processor_impl.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
class Processor::Impl {
|
||||
private:
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Cache::Ptr> l2caches_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;
|
||||
Cache::Ptr l3cache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;
|
||||
ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||
: arch_(arch)
|
||||
, clusters_(arch.num_clusters())
|
||||
{
|
||||
SimPlatform::instance().initialize();
|
||||
|
||||
public:
|
||||
Impl(const ArchDef& arch)
|
||||
: cores_(arch.num_cores())
|
||||
, l2caches_(NUM_CLUSTERS)
|
||||
, l2_mem_switches_(NUM_CLUSTERS)
|
||||
{
|
||||
SimPlatform::instance().initialize();
|
||||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
uint32_t(arch.num_cores()) * arch.num_clusters()
|
||||
});
|
||||
|
||||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
cores_.at(i) = Core::Create(arch, i);
|
||||
// create L3 cache
|
||||
l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
|
||||
!L3_ENABLED,
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
log2ceil(L3_NUM_WAYS), // W
|
||||
0, // A
|
||||
XLEN, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
1, // number of ports
|
||||
uint8_t(arch.num_clusters()), // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}
|
||||
);
|
||||
|
||||
// connect L3 memory ports
|
||||
l3cache_->MemReqPort.bind(&memsim_->MemReqPort);
|
||||
memsim_->MemRspPort.bind(&l3cache_->MemRspPort);
|
||||
|
||||
// setup memory simulator
|
||||
auto memsim = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
arch.num_cores()
|
||||
});
|
||||
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1, &memsim->MemReqPort);
|
||||
std::vector<SimPort<MemRsp>*> mem_rsp_ports(1, &memsim->MemRspPort);
|
||||
|
||||
if (L3_ENABLE) {
|
||||
l3cache_ = Cache::Create("l3cache", Cache::Config{
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
L3_NUM_PORTS, // number of ports
|
||||
NUM_CLUSTERS, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}
|
||||
);
|
||||
l3cache_->MemReqPort.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
}
|
||||
} else if (NUM_CLUSTERS > 1) {
|
||||
l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
|
||||
l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
|
||||
std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);
|
||||
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", Cache::Config{
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
(uint8_t)cores_per_cluster, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
}
|
||||
} else {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
}
|
||||
}
|
||||
// create clusters
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
|
||||
// connect L3 core ports
|
||||
clusters_.at(i)->mem_req_port.bind(&l3cache_->CoreReqPorts.at(i));
|
||||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
SimPlatform::instance().finalize();
|
||||
}
|
||||
// set up memory perf recording
|
||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
--perf_mem_pending_reads_;
|
||||
});
|
||||
|
||||
void attach_ram(RAM* ram) {
|
||||
for (auto core : cores_) {
|
||||
core->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
this->reset();
|
||||
}
|
||||
|
||||
int run() {
|
||||
SimPlatform::instance().reset();
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().tick();
|
||||
running = false;
|
||||
for (auto& core : cores_) {
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_exit()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
ProcessorImpl::~ProcessorImpl() {
|
||||
SimPlatform::instance().finalize();
|
||||
}
|
||||
|
||||
void ProcessorImpl::attach_ram(RAM* ram) {
|
||||
for (auto cluster : clusters_) {
|
||||
cluster->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
|
||||
int ProcessorImpl::run(bool riscv_test) {
|
||||
SimPlatform::instance().reset();
|
||||
this->reset();
|
||||
|
||||
bool done;
|
||||
Word exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().tick();
|
||||
done = true;
|
||||
for (auto cluster : clusters_) {
|
||||
if (cluster->running()) {
|
||||
Word ec;
|
||||
if (cluster->check_exit(&ec, riscv_test)) {
|
||||
exitcode |= ec;
|
||||
} else {
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
}
|
||||
perf_mem_latency_ += perf_mem_pending_reads_;
|
||||
} while (!done);
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
};
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
void ProcessorImpl::reset() {
|
||||
perf_mem_reads_ = 0;
|
||||
perf_mem_writes_ = 0;
|
||||
perf_mem_latency_ = 0;
|
||||
perf_mem_pending_reads_ = 0;
|
||||
}
|
||||
|
||||
void ProcessorImpl::write_dcr(uint32_t addr, uint32_t value) {
|
||||
dcrs_.write(addr, value);
|
||||
}
|
||||
|
||||
ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
|
||||
ProcessorImpl::PerfStats perf;
|
||||
perf.mem_reads = perf_mem_reads_;
|
||||
perf.mem_writes = perf_mem_writes_;
|
||||
perf.mem_latency = perf_mem_latency_;
|
||||
perf.l3cache = l3cache_->perf_stats();
|
||||
for (auto cluster : clusters_) {
|
||||
perf.clusters += cluster->perf_stats();
|
||||
}
|
||||
return perf;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Processor::Processor(const ArchDef& arch)
|
||||
: impl_(new Impl(arch))
|
||||
Processor::Processor(const Arch& arch)
|
||||
: impl_(new ProcessorImpl(arch))
|
||||
{}
|
||||
|
||||
Processor::~Processor() {
|
||||
@@ -173,6 +146,10 @@ void Processor::attach_ram(RAM* mem) {
|
||||
impl_->attach_ram(mem);
|
||||
}
|
||||
|
||||
int Processor::run() {
|
||||
return impl_->run();
|
||||
int Processor::run(bool riscv_test) {
|
||||
return impl_->run(riscv_test);
|
||||
}
|
||||
|
||||
void Processor::write_dcr(uint32_t addr, uint32_t value) {
|
||||
return impl_->write_dcr(addr, value);
|
||||
}
|
||||
@@ -1,22 +1,39 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Arch;
|
||||
class RAM;
|
||||
class ProcessorImpl;
|
||||
|
||||
class Processor {
|
||||
public:
|
||||
Processor(const ArchDef& arch);
|
||||
Processor(const Arch& arch);
|
||||
~Processor();
|
||||
|
||||
void attach_ram(RAM* mem);
|
||||
|
||||
int run();
|
||||
int run(bool riscv_test);
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
ProcessorImpl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
66
sim/simx/processor_impl.h
Normal file
66
sim/simx/processor_impl.h
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mem_sim.h"
|
||||
#include "cache_sim.h"
|
||||
#include "constants.h"
|
||||
#include "dcrs.h"
|
||||
#include "cluster.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ProcessorImpl {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
CacheSim::PerfStats l3cache;
|
||||
Cluster::PerfStats clusters;
|
||||
|
||||
PerfStats()
|
||||
: mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
ProcessorImpl(const Arch& arch);
|
||||
~ProcessorImpl();
|
||||
|
||||
void attach_ram(RAM* mem);
|
||||
|
||||
int run(bool riscv_test);
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
ProcessorImpl::PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
|
||||
void reset();
|
||||
|
||||
const Arch& arch_;
|
||||
std::vector<std::shared_ptr<Cluster>> clusters_;
|
||||
DCRS dcrs_;
|
||||
MemSim::Ptr memsim_;
|
||||
CacheSim::Ptr l3cache_;
|
||||
uint64_t perf_mem_reads_;
|
||||
uint64_t perf_mem_writes_;
|
||||
uint64_t perf_mem_latency_;
|
||||
uint64_t perf_mem_pending_reads_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
@@ -6,20 +19,15 @@
|
||||
namespace vortex {
|
||||
|
||||
class Scoreboard {
|
||||
private:
|
||||
public:
|
||||
|
||||
struct reg_use_t {
|
||||
RegType type;
|
||||
uint32_t reg;
|
||||
uint64_t owner;
|
||||
};
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::vector<RegMask> in_use_vregs_;
|
||||
std::unordered_map<uint32_t, uint64_t> owners_;
|
||||
|
||||
public:
|
||||
Scoreboard(const ArchDef &arch)
|
||||
|
||||
Scoreboard(const Arch &arch)
|
||||
: in_use_iregs_(arch.num_warps())
|
||||
, in_use_fregs_(arch.num_warps())
|
||||
, in_use_vregs_(arch.num_warps())
|
||||
@@ -84,8 +92,7 @@ public:
|
||||
}
|
||||
|
||||
void reserve(pipeline_trace_t* state) {
|
||||
if (!state->wb)
|
||||
return;
|
||||
assert(state->wb);
|
||||
switch (state->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(state->wid).set(state->rdest);
|
||||
@@ -105,8 +112,7 @@ public:
|
||||
}
|
||||
|
||||
void release(pipeline_trace_t* state) {
|
||||
if (!state->wb)
|
||||
return;
|
||||
assert(state->wb);
|
||||
switch (state->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(state->wid).reset(state->rdest);
|
||||
@@ -123,6 +129,13 @@ public:
|
||||
uint32_t tag = (state->rdest << 16) | (state->wid << 4) | (int)state->rdest_type;
|
||||
owners_.erase(tag);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::vector<RegMask> in_use_vregs_;
|
||||
std::unordered_map<uint32_t, uint64_t> owners_;
|
||||
};
|
||||
|
||||
}
|
||||
138
sim/simx/shared_mem.cpp
Normal file
138
sim/simx/shared_mem.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "shared_mem.h"
|
||||
#include "core.h"
|
||||
#include <bitmanip.h>
|
||||
#include <vector>
|
||||
#include "types.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
class SharedMem::Impl {
|
||||
protected:
|
||||
SharedMem* simobject_;
|
||||
Config config_;
|
||||
RAM ram_;
|
||||
uint32_t bank_sel_addr_start_;
|
||||
uint32_t bank_sel_addr_end_;
|
||||
PerfStats perf_stats_;
|
||||
|
||||
uint64_t to_local_addr(uint64_t addr) {
|
||||
uint32_t total_lines = config_.capacity / config_.line_size;
|
||||
uint32_t line_bits = log2ceil(total_lines);
|
||||
uint32_t offset = bit_getw(addr, 0, line_bits-1);
|
||||
return offset;
|
||||
}
|
||||
|
||||
public:
|
||||
Impl(SharedMem* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, ram_(config.capacity, config.capacity)
|
||||
, bank_sel_addr_start_(0)
|
||||
, bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
|
||||
{}
|
||||
|
||||
virtual ~Impl() {}
|
||||
|
||||
void reset() {
|
||||
perf_stats_ = PerfStats();
|
||||
}
|
||||
|
||||
void read(void* data, uint64_t addr, uint32_t size) {
|
||||
auto s_addr = to_local_addr(addr);
|
||||
DPH(3, "Shared Mem addr=0x" << std::hex << s_addr << std::endl);
|
||||
ram_.read(data, s_addr, size);
|
||||
}
|
||||
|
||||
void write(const void* data, uint64_t addr, uint32_t size) {
|
||||
auto s_addr = to_local_addr(addr);
|
||||
DPH(3, "Shared Mem addr=0x" << std::hex << s_addr << std::endl);
|
||||
ram_.write(data, s_addr, size);
|
||||
}
|
||||
|
||||
void tick() {
|
||||
std::vector<bool> in_used_banks(config_.num_banks);
|
||||
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
|
||||
auto& core_req_port = simobject_->Inputs.at(req_id);
|
||||
if (core_req_port.empty())
|
||||
continue;
|
||||
|
||||
auto& core_req = core_req_port.front();
|
||||
|
||||
uint32_t bank_id = 0;
|
||||
if (bank_sel_addr_start_ <= bank_sel_addr_end_) {
|
||||
bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
|
||||
}
|
||||
|
||||
// bank conflict check
|
||||
if (in_used_banks.at(bank_id)) {
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
in_used_banks.at(bank_id) = true;
|
||||
|
||||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid};
|
||||
simobject_->Outputs.at(req_id).send(core_rsp, 1);
|
||||
}
|
||||
|
||||
// update perf counters
|
||||
perf_stats_.reads += !core_req.write;
|
||||
perf_stats_.writes += core_req.write;
|
||||
|
||||
// remove input
|
||||
core_req_port.pop();
|
||||
}
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SharedMem::SharedMem(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<SharedMem>(ctx, name)
|
||||
, Inputs(config.num_reqs, this)
|
||||
, Outputs(config.num_reqs, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
SharedMem::~SharedMem() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
void SharedMem::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void SharedMem::read(void* data, uint64_t addr, uint32_t size) {
|
||||
impl_->read(data, addr, size);
|
||||
}
|
||||
|
||||
void SharedMem::write(const void* data, uint64_t addr, uint32_t size) {
|
||||
impl_->write(data, addr, size);
|
||||
}
|
||||
|
||||
void SharedMem::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
const SharedMem::PerfStats& SharedMem::perf_stats() const {
|
||||
return impl_->perf_stats();
|
||||
}
|
||||
72
sim/simx/shared_mem.h
Normal file
72
sim/simx/shared_mem.h
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class SharedMem : public SimObject<SharedMem> {
|
||||
public:
|
||||
struct Config {
|
||||
uint32_t capacity;
|
||||
uint32_t line_size;
|
||||
uint32_t num_reqs;
|
||||
uint32_t num_banks;
|
||||
bool write_reponse;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t writes;
|
||||
uint64_t bank_stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
, writes(0)
|
||||
, bank_stalls(0)
|
||||
{}
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->reads += rhs.reads;
|
||||
this->writes += rhs.writes;
|
||||
this->bank_stalls += rhs.bank_stalls;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<SimPort<MemReq>> Inputs;
|
||||
std::vector<SimPort<MemRsp>> Outputs;
|
||||
|
||||
SharedMem(const SimContext& ctx, const char* name, const Config& config);
|
||||
virtual ~SharedMem();
|
||||
|
||||
void reset();
|
||||
|
||||
void read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void tick();
|
||||
|
||||
const PerfStats& perf_stats() const;
|
||||
|
||||
protected:
|
||||
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include <bitmanip.h>
|
||||
#include <vector>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
|
||||
class SharedMem : public SimObject<SharedMem> {
|
||||
public:
|
||||
struct Config {
|
||||
uint32_t num_reqs;
|
||||
uint32_t num_banks;
|
||||
uint32_t bank_offset;
|
||||
uint32_t latency;
|
||||
bool write_reponse;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t writes;
|
||||
uint64_t bank_stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
, writes(0)
|
||||
, bank_stalls(0)
|
||||
{}
|
||||
};
|
||||
|
||||
std::vector<SimPort<MemReq>> Inputs;
|
||||
std::vector<SimPort<MemRsp>> Outputs;
|
||||
|
||||
SharedMem(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<SharedMem>(ctx, name)
|
||||
, Inputs(config.num_reqs, this)
|
||||
, Outputs(config.num_reqs, this)
|
||||
, config_(config)
|
||||
, bank_sel_addr_start_(config.bank_offset)
|
||||
, bank_sel_addr_end_(config.bank_offset + log2up(config.num_banks)-1)
|
||||
{}
|
||||
|
||||
virtual ~SharedMem() {}
|
||||
|
||||
void reset() {
|
||||
perf_stats_ = PerfStats();
|
||||
}
|
||||
|
||||
void tick() {
|
||||
std::vector<bool> in_used_banks(config_.num_banks);
|
||||
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
|
||||
auto& core_req_port = this->Inputs.at(req_id);
|
||||
if (core_req_port.empty())
|
||||
continue;
|
||||
|
||||
auto& core_req = core_req_port.front();
|
||||
|
||||
uint32_t bank_id = (uint32_t)bit_getw(
|
||||
core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
|
||||
|
||||
// bank conflict check
|
||||
if (in_used_banks.at(bank_id))
|
||||
continue;
|
||||
|
||||
in_used_banks.at(bank_id) = true;
|
||||
|
||||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp{core_req.tag, core_req.core_id};
|
||||
this->Outputs.at(req_id).send(core_rsp, 1);
|
||||
}
|
||||
|
||||
// update perf counters
|
||||
perf_stats_.reads += !core_req.write;
|
||||
perf_stats_.writes += core_req.write;
|
||||
|
||||
// remove input
|
||||
core_req_port.pop();
|
||||
}
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Config config_;
|
||||
uint32_t bank_sel_addr_start_;
|
||||
uint32_t bank_sel_addr_end_;
|
||||
PerfStats perf_stats_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
#include "tex_unit.h"
|
||||
#include "core.h"
|
||||
#include <texturing.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
using namespace vortex;
|
||||
using namespace cocogfx;
|
||||
|
||||
enum class FilterMode {
|
||||
Point,
|
||||
Bilinear,
|
||||
Trilinear,
|
||||
};
|
||||
|
||||
TexUnit::TexUnit(Core* core) : core_(core) {}
|
||||
|
||||
TexUnit::~TexUnit() {}
|
||||
|
||||
void TexUnit::clear() {
|
||||
for (auto& state : states_) {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TexUnit::get_state(uint32_t state) {
|
||||
return states_.at(state);
|
||||
}
|
||||
|
||||
void TexUnit::set_state(uint32_t state, uint32_t value) {
|
||||
states_.at(state) = value;
|
||||
}
|
||||
|
||||
uint32_t TexUnit::read(int32_t u,
|
||||
int32_t v,
|
||||
int32_t lod,
|
||||
std::vector<mem_addr_size_t>* mem_addrs) {
|
||||
//--
|
||||
auto xu = Fixed<TEX_FXD_FRAC>::make(u);
|
||||
auto xv = Fixed<TEX_FXD_FRAC>::make(v);
|
||||
uint32_t base_addr = states_.at(TEX_STATE_ADDR) + states_.at(TEX_STATE_MIPOFF(lod));
|
||||
uint32_t log_width = std::max<int32_t>(states_.at(TEX_STATE_WIDTH) - lod, 0);
|
||||
uint32_t log_height = std::max<int32_t>(states_.at(TEX_STATE_HEIGHT) - lod, 0);
|
||||
auto format = (TexFormat)states_.at(TEX_STATE_FORMAT);
|
||||
auto filter = (FilterMode)states_.at(TEX_STATE_FILTER);
|
||||
auto wrapu = (WrapMode)states_.at(TEX_STATE_WRAPU);
|
||||
auto wrapv = (WrapMode)states_.at(TEX_STATE_WRAPV);
|
||||
|
||||
auto stride = Stride(format);
|
||||
|
||||
switch (filter) {
|
||||
case FilterMode::Bilinear: {
|
||||
// addressing
|
||||
uint32_t offset00, offset01, offset10, offset11;
|
||||
uint32_t alpha, beta;
|
||||
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
|
||||
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
|
||||
|
||||
uint32_t addr00 = base_addr + offset00 * stride;
|
||||
uint32_t addr01 = base_addr + offset01 * stride;
|
||||
uint32_t addr10 = base_addr + offset10 * stride;
|
||||
uint32_t addr11 = base_addr + offset11 * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel00(0), texel01(0), texel10(0), texel11(0);
|
||||
core_->dcache_read(&texel00, addr00, stride);
|
||||
core_->dcache_read(&texel01, addr01, stride);
|
||||
core_->dcache_read(&texel10, addr10, stride);
|
||||
core_->dcache_read(&texel11, addr11, stride);
|
||||
|
||||
mem_addrs->push_back({addr00, stride});
|
||||
mem_addrs->push_back({addr01, stride});
|
||||
mem_addrs->push_back({addr10, stride});
|
||||
mem_addrs->push_back({addr11, stride});
|
||||
|
||||
// filtering
|
||||
auto color = TexFilterLinear(
|
||||
format, texel00, texel01, texel10, texel11, alpha, beta);
|
||||
return color;
|
||||
}
|
||||
case FilterMode::Point: {
|
||||
// addressing
|
||||
uint32_t offset;
|
||||
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
|
||||
|
||||
uint32_t addr = base_addr + offset * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel(0);
|
||||
core_->dcache_read(&texel, addr, stride);
|
||||
mem_addrs->push_back({addr, stride});
|
||||
|
||||
// filtering
|
||||
auto color = TexFilterPoint(format, texel);
|
||||
return color;
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
|
||||
class TexUnit {
|
||||
public:
|
||||
TexUnit(Core* core);
|
||||
~TexUnit();
|
||||
|
||||
void clear();
|
||||
|
||||
uint32_t get_state(uint32_t state);
|
||||
|
||||
void set_state(uint32_t state, uint32_t value);
|
||||
|
||||
uint32_t read(int32_t u, int32_t v, int32_t lod, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
|
||||
private:
|
||||
|
||||
std::array<uint32_t, NUM_TEX_STATES> states_;
|
||||
Core* core_;
|
||||
};
|
||||
|
||||
}
|
||||
363
sim/simx/types.h
363
sim/simx/types.h
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
@@ -5,31 +18,42 @@
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
#include <stringutil.h>
|
||||
#include <VX_config.h>
|
||||
#include <simobject.h>
|
||||
#include "uuid_gen.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
typedef uint8_t Byte;
|
||||
#if XLEN == 32
|
||||
#if (XLEN == 32)
|
||||
typedef uint32_t Word;
|
||||
typedef int32_t WordI;
|
||||
typedef uint64_t DWord;
|
||||
typedef int64_t DWordI;
|
||||
#elif XLEN == 64
|
||||
typedef uint32_t WordF;
|
||||
#elif (XLEN == 64)
|
||||
typedef uint64_t Word;
|
||||
typedef int64_t WordI;
|
||||
typedef __uint128_t DWord;
|
||||
typedef __int128_t DWordI;
|
||||
typedef uint64_t WordF;
|
||||
#else
|
||||
#error unsupported XLEN
|
||||
#endif
|
||||
|
||||
typedef uint64_t FWord;
|
||||
#define MAX_NUM_CORES 1024
|
||||
#define MAX_NUM_THREADS 32
|
||||
#define MAX_NUM_WARPS 32
|
||||
#define MAX_NUM_REGS 32
|
||||
|
||||
typedef std::bitset<32> RegMask;
|
||||
typedef std::bitset<32> ThreadMask;
|
||||
typedef std::bitset<32> WarpMask;
|
||||
typedef std::bitset<MAX_NUM_CORES> CoreMask;
|
||||
typedef std::bitset<MAX_NUM_REGS> RegMask;
|
||||
typedef std::bitset<MAX_NUM_THREADS> ThreadMask;
|
||||
typedef std::bitset<MAX_NUM_WARPS> WarpMask;
|
||||
|
||||
typedef std::unordered_map<uint32_t, uint32_t> CSRs;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -40,8 +64,8 @@ enum class RegType {
|
||||
Vector
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
|
||||
switch (clss) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
||||
switch (type) {
|
||||
case RegType::None: break;
|
||||
case RegType::Integer: os << "x"; break;
|
||||
case RegType::Float: os << "f"; break;
|
||||
@@ -53,23 +77,19 @@ inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class ExeType {
|
||||
NOP,
|
||||
ALU,
|
||||
LSU,
|
||||
CSR,
|
||||
FPU,
|
||||
GPU,
|
||||
SFU,
|
||||
MAX,
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const ExeType& type) {
|
||||
switch (type) {
|
||||
case ExeType::NOP: os << "NOP"; break;
|
||||
case ExeType::ALU: os << "ALU"; break;
|
||||
case ExeType::LSU: os << "LSU"; break;
|
||||
case ExeType::CSR: os << "CSR"; break;
|
||||
case ExeType::FPU: os << "FPU"; break;
|
||||
case ExeType::GPU: os << "GPU"; break;
|
||||
case ExeType::SFU: os << "SFU"; break;
|
||||
case ExeType::MAX: break;
|
||||
}
|
||||
return os;
|
||||
@@ -82,8 +102,7 @@ enum class AluType {
|
||||
BRANCH,
|
||||
SYSCALL,
|
||||
IMUL,
|
||||
IDIV,
|
||||
CMOV,
|
||||
IDIV
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
@@ -93,7 +112,6 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
case AluType::SYSCALL: os << "SYSCALL"; break;
|
||||
case AluType::IMUL: os << "IMUL"; break;
|
||||
case AluType::IDIV: os << "IDIV"; break;
|
||||
case AluType::CMOV: os << "CMOV"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -103,16 +121,14 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
enum class LsuType {
|
||||
LOAD,
|
||||
STORE,
|
||||
FENCE,
|
||||
PREFETCH,
|
||||
FENCE
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const LsuType& type) {
|
||||
switch (type) {
|
||||
case LsuType::LOAD: os << "LOAD"; break;
|
||||
case LsuType::STORE: os << "STORE"; break;
|
||||
case LsuType::FENCE: os << "FENCE"; break;
|
||||
case LsuType::PREFETCH: os << "PREFETCH"; break;
|
||||
case LsuType::LOAD: os << "LOAD"; break;
|
||||
case LsuType::STORE: os << "STORE"; break;
|
||||
case LsuType::FENCE: os << "FENCE"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -141,21 +157,6 @@ struct mem_addr_size_t {
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
inline AddrType get_addr_type(Word addr, uint32_t size) {
|
||||
__unused (size);
|
||||
if (SM_ENABLE) {
|
||||
if (addr >= (SMEM_BASE_ADDR - SMEM_SIZE)
|
||||
&& addr < SMEM_BASE_ADDR) {
|
||||
assert((addr + size) <= SMEM_BASE_ADDR);
|
||||
return AddrType::Shared;
|
||||
}
|
||||
}
|
||||
if (addr >= IO_BASE_ADDR) {
|
||||
return AddrType::IO;
|
||||
}
|
||||
return AddrType::Global;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class FpuType {
|
||||
@@ -179,23 +180,31 @@ inline std::ostream &operator<<(std::ostream &os, const FpuType& type) {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class GpuType {
|
||||
enum class SfuType {
|
||||
TMC,
|
||||
WSPAWN,
|
||||
SPLIT,
|
||||
JOIN,
|
||||
BAR,
|
||||
TEX,
|
||||
PRED,
|
||||
CSRRW,
|
||||
CSRRS,
|
||||
CSRRC,
|
||||
CMOV
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const GpuType& type) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const SfuType& type) {
|
||||
switch (type) {
|
||||
case GpuType::TMC: os << "TMC"; break;
|
||||
case GpuType::WSPAWN: os << "WSPAWN"; break;
|
||||
case GpuType::SPLIT: os << "SPLIT"; break;
|
||||
case GpuType::JOIN: os << "JOIN"; break;
|
||||
case GpuType::BAR: os << "BAR"; break;
|
||||
case GpuType::TEX: os << "TEX"; break;
|
||||
case SfuType::TMC: os << "TMC"; break;
|
||||
case SfuType::WSPAWN: os << "WSPAWN"; break;
|
||||
case SfuType::SPLIT: os << "SPLIT"; break;
|
||||
case SfuType::JOIN: os << "JOIN"; break;
|
||||
case SfuType::BAR: os << "BAR"; break;
|
||||
case SfuType::PRED: os << "PRED"; break;
|
||||
case SfuType::CSRRW: os << "CSRRW"; break;
|
||||
case SfuType::CSRRS: os << "CSRRS"; break;
|
||||
case SfuType::CSRRC: os << "CSRRC"; break;
|
||||
case SfuType::CMOV: os << "CMOV"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -218,31 +227,32 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct MemReq {
|
||||
uint64_t addr;
|
||||
bool write;
|
||||
bool non_cacheable;
|
||||
uint32_t tag;
|
||||
uint32_t core_id;
|
||||
uint64_t uuid;
|
||||
uint64_t addr;
|
||||
bool write;
|
||||
AddrType type;
|
||||
uint32_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
MemReq(uint64_t _addr = 0,
|
||||
bool _write = false,
|
||||
bool _non_cacheable = false,
|
||||
uint64_t _tag = 0,
|
||||
uint32_t _core_id = 0,
|
||||
uint64_t _uuid = 0
|
||||
) : addr(_addr)
|
||||
, write(_write)
|
||||
, non_cacheable(_non_cacheable)
|
||||
, tag(_tag)
|
||||
, core_id(_core_id)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
MemReq(uint64_t _addr = 0,
|
||||
bool _write = false,
|
||||
AddrType _type = AddrType::Global,
|
||||
uint64_t _tag = 0,
|
||||
uint32_t _cid = 0,
|
||||
uint64_t _uuid = 0
|
||||
) : addr(_addr)
|
||||
, write(_write)
|
||||
, type(_type)
|
||||
, tag(_tag)
|
||||
, cid(_cid)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
|
||||
os << "mem-" << (req.write ? "wr" : "rd") << ": ";
|
||||
os << "addr=" << std::hex << req.addr << std::dec << ", tag=" << req.tag << ", core_id=" << req.core_id;
|
||||
os << "addr=0x" << std::hex << req.addr << ", type=" << req.type;
|
||||
os << std::dec << ", tag=" << req.tag << ", cid=" << req.cid;
|
||||
os << " (#" << std::dec << req.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
@@ -250,18 +260,19 @@ inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct MemRsp {
|
||||
uint64_t tag;
|
||||
uint32_t core_id;
|
||||
uint64_t uuid;
|
||||
MemRsp(uint64_t _tag = 0, uint32_t _core_id = 0, uint64_t _uuid = 0)
|
||||
: tag (_tag)
|
||||
, core_id(_core_id)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
uint64_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
MemRsp(uint64_t _tag = 0, uint32_t _cid = 0, uint64_t _uuid = 0)
|
||||
: tag (_tag)
|
||||
, cid(_cid)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
||||
os << "mem-rsp: tag=" << rsp.tag << ", core_id=" << rsp.core_id;
|
||||
os << "mem-rsp: tag=" << rsp.tag << ", cid=" << rsp.cid;
|
||||
os << " (#" << std::dec << rsp.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
@@ -270,10 +281,6 @@ inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
||||
|
||||
template <typename T>
|
||||
class HashTable {
|
||||
private:
|
||||
std::vector<std::pair<bool, T>> entries_;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
HashTable(uint32_t capacity)
|
||||
: entries_(capacity)
|
||||
@@ -336,92 +343,180 @@ public:
|
||||
}
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<bool, T>> entries_;
|
||||
uint32_t size_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp, uint32_t MaxInputs = 32>
|
||||
template <typename Req, typename Rsp>
|
||||
class Switch : public SimObject<Switch<Req, Rsp>> {
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
uint32_t cursor_;
|
||||
uint32_t tag_shift_;
|
||||
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
Switch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_inputs = 1,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<Switch<Req, Rsp, MaxInputs>>(ctx, name)
|
||||
: SimObject<Switch<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursor_(0)
|
||||
, tag_shift_(log2ceil(num_inputs))
|
||||
, ReqIn(num_inputs, this)
|
||||
, ReqOut(this)
|
||||
, RspIn(this)
|
||||
, RspOut(num_inputs, this)
|
||||
, cursors_(num_outputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay_ != 0);
|
||||
assert(num_inputs <= MaxInputs);
|
||||
if (num_inputs == 1) {
|
||||
// bypass
|
||||
ReqIn.at(0).bind(&ReqOut);
|
||||
RspIn.bind(&RspOut.at(0));
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
if (num_inputs == num_outputs) {
|
||||
// bypass mode
|
||||
for (uint32_t i = 0; i < num_inputs; ++i) {
|
||||
ReqIn.at(i).bind(&ReqOut.at(i));
|
||||
RspOut.at(i).bind(&RspIn.at(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
cursor_ = 0;
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
if (ReqIn.size() == 1)
|
||||
void tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
uint32_t R = 1 << lg_num_reqs_;
|
||||
|
||||
// skip bypass mode
|
||||
if (I == O)
|
||||
return;
|
||||
|
||||
// process incomming requests
|
||||
for (uint32_t i = 0, n = ReqIn.size(); i < n; ++i) {
|
||||
uint32_t j = (cursor_ + i) % n;
|
||||
auto& req_in = ReqIn.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (tag_shift_) {
|
||||
req.tag = (req.tag << tag_shift_) | j;
|
||||
// process incomming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
auto& req_in = ReqIn.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | i;
|
||||
}
|
||||
DT(4, this->name() << "-" << req);
|
||||
ReqOut.at(o).send(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
break;
|
||||
}
|
||||
ReqOut.send(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming reponses
|
||||
if (!RspIn.empty()) {
|
||||
auto& rsp = RspIn.front();
|
||||
uint32_t port_id = 0;
|
||||
if (tag_shift_) {
|
||||
port_id = rsp.tag & ((1 << tag_shift_)-1);
|
||||
rsp.tag >>= tag_shift_;
|
||||
}
|
||||
RspOut.at(port_id).send(rsp, 1);
|
||||
RspIn.pop();
|
||||
|
||||
// process incoming reponses
|
||||
if (!RspOut.at(o).empty()) {
|
||||
auto& rsp = RspOut.at(o).front();
|
||||
uint32_t i = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
i = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg_num_reqs_;
|
||||
}
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
uint32_t j = o * R + i;
|
||||
RspIn.at(j).send(rsp, 1);
|
||||
RspOut.at(o).pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_cursor(uint32_t grant) {
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursor_ = grant + 1;
|
||||
cursors_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
SimPort<Req> ReqOut;
|
||||
SimPort<Rsp> RspIn;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
uint32_t lg_num_reqs_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SMemDemux : public SimObject<SMemDemux> {
|
||||
public:
|
||||
SimPort<MemReq> ReqIn;
|
||||
SimPort<MemRsp> RspIn;
|
||||
|
||||
SimPort<MemReq> ReqSm;
|
||||
SimPort<MemRsp> RspSm;
|
||||
|
||||
SimPort<MemReq> ReqDc;
|
||||
SimPort<MemRsp> RspDc;
|
||||
|
||||
SMemDemux(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay = 1
|
||||
) : SimObject<SMemDemux>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqSm(this)
|
||||
, RspSm(this)
|
||||
, ReqDc(this)
|
||||
, RspDc(this)
|
||||
, delay_(delay)
|
||||
{}
|
||||
|
||||
void reset() {}
|
||||
|
||||
void tick() {
|
||||
// process incomming requests
|
||||
if (!ReqIn.empty()) {
|
||||
auto& req = ReqIn.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
if (req.type == AddrType::Shared) {
|
||||
ReqSm.send(req, delay_);
|
||||
} else {
|
||||
ReqDc.send(req, delay_);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
|
||||
// process incoming reponses
|
||||
if (!RspSm.empty()) {
|
||||
auto& rsp = RspSm.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspSm.pop();
|
||||
}
|
||||
if (!RspDc.empty()) {
|
||||
auto& rsp = RspDc.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspDc.pop();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t delay_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
@@ -10,21 +23,25 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Warp::Warp(Core *core, uint32_t id)
|
||||
: id_(id)
|
||||
Warp::Warp(Core *core, uint32_t warp_id)
|
||||
: warp_id_(warp_id)
|
||||
, arch_(core->arch())
|
||||
, core_(core)
|
||||
, ireg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<FWord>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<uint64_t>(core->arch().num_regs()))
|
||||
, vreg_file_(core->arch().num_threads(), std::vector<Byte>(core->arch().vsize()))
|
||||
{
|
||||
this->clear();
|
||||
this->reset();
|
||||
}
|
||||
|
||||
void Warp::clear() {
|
||||
active_ = false;
|
||||
PC_ = STARTUP_ADDR;
|
||||
void Warp::reset() {
|
||||
PC_ = core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
#if (XLEN == 64)
|
||||
PC_ = (uint64_t(core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) | PC_;
|
||||
#endif
|
||||
tmask_.reset();
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i) {
|
||||
issued_instrs_ = 0;
|
||||
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) {
|
||||
for (auto& reg : ireg_file_.at(i)) {
|
||||
reg = 0;
|
||||
}
|
||||
@@ -35,31 +52,44 @@ void Warp::clear() {
|
||||
reg = 0;
|
||||
}
|
||||
}
|
||||
uui_gen_.reset();
|
||||
}
|
||||
|
||||
void Warp::eval(pipeline_trace_t *trace) {
|
||||
pipeline_trace_t* Warp::eval() {
|
||||
assert(tmask_.any());
|
||||
|
||||
DPH(2, "Fetch: coreid=" << core_->id() << ", wid=" << id_ << ", tmask=");
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i)
|
||||
DPN(2, tmask_.test(n-i-1));
|
||||
DPN(2, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << trace->uuid << ")" << std::endl);
|
||||
|
||||
/* Fetch and decode. */
|
||||
#ifndef NDEBUG
|
||||
uint32_t instr_uuid = uui_gen_.get_uuid(PC_);
|
||||
uint32_t g_wid = core_->id() * arch_.num_warps() + warp_id_;
|
||||
uint32_t instr_id = instr_uuid & 0xffff;
|
||||
uint32_t instr_ref = instr_uuid >> 16;
|
||||
uint64_t uuid = (uint64_t(instr_ref) << 32) | (g_wid << 16) | instr_id;
|
||||
#else
|
||||
uint64_t uuid = 0;
|
||||
#endif
|
||||
|
||||
DPH(1, "Fetch: cid=" << core_->id() << ", wid=" << warp_id_ << ", tmask=");
|
||||
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i)
|
||||
DPN(1, tmask_.test(i));
|
||||
DPN(1, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << uuid << ")" << std::endl);
|
||||
|
||||
// Fetch
|
||||
uint32_t instr_code = 0;
|
||||
core_->icache_read(&instr_code, PC_, sizeof(uint32_t));
|
||||
auto instr = core_->decoder().decode(instr_code);
|
||||
|
||||
// Decode
|
||||
auto instr = core_->decoder_.decode(instr_code);
|
||||
if (!instr) {
|
||||
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=" << PC_ << std::endl;
|
||||
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=0x" << PC_ << " (#" << std::dec << uuid << ")" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
DP(2, "Instr 0x" << std::hex << instr_code << ": " << *instr);
|
||||
DP(1, "Instr 0x" << std::hex << instr_code << ": " << *instr);
|
||||
|
||||
// Update trace
|
||||
// Create trace
|
||||
auto trace = new pipeline_trace_t(uuid, arch_);
|
||||
trace->cid = core_->id();
|
||||
trace->wid = id_;
|
||||
trace->wid = warp_id_;
|
||||
trace->PC = PC_;
|
||||
trace->tmask = tmask_;
|
||||
trace->rdest = instr->getRDest();
|
||||
@@ -68,18 +98,20 @@ void Warp::eval(pipeline_trace_t *trace) {
|
||||
// Execute
|
||||
this->execute(*instr, trace);
|
||||
|
||||
DP(4, "Register state:");
|
||||
for (uint32_t i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
DP(5, "Register state:");
|
||||
for (uint32_t i = 0; i < arch_.num_regs(); ++i) {
|
||||
DPN(5, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
// Integer register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
|
||||
DPN(5, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, '|');
|
||||
DPN(5, '|');
|
||||
// Floating point register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
|
||||
DPN(5, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
DPN(5, std::endl);
|
||||
}
|
||||
|
||||
return trace;
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __WARP_H
|
||||
#define __WARP_H
|
||||
|
||||
@@ -7,28 +20,26 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Arch;
|
||||
class Core;
|
||||
class Instr;
|
||||
class pipeline_trace_t;
|
||||
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
, PC(PC)
|
||||
, fallThrough(false)
|
||||
, unanimous(false)
|
||||
, fallthrough(false)
|
||||
{}
|
||||
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, PC(0)
|
||||
, fallThrough(true)
|
||||
, unanimous(false)
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, fallthrough(true)
|
||||
{}
|
||||
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
bool fallThrough;
|
||||
bool unanimous;
|
||||
bool fallthrough;
|
||||
};
|
||||
|
||||
struct vtype {
|
||||
@@ -40,72 +51,58 @@ struct vtype {
|
||||
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, uint32_t id);
|
||||
Warp(Core *core, uint32_t warp_id);
|
||||
|
||||
void clear();
|
||||
|
||||
bool active() const {
|
||||
return active_;
|
||||
}
|
||||
|
||||
void suspend() {
|
||||
active_ = false;
|
||||
}
|
||||
|
||||
void activate() {
|
||||
active_ = true;
|
||||
}
|
||||
|
||||
std::size_t getActiveThreads() const {
|
||||
if (active_)
|
||||
return tmask_.count();
|
||||
return 0;
|
||||
}
|
||||
void reset();
|
||||
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
return warp_id_;
|
||||
}
|
||||
|
||||
uint32_t getPC() const {
|
||||
Word getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void setPC(uint32_t PC) {
|
||||
void setPC(Word PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_.set(index, value);
|
||||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
uint32_t getTmask() const {
|
||||
if (active_)
|
||||
return tmask_.to_ulong();
|
||||
return 0;
|
||||
uint64_t getTmask() const {
|
||||
return tmask_.to_ulong();
|
||||
}
|
||||
|
||||
uint32_t getIRegValue(uint32_t reg) const {
|
||||
Word getIRegValue(uint32_t reg) const {
|
||||
return ireg_file_.at(0).at(reg);
|
||||
}
|
||||
|
||||
void eval(pipeline_trace_t *);
|
||||
uint64_t incr_instrs() {
|
||||
return issued_instrs_++;
|
||||
}
|
||||
|
||||
pipeline_trace_t* eval();
|
||||
|
||||
private:
|
||||
|
||||
void execute(const Instr &instr, pipeline_trace_t *trace);
|
||||
|
||||
UUIDGenerator uui_gen_;
|
||||
|
||||
uint32_t id_;
|
||||
uint32_t warp_id_;
|
||||
const Arch& arch_;
|
||||
Core *core_;
|
||||
bool active_;
|
||||
uint64_t issued_instrs_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> ireg_file_;
|
||||
std::vector<std::vector<FWord>> freg_file_;
|
||||
std::vector<std::vector<Byte>> vreg_file_;
|
||||
std::stack<DomStackEntry> dom_stack_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> ireg_file_;
|
||||
std::vector<std::vector<uint64_t>> freg_file_;
|
||||
std::vector<std::vector<Byte>> vreg_file_;
|
||||
std::stack<DomStackEntry> ipdom_stack_;
|
||||
|
||||
struct vtype vtype_;
|
||||
uint32_t vl_;
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS = --exe --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-EOFNEWLINE
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) vortex_afu.h
|
||||
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
@@ -1,30 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../rtl/fp_cores/fpnew/*"
|
||||
Reference in New Issue
Block a user